Diffstat (limited to 'contrib/llvm/tools/clang/lib/CodeGen')
66 files changed, 14375 insertions, 4963 deletions
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/ABIInfo.h b/contrib/llvm/tools/clang/lib/CodeGen/ABIInfo.h index a65f270..530a7ef 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/ABIInfo.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/ABIInfo.h @@ -18,20 +18,25 @@ namespace llvm { class Value; class LLVMContext; class DataLayout; + class Type; } namespace clang { class ASTContext; class TargetInfo; - namespace CodeGen { - class ABIArgInfo; - class Address; - class CGCXXABI; - class CGFunctionInfo; - class CodeGenFunction; - class CodeGenTypes; - } +namespace CodeGen { + class ABIArgInfo; + class Address; + class CGCXXABI; + class CGFunctionInfo; + class CodeGenFunction; + class CodeGenTypes; + class SwiftABIInfo; + +namespace swiftcall { + class SwiftAggLowering; +} // FIXME: All of this stuff should be part of the target interface // somehow. It is currently here because it is not clear how to factor @@ -55,6 +60,8 @@ namespace clang { virtual ~ABIInfo(); + virtual bool supportsSwift() const { return false; } + CodeGen::CGCXXABI &getCXXABI() const; ASTContext &getContext() const; llvm::LLVMContext &getVMContext() const; @@ -85,6 +92,8 @@ namespace clang { CodeGen::Address VAListAddr, QualType Ty) const = 0; + bool isAndroid() const; + /// Emit the target dependent code to load a value of /// \arg Ty from the \c __builtin_ms_va_list pointed to by \arg VAListAddr. virtual CodeGen::Address EmitMSVAArg(CodeGen::CodeGenFunction &CGF, @@ -110,7 +119,35 @@ namespace clang { CodeGen::ABIArgInfo getNaturalAlignIndirectInReg(QualType Ty, bool Realign = false) const; + + + }; + + /// A refining implementation of ABIInfo for targets that support swiftcall. + /// + /// If we find ourselves wanting multiple such refinements, they'll probably + /// be independent refinements, and we should probably find another way + /// to do it than simple inheritance. 
+ class SwiftABIInfo : public ABIInfo { + public: + SwiftABIInfo(CodeGen::CodeGenTypes &cgt) : ABIInfo(cgt) {} + + bool supportsSwift() const final override { return true; } + + virtual bool shouldPassIndirectlyForSwift(CharUnits totalSize, + ArrayRef<llvm::Type*> types, + bool asReturnValue) const = 0; + + virtual bool isLegalVectorTypeForSwift(CharUnits totalSize, + llvm::Type *eltTy, + unsigned elts) const; + + static bool classof(const ABIInfo *info) { + return info->supportsSwift(); + } }; + +} // end namespace CodeGen } // end namespace clang #endif diff --git a/contrib/llvm/tools/clang/lib/CodeGen/BackendUtil.cpp b/contrib/llvm/tools/clang/lib/CodeGen/BackendUtil.cpp index 6d746c2..165b6dd 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/BackendUtil.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/BackendUtil.cpp @@ -16,19 +16,21 @@ #include "clang/Frontend/Utils.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Triple.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Bitcode/BitcodeWriterPass.h" +#include "llvm/Bitcode/ReaderWriter.h" #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/FunctionInfo.h" +#include "llvm/IR/ModuleSummaryIndex.h" #include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" #include "llvm/IR/Verifier.h" #include "llvm/MC/SubtargetFeature.h" -#include "llvm/Object/FunctionIndexObjectFile.h" +#include "llvm/Object/ModuleSummaryIndexObjectFile.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/PrettyStackTrace.h" #include "llvm/Support/TargetRegistry.h" @@ -42,6 +44,7 @@ #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/ObjCARC.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/GVN.h" #include "llvm/Transforms/Utils/SymbolRewriter.h" #include <memory> using namespace clang; @@ -58,9 +61,7 @@ class EmitAssemblyHelper { Timer CodeGenerationTime; - mutable legacy::PassManager *CodeGenPasses; - mutable legacy::PassManager *PerModulePasses; - mutable legacy::FunctionPassManager *PerFunctionPasses; + std::unique_ptr<raw_pwrite_stream> OS; private: TargetIRAnalysis getTargetIRAnalysis() const { @@ -70,70 +71,44 @@ private: return TargetIRAnalysis(); } - legacy::PassManager *getCodeGenPasses() const { - if (!CodeGenPasses) { - CodeGenPasses = new legacy::PassManager(); - CodeGenPasses->add( - createTargetTransformInfoWrapperPass(getTargetIRAnalysis())); - } - return CodeGenPasses; - } - - legacy::PassManager *getPerModulePasses() const { - if (!PerModulePasses) { - PerModulePasses = new legacy::PassManager(); - PerModulePasses->add( - createTargetTransformInfoWrapperPass(getTargetIRAnalysis())); - } - return PerModulePasses; - } - - legacy::FunctionPassManager *getPerFunctionPasses() const { - if (!PerFunctionPasses) { - PerFunctionPasses = new legacy::FunctionPassManager(TheModule); - PerFunctionPasses->add( - createTargetTransformInfoWrapperPass(getTargetIRAnalysis())); - } - return PerFunctionPasses; - } + /// Set LLVM command line options passed through -backend-option. + void setCommandLineOpts(); - void CreatePasses(FunctionInfoIndex *FunctionIndex); + void CreatePasses(legacy::PassManager &MPM, legacy::FunctionPassManager &FPM, + ModuleSummaryIndex *ModuleSummary); /// Generates the TargetMachine. - /// Returns Null if it is unable to create the target machine. 
+ /// Leaves TM unchanged if it is unable to create the target machine. /// Some of our clang tests specify triples which are not built /// into clang. This is okay because these tests check the generated /// IR, and they require DataLayout which depends on the triple. /// In this case, we allow this method to fail and not report an error. /// When MustCreateTM is used, we print an error if we are unable to load /// the requested target. - TargetMachine *CreateTargetMachine(bool MustCreateTM); + void CreateTargetMachine(bool MustCreateTM); /// Add passes necessary to emit assembly or LLVM IR. /// /// \return True on success. - bool AddEmitPasses(BackendAction Action, raw_pwrite_stream &OS); + bool AddEmitPasses(legacy::PassManager &CodeGenPasses, BackendAction Action, + raw_pwrite_stream &OS); public: EmitAssemblyHelper(DiagnosticsEngine &_Diags, const CodeGenOptions &CGOpts, const clang::TargetOptions &TOpts, const LangOptions &LOpts, Module *M) : Diags(_Diags), CodeGenOpts(CGOpts), TargetOpts(TOpts), LangOpts(LOpts), - TheModule(M), CodeGenerationTime("Code Generation Time"), - CodeGenPasses(nullptr), PerModulePasses(nullptr), - PerFunctionPasses(nullptr) {} + TheModule(M), CodeGenerationTime("Code Generation Time") {} ~EmitAssemblyHelper() { - delete CodeGenPasses; - delete PerModulePasses; - delete PerFunctionPasses; if (CodeGenOpts.DisableFree) BuryPointer(std::move(TM)); } std::unique_ptr<TargetMachine> TM; - void EmitAssembly(BackendAction Action, raw_pwrite_stream *OS); + void EmitAssembly(BackendAction Action, + std::unique_ptr<raw_pwrite_stream> OS); }; // We need this wrapper to access LangOpts and CGOpts from extension functions @@ -172,8 +147,19 @@ static void addAddDiscriminatorsPass(const PassManagerBuilder &Builder, PM.add(createAddDiscriminatorsPass()); } +static void addCleanupPassesForSampleProfiler( + const PassManagerBuilder &Builder, legacy::PassManagerBase &PM) { + // instcombine is needed before sample profile annotation because it converts + // certain function calls to be inlinable. simplifycfg and sroa are needed + // before instcombine for necessary preparation. E.g. load store is eliminated + // properly so that instcombine will not introduce unecessary liverange. 
+ PM.add(createCFGSimplificationPass()); + PM.add(createSROAPass()); + PM.add(createInstructionCombiningPass()); +} + static void addBoundsCheckingPass(const PassManagerBuilder &Builder, - legacy::PassManagerBase &PM) { + legacy::PassManagerBase &PM) { PM.add(createBoundsCheckingPass()); } @@ -189,6 +175,7 @@ static void addSanitizerCoveragePass(const PassManagerBuilder &Builder, Opts.TraceBB = CGOpts.SanitizeCoverageTraceBB; Opts.TraceCmp = CGOpts.SanitizeCoverageTraceCmp; Opts.Use8bitCounters = CGOpts.SanitizeCoverage8bitCounters; + Opts.TracePC = CGOpts.SanitizeCoverageTracePC; PM.add(createSanitizerCoverageModulePass(Opts)); } @@ -198,14 +185,17 @@ static void addAddressSanitizerPasses(const PassManagerBuilder &Builder, static_cast<const PassManagerBuilderWrapper&>(Builder); const CodeGenOptions &CGOpts = BuilderWrapper.getCGOpts(); bool Recover = CGOpts.SanitizeRecover.has(SanitizerKind::Address); - PM.add(createAddressSanitizerFunctionPass(/*CompileKernel*/false, Recover)); + bool UseAfterScope = CGOpts.SanitizeAddressUseAfterScope; + PM.add(createAddressSanitizerFunctionPass(/*CompileKernel*/ false, Recover, + UseAfterScope)); PM.add(createAddressSanitizerModulePass(/*CompileKernel*/false, Recover)); } static void addKernelAddressSanitizerPasses(const PassManagerBuilder &Builder, legacy::PassManagerBase &PM) { - PM.add(createAddressSanitizerFunctionPass(/*CompileKernel*/true, - /*Recover*/true)); + PM.add(createAddressSanitizerFunctionPass( + /*CompileKernel*/ true, + /*Recover*/ true, /*UseAfterScope*/ false)); PM.add(createAddressSanitizerModulePass(/*CompileKernel*/true, /*Recover*/true)); } @@ -243,6 +233,19 @@ static void addDataFlowSanitizerPass(const PassManagerBuilder &Builder, PM.add(createDataFlowSanitizerPass(LangOpts.SanitizerBlacklistFiles)); } +static void addEfficiencySanitizerPass(const PassManagerBuilder &Builder, + legacy::PassManagerBase &PM) { + const PassManagerBuilderWrapper &BuilderWrapper = + static_cast<const PassManagerBuilderWrapper&>(Builder); + const LangOptions &LangOpts = BuilderWrapper.getLangOpts(); + EfficiencySanitizerOptions Opts; + if (LangOpts.Sanitize.has(SanitizerKind::EfficiencyCacheFrag)) + Opts.ToolType = EfficiencySanitizerOptions::ESAN_CacheFrag; + else if (LangOpts.Sanitize.has(SanitizerKind::EfficiencyWorkingSet)) + Opts.ToolType = EfficiencySanitizerOptions::ESAN_WorkingSet; + PM.add(createEfficiencySanitizerPass(Opts)); +} + static TargetLibraryInfoImpl *createTLII(llvm::Triple &TargetTriple, const CodeGenOptions &CodeGenOpts) { TargetLibraryInfoImpl *TLII = new TargetLibraryInfoImpl(TargetTriple); @@ -277,7 +280,9 @@ static void addSymbolRewriterPass(const CodeGenOptions &Opts, MPM->add(createRewriteSymbolsPass(DL)); } -void EmitAssemblyHelper::CreatePasses(FunctionInfoIndex *FunctionIndex) { +void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, + legacy::FunctionPassManager &FPM, + ModuleSummaryIndex *ModuleSummary) { if (CodeGenOpts.DisableLLVMPasses) return; @@ -300,7 +305,8 @@ void EmitAssemblyHelper::CreatePasses(FunctionInfoIndex *FunctionIndex) { switch (Inlining) { case CodeGenOptions::NoInlining: break; - case CodeGenOptions::NormalInlining: { + case CodeGenOptions::NormalInlining: + case CodeGenOptions::OnlyHintInlining: { PMBuilder.Inliner = createFunctionInliningPass(OptLevel, CodeGenOpts.OptimizeSize); break; @@ -321,22 +327,28 @@ void EmitAssemblyHelper::CreatePasses(FunctionInfoIndex *FunctionIndex) { PMBuilder.SLPVectorize = CodeGenOpts.VectorizeSLP; PMBuilder.LoopVectorize = CodeGenOpts.VectorizeLoop; - 
PMBuilder.DisableUnitAtATime = !CodeGenOpts.UnitAtATime; PMBuilder.DisableUnrollLoops = !CodeGenOpts.UnrollLoops; PMBuilder.MergeFunctions = CodeGenOpts.MergeFunctions; + PMBuilder.PrepareForThinLTO = CodeGenOpts.EmitSummaryIndex; PMBuilder.PrepareForLTO = CodeGenOpts.PrepareForLTO; PMBuilder.RerollLoops = CodeGenOpts.RerollLoops; - legacy::PassManager *MPM = getPerModulePasses(); - // If we are performing a ThinLTO importing compile, invoke the LTO - // pipeline and pass down the in-memory function index. - if (FunctionIndex) { - PMBuilder.FunctionIndex = FunctionIndex; - PMBuilder.populateLTOPassManager(*MPM); + // pipeline and pass down the in-memory module summary index. + if (ModuleSummary) { + PMBuilder.ModuleSummary = ModuleSummary; + PMBuilder.populateThinLTOPassManager(MPM); return; } + // Add target-specific passes that need to run as early as possible. + if (TM) + PMBuilder.addExtension( + PassManagerBuilder::EP_EarlyAsPossible, + [&](const PassManagerBuilder &, legacy::PassManagerBase &PM) { + TM->addEarlyAsPossiblePasses(PM); + }); + PMBuilder.addExtension(PassManagerBuilder::EP_EarlyAsPossible, addAddDiscriminatorsPass); @@ -401,15 +413,20 @@ void EmitAssemblyHelper::CreatePasses(FunctionInfoIndex *FunctionIndex) { addDataFlowSanitizerPass); } + if (LangOpts.Sanitize.hasOneOf(SanitizerKind::Efficiency)) { + PMBuilder.addExtension(PassManagerBuilder::EP_OptimizerLast, + addEfficiencySanitizerPass); + PMBuilder.addExtension(PassManagerBuilder::EP_EnabledOnOptLevel0, + addEfficiencySanitizerPass); + } + // Set up the per-function pass manager. - legacy::FunctionPassManager *FPM = getPerFunctionPasses(); if (CodeGenOpts.VerifyModule) - FPM->add(createVerifierPass()); - PMBuilder.populateFunctionPassManager(*FPM); + FPM.add(createVerifierPass()); // Set up the per-module pass manager. 
if (!CodeGenOpts.RewriteMapFiles.empty()) - addSymbolRewriterPass(CodeGenOpts, MPM); + addSymbolRewriterPass(CodeGenOpts, &MPM); if (!CodeGenOpts.DisableGCov && (CodeGenOpts.EmitGcovArcs || CodeGenOpts.EmitGcovNotes)) { @@ -424,25 +441,56 @@ void EmitAssemblyHelper::CreatePasses(FunctionInfoIndex *FunctionIndex) { Options.FunctionNamesInData = !CodeGenOpts.CoverageNoFunctionNamesInData; Options.ExitBlockBeforeBody = CodeGenOpts.CoverageExitBlockBeforeBody; - MPM->add(createGCOVProfilerPass(Options)); - if (CodeGenOpts.getDebugInfo() == CodeGenOptions::NoDebugInfo) - MPM->add(createStripSymbolsPass(true)); + MPM.add(createGCOVProfilerPass(Options)); + if (CodeGenOpts.getDebugInfo() == codegenoptions::NoDebugInfo) + MPM.add(createStripSymbolsPass(true)); } - if (CodeGenOpts.ProfileInstrGenerate) { + if (CodeGenOpts.hasProfileClangInstr()) { InstrProfOptions Options; Options.NoRedZone = CodeGenOpts.DisableRedZone; Options.InstrProfileOutput = CodeGenOpts.InstrProfileOutput; - MPM->add(createInstrProfilingPass(Options)); + MPM.add(createInstrProfilingLegacyPass(Options)); + } + if (CodeGenOpts.hasProfileIRInstr()) { + if (!CodeGenOpts.InstrProfileOutput.empty()) + PMBuilder.PGOInstrGen = CodeGenOpts.InstrProfileOutput; + else + PMBuilder.PGOInstrGen = "default.profraw"; + } + if (CodeGenOpts.hasProfileIRUse()) + PMBuilder.PGOInstrUse = CodeGenOpts.ProfileInstrumentUsePath; + + if (!CodeGenOpts.SampleProfileFile.empty()) { + MPM.add(createPruneEHPass()); + MPM.add(createSampleProfileLoaderPass(CodeGenOpts.SampleProfileFile)); + PMBuilder.addExtension(PassManagerBuilder::EP_EarlyAsPossible, + addCleanupPassesForSampleProfiler); } - if (!CodeGenOpts.SampleProfileFile.empty()) - MPM->add(createSampleProfileLoaderPass(CodeGenOpts.SampleProfileFile)); + PMBuilder.populateFunctionPassManager(FPM); + PMBuilder.populateModulePassManager(MPM); +} - PMBuilder.populateModulePassManager(*MPM); +void EmitAssemblyHelper::setCommandLineOpts() { + SmallVector<const char *, 16> BackendArgs; + BackendArgs.push_back("clang"); // Fake program name. + if (!CodeGenOpts.DebugPass.empty()) { + BackendArgs.push_back("-debug-pass"); + BackendArgs.push_back(CodeGenOpts.DebugPass.c_str()); + } + if (!CodeGenOpts.LimitFloatPrecision.empty()) { + BackendArgs.push_back("-limit-float-precision"); + BackendArgs.push_back(CodeGenOpts.LimitFloatPrecision.c_str()); + } + for (const std::string &BackendOption : CodeGenOpts.BackendOptions) + BackendArgs.push_back(BackendOption.c_str()); + BackendArgs.push_back(nullptr); + llvm::cl::ParseCommandLineOptions(BackendArgs.size() - 1, + BackendArgs.data()); } -TargetMachine *EmitAssemblyHelper::CreateTargetMachine(bool MustCreateTM) { +void EmitAssemblyHelper::CreateTargetMachine(bool MustCreateTM) { // Create the TargetMachine for generating code. std::string Error; std::string Triple = TheModule->getTargetTriple(); @@ -450,7 +498,7 @@ TargetMachine *EmitAssemblyHelper::CreateTargetMachine(bool MustCreateTM) { if (!TheTarget) { if (MustCreateTM) Diags.Report(diag::err_fe_unable_to_create_target) << Error; - return nullptr; + return; } unsigned CodeModel = @@ -464,27 +512,11 @@ TargetMachine *EmitAssemblyHelper::CreateTargetMachine(bool MustCreateTM) { assert(CodeModel != ~0u && "invalid code model!"); llvm::CodeModel::Model CM = static_cast<llvm::CodeModel::Model>(CodeModel); - SmallVector<const char *, 16> BackendArgs; - BackendArgs.push_back("clang"); // Fake program name. 
- if (!CodeGenOpts.DebugPass.empty()) { - BackendArgs.push_back("-debug-pass"); - BackendArgs.push_back(CodeGenOpts.DebugPass.c_str()); - } - if (!CodeGenOpts.LimitFloatPrecision.empty()) { - BackendArgs.push_back("-limit-float-precision"); - BackendArgs.push_back(CodeGenOpts.LimitFloatPrecision.c_str()); - } - for (const std::string &BackendOption : CodeGenOpts.BackendOptions) - BackendArgs.push_back(BackendOption.c_str()); - BackendArgs.push_back(nullptr); - llvm::cl::ParseCommandLineOptions(BackendArgs.size() - 1, - BackendArgs.data()); - std::string FeaturesStr = llvm::join(TargetOpts.Features.begin(), TargetOpts.Features.end(), ","); // Keep this synced with the equivalent code in tools/driver/cc1as_main.cpp. - llvm::Reloc::Model RM = llvm::Reloc::Default; + llvm::Optional<llvm::Reloc::Model> RM; if (CodeGenOpts.RelocationModel == "static") { RM = llvm::Reloc::Static; } else if (CodeGenOpts.RelocationModel == "pic") { @@ -539,38 +571,29 @@ TargetMachine *EmitAssemblyHelper::CreateTargetMachine(bool MustCreateTM) { Options.UseInitArray = CodeGenOpts.UseInitArray; Options.DisableIntegratedAS = CodeGenOpts.DisableIntegratedAS; Options.CompressDebugSections = CodeGenOpts.CompressDebugSections; + Options.RelaxELFRelocations = CodeGenOpts.RelaxELFRelocations; // Set EABI version. - Options.EABIVersion = llvm::StringSwitch<llvm::EABI>(CodeGenOpts.EABIVersion) + Options.EABIVersion = llvm::StringSwitch<llvm::EABI>(TargetOpts.EABIVersion) .Case("4", llvm::EABI::EABI4) .Case("5", llvm::EABI::EABI5) .Case("gnu", llvm::EABI::GNU) .Default(llvm::EABI::Default); + if (LangOpts.SjLjExceptions) + Options.ExceptionModel = llvm::ExceptionHandling::SjLj; + Options.LessPreciseFPMADOption = CodeGenOpts.LessPreciseFPMAD; Options.NoInfsFPMath = CodeGenOpts.NoInfsFPMath; Options.NoNaNsFPMath = CodeGenOpts.NoNaNsFPMath; Options.NoZerosInBSS = CodeGenOpts.NoZeroInitializedInBSS; Options.UnsafeFPMath = CodeGenOpts.UnsafeFPMath; Options.StackAlignmentOverride = CodeGenOpts.StackAlignment; - Options.PositionIndependentExecutable = LangOpts.PIELevel != 0; Options.FunctionSections = CodeGenOpts.FunctionSections; Options.DataSections = CodeGenOpts.DataSections; Options.UniqueSectionNames = CodeGenOpts.UniqueSectionNames; Options.EmulatedTLS = CodeGenOpts.EmulatedTLS; - switch (CodeGenOpts.getDebuggerTuning()) { - case CodeGenOptions::DebuggerKindGDB: - Options.DebuggerTuning = llvm::DebuggerKind::GDB; - break; - case CodeGenOptions::DebuggerKindLLDB: - Options.DebuggerTuning = llvm::DebuggerKind::LLDB; - break; - case CodeGenOptions::DebuggerKindSCE: - Options.DebuggerTuning = llvm::DebuggerKind::SCE; - break; - default: - break; - } + Options.DebuggerTuning = CodeGenOpts.getDebuggerTuning(); Options.MCOptions.MCRelaxAll = CodeGenOpts.RelaxAll; Options.MCOptions.MCSaveTempLabels = CodeGenOpts.SaveTempLabels; @@ -582,24 +605,18 @@ TargetMachine *EmitAssemblyHelper::CreateTargetMachine(bool MustCreateTM) { Options.MCOptions.AsmVerbose = CodeGenOpts.AsmVerbose; Options.MCOptions.ABIName = TargetOpts.ABI; - TargetMachine *TM = TheTarget->createTargetMachine(Triple, TargetOpts.CPU, - FeaturesStr, Options, - RM, CM, OptLevel); - - return TM; + TM.reset(TheTarget->createTargetMachine(Triple, TargetOpts.CPU, FeaturesStr, + Options, RM, CM, OptLevel)); } -bool EmitAssemblyHelper::AddEmitPasses(BackendAction Action, +bool EmitAssemblyHelper::AddEmitPasses(legacy::PassManager &CodeGenPasses, + BackendAction Action, raw_pwrite_stream &OS) { - - // Create the code generator passes. 
- legacy::PassManager *PM = getCodeGenPasses(); - // Add LibraryInfo. llvm::Triple TargetTriple(TheModule->getTargetTriple()); std::unique_ptr<TargetLibraryInfoImpl> TLII( createTLII(TargetTriple, CodeGenOpts)); - PM->add(new TargetLibraryInfoWrapperPass(*TLII)); + CodeGenPasses.add(new TargetLibraryInfoWrapperPass(*TLII)); // Normal mode, emit a .s or .o file by running the code generator. Note, // this also adds codegenerator level optimization passes. @@ -615,9 +632,9 @@ bool EmitAssemblyHelper::AddEmitPasses(BackendAction Action, // "codegen" passes so that it isn't run multiple times when there is // inlining happening. if (CodeGenOpts.OptimizationLevel > 0) - PM->add(createObjCARCContractPass()); + CodeGenPasses.add(createObjCARCContractPass()); - if (TM->addPassesToEmitFile(*PM, OS, CGFT, + if (TM->addPassesToEmitFile(CodeGenPasses, OS, CGFT, /*DisableVerify=*/!CodeGenOpts.VerifyModule)) { Diags.Report(diag::err_fe_unable_to_interface_with_target); return false; @@ -627,14 +644,15 @@ bool EmitAssemblyHelper::AddEmitPasses(BackendAction Action, } void EmitAssemblyHelper::EmitAssembly(BackendAction Action, - raw_pwrite_stream *OS) { + std::unique_ptr<raw_pwrite_stream> OS) { TimeRegion Region(llvm::TimePassesIsEnabled ? &CodeGenerationTime : nullptr); + setCommandLineOpts(); + bool UsesCodeGen = (Action != Backend_EmitNothing && Action != Backend_EmitBC && Action != Backend_EmitLL); - if (!TM) - TM.reset(CreateTargetMachine(UsesCodeGen)); + CreateTargetMachine(UsesCodeGen); if (UsesCodeGen && !TM) return; @@ -644,41 +662,54 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action, // If we are performing a ThinLTO importing compile, load the function // index into memory and pass it into CreatePasses, which will add it // to the PassManagerBuilder and invoke LTO passes. 
- std::unique_ptr<FunctionInfoIndex> FunctionIndex; + std::unique_ptr<ModuleSummaryIndex> ModuleSummary; if (!CodeGenOpts.ThinLTOIndexFile.empty()) { - ErrorOr<std::unique_ptr<FunctionInfoIndex>> IndexOrErr = - llvm::getFunctionIndexForFile(CodeGenOpts.ThinLTOIndexFile, - [&](const DiagnosticInfo &DI) { - TheModule->getContext().diagnose(DI); - }); + ErrorOr<std::unique_ptr<ModuleSummaryIndex>> IndexOrErr = + llvm::getModuleSummaryIndexForFile( + CodeGenOpts.ThinLTOIndexFile, [&](const DiagnosticInfo &DI) { + TheModule->getContext().diagnose(DI); + }); if (std::error_code EC = IndexOrErr.getError()) { std::string Error = EC.message(); errs() << "Error loading index file '" << CodeGenOpts.ThinLTOIndexFile << "': " << Error << "\n"; return; } - FunctionIndex = std::move(IndexOrErr.get()); - assert(FunctionIndex && "Expected non-empty function index"); + ModuleSummary = std::move(IndexOrErr.get()); + assert(ModuleSummary && "Expected non-empty module summary index"); } - CreatePasses(FunctionIndex.get()); + legacy::PassManager PerModulePasses; + PerModulePasses.add( + createTargetTransformInfoWrapperPass(getTargetIRAnalysis())); + + legacy::FunctionPassManager PerFunctionPasses(TheModule); + PerFunctionPasses.add( + createTargetTransformInfoWrapperPass(getTargetIRAnalysis())); + + CreatePasses(PerModulePasses, PerFunctionPasses, ModuleSummary.get()); + + legacy::PassManager CodeGenPasses; + CodeGenPasses.add( + createTargetTransformInfoWrapperPass(getTargetIRAnalysis())); switch (Action) { case Backend_EmitNothing: break; case Backend_EmitBC: - getPerModulePasses()->add(createBitcodeWriterPass( - *OS, CodeGenOpts.EmitLLVMUseLists, CodeGenOpts.EmitFunctionSummary)); + PerModulePasses.add(createBitcodeWriterPass( + *OS, CodeGenOpts.EmitLLVMUseLists, CodeGenOpts.EmitSummaryIndex, + CodeGenOpts.EmitSummaryIndex)); break; case Backend_EmitLL: - getPerModulePasses()->add( + PerModulePasses.add( createPrintModulePass(*OS, "", CodeGenOpts.EmitLLVMUseLists)); break; default: - if (!AddEmitPasses(Action, *OS)) + if (!AddEmitPasses(CodeGenPasses, Action, *OS)) return; } @@ -688,46 +719,165 @@ void EmitAssemblyHelper::EmitAssembly(BackendAction Action, // Run passes. For now we do all passes at once, but eventually we // would like to have the option of streaming code generation. 
- if (PerFunctionPasses) { + { PrettyStackTraceString CrashInfo("Per-function optimization"); - PerFunctionPasses->doInitialization(); + PerFunctionPasses.doInitialization(); for (Function &F : *TheModule) if (!F.isDeclaration()) - PerFunctionPasses->run(F); - PerFunctionPasses->doFinalization(); + PerFunctionPasses.run(F); + PerFunctionPasses.doFinalization(); } - if (PerModulePasses) { + { PrettyStackTraceString CrashInfo("Per-module optimization passes"); - PerModulePasses->run(*TheModule); + PerModulePasses.run(*TheModule); } - if (CodeGenPasses) { + { PrettyStackTraceString CrashInfo("Code generation"); - CodeGenPasses->run(*TheModule); + CodeGenPasses.run(*TheModule); } } void clang::EmitBackendOutput(DiagnosticsEngine &Diags, const CodeGenOptions &CGOpts, const clang::TargetOptions &TOpts, - const LangOptions &LOpts, StringRef TDesc, + const LangOptions &LOpts, const llvm::DataLayout &TDesc, Module *M, BackendAction Action, - raw_pwrite_stream *OS) { + std::unique_ptr<raw_pwrite_stream> OS) { EmitAssemblyHelper AsmHelper(Diags, CGOpts, TOpts, LOpts, M); - AsmHelper.EmitAssembly(Action, OS); + AsmHelper.EmitAssembly(Action, std::move(OS)); - // If an optional clang TargetInfo description string was passed in, use it to - // verify the LLVM TargetMachine's DataLayout. - if (AsmHelper.TM && !TDesc.empty()) { + // Verify clang's TargetInfo DataLayout against the LLVM TargetMachine's + // DataLayout. + if (AsmHelper.TM) { std::string DLDesc = M->getDataLayout().getStringRepresentation(); - if (DLDesc != TDesc) { + if (DLDesc != TDesc.getStringRepresentation()) { unsigned DiagID = Diags.getCustomDiagID( DiagnosticsEngine::Error, "backend data layout '%0' does not match " "expected target description '%1'"); - Diags.Report(DiagID) << DLDesc << TDesc; + Diags.Report(DiagID) << DLDesc << TDesc.getStringRepresentation(); + } + } +} + +static const char* getSectionNameForBitcode(const Triple &T) { + switch (T.getObjectFormat()) { + case Triple::MachO: + return "__LLVM,__bitcode"; + case Triple::COFF: + case Triple::ELF: + case Triple::UnknownObjectFormat: + return ".llvmbc"; + } + llvm_unreachable("Unimplemented ObjectFormatType"); +} + +static const char* getSectionNameForCommandline(const Triple &T) { + switch (T.getObjectFormat()) { + case Triple::MachO: + return "__LLVM,__cmdline"; + case Triple::COFF: + case Triple::ELF: + case Triple::UnknownObjectFormat: + return ".llvmcmd"; + } + llvm_unreachable("Unimplemented ObjectFormatType"); +} + +// With -fembed-bitcode, save a copy of the llvm IR as data in the +// __LLVM,__bitcode section. +void clang::EmbedBitcode(llvm::Module *M, const CodeGenOptions &CGOpts, + llvm::MemoryBufferRef Buf) { + if (CGOpts.getEmbedBitcode() == CodeGenOptions::Embed_Off) + return; + + // Save llvm.compiler.used and remote it. + SmallVector<Constant*, 2> UsedArray; + SmallSet<GlobalValue*, 4> UsedGlobals; + Type *UsedElementType = Type::getInt8Ty(M->getContext())->getPointerTo(0); + GlobalVariable *Used = collectUsedGlobalVariables(*M, UsedGlobals, true); + for (auto *GV : UsedGlobals) { + if (GV->getName() != "llvm.embedded.module" && + GV->getName() != "llvm.cmdline") + UsedArray.push_back( + ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, UsedElementType)); + } + if (Used) + Used->eraseFromParent(); + + // Embed the bitcode for the llvm module. + std::string Data; + ArrayRef<uint8_t> ModuleData; + Triple T(M->getTargetTriple()); + // Create a constant that contains the bitcode. 
+ // In case of embedding a marker, ignore the input Buf and use the empty + // ArrayRef. It is also legal to create a bitcode marker even Buf is empty. + if (CGOpts.getEmbedBitcode() != CodeGenOptions::Embed_Marker) { + if (!isBitcode((const unsigned char *)Buf.getBufferStart(), + (const unsigned char *)Buf.getBufferEnd())) { + // If the input is LLVM Assembly, bitcode is produced by serializing + // the module. Use-lists order need to be perserved in this case. + llvm::raw_string_ostream OS(Data); + llvm::WriteBitcodeToFile(M, OS, /* ShouldPreserveUseListOrder */ true); + ModuleData = + ArrayRef<uint8_t>((const uint8_t *)OS.str().data(), OS.str().size()); + } else + // If the input is LLVM bitcode, write the input byte stream directly. + ModuleData = ArrayRef<uint8_t>((const uint8_t *)Buf.getBufferStart(), + Buf.getBufferSize()); + } + llvm::Constant *ModuleConstant = + llvm::ConstantDataArray::get(M->getContext(), ModuleData); + llvm::GlobalVariable *GV = new llvm::GlobalVariable( + *M, ModuleConstant->getType(), true, llvm::GlobalValue::PrivateLinkage, + ModuleConstant); + GV->setSection(getSectionNameForBitcode(T)); + UsedArray.push_back( + ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, UsedElementType)); + if (llvm::GlobalVariable *Old = + M->getGlobalVariable("llvm.embedded.module", true)) { + assert(Old->hasOneUse() && + "llvm.embedded.module can only be used once in llvm.compiler.used"); + GV->takeName(Old); + Old->eraseFromParent(); + } else { + GV->setName("llvm.embedded.module"); + } + + // Skip if only bitcode needs to be embedded. + if (CGOpts.getEmbedBitcode() != CodeGenOptions::Embed_Bitcode) { + // Embed command-line options. + ArrayRef<uint8_t> CmdData(const_cast<uint8_t *>(CGOpts.CmdArgs.data()), + CGOpts.CmdArgs.size()); + llvm::Constant *CmdConstant = + llvm::ConstantDataArray::get(M->getContext(), CmdData); + GV = new llvm::GlobalVariable(*M, CmdConstant->getType(), true, + llvm::GlobalValue::PrivateLinkage, + CmdConstant); + GV->setSection(getSectionNameForCommandline(T)); + UsedArray.push_back( + ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, UsedElementType)); + if (llvm::GlobalVariable *Old = + M->getGlobalVariable("llvm.cmdline", true)) { + assert(Old->hasOneUse() && + "llvm.cmdline can only be used once in llvm.compiler.used"); + GV->takeName(Old); + Old->eraseFromParent(); + } else { + GV->setName("llvm.cmdline"); } } + + if (UsedArray.empty()) + return; + + // Recreate llvm.compiler.used. + ArrayType *ATy = ArrayType::get(UsedElementType, UsedArray.size()); + auto *NewUsed = new GlobalVariable( + *M, ATy, false, llvm::GlobalValue::AppendingLinkage, + llvm::ConstantArray::get(ATy, UsedArray), "llvm.compiler.used"); + NewUsed->setSection("llvm.metadata"); } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGAtomic.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGAtomic.cpp index 24de30b..7b747c1 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGAtomic.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGAtomic.cpp @@ -79,7 +79,7 @@ namespace { auto Offset = OrigBFI.Offset % C.toBits(lvalue.getAlignment()); AtomicSizeInBits = C.toBits( C.toCharUnitsFromBits(Offset + OrigBFI.Size + C.getCharWidth() - 1) - .RoundUpToAlignment(lvalue.getAlignment())); + .alignTo(lvalue.getAlignment())); auto VoidPtrAddr = CGF.EmitCastToVoidPtr(lvalue.getBitFieldPointer()); auto OffsetInChars = (C.toCharUnitsFromBits(OrigBFI.Offset) / lvalue.getAlignment()) * @@ -221,11 +221,13 @@ namespace { /// \param IsWeak true if atomic operation is weak, false otherwise. 
/// \returns Pair of values: previous value from storage (value type) and /// boolean flag (i1 type) with true if success and false otherwise. - std::pair<RValue, llvm::Value *> EmitAtomicCompareExchange( - RValue Expected, RValue Desired, - llvm::AtomicOrdering Success = llvm::SequentiallyConsistent, - llvm::AtomicOrdering Failure = llvm::SequentiallyConsistent, - bool IsWeak = false); + std::pair<RValue, llvm::Value *> + EmitAtomicCompareExchange(RValue Expected, RValue Desired, + llvm::AtomicOrdering Success = + llvm::AtomicOrdering::SequentiallyConsistent, + llvm::AtomicOrdering Failure = + llvm::AtomicOrdering::SequentiallyConsistent, + bool IsWeak = false); /// \brief Emits atomic update. /// \param AO Atomic ordering. @@ -241,11 +243,6 @@ namespace { /// Materialize an atomic r-value in atomic-layout memory. Address materializeRValue(RValue rvalue) const; - /// \brief Translates LLVM atomic ordering to GNU atomic ordering for - /// libcalls. - static AtomicExpr::AtomicOrderingKind - translateAtomicOrdering(const llvm::AtomicOrdering AO); - /// \brief Creates temp alloca for intermediate operations on atomic value. Address CreateTempAlloca() const; private: @@ -260,13 +257,17 @@ namespace { /// \brief Emits atomic compare-and-exchange op as a libcall. llvm::Value *EmitAtomicCompareExchangeLibcall( llvm::Value *ExpectedAddr, llvm::Value *DesiredAddr, - llvm::AtomicOrdering Success = llvm::SequentiallyConsistent, - llvm::AtomicOrdering Failure = llvm::SequentiallyConsistent); + llvm::AtomicOrdering Success = + llvm::AtomicOrdering::SequentiallyConsistent, + llvm::AtomicOrdering Failure = + llvm::AtomicOrdering::SequentiallyConsistent); /// \brief Emits atomic compare-and-exchange op as LLVM instruction. std::pair<llvm::Value *, llvm::Value *> EmitAtomicCompareExchangeOp( llvm::Value *ExpectedVal, llvm::Value *DesiredVal, - llvm::AtomicOrdering Success = llvm::SequentiallyConsistent, - llvm::AtomicOrdering Failure = llvm::SequentiallyConsistent, + llvm::AtomicOrdering Success = + llvm::AtomicOrdering::SequentiallyConsistent, + llvm::AtomicOrdering Failure = + llvm::AtomicOrdering::SequentiallyConsistent, bool IsWeak = false); /// \brief Emit atomic update as libcalls. void @@ -286,25 +287,6 @@ namespace { }; } -AtomicExpr::AtomicOrderingKind -AtomicInfo::translateAtomicOrdering(const llvm::AtomicOrdering AO) { - switch (AO) { - case llvm::Unordered: - case llvm::NotAtomic: - case llvm::Monotonic: - return AtomicExpr::AO_ABI_memory_order_relaxed; - case llvm::Acquire: - return AtomicExpr::AO_ABI_memory_order_acquire; - case llvm::Release: - return AtomicExpr::AO_ABI_memory_order_release; - case llvm::AcquireRelease: - return AtomicExpr::AO_ABI_memory_order_acq_rel; - case llvm::SequentiallyConsistent: - return AtomicExpr::AO_ABI_memory_order_seq_cst; - } - llvm_unreachable("Unhandled AtomicOrdering"); -} - Address AtomicInfo::CreateTempAlloca() const { Address TempAlloca = CGF.CreateMemTemp( (LVal.isBitField() && ValueSizeInBits > AtomicSizeInBits) ? 
ValueTy @@ -323,8 +305,7 @@ static RValue emitAtomicLibcall(CodeGenFunction &CGF, QualType resultType, CallArgList &args) { const CGFunctionInfo &fnInfo = - CGF.CGM.getTypes().arrangeFreeFunctionCall(resultType, args, - FunctionType::ExtInfo(), RequiredArgs::All); + CGF.CGM.getTypes().arrangeBuiltinFunctionCall(resultType, args); llvm::FunctionType *fnTy = CGF.CGM.getTypes().GetFunctionType(fnInfo); llvm::Constant *fn = CGF.CGM.CreateRuntimeFunction(fnTy, fnName); return CGF.EmitCall(fnInfo, fn, ReturnValueSlot(), args); @@ -422,33 +403,39 @@ static void emitAtomicCmpXchg(CodeGenFunction &CGF, AtomicExpr *E, bool IsWeak, /// instructions to cope with the provided (but possibly only dynamically known) /// FailureOrder. static void emitAtomicCmpXchgFailureSet(CodeGenFunction &CGF, AtomicExpr *E, - bool IsWeak, Address Dest, - Address Ptr, Address Val1, - Address Val2, + bool IsWeak, Address Dest, Address Ptr, + Address Val1, Address Val2, llvm::Value *FailureOrderVal, uint64_t Size, llvm::AtomicOrdering SuccessOrder) { llvm::AtomicOrdering FailureOrder; if (llvm::ConstantInt *FO = dyn_cast<llvm::ConstantInt>(FailureOrderVal)) { - switch (FO->getSExtValue()) { - default: - FailureOrder = llvm::Monotonic; - break; - case AtomicExpr::AO_ABI_memory_order_consume: - case AtomicExpr::AO_ABI_memory_order_acquire: - FailureOrder = llvm::Acquire; - break; - case AtomicExpr::AO_ABI_memory_order_seq_cst: - FailureOrder = llvm::SequentiallyConsistent; - break; - } - if (FailureOrder >= SuccessOrder) { - // Don't assert on undefined behaviour. + auto FOS = FO->getSExtValue(); + if (!llvm::isValidAtomicOrderingCABI(FOS)) + FailureOrder = llvm::AtomicOrdering::Monotonic; + else + switch ((llvm::AtomicOrderingCABI)FOS) { + case llvm::AtomicOrderingCABI::relaxed: + case llvm::AtomicOrderingCABI::release: + case llvm::AtomicOrderingCABI::acq_rel: + FailureOrder = llvm::AtomicOrdering::Monotonic; + break; + case llvm::AtomicOrderingCABI::consume: + case llvm::AtomicOrderingCABI::acquire: + FailureOrder = llvm::AtomicOrdering::Acquire; + break; + case llvm::AtomicOrderingCABI::seq_cst: + FailureOrder = llvm::AtomicOrdering::SequentiallyConsistent; + break; + } + if (isStrongerThan(FailureOrder, SuccessOrder)) { + // Don't assert on undefined behavior "failure argument shall be no + // stronger than the success argument". 
FailureOrder = - llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(SuccessOrder); + llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(SuccessOrder); } - emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, Size, - SuccessOrder, FailureOrder); + emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, Size, SuccessOrder, + FailureOrder); return; } @@ -456,9 +443,10 @@ static void emitAtomicCmpXchgFailureSet(CodeGenFunction &CGF, AtomicExpr *E, llvm::BasicBlock *MonotonicBB = nullptr, *AcquireBB = nullptr, *SeqCstBB = nullptr; MonotonicBB = CGF.createBasicBlock("monotonic_fail", CGF.CurFn); - if (SuccessOrder != llvm::Monotonic && SuccessOrder != llvm::Release) + if (SuccessOrder != llvm::AtomicOrdering::Monotonic && + SuccessOrder != llvm::AtomicOrdering::Release) AcquireBB = CGF.createBasicBlock("acquire_fail", CGF.CurFn); - if (SuccessOrder == llvm::SequentiallyConsistent) + if (SuccessOrder == llvm::AtomicOrdering::SequentiallyConsistent) SeqCstBB = CGF.createBasicBlock("seqcst_fail", CGF.CurFn); llvm::BasicBlock *ContBB = CGF.createBasicBlock("atomic.continue", CGF.CurFn); @@ -472,25 +460,25 @@ static void emitAtomicCmpXchgFailureSet(CodeGenFunction &CGF, AtomicExpr *E, // doesn't fold to a constant for the ordering. CGF.Builder.SetInsertPoint(MonotonicBB); emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, - Size, SuccessOrder, llvm::Monotonic); + Size, SuccessOrder, llvm::AtomicOrdering::Monotonic); CGF.Builder.CreateBr(ContBB); if (AcquireBB) { CGF.Builder.SetInsertPoint(AcquireBB); emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, - Size, SuccessOrder, llvm::Acquire); + Size, SuccessOrder, llvm::AtomicOrdering::Acquire); CGF.Builder.CreateBr(ContBB); - SI->addCase(CGF.Builder.getInt32(AtomicExpr::AO_ABI_memory_order_consume), + SI->addCase(CGF.Builder.getInt32((int)llvm::AtomicOrderingCABI::consume), AcquireBB); - SI->addCase(CGF.Builder.getInt32(AtomicExpr::AO_ABI_memory_order_acquire), + SI->addCase(CGF.Builder.getInt32((int)llvm::AtomicOrderingCABI::acquire), AcquireBB); } if (SeqCstBB) { CGF.Builder.SetInsertPoint(SeqCstBB); - emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, - Size, SuccessOrder, llvm::SequentiallyConsistent); + emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, Size, SuccessOrder, + llvm::AtomicOrdering::SequentiallyConsistent); CGF.Builder.CreateBr(ContBB); - SI->addCase(CGF.Builder.getInt32(AtomicExpr::AO_ABI_memory_order_seq_cst), + SI->addCase(CGF.Builder.getInt32((int)llvm::AtomicOrderingCABI::seq_cst), SeqCstBB); } @@ -1037,40 +1025,39 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { E->getOp() == AtomicExpr::AO__atomic_load_n; if (isa<llvm::ConstantInt>(Order)) { - int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); - switch (ord) { - case AtomicExpr::AO_ABI_memory_order_relaxed: - EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, - Size, llvm::Monotonic); - break; - case AtomicExpr::AO_ABI_memory_order_consume: - case AtomicExpr::AO_ABI_memory_order_acquire: - if (IsStore) - break; // Avoid crashing on code with undefined behavior - EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, - Size, llvm::Acquire); - break; - case AtomicExpr::AO_ABI_memory_order_release: - if (IsLoad) - break; // Avoid crashing on code with undefined behavior - EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, - Size, llvm::Release); - break; - case AtomicExpr::AO_ABI_memory_order_acq_rel: - if (IsLoad || IsStore) - break; // Avoid crashing on code with undefined behavior - 
EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, - Size, llvm::AcquireRelease); - break; - case AtomicExpr::AO_ABI_memory_order_seq_cst: - EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, - Size, llvm::SequentiallyConsistent); - break; - default: // invalid order - // We should not ever get here normally, but it's hard to - // enforce that in general. - break; - } + auto ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); + // We should not ever get to a case where the ordering isn't a valid C ABI + // value, but it's hard to enforce that in general. + if (llvm::isValidAtomicOrderingCABI(ord)) + switch ((llvm::AtomicOrderingCABI)ord) { + case llvm::AtomicOrderingCABI::relaxed: + EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size, + llvm::AtomicOrdering::Monotonic); + break; + case llvm::AtomicOrderingCABI::consume: + case llvm::AtomicOrderingCABI::acquire: + if (IsStore) + break; // Avoid crashing on code with undefined behavior + EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size, + llvm::AtomicOrdering::Acquire); + break; + case llvm::AtomicOrderingCABI::release: + if (IsLoad) + break; // Avoid crashing on code with undefined behavior + EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size, + llvm::AtomicOrdering::Release); + break; + case llvm::AtomicOrderingCABI::acq_rel: + if (IsLoad || IsStore) + break; // Avoid crashing on code with undefined behavior + EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size, + llvm::AtomicOrdering::AcquireRelease); + break; + case llvm::AtomicOrderingCABI::seq_cst: + EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size, + llvm::AtomicOrdering::SequentiallyConsistent); + break; + } if (RValTy->isVoidType()) return RValue::get(nullptr); @@ -1105,39 +1092,39 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { // Emit all the different atomics Builder.SetInsertPoint(MonotonicBB); EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, - Size, llvm::Monotonic); + Size, llvm::AtomicOrdering::Monotonic); Builder.CreateBr(ContBB); if (!IsStore) { Builder.SetInsertPoint(AcquireBB); EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, - Size, llvm::Acquire); + Size, llvm::AtomicOrdering::Acquire); Builder.CreateBr(ContBB); - SI->addCase(Builder.getInt32(AtomicExpr::AO_ABI_memory_order_consume), + SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::consume), AcquireBB); - SI->addCase(Builder.getInt32(AtomicExpr::AO_ABI_memory_order_acquire), + SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::acquire), AcquireBB); } if (!IsLoad) { Builder.SetInsertPoint(ReleaseBB); EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, - Size, llvm::Release); + Size, llvm::AtomicOrdering::Release); Builder.CreateBr(ContBB); - SI->addCase(Builder.getInt32(AtomicExpr::AO_ABI_memory_order_release), + SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::release), ReleaseBB); } if (!IsLoad && !IsStore) { Builder.SetInsertPoint(AcqRelBB); EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, - Size, llvm::AcquireRelease); + Size, llvm::AtomicOrdering::AcquireRelease); Builder.CreateBr(ContBB); - SI->addCase(Builder.getInt32(AtomicExpr::AO_ABI_memory_order_acq_rel), + SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::acq_rel), AcqRelBB); } Builder.SetInsertPoint(SeqCstBB); EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, - Size, llvm::SequentiallyConsistent); + Size, 
llvm::AtomicOrdering::SequentiallyConsistent); Builder.CreateBr(ContBB); - SI->addCase(Builder.getInt32(AtomicExpr::AO_ABI_memory_order_seq_cst), + SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::seq_cst), SeqCstBB); // Cleanup and return @@ -1257,9 +1244,9 @@ void AtomicInfo::EmitAtomicLoadLibcall(llvm::Value *AddForLoaded, CGF.getContext().VoidPtrTy); Args.add(RValue::get(CGF.EmitCastToVoidPtr(AddForLoaded)), CGF.getContext().VoidPtrTy); - Args.add(RValue::get( - llvm::ConstantInt::get(CGF.IntTy, translateAtomicOrdering(AO))), - CGF.getContext().IntTy); + Args.add( + RValue::get(llvm::ConstantInt::get(CGF.IntTy, (int)llvm::toCABI(AO))), + CGF.getContext().IntTy); emitAtomicLibcall(CGF, "__atomic_load", CGF.getContext().VoidTy, Args); } @@ -1287,28 +1274,21 @@ bool CodeGenFunction::LValueIsSuitableForInlineAtomic(LValue LV) { bool IsVolatile = LV.isVolatile() || hasVolatileMember(LV.getType()); // An atomic is inline if we don't need to use a libcall. bool AtomicIsInline = !AI.shouldUseLibcall(); + // MSVC doesn't seem to do this for types wider than a pointer. + if (getContext().getTypeSize(LV.getType()) > + getContext().getTypeSize(getContext().getIntPtrType())) + return false; return IsVolatile && AtomicIsInline; } -/// An type is a candidate for having its loads and stores be made atomic if -/// we are operating under /volatile:ms *and* we know the access is volatile and -/// performing such an operation can be performed without a libcall. -bool CodeGenFunction::typeIsSuitableForInlineAtomic(QualType Ty, - bool IsVolatile) const { - // An atomic is inline if we don't need to use a libcall (e.g. it is builtin). - bool AtomicIsInline = getContext().getTargetInfo().hasBuiltinAtomic( - getContext().getTypeSize(Ty), getContext().getTypeAlign(Ty)); - return CGM.getCodeGenOpts().MSVolatile && IsVolatile && AtomicIsInline; -} - RValue CodeGenFunction::EmitAtomicLoad(LValue LV, SourceLocation SL, AggValueSlot Slot) { llvm::AtomicOrdering AO; bool IsVolatile = LV.isVolatileQualified(); if (LV.getType()->isAtomicType()) { - AO = llvm::SequentiallyConsistent; + AO = llvm::AtomicOrdering::SequentiallyConsistent; } else { - AO = llvm::Acquire; + AO = llvm::AtomicOrdering::Acquire; IsVolatile = true; } return EmitAtomicLoad(LV, SL, AO, IsVolatile, Slot); @@ -1462,11 +1442,11 @@ AtomicInfo::EmitAtomicCompareExchangeLibcall(llvm::Value *ExpectedAddr, CGF.getContext().VoidPtrTy); Args.add(RValue::get(CGF.EmitCastToVoidPtr(DesiredAddr)), CGF.getContext().VoidPtrTy); - Args.add(RValue::get(llvm::ConstantInt::get( - CGF.IntTy, translateAtomicOrdering(Success))), + Args.add(RValue::get( + llvm::ConstantInt::get(CGF.IntTy, (int)llvm::toCABI(Success))), CGF.getContext().IntTy); - Args.add(RValue::get(llvm::ConstantInt::get( - CGF.IntTy, translateAtomicOrdering(Failure))), + Args.add(RValue::get( + llvm::ConstantInt::get(CGF.IntTy, (int)llvm::toCABI(Failure))), CGF.getContext().IntTy); auto SuccessFailureRVal = emitAtomicLibcall(CGF, "__atomic_compare_exchange", CGF.getContext().BoolTy, Args); @@ -1477,8 +1457,9 @@ AtomicInfo::EmitAtomicCompareExchangeLibcall(llvm::Value *ExpectedAddr, std::pair<RValue, llvm::Value *> AtomicInfo::EmitAtomicCompareExchange( RValue Expected, RValue Desired, llvm::AtomicOrdering Success, llvm::AtomicOrdering Failure, bool IsWeak) { - if (Failure >= Success) - // Don't assert on undefined behavior. + if (isStrongerThan(Failure, Success)) + // Don't assert on undefined behavior "failure argument shall be no stronger + // than the success argument". 
Failure = llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(Success); // Check whether we should use a library call. @@ -1727,9 +1708,9 @@ void CodeGenFunction::EmitAtomicStore(RValue rvalue, LValue lvalue, bool IsVolatile = lvalue.isVolatileQualified(); llvm::AtomicOrdering AO; if (lvalue.getType()->isAtomicType()) { - AO = llvm::SequentiallyConsistent; + AO = llvm::AtomicOrdering::SequentiallyConsistent; } else { - AO = llvm::Release; + AO = llvm::AtomicOrdering::Release; IsVolatile = true; } return EmitAtomicStore(rvalue, lvalue, AO, IsVolatile, isInit); @@ -1772,9 +1753,9 @@ void CodeGenFunction::EmitAtomicStore(RValue rvalue, LValue dest, getContext().VoidPtrTy); args.add(RValue::get(EmitCastToVoidPtr(srcAddr.getPointer())), getContext().VoidPtrTy); - args.add(RValue::get(llvm::ConstantInt::get( - IntTy, AtomicInfo::translateAtomicOrdering(AO))), - getContext().IntTy); + args.add( + RValue::get(llvm::ConstantInt::get(IntTy, (int)llvm::toCABI(AO))), + getContext().IntTy); emitAtomicLibcall(*this, "__atomic_store", getContext().VoidTy, args); return; } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGBlocks.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGBlocks.cpp index ba2941e..e3658ab 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGBlocks.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGBlocks.cpp @@ -125,10 +125,15 @@ static llvm::Constant *buildBlockDescriptor(CodeGenModule &CGM, llvm::Constant *init = llvm::ConstantStruct::getAnon(elements); + unsigned AddrSpace = 0; + if (C.getLangOpts().OpenCL) + AddrSpace = C.getTargetAddressSpace(LangAS::opencl_constant); llvm::GlobalVariable *global = new llvm::GlobalVariable(CGM.getModule(), init->getType(), true, llvm::GlobalValue::InternalLinkage, - init, "__block_descriptor_tmp"); + init, "__block_descriptor_tmp", nullptr, + llvm::GlobalValue::NotThreadLocal, + AddrSpace); return llvm::ConstantExpr::getBitCast(global, CGM.getBlockDescriptorType()); } @@ -262,6 +267,11 @@ static bool isSafeForCXXConstantCapture(QualType type) { static llvm::Constant *tryCaptureAsConstant(CodeGenModule &CGM, CodeGenFunction *CGF, const VarDecl *var) { + // Return if this is a function paramter. We shouldn't try to + // rematerialize default arguments of function parameters. + if (isa<ParmVarDecl>(var)) + return nullptr; + QualType type = var->getType(); // We can only do this if the variable is const. @@ -508,7 +518,7 @@ static void computeBlockInfo(CodeGenModule &CGM, CodeGenFunction *CGF, // At this point, we just have to add padding if the end align still // isn't aligned right. if (endAlign < maxFieldAlign) { - CharUnits newBlockSize = blockSize.RoundUpToAlignment(maxFieldAlign); + CharUnits newBlockSize = blockSize.alignTo(maxFieldAlign); CharUnits padding = newBlockSize - blockSize; // If we haven't yet added any fields, remember that there was an @@ -775,35 +785,34 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { // Compute the address of the thing we're going to move into the // block literal. Address src = Address::invalid(); - if (BlockInfo && CI.isNested()) { - // We need to use the capture from the enclosing block. - const CGBlockInfo::Capture &enclosingCapture = - BlockInfo->getCapture(variable); - - // This is a [[type]]*, except that a byref entry wil just be an i8**. 
- src = Builder.CreateStructGEP(LoadBlockStruct(), - enclosingCapture.getIndex(), - enclosingCapture.getOffset(), - "block.capture.addr"); - } else if (blockDecl->isConversionFromLambda()) { + + if (blockDecl->isConversionFromLambda()) { // The lambda capture in a lambda's conversion-to-block-pointer is // special; we'll simply emit it directly. src = Address::invalid(); - } else { - // Just look it up in the locals map, which will give us back a - // [[type]]*. If that doesn't work, do the more elaborate DRE - // emission. - auto it = LocalDeclMap.find(variable); - if (it != LocalDeclMap.end()) { - src = it->second; + } else if (CI.isByRef()) { + if (BlockInfo && CI.isNested()) { + // We need to use the capture from the enclosing block. + const CGBlockInfo::Capture &enclosingCapture = + BlockInfo->getCapture(variable); + + // This is a [[type]]*, except that a byref entry wil just be an i8**. + src = Builder.CreateStructGEP(LoadBlockStruct(), + enclosingCapture.getIndex(), + enclosingCapture.getOffset(), + "block.capture.addr"); } else { - DeclRefExpr declRef( - const_cast<VarDecl *>(variable), - /*RefersToEnclosingVariableOrCapture*/ CI.isNested(), type, - VK_LValue, SourceLocation()); - src = EmitDeclRefLValue(&declRef).getAddress(); + auto I = LocalDeclMap.find(variable); + assert(I != LocalDeclMap.end()); + src = I->second; } - } + } else { + DeclRefExpr declRef(const_cast<VarDecl *>(variable), + /*RefersToEnclosingVariableOrCapture*/ CI.isNested(), + type.getNonReferenceType(), VK_LValue, + SourceLocation()); + src = EmitDeclRefLValue(&declRef).getAddress(); + }; // For byrefs, we just write the pointer to the byref struct into // the block field. There's no need to chase the forwarding @@ -837,8 +846,7 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { // If it's a reference variable, copy the reference into the block field. } else if (type->isReferenceType()) { - llvm::Value *ref = Builder.CreateLoad(src, "ref.val"); - Builder.CreateStore(ref, blockField); + Builder.CreateStore(src.getPointer(), blockField); // If this is an ARC __strong block-pointer variable, don't do a // block copy. @@ -924,7 +932,10 @@ llvm::Type *CodeGenModule::getBlockDescriptorType() { UnsignedLongTy, UnsignedLongTy, nullptr); // Now form a pointer to that. - BlockDescriptorType = llvm::PointerType::getUnqual(BlockDescriptorType); + unsigned AddrSpace = 0; + if (getLangOpts().OpenCL) + AddrSpace = getContext().getTargetAddressSpace(LangAS::opencl_constant); + BlockDescriptorType = llvm::PointerType::get(BlockDescriptorType, AddrSpace); return BlockDescriptorType; } @@ -1109,8 +1120,8 @@ void CodeGenFunction::setBlockContextParameter(const ImplicitParamDecl *D, } if (CGDebugInfo *DI = getDebugInfo()) { - if (CGM.getCodeGenOpts().getDebugInfo() - >= CodeGenOptions::LimitedDebugInfo) { + if (CGM.getCodeGenOpts().getDebugInfo() >= + codegenoptions::LimitedDebugInfo) { DI->setLocation(D->getLocation()); DI->EmitDeclareOfBlockLiteralArgVariable(*BlockInfo, arg, argNum, localAddr, Builder); @@ -1174,9 +1185,8 @@ CodeGenFunction::GenerateBlockFunction(GlobalDecl GD, // Create the function declaration. 
const FunctionProtoType *fnType = blockInfo.getBlockExpr()->getFunctionType(); - const CGFunctionInfo &fnInfo = CGM.getTypes().arrangeFreeFunctionDeclaration( - fnType->getReturnType(), args, fnType->getExtInfo(), - fnType->isVariadic()); + const CGFunctionInfo &fnInfo = + CGM.getTypes().arrangeBlockFunctionDeclaration(fnType, args); if (CGM.ReturnSlotInterferesWithArgs(fnInfo)) blockInfo.UsesStret = true; @@ -1260,8 +1270,8 @@ CodeGenFunction::GenerateBlockFunction(GlobalDecl GD, const VarDecl *variable = CI.getVariable(); DI->EmitLocation(Builder, variable->getLocation()); - if (CGM.getCodeGenOpts().getDebugInfo() - >= CodeGenOptions::LimitedDebugInfo) { + if (CGM.getCodeGenOpts().getDebugInfo() >= + codegenoptions::LimitedDebugInfo) { const CGBlockInfo::Capture &capture = blockInfo.getCapture(variable); if (capture.isConstant()) { auto addr = LocalDeclMap.find(variable)->second; @@ -1329,8 +1339,8 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) { C.VoidPtrTy); args.push_back(&srcDecl); - const CGFunctionInfo &FI = CGM.getTypes().arrangeFreeFunctionDeclaration( - C.VoidTy, args, FunctionType::ExtInfo(), /*variadic=*/false); + const CGFunctionInfo &FI = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, args); // FIXME: it would be nice if these were mergeable with things with // identical semantics. @@ -1505,8 +1515,8 @@ CodeGenFunction::GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo) { C.VoidPtrTy); args.push_back(&srcDecl); - const CGFunctionInfo &FI = CGM.getTypes().arrangeFreeFunctionDeclaration( - C.VoidTy, args, FunctionType::ExtInfo(), /*variadic=*/false); + const CGFunctionInfo &FI = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, args); // FIXME: We'd like to put these into a mergable by content, with // internal linkage. @@ -1791,8 +1801,8 @@ generateByrefCopyHelper(CodeGenFunction &CGF, const BlockByrefInfo &byrefInfo, Context.VoidPtrTy); args.push_back(&src); - const CGFunctionInfo &FI = CGF.CGM.getTypes().arrangeFreeFunctionDeclaration( - R, args, FunctionType::ExtInfo(), /*variadic=*/false); + const CGFunctionInfo &FI = + CGF.CGM.getTypes().arrangeBuiltinFunctionDeclaration(R, args); llvm::FunctionType *LTy = CGF.CGM.getTypes().GetFunctionType(FI); @@ -1864,8 +1874,8 @@ generateByrefDisposeHelper(CodeGenFunction &CGF, Context.VoidPtrTy); args.push_back(&src); - const CGFunctionInfo &FI = CGF.CGM.getTypes().arrangeFreeFunctionDeclaration( - R, args, FunctionType::ExtInfo(), /*variadic=*/false); + const CGFunctionInfo &FI = + CGF.CGM.getTypes().arrangeBuiltinFunctionDeclaration(R, args); llvm::FunctionType *LTy = CGF.CGM.getTypes().GetFunctionType(FI); @@ -2108,7 +2118,7 @@ const BlockByrefInfo &CodeGenFunction::getBlockByrefInfo(const VarDecl *D) { bool packed = false; CharUnits varAlign = getContext().getDeclAlign(D); - CharUnits varOffset = size.RoundUpToAlignment(varAlign); + CharUnits varOffset = size.alignTo(varAlign); // We may have to insert padding. if (varOffset != size) { @@ -2285,9 +2295,36 @@ void CodeGenFunction::enterByrefCleanup(const AutoVarEmission &emission) { /// Adjust the declaration of something from the blocks API. 
static void configureBlocksRuntimeObject(CodeGenModule &CGM, llvm::Constant *C) { - if (!CGM.getLangOpts().BlocksRuntimeOptional) return; - auto *GV = cast<llvm::GlobalValue>(C->stripPointerCasts()); + + if (CGM.getTarget().getTriple().isOSBinFormatCOFF()) { + IdentifierInfo &II = CGM.getContext().Idents.get(C->getName()); + TranslationUnitDecl *TUDecl = CGM.getContext().getTranslationUnitDecl(); + DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl); + + assert((isa<llvm::Function>(C->stripPointerCasts()) || + isa<llvm::GlobalVariable>(C->stripPointerCasts())) && + "expected Function or GlobalVariable"); + + const NamedDecl *ND = nullptr; + for (const auto &Result : DC->lookup(&II)) + if ((ND = dyn_cast<FunctionDecl>(Result)) || + (ND = dyn_cast<VarDecl>(Result))) + break; + + // TODO: support static blocks runtime + if (GV->isDeclaration() && (!ND || !ND->hasAttr<DLLExportAttr>())) { + GV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); + GV->setLinkage(llvm::GlobalValue::ExternalLinkage); + } else { + GV->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); + GV->setLinkage(llvm::GlobalValue::ExternalLinkage); + } + } + + if (!CGM.getLangOpts().BlocksRuntimeOptional) + return; + if (GV->isDeclaration() && GV->hasExternalLinkage()) GV->setLinkage(llvm::GlobalValue::ExternalWeakLinkage); } @@ -2335,5 +2372,5 @@ llvm::Constant *CodeGenModule::getNSConcreteStackBlock() { Int8PtrTy->getPointerTo(), nullptr); configureBlocksRuntimeObject(*this, NSConcreteStackBlock); - return NSConcreteStackBlock; + return NSConcreteStackBlock; } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGBuilder.h b/contrib/llvm/tools/clang/lib/CodeGen/CGBuilder.h index 489f341..027435d 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGBuilder.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGBuilder.h @@ -10,6 +10,7 @@ #ifndef LLVM_CLANG_LIB_CODEGEN_CGBUILDER_H #define LLVM_CLANG_LIB_CODEGEN_CGBUILDER_H +#include "llvm/IR/DataLayout.h" #include "llvm/IR/IRBuilder.h" #include "Address.h" #include "CodeGenTypeCache.h" @@ -22,9 +23,7 @@ class CodeGenFunction; /// \brief This is an IRBuilder insertion helper that forwards to /// CodeGenFunction::InsertHelper, which adds necessary metadata to /// instructions. -template <bool PreserveNames> -class CGBuilderInserter - : protected llvm::IRBuilderDefaultInserter<PreserveNames> { +class CGBuilderInserter : protected llvm::IRBuilderDefaultInserter { public: CGBuilderInserter() = default; explicit CGBuilderInserter(CodeGenFunction *CGF) : CGF(CGF) {} @@ -38,17 +37,10 @@ private: CodeGenFunction *CGF = nullptr; }; -// Don't preserve names on values in an optimized build. 
-#ifdef NDEBUG -#define PreserveNames false -#else -#define PreserveNames true -#endif - -typedef CGBuilderInserter<PreserveNames> CGBuilderInserterTy; +typedef CGBuilderInserter CGBuilderInserterTy; -typedef llvm::IRBuilder<PreserveNames, llvm::ConstantFolder, - CGBuilderInserterTy> CGBuilderBaseTy; +typedef llvm::IRBuilder<llvm::ConstantFolder, CGBuilderInserterTy> + CGBuilderBaseTy; class CGBuilderTy : public CGBuilderBaseTy { /// Storing a reference to the type cache here makes it a lot easier @@ -194,6 +186,12 @@ public: Addr.getPointer(), Index, Name), Addr.getAlignment().alignmentAtOffset(Offset)); } + Address CreateStructGEP(Address Addr, unsigned Index, + const llvm::StructLayout *Layout, + const llvm::Twine &Name = "") { + auto Offset = CharUnits::fromQuantity(Layout->getElementOffset(Index)); + return CreateStructGEP(Addr, Index, Offset, Name); + } /// Given /// %addr = [n x T]* ... @@ -298,8 +296,6 @@ public: } }; -#undef PreserveNames - } // end namespace CodeGen } // end namespace clang diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp index 787ac53..a5fc531 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp @@ -26,6 +26,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/MDBuilder.h" #include <sstream> using namespace clang; @@ -105,9 +106,8 @@ static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF, llvm::Type *ValueType = Args[1]->getType(); Args[1] = EmitToInt(CGF, Args[1], T, IntType); - llvm::Value *Result = - CGF.Builder.CreateAtomicRMW(Kind, Args[0], Args[1], - llvm::SequentiallyConsistent); + llvm::Value *Result = CGF.Builder.CreateAtomicRMW( + Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent); return EmitFromInt(CGF, Result, T, ValueType); } @@ -167,9 +167,8 @@ static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF, Args[1] = EmitToInt(CGF, Args[1], T, IntType); Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType); - llvm::Value *Result = - CGF.Builder.CreateAtomicRMW(Kind, Args[0], Args[1], - llvm::SequentiallyConsistent); + llvm::Value *Result = CGF.Builder.CreateAtomicRMW( + Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent); Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]); if (Invert) Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result, @@ -206,9 +205,9 @@ static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E, Args[1] = EmitToInt(CGF, Args[1], T, IntType); Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType); - Value *Pair = CGF.Builder.CreateAtomicCmpXchg(Args[0], Args[1], Args[2], - llvm::SequentiallyConsistent, - llvm::SequentiallyConsistent); + Value *Pair = CGF.Builder.CreateAtomicCmpXchg( + Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent, + llvm::AtomicOrdering::SequentiallyConsistent); if (ReturnBool) // Extract boolean success flag and zext it to int. return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1), @@ -219,6 +218,51 @@ static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E, ValueType); } +// Emit a simple mangled intrinsic that has 1 argument and a return type +// matching the argument type. 
+static Value *emitUnaryBuiltin(CodeGenFunction &CGF, + const CallExpr *E, + unsigned IntrinsicID) { + llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); + + Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); + return CGF.Builder.CreateCall(F, Src0); +} + +// Emit an intrinsic that has 2 operands of the same type as its result. +static Value *emitBinaryBuiltin(CodeGenFunction &CGF, + const CallExpr *E, + unsigned IntrinsicID) { + llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); + llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); + + Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); + return CGF.Builder.CreateCall(F, { Src0, Src1 }); +} + +// Emit an intrinsic that has 3 operands of the same type as its result. +static Value *emitTernaryBuiltin(CodeGenFunction &CGF, + const CallExpr *E, + unsigned IntrinsicID) { + llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); + llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); + llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2)); + + Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); + return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 }); +} + +// Emit an intrinsic that has 1 float or double operand, and 1 integer. +static Value *emitFPIntBuiltin(CodeGenFunction &CGF, + const CallExpr *E, + unsigned IntrinsicID) { + llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); + llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); + + Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); + return CGF.Builder.CreateCall(F, {Src0, Src1}); +} + /// EmitFAbs - Emit a call to @llvm.fabs(). static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) { Value *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType()); @@ -248,8 +292,8 @@ static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) { if (CGF.getTarget().isBigEndian()) { Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width); V = CGF.Builder.CreateLShr(V, ShiftCst); - } - // We are truncating value in order to extract the higher-order + } + // We are truncating value in order to extract the higher-order // double, which we will be using to extract the sign from. 
IntTy = llvm::IntegerType::get(C, Width); V = CGF.Builder.CreateTrunc(V, IntTy); @@ -288,6 +332,17 @@ static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF, return CGF.Builder.CreateExtractValue(Tmp, 0); } +static Value *emitRangedBuiltin(CodeGenFunction &CGF, + unsigned IntrinsicID, + int low, int high) { + llvm::MDBuilder MDHelper(CGF.getLLVMContext()); + llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high)); + Value *F = CGF.CGM.getIntrinsic(IntrinsicID, {}); + llvm::Instruction *Call = CGF.Builder.CreateCall(F); + Call->setMetadata(llvm::LLVMContext::MD_range, RNode); + return Call; +} + namespace { struct WidthAndSignedness { unsigned Width; @@ -465,9 +520,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, case Builtin::BI__builtin_fabs: case Builtin::BI__builtin_fabsf: case Builtin::BI__builtin_fabsl: { - Value *Arg1 = EmitScalarExpr(E->getArg(0)); - Value *Result = EmitFAbs(*this, Arg1); - return RValue::get(Result); + return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs)); } case Builtin::BI__builtin_fmod: case Builtin::BI__builtin_fmodf: @@ -477,7 +530,51 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, Value *Result = Builder.CreateFRem(Arg1, Arg2, "fmod"); return RValue::get(Result); } - + case Builtin::BI__builtin_copysign: + case Builtin::BI__builtin_copysignf: + case Builtin::BI__builtin_copysignl: { + return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign)); + } + case Builtin::BI__builtin_ceil: + case Builtin::BI__builtin_ceilf: + case Builtin::BI__builtin_ceill: { + return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::ceil)); + } + case Builtin::BI__builtin_floor: + case Builtin::BI__builtin_floorf: + case Builtin::BI__builtin_floorl: { + return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::floor)); + } + case Builtin::BI__builtin_trunc: + case Builtin::BI__builtin_truncf: + case Builtin::BI__builtin_truncl: { + return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::trunc)); + } + case Builtin::BI__builtin_rint: + case Builtin::BI__builtin_rintf: + case Builtin::BI__builtin_rintl: { + return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::rint)); + } + case Builtin::BI__builtin_nearbyint: + case Builtin::BI__builtin_nearbyintf: + case Builtin::BI__builtin_nearbyintl: { + return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::nearbyint)); + } + case Builtin::BI__builtin_round: + case Builtin::BI__builtin_roundf: + case Builtin::BI__builtin_roundl: { + return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::round)); + } + case Builtin::BI__builtin_fmin: + case Builtin::BI__builtin_fminf: + case Builtin::BI__builtin_fminl: { + return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::minnum)); + } + case Builtin::BI__builtin_fmax: + case Builtin::BI__builtin_fmaxf: + case Builtin::BI__builtin_fmaxl: { + return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::maxnum)); + } case Builtin::BI__builtin_conj: case Builtin::BI__builtin_conjf: case Builtin::BI__builtin_conjl: { @@ -645,10 +742,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, case Builtin::BI__builtin_bswap16: case Builtin::BI__builtin_bswap32: case Builtin::BI__builtin_bswap64: { - Value *ArgValue = EmitScalarExpr(E->getArg(0)); - llvm::Type *ArgType = ArgValue->getType(); - Value *F = CGM.getIntrinsic(Intrinsic::bswap, ArgType); - return RValue::get(Builder.CreateCall(F, ArgValue)); + return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap)); + } + case 
Builtin::BI__builtin_bitreverse8: + case Builtin::BI__builtin_bitreverse16: + case Builtin::BI__builtin_bitreverse32: + case Builtin::BI__builtin_bitreverse64: { + return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse)); } case Builtin::BI__builtin_object_size: { unsigned Type = @@ -751,13 +851,19 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); } - case Builtin::BI__builtin_isinf: { - // isinf(x) --> fabs(x) == infinity + case Builtin::BI__builtin_isinf: + case Builtin::BI__builtin_isfinite: { + // isinf(x) --> fabs(x) == infinity + // isfinite(x) --> fabs(x) != infinity + // x != NaN via the ordered compare in either case. Value *V = EmitScalarExpr(E->getArg(0)); - V = EmitFAbs(*this, V); - - V = Builder.CreateFCmpOEQ(V, ConstantFP::getInfinity(V->getType()),"isinf"); - return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); + Value *Fabs = EmitFAbs(*this, V); + Constant *Infinity = ConstantFP::getInfinity(V->getType()); + CmpInst::Predicate Pred = (BuiltinID == Builtin::BI__builtin_isinf) + ? CmpInst::FCMP_OEQ + : CmpInst::FCMP_ONE; + Value *FCmp = Builder.CreateFCmp(Pred, Fabs, Infinity, "cmpinf"); + return RValue::get(Builder.CreateZExt(FCmp, ConvertType(E->getType()))); } case Builtin::BI__builtin_isinf_sign: { @@ -795,19 +901,6 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); } - case Builtin::BI__builtin_isfinite: { - // isfinite(x) --> x == x && fabs(x) != infinity; - Value *V = EmitScalarExpr(E->getArg(0)); - Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq"); - - Value *Abs = EmitFAbs(*this, V); - Value *IsNotInf = - Builder.CreateFCmpUNE(Abs, ConstantFP::getInfinity(V->getType()),"isinf"); - - V = Builder.CreateAnd(Eq, IsNotInf, "and"); - return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType()))); - } - case Builtin::BI__builtin_fpclassify: { Value *V = EmitScalarExpr(E->getArg(5)); llvm::Type *Ty = ConvertType(E->getArg(5)->getType()); @@ -1258,7 +1351,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, llvm::StoreInst *Store = Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr, StoreSize); - Store->setAtomic(llvm::Release); + Store->setAtomic(llvm::AtomicOrdering::Release); return RValue::get(nullptr); } @@ -1270,7 +1363,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, // any way to safely use it... but in practice, it mostly works // to use it with non-atomic loads and stores to get acquire/release // semantics. 
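// --- Illustrative aside (not part of the diff): the combined isinf/isfinite
// lowering above folds both checks into one fabs plus a single *ordered*
// compare against infinity; an ordered FCMP_OEQ/FCMP_ONE is already false for
// NaN, so the old separate "x == x" test is unnecessary. A minimal standalone
// sketch of that equivalence (the ordered compare is modelled here with an
// explicit isnan test; all names are illustrative):
#include <cassert>
#include <cmath>
#include <limits>

static bool foldedIsInf(double x) {
  // FCMP_OEQ against +inf: NaN compares false, -inf folds to +inf via fabs.
  return std::fabs(x) == std::numeric_limits<double>::infinity();
}

static bool foldedIsFinite(double x) {
  // FCMP_ONE is an ordered compare, so NaN must yield false as well.
  return !std::isnan(x) &&
         std::fabs(x) != std::numeric_limits<double>::infinity();
}

int main() {
  const double inf = std::numeric_limits<double>::infinity();
  const double nan = std::numeric_limits<double>::quiet_NaN();
  for (double v : {0.0, -3.5, inf, -inf, nan}) {
    assert(foldedIsInf(v) == std::isinf(v));
    assert(foldedIsFinite(v) == std::isfinite(v));
  }
  return 0;
}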
- Builder.CreateFence(llvm::SequentiallyConsistent); + Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent); return RValue::get(nullptr); } @@ -1294,9 +1387,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)), getContext().VoidPtrTy); const CGFunctionInfo &FuncInfo = - CGM.getTypes().arrangeFreeFunctionCall(E->getType(), Args, - FunctionType::ExtInfo(), - RequiredArgs::All); + CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args); llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo); llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName); return EmitCall(FuncInfo, Func, ReturnValueSlot(), Args); @@ -1320,30 +1411,27 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, switch (ord) { case 0: // memory_order_relaxed default: // invalid order - Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, - Ptr, NewVal, - llvm::Monotonic); + Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, + llvm::AtomicOrdering::Monotonic); break; - case 1: // memory_order_consume - case 2: // memory_order_acquire - Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, - Ptr, NewVal, - llvm::Acquire); + case 1: // memory_order_consume + case 2: // memory_order_acquire + Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, + llvm::AtomicOrdering::Acquire); break; - case 3: // memory_order_release - Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, - Ptr, NewVal, - llvm::Release); + case 3: // memory_order_release + Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, + llvm::AtomicOrdering::Release); break; - case 4: // memory_order_acq_rel - Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, - Ptr, NewVal, - llvm::AcquireRelease); + case 4: // memory_order_acq_rel + + Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, + llvm::AtomicOrdering::AcquireRelease); break; - case 5: // memory_order_seq_cst - Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, - Ptr, NewVal, - llvm::SequentiallyConsistent); + case 5: // memory_order_seq_cst + Result = Builder.CreateAtomicRMW( + llvm::AtomicRMWInst::Xchg, Ptr, NewVal, + llvm::AtomicOrdering::SequentiallyConsistent); break; } Result->setVolatile(Volatile); @@ -1360,9 +1448,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, createBasicBlock("seqcst", CurFn) }; llvm::AtomicOrdering Orders[5] = { - llvm::Monotonic, llvm::Acquire, llvm::Release, - llvm::AcquireRelease, llvm::SequentiallyConsistent - }; + llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire, + llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease, + llvm::AtomicOrdering::SequentiallyConsistent}; Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]); @@ -1406,13 +1494,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, switch (ord) { case 0: // memory_order_relaxed default: // invalid order - Store->setOrdering(llvm::Monotonic); + Store->setOrdering(llvm::AtomicOrdering::Monotonic); break; case 3: // memory_order_release - Store->setOrdering(llvm::Release); + Store->setOrdering(llvm::AtomicOrdering::Release); break; case 5: // memory_order_seq_cst - Store->setOrdering(llvm::SequentiallyConsistent); + Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent); break; } return RValue::get(nullptr); @@ -1426,8 +1514,8 @@ 
RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, createBasicBlock("seqcst", CurFn) }; llvm::AtomicOrdering Orders[3] = { - llvm::Monotonic, llvm::Release, llvm::SequentiallyConsistent - }; + llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release, + llvm::AtomicOrdering::SequentiallyConsistent}; Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]); @@ -1466,16 +1554,17 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, break; case 1: // memory_order_consume case 2: // memory_order_acquire - Builder.CreateFence(llvm::Acquire, Scope); + Builder.CreateFence(llvm::AtomicOrdering::Acquire, Scope); break; case 3: // memory_order_release - Builder.CreateFence(llvm::Release, Scope); + Builder.CreateFence(llvm::AtomicOrdering::Release, Scope); break; case 4: // memory_order_acq_rel - Builder.CreateFence(llvm::AcquireRelease, Scope); + Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, Scope); break; case 5: // memory_order_seq_cst - Builder.CreateFence(llvm::SequentiallyConsistent, Scope); + Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, + Scope); break; } return RValue::get(nullptr); @@ -1492,23 +1581,23 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB); Builder.SetInsertPoint(AcquireBB); - Builder.CreateFence(llvm::Acquire, Scope); + Builder.CreateFence(llvm::AtomicOrdering::Acquire, Scope); Builder.CreateBr(ContBB); SI->addCase(Builder.getInt32(1), AcquireBB); SI->addCase(Builder.getInt32(2), AcquireBB); Builder.SetInsertPoint(ReleaseBB); - Builder.CreateFence(llvm::Release, Scope); + Builder.CreateFence(llvm::AtomicOrdering::Release, Scope); Builder.CreateBr(ContBB); SI->addCase(Builder.getInt32(3), ReleaseBB); Builder.SetInsertPoint(AcqRelBB); - Builder.CreateFence(llvm::AcquireRelease, Scope); + Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, Scope); Builder.CreateBr(ContBB); SI->addCase(Builder.getInt32(4), AcqRelBB); Builder.SetInsertPoint(SeqCstBB); - Builder.CreateFence(llvm::SequentiallyConsistent, Scope); + Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, Scope); Builder.CreateBr(ContBB); SI->addCase(Builder.getInt32(5), SeqCstBB); @@ -1794,7 +1883,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, break; } - + llvm::Value *Carry; llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry); Builder.CreateStore(Sum, SumOutPtr); @@ -1839,9 +1928,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, llvm::Value *Comparand = Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType); - auto Result = Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange, - SequentiallyConsistent, - SequentiallyConsistent); + auto Result = + Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange, + AtomicOrdering::SequentiallyConsistent, + AtomicOrdering::SequentiallyConsistent); Result->setVolatile(true); return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result, @@ -1853,44 +1943,47 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(1)), - SequentiallyConsistent, - SequentiallyConsistent); + AtomicOrdering::SequentiallyConsistent, + AtomicOrdering::SequentiallyConsistent); CXI->setVolatile(true); return RValue::get(Builder.CreateExtractValue(CXI, 0)); } case Builtin::BI_InterlockedIncrement: 
{ + llvm::Type *IntTy = ConvertType(E->getType()); AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( AtomicRMWInst::Add, EmitScalarExpr(E->getArg(0)), - ConstantInt::get(Int32Ty, 1), - llvm::SequentiallyConsistent); + ConstantInt::get(IntTy, 1), + llvm::AtomicOrdering::SequentiallyConsistent); RMWI->setVolatile(true); - return RValue::get(Builder.CreateAdd(RMWI, ConstantInt::get(Int32Ty, 1))); + return RValue::get(Builder.CreateAdd(RMWI, ConstantInt::get(IntTy, 1))); } case Builtin::BI_InterlockedDecrement: { + llvm::Type *IntTy = ConvertType(E->getType()); AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( AtomicRMWInst::Sub, EmitScalarExpr(E->getArg(0)), - ConstantInt::get(Int32Ty, 1), - llvm::SequentiallyConsistent); + ConstantInt::get(IntTy, 1), + llvm::AtomicOrdering::SequentiallyConsistent); RMWI->setVolatile(true); - return RValue::get(Builder.CreateSub(RMWI, ConstantInt::get(Int32Ty, 1))); + return RValue::get(Builder.CreateSub(RMWI, ConstantInt::get(IntTy, 1))); } case Builtin::BI_InterlockedExchangeAdd: { AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( AtomicRMWInst::Add, EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), - llvm::SequentiallyConsistent); + llvm::AtomicOrdering::SequentiallyConsistent); RMWI->setVolatile(true); return RValue::get(RMWI); } case Builtin::BI__readfsdword: { + llvm::Type *IntTy = ConvertType(E->getType()); Value *IntToPtr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)), - llvm::PointerType::get(CGM.Int32Ty, 257)); + llvm::PointerType::get(IntTy, 257)); LoadInst *Load = - Builder.CreateAlignedLoad(IntToPtr, /*Align=*/4, /*isVolatile=*/true); + Builder.CreateDefaultAlignedLoad(IntToPtr, /*isVolatile=*/true); return RValue::get(Load); } @@ -1963,6 +2056,323 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy)); break; } + + // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions + case Builtin::BIread_pipe: + case Builtin::BIwrite_pipe: { + Value *Arg0 = EmitScalarExpr(E->getArg(0)), + *Arg1 = EmitScalarExpr(E->getArg(1)); + + // Type of the generic packet parameter. + unsigned GenericAS = + getContext().getTargetAddressSpace(LangAS::opencl_generic); + llvm::Type *I8PTy = llvm::PointerType::get( + llvm::Type::getInt8Ty(getLLVMContext()), GenericAS); + + // Testing which overloaded version we should generate the call for. + if (2U == E->getNumArgs()) { + const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2" + : "__write_pipe_2"; + // Creating a generic function type to be able to call with any builtin or + // user defined type. + llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy}; + llvm::FunctionType *FTy = llvm::FunctionType::get( + Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); + Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy); + return RValue::get(Builder.CreateCall( + CGM.CreateRuntimeFunction(FTy, Name), {Arg0, BCast})); + } else { + assert(4 == E->getNumArgs() && + "Illegal number of parameters to pipe function"); + const char *Name = (BuiltinID == Builtin::BIread_pipe) ? 
"__read_pipe_4" + : "__write_pipe_4"; + + llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy}; + Value *Arg2 = EmitScalarExpr(E->getArg(2)), + *Arg3 = EmitScalarExpr(E->getArg(3)); + llvm::FunctionType *FTy = llvm::FunctionType::get( + Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); + Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy); + // We know the third argument is an integer type, but we may need to cast + // it to i32. + if (Arg2->getType() != Int32Ty) + Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty); + return RValue::get(Builder.CreateCall( + CGM.CreateRuntimeFunction(FTy, Name), {Arg0, Arg1, Arg2, BCast})); + } + } + // OpenCL v2.0 s6.13.16 ,s9.17.3.5 - Built-in pipe reserve read and write + // functions + case Builtin::BIreserve_read_pipe: + case Builtin::BIreserve_write_pipe: + case Builtin::BIwork_group_reserve_read_pipe: + case Builtin::BIwork_group_reserve_write_pipe: + case Builtin::BIsub_group_reserve_read_pipe: + case Builtin::BIsub_group_reserve_write_pipe: { + // Composing the mangled name for the function. + const char *Name; + if (BuiltinID == Builtin::BIreserve_read_pipe) + Name = "__reserve_read_pipe"; + else if (BuiltinID == Builtin::BIreserve_write_pipe) + Name = "__reserve_write_pipe"; + else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe) + Name = "__work_group_reserve_read_pipe"; + else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe) + Name = "__work_group_reserve_write_pipe"; + else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe) + Name = "__sub_group_reserve_read_pipe"; + else + Name = "__sub_group_reserve_write_pipe"; + + Value *Arg0 = EmitScalarExpr(E->getArg(0)), + *Arg1 = EmitScalarExpr(E->getArg(1)); + llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy); + + // Building the generic function prototype. + llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty}; + llvm::FunctionType *FTy = llvm::FunctionType::get( + ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false); + // We know the second argument is an integer type, but we may need to cast + // it to i32. + if (Arg1->getType() != Int32Ty) + Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty); + return RValue::get( + Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), {Arg0, Arg1})); + } + // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write + // functions + case Builtin::BIcommit_read_pipe: + case Builtin::BIcommit_write_pipe: + case Builtin::BIwork_group_commit_read_pipe: + case Builtin::BIwork_group_commit_write_pipe: + case Builtin::BIsub_group_commit_read_pipe: + case Builtin::BIsub_group_commit_write_pipe: { + const char *Name; + if (BuiltinID == Builtin::BIcommit_read_pipe) + Name = "__commit_read_pipe"; + else if (BuiltinID == Builtin::BIcommit_write_pipe) + Name = "__commit_write_pipe"; + else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe) + Name = "__work_group_commit_read_pipe"; + else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe) + Name = "__work_group_commit_write_pipe"; + else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe) + Name = "__sub_group_commit_read_pipe"; + else + Name = "__sub_group_commit_write_pipe"; + + Value *Arg0 = EmitScalarExpr(E->getArg(0)), + *Arg1 = EmitScalarExpr(E->getArg(1)); + + // Building the generic function prototype. 
+ llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType()}; + llvm::FunctionType *FTy = + llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()), + llvm::ArrayRef<llvm::Type *>(ArgTys), false); + + return RValue::get( + Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), {Arg0, Arg1})); + } + // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions + case Builtin::BIget_pipe_num_packets: + case Builtin::BIget_pipe_max_packets: { + const char *Name; + if (BuiltinID == Builtin::BIget_pipe_num_packets) + Name = "__get_pipe_num_packets"; + else + Name = "__get_pipe_max_packets"; + + // Building the generic function prototype. + Value *Arg0 = EmitScalarExpr(E->getArg(0)); + llvm::Type *ArgTys[] = {Arg0->getType()}; + llvm::FunctionType *FTy = llvm::FunctionType::get( + Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); + + return RValue::get( + Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), {Arg0})); + } + + // OpenCL v2.0 s6.13.9 - Address space qualifier functions. + case Builtin::BIto_global: + case Builtin::BIto_local: + case Builtin::BIto_private: { + auto Arg0 = EmitScalarExpr(E->getArg(0)); + auto NewArgT = llvm::PointerType::get(Int8Ty, + CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic)); + auto NewRetT = llvm::PointerType::get(Int8Ty, + CGM.getContext().getTargetAddressSpace( + E->getType()->getPointeeType().getAddressSpace())); + auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false); + llvm::Value *NewArg; + if (Arg0->getType()->getPointerAddressSpace() != + NewArgT->getPointerAddressSpace()) + NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT); + else + NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT); + auto NewName = std::string("__") + E->getDirectCallee()->getName().str(); + auto NewCall = + Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg}); + return RValue::get(Builder.CreateBitOrPointerCast(NewCall, + ConvertType(E->getType()))); + } + + // OpenCL v2.0, s6.13.17 - Enqueue kernel function. + // It contains four different overload formats specified in Table 6.13.17.1. + case Builtin::BIenqueue_kernel: { + StringRef Name; // Generated function call name + unsigned NumArgs = E->getNumArgs(); + + llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy); + llvm::Type *RangeTy = ConvertType(getContext().OCLNDRangeTy); + + llvm::Value *Queue = EmitScalarExpr(E->getArg(0)); + llvm::Value *Flags = EmitScalarExpr(E->getArg(1)); + llvm::Value *Range = EmitScalarExpr(E->getArg(2)); + + if (NumArgs == 4) { + // The most basic form of the call with parameters: + // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void) + Name = "__enqueue_kernel_basic"; + llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, Int8PtrTy}; + llvm::FunctionType *FTy = llvm::FunctionType::get( + Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys, 4), false); + + llvm::Value *Block = + Builder.CreateBitCast(EmitScalarExpr(E->getArg(3)), Int8PtrTy); + + return RValue::get(Builder.CreateCall( + CGM.CreateRuntimeFunction(FTy, Name), {Queue, Flags, Range, Block})); + } + assert(NumArgs >= 5 && "Invalid enqueue_kernel signature"); + + // Could have events and/or vaargs. + if (E->getArg(3)->getType()->isBlockPointerType()) { + // No events passed, but has variadic arguments. + Name = "__enqueue_kernel_vaargs"; + llvm::Value *Block = + Builder.CreateBitCast(EmitScalarExpr(E->getArg(3)), Int8PtrTy); + // Create a vector of the arguments, as well as a constant value to + // express to the runtime the number of variadic arguments. 
+ std::vector<llvm::Value *> Args = {Queue, Flags, Range, Block, + ConstantInt::get(IntTy, NumArgs - 4)}; + std::vector<llvm::Type *> ArgTys = {QueueTy, IntTy, RangeTy, Int8PtrTy, + IntTy}; + + // Add the variadics. + for (unsigned I = 4; I < NumArgs; ++I) { + llvm::Value *ArgSize = EmitScalarExpr(E->getArg(I)); + unsigned TypeSizeInBytes = + getContext() + .getTypeSizeInChars(E->getArg(I)->getType()) + .getQuantity(); + Args.push_back(TypeSizeInBytes < 4 + ? Builder.CreateZExt(ArgSize, Int32Ty) + : ArgSize); + } + + llvm::FunctionType *FTy = llvm::FunctionType::get( + Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true); + return RValue::get( + Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), + llvm::ArrayRef<llvm::Value *>(Args))); + } + // Any calls now have event arguments passed. + if (NumArgs >= 7) { + llvm::Type *EventTy = ConvertType(getContext().OCLClkEventTy); + unsigned AS4 = + E->getArg(4)->getType()->isArrayType() + ? E->getArg(4)->getType().getAddressSpace() + : E->getArg(4)->getType()->getPointeeType().getAddressSpace(); + llvm::Type *EventPtrAS4Ty = + EventTy->getPointerTo(CGM.getContext().getTargetAddressSpace(AS4)); + unsigned AS5 = + E->getArg(5)->getType()->getPointeeType().getAddressSpace(); + llvm::Type *EventPtrAS5Ty = + EventTy->getPointerTo(CGM.getContext().getTargetAddressSpace(AS5)); + + llvm::Value *NumEvents = EmitScalarExpr(E->getArg(3)); + llvm::Value *EventList = + E->getArg(4)->getType()->isArrayType() + ? EmitArrayToPointerDecay(E->getArg(4)).getPointer() + : EmitScalarExpr(E->getArg(4)); + llvm::Value *ClkEvent = EmitScalarExpr(E->getArg(5)); + llvm::Value *Block = + Builder.CreateBitCast(EmitScalarExpr(E->getArg(6)), Int8PtrTy); + + std::vector<llvm::Type *> ArgTys = { + QueueTy, Int32Ty, RangeTy, Int32Ty, + EventPtrAS4Ty, EventPtrAS5Ty, Int8PtrTy}; + std::vector<llvm::Value *> Args = {Queue, Flags, Range, NumEvents, + EventList, ClkEvent, Block}; + + if (NumArgs == 7) { + // Has events but no variadics. + Name = "__enqueue_kernel_basic_events"; + llvm::FunctionType *FTy = llvm::FunctionType::get( + Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); + return RValue::get( + Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), + llvm::ArrayRef<llvm::Value *>(Args))); + } + // Has event info and variadics + // Pass the number of variadics to the runtime function too. + Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7)); + ArgTys.push_back(Int32Ty); + Name = "__enqueue_kernel_events_vaargs"; + + // Add the variadics. + for (unsigned I = 7; I < NumArgs; ++I) { + llvm::Value *ArgSize = EmitScalarExpr(E->getArg(I)); + unsigned TypeSizeInBytes = + getContext() + .getTypeSizeInChars(E->getArg(I)->getType()) + .getQuantity(); + Args.push_back(TypeSizeInBytes < 4 + ? Builder.CreateZExt(ArgSize, Int32Ty) + : ArgSize); + } + llvm::FunctionType *FTy = llvm::FunctionType::get( + Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true); + return RValue::get( + Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), + llvm::ArrayRef<llvm::Value *>(Args))); + } + } + // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block + // parameter. 
+ case Builtin::BIget_kernel_work_group_size: { + Value *Arg = EmitScalarExpr(E->getArg(0)); + Arg = Builder.CreateBitCast(Arg, Int8PtrTy); + return RValue::get( + Builder.CreateCall(CGM.CreateRuntimeFunction( + llvm::FunctionType::get(IntTy, Int8PtrTy, false), + "__get_kernel_work_group_size_impl"), + Arg)); + } + case Builtin::BIget_kernel_preferred_work_group_size_multiple: { + Value *Arg = EmitScalarExpr(E->getArg(0)); + Arg = Builder.CreateBitCast(Arg, Int8PtrTy); + return RValue::get(Builder.CreateCall( + CGM.CreateRuntimeFunction( + llvm::FunctionType::get(IntTy, Int8PtrTy, false), + "__get_kernel_preferred_work_group_multiple_impl"), + Arg)); + } + case Builtin::BIprintf: + if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) + return EmitCUDADevicePrintfCallExpr(E, ReturnValue); + break; + case Builtin::BI__builtin_canonicalize: + case Builtin::BI__builtin_canonicalizef: + case Builtin::BI__builtin_canonicalizel: + return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize)); + + case Builtin::BI__builtin_thread_pointer: { + if (!getContext().getTargetInfo().isTLSSupported()) + CGM.ErrorUnsupported(E, "__builtin_thread_pointer"); + // Fall through - it's already mapped to the intrinsic by GCCBuiltin. + break; + } } // If this is an alias for a lib function (e.g. __builtin_sin), emit @@ -2155,7 +2565,7 @@ static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF, } Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) { - unsigned nElts = cast<llvm::VectorType>(V->getType())->getNumElements(); + unsigned nElts = V->getType()->getVectorNumElements(); Value* SV = llvm::ConstantVector::getSplat(nElts, C); return Builder.CreateShuffleVector(V, V, SV, "lane"); } @@ -3073,14 +3483,13 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( case NEON::BI__builtin_neon_vext_v: case NEON::BI__builtin_neon_vextq_v: { int CV = cast<ConstantInt>(Ops[2])->getSExtValue(); - SmallVector<Constant*, 16> Indices; + SmallVector<uint32_t, 16> Indices; for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) - Indices.push_back(ConstantInt::get(Int32Ty, i+CV)); + Indices.push_back(i+CV); Ops[0] = Builder.CreateBitCast(Ops[0], Ty); Ops[1] = Builder.CreateBitCast(Ops[1], Ty); - Value *SV = llvm::ConstantVector::get(Indices); - return Builder.CreateShuffleVector(Ops[0], Ops[1], SV, "vext"); + return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext"); } case NEON::BI__builtin_neon_vfma_v: case NEON::BI__builtin_neon_vfmaq_v: { @@ -3278,14 +3687,13 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( Value *SV = nullptr; for (unsigned vi = 0; vi != 2; ++vi) { - SmallVector<Constant*, 16> Indices; + SmallVector<uint32_t, 16> Indices; for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { - Indices.push_back(Builder.getInt32(i+vi)); - Indices.push_back(Builder.getInt32(i+e+vi)); + Indices.push_back(i+vi); + Indices.push_back(i+e+vi); } Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); - SV = llvm::ConstantVector::get(Indices); - SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn"); + SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn"); SV = Builder.CreateDefaultAlignedStore(SV, Addr); } return SV; @@ -3307,13 +3715,12 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( Value *SV = nullptr; for (unsigned vi = 0; vi != 2; ++vi) { - SmallVector<Constant*, 16> Indices; + SmallVector<uint32_t, 16> Indices; for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) - Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi)); + 
Indices.push_back(2*i+vi); Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); - SV = llvm::ConstantVector::get(Indices); - SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp"); + SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp"); SV = Builder.CreateDefaultAlignedStore(SV, Addr); } return SV; @@ -3326,14 +3733,13 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( Value *SV = nullptr; for (unsigned vi = 0; vi != 2; ++vi) { - SmallVector<Constant*, 16> Indices; + SmallVector<uint32_t, 16> Indices; for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { - Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1)); - Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e)); + Indices.push_back((i + vi*e) >> 1); + Indices.push_back(((i + vi*e) >> 1)+e); } Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); - SV = llvm::ConstantVector::get(Indices); - SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip"); + SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip"); SV = Builder.CreateDefaultAlignedStore(SV, Addr); } return SV; @@ -3381,19 +3787,19 @@ static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops, if (ExtOp) TblOps.push_back(ExtOp); - // Build a vector containing sequential number like (0, 1, 2, ..., 15) - SmallVector<Constant*, 16> Indices; + // Build a vector containing sequential number like (0, 1, 2, ..., 15) + SmallVector<uint32_t, 16> Indices; llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType()); for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) { - Indices.push_back(ConstantInt::get(CGF.Int32Ty, 2*i)); - Indices.push_back(ConstantInt::get(CGF.Int32Ty, 2*i+1)); + Indices.push_back(2*i); + Indices.push_back(2*i+1); } - Value *SV = llvm::ConstantVector::get(Indices); int PairPos = 0, End = Ops.size() - 1; while (PairPos < End) { TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos], - Ops[PairPos+1], SV, Name)); + Ops[PairPos+1], Indices, + Name)); PairPos += 2; } @@ -3402,13 +3808,13 @@ static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops, if (PairPos == End) { Value *ZeroTbl = ConstantAggregateZero::get(TblTy); TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos], - ZeroTbl, SV, Name)); + ZeroTbl, Indices, Name)); } Function *TblF; TblOps.push_back(IndexOp); TblF = CGF.CGM.getIntrinsic(IntID, ResTy); - + return CGF.EmitNeonCall(TblF, TblOps, Name); } @@ -3452,7 +3858,9 @@ Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) { static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF, const CallExpr *E, llvm::Type *RegisterType, - llvm::Type *ValueType, bool IsRead) { + llvm::Type *ValueType, + bool IsRead, + StringRef SysReg = "") { // write and register intrinsics only support 32 and 64 bit operations. 
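// --- Illustrative aside (not part of the diff): the vtrn/vuzp/vzip cases
// above (and their AArch64 twins later in this file) now pass the shuffle
// masks as plain uint32_t arrays instead of ConstantVector operands; the index
// formulas themselves are unchanged. A standalone sketch of what those
// formulas produce for a 4-element vector (names are illustrative):
#include <cstdio>
#include <vector>

static void print(const char *name, const std::vector<unsigned> &idx) {
  std::printf("%s:", name);
  for (unsigned v : idx)
    std::printf(" %u", v);
  std::printf("\n");
}

int main() {
  const unsigned e = 4;                  // elements per input vector
  for (unsigned vi = 0; vi != 2; ++vi) { // each builtin emits two shuffles
    std::vector<unsigned> trn, uzp, zip;
    for (unsigned i = 0; i != e; i += 2) {
      trn.push_back(i + vi);             // vtrn: 0 4 2 6 / 1 5 3 7
      trn.push_back(i + e + vi);
    }
    for (unsigned i = 0; i != e; ++i)
      uzp.push_back(2 * i + vi);         // vuzp: 0 2 4 6 / 1 3 5 7
    for (unsigned i = 0; i != e; i += 2) {
      zip.push_back((i + vi * e) >> 1);  // vzip: 0 4 1 5 / 2 6 3 7
      zip.push_back(((i + vi * e) >> 1) + e);
    }
    print("vtrn", trn);
    print("vuzp", uzp);
    print("vzip", zip);
  }
  return 0;
}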
assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64)) && "Unsupported size for register."); @@ -3461,8 +3869,10 @@ static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF, CodeGen::CodeGenModule &CGM = CGF.CGM; LLVMContext &Context = CGM.getLLVMContext(); - const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts(); - StringRef SysReg = cast<StringLiteral>(SysRegStrExpr)->getString(); + if (SysReg.empty()) { + const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts(); + SysReg = cast<StringLiteral>(SysRegStrExpr)->getString(); + } llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) }; llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops); @@ -3602,6 +4012,74 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops); } + if (BuiltinID == ARM::BI__builtin_arm_mcrr || + BuiltinID == ARM::BI__builtin_arm_mcrr2) { + Function *F; + + switch (BuiltinID) { + default: llvm_unreachable("unexpected builtin"); + case ARM::BI__builtin_arm_mcrr: + F = CGM.getIntrinsic(Intrinsic::arm_mcrr); + break; + case ARM::BI__builtin_arm_mcrr2: + F = CGM.getIntrinsic(Intrinsic::arm_mcrr2); + break; + } + + // MCRR{2} instruction has 5 operands but + // the intrinsic has 4 because Rt and Rt2 + // are represented as a single unsigned 64 + // bit integer in the intrinsic definition + // but internally it's represented as 2 32 + // bit integers. + + Value *Coproc = EmitScalarExpr(E->getArg(0)); + Value *Opc1 = EmitScalarExpr(E->getArg(1)); + Value *RtAndRt2 = EmitScalarExpr(E->getArg(2)); + Value *CRm = EmitScalarExpr(E->getArg(3)); + + Value *C1 = llvm::ConstantInt::get(Int64Ty, 32); + Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty); + Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1); + Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty); + + return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm}); + } + + if (BuiltinID == ARM::BI__builtin_arm_mrrc || + BuiltinID == ARM::BI__builtin_arm_mrrc2) { + Function *F; + + switch (BuiltinID) { + default: llvm_unreachable("unexpected builtin"); + case ARM::BI__builtin_arm_mrrc: + F = CGM.getIntrinsic(Intrinsic::arm_mrrc); + break; + case ARM::BI__builtin_arm_mrrc2: + F = CGM.getIntrinsic(Intrinsic::arm_mrrc2); + break; + } + + Value *Coproc = EmitScalarExpr(E->getArg(0)); + Value *Opc1 = EmitScalarExpr(E->getArg(1)); + Value *CRm = EmitScalarExpr(E->getArg(2)); + Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm}); + + // Returns an unsigned 64 bit integer, represented + // as two 32 bit integers. + + Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1); + Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0); + Rt = Builder.CreateZExt(Rt, Int64Ty); + Rt1 = Builder.CreateZExt(Rt1, Int64Ty); + + Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32); + RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true); + RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1); + + return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType())); + } + if (BuiltinID == ARM::BI__builtin_arm_ldrexd || ((BuiltinID == ARM::BI__builtin_arm_ldrex || BuiltinID == ARM::BI__builtin_arm_ldaex) && @@ -3914,7 +4392,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, // the first argument, but the LLVM intrinsic expects it as the third one. case ARM::BI_MoveToCoprocessor: case ARM::BI_MoveToCoprocessor2: { - Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ? + Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ? 
Intrinsic::arm_mcr : Intrinsic::arm_mcr2); return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0], Ops[3], Ops[4], Ops[5]}); @@ -4478,11 +4956,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F); } - if (BuiltinID == AArch64::BI__builtin_thread_pointer) { - Function *F = CGM.getIntrinsic(Intrinsic::aarch64_thread_pointer); - return Builder.CreateCall(F); - } - // CRC32 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic; switch (BuiltinID) { @@ -5150,22 +5623,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]}); } - case NEON::BI__builtin_neon_vfms_v: - case NEON::BI__builtin_neon_vfmsq_v: { // Only used for FP types - // FIXME: probably remove when we no longer support aarch64_simd.h - // (arm_neon.h delegates to vfma). - - // The ARM builtins (and instructions) have the addend as the first - // operand, but the 'fma' intrinsics have it last. Swap it around here. - Value *Subtrahend = Ops[0]; - Value *Multiplicand = Ops[2]; - Ops[0] = Multiplicand; - Ops[2] = Subtrahend; - Ops[1] = Builder.CreateBitCast(Ops[1], VTy); - Ops[1] = Builder.CreateFNeg(Ops[1]); - Int = Intrinsic::fma; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmls"); - } case NEON::BI__builtin_neon_vmull_v: // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull; @@ -5988,14 +6445,13 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Value *SV = nullptr; for (unsigned vi = 0; vi != 2; ++vi) { - SmallVector<Constant*, 16> Indices; + SmallVector<uint32_t, 16> Indices; for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { - Indices.push_back(ConstantInt::get(Int32Ty, i+vi)); - Indices.push_back(ConstantInt::get(Int32Ty, i+e+vi)); + Indices.push_back(i+vi); + Indices.push_back(i+e+vi); } Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); - SV = llvm::ConstantVector::get(Indices); - SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn"); + SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn"); SV = Builder.CreateDefaultAlignedStore(SV, Addr); } return SV; @@ -6008,13 +6464,12 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Value *SV = nullptr; for (unsigned vi = 0; vi != 2; ++vi) { - SmallVector<Constant*, 16> Indices; + SmallVector<uint32_t, 16> Indices; for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) - Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi)); + Indices.push_back(2*i+vi); Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); - SV = llvm::ConstantVector::get(Indices); - SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp"); + SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp"); SV = Builder.CreateDefaultAlignedStore(SV, Addr); } return SV; @@ -6027,14 +6482,13 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Value *SV = nullptr; for (unsigned vi = 0; vi != 2; ++vi) { - SmallVector<Constant*, 16> Indices; + SmallVector<uint32_t, 16> Indices; for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { - Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1)); - Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e)); + Indices.push_back((i + vi*e) >> 1); + Indices.push_back(((i + vi*e) >> 1)+e); } Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, 
Ops[0], vi); - SV = llvm::ConstantVector::get(Indices); - SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip"); + SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip"); SV = Builder.CreateDefaultAlignedStore(SV, Addr); } return SV; @@ -6110,6 +6564,118 @@ BuildVector(ArrayRef<llvm::Value*> Ops) { return Result; } +// Convert the mask from an integer type to a vector of i1. +static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask, + unsigned NumElts) { + + llvm::VectorType *MaskTy = llvm::VectorType::get(CGF.Builder.getInt1Ty(), + cast<IntegerType>(Mask->getType())->getBitWidth()); + Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy); + + // If we have less than 8 elements, then the starting mask was an i8 and + // we need to extract down to the right number of elements. + if (NumElts < 8) { + uint32_t Indices[4]; + for (unsigned i = 0; i != NumElts; ++i) + Indices[i] = i; + MaskVec = CGF.Builder.CreateShuffleVector(MaskVec, MaskVec, + makeArrayRef(Indices, NumElts), + "extract"); + } + return MaskVec; +} + +static Value *EmitX86MaskedStore(CodeGenFunction &CGF, + SmallVectorImpl<Value *> &Ops, + unsigned Align) { + // Cast the pointer to right type. + Ops[0] = CGF.Builder.CreateBitCast(Ops[0], + llvm::PointerType::getUnqual(Ops[1]->getType())); + + // If the mask is all ones just emit a regular store. + if (const auto *C = dyn_cast<Constant>(Ops[2])) + if (C->isAllOnesValue()) + return CGF.Builder.CreateAlignedStore(Ops[1], Ops[0], Align); + + Value *MaskVec = getMaskVecValue(CGF, Ops[2], + Ops[1]->getType()->getVectorNumElements()); + + return CGF.Builder.CreateMaskedStore(Ops[1], Ops[0], Align, MaskVec); +} + +static Value *EmitX86MaskedLoad(CodeGenFunction &CGF, + SmallVectorImpl<Value *> &Ops, unsigned Align) { + // Cast the pointer to right type. + Ops[0] = CGF.Builder.CreateBitCast(Ops[0], + llvm::PointerType::getUnqual(Ops[1]->getType())); + + // If the mask is all ones just emit a regular store. + if (const auto *C = dyn_cast<Constant>(Ops[2])) + if (C->isAllOnesValue()) + return CGF.Builder.CreateAlignedLoad(Ops[0], Align); + + Value *MaskVec = getMaskVecValue(CGF, Ops[2], + Ops[1]->getType()->getVectorNumElements()); + + return CGF.Builder.CreateMaskedLoad(Ops[0], Align, MaskVec, Ops[1]); +} + +static Value *EmitX86Select(CodeGenFunction &CGF, + Value *Mask, Value *Op0, Value *Op1) { + + // If the mask is all ones just return first argument. + if (const auto *C = dyn_cast<Constant>(Mask)) + if (C->isAllOnesValue()) + return Op0; + + Mask = getMaskVecValue(CGF, Mask, Op0->getType()->getVectorNumElements()); + + return CGF.Builder.CreateSelect(Mask, Op0, Op1); +} + +static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC, + bool Signed, SmallVectorImpl<Value *> &Ops) { + unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); + Value *Cmp; + + if (CC == 3) { + Cmp = Constant::getNullValue( + llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts)); + } else if (CC == 7) { + Cmp = Constant::getAllOnesValue( + llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts)); + } else { + ICmpInst::Predicate Pred; + switch (CC) { + default: llvm_unreachable("Unknown condition code"); + case 0: Pred = ICmpInst::ICMP_EQ; break; + case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break; + case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break; + case 4: Pred = ICmpInst::ICMP_NE; break; + case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break; + case 6: Pred = Signed ? 
ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break; + } + Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]); + } + + const auto *C = dyn_cast<Constant>(Ops.back()); + if (!C || !C->isAllOnesValue()) + Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, Ops.back(), NumElts)); + + if (NumElts < 8) { + uint32_t Indices[8]; + for (unsigned i = 0; i != NumElts; ++i) + Indices[i] = i; + for (unsigned i = NumElts; i != 8; ++i) + Indices[i] = i % NumElts + NumElts; + Cmp = CGF.Builder.CreateShuffleVector( + Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices); + } + return CGF.Builder.CreateBitCast(Cmp, + IntegerType::get(CGF.getLLVMContext(), + std::max(NumElts, 8U))); +} + Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E) { if (BuiltinID == X86::BI__builtin_ms_va_start || @@ -6160,6 +6726,31 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result)); } + // These exist so that the builtin that takes an immediate can be bounds + // checked by clang to avoid passing bad immediates to the backend. Since + // AVX has a larger immediate than SSE we would need separate builtins to + // do the different bounds checking. Rather than create a clang specific + // SSE only builtin, this implements eight separate builtins to match gcc + // implementation. + auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) { + Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm)); + llvm::Function *F = CGM.getIntrinsic(ID); + return Builder.CreateCall(F, Ops); + }; + + // For the vector forms of FP comparisons, translate the builtins directly to + // IR. + // TODO: The builtins could be removed if the SSE header files used vector + // extension comparisons directly (vector ordered/unordered may need + // additional support via __builtin_isnan()). 
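// --- Illustrative aside (not part of the diff): getMaskVecValue and
// EmitX86Select above implement the AVX-512 write-mask convention -- bit i of
// the k-register value becomes lane i of an i1 vector, and the select keeps
// the fallthrough lane wherever that bit is clear. A scalar model of that
// behaviour (the real code stays entirely in LLVM IR; names are illustrative):
#include <cassert>
#include <cstdint>

// result[i] = mask bit i ? op0[i] : op1[i]
static void maskedSelect(uint8_t mask, const int *op0, const int *op1,
                         int *result, unsigned numElts) {
  for (unsigned i = 0; i != numElts; ++i)
    result[i] = ((mask >> i) & 1) ? op0[i] : op1[i];
}

int main() {
  int a[4] = {10, 11, 12, 13};
  int b[4] = {0, 0, 0, 0};
  int r[4];
  maskedSelect(0b0101, a, b, r, 4); // only lanes 0 and 2 are taken from a
  assert(r[0] == 10 && r[1] == 0 && r[2] == 12 && r[3] == 0);
  return 0;
}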
+ auto getVectorFCmpIR = [this, &Ops](CmpInst::Predicate Pred) { + Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]); + llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType()); + llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy); + Value *Sext = Builder.CreateSExt(Cmp, IntVecTy); + return Builder.CreateBitCast(Sext, FPVecTy); + }; + switch (BuiltinID) { default: return nullptr; case X86::BI__builtin_cpu_supports: { @@ -6188,6 +6779,16 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, AVX512F, BMI, BMI2, + AES, + PCLMUL, + AVX512VL, + AVX512BW, + AVX512DQ, + AVX512CD, + AVX512ER, + AVX512PF, + AVX512VBMI, + AVX512IFMA, MAX }; @@ -6198,6 +6799,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, .Case("sse", X86Features::SSE) .Case("sse2", X86Features::SSE2) .Case("sse3", X86Features::SSE3) + .Case("ssse3", X86Features::SSSE3) .Case("sse4.1", X86Features::SSE4_1) .Case("sse4.2", X86Features::SSE4_2) .Case("avx", X86Features::AVX) @@ -6209,6 +6811,16 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, .Case("avx512f", X86Features::AVX512F) .Case("bmi", X86Features::BMI) .Case("bmi2", X86Features::BMI2) + .Case("aes", X86Features::AES) + .Case("pclmul", X86Features::PCLMUL) + .Case("avx512vl", X86Features::AVX512VL) + .Case("avx512bw", X86Features::AVX512BW) + .Case("avx512dq", X86Features::AVX512DQ) + .Case("avx512cd", X86Features::AVX512CD) + .Case("avx512er", X86Features::AVX512ER) + .Case("avx512pf", X86Features::AVX512PF) + .Case("avx512vbmi", X86Features::AVX512VBMI) + .Case("avx512ifma", X86Features::AVX512IFMA) .Default(X86Features::MAX); assert(Feature != X86Features::MAX && "Invalid feature!"); @@ -6237,7 +6849,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, // Check the value of the bit corresponding to the feature requested. 
Value *Bitset = Builder.CreateAnd( - Features, llvm::ConstantInt::get(Int32Ty, 1 << Feature)); + Features, llvm::ConstantInt::get(Int32Ty, 1ULL << Feature)); return Builder.CreateICmpNE(Bitset, llvm::ConstantInt::get(Int32Ty, 0)); } case X86::BI_mm_prefetch: { @@ -6312,6 +6924,78 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Ops.push_back(Mlo); return Builder.CreateCall(CGM.getIntrinsic(ID), Ops); } + case X86::BI__builtin_ia32_storedqudi128_mask: + case X86::BI__builtin_ia32_storedqusi128_mask: + case X86::BI__builtin_ia32_storedquhi128_mask: + case X86::BI__builtin_ia32_storedquqi128_mask: + case X86::BI__builtin_ia32_storeupd128_mask: + case X86::BI__builtin_ia32_storeups128_mask: + case X86::BI__builtin_ia32_storedqudi256_mask: + case X86::BI__builtin_ia32_storedqusi256_mask: + case X86::BI__builtin_ia32_storedquhi256_mask: + case X86::BI__builtin_ia32_storedquqi256_mask: + case X86::BI__builtin_ia32_storeupd256_mask: + case X86::BI__builtin_ia32_storeups256_mask: + case X86::BI__builtin_ia32_storedqudi512_mask: + case X86::BI__builtin_ia32_storedqusi512_mask: + case X86::BI__builtin_ia32_storedquhi512_mask: + case X86::BI__builtin_ia32_storedquqi512_mask: + case X86::BI__builtin_ia32_storeupd512_mask: + case X86::BI__builtin_ia32_storeups512_mask: + return EmitX86MaskedStore(*this, Ops, 1); + + case X86::BI__builtin_ia32_movdqa32store128_mask: + case X86::BI__builtin_ia32_movdqa64store128_mask: + case X86::BI__builtin_ia32_storeaps128_mask: + case X86::BI__builtin_ia32_storeapd128_mask: + case X86::BI__builtin_ia32_movdqa32store256_mask: + case X86::BI__builtin_ia32_movdqa64store256_mask: + case X86::BI__builtin_ia32_storeaps256_mask: + case X86::BI__builtin_ia32_storeapd256_mask: + case X86::BI__builtin_ia32_movdqa32store512_mask: + case X86::BI__builtin_ia32_movdqa64store512_mask: + case X86::BI__builtin_ia32_storeaps512_mask: + case X86::BI__builtin_ia32_storeapd512_mask: { + unsigned Align = + getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity(); + return EmitX86MaskedStore(*this, Ops, Align); + } + case X86::BI__builtin_ia32_loadups128_mask: + case X86::BI__builtin_ia32_loadups256_mask: + case X86::BI__builtin_ia32_loadups512_mask: + case X86::BI__builtin_ia32_loadupd128_mask: + case X86::BI__builtin_ia32_loadupd256_mask: + case X86::BI__builtin_ia32_loadupd512_mask: + case X86::BI__builtin_ia32_loaddquqi128_mask: + case X86::BI__builtin_ia32_loaddquqi256_mask: + case X86::BI__builtin_ia32_loaddquqi512_mask: + case X86::BI__builtin_ia32_loaddquhi128_mask: + case X86::BI__builtin_ia32_loaddquhi256_mask: + case X86::BI__builtin_ia32_loaddquhi512_mask: + case X86::BI__builtin_ia32_loaddqusi128_mask: + case X86::BI__builtin_ia32_loaddqusi256_mask: + case X86::BI__builtin_ia32_loaddqusi512_mask: + case X86::BI__builtin_ia32_loaddqudi128_mask: + case X86::BI__builtin_ia32_loaddqudi256_mask: + case X86::BI__builtin_ia32_loaddqudi512_mask: + return EmitX86MaskedLoad(*this, Ops, 1); + + case X86::BI__builtin_ia32_loadaps128_mask: + case X86::BI__builtin_ia32_loadaps256_mask: + case X86::BI__builtin_ia32_loadaps512_mask: + case X86::BI__builtin_ia32_loadapd128_mask: + case X86::BI__builtin_ia32_loadapd256_mask: + case X86::BI__builtin_ia32_loadapd512_mask: + case X86::BI__builtin_ia32_movdqa32load128_mask: + case X86::BI__builtin_ia32_movdqa32load256_mask: + case X86::BI__builtin_ia32_movdqa32load512_mask: + case X86::BI__builtin_ia32_movdqa64load128_mask: + case X86::BI__builtin_ia32_movdqa64load256_mask: + case 
X86::BI__builtin_ia32_movdqa64load512_mask: { + unsigned Align = + getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity(); + return EmitX86MaskedLoad(*this, Ops, Align); + } case X86::BI__builtin_ia32_storehps: case X86::BI__builtin_ia32_storelps: { llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty); @@ -6330,103 +7014,50 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); } case X86::BI__builtin_ia32_palignr128: - case X86::BI__builtin_ia32_palignr256: { + case X86::BI__builtin_ia32_palignr256: + case X86::BI__builtin_ia32_palignr128_mask: + case X86::BI__builtin_ia32_palignr256_mask: + case X86::BI__builtin_ia32_palignr512_mask: { unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); - unsigned NumElts = - cast<llvm::VectorType>(Ops[0]->getType())->getNumElements(); + unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); assert(NumElts % 16 == 0); - unsigned NumLanes = NumElts / 16; - unsigned NumLaneElts = NumElts / NumLanes; // If palignr is shifting the pair of vectors more than the size of two // lanes, emit zero. - if (ShiftVal >= (2 * NumLaneElts)) + if (ShiftVal >= 32) return llvm::Constant::getNullValue(ConvertType(E->getType())); // If palignr is shifting the pair of input vectors more than one lane, // but less than two lanes, convert to shifting in zeroes. - if (ShiftVal > NumLaneElts) { - ShiftVal -= NumLaneElts; + if (ShiftVal > 16) { + ShiftVal -= 16; Ops[1] = Ops[0]; Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType()); } - uint32_t Indices[32]; + uint32_t Indices[64]; // 256-bit palignr operates on 128-bit lanes so we need to handle that - for (unsigned l = 0; l != NumElts; l += NumLaneElts) { - for (unsigned i = 0; i != NumLaneElts; ++i) { + for (unsigned l = 0; l != NumElts; l += 16) { + for (unsigned i = 0; i != 16; ++i) { unsigned Idx = ShiftVal + i; - if (Idx >= NumLaneElts) - Idx += NumElts - NumLaneElts; // End of lane, switch operand. + if (Idx >= 16) + Idx += NumElts - 16; // End of lane, switch operand. Indices[l + i] = Idx + l; } } - Value *SV = llvm::ConstantDataVector::get(getLLVMContext(), - makeArrayRef(Indices, NumElts)); - return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr"); - } - case X86::BI__builtin_ia32_pslldqi256: { - // Shift value is in bits so divide by 8. - unsigned shiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() >> 3; + Value *Align = Builder.CreateShuffleVector(Ops[1], Ops[0], + makeArrayRef(Indices, NumElts), + "palignr"); - // If pslldq is shifting the vector more than 15 bytes, emit zero. - if (shiftVal >= 16) - return llvm::Constant::getNullValue(ConvertType(E->getType())); - - uint32_t Indices[32]; - // 256-bit pslldq operates on 128-bit lanes so we need to handle that - for (unsigned l = 0; l != 32; l += 16) { - for (unsigned i = 0; i != 16; ++i) { - unsigned Idx = 32 + i - shiftVal; - if (Idx < 32) Idx -= 16; // end of lane, switch operand. - Indices[l + i] = Idx + l; - } - } - - llvm::Type *VecTy = llvm::VectorType::get(Int8Ty, 32); - Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast"); - Value *Zero = llvm::Constant::getNullValue(VecTy); + // If this isn't a masked builtin, just return the align operation. 
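// --- Illustrative aside (not part of the diff): the unified palignr lowering
// above works per 16-byte lane and emits a single shuffle -- index ShiftVal+i
// reads from the low operand, and any index past byte 15 is redirected into
// the other operand (which has already been replaced by zeros when ShiftVal
// exceeds 16, or the whole result zeroed when ShiftVal >= 32). A standalone
// model of the per-lane index computation (names are illustrative):
#include <cstdio>

static void palignrLane(unsigned shiftVal, unsigned numElts,
                        unsigned out[16]) {
  for (unsigned i = 0; i != 16; ++i) {
    unsigned idx = shiftVal + i;
    if (idx >= 16)
      idx += numElts - 16; // end of lane, switch to the other shuffle operand
    out[i] = idx;
  }
}

int main() {
  unsigned idx[16];
  palignrLane(4, 16, idx); // 128-bit case: NumElts == 16
  for (unsigned v : idx)
    std::printf("%u ", v); // prints 4..15 followed by 16..19
  std::printf("\n");
  return 0;
}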
+ if (Ops.size() == 3) + return Align; - Value *SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices); - SV = Builder.CreateShuffleVector(Zero, Ops[0], SV, "pslldq"); - llvm::Type *ResultType = ConvertType(E->getType()); - return Builder.CreateBitCast(SV, ResultType, "cast"); + return EmitX86Select(*this, Ops[4], Align, Ops[3]); } - case X86::BI__builtin_ia32_psrldqi256: { - // Shift value is in bits so divide by 8. - unsigned shiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() >> 3; - - // If psrldq is shifting the vector more than 15 bytes, emit zero. - if (shiftVal >= 16) - return llvm::Constant::getNullValue(ConvertType(E->getType())); - uint32_t Indices[32]; - // 256-bit psrldq operates on 128-bit lanes so we need to handle that - for (unsigned l = 0; l != 32; l += 16) { - for (unsigned i = 0; i != 16; ++i) { - unsigned Idx = i + shiftVal; - if (Idx >= 16) Idx += 16; // end of lane, switch operand. - Indices[l + i] = Idx + l; - } - } - - llvm::Type *VecTy = llvm::VectorType::get(Int8Ty, 32); - Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast"); - Value *Zero = llvm::Constant::getNullValue(VecTy); - - Value *SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices); - SV = Builder.CreateShuffleVector(Ops[0], Zero, SV, "psrldq"); - llvm::Type *ResultType = ConvertType(E->getType()); - return Builder.CreateBitCast(SV, ResultType, "cast"); - } - case X86::BI__builtin_ia32_movntps: - case X86::BI__builtin_ia32_movntps256: - case X86::BI__builtin_ia32_movntpd: - case X86::BI__builtin_ia32_movntpd256: - case X86::BI__builtin_ia32_movntdq: - case X86::BI__builtin_ia32_movntdq256: case X86::BI__builtin_ia32_movnti: case X86::BI__builtin_ia32_movnti64: { llvm::MDNode *Node = llvm::MDNode::get( @@ -6439,17 +7070,156 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, StoreInst *SI = Builder.CreateDefaultAlignedStore(Ops[1], BC); SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node); - // If the operand is an integer, we can't assume alignment. Otherwise, - // assume natural alignment. - QualType ArgTy = E->getArg(1)->getType(); - unsigned Align; - if (ArgTy->isIntegerType()) - Align = 1; - else - Align = getContext().getTypeSizeInChars(ArgTy).getQuantity(); - SI->setAlignment(Align); + // No alignment for scalar intrinsic store. + SI->setAlignment(1); + return SI; + } + case X86::BI__builtin_ia32_movntsd: + case X86::BI__builtin_ia32_movntss: { + llvm::MDNode *Node = llvm::MDNode::get( + getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1))); + + // Extract the 0'th element of the source vector. + Value *Scl = Builder.CreateExtractElement(Ops[1], (uint64_t)0, "extract"); + + // Convert the type of the pointer to a pointer to the stored type. + Value *BC = Builder.CreateBitCast(Ops[0], + llvm::PointerType::getUnqual(Scl->getType()), + "cast"); + + // Unaligned nontemporal store of the scalar value. 
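// A minimal sketch of what the palignr index computation above produces,
// assuming the usual SSSE3 wrapper _mm_alignr_epi8 routes to
// __builtin_ia32_palignr128.  For the 128-bit form with ShiftVal = 4 the
// indices are {4,5,...,15,16,17,18,19}: elements 4..15 come from Ops[1]
// (the second source) and 16..19 wrap into Ops[0], i.e. bytes 4..19 of the
// two-vector concatenation, matching the documented alignr semantics.
#include <immintrin.h>

__m128i alignr_demo(__m128i hi, __m128i lo) {
  // Result byte i == byte (i + 4) of the 32-byte concatenation
  // (lo in the low 16 bytes, hi in the high 16 bytes).
  return _mm_alignr_epi8(hi, lo, 4);
}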
+ StoreInst *SI = Builder.CreateDefaultAlignedStore(Scl, BC); + SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node); + SI->setAlignment(1); return SI; } + + case X86::BI__builtin_ia32_selectb_128: + case X86::BI__builtin_ia32_selectb_256: + case X86::BI__builtin_ia32_selectb_512: + case X86::BI__builtin_ia32_selectw_128: + case X86::BI__builtin_ia32_selectw_256: + case X86::BI__builtin_ia32_selectw_512: + case X86::BI__builtin_ia32_selectd_128: + case X86::BI__builtin_ia32_selectd_256: + case X86::BI__builtin_ia32_selectd_512: + case X86::BI__builtin_ia32_selectq_128: + case X86::BI__builtin_ia32_selectq_256: + case X86::BI__builtin_ia32_selectq_512: + case X86::BI__builtin_ia32_selectps_128: + case X86::BI__builtin_ia32_selectps_256: + case X86::BI__builtin_ia32_selectps_512: + case X86::BI__builtin_ia32_selectpd_128: + case X86::BI__builtin_ia32_selectpd_256: + case X86::BI__builtin_ia32_selectpd_512: + return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]); + case X86::BI__builtin_ia32_pcmpeqb128_mask: + case X86::BI__builtin_ia32_pcmpeqb256_mask: + case X86::BI__builtin_ia32_pcmpeqb512_mask: + case X86::BI__builtin_ia32_pcmpeqw128_mask: + case X86::BI__builtin_ia32_pcmpeqw256_mask: + case X86::BI__builtin_ia32_pcmpeqw512_mask: + case X86::BI__builtin_ia32_pcmpeqd128_mask: + case X86::BI__builtin_ia32_pcmpeqd256_mask: + case X86::BI__builtin_ia32_pcmpeqd512_mask: + case X86::BI__builtin_ia32_pcmpeqq128_mask: + case X86::BI__builtin_ia32_pcmpeqq256_mask: + case X86::BI__builtin_ia32_pcmpeqq512_mask: + return EmitX86MaskedCompare(*this, 0, false, Ops); + case X86::BI__builtin_ia32_pcmpgtb128_mask: + case X86::BI__builtin_ia32_pcmpgtb256_mask: + case X86::BI__builtin_ia32_pcmpgtb512_mask: + case X86::BI__builtin_ia32_pcmpgtw128_mask: + case X86::BI__builtin_ia32_pcmpgtw256_mask: + case X86::BI__builtin_ia32_pcmpgtw512_mask: + case X86::BI__builtin_ia32_pcmpgtd128_mask: + case X86::BI__builtin_ia32_pcmpgtd256_mask: + case X86::BI__builtin_ia32_pcmpgtd512_mask: + case X86::BI__builtin_ia32_pcmpgtq128_mask: + case X86::BI__builtin_ia32_pcmpgtq256_mask: + case X86::BI__builtin_ia32_pcmpgtq512_mask: + return EmitX86MaskedCompare(*this, 6, true, Ops); + case X86::BI__builtin_ia32_cmpb128_mask: + case X86::BI__builtin_ia32_cmpb256_mask: + case X86::BI__builtin_ia32_cmpb512_mask: + case X86::BI__builtin_ia32_cmpw128_mask: + case X86::BI__builtin_ia32_cmpw256_mask: + case X86::BI__builtin_ia32_cmpw512_mask: + case X86::BI__builtin_ia32_cmpd128_mask: + case X86::BI__builtin_ia32_cmpd256_mask: + case X86::BI__builtin_ia32_cmpd512_mask: + case X86::BI__builtin_ia32_cmpq128_mask: + case X86::BI__builtin_ia32_cmpq256_mask: + case X86::BI__builtin_ia32_cmpq512_mask: { + unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7; + return EmitX86MaskedCompare(*this, CC, true, Ops); + } + case X86::BI__builtin_ia32_ucmpb128_mask: + case X86::BI__builtin_ia32_ucmpb256_mask: + case X86::BI__builtin_ia32_ucmpb512_mask: + case X86::BI__builtin_ia32_ucmpw128_mask: + case X86::BI__builtin_ia32_ucmpw256_mask: + case X86::BI__builtin_ia32_ucmpw512_mask: + case X86::BI__builtin_ia32_ucmpd128_mask: + case X86::BI__builtin_ia32_ucmpd256_mask: + case X86::BI__builtin_ia32_ucmpd512_mask: + case X86::BI__builtin_ia32_ucmpq128_mask: + case X86::BI__builtin_ia32_ucmpq256_mask: + case X86::BI__builtin_ia32_ucmpq512_mask: { + unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7; + return EmitX86MaskedCompare(*this, CC, false, Ops); + } + + case X86::BI__builtin_ia32_vplzcntd_128_mask: + 
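// A minimal sketch of source-level forms that reach the select and
// mask-compare cases above, assuming the AVX-512 wrappers in <immintrin.h>
// route through these builtins (e.g. _mm512_cmpeq_epi32_mask ->
// __builtin_ia32_pcmpeqd512_mask, _mm512_mask_mov_epi32 ->
// __builtin_ia32_selectd_512).  EmitX86MaskedCompare packs a vector icmp
// into the scalar mask; EmitX86Select turns the scalar mask back into
// <N x i1> and emits a plain IR select.
#include <immintrin.h>

__mmask16 eq_mask(__m512i a, __m512i b) {
  return _mm512_cmpeq_epi32_mask(a, b);   // icmp eq <16 x i32>, packed to i16
}

__m512i pick(__mmask16 k, __m512i x, __m512i y) {
  return _mm512_mask_mov_epi32(y, k, x);  // lane-wise: k ? x : y
}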
case X86::BI__builtin_ia32_vplzcntd_256_mask: + case X86::BI__builtin_ia32_vplzcntd_512_mask: + case X86::BI__builtin_ia32_vplzcntq_128_mask: + case X86::BI__builtin_ia32_vplzcntq_256_mask: + case X86::BI__builtin_ia32_vplzcntq_512_mask: { + Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType()); + return EmitX86Select(*this, Ops[2], + Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)}), + Ops[1]); + } + + // TODO: Handle 64/512-bit vector widths of min/max. + case X86::BI__builtin_ia32_pmaxsb128: + case X86::BI__builtin_ia32_pmaxsw128: + case X86::BI__builtin_ia32_pmaxsd128: + case X86::BI__builtin_ia32_pmaxsb256: + case X86::BI__builtin_ia32_pmaxsw256: + case X86::BI__builtin_ia32_pmaxsd256: { + Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SGT, Ops[0], Ops[1]); + return Builder.CreateSelect(Cmp, Ops[0], Ops[1]); + } + case X86::BI__builtin_ia32_pmaxub128: + case X86::BI__builtin_ia32_pmaxuw128: + case X86::BI__builtin_ia32_pmaxud128: + case X86::BI__builtin_ia32_pmaxub256: + case X86::BI__builtin_ia32_pmaxuw256: + case X86::BI__builtin_ia32_pmaxud256: { + Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_UGT, Ops[0], Ops[1]); + return Builder.CreateSelect(Cmp, Ops[0], Ops[1]); + } + case X86::BI__builtin_ia32_pminsb128: + case X86::BI__builtin_ia32_pminsw128: + case X86::BI__builtin_ia32_pminsd128: + case X86::BI__builtin_ia32_pminsb256: + case X86::BI__builtin_ia32_pminsw256: + case X86::BI__builtin_ia32_pminsd256: { + Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SLT, Ops[0], Ops[1]); + return Builder.CreateSelect(Cmp, Ops[0], Ops[1]); + } + case X86::BI__builtin_ia32_pminub128: + case X86::BI__builtin_ia32_pminuw128: + case X86::BI__builtin_ia32_pminud128: + case X86::BI__builtin_ia32_pminub256: + case X86::BI__builtin_ia32_pminuw256: + case X86::BI__builtin_ia32_pminud256: { + Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, Ops[0], Ops[1]); + return Builder.CreateSelect(Cmp, Ops[0], Ops[1]); + } + // 3DNow! case X86::BI__builtin_ia32_pswapdsf: case X86::BI__builtin_ia32_pswapdsi: { @@ -6492,154 +7262,107 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Ops[0]); return Builder.CreateExtractValue(Call, 1); } - // SSE comparison intrisics + + // SSE packed comparison intrinsics case X86::BI__builtin_ia32_cmpeqps: + case X86::BI__builtin_ia32_cmpeqpd: + return getVectorFCmpIR(CmpInst::FCMP_OEQ); case X86::BI__builtin_ia32_cmpltps: + case X86::BI__builtin_ia32_cmpltpd: + return getVectorFCmpIR(CmpInst::FCMP_OLT); case X86::BI__builtin_ia32_cmpleps: + case X86::BI__builtin_ia32_cmplepd: + return getVectorFCmpIR(CmpInst::FCMP_OLE); case X86::BI__builtin_ia32_cmpunordps: + case X86::BI__builtin_ia32_cmpunordpd: + return getVectorFCmpIR(CmpInst::FCMP_UNO); case X86::BI__builtin_ia32_cmpneqps: + case X86::BI__builtin_ia32_cmpneqpd: + return getVectorFCmpIR(CmpInst::FCMP_UNE); case X86::BI__builtin_ia32_cmpnltps: + case X86::BI__builtin_ia32_cmpnltpd: + return getVectorFCmpIR(CmpInst::FCMP_UGE); case X86::BI__builtin_ia32_cmpnleps: + case X86::BI__builtin_ia32_cmpnlepd: + return getVectorFCmpIR(CmpInst::FCMP_UGT); case X86::BI__builtin_ia32_cmpordps: + case X86::BI__builtin_ia32_cmpordpd: + return getVectorFCmpIR(CmpInst::FCMP_ORD); + case X86::BI__builtin_ia32_cmpps: + case X86::BI__builtin_ia32_cmpps256: + case X86::BI__builtin_ia32_cmppd: + case X86::BI__builtin_ia32_cmppd256: { + unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); + // If this one of the SSE immediates, we can use native IR. 
+ if (CC < 8) { + FCmpInst::Predicate Pred; + switch (CC) { + case 0: Pred = FCmpInst::FCMP_OEQ; break; + case 1: Pred = FCmpInst::FCMP_OLT; break; + case 2: Pred = FCmpInst::FCMP_OLE; break; + case 3: Pred = FCmpInst::FCMP_UNO; break; + case 4: Pred = FCmpInst::FCMP_UNE; break; + case 5: Pred = FCmpInst::FCMP_UGE; break; + case 6: Pred = FCmpInst::FCMP_UGT; break; + case 7: Pred = FCmpInst::FCMP_ORD; break; + } + return getVectorFCmpIR(Pred); + } + + // We can't handle 8-31 immediates with native IR, use the intrinsic. + Intrinsic::ID ID; + switch (BuiltinID) { + default: llvm_unreachable("Unsupported intrinsic!"); + case X86::BI__builtin_ia32_cmpps: + ID = Intrinsic::x86_sse_cmp_ps; + break; + case X86::BI__builtin_ia32_cmpps256: + ID = Intrinsic::x86_avx_cmp_ps_256; + break; + case X86::BI__builtin_ia32_cmppd: + ID = Intrinsic::x86_sse2_cmp_pd; + break; + case X86::BI__builtin_ia32_cmppd256: + ID = Intrinsic::x86_avx_cmp_pd_256; + break; + } + + return Builder.CreateCall(CGM.getIntrinsic(ID), Ops); + } + + // SSE scalar comparison intrinsics case X86::BI__builtin_ia32_cmpeqss: + return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0); case X86::BI__builtin_ia32_cmpltss: + return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1); case X86::BI__builtin_ia32_cmpless: + return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2); case X86::BI__builtin_ia32_cmpunordss: + return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3); case X86::BI__builtin_ia32_cmpneqss: + return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4); case X86::BI__builtin_ia32_cmpnltss: + return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5); case X86::BI__builtin_ia32_cmpnless: + return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6); case X86::BI__builtin_ia32_cmpordss: - case X86::BI__builtin_ia32_cmpeqpd: - case X86::BI__builtin_ia32_cmpltpd: - case X86::BI__builtin_ia32_cmplepd: - case X86::BI__builtin_ia32_cmpunordpd: - case X86::BI__builtin_ia32_cmpneqpd: - case X86::BI__builtin_ia32_cmpnltpd: - case X86::BI__builtin_ia32_cmpnlepd: - case X86::BI__builtin_ia32_cmpordpd: + return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7); case X86::BI__builtin_ia32_cmpeqsd: + return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0); case X86::BI__builtin_ia32_cmpltsd: + return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1); case X86::BI__builtin_ia32_cmplesd: + return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2); case X86::BI__builtin_ia32_cmpunordsd: + return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3); case X86::BI__builtin_ia32_cmpneqsd: + return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4); case X86::BI__builtin_ia32_cmpnltsd: + return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5); case X86::BI__builtin_ia32_cmpnlesd: + return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6); case X86::BI__builtin_ia32_cmpordsd: - // These exist so that the builtin that takes an immediate can be bounds - // checked by clang to avoid passing bad immediates to the backend. Since - // AVX has a larger immediate than SSE we would need separate builtins to - // do the different bounds checking. Rather than create a clang specific - // SSE only builtin, this implements eight separate builtins to match gcc - // implementation. - - // Choose the immediate. 
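// A minimal sketch of calls that reach the immediate-driven cmpps/cmppd path
// above, assuming the AVX wrappers in <immintrin.h>.  Immediates 0-7 now
// lower to a native IR fcmp (via getVectorFCmpIR); immediates 8-31, which
// only AVX encodes, still go through the target intrinsic.
#include <immintrin.h>

__m256 lt(__m256 a, __m256 b) {
  return _mm256_cmp_ps(a, b, _CMP_LT_OS);  // imm 1 -> fcmp olt
}

__m256 nge(__m256 a, __m256 b) {
  return _mm256_cmp_ps(a, b, _CMP_NGE_US); // imm 9 -> llvm.x86.avx.cmp.ps.256
}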
- unsigned Imm; - switch (BuiltinID) { - default: llvm_unreachable("Unsupported intrinsic!"); - case X86::BI__builtin_ia32_cmpeqps: - case X86::BI__builtin_ia32_cmpeqss: - case X86::BI__builtin_ia32_cmpeqpd: - case X86::BI__builtin_ia32_cmpeqsd: - Imm = 0; - break; - case X86::BI__builtin_ia32_cmpltps: - case X86::BI__builtin_ia32_cmpltss: - case X86::BI__builtin_ia32_cmpltpd: - case X86::BI__builtin_ia32_cmpltsd: - Imm = 1; - break; - case X86::BI__builtin_ia32_cmpleps: - case X86::BI__builtin_ia32_cmpless: - case X86::BI__builtin_ia32_cmplepd: - case X86::BI__builtin_ia32_cmplesd: - Imm = 2; - break; - case X86::BI__builtin_ia32_cmpunordps: - case X86::BI__builtin_ia32_cmpunordss: - case X86::BI__builtin_ia32_cmpunordpd: - case X86::BI__builtin_ia32_cmpunordsd: - Imm = 3; - break; - case X86::BI__builtin_ia32_cmpneqps: - case X86::BI__builtin_ia32_cmpneqss: - case X86::BI__builtin_ia32_cmpneqpd: - case X86::BI__builtin_ia32_cmpneqsd: - Imm = 4; - break; - case X86::BI__builtin_ia32_cmpnltps: - case X86::BI__builtin_ia32_cmpnltss: - case X86::BI__builtin_ia32_cmpnltpd: - case X86::BI__builtin_ia32_cmpnltsd: - Imm = 5; - break; - case X86::BI__builtin_ia32_cmpnleps: - case X86::BI__builtin_ia32_cmpnless: - case X86::BI__builtin_ia32_cmpnlepd: - case X86::BI__builtin_ia32_cmpnlesd: - Imm = 6; - break; - case X86::BI__builtin_ia32_cmpordps: - case X86::BI__builtin_ia32_cmpordss: - case X86::BI__builtin_ia32_cmpordpd: - case X86::BI__builtin_ia32_cmpordsd: - Imm = 7; - break; - } - - // Choose the intrinsic ID. - const char *name; - Intrinsic::ID ID; - switch (BuiltinID) { - default: llvm_unreachable("Unsupported intrinsic!"); - case X86::BI__builtin_ia32_cmpeqps: - case X86::BI__builtin_ia32_cmpltps: - case X86::BI__builtin_ia32_cmpleps: - case X86::BI__builtin_ia32_cmpunordps: - case X86::BI__builtin_ia32_cmpneqps: - case X86::BI__builtin_ia32_cmpnltps: - case X86::BI__builtin_ia32_cmpnleps: - case X86::BI__builtin_ia32_cmpordps: - name = "cmpps"; - ID = Intrinsic::x86_sse_cmp_ps; - break; - case X86::BI__builtin_ia32_cmpeqss: - case X86::BI__builtin_ia32_cmpltss: - case X86::BI__builtin_ia32_cmpless: - case X86::BI__builtin_ia32_cmpunordss: - case X86::BI__builtin_ia32_cmpneqss: - case X86::BI__builtin_ia32_cmpnltss: - case X86::BI__builtin_ia32_cmpnless: - case X86::BI__builtin_ia32_cmpordss: - name = "cmpss"; - ID = Intrinsic::x86_sse_cmp_ss; - break; - case X86::BI__builtin_ia32_cmpeqpd: - case X86::BI__builtin_ia32_cmpltpd: - case X86::BI__builtin_ia32_cmplepd: - case X86::BI__builtin_ia32_cmpunordpd: - case X86::BI__builtin_ia32_cmpneqpd: - case X86::BI__builtin_ia32_cmpnltpd: - case X86::BI__builtin_ia32_cmpnlepd: - case X86::BI__builtin_ia32_cmpordpd: - name = "cmppd"; - ID = Intrinsic::x86_sse2_cmp_pd; - break; - case X86::BI__builtin_ia32_cmpeqsd: - case X86::BI__builtin_ia32_cmpltsd: - case X86::BI__builtin_ia32_cmplesd: - case X86::BI__builtin_ia32_cmpunordsd: - case X86::BI__builtin_ia32_cmpneqsd: - case X86::BI__builtin_ia32_cmpnltsd: - case X86::BI__builtin_ia32_cmpnlesd: - case X86::BI__builtin_ia32_cmpordsd: - name = "cmpsd"; - ID = Intrinsic::x86_sse2_cmp_sd; - break; - } - - Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm)); - llvm::Function *F = CGM.getIntrinsic(ID); - return Builder.CreateCall(F, Ops, name); + return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7); } } @@ -6812,6 +7535,16 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, llvm::Function *F = CGM.getIntrinsic(ID, ResultType); return Builder.CreateCall(F, X); } + + // Absolute value + 
case PPC::BI__builtin_vsx_xvabsdp: + case PPC::BI__builtin_vsx_xvabssp: { + llvm::Type *ResultType = ConvertType(E->getType()); + Value *X = EmitScalarExpr(E->getArg(0)); + llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType); + return Builder.CreateCall(F, X); + } + // FMA variations case PPC::BI__builtin_vsx_xvmaddadp: case PPC::BI__builtin_vsx_xvmaddasp: @@ -6851,44 +7584,11 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, } } -// Emit an intrinsic that has 1 float or double. -static Value *emitUnaryFPBuiltin(CodeGenFunction &CGF, - const CallExpr *E, - unsigned IntrinsicID) { - llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); - - Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); - return CGF.Builder.CreateCall(F, Src0); -} - -// Emit an intrinsic that has 3 float or double operands. -static Value *emitTernaryFPBuiltin(CodeGenFunction &CGF, - const CallExpr *E, - unsigned IntrinsicID) { - llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); - llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); - llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2)); - - Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); - return CGF.Builder.CreateCall(F, {Src0, Src1, Src2}); -} - -// Emit an intrinsic that has 1 float or double operand, and 1 integer. -static Value *emitFPIntBuiltin(CodeGenFunction &CGF, - const CallExpr *E, - unsigned IntrinsicID) { - llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); - llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); - - Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); - return CGF.Builder.CreateCall(F, {Src0, Src1}); -} - Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { switch (BuiltinID) { - case AMDGPU::BI__builtin_amdgpu_div_scale: - case AMDGPU::BI__builtin_amdgpu_div_scalef: { + case AMDGPU::BI__builtin_amdgcn_div_scale: + case AMDGPU::BI__builtin_amdgcn_div_scalef: { // Translate from the intrinsics's struct return to the builtin's out // argument. 
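// A minimal sketch of the new PPC "absolute value" cases just above, when
// targeting PowerPC with VSX: __builtin_vsx_xvabsdp / xvabssp now lower to
// the generic llvm.fabs intrinsic on the vector type rather than a
// target-specific intrinsic.
typedef double v2f64 __attribute__((vector_size(16)));

v2f64 vabs(v2f64 v) {
  return __builtin_vsx_xvabsdp(v);  // -> call <2 x double> @llvm.fabs.v2f64
}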
@@ -6898,7 +7598,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, llvm::Value *Y = EmitScalarExpr(E->getArg(1)); llvm::Value *Z = EmitScalarExpr(E->getArg(2)); - llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::AMDGPU_div_scale, + llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale, X->getType()); llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z}); @@ -6913,40 +7613,85 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, Builder.CreateStore(FlagExt, FlagOutPtr); return Result; } - case AMDGPU::BI__builtin_amdgpu_div_fmas: - case AMDGPU::BI__builtin_amdgpu_div_fmasf: { + case AMDGPU::BI__builtin_amdgcn_div_fmas: + case AMDGPU::BI__builtin_amdgcn_div_fmasf: { llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); llvm::Value *Src3 = EmitScalarExpr(E->getArg(3)); - llvm::Value *F = CGM.getIntrinsic(Intrinsic::AMDGPU_div_fmas, + llvm::Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas, Src0->getType()); llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3); return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool}); } - case AMDGPU::BI__builtin_amdgpu_div_fixup: - case AMDGPU::BI__builtin_amdgpu_div_fixupf: - return emitTernaryFPBuiltin(*this, E, Intrinsic::AMDGPU_div_fixup); - case AMDGPU::BI__builtin_amdgpu_trig_preop: - case AMDGPU::BI__builtin_amdgpu_trig_preopf: - return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_trig_preop); - case AMDGPU::BI__builtin_amdgpu_rcp: - case AMDGPU::BI__builtin_amdgpu_rcpf: - return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rcp); - case AMDGPU::BI__builtin_amdgpu_rsq: - case AMDGPU::BI__builtin_amdgpu_rsqf: - return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rsq); - case AMDGPU::BI__builtin_amdgpu_rsq_clamped: - case AMDGPU::BI__builtin_amdgpu_rsq_clampedf: - return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rsq_clamped); - case AMDGPU::BI__builtin_amdgpu_ldexp: - case AMDGPU::BI__builtin_amdgpu_ldexpf: - return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_ldexp); - case AMDGPU::BI__builtin_amdgpu_class: - case AMDGPU::BI__builtin_amdgpu_classf: - return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_class); - default: + case AMDGPU::BI__builtin_amdgcn_div_fixup: + case AMDGPU::BI__builtin_amdgcn_div_fixupf: + return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup); + case AMDGPU::BI__builtin_amdgcn_trig_preop: + case AMDGPU::BI__builtin_amdgcn_trig_preopf: + return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop); + case AMDGPU::BI__builtin_amdgcn_rcp: + case AMDGPU::BI__builtin_amdgcn_rcpf: + return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp); + case AMDGPU::BI__builtin_amdgcn_rsq: + case AMDGPU::BI__builtin_amdgcn_rsqf: + return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq); + case AMDGPU::BI__builtin_amdgcn_rsq_clamp: + case AMDGPU::BI__builtin_amdgcn_rsq_clampf: + return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp); + case AMDGPU::BI__builtin_amdgcn_sinf: + return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin); + case AMDGPU::BI__builtin_amdgcn_cosf: + return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos); + case AMDGPU::BI__builtin_amdgcn_log_clampf: + return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp); + case AMDGPU::BI__builtin_amdgcn_ldexp: + case AMDGPU::BI__builtin_amdgcn_ldexpf: + return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp); + case AMDGPU::BI__builtin_amdgcn_frexp_mant: + case 
AMDGPU::BI__builtin_amdgcn_frexp_mantf: { + return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant); + } + case AMDGPU::BI__builtin_amdgcn_frexp_exp: + case AMDGPU::BI__builtin_amdgcn_frexp_expf: { + return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_exp); + } + case AMDGPU::BI__builtin_amdgcn_fract: + case AMDGPU::BI__builtin_amdgcn_fractf: + return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract); + case AMDGPU::BI__builtin_amdgcn_lerp: + return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp); + case AMDGPU::BI__builtin_amdgcn_class: + case AMDGPU::BI__builtin_amdgcn_classf: + return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class); + + case AMDGPU::BI__builtin_amdgcn_read_exec: { + CallInst *CI = cast<CallInst>( + EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec")); + CI->setConvergent(); + return CI; + } + + // amdgcn workitem + case AMDGPU::BI__builtin_amdgcn_workitem_id_x: + return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024); + case AMDGPU::BI__builtin_amdgcn_workitem_id_y: + return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024); + case AMDGPU::BI__builtin_amdgcn_workitem_id_z: + return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024); + + // r600 intrinsics + case AMDGPU::BI__builtin_r600_recipsqrt_ieee: + case AMDGPU::BI__builtin_r600_recipsqrt_ieeef: + return emitUnaryBuiltin(*this, E, Intrinsic::r600_recipsqrt_ieee); + case AMDGPU::BI__builtin_r600_read_tidig_x: + return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024); + case AMDGPU::BI__builtin_r600_read_tidig_y: + return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024); + case AMDGPU::BI__builtin_r600_read_tidig_z: + return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024); + default: return nullptr; } } @@ -7196,6 +7941,17 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { + auto MakeLdg = [&](unsigned IntrinsicID) { + Value *Ptr = EmitScalarExpr(E->getArg(0)); + AlignmentSource AlignSource; + clang::CharUnits Align = + getNaturalPointeeTypeAlignment(E->getArg(0)->getType(), &AlignSource); + return Builder.CreateCall( + CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(), + Ptr->getType()}), + {Ptr, ConstantInt::get(Builder.getInt32Ty(), Align.getQuantity())}); + }; + switch (BuiltinID) { case NVPTX::BI__nvvm_atom_add_gen_i: case NVPTX::BI__nvvm_atom_add_gen_l: @@ -7264,6 +8020,56 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(FnALAF32, {Ptr, Val}); } + case NVPTX::BI__nvvm_atom_inc_gen_ui: { + Value *Ptr = EmitScalarExpr(E->getArg(0)); + Value *Val = EmitScalarExpr(E->getArg(1)); + Value *FnALI32 = + CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType()); + return Builder.CreateCall(FnALI32, {Ptr, Val}); + } + + case NVPTX::BI__nvvm_atom_dec_gen_ui: { + Value *Ptr = EmitScalarExpr(E->getArg(0)); + Value *Val = EmitScalarExpr(E->getArg(1)); + Value *FnALD32 = + CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType()); + return Builder.CreateCall(FnALD32, {Ptr, Val}); + } + + case NVPTX::BI__nvvm_ldg_c: + case NVPTX::BI__nvvm_ldg_c2: + case NVPTX::BI__nvvm_ldg_c4: + case NVPTX::BI__nvvm_ldg_s: + case NVPTX::BI__nvvm_ldg_s2: + case NVPTX::BI__nvvm_ldg_s4: + case NVPTX::BI__nvvm_ldg_i: + case NVPTX::BI__nvvm_ldg_i2: + case NVPTX::BI__nvvm_ldg_i4: + case NVPTX::BI__nvvm_ldg_l: + case 
NVPTX::BI__nvvm_ldg_ll: + case NVPTX::BI__nvvm_ldg_ll2: + case NVPTX::BI__nvvm_ldg_uc: + case NVPTX::BI__nvvm_ldg_uc2: + case NVPTX::BI__nvvm_ldg_uc4: + case NVPTX::BI__nvvm_ldg_us: + case NVPTX::BI__nvvm_ldg_us2: + case NVPTX::BI__nvvm_ldg_us4: + case NVPTX::BI__nvvm_ldg_ui: + case NVPTX::BI__nvvm_ldg_ui2: + case NVPTX::BI__nvvm_ldg_ui4: + case NVPTX::BI__nvvm_ldg_ul: + case NVPTX::BI__nvvm_ldg_ull: + case NVPTX::BI__nvvm_ldg_ull2: + // PTX Interoperability section 2.2: "For a vector with an even number of + // elements, its alignment is set to number of elements times the alignment + // of its member: n*alignof(t)." + return MakeLdg(Intrinsic::nvvm_ldg_global_i); + case NVPTX::BI__nvvm_ldg_f: + case NVPTX::BI__nvvm_ldg_f2: + case NVPTX::BI__nvvm_ldg_f4: + case NVPTX::BI__nvvm_ldg_d: + case NVPTX::BI__nvvm_ldg_d2: + return MakeLdg(Intrinsic::nvvm_ldg_global_f); default: return nullptr; } @@ -7272,9 +8078,9 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { switch (BuiltinID) { - case WebAssembly::BI__builtin_wasm_memory_size: { + case WebAssembly::BI__builtin_wasm_current_memory: { llvm::Type *ResultType = ConvertType(E->getType()); - Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_size, ResultType); + Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_current_memory, ResultType); return Builder.CreateCall(Callee); } case WebAssembly::BI__builtin_wasm_grow_memory: { diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGCUDABuiltin.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGCUDABuiltin.cpp new file mode 100644 index 0000000..ea3b888 --- /dev/null +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGCUDABuiltin.cpp @@ -0,0 +1,117 @@ +//===----- CGCUDABuiltin.cpp - Codegen for CUDA builtins ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Generates code for built-in CUDA calls which are not runtime-specific. +// (Runtime-specific codegen lives in CGCUDARuntime.) +// +//===----------------------------------------------------------------------===// + +#include "CodeGenFunction.h" +#include "clang/Basic/Builtins.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Instruction.h" +#include "llvm/Support/MathExtras.h" + +using namespace clang; +using namespace CodeGen; + +static llvm::Function *GetVprintfDeclaration(llvm::Module &M) { + llvm::Type *ArgTypes[] = {llvm::Type::getInt8PtrTy(M.getContext()), + llvm::Type::getInt8PtrTy(M.getContext())}; + llvm::FunctionType *VprintfFuncType = llvm::FunctionType::get( + llvm::Type::getInt32Ty(M.getContext()), ArgTypes, false); + + if (auto* F = M.getFunction("vprintf")) { + // Our CUDA system header declares vprintf with the right signature, so + // nobody else should have been able to declare vprintf with a bogus + // signature. + assert(F->getFunctionType() == VprintfFuncType); + return F; + } + + // vprintf doesn't already exist; create a declaration and insert it into the + // module. + return llvm::Function::Create( + VprintfFuncType, llvm::GlobalVariable::ExternalLinkage, "vprintf", &M); +} + +// Transforms a call to printf into a call to the NVPTX vprintf syscall (which +// isn't particularly special; it's invoked just like a regular function). 
+// vprintf takes two args: A format string, and a pointer to a buffer containing +// the varargs. +// +// For example, the call +// +// printf("format string", arg1, arg2, arg3); +// +// is converted into something resembling +// +// struct Tmp { +// Arg1 a1; +// Arg2 a2; +// Arg3 a3; +// }; +// char* buf = alloca(sizeof(Tmp)); +// *(Tmp*)buf = {a1, a2, a3}; +// vprintf("format string", buf); +// +// buf is aligned to the max of {alignof(Arg1), ...}. Furthermore, each of the +// args is itself aligned to its preferred alignment. +// +// Note that by the time this function runs, E's args have already undergone the +// standard C vararg promotion (short -> int, float -> double, etc.). +RValue +CodeGenFunction::EmitCUDADevicePrintfCallExpr(const CallExpr *E, + ReturnValueSlot ReturnValue) { + assert(getLangOpts().CUDA); + assert(getLangOpts().CUDAIsDevice); + assert(E->getBuiltinCallee() == Builtin::BIprintf); + assert(E->getNumArgs() >= 1); // printf always has at least one arg. + + const llvm::DataLayout &DL = CGM.getDataLayout(); + llvm::LLVMContext &Ctx = CGM.getLLVMContext(); + + CallArgList Args; + EmitCallArgs(Args, + E->getDirectCallee()->getType()->getAs<FunctionProtoType>(), + E->arguments(), E->getDirectCallee(), + /* ParamsToSkip = */ 0); + + // We don't know how to emit non-scalar varargs. + if (std::any_of(Args.begin() + 1, Args.end(), + [](const CallArg &A) { return !A.RV.isScalar(); })) { + CGM.ErrorUnsupported(E, "non-scalar arg to printf"); + return RValue::get(llvm::ConstantInt::get(IntTy, 0)); + } + + // Construct and fill the args buffer that we'll pass to vprintf. + llvm::Value *BufferPtr; + if (Args.size() <= 1) { + // If there are no args, pass a null pointer to vprintf. + BufferPtr = llvm::ConstantPointerNull::get(llvm::Type::getInt8PtrTy(Ctx)); + } else { + llvm::SmallVector<llvm::Type *, 8> ArgTypes; + for (unsigned I = 1, NumArgs = Args.size(); I < NumArgs; ++I) + ArgTypes.push_back(Args[I].RV.getScalarVal()->getType()); + llvm::Type *AllocaTy = llvm::StructType::create(ArgTypes, "printf_args"); + llvm::Value *Alloca = CreateTempAlloca(AllocaTy); + + for (unsigned I = 1, NumArgs = Args.size(); I < NumArgs; ++I) { + llvm::Value *P = Builder.CreateStructGEP(AllocaTy, Alloca, I - 1); + llvm::Value *Arg = Args[I].RV.getScalarVal(); + Builder.CreateAlignedStore(Arg, P, DL.getPrefTypeAlignment(Arg->getType())); + } + BufferPtr = Builder.CreatePointerCast(Alloca, llvm::Type::getInt8PtrTy(Ctx)); + } + + // Invoke vprintf and return. + llvm::Function* VprintfFunc = GetVprintfDeclaration(CGM.getModule()); + return RValue::get( + Builder.CreateCall(VprintfFunc, {Args[0].RV.getScalarVal(), BufferPtr})); +} diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGCUDANV.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGCUDANV.cpp index 045e19b..6a04d4e 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGCUDANV.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGCUDANV.cpp @@ -38,6 +38,7 @@ private: llvm::Module &TheModule; /// Keeps track of kernel launch stubs emitted in this module llvm::SmallVector<llvm::Function *, 16> EmittedKernels; + llvm::SmallVector<std::pair<llvm::GlobalVariable *, unsigned>, 16> DeviceVars; /// Keeps track of variables containing handles of GPU binaries. Populated by /// ModuleCtorFunction() and used to create corresponding cleanup calls in /// ModuleDtorFunction() @@ -47,7 +48,7 @@ private: llvm::Constant *getLaunchFn() const; /// Creates a function to register all kernel stubs generated in this module. 
- llvm::Function *makeRegisterKernelsFn(); + llvm::Function *makeRegisterGlobalsFn(); /// Helper function that generates a constant string and returns a pointer to /// the start of the string. The result of this function can be used anywhere @@ -68,6 +69,10 @@ public: CGNVCUDARuntime(CodeGenModule &CGM); void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) override; + void registerDeviceVar(llvm::GlobalVariable &Var, unsigned Flags) override { + DeviceVars.push_back(std::make_pair(&Var, Flags)); + } + /// Creates module constructor function llvm::Function *makeModuleCtorFunction() override; /// Creates module destructor function @@ -93,10 +98,7 @@ CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM) llvm::Constant *CGNVCUDARuntime::getSetupArgumentFn() const { // cudaError_t cudaSetupArgument(void *, size_t, size_t) - std::vector<llvm::Type*> Params; - Params.push_back(VoidPtrTy); - Params.push_back(SizeTy); - Params.push_back(SizeTy); + llvm::Type *Params[] = {VoidPtrTy, SizeTy, SizeTy}; return CGM.CreateRuntimeFunction(llvm::FunctionType::get(IntTy, Params, false), "cudaSetupArgument"); @@ -158,19 +160,28 @@ void CGNVCUDARuntime::emitDeviceStubBody(CodeGenFunction &CGF, CGF.EmitBlock(EndBlock); } -/// Creates internal function to register all kernel stubs generated in this -/// module with the CUDA runtime. +/// Creates a function that sets up state on the host side for CUDA objects that +/// have a presence on both the host and device sides. Specifically, registers +/// the host side of kernel functions and device global variables with the CUDA +/// runtime. /// \code -/// void __cuda_register_kernels(void** GpuBinaryHandle) { +/// void __cuda_register_globals(void** GpuBinaryHandle) { /// __cudaRegisterFunction(GpuBinaryHandle,Kernel0,...); /// ... /// __cudaRegisterFunction(GpuBinaryHandle,KernelM,...); +/// __cudaRegisterVar(GpuBinaryHandle, GlobalVar0, ...); +/// ... +/// __cudaRegisterVar(GpuBinaryHandle, GlobalVarN, ...); /// } /// \endcode -llvm::Function *CGNVCUDARuntime::makeRegisterKernelsFn() { +llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() { + // No need to register anything + if (EmittedKernels.empty() && DeviceVars.empty()) + return nullptr; + llvm::Function *RegisterKernelsFunc = llvm::Function::Create( llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false), - llvm::GlobalValue::InternalLinkage, "__cuda_register_kernels", &TheModule); + llvm::GlobalValue::InternalLinkage, "__cuda_register_globals", &TheModule); llvm::BasicBlock *EntryBB = llvm::BasicBlock::Create(Context, "entry", RegisterKernelsFunc); CGBuilderTy Builder(CGM, Context); @@ -178,7 +189,7 @@ llvm::Function *CGNVCUDARuntime::makeRegisterKernelsFn() { // void __cudaRegisterFunction(void **, const char *, char *, const char *, // int, uint3*, uint3*, dim3*, dim3*, int*) - std::vector<llvm::Type *> RegisterFuncParams = { + llvm::Type *RegisterFuncParams[] = { VoidPtrPtrTy, CharPtrTy, CharPtrTy, CharPtrTy, IntTy, VoidPtrTy, VoidPtrTy, VoidPtrTy, VoidPtrTy, IntTy->getPointerTo()}; llvm::Constant *RegisterFunc = CGM.CreateRuntimeFunction( @@ -186,18 +197,44 @@ llvm::Function *CGNVCUDARuntime::makeRegisterKernelsFn() { "__cudaRegisterFunction"); // Extract GpuBinaryHandle passed as the first argument passed to - // __cuda_register_kernels() and generate __cudaRegisterFunction() call for + // __cuda_register_globals() and generate __cudaRegisterFunction() call for // each emitted kernel. 
llvm::Argument &GpuBinaryHandlePtr = *RegisterKernelsFunc->arg_begin(); for (llvm::Function *Kernel : EmittedKernels) { llvm::Constant *KernelName = makeConstantString(Kernel->getName()); llvm::Constant *NullPtr = llvm::ConstantPointerNull::get(VoidPtrTy); - llvm::Value *args[] = { + llvm::Value *Args[] = { &GpuBinaryHandlePtr, Builder.CreateBitCast(Kernel, VoidPtrTy), KernelName, KernelName, llvm::ConstantInt::get(IntTy, -1), NullPtr, NullPtr, NullPtr, NullPtr, llvm::ConstantPointerNull::get(IntTy->getPointerTo())}; - Builder.CreateCall(RegisterFunc, args); + Builder.CreateCall(RegisterFunc, Args); + } + + // void __cudaRegisterVar(void **, char *, char *, const char *, + // int, int, int, int) + llvm::Type *RegisterVarParams[] = {VoidPtrPtrTy, CharPtrTy, CharPtrTy, + CharPtrTy, IntTy, IntTy, + IntTy, IntTy}; + llvm::Constant *RegisterVar = CGM.CreateRuntimeFunction( + llvm::FunctionType::get(IntTy, RegisterVarParams, false), + "__cudaRegisterVar"); + for (auto &Pair : DeviceVars) { + llvm::GlobalVariable *Var = Pair.first; + unsigned Flags = Pair.second; + llvm::Constant *VarName = makeConstantString(Var->getName()); + uint64_t VarSize = + CGM.getDataLayout().getTypeAllocSize(Var->getValueType()); + llvm::Value *Args[] = { + &GpuBinaryHandlePtr, + Builder.CreateBitCast(Var, VoidPtrTy), + VarName, + VarName, + llvm::ConstantInt::get(IntTy, (Flags & ExternDeviceVar) ? 1 : 0), + llvm::ConstantInt::get(IntTy, VarSize), + llvm::ConstantInt::get(IntTy, (Flags & ConstantDeviceVar) ? 1 : 0), + llvm::ConstantInt::get(IntTy, 0)}; + Builder.CreateCall(RegisterVar, Args); } Builder.CreateRetVoid(); @@ -208,15 +245,19 @@ llvm::Function *CGNVCUDARuntime::makeRegisterKernelsFn() { /// \code /// void __cuda_module_ctor(void*) { /// Handle0 = __cudaRegisterFatBinary(GpuBinaryBlob0); -/// __cuda_register_kernels(Handle0); +/// __cuda_register_globals(Handle0); /// ... /// HandleN = __cudaRegisterFatBinary(GpuBinaryBlobN); -/// __cuda_register_kernels(HandleN); +/// __cuda_register_globals(HandleN); /// } /// \endcode llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { - // void __cuda_register_kernels(void* handle); - llvm::Function *RegisterKernelsFunc = makeRegisterKernelsFn(); + // No need to generate ctors/dtors if there are no GPU binaries. + if (CGM.getCodeGenOpts().CudaGpuBinaryFileNames.empty()) + return nullptr; + + // void __cuda_register_globals(void* handle); + llvm::Function *RegisterGlobalsFunc = makeRegisterGlobalsFn(); // void ** __cudaRegisterFatBinary(void *); llvm::Constant *RegisterFatbinFunc = CGM.CreateRuntimeFunction( llvm::FunctionType::get(VoidPtrPtrTy, VoidPtrTy, false), @@ -259,6 +300,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { TheModule, FatbinWrapperTy, true, llvm::GlobalValue::InternalLinkage, llvm::ConstantStruct::get(FatbinWrapperTy, Values), "__cuda_fatbin_wrapper"); + // NVIDIA's cuobjdump looks for fatbins in this section. 
+ FatbinWrapper->setSection(".nvFatBinSegment"); // GpuBinaryHandle = __cudaRegisterFatBinary(&FatbinWrapper); llvm::CallInst *RegisterFatbinCall = CtorBuilder.CreateCall( @@ -270,8 +313,9 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { CtorBuilder.CreateAlignedStore(RegisterFatbinCall, GpuBinaryHandle, CGM.getPointerAlign()); - // Call __cuda_register_kernels(GpuBinaryHandle); - CtorBuilder.CreateCall(RegisterKernelsFunc, RegisterFatbinCall); + // Call __cuda_register_globals(GpuBinaryHandle); + if (RegisterGlobalsFunc) + CtorBuilder.CreateCall(RegisterGlobalsFunc, RegisterFatbinCall); // Save GpuBinaryHandle so we can unregister it in destructor. GpuBinaryHandles.push_back(GpuBinaryHandle); @@ -291,6 +335,10 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { /// } /// \endcode llvm::Function *CGNVCUDARuntime::makeModuleDtorFunction() { + // No need for destructor if we don't have handles to unregister. + if (GpuBinaryHandles.empty()) + return nullptr; + // void __cudaUnregisterFatBinary(void ** handle); llvm::Constant *UnregisterFatbinFunc = CGM.CreateRuntimeFunction( llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false), diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGCUDARuntime.h b/contrib/llvm/tools/clang/lib/CodeGen/CGCUDARuntime.h index dcacf97..0168f4f 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGCUDARuntime.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGCUDARuntime.h @@ -18,6 +18,7 @@ namespace llvm { class Function; +class GlobalVariable; } namespace clang { @@ -37,6 +38,12 @@ protected: CodeGenModule &CGM; public: + // Global variable properties that must be passed to CUDA runtime. + enum DeviceVarFlags { + ExternDeviceVar = 0x01, // extern + ConstantDeviceVar = 0x02, // __constant__ + }; + CGCUDARuntime(CodeGenModule &CGM) : CGM(CGM) {} virtual ~CGCUDARuntime(); @@ -46,6 +53,7 @@ public: /// Emits a kernel launch stub. virtual void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) = 0; + virtual void registerDeviceVar(llvm::GlobalVariable &Var, unsigned Flags) = 0; /// Constructs and returns a module initialization function or nullptr if it's /// not needed. Must be called after all kernels have been emitted. diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGCXX.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGCXX.cpp index 6847df9..40f1bc4 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGCXX.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGCXX.cpp @@ -164,7 +164,7 @@ bool CodeGenModule::TryEmitDefinitionAsAlias(GlobalDecl AliasDecl, // members with attribute "AlwaysInline" and expect no reference to // be generated. It is desirable to reenable this optimisation after // corresponding LLVM changes. - Replacements[MangledName] = Aliasee; + addReplacement(MangledName, Aliasee); return false; } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGCXXABI.h b/contrib/llvm/tools/clang/lib/CodeGen/CGCXXABI.h index 3f240b1..9e10ec0 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGCXXABI.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGCXXABI.h @@ -106,6 +106,16 @@ public: virtual bool hasMostDerivedReturn(GlobalDecl GD) const { return false; } + /// Returns true if the target allows calling a function through a pointer + /// with a different signature than the actual function (or equivalently, + /// bitcasting a function or function pointer to a different function type). + /// In principle in the most general case this could depend on the target, the + /// calling convention, and the actual types of the arguments and return + /// value. 
Here it just means whether the signature mismatch could *ever* be + /// allowed; in other words, does the target do strict checking of signatures + /// for all calls. + virtual bool canCallMismatchedFunctionType() const { return true; } + /// If the C++ ABI requires the given type be returned in a particular way, /// this method sets RetAI and returns true. virtual bool classifyReturnType(CGFunctionInfo &FI) const = 0; @@ -326,6 +336,12 @@ public: virtual void addImplicitStructorParams(CodeGenFunction &CGF, QualType &ResTy, FunctionArgList &Params) = 0; + /// Get the ABI-specific "this" parameter adjustment to apply in the prologue + /// of a virtual function. + virtual CharUnits getVirtualFunctionPrologueThisAdjustment(GlobalDecl GD) { + return CharUnits::Zero(); + } + /// Perform ABI-specific "this" parameter adjustment in a virtual function /// prologue. virtual llvm::Value *adjustThisParameterInVirtualFunctionPrologue( diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGCall.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGCall.cpp index 9359850..242b596 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGCall.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGCall.cpp @@ -14,6 +14,7 @@ #include "CGCall.h" #include "ABIInfo.h" +#include "CGBlocks.h" #include "CGCXXABI.h" #include "CGCleanup.h" #include "CodeGenFunction.h" @@ -25,9 +26,11 @@ #include "clang/Basic/TargetBuiltins.h" #include "clang/Basic/TargetInfo.h" #include "clang/CodeGen/CGFunctionInfo.h" +#include "clang/CodeGen/SwiftCallingConv.h" #include "clang/Frontend/CodeGenOptions.h" #include "llvm/ADT/StringExtras.h" #include "llvm/IR/Attributes.h" +#include "llvm/IR/CallingConv.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/InlineAsm.h" @@ -39,7 +42,7 @@ using namespace CodeGen; /***/ -static unsigned ClangCallConvToLLVMCallConv(CallingConv CC) { +unsigned CodeGenTypes::ClangCallConvToLLVMCallConv(CallingConv CC) { switch (CC) { default: return llvm::CallingConv::C; case CC_X86StdCall: return llvm::CallingConv::X86_StdCall; @@ -55,7 +58,10 @@ static unsigned ClangCallConvToLLVMCallConv(CallingConv CC) { // TODO: Add support for __vectorcall to LLVM. case CC_X86VectorCall: return llvm::CallingConv::X86_VectorCall; case CC_SpirFunction: return llvm::CallingConv::SPIR_FUNC; - case CC_SpirKernel: return llvm::CallingConv::SPIR_KERNEL; + case CC_OpenCLKernel: return CGM.getTargetCodeGenInfo().getOpenCLKernelCallingConv(); + case CC_PreserveMost: return llvm::CallingConv::PreserveMost; + case CC_PreserveAll: return llvm::CallingConv::PreserveAll; + case CC_Swift: return llvm::CallingConv::Swift; } } @@ -90,15 +96,25 @@ CodeGenTypes::arrangeFreeFunctionType(CanQual<FunctionNoProtoType> FTNP) { return arrangeLLVMFunctionInfo(FTNP->getReturnType().getUnqualifiedType(), /*instanceMethod=*/false, /*chainCall=*/false, None, - FTNP->getExtInfo(), RequiredArgs(0)); + FTNP->getExtInfo(), {}, RequiredArgs(0)); } /// Adds the formal paramaters in FPT to the given prefix. If any parameter in /// FPT has pass_object_size attrs, then we'll add parameters for those, too. static void appendParameterTypes(const CodeGenTypes &CGT, SmallVectorImpl<CanQualType> &prefix, - const CanQual<FunctionProtoType> &FPT, + SmallVectorImpl<FunctionProtoType::ExtParameterInfo> ¶mInfos, + CanQual<FunctionProtoType> FPT, const FunctionDecl *FD) { + // Fill out paramInfos. 
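// A minimal sketch of the situation canCallMismatchedFunctionType() (above)
// describes: calling a function through a pointer whose prototype does not
// match the definition.  Most targets tolerate the bitcast; an ABI returning
// false here forces CodeGen to avoid emitting such mismatched calls directly.
void callee(int x);

void caller(void) {
  void (*fp)(long) = (void (*)(long))callee;  // deliberately mismatched type
  fp(42L);                                    // relies on the target allowing it
}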
+ if (FPT->hasExtParameterInfos() || !paramInfos.empty()) { + assert(paramInfos.size() <= prefix.size()); + auto protoParamInfos = FPT->getExtParameterInfos(); + paramInfos.reserve(prefix.size() + protoParamInfos.size()); + paramInfos.resize(prefix.size()); + paramInfos.append(protoParamInfos.begin(), protoParamInfos.end()); + } + // Fast path: unknown target. if (FD == nullptr) { prefix.append(FPT->param_type_begin(), FPT->param_type_end()); @@ -125,13 +141,17 @@ arrangeLLVMFunctionInfo(CodeGenTypes &CGT, bool instanceMethod, SmallVectorImpl<CanQualType> &prefix, CanQual<FunctionProtoType> FTP, const FunctionDecl *FD) { - RequiredArgs required = RequiredArgs::forPrototypePlus(FTP, prefix.size()); + SmallVector<FunctionProtoType::ExtParameterInfo, 16> paramInfos; + RequiredArgs Required = + RequiredArgs::forPrototypePlus(FTP, prefix.size(), FD); // FIXME: Kill copy. - appendParameterTypes(CGT, prefix, FTP, FD); + appendParameterTypes(CGT, prefix, paramInfos, FTP, FD); CanQualType resultType = FTP->getReturnType().getUnqualifiedType(); + return CGT.arrangeLLVMFunctionInfo(resultType, instanceMethod, /*chainCall=*/false, prefix, - FTP->getExtInfo(), required); + FTP->getExtInfo(), paramInfos, + Required); } /// Arrange the argument and result information for a value of the @@ -173,6 +193,12 @@ static CallingConv getCallingConventionForDecl(const Decl *D, bool IsWindows) { if (D->hasAttr<SysVABIAttr>()) return IsWindows ? CC_X86_64SysV : CC_C; + if (D->hasAttr<PreserveMostAttr>()) + return CC_PreserveMost; + + if (D->hasAttr<PreserveAllAttr>()) + return CC_PreserveAll; + return CC_C; } @@ -219,16 +245,33 @@ CodeGenTypes::arrangeCXXMethodDeclaration(const CXXMethodDecl *MD) { return arrangeFreeFunctionType(prototype, MD); } +bool CodeGenTypes::inheritingCtorHasParams( + const InheritedConstructor &Inherited, CXXCtorType Type) { + // Parameters are unnecessary if we're constructing a base class subobject + // and the inherited constructor lives in a virtual base. + return Type == Ctor_Complete || + !Inherited.getShadowDecl()->constructsVirtualBase() || + !Target.getCXXABI().hasConstructorVariants(); + } + const CGFunctionInfo & CodeGenTypes::arrangeCXXStructorDeclaration(const CXXMethodDecl *MD, StructorType Type) { SmallVector<CanQualType, 16> argTypes; + SmallVector<FunctionProtoType::ExtParameterInfo, 16> paramInfos; argTypes.push_back(GetThisType(Context, MD->getParent())); + bool PassParams = true; + GlobalDecl GD; if (auto *CD = dyn_cast<CXXConstructorDecl>(MD)) { GD = GlobalDecl(CD, toCXXCtorType(Type)); + + // A base class inheriting constructor doesn't get forwarded arguments + // needed to construct a virtual base (or base class thereof). + if (auto Inherited = CD->getInheritedConstructor()) + PassParams = inheritingCtorHasParams(Inherited, toCXXCtorType(Type)); } else { auto *DD = dyn_cast<CXXDestructorDecl>(MD); GD = GlobalDecl(DD, toCXXDtorType(Type)); @@ -237,12 +280,14 @@ CodeGenTypes::arrangeCXXStructorDeclaration(const CXXMethodDecl *MD, CanQual<FunctionProtoType> FTP = GetFormalType(MD); // Add the formal parameters. - appendParameterTypes(*this, argTypes, FTP, MD); + if (PassParams) + appendParameterTypes(*this, argTypes, paramInfos, FTP, MD); TheCXXABI.buildStructorSignature(MD, Type, argTypes); RequiredArgs required = - (MD->isVariadic() ? RequiredArgs(argTypes.size()) : RequiredArgs::All); + (PassParams && MD->isVariadic() ? 
RequiredArgs(argTypes.size()) + : RequiredArgs::All); FunctionType::ExtInfo extInfo = FTP->getExtInfo(); CanQualType resultType = TheCXXABI.HasThisReturn(GD) @@ -252,7 +297,53 @@ CodeGenTypes::arrangeCXXStructorDeclaration(const CXXMethodDecl *MD, : Context.VoidTy; return arrangeLLVMFunctionInfo(resultType, /*instanceMethod=*/true, /*chainCall=*/false, argTypes, extInfo, - required); + paramInfos, required); +} + +static SmallVector<CanQualType, 16> +getArgTypesForCall(ASTContext &ctx, const CallArgList &args) { + SmallVector<CanQualType, 16> argTypes; + for (auto &arg : args) + argTypes.push_back(ctx.getCanonicalParamType(arg.Ty)); + return argTypes; +} + +static SmallVector<CanQualType, 16> +getArgTypesForDeclaration(ASTContext &ctx, const FunctionArgList &args) { + SmallVector<CanQualType, 16> argTypes; + for (auto &arg : args) + argTypes.push_back(ctx.getCanonicalParamType(arg->getType())); + return argTypes; +} + +static void addExtParameterInfosForCall( + llvm::SmallVectorImpl<FunctionProtoType::ExtParameterInfo> ¶mInfos, + const FunctionProtoType *proto, + unsigned prefixArgs, + unsigned totalArgs) { + assert(proto->hasExtParameterInfos()); + assert(paramInfos.size() <= prefixArgs); + assert(proto->getNumParams() + prefixArgs <= totalArgs); + + // Add default infos for any prefix args that don't already have infos. + paramInfos.resize(prefixArgs); + + // Add infos for the prototype. + auto protoInfos = proto->getExtParameterInfos(); + paramInfos.append(protoInfos.begin(), protoInfos.end()); + + // Add default infos for the variadic arguments. + paramInfos.resize(totalArgs); +} + +static llvm::SmallVector<FunctionProtoType::ExtParameterInfo, 16> +getExtParameterInfosForCall(const FunctionProtoType *proto, + unsigned prefixArgs, unsigned totalArgs) { + llvm::SmallVector<FunctionProtoType::ExtParameterInfo, 16> result; + if (proto->hasExtParameterInfos()) { + addExtParameterInfosForCall(result, proto, prefixArgs, totalArgs); + } + return result; } /// Arrange a call to a C++ method, passing the given arguments. @@ -267,7 +358,7 @@ CodeGenTypes::arrangeCXXConstructorCall(const CallArgList &args, ArgTypes.push_back(Context.getCanonicalParamType(Arg.Ty)); CanQual<FunctionProtoType> FPT = GetFormalType(D); - RequiredArgs Required = RequiredArgs::forPrototypePlus(FPT, 1 + ExtraArgs); + RequiredArgs Required = RequiredArgs::forPrototypePlus(FPT, 1 + ExtraArgs, D); GlobalDecl GD(D, CtorKind); CanQualType ResultType = TheCXXABI.HasThisReturn(GD) ? 
ArgTypes.front() @@ -276,9 +367,11 @@ CodeGenTypes::arrangeCXXConstructorCall(const CallArgList &args, : Context.VoidTy; FunctionType::ExtInfo Info = FPT->getExtInfo(); + auto ParamInfos = getExtParameterInfosForCall(FPT.getTypePtr(), 1 + ExtraArgs, + ArgTypes.size()); return arrangeLLVMFunctionInfo(ResultType, /*instanceMethod=*/true, /*chainCall=*/false, ArgTypes, Info, - Required); + ParamInfos, Required); } /// Arrange the argument and result information for the declaration or @@ -299,7 +392,7 @@ CodeGenTypes::arrangeFunctionDeclaration(const FunctionDecl *FD) { CanQual<FunctionNoProtoType> noProto = FTy.getAs<FunctionNoProtoType>(); return arrangeLLVMFunctionInfo( noProto->getReturnType(), /*instanceMethod=*/false, - /*chainCall=*/false, None, noProto->getExtInfo(), RequiredArgs::All); + /*chainCall=*/false, None, noProto->getExtInfo(), {},RequiredArgs::All); } assert(isa<FunctionProtoType>(FTy)); @@ -328,7 +421,7 @@ CodeGenTypes::arrangeObjCMessageSendSignature(const ObjCMethodDecl *MD, argTys.push_back(Context.getCanonicalParamType(receiverType)); argTys.push_back(Context.getCanonicalParamType(Context.getObjCSelType())); // FIXME: Kill copy? - for (const auto *I : MD->params()) { + for (const auto *I : MD->parameters()) { argTys.push_back(Context.getCanonicalParamType(I->getType())); } @@ -345,7 +438,18 @@ CodeGenTypes::arrangeObjCMessageSendSignature(const ObjCMethodDecl *MD, return arrangeLLVMFunctionInfo( GetReturnType(MD->getReturnType()), /*instanceMethod=*/false, - /*chainCall=*/false, argTys, einfo, required); + /*chainCall=*/false, argTys, einfo, {}, required); +} + +const CGFunctionInfo & +CodeGenTypes::arrangeUnprototypedObjCMessageSend(QualType returnType, + const CallArgList &args) { + auto argTypes = getArgTypesForCall(Context, args); + FunctionType::ExtInfo einfo; + + return arrangeLLVMFunctionInfo( + GetReturnType(returnType), /*instanceMethod=*/false, + /*chainCall=*/false, argTypes, einfo, {}, RequiredArgs::All); } const CGFunctionInfo & @@ -374,7 +478,7 @@ CodeGenTypes::arrangeMSMemberPointerThunk(const CXXMethodDecl *MD) { CanQualType ArgTys[] = { GetThisType(Context, MD->getParent()) }; return arrangeLLVMFunctionInfo(Context.VoidTy, /*instanceMethod=*/false, /*chainCall=*/false, ArgTys, - FTP->getExtInfo(), RequiredArgs(1)); + FTP->getExtInfo(), {}, RequiredArgs(1)); } const CGFunctionInfo & @@ -394,7 +498,8 @@ CodeGenTypes::arrangeMSCtorClosure(const CXXConstructorDecl *CD, /*IsVariadic=*/false, /*IsCXXMethod=*/true); return arrangeLLVMFunctionInfo(Context.VoidTy, /*instanceMethod=*/true, /*chainCall=*/false, ArgTys, - FunctionType::ExtInfo(CC), RequiredArgs::All); + FunctionType::ExtInfo(CC), {}, + RequiredArgs::All); } /// Arrange a call as unto a free function, except possibly with an @@ -408,6 +513,8 @@ arrangeFreeFunctionLikeCall(CodeGenTypes &CGT, bool chainCall) { assert(args.size() >= numExtraRequiredArgs); + llvm::SmallVector<FunctionProtoType::ExtParameterInfo, 16> paramInfos; + // In most cases, there are no optional arguments. 
RequiredArgs required = RequiredArgs::All; @@ -417,6 +524,10 @@ arrangeFreeFunctionLikeCall(CodeGenTypes &CGT, if (proto->isVariadic()) required = RequiredArgs(proto->getNumParams() + numExtraRequiredArgs); + if (proto->hasExtParameterInfos()) + addExtParameterInfosForCall(paramInfos, proto, numExtraRequiredArgs, + args.size()); + // If we don't have a prototype at all, but we're supposed to // explicitly use the variadic convention for unprototyped calls, // treat all of the arguments as required but preserve the nominal @@ -433,7 +544,8 @@ arrangeFreeFunctionLikeCall(CodeGenTypes &CGT, argTypes.push_back(CGT.getContext().getCanonicalParamType(arg.Ty)); return CGT.arrangeLLVMFunctionInfo(GetReturnType(fnType->getReturnType()), /*instanceMethod=*/false, chainCall, - argTypes, fnType->getExtInfo(), required); + argTypes, fnType->getExtInfo(), paramInfos, + required); } /// Figure out the rules for calling a function with the given formal @@ -448,7 +560,7 @@ CodeGenTypes::arrangeFreeFunctionCall(const CallArgList &args, chainCall ? 1 : 0, chainCall); } -/// A block function call is essentially a free-function call with an +/// A block function is essentially a free function with an /// extra implicit argument. const CGFunctionInfo & CodeGenTypes::arrangeBlockFunctionCall(const CallArgList &args, @@ -458,54 +570,99 @@ CodeGenTypes::arrangeBlockFunctionCall(const CallArgList &args, } const CGFunctionInfo & -CodeGenTypes::arrangeFreeFunctionCall(QualType resultType, - const CallArgList &args, - FunctionType::ExtInfo info, - RequiredArgs required) { +CodeGenTypes::arrangeBlockFunctionDeclaration(const FunctionProtoType *proto, + const FunctionArgList ¶ms) { + auto paramInfos = getExtParameterInfosForCall(proto, 1, params.size()); + auto argTypes = getArgTypesForDeclaration(Context, params); + + return arrangeLLVMFunctionInfo( + GetReturnType(proto->getReturnType()), + /*instanceMethod*/ false, /*chainCall*/ false, argTypes, + proto->getExtInfo(), paramInfos, + RequiredArgs::forPrototypePlus(proto, 1, nullptr)); +} + +const CGFunctionInfo & +CodeGenTypes::arrangeBuiltinFunctionCall(QualType resultType, + const CallArgList &args) { // FIXME: Kill copy. SmallVector<CanQualType, 16> argTypes; for (const auto &Arg : args) argTypes.push_back(Context.getCanonicalParamType(Arg.Ty)); return arrangeLLVMFunctionInfo( GetReturnType(resultType), /*instanceMethod=*/false, - /*chainCall=*/false, argTypes, info, required); + /*chainCall=*/false, argTypes, FunctionType::ExtInfo(), + /*paramInfos=*/ {}, RequiredArgs::All); } -/// Arrange a call to a C++ method, passing the given arguments. const CGFunctionInfo & -CodeGenTypes::arrangeCXXMethodCall(const CallArgList &args, - const FunctionProtoType *FPT, - RequiredArgs required) { - // FIXME: Kill copy. 
- SmallVector<CanQualType, 16> argTypes; - for (const auto &Arg : args) - argTypes.push_back(Context.getCanonicalParamType(Arg.Ty)); +CodeGenTypes::arrangeBuiltinFunctionDeclaration(QualType resultType, + const FunctionArgList &args) { + auto argTypes = getArgTypesForDeclaration(Context, args); + + return arrangeLLVMFunctionInfo( + GetReturnType(resultType), /*instanceMethod=*/false, /*chainCall=*/false, + argTypes, FunctionType::ExtInfo(), {}, RequiredArgs::All); +} - FunctionType::ExtInfo info = FPT->getExtInfo(); +const CGFunctionInfo & +CodeGenTypes::arrangeBuiltinFunctionDeclaration(CanQualType resultType, + ArrayRef<CanQualType> argTypes) { return arrangeLLVMFunctionInfo( - GetReturnType(FPT->getReturnType()), /*instanceMethod=*/true, - /*chainCall=*/false, argTypes, info, required); + resultType, /*instanceMethod=*/false, /*chainCall=*/false, + argTypes, FunctionType::ExtInfo(), {}, RequiredArgs::All); } -const CGFunctionInfo &CodeGenTypes::arrangeFreeFunctionDeclaration( - QualType resultType, const FunctionArgList &args, - const FunctionType::ExtInfo &info, bool isVariadic) { +/// Arrange a call to a C++ method, passing the given arguments. +const CGFunctionInfo & +CodeGenTypes::arrangeCXXMethodCall(const CallArgList &args, + const FunctionProtoType *proto, + RequiredArgs required) { + unsigned numRequiredArgs = + (proto->isVariadic() ? required.getNumRequiredArgs() : args.size()); + unsigned numPrefixArgs = numRequiredArgs - proto->getNumParams(); + auto paramInfos = + getExtParameterInfosForCall(proto, numPrefixArgs, args.size()); + // FIXME: Kill copy. - SmallVector<CanQualType, 16> argTypes; - for (auto Arg : args) - argTypes.push_back(Context.getCanonicalParamType(Arg->getType())); + auto argTypes = getArgTypesForCall(Context, args); - RequiredArgs required = - (isVariadic ? 
RequiredArgs(args.size()) : RequiredArgs::All); + FunctionType::ExtInfo info = proto->getExtInfo(); return arrangeLLVMFunctionInfo( - GetReturnType(resultType), /*instanceMethod=*/false, - /*chainCall=*/false, argTypes, info, required); + GetReturnType(proto->getReturnType()), /*instanceMethod=*/true, + /*chainCall=*/false, argTypes, info, paramInfos, required); } const CGFunctionInfo &CodeGenTypes::arrangeNullaryFunction() { return arrangeLLVMFunctionInfo( getContext().VoidTy, /*instanceMethod=*/false, /*chainCall=*/false, - None, FunctionType::ExtInfo(), RequiredArgs::All); + None, FunctionType::ExtInfo(), {}, RequiredArgs::All); +} + +const CGFunctionInfo & +CodeGenTypes::arrangeCall(const CGFunctionInfo &signature, + const CallArgList &args) { + assert(signature.arg_size() <= args.size()); + if (signature.arg_size() == args.size()) + return signature; + + SmallVector<FunctionProtoType::ExtParameterInfo, 16> paramInfos; + auto sigParamInfos = signature.getExtParameterInfos(); + if (!sigParamInfos.empty()) { + paramInfos.append(sigParamInfos.begin(), sigParamInfos.end()); + paramInfos.resize(args.size()); + } + + auto argTypes = getArgTypesForCall(Context, args); + + assert(signature.getRequiredArgs().allowsOptionalArgs()); + return arrangeLLVMFunctionInfo(signature.getReturnType(), + signature.isInstanceMethod(), + signature.isChainCall(), + argTypes, + signature.getExtInfo(), + paramInfos, + signature.getRequiredArgs()); } /// Arrange the argument and result information for an abstract value @@ -517,25 +674,26 @@ CodeGenTypes::arrangeLLVMFunctionInfo(CanQualType resultType, bool chainCall, ArrayRef<CanQualType> argTypes, FunctionType::ExtInfo info, + ArrayRef<FunctionProtoType::ExtParameterInfo> paramInfos, RequiredArgs required) { assert(std::all_of(argTypes.begin(), argTypes.end(), std::mem_fun_ref(&CanQualType::isCanonicalAsParam))); - unsigned CC = ClangCallConvToLLVMCallConv(info.getCC()); - // Lookup or create unique function info. llvm::FoldingSetNodeID ID; - CGFunctionInfo::Profile(ID, instanceMethod, chainCall, info, required, - resultType, argTypes); + CGFunctionInfo::Profile(ID, instanceMethod, chainCall, info, paramInfos, + required, resultType, argTypes); void *insertPos = nullptr; CGFunctionInfo *FI = FunctionInfos.FindNodeOrInsertPos(ID, insertPos); if (FI) return *FI; + unsigned CC = ClangCallConvToLLVMCallConv(info.getCC()); + // Construct the function info. We co-allocate the ArgInfos. FI = CGFunctionInfo::create(CC, instanceMethod, chainCall, info, - resultType, argTypes, required); + paramInfos, resultType, argTypes, required); FunctionInfos.InsertNode(FI, insertPos); bool inserted = FunctionsBeingProcessed.insert(FI).second; @@ -543,7 +701,11 @@ CodeGenTypes::arrangeLLVMFunctionInfo(CanQualType resultType, assert(inserted && "Recursively being processed?"); // Compute ABI information. - getABIInfo().computeInfo(*FI); + if (info.getCC() != CC_Swift) { + getABIInfo().computeInfo(*FI); + } else { + swiftcall::computeABIInfo(CGM, *FI); + } // Loop over all of the computed argument and return value info. 
If any of // them are direct or extend without a specified coerce type, specify the @@ -566,11 +728,16 @@ CGFunctionInfo *CGFunctionInfo::create(unsigned llvmCC, bool instanceMethod, bool chainCall, const FunctionType::ExtInfo &info, + ArrayRef<ExtParameterInfo> paramInfos, CanQualType resultType, ArrayRef<CanQualType> argTypes, RequiredArgs required) { - void *buffer = operator new(sizeof(CGFunctionInfo) + - sizeof(ArgInfo) * (argTypes.size() + 1)); + assert(paramInfos.empty() || paramInfos.size() == argTypes.size()); + + void *buffer = + operator new(totalSizeToAlloc<ArgInfo, ExtParameterInfo>( + argTypes.size() + 1, paramInfos.size())); + CGFunctionInfo *FI = new(buffer) CGFunctionInfo(); FI->CallingConvention = llvmCC; FI->EffectiveCallingConvention = llvmCC; @@ -585,9 +752,12 @@ CGFunctionInfo *CGFunctionInfo::create(unsigned llvmCC, FI->ArgStruct = nullptr; FI->ArgStructAlign = 0; FI->NumArgs = argTypes.size(); + FI->HasExtParameterInfos = !paramInfos.empty(); FI->getArgsBuffer()[0].type = resultType; for (unsigned i = 0, e = argTypes.size(); i != e; ++i) FI->getArgsBuffer()[i + 1].type = argTypes[i]; + for (unsigned i = 0, e = paramInfos.size(); i != e; ++i) + FI->getExtParameterInfosBuffer()[i] = paramInfos[i]; return FI; } @@ -634,7 +804,8 @@ struct RecordExpansion : TypeExpansion { RecordExpansion(SmallVector<const CXXBaseSpecifier *, 1> &&Bases, SmallVector<const FieldDecl *, 1> &&Fields) - : TypeExpansion(TEK_Record), Bases(Bases), Fields(Fields) {} + : TypeExpansion(TEK_Record), Bases(std::move(Bases)), + Fields(std::move(Fields)) {} static bool classof(const TypeExpansion *TE) { return TE->Kind == TEK_Record; } @@ -773,7 +944,7 @@ static void forConstantArrayExpansion(CodeGenFunction &CGF, } void CodeGenFunction::ExpandTypeFromArgs( - QualType Ty, LValue LV, SmallVectorImpl<llvm::Argument *>::iterator &AI) { + QualType Ty, LValue LV, SmallVectorImpl<llvm::Value *>::iterator &AI) { assert(LV.isSimple() && "Unexpected non-simple lvalue during struct expansion."); @@ -798,7 +969,7 @@ void CodeGenFunction::ExpandTypeFromArgs( } for (auto FD : RExp->Fields) { // FIXME: What are the right qualifiers here? - LValue SubLV = EmitLValueForField(LV, FD); + LValue SubLV = EmitLValueForFieldInitialization(LV, FD); ExpandTypeFromArgs(FD->getType(), SubLV, AI); } } else if (isa<ComplexExpansion>(Exp.get())) { @@ -1220,11 +1391,13 @@ void ClangToLLVMArgMapping::construct(const ASTContext &Context, // ignore and inalloca doesn't have matching LLVM parameters. 
IRArgs.NumberOfArgs = 0; break; - case ABIArgInfo::Expand: { + case ABIArgInfo::CoerceAndExpand: + IRArgs.NumberOfArgs = AI.getCoerceAndExpandTypeSequence().size(); + break; + case ABIArgInfo::Expand: IRArgs.NumberOfArgs = getExpansionSize(ArgType, Context); break; } - } if (IRArgs.NumberOfArgs > 0) { IRArgs.FirstArgIndex = IRArgNo; @@ -1323,6 +1496,10 @@ CodeGenTypes::GetFunctionType(const CGFunctionInfo &FI) { case ABIArgInfo::Ignore: resultType = llvm::Type::getVoidTy(getLLVMContext()); break; + + case ABIArgInfo::CoerceAndExpand: + resultType = retAI.getUnpaddedCoerceAndExpandType(); + break; } ClangToLLVMArgMapping IRFunctionArgs(getContext(), FI, true); @@ -1390,6 +1567,15 @@ CodeGenTypes::GetFunctionType(const CGFunctionInfo &FI) { break; } + case ABIArgInfo::CoerceAndExpand: { + auto ArgTypesIter = ArgTypes.begin() + FirstIRArg; + for (auto EltTy : ArgInfo.getCoerceAndExpandTypeSequence()) { + *ArgTypesIter++ = EltTy; + } + assert(ArgTypesIter == ArgTypes.begin() + FirstIRArg + NumIRArgs); + break; + } + case ABIArgInfo::Expand: auto ArgTypesIter = ArgTypes.begin() + FirstIRArg; getExpandedTypes(it->type, ArgTypesIter); @@ -1450,6 +1636,7 @@ void CodeGenModule::ConstructAttributeList( const Decl *TargetDecl = CalleeInfo.getCalleeDecl(); + bool HasAnyX86InterruptAttr = false; // FIXME: handle sseregparm someday... if (TargetDecl) { if (TargetDecl->hasAttr<ReturnsTwiceAttr>()) @@ -1487,6 +1674,7 @@ void CodeGenModule::ConstructAttributeList( if (TargetDecl->hasAttr<ReturnsNonNullAttr>()) RetAttrs.addAttribute(llvm::Attribute::NonNull); + HasAnyX86InterruptAttr = TargetDecl->hasAttr<AnyX86InterruptAttr>(); HasOptnone = TargetDecl->hasAttr<OptimizeNoneAttr>(); } @@ -1526,10 +1714,11 @@ void CodeGenModule::ConstructAttributeList( } bool DisableTailCalls = - CodeGenOpts.DisableTailCalls || + CodeGenOpts.DisableTailCalls || HasAnyX86InterruptAttr || (TargetDecl && TargetDecl->hasAttr<DisableTailCallsAttr>()); - FuncAttrs.addAttribute("disable-tail-calls", - llvm::toStringRef(DisableTailCalls)); + FuncAttrs.addAttribute( + "disable-tail-calls", + llvm::toStringRef(DisableTailCalls)); FuncAttrs.addAttribute("less-precise-fpmad", llvm::toStringRef(CodeGenOpts.LessPreciseFPMAD)); @@ -1543,9 +1732,13 @@ void CodeGenModule::ConstructAttributeList( llvm::toStringRef(CodeGenOpts.SoftFloat)); FuncAttrs.addAttribute("stack-protector-buffer-size", llvm::utostr(CodeGenOpts.SSPBufferSize)); + FuncAttrs.addAttribute("no-signed-zeros-fp-math", + llvm::toStringRef(CodeGenOpts.NoSignedZeros)); if (CodeGenOpts.StackRealignment) FuncAttrs.addAttribute("stackrealign"); + if (CodeGenOpts.Backchain) + FuncAttrs.addAttribute("backchain"); // Add target-cpu and target-features attributes to functions. If // we have a decl for the function and it has a target attribute then @@ -1594,6 +1787,18 @@ void CodeGenModule::ConstructAttributeList( } } + if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) { + // Conservatively, mark all functions and calls in CUDA as convergent + // (meaning, they may call an intrinsically convergent op, such as + // __syncthreads(), and so can't have certain optimizations applied around + // them). LLVM will remove this attribute where it safely can. + FuncAttrs.addAttribute(llvm::Attribute::Convergent); + + // Respect -fcuda-flush-denormals-to-zero. 
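  // For illustration only (not part of this change): in CUDA device
  // compilation every function is conservatively marked 'convergent' above,
  // and the driver-level flag is mapped onto the "nvptx-f32ftz" function
  // attribute just below. A hedged example invocation, assuming the flag
  // spellings available in this release of the driver:
  //
  //   clang++ -x cuda --cuda-gpu-arch=sm_35 -fcuda-flush-denormals-to-zero \
  //       -c kernel.cu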
+ if (getLangOpts().CUDADeviceFlushDenormalsToZero) + FuncAttrs.addAttribute("nvptx-f32ftz", "true"); + } + ClangToLLVMArgMapping IRFunctionArgs(getContext(), FI); QualType RetTy = FI.getReturnType(); @@ -1620,6 +1825,9 @@ void CodeGenModule::ConstructAttributeList( break; } + case ABIArgInfo::CoerceAndExpand: + break; + case ABIArgInfo::Expand: llvm_unreachable("Invalid ABI kind for return argument"); } @@ -1639,10 +1847,13 @@ void CodeGenModule::ConstructAttributeList( getLLVMContext(), llvm::AttributeSet::ReturnIndex, RetAttrs)); } + bool hasUsedSRet = false; + // Attach attributes to sret. if (IRFunctionArgs.hasSRetArg()) { llvm::AttrBuilder SRETAttrs; SRETAttrs.addAttribute(llvm::Attribute::StructRet); + hasUsedSRet = true; if (RetAI.getInReg()) SRETAttrs.addAttribute(llvm::Attribute::InReg); PAL.push_back(llvm::AttributeSet::get( @@ -1727,7 +1938,8 @@ void CodeGenModule::ConstructAttributeList( } case ABIArgInfo::Ignore: case ABIArgInfo::Expand: - continue; + case ABIArgInfo::CoerceAndExpand: + break; case ABIArgInfo::InAlloca: // inalloca disables readnone and readonly. @@ -1745,6 +1957,41 @@ void CodeGenModule::ConstructAttributeList( Attrs.addAttribute(llvm::Attribute::NonNull); } + switch (FI.getExtParameterInfo(ArgNo).getABI()) { + case ParameterABI::Ordinary: + break; + + case ParameterABI::SwiftIndirectResult: { + // Add 'sret' if we haven't already used it for something, but + // only if the result is void. + if (!hasUsedSRet && RetTy->isVoidType()) { + Attrs.addAttribute(llvm::Attribute::StructRet); + hasUsedSRet = true; + } + + // Add 'noalias' in either case. + Attrs.addAttribute(llvm::Attribute::NoAlias); + + // Add 'dereferenceable' and 'alignment'. + auto PTy = ParamType->getPointeeType(); + if (!PTy->isIncompleteType() && PTy->isConstantSizeType()) { + auto info = getContext().getTypeInfoInChars(PTy); + Attrs.addDereferenceableAttr(info.first.getQuantity()); + Attrs.addAttribute(llvm::Attribute::getWithAlignment(getLLVMContext(), + info.second.getQuantity())); + } + break; + } + + case ParameterABI::SwiftErrorResult: + Attrs.addAttribute(llvm::Attribute::SwiftError); + break; + + case ParameterABI::SwiftContext: + Attrs.addAttribute(llvm::Attribute::SwiftSelf); + break; + } + if (Attrs.hasAttributes()) { unsigned FirstIRArg, NumIRArgs; std::tie(FirstIRArg, NumIRArgs) = IRFunctionArgs.getIRArgs(ArgNo); @@ -1810,6 +2057,18 @@ static const NonNullAttr *getNonNullAttr(const Decl *FD, const ParmVarDecl *PVD, return nullptr; } +namespace { + struct CopyBackSwiftError final : EHScopeStack::Cleanup { + Address Temp; + Address Arg; + CopyBackSwiftError(Address temp, Address arg) : Temp(temp), Arg(arg) {} + void Emit(CodeGenFunction &CGF, Flags flags) override { + llvm::Value *errorValue = CGF.Builder.CreateLoad(Temp); + CGF.Builder.CreateStore(errorValue, Arg); + } + }; +} + void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, llvm::Function *Fn, const FunctionArgList &Args) { @@ -1835,7 +2094,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, ClangToLLVMArgMapping IRFunctionArgs(CGM.getContext(), FI); // Flattened function arguments. - SmallVector<llvm::Argument *, 16> FnArgs; + SmallVector<llvm::Value *, 16> FnArgs; FnArgs.reserve(IRFunctionArgs.totalIRArgs()); for (auto &Arg : Fn->args()) { FnArgs.push_back(&Arg); @@ -1856,7 +2115,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, // Name the struct return parameter. 
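  // For illustration only (not part of this change): the ParameterABI switch
  // above maps the Swift parameter kinds onto LLVM IR attributes:
  // swift_indirect_result becomes 'sret' (when still available) plus
  // 'noalias', swift_error_result becomes 'swifterror', and swift_context
  // becomes 'swiftself'. A rough sketch of a resulting IR signature, assuming
  // the swiftcc/swiftself/swifterror spellings in this LLVM import:
  //
  //   declare swiftcc void @work(i32, i8* swiftself, i8** swifterror)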
if (IRFunctionArgs.hasSRetArg()) { - auto AI = FnArgs[IRFunctionArgs.getSRetArgNo()]; + auto AI = cast<llvm::Argument>(FnArgs[IRFunctionArgs.getSRetArgNo()]); AI->setName("agg.result"); AI->addAttr(llvm::AttributeSet::get(getLLVMContext(), AI->getArgNo() + 1, llvm::Attribute::NoAlias)); @@ -1944,8 +2203,8 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, ArgI.getCoerceToType() == ConvertType(Ty) && ArgI.getDirectOffset() == 0) { assert(NumIRArgs == 1); - auto AI = FnArgs[FirstIRArg]; - llvm::Value *V = AI; + llvm::Value *V = FnArgs[FirstIRArg]; + auto AI = cast<llvm::Argument>(V); if (const ParmVarDecl *PVD = dyn_cast<ParmVarDecl>(Arg)) { if (getNonNullAttr(CurCodeDecl, PVD, PVD->getType(), @@ -2014,6 +2273,25 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, AI->getArgNo() + 1, llvm::Attribute::NoAlias)); + // LLVM expects swifterror parameters to be used in very restricted + // ways. Copy the value into a less-restricted temporary. + if (FI.getExtParameterInfo(ArgNo).getABI() + == ParameterABI::SwiftErrorResult) { + QualType pointeeTy = Ty->getPointeeType(); + assert(pointeeTy->isPointerType()); + Address temp = + CreateMemTemp(pointeeTy, getPointerAlign(), "swifterror.temp"); + Address arg = Address(V, getContext().getTypeAlignInChars(pointeeTy)); + llvm::Value *incomingErrorValue = Builder.CreateLoad(arg); + Builder.CreateStore(incomingErrorValue, temp); + V = temp.getPointer(); + + // Push a cleanup to copy the value back at the end of the function. + // The convention does not guarantee that the value will be written + // back if the function exits with an unwind exception. + EHStack.pushCleanup<CopyBackSwiftError>(NormalCleanup, temp, arg); + } + // Ensure the argument is the correct type. if (V->getType() != ArgI.getCoerceToType()) V = Builder.CreateBitCast(V, ArgI.getCoerceToType()); @@ -2100,6 +2378,29 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, break; } + case ABIArgInfo::CoerceAndExpand: { + // Reconstruct into a temporary. + Address alloca = CreateMemTemp(Ty, getContext().getDeclAlign(Arg)); + ArgVals.push_back(ParamValue::forIndirect(alloca)); + + auto coercionType = ArgI.getCoerceAndExpandType(); + alloca = Builder.CreateElementBitCast(alloca, coercionType); + auto layout = CGM.getDataLayout().getStructLayout(coercionType); + + unsigned argIndex = FirstIRArg; + for (unsigned i = 0, e = coercionType->getNumElements(); i != e; ++i) { + llvm::Type *eltType = coercionType->getElementType(i); + if (ABIArgInfo::isPaddingForCoerceAndExpand(eltType)) + continue; + + auto eltAddr = Builder.CreateStructGEP(alloca, i, layout); + auto elt = FnArgs[argIndex++]; + Builder.CreateStore(elt, eltAddr); + } + assert(argIndex == FirstIRArg + NumIRArgs); + break; + } + case ABIArgInfo::Expand: { // If this structure was expanded into multiple arguments then // we need to create a temporary and reconstruct it from the @@ -2462,9 +2763,26 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI, // In ARC, end functions that return a retainable type with a call // to objc_autoreleaseReturnValue. if (AutoreleaseResult) { +#ifndef NDEBUG + // Type::isObjCRetainabletype has to be called on a QualType that hasn't + // been stripped of the typedefs, so we cannot use RetTy here. Get the + // original return type of FunctionDecl, CurCodeDecl, and BlockDecl from + // CurCodeDecl or BlockInfo. 
+ QualType RT; + + if (auto *FD = dyn_cast<FunctionDecl>(CurCodeDecl)) + RT = FD->getReturnType(); + else if (auto *MD = dyn_cast<ObjCMethodDecl>(CurCodeDecl)) + RT = MD->getReturnType(); + else if (isa<BlockDecl>(CurCodeDecl)) + RT = BlockInfo->BlockExpression->getFunctionType()->getReturnType(); + else + llvm_unreachable("Unexpected function/method type"); + assert(getLangOpts().ObjCAutoRefCount && !FI.isReturnsRetained() && - RetTy->isObjCRetainableType()); + RT->isObjCRetainableType()); +#endif RV = emitAutoreleaseOfResult(*this, RV); } @@ -2473,6 +2791,40 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI, case ABIArgInfo::Ignore: break; + case ABIArgInfo::CoerceAndExpand: { + auto coercionType = RetAI.getCoerceAndExpandType(); + auto layout = CGM.getDataLayout().getStructLayout(coercionType); + + // Load all of the coerced elements out into results. + llvm::SmallVector<llvm::Value*, 4> results; + Address addr = Builder.CreateElementBitCast(ReturnValue, coercionType); + for (unsigned i = 0, e = coercionType->getNumElements(); i != e; ++i) { + auto coercedEltType = coercionType->getElementType(i); + if (ABIArgInfo::isPaddingForCoerceAndExpand(coercedEltType)) + continue; + + auto eltAddr = Builder.CreateStructGEP(addr, i, layout); + auto elt = Builder.CreateLoad(eltAddr); + results.push_back(elt); + } + + // If we have one result, it's the single direct result type. + if (results.size() == 1) { + RV = results[0]; + + // Otherwise, we need to make a first-class aggregate. + } else { + // Construct a return type that lacks padding elements. + llvm::Type *returnType = RetAI.getUnpaddedCoerceAndExpandType(); + + RV = llvm::UndefValue::get(returnType); + for (unsigned i = 0, e = results.size(); i != e; ++i) { + RV = Builder.CreateInsertValue(RV, results[i], i); + } + } + break; + } + case ABIArgInfo::Expand: llvm_unreachable("Invalid ABI kind for return argument"); } @@ -2536,23 +2888,15 @@ void CodeGenFunction::EmitDelegateCallArg(CallArgList &args, QualType type = param->getType(); - // For the most part, we just need to load the alloca, except: - // 1) aggregate r-values are actually pointers to temporaries, and - // 2) references to non-scalars are pointers directly to the aggregate. - // I don't know why references to scalars are different here. - if (const ReferenceType *ref = type->getAs<ReferenceType>()) { - if (!hasScalarEvaluationKind(ref->getPointeeType())) - return args.add(RValue::getAggregate(local), type); - - // Locals which are references to scalars are represented - // with allocas holding the pointer. - return args.add(RValue::get(Builder.CreateLoad(local)), type); - } - assert(!isInAllocaArgument(CGM.getCXXABI(), type) && "cannot emit delegate call arguments for inalloca arguments!"); - args.add(convertTempToRValue(local, type, loc), type); + // For the most part, we just need to load the alloca, except that + // aggregate r-values are actually pointers to temporaries. 
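  // For illustration only (not part of this change): the new CoerceAndExpand
  // kind used in the return-value handling above splits an aggregate into the
  // non-padding elements of a target-chosen coercion struct and moves each
  // element as a separate IR value. For example, a record such as
  //
  //   struct Pair { float f; int i; };
  //
  // might be given the coercion type { float, i32 }: both elements are loaded
  // from (or stored back into) the coerced temporary, while any padding
  // elements the target inserts for alignment are skipped. The concrete
  // coercion type is chosen by the target/swiftcall lowering, so treat the
  // struct above as a sketch only.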
+ if (type->isReferenceType()) + args.add(RValue::get(Builder.CreateLoad(local)), type); + else + args.add(convertTempToRValue(local, type, loc), type); } static bool isProvablyNull(llvm::Value *addr) { @@ -2863,10 +3207,10 @@ void CodeGenFunction::EmitCallArgs( size_t CallArgsStart = Args.size(); for (int I = ArgTypes.size() - 1; I >= 0; --I) { CallExpr::const_arg_iterator Arg = ArgRange.begin() + I; + MaybeEmitImplicitObjectSize(I, *Arg); EmitCallArg(Args, *Arg, ArgTypes[I]); EmitNonNullArgCheck(Args.back().RV, ArgTypes[I], (*Arg)->getExprLoc(), CalleeDecl, ParamsToSkip + I); - MaybeEmitImplicitObjectSize(I, *Arg); } // Un-reverse the arguments we just evaluated so they match up with the LLVM @@ -3046,24 +3390,13 @@ CodeGenFunction::EmitRuntimeCall(llvm::Value *callee, return EmitRuntimeCall(callee, None, name); } -/// Emits a simple call (never an invoke) to the given runtime -/// function. -llvm::CallInst * -CodeGenFunction::EmitRuntimeCall(llvm::Value *callee, - ArrayRef<llvm::Value*> args, - const llvm::Twine &name) { - llvm::CallInst *call = Builder.CreateCall(callee, args, name); - call->setCallingConv(getRuntimeCC()); - return call; -} - // Calls which may throw must have operand bundles indicating which funclet // they are nested within. static void -getBundlesForFunclet(llvm::Value *Callee, - llvm::Instruction *CurrentFuncletPad, +getBundlesForFunclet(llvm::Value *Callee, llvm::Instruction *CurrentFuncletPad, SmallVectorImpl<llvm::OperandBundleDef> &BundleList) { - // There is no need for a funclet operand bundle if we aren't inside a funclet. + // There is no need for a funclet operand bundle if we aren't inside a + // funclet. if (!CurrentFuncletPad) return; @@ -3075,6 +3408,19 @@ getBundlesForFunclet(llvm::Value *Callee, BundleList.emplace_back("funclet", CurrentFuncletPad); } +/// Emits a simple call (never an invoke) to the given runtime function. +llvm::CallInst * +CodeGenFunction::EmitRuntimeCall(llvm::Value *callee, + ArrayRef<llvm::Value*> args, + const llvm::Twine &name) { + SmallVector<llvm::OperandBundleDef, 1> BundleList; + getBundlesForFunclet(callee, CurrentFuncletPad, BundleList); + + llvm::CallInst *call = Builder.CreateCall(callee, args, BundleList, name); + call->setCallingConv(getRuntimeCC()); + return call; +} + /// Emits a call or invoke to the given noreturn runtime function. void CodeGenFunction::EmitNoreturnRuntimeCallOrInvoke(llvm::Value *callee, ArrayRef<llvm::Value*> args) { @@ -3098,8 +3444,7 @@ void CodeGenFunction::EmitNoreturnRuntimeCallOrInvoke(llvm::Value *callee, } } -/// Emits a call or invoke instruction to the given nullary runtime -/// function. +/// Emits a call or invoke instruction to the given nullary runtime function. 
llvm::CallSite CodeGenFunction::EmitRuntimeCallOrInvoke(llvm::Value *callee, const Twine &name) { @@ -3123,13 +3468,16 @@ CodeGenFunction::EmitCallOrInvoke(llvm::Value *Callee, ArrayRef<llvm::Value *> Args, const Twine &Name) { llvm::BasicBlock *InvokeDest = getInvokeDest(); + SmallVector<llvm::OperandBundleDef, 1> BundleList; + getBundlesForFunclet(Callee, CurrentFuncletPad, BundleList); llvm::Instruction *Inst; if (!InvokeDest) - Inst = Builder.CreateCall(Callee, Args, Name); + Inst = Builder.CreateCall(Callee, Args, BundleList, Name); else { llvm::BasicBlock *ContBB = createBasicBlock("invoke.cont"); - Inst = Builder.CreateInvoke(Callee, ContBB, InvokeDest, Args, Name); + Inst = Builder.CreateInvoke(Callee, ContBB, InvokeDest, Args, BundleList, + Name); EmitBlock(ContBB); } @@ -3208,7 +3556,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // alloca to hold the result, unless one is given to us. Address SRetPtr = Address::invalid(); size_t UnusedReturnSize = 0; - if (RetAI.isIndirect() || RetAI.isInAlloca()) { + if (RetAI.isIndirect() || RetAI.isInAlloca() || RetAI.isCoerceAndExpand()) { if (!ReturnValue.isNull()) { SRetPtr = ReturnValue.getValue(); } else { @@ -3222,12 +3570,15 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, } if (IRFunctionArgs.hasSRetArg()) { IRCallArgs[IRFunctionArgs.getSRetArgNo()] = SRetPtr.getPointer(); - } else { + } else if (RetAI.isInAlloca()) { Address Addr = createInAllocaStructGEP(RetAI.getInAllocaFieldIndex()); Builder.CreateStore(SRetPtr.getPointer(), Addr); } } + Address swiftErrorTemp = Address::invalid(); + Address swiftErrorArg = Address::invalid(); + assert(CallInfo.arg_size() == CallArgs.size() && "Mismatch between function signature & arguments."); unsigned ArgNo = 0; @@ -3334,6 +3685,25 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, else V = Builder.CreateLoad(RV.getAggregateAddress()); + // Implement swifterror by copying into a new swifterror argument. + // We'll write back in the normal path out of the call. + if (CallInfo.getExtParameterInfo(ArgNo).getABI() + == ParameterABI::SwiftErrorResult) { + assert(!swiftErrorTemp.isValid() && "multiple swifterror args"); + + QualType pointeeTy = I->Ty->getPointeeType(); + swiftErrorArg = + Address(V, getContext().getTypeAlignInChars(pointeeTy)); + + swiftErrorTemp = + CreateMemTemp(pointeeTy, getPointerAlign(), "swifterror.temp"); + V = swiftErrorTemp.getPointer(); + cast<llvm::AllocaInst>(V)->setSwiftError(true); + + llvm::Value *errorValue = Builder.CreateLoad(swiftErrorArg); + Builder.CreateStore(errorValue, swiftErrorTemp); + } + // We might have to widen integers, but we should never truncate. 
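  // For illustration only (not part of this change): the swifterror handling
  // above copies the caller's error slot into a fresh alloca marked with
  // setSwiftError(true), passes that alloca to the callee, and copies the
  // value back on the normal return path later in this function. A sketch of
  // a source-level declaration that reaches this path, assuming the
  // swift_error_result spelling and its requirement of a preceding
  // swift_context parameter:
  //
  //   void mayFail(void *ctx __attribute__((swift_context)),
  //                void **err __attribute__((swift_error_result)))
  //       __attribute__((swiftcall));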
if (ArgInfo.getCoerceToType() != V->getType() && V->getType()->isIntegerTy()) @@ -3344,6 +3714,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, if (FirstIRArg < IRFuncTy->getNumParams() && V->getType() != IRFuncTy->getParamType(FirstIRArg)) V = Builder.CreateBitCast(V, IRFuncTy->getParamType(FirstIRArg)); + IRCallArgs[FirstIRArg] = V; break; } @@ -3402,6 +3773,51 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, break; } + case ABIArgInfo::CoerceAndExpand: { + auto coercionType = ArgInfo.getCoerceAndExpandType(); + auto layout = CGM.getDataLayout().getStructLayout(coercionType); + + llvm::Value *tempSize = nullptr; + Address addr = Address::invalid(); + if (RV.isAggregate()) { + addr = RV.getAggregateAddress(); + } else { + assert(RV.isScalar()); // complex should always just be direct + + llvm::Type *scalarType = RV.getScalarVal()->getType(); + auto scalarSize = CGM.getDataLayout().getTypeAllocSize(scalarType); + auto scalarAlign = CGM.getDataLayout().getPrefTypeAlignment(scalarType); + + tempSize = llvm::ConstantInt::get(CGM.Int64Ty, scalarSize); + + // Materialize to a temporary. + addr = CreateTempAlloca(RV.getScalarVal()->getType(), + CharUnits::fromQuantity(std::max(layout->getAlignment(), + scalarAlign))); + EmitLifetimeStart(scalarSize, addr.getPointer()); + + Builder.CreateStore(RV.getScalarVal(), addr); + } + + addr = Builder.CreateElementBitCast(addr, coercionType); + + unsigned IRArgPos = FirstIRArg; + for (unsigned i = 0, e = coercionType->getNumElements(); i != e; ++i) { + llvm::Type *eltType = coercionType->getElementType(i); + if (ABIArgInfo::isPaddingForCoerceAndExpand(eltType)) continue; + Address eltAddr = Builder.CreateStructGEP(addr, i, layout); + llvm::Value *elt = Builder.CreateLoad(eltAddr); + IRCallArgs[IRArgPos++] = elt; + } + assert(IRArgPos == FirstIRArg + NumIRArgs); + + if (tempSize) { + EmitLifetimeEnd(tempSize, addr.getPointer()); + } + + break; + } + case ABIArgInfo::Expand: unsigned IRArgPos = FirstIRArg; ExpandTypeToArgs(I->Ty, RV, IRFuncTy, IRCallArgs, IRArgPos); @@ -3541,6 +3957,13 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, CS.setAttributes(Attrs); CS.setCallingConv(static_cast<llvm::CallingConv::ID>(CallingConv)); + // Insert instrumentation or attach profile metadata at indirect call sites. + // For more details, see the comment before the definition of + // IPVK_IndirectCallTarget in InstrProfData.inc. + if (!CS.getCalledFunction()) + PGO.valueProfile(Builder, llvm::IPVK_IndirectCallTarget, + CS.getInstruction(), Callee); + // In ObjC ARC mode with no ObjC ARC exception safety, tell the ARC // optimizer it can aggressively ignore unwind edges. if (CGM.getLangOpts().ObjCAutoRefCount) @@ -3567,9 +3990,15 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, } llvm::Instruction *CI = CS.getInstruction(); - if (Builder.isNamePreserving() && !CI->getType()->isVoidTy()) + if (!CI->getType()->isVoidTy()) CI->setName("call"); + // Perform the swifterror writeback. + if (swiftErrorTemp.isValid()) { + llvm::Value *errorResult = Builder.CreateLoad(swiftErrorTemp); + Builder.CreateStore(errorResult, swiftErrorArg); + } + // Emit any writebacks immediately. Arguably this should happen // after any return-value munging. 
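  // For illustration only (not part of this change): the PGO.valueProfile
  // call added above instruments indirect call sites so the profile records
  // which targets are actually reached; with such a profile applied, the
  // optimizer can speculatively promote hot indirect calls. A hedged example
  // of the usual workflow, assuming the standard instrumentation flags:
  //
  //   clang++ -O2 -fprofile-instr-generate app.cpp -o app
  //   ./app && llvm-profdata merge default.profraw -o app.profdata
  //   clang++ -O2 -fprofile-instr-use=app.profdata app.cpp -o app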
if (CallArgs.hasWritebacks()) @@ -3587,6 +4016,31 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, RValue Ret = [&] { switch (RetAI.getKind()) { + case ABIArgInfo::CoerceAndExpand: { + auto coercionType = RetAI.getCoerceAndExpandType(); + auto layout = CGM.getDataLayout().getStructLayout(coercionType); + + Address addr = SRetPtr; + addr = Builder.CreateElementBitCast(addr, coercionType); + + assert(CI->getType() == RetAI.getUnpaddedCoerceAndExpandType()); + bool requiresExtract = isa<llvm::StructType>(CI->getType()); + + unsigned unpaddedIndex = 0; + for (unsigned i = 0, e = coercionType->getNumElements(); i != e; ++i) { + llvm::Type *eltType = coercionType->getElementType(i); + if (ABIArgInfo::isPaddingForCoerceAndExpand(eltType)) continue; + Address eltAddr = Builder.CreateStructGEP(addr, i, layout); + llvm::Value *elt = CI; + if (requiresExtract) + elt = Builder.CreateExtractValue(elt, unpaddedIndex++); + else + assert(unpaddedIndex == 0); + Builder.CreateStore(elt, eltAddr); + } + // FALLTHROUGH + } + case ABIArgInfo::InAlloca: case ABIArgInfo::Indirect: { RValue ret = convertTempToRValue(SRetPtr, RetTy, SourceLocation()); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGClass.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGClass.cpp index 2e566de..7ed891f 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGClass.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGClass.cpp @@ -26,6 +26,7 @@ #include "clang/Frontend/CodeGenOptions.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Metadata.h" +#include "llvm/Transforms/Utils/SanitizerStats.h" using namespace clang; using namespace CodeGen; @@ -94,7 +95,7 @@ CodeGenModule::getDynamicOffsetAlignment(CharUnits actualBaseAlign, // unless we someday add some sort of attribute to change the // assumed alignment of 'this'. So our goal here is pretty much // just to allow the user to explicitly say that a pointer is - // under-aligned and then safely access its fields and v-tables. + // under-aligned and then safely access its fields and vtables. if (actualBaseAlign >= expectedBaseAlign) { return expectedTargetAlign; } @@ -745,7 +746,7 @@ static void EmitMemberInitializer(CodeGenFunction &CGF, ArrayRef<VarDecl *> ArrayIndexes; if (MemberInit->getNumArrayIndices()) - ArrayIndexes = MemberInit->getArrayIndexes(); + ArrayIndexes = MemberInit->getArrayIndices(); CGF.EmitInitializerForField(Field, LHS, MemberInit->getInit(), ArrayIndexes); } @@ -986,7 +987,7 @@ namespace { CodeGenFunction &CGF; SanitizerSet OldSanOpts; }; -} +} // end anonymous namespace namespace { class FieldMemcpyizer { @@ -1071,7 +1072,6 @@ namespace { const CXXRecordDecl *ClassDecl; private: - void emitMemcpyIR(Address DestPtr, Address SrcPtr, CharUnits Size) { llvm::PointerType *DPT = DestPtr.getType(); llvm::Type *DBP = @@ -1087,13 +1087,12 @@ namespace { } void addInitialField(FieldDecl *F) { - FirstField = F; - LastField = F; - FirstFieldOffset = RecLayout.getFieldOffset(F->getFieldIndex()); - LastFieldOffset = FirstFieldOffset; - LastAddedFieldIndex = F->getFieldIndex(); - return; - } + FirstField = F; + LastField = F; + FirstFieldOffset = RecLayout.getFieldOffset(F->getFieldIndex()); + LastFieldOffset = FirstFieldOffset; + LastAddedFieldIndex = F->getFieldIndex(); + } void addNextField(FieldDecl *F) { // For the most part, the following invariant will hold: @@ -1127,7 +1126,6 @@ namespace { class ConstructorMemcpyizer : public FieldMemcpyizer { private: - /// Get source argument for copy constructor. Returns null if not a copy /// constructor. 
static const VarDecl *getTrivialCopySource(CodeGenFunction &CGF, @@ -1232,7 +1230,6 @@ namespace { class AssignmentMemcpyizer : public FieldMemcpyizer { private: - // Returns the memcpyable field copied by the given statement, if one // exists. Otherwise returns null. FieldDecl *getMemcpyableField(Stmt *S) { @@ -1306,7 +1303,6 @@ namespace { SmallVector<Stmt*, 16> AggregatedStmts; public: - AssignmentMemcpyizer(CodeGenFunction &CGF, const CXXMethodDecl *AD, FunctionArgList &Args) : FieldMemcpyizer(CGF, AD->getParent(), Args[Args.size() - 1]), @@ -1607,6 +1603,7 @@ void CodeGenFunction::emitImplicitAssignmentOperatorBody(FunctionArgList &Args) LexicalScope Scope(*this, RootCS->getSourceRange()); + incrementProfileCounter(RootCS); AssignmentMemcpyizer AM(*this, AssignOp, Args); for (auto *I : RootCS->body()) AM.emitAssignment(I); @@ -1628,6 +1625,7 @@ namespace { struct CallDtorDeleteConditional final : EHScopeStack::Cleanup { llvm::Value *ShouldDeleteCondition; + public: CallDtorDeleteConditional(llvm::Value *ShouldDeleteCondition) : ShouldDeleteCondition(ShouldDeleteCondition) { @@ -1917,7 +1915,7 @@ void CodeGenFunction::EnterDtorCleanups(const CXXDestructorDecl *DD, /// \param zeroInitialize true if each element should be /// zero-initialized before it is constructed void CodeGenFunction::EmitCXXAggrConstructorCall( - const CXXConstructorDecl *ctor, const ConstantArrayType *arrayType, + const CXXConstructorDecl *ctor, const ArrayType *arrayType, Address arrayBegin, const CXXConstructExpr *E, bool zeroInitialize) { QualType elementType; llvm::Value *numElements = @@ -2050,6 +2048,62 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D, bool ForVirtualBase, bool Delegating, Address This, const CXXConstructExpr *E) { + CallArgList Args; + + // Push the this ptr. + Args.add(RValue::get(This.getPointer()), D->getThisType(getContext())); + + // If this is a trivial constructor, emit a memcpy now before we lose + // the alignment information on the argument. + // FIXME: It would be better to preserve alignment information into CallArg. + if (isMemcpyEquivalentSpecialMember(D)) { + assert(E->getNumArgs() == 1 && "unexpected argcount for trivial ctor"); + + const Expr *Arg = E->getArg(0); + QualType SrcTy = Arg->getType(); + Address Src = EmitLValue(Arg).getAddress(); + QualType DestTy = getContext().getTypeDeclType(D->getParent()); + EmitAggregateCopyCtor(This, Src, DestTy, SrcTy); + return; + } + + // Add the rest of the user-supplied arguments. + const FunctionProtoType *FPT = D->getType()->castAs<FunctionProtoType>(); + EmitCallArgs(Args, FPT, E->arguments(), E->getConstructor()); + + EmitCXXConstructorCall(D, Type, ForVirtualBase, Delegating, This, Args); +} + +static bool canEmitDelegateCallArgs(CodeGenFunction &CGF, + const CXXConstructorDecl *Ctor, + CXXCtorType Type, CallArgList &Args) { + // We can't forward a variadic call. + if (Ctor->isVariadic()) + return false; + + if (CGF.getTarget().getCXXABI().areArgsDestroyedLeftToRightInCallee()) { + // If the parameters are callee-cleanup, it's not safe to forward. + for (auto *P : Ctor->parameters()) + if (P->getType().isDestructedType()) + return false; + + // Likewise if they're inalloca. + const CGFunctionInfo &Info = + CGF.CGM.getTypes().arrangeCXXConstructorCall(Args, Ctor, Type, 0); + if (Info.usesInAlloca()) + return false; + } + + // Anything else should be OK. 
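  // For illustration only (not part of this change): the
  // canEmitDelegateCallArgs check above guards the new handling of C++11
  // inheriting constructors: when the arguments cannot simply be forwarded
  // (variadic constructors, callee-cleanup or inalloca parameters), the
  // inheriting constructor is emitted inline instead. A small, self-contained
  // example that exercises CXXInheritedCtorInitExpr:
  //
  //   struct Base { Base(int, double); };
  //   struct Derived : Base { using Base::Base; };  // inheriting constructor
  //   Derived d(1, 2.0);                            // constructs Base(1, 2.0)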
+ return true; +} + +void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D, + CXXCtorType Type, + bool ForVirtualBase, + bool Delegating, + Address This, + CallArgList &Args) { const CXXRecordDecl *ClassDecl = D->getParent(); // C++11 [class.mfct.non-static]p2: @@ -2060,7 +2114,7 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D, This.getPointer(), getContext().getRecordType(ClassDecl)); if (D->isTrivial() && D->isDefaultConstructor()) { - assert(E->getNumArgs() == 0 && "trivial default ctor with args"); + assert(Args.size() == 1 && "trivial default ctor with args"); return; } @@ -2068,24 +2122,24 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D, // union copy constructor, we must emit a memcpy, because the AST does not // model that copy. if (isMemcpyEquivalentSpecialMember(D)) { - assert(E->getNumArgs() == 1 && "unexpected argcount for trivial ctor"); + assert(Args.size() == 2 && "unexpected argcount for trivial ctor"); - const Expr *Arg = E->getArg(0); - QualType SrcTy = Arg->getType(); - Address Src = EmitLValue(Arg).getAddress(); + QualType SrcTy = D->getParamDecl(0)->getType().getNonReferenceType(); + Address Src(Args[1].RV.getScalarVal(), getNaturalTypeAlignment(SrcTy)); QualType DestTy = getContext().getTypeDeclType(ClassDecl); EmitAggregateCopyCtor(This, Src, DestTy, SrcTy); return; } - CallArgList Args; - - // Push the this ptr. - Args.add(RValue::get(This.getPointer()), D->getThisType(getContext())); - - // Add the rest of the user-supplied arguments. - const FunctionProtoType *FPT = D->getType()->castAs<FunctionProtoType>(); - EmitCallArgs(Args, FPT, E->arguments(), E->getConstructor()); + // Check whether we can actually emit the constructor before trying to do so. + if (auto Inherited = D->getInheritedConstructor()) { + if (getTypes().inheritingCtorHasParams(Inherited, Type) && + !canEmitDelegateCallArgs(*this, D, Type, Args)) { + EmitInlinedInheritingCXXConstructorCall(D, Type, ForVirtualBase, + Delegating, Args); + return; + } + } // Insert any ABI-specific implicit constructor arguments. unsigned ExtraArgs = CGM.getCXXABI().addImplicitConstructorArgs( @@ -2115,6 +2169,95 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D, EmitVTableAssumptionLoads(ClassDecl, This); } +void CodeGenFunction::EmitInheritedCXXConstructorCall( + const CXXConstructorDecl *D, bool ForVirtualBase, Address This, + bool InheritedFromVBase, const CXXInheritedCtorInitExpr *E) { + CallArgList Args; + CallArg ThisArg(RValue::get(This.getPointer()), D->getThisType(getContext()), + /*NeedsCopy=*/false); + + // Forward the parameters. + if (InheritedFromVBase && + CGM.getTarget().getCXXABI().hasConstructorVariants()) { + // Nothing to do; this construction is not responsible for constructing + // the base class containing the inherited constructor. + // FIXME: Can we just pass undef's for the remaining arguments if we don't + // have constructor variants? + Args.push_back(ThisArg); + } else if (!CXXInheritedCtorInitExprArgs.empty()) { + // The inheriting constructor was inlined; just inject its arguments. + assert(CXXInheritedCtorInitExprArgs.size() >= D->getNumParams() && + "wrong number of parameters for inherited constructor call"); + Args = CXXInheritedCtorInitExprArgs; + Args[0] = ThisArg; + } else { + // The inheriting constructor was not inlined. Emit delegating arguments. 
+ Args.push_back(ThisArg); + const auto *OuterCtor = cast<CXXConstructorDecl>(CurCodeDecl); + assert(OuterCtor->getNumParams() == D->getNumParams()); + assert(!OuterCtor->isVariadic() && "should have been inlined"); + + for (const auto *Param : OuterCtor->parameters()) { + assert(getContext().hasSameUnqualifiedType( + OuterCtor->getParamDecl(Param->getFunctionScopeIndex())->getType(), + Param->getType())); + EmitDelegateCallArg(Args, Param, E->getLocation()); + + // Forward __attribute__(pass_object_size). + if (Param->hasAttr<PassObjectSizeAttr>()) { + auto *POSParam = SizeArguments[Param]; + assert(POSParam && "missing pass_object_size value for forwarding"); + EmitDelegateCallArg(Args, POSParam, E->getLocation()); + } + } + } + + EmitCXXConstructorCall(D, Ctor_Base, ForVirtualBase, /*Delegating*/false, + This, Args); +} + +void CodeGenFunction::EmitInlinedInheritingCXXConstructorCall( + const CXXConstructorDecl *Ctor, CXXCtorType CtorType, bool ForVirtualBase, + bool Delegating, CallArgList &Args) { + InlinedInheritingConstructorScope Scope(*this, GlobalDecl(Ctor, CtorType)); + + // Save the arguments to be passed to the inherited constructor. + CXXInheritedCtorInitExprArgs = Args; + + FunctionArgList Params; + QualType RetType = BuildFunctionArgList(CurGD, Params); + FnRetTy = RetType; + + // Insert any ABI-specific implicit constructor arguments. + CGM.getCXXABI().addImplicitConstructorArgs(*this, Ctor, CtorType, + ForVirtualBase, Delegating, Args); + + // Emit a simplified prolog. We only need to emit the implicit params. + assert(Args.size() >= Params.size() && "too few arguments for call"); + for (unsigned I = 0, N = Args.size(); I != N; ++I) { + if (I < Params.size() && isa<ImplicitParamDecl>(Params[I])) { + const RValue &RV = Args[I].RV; + assert(!RV.isComplex() && "complex indirect params not supported"); + ParamValue Val = RV.isScalar() + ? ParamValue::forDirect(RV.getScalarVal()) + : ParamValue::forIndirect(RV.getAggregateAddress()); + EmitParmDecl(*Params[I], Val, I + 1); + } + } + + // Create a return value slot if the ABI implementation wants one. + // FIXME: This is dumb, we should ask the ABI not to try to set the return + // value instead. + if (!RetType->isVoidType()) + ReturnValue = CreateIRTemp(RetType, "retval.inhctor"); + + CGM.getCXXABI().EmitInstanceFunctionProlog(*this); + CXXThisValue = CXXABIThisValue; + + // Directly emit the constructor initializers. 
+ EmitCtorPrologue(Ctor, CtorType, Params); +} + void CodeGenFunction::EmitVTableAssumptionLoad(const VPtr &Vptr, Address This) { llvm::Value *VTableGlobal = CGM.getCXXABI().getVTableAddressPoint(Vptr.Base, Vptr.VTableClass); @@ -2147,19 +2290,6 @@ void CodeGenFunction::EmitSynthesizedCXXCopyCtorCall(const CXXConstructorDecl *D, Address This, Address Src, const CXXConstructExpr *E) { - if (isMemcpyEquivalentSpecialMember(D)) { - assert(E->getNumArgs() == 1 && "unexpected argcount for trivial ctor"); - assert(D->isCopyOrMoveConstructor() && - "trivial 1-arg ctor not a copy/move ctor"); - EmitAggregateCopyCtor(This, Src, - getContext().getTypeDeclType(D->getParent()), - (*E->arg_begin())->getType()); - return; - } - llvm::Value *Callee = CGM.getAddrOfCXXStructor(D, StructorType::Complete); - assert(D->isInstance() && - "Trying to emit a member call expr on a static method!"); - const FunctionProtoType *FPT = D->getType()->castAs<FunctionProtoType>(); CallArgList Args; @@ -2177,8 +2307,7 @@ CodeGenFunction::EmitSynthesizedCXXCopyCtorCall(const CXXConstructorDecl *D, EmitCallArgs(Args, FPT, drop_begin(E->arguments(), 1), E->getConstructor(), /*ParamsToSkip*/ 1); - EmitCall(CGM.getTypes().arrangeCXXMethodCall(Args, FPT, RequiredArgs::All), - Callee, ReturnValueSlot(), Args, D); + EmitCXXConstructorCall(D, Ctor_Complete, false, false, This, Args); } void @@ -2192,21 +2321,17 @@ CodeGenFunction::EmitDelegateCXXConstructorCall(const CXXConstructorDecl *Ctor, assert(I != E && "no parameters to constructor"); // this - DelegateArgs.add(RValue::get(LoadCXXThis()), (*I)->getType()); + Address This = LoadCXXThisAddress(); + DelegateArgs.add(RValue::get(This.getPointer()), (*I)->getType()); ++I; - // vtt - if (llvm::Value *VTT = GetVTTParameter(GlobalDecl(Ctor, CtorType), - /*ForVirtualBase=*/false, - /*Delegating=*/true)) { - QualType VoidPP = getContext().getPointerType(getContext().VoidPtrTy); - DelegateArgs.add(RValue::get(VTT), VoidPP); - - if (CGM.getCXXABI().NeedsVTTParameter(CurGD)) { - assert(I != E && "cannot skip vtt parameter, already done with args"); - assert((*I)->getType() == VoidPP && "skipping parameter not of vtt type"); - ++I; - } + // FIXME: The location of the VTT parameter in the parameter list is + // specific to the Itanium ABI and shouldn't be hardcoded here. + if (CGM.getCXXABI().NeedsVTTParameter(CurGD)) { + assert(I != E && "cannot skip vtt parameter, already done with args"); + assert((*I)->getType()->isPointerType() && + "skipping parameter not of vtt type"); + ++I; } // Explicit arguments. 
@@ -2216,11 +2341,8 @@ CodeGenFunction::EmitDelegateCXXConstructorCall(const CXXConstructorDecl *Ctor, EmitDelegateCallArg(DelegateArgs, param, Loc); } - llvm::Value *Callee = - CGM.getAddrOfCXXStructor(Ctor, getFromCtorType(CtorType)); - EmitCall(CGM.getTypes() - .arrangeCXXStructorDeclaration(Ctor, getFromCtorType(CtorType)), - Callee, ReturnValueSlot(), DelegateArgs, Ctor); + EmitCXXConstructorCall(Ctor, CtorType, /*ForVirtualBase=*/false, + /*Delegating=*/true, This, DelegateArgs); } namespace { @@ -2289,7 +2411,7 @@ namespace { /*Delegating=*/false, Addr); } }; -} +} // end anonymous namespace void CodeGenFunction::PushDestructorCleanup(const CXXDestructorDecl *D, Address Addr) { @@ -2487,15 +2609,35 @@ LeastDerivedClassWithSameLayout(const CXXRecordDecl *RD) { RD->bases_begin()->getType()->getAsCXXRecordDecl()); } -void CodeGenFunction::EmitVTablePtrCheckForCall(const CXXMethodDecl *MD, +void CodeGenFunction::EmitTypeMetadataCodeForVCall(const CXXRecordDecl *RD, + llvm::Value *VTable, + SourceLocation Loc) { + if (CGM.getCodeGenOpts().WholeProgramVTables && + CGM.HasHiddenLTOVisibility(RD)) { + llvm::Metadata *MD = + CGM.CreateMetadataIdentifierForType(QualType(RD->getTypeForDecl(), 0)); + llvm::Value *TypeId = + llvm::MetadataAsValue::get(CGM.getLLVMContext(), MD); + + llvm::Value *CastedVTable = Builder.CreateBitCast(VTable, Int8PtrTy); + llvm::Value *TypeTest = + Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::type_test), + {CastedVTable, TypeId}); + Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::assume), TypeTest); + } + + if (SanOpts.has(SanitizerKind::CFIVCall)) + EmitVTablePtrCheckForCall(RD, VTable, CodeGenFunction::CFITCK_VCall, Loc); +} + +void CodeGenFunction::EmitVTablePtrCheckForCall(const CXXRecordDecl *RD, llvm::Value *VTable, CFITypeCheckKind TCK, SourceLocation Loc) { - const CXXRecordDecl *ClassDecl = MD->getParent(); if (!SanOpts.has(SanitizerKind::CFICastStrict)) - ClassDecl = LeastDerivedClassWithSameLayout(ClassDecl); + RD = LeastDerivedClassWithSameLayout(RD); - EmitVTablePtrCheck(ClassDecl, VTable, TCK, Loc); + EmitVTablePtrCheck(RD, VTable, TCK, Loc); } void CodeGenFunction::EmitVTablePtrCheckForCast(QualType T, @@ -2547,26 +2689,41 @@ void CodeGenFunction::EmitVTablePtrCheck(const CXXRecordDecl *RD, llvm::Value *VTable, CFITypeCheckKind TCK, SourceLocation Loc) { - if (CGM.IsCFIBlacklistedRecord(RD)) + if (!CGM.getCodeGenOpts().SanitizeCfiCrossDso && + !CGM.HasHiddenLTOVisibility(RD)) + return; + + std::string TypeName = RD->getQualifiedNameAsString(); + if (getContext().getSanitizerBlacklist().isBlacklistedType(TypeName)) return; SanitizerScope SanScope(this); + llvm::SanitizerStatKind SSK; + switch (TCK) { + case CFITCK_VCall: + SSK = llvm::SanStat_CFI_VCall; + break; + case CFITCK_NVCall: + SSK = llvm::SanStat_CFI_NVCall; + break; + case CFITCK_DerivedCast: + SSK = llvm::SanStat_CFI_DerivedCast; + break; + case CFITCK_UnrelatedCast: + SSK = llvm::SanStat_CFI_UnrelatedCast; + break; + case CFITCK_ICall: + llvm_unreachable("not expecting CFITCK_ICall"); + } + EmitSanitizerStatReport(SSK); llvm::Metadata *MD = CGM.CreateMetadataIdentifierForType(QualType(RD->getTypeForDecl(), 0)); - llvm::Value *BitSetName = llvm::MetadataAsValue::get(getLLVMContext(), MD); + llvm::Value *TypeId = llvm::MetadataAsValue::get(getLLVMContext(), MD); llvm::Value *CastedVTable = Builder.CreateBitCast(VTable, Int8PtrTy); - llvm::Value *BitSetTest = - Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::bitset_test), - {CastedVTable, BitSetName}); - - if 
(CGM.getCodeGenOpts().SanitizeCfiCrossDso) { - if (auto TypeId = CGM.CreateCfiIdForTypeMetadata(MD)) { - EmitCfiSlowPathCheck(BitSetTest, TypeId, CastedVTable); - return; - } - } + llvm::Value *TypeTest = Builder.CreateCall( + CGM.getIntrinsic(llvm::Intrinsic::type_test), {CastedVTable, TypeId}); SanitizerMask M; switch (TCK) { @@ -2582,15 +2739,70 @@ void CodeGenFunction::EmitVTablePtrCheck(const CXXRecordDecl *RD, case CFITCK_UnrelatedCast: M = SanitizerKind::CFIUnrelatedCast; break; + case CFITCK_ICall: + llvm_unreachable("not expecting CFITCK_ICall"); } llvm::Constant *StaticData[] = { + llvm::ConstantInt::get(Int8Ty, TCK), EmitCheckSourceLocation(Loc), EmitCheckTypeDescriptor(QualType(RD->getTypeForDecl(), 0)), - llvm::ConstantInt::get(Int8Ty, TCK), }; - EmitCheck(std::make_pair(BitSetTest, M), "cfi_bad_type", StaticData, - CastedVTable); + + auto CrossDsoTypeId = CGM.CreateCrossDsoCfiTypeId(MD); + if (CGM.getCodeGenOpts().SanitizeCfiCrossDso && CrossDsoTypeId) { + EmitCfiSlowPathCheck(M, TypeTest, CrossDsoTypeId, CastedVTable, StaticData); + return; + } + + if (CGM.getCodeGenOpts().SanitizeTrap.has(M)) { + EmitTrapCheck(TypeTest); + return; + } + + llvm::Value *AllVtables = llvm::MetadataAsValue::get( + CGM.getLLVMContext(), + llvm::MDString::get(CGM.getLLVMContext(), "all-vtables")); + llvm::Value *ValidVtable = Builder.CreateCall( + CGM.getIntrinsic(llvm::Intrinsic::type_test), {CastedVTable, AllVtables}); + EmitCheck(std::make_pair(TypeTest, M), "cfi_check_fail", StaticData, + {CastedVTable, ValidVtable}); +} + +bool CodeGenFunction::ShouldEmitVTableTypeCheckedLoad(const CXXRecordDecl *RD) { + if (!CGM.getCodeGenOpts().WholeProgramVTables || + !SanOpts.has(SanitizerKind::CFIVCall) || + !CGM.getCodeGenOpts().SanitizeTrap.has(SanitizerKind::CFIVCall) || + !CGM.HasHiddenLTOVisibility(RD)) + return false; + + std::string TypeName = RD->getQualifiedNameAsString(); + return !getContext().getSanitizerBlacklist().isBlacklistedType(TypeName); +} + +llvm::Value *CodeGenFunction::EmitVTableTypeCheckedLoad( + const CXXRecordDecl *RD, llvm::Value *VTable, uint64_t VTableByteOffset) { + SanitizerScope SanScope(this); + + EmitSanitizerStatReport(llvm::SanStat_CFI_VCall); + + llvm::Metadata *MD = + CGM.CreateMetadataIdentifierForType(QualType(RD->getTypeForDecl(), 0)); + llvm::Value *TypeId = llvm::MetadataAsValue::get(CGM.getLLVMContext(), MD); + + llvm::Value *CastedVTable = Builder.CreateBitCast(VTable, Int8PtrTy); + llvm::Value *CheckedLoad = Builder.CreateCall( + CGM.getIntrinsic(llvm::Intrinsic::type_checked_load), + {CastedVTable, llvm::ConstantInt::get(Int32Ty, VTableByteOffset), + TypeId}); + llvm::Value *CheckResult = Builder.CreateExtractValue(CheckedLoad, 1); + + EmitCheck(std::make_pair(CheckResult, SanitizerKind::CFIVCall), + "cfi_check_fail", nullptr, nullptr); + + return Builder.CreateBitCast( + Builder.CreateExtractValue(CheckedLoad, 0), + cast<llvm::PointerType>(VTable->getType())->getElementType()); } // FIXME: Ideally Expr::IgnoreParenNoopCasts should do this, but it doesn't do @@ -2731,7 +2943,7 @@ void CodeGenFunction::EmitLambdaBlockInvokeBody() { CallArgs.add(RValue::get(ThisPtr.getPointer()), ThisType); // Add the rest of the parameters. 
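  // For illustration only (not part of this change): the vtable checks above
  // now emit the llvm.type.test / llvm.type.checked.load intrinsics instead
  // of the older bitset intrinsics, so virtual calls can be checked and, under
  // whole-program vtable optimization, devirtualized. A hedged example of the
  // flags that enable this path, assuming the spellings in this release:
  //
  //   clang++ -O2 -flto -fvisibility=hidden -fwhole-program-vtables \
  //       -fsanitize=cfi-vcall app.cpp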
- for (auto param : BD->params()) + for (auto param : BD->parameters()) EmitDelegateCallArg(CallArgs, param, param->getLocStart()); assert(!Lambda->isGenericLambda() && @@ -2761,7 +2973,7 @@ void CodeGenFunction::EmitLambdaDelegatingInvokeBody(const CXXMethodDecl *MD) { CallArgs.add(RValue::get(ThisPtr), ThisType); // Add the rest of the parameters. - for (auto Param : MD->params()) + for (auto Param : MD->parameters()) EmitDelegateCallArg(CallArgs, Param, Param->getLocStart()); const CXXMethodDecl *CallOp = Lambda->getLambdaCallOperator(); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGCleanup.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGCleanup.cpp index ba7dcf7..b3278b3 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGCleanup.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGCleanup.cpp @@ -112,7 +112,7 @@ RValue DominatingValue<RValue>::saved_type::restore(CodeGenFunction &CGF) { /// Push an entry of the given size onto this protected-scope stack. char *EHScopeStack::allocate(size_t Size) { - Size = llvm::RoundUpToAlignment(Size, ScopeStackAlignment); + Size = llvm::alignTo(Size, ScopeStackAlignment); if (!StartOfBuffer) { unsigned Capacity = 1024; while (Capacity < Size) Capacity *= 2; @@ -143,7 +143,7 @@ char *EHScopeStack::allocate(size_t Size) { } void EHScopeStack::deallocate(size_t Size) { - StartOfData += llvm::RoundUpToAlignment(Size, ScopeStackAlignment); + StartOfData += llvm::alignTo(Size, ScopeStackAlignment); } bool EHScopeStack::containsOnlyLifetimeMarkers( @@ -157,6 +157,20 @@ bool EHScopeStack::containsOnlyLifetimeMarkers( return true; } +bool EHScopeStack::requiresLandingPad() const { + for (stable_iterator si = getInnermostEHScope(); si != stable_end(); ) { + // Skip lifetime markers. + if (auto *cleanup = dyn_cast<EHCleanupScope>(&*find(si))) + if (cleanup->isLifetimeMarker()) { + si = cleanup->getEnclosingEHScope(); + continue; + } + return true; + } + + return false; +} + EHScopeStack::stable_iterator EHScopeStack::getInnermostActiveNormalCleanup() const { for (stable_iterator si = getInnermostNormalCleanup(), se = stable_end(); @@ -174,6 +188,7 @@ void *EHScopeStack::pushCleanup(CleanupKind Kind, size_t Size) { bool IsNormalCleanup = Kind & NormalCleanup; bool IsEHCleanup = Kind & EHCleanup; bool IsActive = !(Kind & InactiveCleanup); + bool IsLifetimeMarker = Kind & LifetimeMarker; EHCleanupScope *Scope = new (Buffer) EHCleanupScope(IsNormalCleanup, IsEHCleanup, @@ -186,6 +201,8 @@ void *EHScopeStack::pushCleanup(CleanupKind Kind, size_t Size) { InnermostNormalCleanup = stable_begin(); if (IsEHCleanup) InnermostEHScope = stable_begin(); + if (IsLifetimeMarker) + Scope->setLifetimeMarker(); return Scope->getCleanupBuffer(); } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGCleanup.h b/contrib/llvm/tools/clang/lib/CodeGen/CGCleanup.h index 909f00b..98d01b1 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGCleanup.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGCleanup.h @@ -86,11 +86,6 @@ protected: /// The amount of extra storage needed by the Cleanup. /// Always a multiple of the scope-stack alignment. unsigned CleanupSize : 12; - - /// The number of fixups required by enclosing scopes (not including - /// this one). If this is the top cleanup scope, all the fixups - /// from this index onwards belong to this scope. 
- unsigned FixupDepth : 32 - 18 - NumCommonBits; // currently 12 }; class FilterBitFields { @@ -188,6 +183,7 @@ public: EHScopeStack::stable_iterator enclosingEHScope) : EHScope(Catch, enclosingEHScope) { CatchBits.NumHandlers = numHandlers; + assert(CatchBits.NumHandlers == numHandlers && "NumHandlers overflow?"); } unsigned getNumHandlers() const { @@ -263,6 +259,11 @@ class LLVM_ALIGNAS(/*alignof(uint64_t)*/ 8) EHCleanupScope : public EHScope { }; mutable struct ExtInfo *ExtInfo; + /// The number of fixups required by enclosing scopes (not including + /// this one). If this is the top cleanup scope, all the fixups + /// from this index onwards belong to this scope. + unsigned FixupDepth; + struct ExtInfo &getExtInfo() { if (!ExtInfo) ExtInfo = new struct ExtInfo(); return *ExtInfo; @@ -288,8 +289,9 @@ public: unsigned cleanupSize, unsigned fixupDepth, EHScopeStack::stable_iterator enclosingNormal, EHScopeStack::stable_iterator enclosingEH) - : EHScope(EHScope::Cleanup, enclosingEH), EnclosingNormal(enclosingNormal), - NormalBlock(nullptr), ActiveFlag(nullptr), ExtInfo(nullptr) { + : EHScope(EHScope::Cleanup, enclosingEH), + EnclosingNormal(enclosingNormal), NormalBlock(nullptr), + ActiveFlag(nullptr), ExtInfo(nullptr), FixupDepth(fixupDepth) { CleanupBits.IsNormalCleanup = isNormal; CleanupBits.IsEHCleanup = isEH; CleanupBits.IsActive = isActive; @@ -297,7 +299,6 @@ public: CleanupBits.TestFlagInNormalCleanup = false; CleanupBits.TestFlagInEHCleanup = false; CleanupBits.CleanupSize = cleanupSize; - CleanupBits.FixupDepth = fixupDepth; assert(CleanupBits.CleanupSize == cleanupSize && "cleanup size overflow"); } @@ -343,7 +344,7 @@ public: return CleanupBits.TestFlagInEHCleanup; } - unsigned getFixupDepth() const { return CleanupBits.FixupDepth; } + unsigned getFixupDepth() const { return FixupDepth; } EHScopeStack::stable_iterator getEnclosingNormalCleanup() const { return EnclosingNormal; } @@ -451,6 +452,7 @@ public: EHFilterScope(unsigned numFilters) : EHScope(Filter, EHScopeStack::stable_end()) { FilterBits.NumFilters = numFilters; + assert(FilterBits.NumFilters == numFilters && "NumFilters overflow"); } static size_t getSizeForNumFilters(unsigned numFilters) { @@ -540,7 +542,7 @@ public: Size = EHPadEndScope::getSize(); break; } - Ptr += llvm::RoundUpToAlignment(Size, ScopeStackAlignment); + Ptr += llvm::alignTo(Size, ScopeStackAlignment); return *this; } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.cpp index 5df8519..0607a51 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.cpp @@ -13,6 +13,7 @@ #include "CGDebugInfo.h" #include "CGBlocks.h" +#include "CGRecordLayout.h" #include "CGCXXABI.h" #include "CGObjCRuntime.h" #include "CodeGenFunction.h" @@ -168,10 +169,10 @@ llvm::DIScope *CGDebugInfo::getContextDescriptor(const Decl *Context, } // Check namespace. 
- if (const NamespaceDecl *NSDecl = dyn_cast<NamespaceDecl>(Context)) + if (const auto *NSDecl = dyn_cast<NamespaceDecl>(Context)) return getOrCreateNameSpace(NSDecl); - if (const RecordDecl *RDecl = dyn_cast<RecordDecl>(Context)) + if (const auto *RDecl = dyn_cast<RecordDecl>(Context)) if (!RDecl->isDependentType()) return getOrCreateType(CGM.getContext().getTypeDeclType(RDecl), getOrCreateMainFile()); @@ -184,30 +185,32 @@ StringRef CGDebugInfo::getFunctionName(const FunctionDecl *FD) { FunctionTemplateSpecializationInfo *Info = FD->getTemplateSpecializationInfo(); - if (!Info && FII && !CGM.getCodeGenOpts().EmitCodeView) + // Emit the unqualified name in normal operation. LLVM and the debugger can + // compute the fully qualified name from the scope chain. If we're only + // emitting line table info, there won't be any scope chains, so emit the + // fully qualified name here so that stack traces are more accurate. + // FIXME: Do this when emitting DWARF as well as when emitting CodeView after + // evaluating the size impact. + bool UseQualifiedName = DebugKind == codegenoptions::DebugLineTablesOnly && + CGM.getCodeGenOpts().EmitCodeView; + + if (!Info && FII && !UseQualifiedName) return FII->getName(); - // Otherwise construct human readable name for debug info. SmallString<128> NS; llvm::raw_svector_ostream OS(NS); PrintingPolicy Policy(CGM.getLangOpts()); - - if (CGM.getCodeGenOpts().EmitCodeView) { - // Print a fully qualified name like MSVC would. - Policy.MSVCFormatting = true; - FD->printQualifiedName(OS, Policy); - } else { - // Print the unqualified name with some template arguments. This is what - // DWARF-based debuggers expect. + Policy.MSVCFormatting = CGM.getCodeGenOpts().EmitCodeView; + if (!UseQualifiedName) FD->printName(OS); - // Add any template specialization args. - if (Info) { - const TemplateArgumentList *TArgs = Info->TemplateArguments; - const TemplateArgument *Args = TArgs->data(); - unsigned NumArgs = TArgs->size(); - TemplateSpecializationType::PrintTemplateArgumentList(OS, Args, NumArgs, - Policy); - } + else + FD->printQualifiedName(OS, Policy); + + // Add any template specialization args. + if (Info) { + const TemplateArgumentList *TArgs = Info->TemplateArguments; + TemplateSpecializationType::PrintTemplateArgumentList(OS, TArgs->asArray(), + Policy); } // Copy this name on the side and use its reference. @@ -219,21 +222,18 @@ StringRef CGDebugInfo::getObjCMethodName(const ObjCMethodDecl *OMD) { llvm::raw_svector_ostream OS(MethodName); OS << (OMD->isInstanceMethod() ? 
'-' : '+') << '['; const DeclContext *DC = OMD->getDeclContext(); - if (const ObjCImplementationDecl *OID = - dyn_cast<const ObjCImplementationDecl>(DC)) { + if (const auto *OID = dyn_cast<ObjCImplementationDecl>(DC)) { OS << OID->getName(); - } else if (const ObjCInterfaceDecl *OID = - dyn_cast<const ObjCInterfaceDecl>(DC)) { + } else if (const auto *OID = dyn_cast<ObjCInterfaceDecl>(DC)) { OS << OID->getName(); - } else if (const ObjCCategoryDecl *OC = dyn_cast<ObjCCategoryDecl>(DC)) { + } else if (const auto *OC = dyn_cast<ObjCCategoryDecl>(DC)) { if (OC->IsClassExtension()) { OS << OC->getClassInterface()->getName(); } else { - OS << ((const NamedDecl *)OC)->getIdentifier()->getNameStart() << '(' + OS << OC->getIdentifier()->getNameStart() << '(' << OC->getIdentifier()->getNameStart() << ')'; } - } else if (const ObjCCategoryImplDecl *OCD = - dyn_cast<const ObjCCategoryImplDecl>(DC)) { + } else if (const auto *OCD = dyn_cast<ObjCCategoryImplDecl>(DC)) { OS << ((const NamedDecl *)OCD)->getIdentifier()->getNameStart() << '(' << OCD->getIdentifier()->getNameStart() << ')'; } else if (isa<ObjCProtocolDecl>(DC)) { @@ -254,20 +254,56 @@ StringRef CGDebugInfo::getSelectorName(Selector S) { } StringRef CGDebugInfo::getClassName(const RecordDecl *RD) { - // quick optimization to avoid having to intern strings that are already - // stored reliably elsewhere - if (!isa<ClassTemplateSpecializationDecl>(RD)) - return RD->getName(); - - SmallString<128> Name; - { + if (isa<ClassTemplateSpecializationDecl>(RD)) { + SmallString<128> Name; llvm::raw_svector_ostream OS(Name); RD->getNameForDiagnostic(OS, CGM.getContext().getPrintingPolicy(), /*Qualified*/ false); + + // Copy this name on the side and use its reference. + return internString(Name); } - // Copy this name on the side and use its reference. - return internString(Name); + // quick optimization to avoid having to intern strings that are already + // stored reliably elsewhere + if (const IdentifierInfo *II = RD->getIdentifier()) + return II->getName(); + + // The CodeView printer in LLVM wants to see the names of unnamed types: it is + // used to reconstruct the fully qualified type names. + if (CGM.getCodeGenOpts().EmitCodeView) { + if (const TypedefNameDecl *D = RD->getTypedefNameForAnonDecl()) { + assert(RD->getDeclContext() == D->getDeclContext() && + "Typedef should not be in another decl context!"); + assert(D->getDeclName().getAsIdentifierInfo() && + "Typedef was not named!"); + return D->getDeclName().getAsIdentifierInfo()->getName(); + } + + if (CGM.getLangOpts().CPlusPlus) { + StringRef Name; + + ASTContext &Context = CGM.getContext(); + if (const DeclaratorDecl *DD = Context.getDeclaratorForUnnamedTagDecl(RD)) + // Anonymous types without a name for linkage purposes have their + // declarator mangled in if they have one. + Name = DD->getName(); + else if (const TypedefNameDecl *TND = + Context.getTypedefNameForUnnamedTagDecl(RD)) + // Anonymous types without a name for linkage purposes have their + // associate typedef mangled in if they have one. 
+ Name = TND->getName(); + + if (!Name.empty()) { + SmallString<256> UnnamedType("<unnamed-type-"); + UnnamedType += Name; + UnnamedType += '>'; + return internString(UnnamedType); + } + } + } + + return StringRef(); } llvm::DIFile *CGDebugInfo::getOrCreateFile(SourceLocation Loc) { @@ -383,6 +419,8 @@ void CGDebugInfo::CreateCompileUnit() { LangTag = llvm::dwarf::DW_LANG_C_plus_plus; } else if (LO.ObjC1) { LangTag = llvm::dwarf::DW_LANG_ObjC; + } else if (LO.RenderScript) { + LangTag = llvm::dwarf::DW_LANG_GOOGLE_RenderScript; } else if (LO.C99) { LangTag = llvm::dwarf::DW_LANG_C99; } else { @@ -396,16 +434,27 @@ void CGDebugInfo::CreateCompileUnit() { if (LO.ObjC1) RuntimeVers = LO.ObjCRuntime.isNonFragile() ? 2 : 1; + llvm::DICompileUnit::DebugEmissionKind EmissionKind; + switch (DebugKind) { + case codegenoptions::NoDebugInfo: + case codegenoptions::LocTrackingOnly: + EmissionKind = llvm::DICompileUnit::NoDebug; + break; + case codegenoptions::DebugLineTablesOnly: + EmissionKind = llvm::DICompileUnit::LineTablesOnly; + break; + case codegenoptions::LimitedDebugInfo: + case codegenoptions::FullDebugInfo: + EmissionKind = llvm::DICompileUnit::FullDebug; + break; + } + // Create new compile unit. // FIXME - Eliminate TheCU. TheCU = DBuilder.createCompileUnit( LangTag, remapDIPath(MainFileName), remapDIPath(getCurrentDirname()), Producer, LO.Optimize, CGM.getCodeGenOpts().DwarfDebugFlags, RuntimeVers, - CGM.getCodeGenOpts().SplitDwarfFile, - DebugKind <= CodeGenOptions::DebugLineTablesOnly - ? llvm::DIBuilder::LineTablesOnly - : llvm::DIBuilder::FullDebug, - 0 /* DWOid */, DebugKind != CodeGenOptions::LocTrackingOnly); + CGM.getCodeGenOpts().SplitDwarfFile, EmissionKind, 0 /* DWOid */); } llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) { @@ -463,39 +512,11 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) { return SelTy; } - case BuiltinType::OCLImage1d: - return getOrCreateStructPtrType("opencl_image1d_t", OCLImage1dDITy); - case BuiltinType::OCLImage1dArray: - return getOrCreateStructPtrType("opencl_image1d_array_t", - OCLImage1dArrayDITy); - case BuiltinType::OCLImage1dBuffer: - return getOrCreateStructPtrType("opencl_image1d_buffer_t", - OCLImage1dBufferDITy); - case BuiltinType::OCLImage2d: - return getOrCreateStructPtrType("opencl_image2d_t", OCLImage2dDITy); - case BuiltinType::OCLImage2dArray: - return getOrCreateStructPtrType("opencl_image2d_array_t", - OCLImage2dArrayDITy); - case BuiltinType::OCLImage2dDepth: - return getOrCreateStructPtrType("opencl_image2d_depth_t", - OCLImage2dDepthDITy); - case BuiltinType::OCLImage2dArrayDepth: - return getOrCreateStructPtrType("opencl_image2d_array_depth_t", - OCLImage2dArrayDepthDITy); - case BuiltinType::OCLImage2dMSAA: - return getOrCreateStructPtrType("opencl_image2d_msaa_t", - OCLImage2dMSAADITy); - case BuiltinType::OCLImage2dArrayMSAA: - return getOrCreateStructPtrType("opencl_image2d_array_msaa_t", - OCLImage2dArrayMSAADITy); - case BuiltinType::OCLImage2dMSAADepth: - return getOrCreateStructPtrType("opencl_image2d_msaa_depth_t", - OCLImage2dMSAADepthDITy); - case BuiltinType::OCLImage2dArrayMSAADepth: - return getOrCreateStructPtrType("opencl_image2d_array_msaa_depth_t", - OCLImage2dArrayMSAADepthDITy); - case BuiltinType::OCLImage3d: - return getOrCreateStructPtrType("opencl_image3d_t", OCLImage3dDITy); +#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \ + case BuiltinType::Id: \ + return getOrCreateStructPtrType("opencl_" #ImgType "_" #Suffix "_t", \ + SingletonId); +#include 
"clang/Basic/OpenCLImageTypes.def" case BuiltinType::OCLSampler: return DBuilder.createBasicType( "opencl_sampler_t", CGM.getContext().getTypeSize(BT), @@ -545,7 +566,13 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) { case BuiltinType::Half: case BuiltinType::Float: case BuiltinType::LongDouble: + case BuiltinType::Float128: case BuiltinType::Double: + // FIXME: For targets where long double and __float128 have the same size, + // they are currently indistinguishable in the debugger without some + // special treatment. However, there is currently no consensus on encoding + // and this should be updated once a DWARF encoding exists for distinct + // floating point types of the same size. Encoding = llvm::dwarf::DW_ATE_float; break; } @@ -660,10 +687,6 @@ static SmallString<256> getUniqueTagTypeName(const TagType *Ty, if (!hasCXXMangling(TD, TheCU) || !TD->isExternallyVisible()) return FullName; - // Microsoft Mangler does not have support for mangleCXXRTTIName yet. - if (CGM.getTarget().getCXXABI().isMicrosoft()) - return FullName; - // TODO: This is using the RTTI name. Is there a better way to get // a unique string for a type? llvm::raw_svector_ostream Out(FullName); @@ -817,10 +840,10 @@ llvm::DIType *CGDebugInfo::CreateType(const TemplateSpecializationType *Ty, /*qualified*/ false); TemplateSpecializationType::PrintTemplateArgumentList( - OS, Ty->getArgs(), Ty->getNumArgs(), + OS, Ty->template_arguments(), CGM.getContext().getPrintingPolicy()); - TypeAliasDecl *AliasDecl = cast<TypeAliasTemplateDecl>( + auto *AliasDecl = cast<TypeAliasTemplateDecl>( Ty->getTemplateName().getAsTemplateDecl())->getTemplatedDecl(); SourceLocation Loc = AliasDecl->getLocation(); @@ -842,6 +865,39 @@ llvm::DIType *CGDebugInfo::CreateType(const TypedefType *Ty, getDeclContextDescriptor(Ty->getDecl())); } +static unsigned getDwarfCC(CallingConv CC) { + switch (CC) { + case CC_C: + // Avoid emitting DW_AT_calling_convention if the C convention was used. + return 0; + + case CC_X86StdCall: + return llvm::dwarf::DW_CC_BORLAND_stdcall; + case CC_X86FastCall: + return llvm::dwarf::DW_CC_BORLAND_msfastcall; + case CC_X86ThisCall: + return llvm::dwarf::DW_CC_BORLAND_thiscall; + case CC_X86VectorCall: + return llvm::dwarf::DW_CC_LLVM_vectorcall; + case CC_X86Pascal: + return llvm::dwarf::DW_CC_BORLAND_pascal; + + // FIXME: Create new DW_CC_ codes for these calling conventions. + case CC_X86_64Win64: + case CC_X86_64SysV: + case CC_AAPCS: + case CC_AAPCS_VFP: + case CC_IntelOclBicc: + case CC_SpirFunction: + case CC_OpenCLKernel: + case CC_Swift: + case CC_PreserveMost: + case CC_PreserveAll: + return 0; + } + return 0; +} + llvm::DIType *CGDebugInfo::CreateType(const FunctionType *Ty, llvm::DIFile *Unit) { SmallVector<llvm::Metadata *, 16> EltTys; @@ -853,15 +909,16 @@ llvm::DIType *CGDebugInfo::CreateType(const FunctionType *Ty, // otherwise emit it as a variadic function. 
if (isa<FunctionNoProtoType>(Ty)) EltTys.push_back(DBuilder.createUnspecifiedParameter()); - else if (const FunctionProtoType *FPT = dyn_cast<FunctionProtoType>(Ty)) { - for (unsigned i = 0, e = FPT->getNumParams(); i != e; ++i) - EltTys.push_back(getOrCreateType(FPT->getParamType(i), Unit)); + else if (const auto *FPT = dyn_cast<FunctionProtoType>(Ty)) { + for (const QualType &ParamType : FPT->param_types()) + EltTys.push_back(getOrCreateType(ParamType, Unit)); if (FPT->isVariadic()) EltTys.push_back(DBuilder.createUnspecifiedParameter()); } llvm::DITypeRefArray EltTypeArray = DBuilder.getOrCreateTypeArray(EltTys); - return DBuilder.createSubroutineType(EltTypeArray); + return DBuilder.createSubroutineType(EltTypeArray, 0, + getDwarfCC(Ty->getCallConv())); } /// Convert an AccessSpecifier into the corresponding DINode flag. @@ -890,10 +947,38 @@ static unsigned getAccessFlag(AccessSpecifier Access, const RecordDecl *RD) { llvm_unreachable("unexpected access enumerator"); } -llvm::DIType *CGDebugInfo::createFieldType( - StringRef name, QualType type, uint64_t sizeInBitsOverride, - SourceLocation loc, AccessSpecifier AS, uint64_t offsetInBits, - llvm::DIFile *tunit, llvm::DIScope *scope, const RecordDecl *RD) { +llvm::DIType *CGDebugInfo::createBitFieldType(const FieldDecl *BitFieldDecl, + llvm::DIScope *RecordTy, + const RecordDecl *RD) { + StringRef Name = BitFieldDecl->getName(); + QualType Ty = BitFieldDecl->getType(); + SourceLocation Loc = BitFieldDecl->getLocation(); + llvm::DIFile *VUnit = getOrCreateFile(Loc); + llvm::DIType *DebugType = getOrCreateType(Ty, VUnit); + + // Get the location for the field. + llvm::DIFile *File = getOrCreateFile(Loc); + unsigned Line = getLineNumber(Loc); + + const CGBitFieldInfo &BitFieldInfo = + CGM.getTypes().getCGRecordLayout(RD).getBitFieldInfo(BitFieldDecl); + uint64_t SizeInBits = BitFieldInfo.Size; + assert(SizeInBits > 0 && "found named 0-width bitfield"); + unsigned AlignInBits = CGM.getContext().getTypeAlign(Ty); + uint64_t StorageOffsetInBits = + CGM.getContext().toBits(BitFieldInfo.StorageOffset); + uint64_t OffsetInBits = StorageOffsetInBits + BitFieldInfo.Offset; + unsigned Flags = getAccessFlag(BitFieldDecl->getAccess(), RD); + return DBuilder.createBitFieldMemberType( + RecordTy, Name, File, Line, SizeInBits, AlignInBits, OffsetInBits, + StorageOffsetInBits, Flags, DebugType); +} + +llvm::DIType * +CGDebugInfo::createFieldType(StringRef name, QualType type, SourceLocation loc, + AccessSpecifier AS, uint64_t offsetInBits, + llvm::DIFile *tunit, llvm::DIScope *scope, + const RecordDecl *RD) { llvm::DIType *debugType = getOrCreateType(type, tunit); // Get the location for the field. 
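createBitFieldType above derives the DWARF bit offset from the record layout: the byte offset of the storage unit, converted to bits, plus the field's offset inside that unit. Restated on plain integers, assuming a little-endian layout so offsets count from the low end:

    #include <cassert>
    #include <cstdint>

    struct BitFieldInfoExample {
      uint64_t StorageOffsetInBytes; // byte offset of the storage unit in the record
      uint64_t OffsetInStorageUnit;  // bit offset of the field within that unit
      uint64_t SizeInBits;           // width of the field
    };

    uint64_t dwarfBitOffset(const BitFieldInfoExample &Info) {
      uint64_t StorageOffsetInBits = Info.StorageOffsetInBytes * 8; // toBits()
      return StorageOffsetInBits + Info.OffsetInStorageUnit;
    }

    int main() {
      // Models  struct { unsigned a : 3; unsigned b : 5; }  with one 4-byte unit:
      BitFieldInfoExample B{/*StorageOffsetInBytes=*/0,
                            /*OffsetInStorageUnit=*/3,
                            /*SizeInBits=*/5};
      assert(dwarfBitOffset(B) == 3); // 'b' starts at bit 3 of the record
      return 0;
    }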
@@ -906,9 +991,6 @@ llvm::DIType *CGDebugInfo::createFieldType( TypeInfo TI = CGM.getContext().getTypeInfo(type); SizeInBits = TI.Width; AlignInBits = TI.Align; - - if (sizeInBitsOverride) - SizeInBits = sizeInBitsOverride; } unsigned flags = getAccessFlag(AS, RD); @@ -930,19 +1012,15 @@ void CGDebugInfo::CollectRecordLambdaFields( I != E; ++I, ++Field, ++fieldno) { const LambdaCapture &C = *I; if (C.capturesVariable()) { + SourceLocation Loc = C.getLocation(); + assert(!Field->isBitField() && "lambdas don't have bitfield members!"); VarDecl *V = C.getCapturedVar(); - llvm::DIFile *VUnit = getOrCreateFile(C.getLocation()); StringRef VName = V->getName(); - uint64_t SizeInBitsOverride = 0; - if (Field->isBitField()) { - SizeInBitsOverride = Field->getBitWidthValue(CGM.getContext()); - assert(SizeInBitsOverride && "found named 0-width bitfield"); - } - llvm::DIType *fieldType = createFieldType( - VName, Field->getType(), SizeInBitsOverride, C.getLocation(), - Field->getAccess(), layout.getFieldOffset(fieldno), VUnit, RecordTy, - CXXDecl); - elements.push_back(fieldType); + llvm::DIFile *VUnit = getOrCreateFile(Loc); + llvm::DIType *FieldType = createFieldType( + VName, Field->getType(), Loc, Field->getAccess(), + layout.getFieldOffset(fieldno), VUnit, RecordTy, CXXDecl); + elements.push_back(FieldType); } else if (C.capturesThis()) { // TODO: Need to handle 'this' in some way by probably renaming the // this of the lambda class and having a field member of 'this' or @@ -952,7 +1030,7 @@ void CGDebugInfo::CollectRecordLambdaFields( llvm::DIFile *VUnit = getOrCreateFile(f->getLocation()); QualType type = f->getType(); llvm::DIType *fieldType = createFieldType( - "this", type, 0, f->getLocation(), f->getAccess(), + "this", type, f->getLocation(), f->getAccess(), layout.getFieldOffset(fieldno), VUnit, RecordTy, CXXDecl); elements.push_back(fieldType); @@ -1000,24 +1078,23 @@ void CGDebugInfo::CollectRecordNormalField( if (name.empty() && !type->isRecordType()) return; - uint64_t SizeInBitsOverride = 0; + llvm::DIType *FieldType; if (field->isBitField()) { - SizeInBitsOverride = field->getBitWidthValue(CGM.getContext()); - assert(SizeInBitsOverride && "found named 0-width bitfield"); + FieldType = createBitFieldType(field, RecordTy, RD); + } else { + FieldType = + createFieldType(name, type, field->getLocation(), field->getAccess(), + OffsetInBits, tunit, RecordTy, RD); } - llvm::DIType *fieldType = - createFieldType(name, type, SizeInBitsOverride, field->getLocation(), - field->getAccess(), OffsetInBits, tunit, RecordTy, RD); - - elements.push_back(fieldType); + elements.push_back(FieldType); } void CGDebugInfo::CollectRecordFields( const RecordDecl *record, llvm::DIFile *tunit, SmallVectorImpl<llvm::Metadata *> &elements, llvm::DICompositeType *RecordTy) { - const CXXRecordDecl *CXXDecl = dyn_cast<CXXRecordDecl>(record); + const auto *CXXDecl = dyn_cast<CXXRecordDecl>(record); if (CXXDecl && CXXDecl->isLambda()) CollectRecordLambdaFields(CXXDecl, elements, RecordTy); @@ -1031,6 +1108,8 @@ void CGDebugInfo::CollectRecordFields( // the corresponding declarations in the source program. 
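CollectRecordLambdaFields walks a lambda's captures because each capture is a field of the compiler-generated closure class, so its debug info is emitted field by field like any other record. A rough hand-written model of such a closure (the real type is unnamed and compiler-internal):

    // Approximation only: the actual closure type is generated by the compiler.
    struct HandWrittenClosure {
      int x;   // by-value capture: the field holds a copy
      int &y;  // by-reference capture: the field refers to the enclosing local
      int operator()() const { return x + y; }
    };

    int useLambda(int x) {
      int y = 2;
      auto f = [x, &y] { return x + y; }; // closure layout resembles the struct
      return f() + HandWrittenClosure{x, y}();
    }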
for (const auto *I : record->decls()) if (const auto *V = dyn_cast<VarDecl>(I)) { + if (V->hasAttr<NoDebugAttr>()) + continue; // Reuse the existing static member declaration if one exists auto MI = StaticDataMemberCache.find(V->getCanonicalDecl()); if (MI != StaticDataMemberCache.end()) { @@ -1112,13 +1191,14 @@ llvm::DISubroutineType *CGDebugInfo::getOrCreateInstanceMethodType( if (Func->getExtProtoInfo().RefQualifier == RQ_RValue) Flags |= llvm::DINode::FlagRValueReference; - return DBuilder.createSubroutineType(EltTypeArray, Flags); + return DBuilder.createSubroutineType(EltTypeArray, Flags, + getDwarfCC(Func->getCallConv())); } /// isFunctionLocalClass - Return true if CXXRecordDecl is defined /// inside a function. static bool isFunctionLocalClass(const CXXRecordDecl *RD) { - if (const CXXRecordDecl *NRD = dyn_cast<CXXRecordDecl>(RD->getDeclContext())) + if (const auto *NRD = dyn_cast<CXXRecordDecl>(RD->getDeclContext())) return isFunctionLocalClass(NRD); if (isa<FunctionDecl>(RD->getDeclContext())) return true; @@ -1136,6 +1216,11 @@ llvm::DISubprogram *CGDebugInfo::CreateCXXMemberFunction( // Since a single ctor/dtor corresponds to multiple functions, it doesn't // make sense to give a single ctor/dtor a linkage name. StringRef MethodLinkageName; + // FIXME: 'isFunctionLocalClass' seems like an arbitrary/unintentional + // property to use here. It may've been intended to model "is non-external + // type" but misses cases of non-function-local but non-external classes such + // as those in anonymous namespaces as well as the reverse - external types + // that are function local, such as those in (non-local) inline functions. if (!IsCtorOrDtor && !isFunctionLocalClass(Method->getParent())) MethodLinkageName = CGM.getMangledName(Method); @@ -1151,6 +1236,8 @@ llvm::DISubprogram *CGDebugInfo::CreateCXXMemberFunction( llvm::DIType *ContainingType = nullptr; unsigned Virtuality = 0; unsigned VIndex = 0; + unsigned Flags = 0; + int ThisAdjustment = 0; if (Method->isVirtual()) { if (Method->isPure()) @@ -1158,26 +1245,45 @@ llvm::DISubprogram *CGDebugInfo::CreateCXXMemberFunction( else Virtuality = llvm::dwarf::DW_VIRTUALITY_virtual; - // It doesn't make sense to give a virtual destructor a vtable index, - // since a single destructor has two entries in the vtable. - // FIXME: Add proper support for debug info for virtual calls in - // the Microsoft ABI, where we may use multiple vptrs to make a vftable - // lookup if we have multiple or virtual inheritance. - if (!isa<CXXDestructorDecl>(Method) && - !CGM.getTarget().getCXXABI().isMicrosoft()) - VIndex = CGM.getItaniumVTableContext().getMethodVTableIndex(Method); + if (CGM.getTarget().getCXXABI().isItaniumFamily()) { + // It doesn't make sense to give a virtual destructor a vtable index, + // since a single destructor has two entries in the vtable. + if (!isa<CXXDestructorDecl>(Method)) + VIndex = CGM.getItaniumVTableContext().getMethodVTableIndex(Method); + } else { + // Emit MS ABI vftable information. There is only one entry for the + // deleting dtor. + const auto *DD = dyn_cast<CXXDestructorDecl>(Method); + GlobalDecl GD = DD ? GlobalDecl(DD, Dtor_Deleting) : GlobalDecl(Method); + MicrosoftVTableContext::MethodVFTableLocation ML = + CGM.getMicrosoftVTableContext().getMethodVFTableLocation(GD); + VIndex = ML.Index; + + // CodeView only records the vftable offset in the class that introduces + // the virtual method. 
This is possible because, unlike Itanium, the MS + // C++ ABI does not include all virtual methods from non-primary bases in + // the vtable for the most derived class. For example, if C inherits from + // A and B, C's primary vftable will not include B's virtual methods. + if (Method->begin_overridden_methods() == Method->end_overridden_methods()) + Flags |= llvm::DINode::FlagIntroducedVirtual; + + // The 'this' adjustment accounts for both the virtual and non-virtual + // portions of the adjustment. Presumably the debugger only uses it when + // it knows the dynamic type of an object. + ThisAdjustment = CGM.getCXXABI() + .getVirtualFunctionPrologueThisAdjustment(GD) + .getQuantity(); + } ContainingType = RecordTy; } - unsigned Flags = 0; if (Method->isImplicit()) Flags |= llvm::DINode::FlagArtificial; Flags |= getAccessFlag(Method->getAccess(), Method->getParent()); - if (const CXXConstructorDecl *CXXC = dyn_cast<CXXConstructorDecl>(Method)) { + if (const auto *CXXC = dyn_cast<CXXConstructorDecl>(Method)) { if (CXXC->isExplicit()) Flags |= llvm::DINode::FlagExplicit; - } else if (const CXXConversionDecl *CXXC = - dyn_cast<CXXConversionDecl>(Method)) { + } else if (const auto *CXXC = dyn_cast<CXXConversionDecl>(Method)) { if (CXXC->isExplicit()) Flags |= llvm::DINode::FlagExplicit; } @@ -1191,9 +1297,9 @@ llvm::DISubprogram *CGDebugInfo::CreateCXXMemberFunction( llvm::DINodeArray TParamsArray = CollectFunctionTemplateParams(Method, Unit); llvm::DISubprogram *SP = DBuilder.createMethod( RecordTy, MethodName, MethodLinkageName, MethodDefUnit, MethodLine, - MethodTy, /*isLocalToUnit=*/false, - /* isDefinition=*/false, Virtuality, VIndex, ContainingType, Flags, - CGM.getLangOpts().Optimize, TParamsArray.get()); + MethodTy, /*isLocalToUnit=*/false, /*isDefinition=*/false, Virtuality, + VIndex, ThisAdjustment, ContainingType, Flags, CGM.getLangOpts().Optimize, + TParamsArray.get()); SPCache[Method->getCanonicalDecl()].reset(SP); @@ -1246,7 +1352,7 @@ void CGDebugInfo::CollectCXXBases(const CXXRecordDecl *RD, llvm::DIFile *Unit, unsigned BFlags = 0; uint64_t BaseOffset; - const CXXRecordDecl *Base = + const auto *Base = cast<CXXRecordDecl>(BI.getType()->getAs<RecordType>()->getDecl()); if (BI.isVirtual()) { @@ -1334,8 +1440,7 @@ CGDebugInfo::CollectTemplateParams(const TemplateParameterList *TPList, llvm::Constant *V = nullptr; // Special case member data pointer null values since they're actually -1 // instead of zero. - if (const MemberPointerType *MPT = - dyn_cast<MemberPointerType>(T.getTypePtr())) + if (const auto *MPT = dyn_cast<MemberPointerType>(T.getTypePtr())) // But treat member function pointers as simple zero integers because // it's easier than having a special case in LLVM's CodeGen. 
If LLVM // CodeGen grows handling for values of non-null member function @@ -1346,7 +1451,7 @@ CGDebugInfo::CollectTemplateParams(const TemplateParameterList *TPList, if (!V) V = llvm::ConstantInt::get(CGM.Int8Ty, 0); TemplateParams.push_back(DBuilder.createTemplateValueParameter( - TheCU, Name, TTy, cast<llvm::Constant>(V))); + TheCU, Name, TTy, V)); } break; case TemplateArgument::Template: TemplateParams.push_back(DBuilder.createTemplateTemplateParameter( @@ -1367,7 +1472,7 @@ CGDebugInfo::CollectTemplateParams(const TemplateParameterList *TPList, assert(V && "Expression in template argument isn't constant"); llvm::DIType *TTy = getOrCreateType(T, Unit); TemplateParams.push_back(DBuilder.createTemplateValueParameter( - TheCU, Name, TTy, cast<llvm::Constant>(V->stripPointerCasts()))); + TheCU, Name, TTy, V->stripPointerCasts())); } break; // And the following should never occur: case TemplateArgument::TemplateExpansion: @@ -1446,7 +1551,7 @@ void CGDebugInfo::CollectVTableInfo(const CXXRecordDecl *RD, llvm::DIFile *Unit, llvm::DIType *CGDebugInfo::getOrCreateRecordType(QualType RTy, SourceLocation Loc) { - assert(DebugKind >= CodeGenOptions::LimitedDebugInfo); + assert(DebugKind >= codegenoptions::LimitedDebugInfo); llvm::DIType *T = getOrCreateType(RTy, getOrCreateFile(Loc)); return T; } @@ -1458,22 +1563,17 @@ llvm::DIType *CGDebugInfo::getOrCreateInterfaceType(QualType D, llvm::DIType *CGDebugInfo::getOrCreateStandaloneType(QualType D, SourceLocation Loc) { - assert(DebugKind >= CodeGenOptions::LimitedDebugInfo); + assert(DebugKind >= codegenoptions::LimitedDebugInfo); assert(!D.isNull() && "null type"); llvm::DIType *T = getOrCreateType(D, getOrCreateFile(Loc)); assert(T && "could not create debug info for type"); - // Composite types with UIDs were already retained by DIBuilder - // because they are only referenced by name in the IR. 
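The special case for null member data pointers reflects the Itanium C++ ABI, where a pointer to data member is stored as the member's byte offset; offset 0 names a real member, so null is encoded as -1. A small probe of that encoding, valid only on Itanium-ABI targets:

    #include <cassert>
    #include <cstring>

    struct S { int first; };

    int main() {
      int S::*null_mp = nullptr;
      unsigned char bytes[sizeof(null_mp)];
      std::memcpy(bytes, &null_mp, sizeof(null_mp));
      for (unsigned char b : bytes)
        assert(b == 0xFF); // all-ones, i.e. the -1 null encoding
      return 0;
    }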
- if (auto *CTy = dyn_cast<llvm::DICompositeType>(T)) - if (!CTy->getIdentifier().empty()) - return T; RetainedTypes.push_back(D.getAsOpaquePtr()); return T; } void CGDebugInfo::completeType(const EnumDecl *ED) { - if (DebugKind <= CodeGenOptions::DebugLineTablesOnly) + if (DebugKind <= codegenoptions::DebugLineTablesOnly) return; QualType Ty = CGM.getContext().getEnumType(ED); void *TyPtr = Ty.getAsOpaquePtr(); @@ -1486,16 +1586,16 @@ void CGDebugInfo::completeType(const EnumDecl *ED) { } void CGDebugInfo::completeType(const RecordDecl *RD) { - if (DebugKind > CodeGenOptions::LimitedDebugInfo || + if (DebugKind > codegenoptions::LimitedDebugInfo || !CGM.getLangOpts().CPlusPlus) completeRequiredType(RD); } void CGDebugInfo::completeRequiredType(const RecordDecl *RD) { - if (DebugKind <= CodeGenOptions::DebugLineTablesOnly) + if (DebugKind <= codegenoptions::DebugLineTablesOnly) return; - if (const CXXRecordDecl *CXXDecl = dyn_cast<CXXRecordDecl>(RD)) + if (const auto *CXXDecl = dyn_cast<CXXRecordDecl>(RD)) if (CXXDecl->isDynamicClass()) return; @@ -1509,7 +1609,7 @@ void CGDebugInfo::completeRequiredType(const RecordDecl *RD) { } void CGDebugInfo::completeClassData(const RecordDecl *RD) { - if (DebugKind <= CodeGenOptions::DebugLineTablesOnly) + if (DebugKind <= codegenoptions::DebugLineTablesOnly) return; QualType Ty = CGM.getContext().getRecordType(RD); void *TyPtr = Ty.getAsOpaquePtr(); @@ -1523,23 +1623,38 @@ void CGDebugInfo::completeClassData(const RecordDecl *RD) { static bool hasExplicitMemberDefinition(CXXRecordDecl::method_iterator I, CXXRecordDecl::method_iterator End) { - for (; I != End; ++I) - if (FunctionDecl *Tmpl = I->getInstantiatedFromMemberFunction()) + for (CXXMethodDecl *MD : llvm::make_range(I, End)) + if (FunctionDecl *Tmpl = MD->getInstantiatedFromMemberFunction()) if (!Tmpl->isImplicit() && Tmpl->isThisDeclarationADefinition() && - !I->getMemberSpecializationInfo()->isExplicitSpecialization()) + !MD->getMemberSpecializationInfo()->isExplicitSpecialization()) return true; return false; } -static bool shouldOmitDefinition(CodeGenOptions::DebugInfoKind DebugKind, - bool DebugTypeExtRefs, - const RecordDecl *RD, +/// Does a type definition exist in an imported clang module? +static bool isDefinedInClangModule(const RecordDecl *RD) { + if (!RD || !RD->isFromASTFile()) + return false; + if (!RD->isExternallyVisible() && RD->getName().empty()) + return false; + if (auto *CXXDecl = dyn_cast<CXXRecordDecl>(RD)) { + assert(CXXDecl->isCompleteDefinition() && "incomplete record definition"); + if (CXXDecl->getTemplateSpecializationKind() != TSK_Undeclared) + // Make sure the instantiation is actually in a module. + if (CXXDecl->field_begin() != CXXDecl->field_end()) + return CXXDecl->field_begin()->isFromASTFile(); + } + + return true; +} + +static bool shouldOmitDefinition(codegenoptions::DebugInfoKind DebugKind, + bool DebugTypeExtRefs, const RecordDecl *RD, const LangOptions &LangOpts) { - // Does the type exist in an imported clang module? 
- if (DebugTypeExtRefs && RD->isFromASTFile() && RD->getDefinition()) - return true; + if (DebugTypeExtRefs && isDefinedInClangModule(RD->getDefinition())) + return true; - if (DebugKind > CodeGenOptions::LimitedDebugInfo) + if (DebugKind > codegenoptions::LimitedDebugInfo) return false; if (!LangOpts.CPlusPlus) @@ -1548,7 +1663,7 @@ static bool shouldOmitDefinition(CodeGenOptions::DebugInfoKind DebugKind, if (!RD->isCompleteDefinitionRequired()) return true; - const CXXRecordDecl *CXXDecl = dyn_cast<CXXRecordDecl>(RD); + const auto *CXXDecl = dyn_cast<CXXRecordDecl>(RD); if (!CXXDecl) return false; @@ -1557,8 +1672,7 @@ static bool shouldOmitDefinition(CodeGenOptions::DebugInfoKind DebugKind, return true; TemplateSpecializationKind Spec = TSK_Undeclared; - if (const ClassTemplateSpecializationDecl *SD = - dyn_cast<ClassTemplateSpecializationDecl>(RD)) + if (const auto *SD = dyn_cast<ClassTemplateSpecializationDecl>(RD)) Spec = SD->getSpecializationKind(); if (Spec == TSK_ExplicitInstantiationDeclaration && @@ -1600,7 +1714,7 @@ llvm::DIType *CGDebugInfo::CreateTypeDefinition(const RecordType *Ty) { if (!D || !D->isCompleteDefinition()) return FwdDecl; - if (const CXXRecordDecl *CXXDecl = dyn_cast<CXXRecordDecl>(RD)) + if (const auto *CXXDecl = dyn_cast<CXXRecordDecl>(RD)) CollectContainingType(CXXDecl, FwdDecl); // Push the struct on region stack. @@ -1615,7 +1729,7 @@ llvm::DIType *CGDebugInfo::CreateTypeDefinition(const RecordType *Ty) { // gdb tests will depend on a certain ordering at printout. The debug // information offsets are still correct if we merge them all together // though. - const CXXRecordDecl *CXXDecl = dyn_cast<CXXRecordDecl>(RD); + const auto *CXXDecl = dyn_cast<CXXRecordDecl>(RD); if (CXXDecl) { CollectCXXBases(CXXDecl, DefUnit, EltTys, FwdDecl); CollectVTableInfo(CXXDecl, DefUnit, EltTys); @@ -1676,8 +1790,11 @@ llvm::DIType *CGDebugInfo::CreateType(const ObjCInterfaceType *Ty, if (!ID) return nullptr; - // Return a forward declaration if this type was imported from a clang module. - if (DebugTypeExtRefs && ID->isFromASTFile() && ID->getDefinition()) + // Return a forward declaration if this type was imported from a clang module, + // and this is not the compile unit with the implementation of the type (which + // may contain hidden ivars). + if (DebugTypeExtRefs && ID->isFromASTFile() && ID->getDefinition() && + !ID->getImplementation()) return DBuilder.createForwardDecl(llvm::dwarf::DW_TAG_structure_type, ID->getName(), getDeclContextDescriptor(ID), Unit, 0); @@ -1739,11 +1856,14 @@ CGDebugInfo::getOrCreateModuleRef(ExternalASTSource::ASTSourceDescriptor Mod, bool IsRootModule = M ? !M->Parent : true; if (CreateSkeletonCU && IsRootModule) { + // PCH files don't have a signature field in the control block, + // but LLVM detects skeleton CUs by looking for a non-zero DWO id. + uint64_t Signature = Mod.getSignature() ? 
Mod.getSignature() : ~1ULL; llvm::DIBuilder DIB(CGM.getModule()); DIB.createCompileUnit(TheCU->getSourceLanguage(), Mod.getModuleName(), Mod.getPath(), TheCU->getProducer(), true, StringRef(), 0, Mod.getASTFile(), - llvm::DIBuilder::FullDebug, Mod.getSignature()); + llvm::DICompileUnit::FullDebug, Signature); DIB.finalize(); } llvm::DIModule *Parent = @@ -1942,7 +2062,7 @@ llvm::DIType *CGDebugInfo::CreateType(const ArrayType *Ty, llvm::DIFile *Unit) { uint64_t Align; // FIXME: make getTypeAlign() aware of VLAs and incomplete array types - if (const VariableArrayType *VAT = dyn_cast<VariableArrayType>(Ty)) { + if (const auto *VAT = dyn_cast<VariableArrayType>(Ty)) { Size = 0; Align = CGM.getContext().getTypeAlign(CGM.getContext().getBaseElementType(VAT)); @@ -1975,7 +2095,7 @@ llvm::DIType *CGDebugInfo::CreateType(const ArrayType *Ty, llvm::DIFile *Unit) { // int x[0]; // }; int64_t Count = -1; // Count == -1 is an unbounded array. - if (const ConstantArrayType *CAT = dyn_cast<ConstantArrayType>(Ty)) + if (const auto *CAT = dyn_cast<ConstantArrayType>(Ty)) Count = CAT->getSize().getZExtValue(); // FIXME: Verify this is right for VLAs. @@ -2003,12 +2123,35 @@ llvm::DIType *CGDebugInfo::CreateType(const RValueReferenceType *Ty, llvm::DIType *CGDebugInfo::CreateType(const MemberPointerType *Ty, llvm::DIFile *U) { - uint64_t Size = - !Ty->isIncompleteType() ? CGM.getContext().getTypeSize(Ty) : 0; + unsigned Flags = 0; + uint64_t Size = 0; + + if (!Ty->isIncompleteType()) { + Size = CGM.getContext().getTypeSize(Ty); + + // Set the MS inheritance model. There is no flag for the unspecified model. + if (CGM.getTarget().getCXXABI().isMicrosoft()) { + switch (Ty->getMostRecentCXXRecordDecl()->getMSInheritanceModel()) { + case MSInheritanceAttr::Keyword_single_inheritance: + Flags |= llvm::DINode::FlagSingleInheritance; + break; + case MSInheritanceAttr::Keyword_multiple_inheritance: + Flags |= llvm::DINode::FlagMultipleInheritance; + break; + case MSInheritanceAttr::Keyword_virtual_inheritance: + Flags |= llvm::DINode::FlagVirtualInheritance; + break; + case MSInheritanceAttr::Keyword_unspecified_inheritance: + break; + } + } + } + llvm::DIType *ClassType = getOrCreateType(QualType(Ty->getClass(), 0), U); if (Ty->isMemberDataPointerType()) return DBuilder.createMemberPointerType( - getOrCreateType(Ty->getPointeeType(), U), ClassType, Size); + getOrCreateType(Ty->getPointeeType(), U), ClassType, Size, /*Align=*/0, + Flags); const FunctionProtoType *FPT = Ty->getPointeeType()->getAs<FunctionProtoType>(); @@ -2016,7 +2159,7 @@ llvm::DIType *CGDebugInfo::CreateType(const MemberPointerType *Ty, getOrCreateInstanceMethodType(CGM.getContext().getPointerType(QualType( Ty->getClass(), FPT->getTypeQuals())), FPT, U), - ClassType, Size); + ClassType, Size, /*Align=*/0, Flags); } llvm::DIType *CGDebugInfo::CreateType(const AtomicType *Ty, llvm::DIFile *U) { @@ -2048,13 +2191,23 @@ llvm::DIType *CGDebugInfo::CreateEnumType(const EnumType *Ty) { // If this is just a forward declaration, construct an appropriately // marked node and just return it. if (isImportedFromModule || !ED->getDefinition()) { + // Note that it is possible for enums to be created as part of + // their own declcontext. In this case a FwdDecl will be created + // twice. This doesn't cause a problem because both FwdDecls are + // entered into the ReplaceMap: finalize() will replace the first + // FwdDecl with the second and then replace the second with + // complete type. 
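The inheritance-model flags added to CreateType for member pointer types matter because, under the Microsoft C++ ABI, the representation and size of a pointer to member depend on the inheritance model of the class, and a debugger needs the model to decode the value. A small illustration; the printed sizes are target- and ABI-dependent, and Itanium-ABI targets use one fixed representation instead:

    #include <cstdio>

    struct Single { void f(); };
    struct Base1  { void g(); };
    struct Base2  { void h(); };
    struct Multiple : Base1, Base2 { void f(); };
    struct Virtual  : virtual Base1 { void f(); };

    int main() {
      // Under the MS ABI these typically differ; under Itanium they are equal.
      std::printf("single inheritance:   %zu bytes\n", sizeof(void (Single::*)()));
      std::printf("multiple inheritance: %zu bytes\n", sizeof(void (Multiple::*)()));
      std::printf("virtual inheritance:  %zu bytes\n", sizeof(void (Virtual::*)()));
      return 0;
    }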
llvm::DIScope *EDContext = getDeclContextDescriptor(ED); llvm::DIFile *DefUnit = getOrCreateFile(ED->getLocation()); + llvm::TempDIScope TmpContext(DBuilder.createReplaceableCompositeType( + llvm::dwarf::DW_TAG_enumeration_type, "", TheCU, DefUnit, 0)); + unsigned Line = getLineNumber(ED->getLocation()); StringRef EDName = ED->getName(); llvm::DIType *RetTy = DBuilder.createReplaceableCompositeType( llvm::dwarf::DW_TAG_enumeration_type, EDName, EDContext, DefUnit, Line, 0, Size, Align, llvm::DINode::FlagFwdDecl, FullName); + ReplaceMap.emplace_back( std::piecewise_construct, std::make_tuple(Ty), std::make_tuple(static_cast<llvm::Metadata *>(RetTy))); @@ -2168,7 +2321,7 @@ llvm::DIType *CGDebugInfo::getTypeOrNull(QualType Ty) { void CGDebugInfo::completeTemplateDefinition( const ClassTemplateSpecializationDecl &SD) { - if (DebugKind <= CodeGenOptions::DebugLineTablesOnly) + if (DebugKind <= codegenoptions::DebugLineTablesOnly) return; completeClassData(&SD); @@ -2220,8 +2373,12 @@ llvm::DIModule *CGDebugInfo::getParentModuleOrNull(const Decl *D) { // option. FullSourceLoc Loc(D->getLocation(), CGM.getContext().getSourceManager()); if (Module *M = ClangModuleMap->inferModuleFromLocation(Loc)) { + // This is a (sub-)module. auto Info = ExternalASTSource::ASTSourceDescriptor(*M); return getOrCreateModuleRef(Info, /*SkeletonCU=*/false); + } else { + // This the precompiled header being built. + return getOrCreateModuleRef(PCHDescriptor, /*SkeletonCU=*/false); } } @@ -2369,11 +2526,34 @@ llvm::DICompositeType *CGDebugInfo::CreateLimitedType(const RecordType *Ty) { getTagForRecord(RD), RDName, RDContext, DefUnit, Line, 0, Size, Align, 0, FullName); + // Elements of composite types usually have back to the type, creating + // uniquing cycles. Distinct nodes are more efficient. + switch (RealDecl->getTag()) { + default: + llvm_unreachable("invalid composite type tag"); + + case llvm::dwarf::DW_TAG_array_type: + case llvm::dwarf::DW_TAG_enumeration_type: + // Array elements and most enumeration elements don't have back references, + // so they don't tend to be involved in uniquing cycles and there is some + // chance of merging them when linking together two modules. Only make + // them distinct if they are ODR-uniqued. + if (FullName.empty()) + break; + + case llvm::dwarf::DW_TAG_structure_type: + case llvm::dwarf::DW_TAG_union_type: + case llvm::dwarf::DW_TAG_class_type: + // Immediatley resolve to a distinct node. + RealDecl = + llvm::MDNode::replaceWithDistinct(llvm::TempDICompositeType(RealDecl)); + break; + } + RegionMap[Ty->getDecl()].reset(RealDecl); TypeCache[QualType(Ty, 0).getAsOpaquePtr()].reset(RealDecl); - if (const ClassTemplateSpecializationDecl *TSpecial = - dyn_cast<ClassTemplateSpecializationDecl>(RD)) + if (const auto *TSpecial = dyn_cast<ClassTemplateSpecializationDecl>(RD)) DBuilder.replaceArrays(RealDecl, llvm::DINodeArray(), CollectCXXTemplateParams(TSpecial, DefUnit)); return RealDecl; @@ -2420,7 +2600,7 @@ void CGDebugInfo::collectFunctionDeclProps(GlobalDecl GD, llvm::DIFile *Unit, llvm::DIScope *&FDContext, llvm::DINodeArray &TParamsArray, unsigned &Flags) { - const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl()); + const auto *FD = cast<FunctionDecl>(GD.getDecl()); Name = getFunctionName(FD); // Use mangled name as linkage name for C/C++ functions. 
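Context for the linkage-name handling that follows: a C++ function's debug entry can carry both the source name and the mangled linkage name, and the linkage name is dropped when it would only repeat the source name. The mangled spellings in the comments below are the usual Itanium-ABI forms, written by hand for illustration:

    extern "C" int plain(int x) { return x; }    // linkage name equals "plain", so it is omitted
    int overloaded(int x) { return x; }          // typically mangled as _Z10overloadedi
    int overloaded(double x) { return (int)x; }  // typically mangled as _Z10overloadedd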
if (FD->hasPrototype()) { @@ -2430,13 +2610,12 @@ void CGDebugInfo::collectFunctionDeclProps(GlobalDecl GD, llvm::DIFile *Unit, // No need to replicate the linkage name if it isn't different from the // subprogram name, no need to have it at all unless coverage is enabled or // debug is set to more than just line tables. - if (LinkageName == Name || - (!CGM.getCodeGenOpts().EmitGcovArcs && - !CGM.getCodeGenOpts().EmitGcovNotes && - DebugKind <= CodeGenOptions::DebugLineTablesOnly)) + if (LinkageName == Name || (!CGM.getCodeGenOpts().EmitGcovArcs && + !CGM.getCodeGenOpts().EmitGcovNotes && + DebugKind <= codegenoptions::DebugLineTablesOnly)) LinkageName = StringRef(); - if (DebugKind >= CodeGenOptions::LimitedDebugInfo) { + if (DebugKind >= codegenoptions::LimitedDebugInfo) { if (const NamespaceDecl *NSDecl = dyn_cast_or_null<NamespaceDecl>(FD->getDeclContext())) FDContext = getOrCreateNameSpace(NSDecl); @@ -2513,15 +2692,15 @@ CGDebugInfo::getFunctionForwardDeclaration(const FunctionDecl *FD) { SmallVector<QualType, 16> ArgTypes; for (const ParmVarDecl *Parm: FD->parameters()) ArgTypes.push_back(Parm->getType()); - QualType FnType = - CGM.getContext().getFunctionType(FD->getReturnType(), ArgTypes, - FunctionProtoType::ExtProtoInfo()); + CallingConv CC = FD->getType()->castAs<FunctionType>()->getCallConv(); + QualType FnType = CGM.getContext().getFunctionType( + FD->getReturnType(), ArgTypes, FunctionProtoType::ExtProtoInfo(CC)); llvm::DISubprogram *SP = DBuilder.createTempFunctionFwdDecl( DContext, Name, LinkageName, Unit, Line, getOrCreateFunctionType(FD, FnType, Unit), !FD->isExternallyVisible(), /* isDefinition = */ false, 0, Flags, CGM.getLangOpts().Optimize, TParamsArray.get(), getFunctionDeclaration(FD)); - const FunctionDecl *CanonDecl = cast<FunctionDecl>(FD->getCanonicalDecl()); + const auto *CanonDecl = cast<FunctionDecl>(FD->getCanonicalDecl()); FwdDeclReplaceMap.emplace_back(std::piecewise_construct, std::make_tuple(CanonDecl), std::make_tuple(SP)); @@ -2553,7 +2732,7 @@ llvm::DINode *CGDebugInfo::getDeclarationOrDefinition(const Decl *D) { // we would otherwise do to get a type for a pointee. (forward declarations in // limited debug info, full definitions (if the type definition is available) // in unlimited debug info) - if (const TypeDecl *TD = dyn_cast<TypeDecl>(D)) + if (const auto *TD = dyn_cast<TypeDecl>(D)) return getOrCreateType(CGM.getContext().getTypeDeclType(TD), getOrCreateFile(TD->getLocation())); auto I = DeclCache.find(D->getCanonicalDecl()); @@ -2563,7 +2742,7 @@ llvm::DINode *CGDebugInfo::getDeclarationOrDefinition(const Decl *D) { // No definition for now. Emit a forward definition that might be // merged with a potential upcoming definition. 
- if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) + if (const auto *FD = dyn_cast<FunctionDecl>(D)) return getFunctionForwardDeclaration(FD); else if (const auto *VD = dyn_cast<VarDecl>(D)) return getGlobalVariableForwardDeclaration(VD); @@ -2572,10 +2751,10 @@ llvm::DINode *CGDebugInfo::getDeclarationOrDefinition(const Decl *D) { } llvm::DISubprogram *CGDebugInfo::getFunctionDeclaration(const Decl *D) { - if (!D || DebugKind <= CodeGenOptions::DebugLineTablesOnly) + if (!D || DebugKind <= codegenoptions::DebugLineTablesOnly) return nullptr; - const FunctionDecl *FD = dyn_cast<FunctionDecl>(D); + const auto *FD = dyn_cast<FunctionDecl>(D); if (!FD) return nullptr; @@ -2584,8 +2763,7 @@ llvm::DISubprogram *CGDebugInfo::getFunctionDeclaration(const Decl *D) { auto MI = SPCache.find(FD->getCanonicalDecl()); if (MI == SPCache.end()) { - if (const CXXMethodDecl *MD = - dyn_cast<CXXMethodDecl>(FD->getCanonicalDecl())) { + if (const auto *MD = dyn_cast<CXXMethodDecl>(FD->getCanonicalDecl())) { return CreateCXXMemberFunction(MD, getOrCreateFile(MD->getLocation()), cast<llvm::DICompositeType>(S)); } @@ -2612,14 +2790,18 @@ llvm::DISubprogram *CGDebugInfo::getFunctionDeclaration(const Decl *D) { llvm::DISubroutineType *CGDebugInfo::getOrCreateFunctionType(const Decl *D, QualType FnType, llvm::DIFile *F) { - if (!D || DebugKind <= CodeGenOptions::DebugLineTablesOnly) + if (!D || DebugKind <= codegenoptions::DebugLineTablesOnly) // Create fake but valid subroutine type. Otherwise -verify would fail, and // subprogram DIE will miss DW_AT_decl_file and DW_AT_decl_line fields. return DBuilder.createSubroutineType(DBuilder.getOrCreateTypeArray(None)); - if (const CXXMethodDecl *Method = dyn_cast<CXXMethodDecl>(D)) + if (const auto *Method = dyn_cast<CXXMethodDecl>(D)) return getOrCreateMethodType(Method, F); - if (const ObjCMethodDecl *OMethod = dyn_cast<ObjCMethodDecl>(D)) { + + const auto *FTy = FnType->getAs<FunctionType>(); + CallingConv CC = FTy ? FTy->getCallConv() : CallingConv::CC_C; + + if (const auto *OMethod = dyn_cast<ObjCMethodDecl>(D)) { // Add "self" and "_cmd" SmallVector<llvm::Metadata *, 16> Elts; @@ -2645,28 +2827,28 @@ llvm::DISubroutineType *CGDebugInfo::getOrCreateFunctionType(const Decl *D, Elts.push_back(DBuilder.createArtificialType( getOrCreateType(CGM.getContext().getObjCSelType(), F))); // Get rest of the arguments. - for (const auto *PI : OMethod->params()) + for (const auto *PI : OMethod->parameters()) Elts.push_back(getOrCreateType(PI->getType(), F)); // Variadic methods need a special marker at the end of the type list. if (OMethod->isVariadic()) Elts.push_back(DBuilder.createUnspecifiedParameter()); llvm::DITypeRefArray EltTypeArray = DBuilder.getOrCreateTypeArray(Elts); - return DBuilder.createSubroutineType(EltTypeArray); + return DBuilder.createSubroutineType(EltTypeArray, 0, getDwarfCC(CC)); } // Handle variadic function types; they need an additional // unspecified parameter. 
- if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) + if (const auto *FD = dyn_cast<FunctionDecl>(D)) if (FD->isVariadic()) { SmallVector<llvm::Metadata *, 16> EltTys; EltTys.push_back(getOrCreateType(FD->getReturnType(), F)); - if (const FunctionProtoType *FPT = dyn_cast<FunctionProtoType>(FnType)) - for (unsigned i = 0, e = FPT->getNumParams(); i != e; ++i) - EltTys.push_back(getOrCreateType(FPT->getParamType(i), F)); + if (const auto *FPT = dyn_cast<FunctionProtoType>(FnType)) + for (QualType ParamType : FPT->param_types()) + EltTys.push_back(getOrCreateType(ParamType, F)); EltTys.push_back(DBuilder.createUnspecifiedParameter()); llvm::DITypeRefArray EltTypeArray = DBuilder.getOrCreateTypeArray(EltTys); - return DBuilder.createSubroutineType(EltTypeArray); + return DBuilder.createSubroutineType(EltTypeArray, 0, getDwarfCC(CC)); } return cast<llvm::DISubroutineType>(getOrCreateType(FnType, F)); @@ -2691,7 +2873,7 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc, if (!HasDecl) { // Use llvm function name. LinkageName = Fn->getName(); - } else if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) { + } else if (const auto *FD = dyn_cast<FunctionDecl>(D)) { // If there is a subprogram for this function available then use it. auto FI = SPCache.find(FD->getCanonicalDecl()); if (FI != SPCache.end()) { @@ -2704,7 +2886,7 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc, } collectFunctionDeclProps(GD, Unit, Name, LinkageName, FDContext, TParamsArray, Flags); - } else if (const ObjCMethodDecl *OMD = dyn_cast<ObjCMethodDecl>(D)) { + } else if (const auto *OMD = dyn_cast<ObjCMethodDecl>(D)) { Name = getObjCMethodName(OMD); Flags |= llvm::DINode::FlagPrototyped; } else { @@ -2712,7 +2894,7 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc, Name = Fn->getName(); Flags |= llvm::DINode::FlagPrototyped; } - if (!Name.empty() && Name[0] == '\01') + if (Name.startswith("\01")) Name = Name.substr(1); if (!HasDecl || D->isImplicit()) { @@ -2731,7 +2913,7 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc, // are emitted as CU level entities by the backend. llvm::DISubprogram *SP = DBuilder.createFunction( FDContext, Name, LinkageName, Unit, LineNo, - getOrCreateFunctionType(D, FnType, Unit), Fn->hasInternalLinkage(), + getOrCreateFunctionType(D, FnType, Unit), Fn->hasLocalLinkage(), true /*definition*/, ScopeLine, Flags, CGM.getLangOpts().Optimize, TParamsArray.get(), getFunctionDeclaration(D)); Fn->setSubprogram(SP); @@ -2739,7 +2921,7 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, SourceLocation Loc, // code for the initialization of globals. Do not record these decls // as they will overwrite the actual VarDecl Decl in the cache. if (HasDecl && isa<FunctionDecl>(D)) - DeclCache[D->getCanonicalDecl()].reset(static_cast<llvm::Metadata *>(SP)); + DeclCache[D->getCanonicalDecl()].reset(SP); // Push the function onto the lexical block stack. LexicalBlockStack.emplace_back(SP); @@ -2765,7 +2947,7 @@ void CGDebugInfo::EmitFunctionDecl(GlobalDecl GD, SourceLocation Loc, // If there is a DISubprogram for this function available then use it. 
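The variadic branch above appends an unspecified-parameters marker after the fixed parameters, which is how a prototype such as the following is described in the subroutine type:

    #include <cstdarg>

    // Fixed parameter 'count' is listed normally; the trailing '...' becomes a
    // DW_TAG_unspecified_parameters entry in the debug type.
    int sumInts(int count, ...) {
      va_list ap;
      va_start(ap, count);
      int total = 0;
      for (int i = 0; i < count; ++i)
        total += va_arg(ap, int);
      va_end(ap);
      return total;
    }

    int main() { return sumInts(3, 1, 2, 3) == 6 ? 0 : 1; }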
collectFunctionDeclProps(GD, Unit, Name, LinkageName, FDContext, TParamsArray, Flags); - } else if (const ObjCMethodDecl *OMD = dyn_cast<ObjCMethodDecl>(D)) { + } else if (const auto *OMD = dyn_cast<ObjCMethodDecl>(D)) { Name = getObjCMethodName(OMD); Flags |= llvm::DINode::FlagPrototyped; } else { @@ -2783,11 +2965,11 @@ void CGDebugInfo::EmitFunctionDecl(GlobalDecl GD, SourceLocation Loc, unsigned LineNo = getLineNumber(Loc); unsigned ScopeLine = 0; - DBuilder.createFunction(FDContext, Name, LinkageName, Unit, LineNo, - getOrCreateFunctionType(D, FnType, Unit), - false /*internalLinkage*/, true /*definition*/, - ScopeLine, Flags, CGM.getLangOpts().Optimize, - TParamsArray.get(), getFunctionDeclaration(D)); + DBuilder.retainType(DBuilder.createFunction( + FDContext, Name, LinkageName, Unit, LineNo, + getOrCreateFunctionType(D, FnType, Unit), false /*internalLinkage*/, + false /*definition*/, ScopeLine, Flags, CGM.getLangOpts().Optimize, + TParamsArray.get(), getFunctionDeclaration(D))); } void CGDebugInfo::EmitLocation(CGBuilderTy &Builder, SourceLocation Loc) { @@ -2820,7 +3002,7 @@ void CGDebugInfo::EmitLexicalBlockStart(CGBuilderTy &Builder, Builder.SetCurrentDebugLocation(llvm::DebugLoc::get( getLineNumber(Loc), getColumnNumber(Loc), LexicalBlockStack.back())); - if (DebugKind <= CodeGenOptions::DebugLineTablesOnly) + if (DebugKind <= codegenoptions::DebugLineTablesOnly) return; // Create a new lexical block and push it on the stack. @@ -2834,7 +3016,7 @@ void CGDebugInfo::EmitLexicalBlockEnd(CGBuilderTy &Builder, // Provide an entry in the line table for the end of the block. EmitLocation(Builder, Loc); - if (DebugKind <= CodeGenOptions::DebugLineTablesOnly) + if (DebugKind <= codegenoptions::DebugLineTablesOnly) return; LexicalBlockStack.pop_back(); @@ -2896,8 +3078,7 @@ llvm::DIType *CGDebugInfo::EmitTypeForVarWithBlocksAttr(const VarDecl *VD, CGM.getTarget().getPointerAlign(0))) { CharUnits FieldOffsetInBytes = CGM.getContext().toCharUnitsFromBits(FieldOffset); - CharUnits AlignedOffsetInBytes = - FieldOffsetInBytes.RoundUpToAlignment(Align); + CharUnits AlignedOffsetInBytes = FieldOffsetInBytes.alignTo(Align); CharUnits NumPaddingBytes = AlignedOffsetInBytes - FieldOffsetInBytes; if (NumPaddingBytes.isPositive()) { @@ -2930,8 +3111,10 @@ llvm::DIType *CGDebugInfo::EmitTypeForVarWithBlocksAttr(const VarDecl *VD, void CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::Value *Storage, llvm::Optional<unsigned> ArgNo, CGBuilderTy &Builder) { - assert(DebugKind >= CodeGenOptions::LimitedDebugInfo); + assert(DebugKind >= codegenoptions::LimitedDebugInfo); assert(!LexicalBlockStack.empty() && "Region stack mismatch, stack empty!"); + if (VD->hasAttr<NoDebugAttr>()) + return; bool Unwritten = VD->isImplicit() || (isa<Decl>(VD->getDeclContext()) && @@ -2969,7 +3152,7 @@ void CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::Value *Storage, // otherwise it is 'self' or 'this'. 
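The early return on NoDebugAttr added to EmitDeclare means locals carrying the nodebug attribute get no variable entry at all. A minimal clang-oriented example; the attribute spelling below is the GNU-style one clang accepts on variable declarations:

    void locals() {
      int traced = 1;                            // gets a DW_TAG_variable entry
      __attribute__((nodebug)) int untraced = 2; // skipped by the early return
      (void)traced;
      (void)untraced;
    }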
if (isa<ImplicitParamDecl>(VD) && ArgNo && *ArgNo == 1) Flags |= llvm::DINode::FlagObjectPointer; - if (llvm::Argument *Arg = dyn_cast<llvm::Argument>(Storage)) + if (auto *Arg = dyn_cast<llvm::Argument>(Storage)) if (Arg->getType()->isPointerTy() && !Arg->hasByValAttr() && !VD->getType()->isPointerType()) Expr.push_back(llvm::dwarf::DW_OP_deref); @@ -3005,10 +3188,10 @@ void CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::Value *Storage, return; } else if (isa<VariableArrayType>(VD->getType())) Expr.push_back(llvm::dwarf::DW_OP_deref); - } else if (const RecordType *RT = dyn_cast<RecordType>(VD->getType())) { + } else if (const auto *RT = dyn_cast<RecordType>(VD->getType())) { // If VD is an anonymous union then Storage represents value for // all union fields. - const RecordDecl *RD = cast<RecordDecl>(RT->getDecl()); + const auto *RD = cast<RecordDecl>(RT->getDecl()); if (RD->isUnion() && RD->isAnonymousStructOrUnion()) { // GDB has trouble finding local variables in anonymous unions, so we emit // artifical local variables for each of the members. @@ -3056,7 +3239,7 @@ void CGDebugInfo::EmitDeclare(const VarDecl *VD, llvm::Value *Storage, void CGDebugInfo::EmitDeclareOfAutoVariable(const VarDecl *VD, llvm::Value *Storage, CGBuilderTy &Builder) { - assert(DebugKind >= CodeGenOptions::LimitedDebugInfo); + assert(DebugKind >= codegenoptions::LimitedDebugInfo); EmitDeclare(VD, Storage, llvm::None, Builder); } @@ -3071,11 +3254,13 @@ llvm::DIType *CGDebugInfo::CreateSelfType(const QualType &QualTy, void CGDebugInfo::EmitDeclareOfBlockDeclRefVariable( const VarDecl *VD, llvm::Value *Storage, CGBuilderTy &Builder, const CGBlockInfo &blockInfo, llvm::Instruction *InsertPoint) { - assert(DebugKind >= CodeGenOptions::LimitedDebugInfo); + assert(DebugKind >= codegenoptions::LimitedDebugInfo); assert(!LexicalBlockStack.empty() && "Region stack mismatch, stack empty!"); if (Builder.GetInsertBlock() == nullptr) return; + if (VD->hasAttr<NoDebugAttr>()) + return; bool isByRef = VD->hasAttr<BlocksAttr>(); @@ -3139,7 +3324,7 @@ void CGDebugInfo::EmitDeclareOfBlockDeclRefVariable( void CGDebugInfo::EmitDeclareOfArgVariable(const VarDecl *VD, llvm::Value *AI, unsigned ArgNo, CGBuilderTy &Builder) { - assert(DebugKind >= CodeGenOptions::LimitedDebugInfo); + assert(DebugKind >= codegenoptions::LimitedDebugInfo); EmitDeclare(VD, AI, ArgNo, Builder); } @@ -3158,7 +3343,7 @@ void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block, unsigned ArgNo, llvm::Value *LocalAddr, CGBuilderTy &Builder) { - assert(DebugKind >= CodeGenOptions::LimitedDebugInfo); + assert(DebugKind >= codegenoptions::LimitedDebugInfo); ASTContext &C = CGM.getContext(); const BlockDecl *blockDecl = block.getBlockDecl(); @@ -3175,25 +3360,25 @@ void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block, CGM.getDataLayout().getStructLayout(block.StructureType); SmallVector<llvm::Metadata *, 16> fields; - fields.push_back(createFieldType("__isa", C.VoidPtrTy, 0, loc, AS_public, + fields.push_back(createFieldType("__isa", C.VoidPtrTy, loc, AS_public, blockLayout->getElementOffsetInBits(0), tunit, tunit)); - fields.push_back(createFieldType("__flags", C.IntTy, 0, loc, AS_public, + fields.push_back(createFieldType("__flags", C.IntTy, loc, AS_public, blockLayout->getElementOffsetInBits(1), tunit, tunit)); - fields.push_back(createFieldType("__reserved", C.IntTy, 0, loc, AS_public, + fields.push_back(createFieldType("__reserved", C.IntTy, loc, AS_public, blockLayout->getElementOffsetInBits(2), tunit, 
tunit)); auto *FnTy = block.getBlockExpr()->getFunctionType(); auto FnPtrType = CGM.getContext().getPointerType(FnTy->desugar()); - fields.push_back(createFieldType("__FuncPtr", FnPtrType, 0, loc, AS_public, + fields.push_back(createFieldType("__FuncPtr", FnPtrType, loc, AS_public, blockLayout->getElementOffsetInBits(3), tunit, tunit)); fields.push_back(createFieldType( "__descriptor", C.getPointerType(block.NeedsCopyDispose ? C.getBlockDescriptorExtendedType() : C.getBlockDescriptorType()), - 0, loc, AS_public, blockLayout->getElementOffsetInBits(4), tunit, tunit)); + loc, AS_public, blockLayout->getElementOffsetInBits(4), tunit, tunit)); // We want to sort the captures by offset, not because DWARF // requires this, but because we're paranoid about debuggers. @@ -3227,19 +3412,22 @@ void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block, // Sort by offset. llvm::array_pod_sort(chunks.begin(), chunks.end()); - for (SmallVectorImpl<BlockLayoutChunk>::iterator i = chunks.begin(), - e = chunks.end(); - i != e; ++i) { - uint64_t offsetInBits = i->OffsetInBits; - const BlockDecl::Capture *capture = i->Capture; + for (const BlockLayoutChunk &Chunk : chunks) { + uint64_t offsetInBits = Chunk.OffsetInBits; + const BlockDecl::Capture *capture = Chunk.Capture; // If we have a null capture, this must be the C++ 'this' capture. if (!capture) { - const CXXMethodDecl *method = - cast<CXXMethodDecl>(blockDecl->getNonClosureContext()); - QualType type = method->getThisType(C); + QualType type; + if (auto *Method = + cast_or_null<CXXMethodDecl>(blockDecl->getNonClosureContext())) + type = Method->getThisType(C); + else if (auto *RDecl = dyn_cast<CXXRecordDecl>(blockDecl->getParent())) + type = QualType(RDecl->getTypeForDecl(), 0); + else + llvm_unreachable("unexpected block declcontext"); - fields.push_back(createFieldType("this", type, 0, loc, AS_public, + fields.push_back(createFieldType("this", type, loc, AS_public, offsetInBits, tunit, tunit)); continue; } @@ -3259,7 +3447,7 @@ void CGDebugInfo::EmitDeclareOfBlockLiteralArgVariable(const CGBlockInfo &block, DBuilder.createMemberType(tunit, name, tunit, line, PtrInfo.Width, PtrInfo.Align, offsetInBits, 0, fieldType); } else { - fieldType = createFieldType(name, variable->getType(), 0, loc, AS_public, + fieldType = createFieldType(name, variable->getType(), loc, AS_public, offsetInBits, tunit, tunit); } fields.push_back(fieldType); @@ -3328,8 +3516,7 @@ llvm::DIGlobalVariable *CGDebugInfo::CollectAnonRecordDecls( // Ignore unnamed fields, but recurse into anonymous records. if (FieldName.empty()) { - const RecordType *RT = dyn_cast<RecordType>(Field->getType()); - if (RT) + if (const auto *RT = dyn_cast<RecordType>(Field->getType())) GV = CollectAnonRecordDecls(RT->getDecl(), Unit, LineNo, LinkageName, Var, DContext); continue; @@ -3337,14 +3524,16 @@ llvm::DIGlobalVariable *CGDebugInfo::CollectAnonRecordDecls( // Use VarDecl's Tag, Scope and Line number. GV = DBuilder.createGlobalVariable(DContext, FieldName, LinkageName, Unit, LineNo, FieldTy, - Var->hasInternalLinkage(), Var, nullptr); + Var->hasLocalLinkage(), Var, nullptr); } return GV; } void CGDebugInfo::EmitGlobalVariable(llvm::GlobalVariable *Var, const VarDecl *D) { - assert(DebugKind >= CodeGenOptions::LimitedDebugInfo); + assert(DebugKind >= codegenoptions::LimitedDebugInfo); + if (D->hasAttr<NoDebugAttr>()) + return; // Create global variable debug descriptor. 
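The anonymous-union handling mentioned above exists because the union's members share one storage location but are looked up by their own names; emitting an artificial local per member lets gdb resolve each name. The construct in question:

    void sharedStorage() {
      union {
        int   i;
        float f;
      }; // anonymous union: members are injected into the enclosing scope
      i = 42;   // debug info gets an artificial local named "i"
      f = 1.5f; // ... and another named "f" over the same storage
    }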
llvm::DIFile *Unit = nullptr; llvm::DIScope *DContext = nullptr; @@ -3368,21 +3557,23 @@ void CGDebugInfo::EmitGlobalVariable(llvm::GlobalVariable *Var, } else { GV = DBuilder.createGlobalVariable( DContext, DeclName, LinkageName, Unit, LineNo, getOrCreateType(T, Unit), - Var->hasInternalLinkage(), Var, + Var->hasLocalLinkage(), Var, getOrCreateStaticDataMemberDeclarationOrNull(D)); } - DeclCache[D->getCanonicalDecl()].reset(static_cast<llvm::Metadata *>(GV)); + DeclCache[D->getCanonicalDecl()].reset(GV); } void CGDebugInfo::EmitGlobalVariable(const ValueDecl *VD, llvm::Constant *Init) { - assert(DebugKind >= CodeGenOptions::LimitedDebugInfo); + assert(DebugKind >= codegenoptions::LimitedDebugInfo); + if (VD->hasAttr<NoDebugAttr>()) + return; // Create the descriptor for the variable. llvm::DIFile *Unit = getOrCreateFile(VD->getLocation()); StringRef Name = VD->getName(); llvm::DIType *Ty = getOrCreateType(VD->getType(), Unit); - if (const EnumConstantDecl *ECD = dyn_cast<EnumConstantDecl>(VD)) { - const EnumDecl *ED = cast<EnumDecl>(ECD->getDeclContext()); + if (const auto *ECD = dyn_cast<EnumConstantDecl>(VD)) { + const auto *ED = cast<EnumDecl>(ECD->getDeclContext()); assert(isa<EnumType>(ED->getTypeForDecl()) && "Enum without EnumType?"); Ty = getOrCreateType(QualType(ED->getTypeForDecl(), 0), Unit); } @@ -3400,6 +3591,9 @@ void CGDebugInfo::EmitGlobalVariable(const ValueDecl *VD, auto *RD = cast<RecordDecl>(VarD->getDeclContext()); getDeclContextDescriptor(VarD); // Ensure that the type is retained even though it's otherwise unreferenced. + // + // FIXME: This is probably unnecessary, since Ty should reference RD + // through its scope. RetainedTypes.push_back( CGM.getContext().getRecordType(RD).getAsOpaquePtr()); return; @@ -3423,7 +3617,7 @@ llvm::DIScope *CGDebugInfo::getCurrentContextDescriptor(const Decl *D) { } void CGDebugInfo::EmitUsingDirective(const UsingDirectiveDecl &UD) { - if (CGM.getCodeGenOpts().getDebugInfo() < CodeGenOptions::LimitedDebugInfo) + if (CGM.getCodeGenOpts().getDebugInfo() < codegenoptions::LimitedDebugInfo) return; const NamespaceDecl *NSDecl = UD.getNominatedNamespace(); if (!NSDecl->isAnonymousNamespace() || @@ -3436,13 +3630,23 @@ void CGDebugInfo::EmitUsingDirective(const UsingDirectiveDecl &UD) { } void CGDebugInfo::EmitUsingDecl(const UsingDecl &UD) { - if (CGM.getCodeGenOpts().getDebugInfo() < CodeGenOptions::LimitedDebugInfo) + if (CGM.getCodeGenOpts().getDebugInfo() < codegenoptions::LimitedDebugInfo) return; assert(UD.shadow_size() && "We shouldn't be codegening an invalid UsingDecl containing no decls"); // Emitting one decl is sufficient - debuggers can detect that this is an // overloaded name & provide lookup for all the overloads. 
const UsingShadowDecl &USD = **UD.shadow_begin(); + + // FIXME: Skip functions with undeduced auto return type for now since we + // don't currently have the plumbing for separate declarations & definitions + // of free functions and mismatched types (auto in the declaration, concrete + // return type in the definition) + if (const auto *FD = dyn_cast<FunctionDecl>(USD.getUnderlyingDecl())) + if (const auto *AT = + FD->getType()->getAs<FunctionProtoType>()->getContainedAutoType()) + if (AT->getDeducedType().isNull()) + return; if (llvm::DINode *Target = getDeclarationOrDefinition(USD.getUnderlyingDecl())) DBuilder.createImportedDeclaration( @@ -3451,6 +3655,8 @@ void CGDebugInfo::EmitUsingDecl(const UsingDecl &UD) { } void CGDebugInfo::EmitImportDecl(const ImportDecl &ID) { + if (CGM.getCodeGenOpts().getDebuggerTuning() != llvm::DebuggerKind::LLDB) + return; if (Module *M = ID.getImportedModule()) { auto Info = ExternalASTSource::ASTSourceDescriptor(*M); DBuilder.createImportedDeclaration( @@ -3462,13 +3668,13 @@ void CGDebugInfo::EmitImportDecl(const ImportDecl &ID) { llvm::DIImportedEntity * CGDebugInfo::EmitNamespaceAlias(const NamespaceAliasDecl &NA) { - if (CGM.getCodeGenOpts().getDebugInfo() < CodeGenOptions::LimitedDebugInfo) + if (CGM.getCodeGenOpts().getDebugInfo() < codegenoptions::LimitedDebugInfo) return nullptr; auto &VH = NamespaceAliasCache[&NA]; if (VH) return cast<llvm::DIImportedEntity>(VH); llvm::DIImportedEntity *R; - if (const NamespaceAliasDecl *Underlying = + if (const auto *Underlying = dyn_cast<NamespaceAliasDecl>(NA.getAliasedNamespace())) // This could cache & dedup here rather than relying on metadata deduping. R = DBuilder.createImportedDeclaration( @@ -3557,7 +3763,7 @@ void CGDebugInfo::finalize() { } void CGDebugInfo::EmitExplicitCastType(QualType Ty) { - if (CGM.getCodeGenOpts().getDebugInfo() < CodeGenOptions::LimitedDebugInfo) + if (CGM.getCodeGenOpts().getDebugInfo() < codegenoptions::LimitedDebugInfo) return; if (auto *DieTy = getOrCreateType(Ty, getOrCreateMainFile())) diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.h b/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.h index a68dd33f..366dd81 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGDebugInfo.h @@ -16,6 +16,7 @@ #include "CGBuilder.h" #include "clang/AST/Expr.h" +#include "clang/AST/ExternalASTSource.h" #include "clang/AST/Type.h" #include "clang/Basic/SourceLocation.h" #include "clang/Frontend/CodeGenOptions.h" @@ -52,28 +53,20 @@ class CGDebugInfo { friend class ApplyDebugLocation; friend class SaveAndRestoreLocation; CodeGenModule &CGM; - const CodeGenOptions::DebugInfoKind DebugKind; + const codegenoptions::DebugInfoKind DebugKind; bool DebugTypeExtRefs; llvm::DIBuilder DBuilder; llvm::DICompileUnit *TheCU = nullptr; ModuleMap *ClangModuleMap = nullptr; + ExternalASTSource::ASTSourceDescriptor PCHDescriptor; SourceLocation CurLoc; llvm::DIType *VTablePtrType = nullptr; llvm::DIType *ClassTy = nullptr; llvm::DICompositeType *ObjTy = nullptr; llvm::DIType *SelTy = nullptr; - llvm::DIType *OCLImage1dDITy = nullptr; - llvm::DIType *OCLImage1dArrayDITy = nullptr; - llvm::DIType *OCLImage1dBufferDITy = nullptr; - llvm::DIType *OCLImage2dDITy = nullptr; - llvm::DIType *OCLImage2dArrayDITy = nullptr; - llvm::DIType *OCLImage2dDepthDITy = nullptr; - llvm::DIType *OCLImage2dArrayDepthDITy = nullptr; - llvm::DIType *OCLImage2dMSAADITy = nullptr; - llvm::DIType *OCLImage2dArrayMSAADITy = nullptr; - llvm::DIType 
*OCLImage2dMSAADepthDITy = nullptr; - llvm::DIType *OCLImage2dArrayMSAADepthDITy = nullptr; - llvm::DIType *OCLImage3dDITy = nullptr; +#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \ + llvm::DIType *SingletonId = nullptr; +#include "clang/Basic/OpenCLImageTypes.def" llvm::DIType *OCLEventDITy = nullptr; llvm::DIType *OCLClkEventDITy = nullptr; llvm::DIType *OCLQueueDITy = nullptr; @@ -107,7 +100,7 @@ class CGDebugInfo { /// compilation. std::vector<std::pair<const TagType *, llvm::TrackingMDRef>> ReplaceMap; - /// Cache of replaceable forward declarartions (functions and + /// Cache of replaceable forward declarations (functions and /// variables) to RAUW at the end of compilation. std::vector<std::pair<const DeclaratorDecl *, llvm::TrackingMDRef>> FwdDeclReplaceMap; @@ -239,11 +232,16 @@ class CGDebugInfo { llvm::DIFile *F); llvm::DIType *createFieldType(StringRef name, QualType type, - uint64_t sizeInBitsOverride, SourceLocation loc, - AccessSpecifier AS, uint64_t offsetInBits, - llvm::DIFile *tunit, llvm::DIScope *scope, + SourceLocation loc, AccessSpecifier AS, + uint64_t offsetInBits, llvm::DIFile *tunit, + llvm::DIScope *scope, const RecordDecl *RD = nullptr); + /// Create new bit field member. + llvm::DIType *createBitFieldType(const FieldDecl *BitFieldDecl, + llvm::DIScope *RecordTy, + const RecordDecl *RD); + /// Helpers for collecting fields of a record. /// @{ void CollectRecordLambdaFields(const CXXRecordDecl *CXXDecl, @@ -275,6 +273,8 @@ public: void finalize(); + /// Module debugging: Support for building PCMs. + /// @{ /// Set the main CU's DwoId field to \p Signature. void setDwoId(uint64_t Signature); @@ -283,6 +283,14 @@ public: /// the module of origin of each Decl. void setModuleMap(ModuleMap &MMap) { ClangModuleMap = &MMap; } + /// When generating debug information for a clang module or + /// precompiled header, this module map will be used to determine + /// the module of origin of each Decl. + void setPCHDescriptor(ExternalASTSource::ASTSourceDescriptor PCH) { + PCHDescriptor = PCH; + } + /// @} + /// Update the current source location. If \arg loc is invalid it is /// ignored. 
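The per-image-type cache members above are now generated from clang/Basic/OpenCLImageTypes.def via an X-macro instead of being spelled out one by one. A minimal standalone sketch of the same pattern (hypothetical SHAPE list kept in a macro so the sketch stays in one file; the real code #includes the .def file):

#include <cstdio>

#define SHAPE_TYPES(X) \
  X(Circle)            \
  X(Square)            \
  X(Triangle)

struct Registry {
  // One member per list entry, generated exactly like the SingletonId fields.
#define DECLARE_MEMBER(Name) const char *Name##Ty = nullptr;
  SHAPE_TYPES(DECLARE_MEMBER)
#undef DECLARE_MEMBER
};

int main() {
  Registry r;
  r.CircleTy = "circle";          // this member only exists via the macro
  std::printf("%s\n", r.CircleTy);
}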
void setLocation(SourceLocation Loc); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGDecl.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGDecl.cpp index b78e80d..89407cd 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGDecl.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGDecl.cpp @@ -16,11 +16,13 @@ #include "CGCleanup.h" #include "CGDebugInfo.h" #include "CGOpenCLRuntime.h" +#include "CGOpenMPRuntime.h" #include "CodeGenModule.h" #include "clang/AST/ASTContext.h" #include "clang/AST/CharUnits.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclObjC.h" +#include "clang/AST/DeclOpenMP.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/TargetInfo.h" #include "clang/CodeGen/CGFunctionInfo.h" @@ -29,10 +31,10 @@ #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Type.h" + using namespace clang; using namespace CodeGen; - void CodeGenFunction::EmitDecl(const Decl &D) { switch (D.getKind()) { case Decl::BuiltinTemplate: @@ -71,6 +73,8 @@ void CodeGenFunction::EmitDecl(const Decl &D) { case Decl::ObjCImplementation: case Decl::ObjCProperty: case Decl::ObjCCompatibleAlias: + case Decl::PragmaComment: + case Decl::PragmaDetectMismatch: case Decl::AccessSpec: case Decl::LinkageSpec: case Decl::ObjCPropertyImpl: @@ -81,6 +85,7 @@ void CodeGenFunction::EmitDecl(const Decl &D) { case Decl::Captured: case Decl::ClassScopeFunctionSpecialization: case Decl::UsingShadow: + case Decl::ConstructorUsingShadow: case Decl::ObjCTypeParam: llvm_unreachable("Declaration should not be in declstmts!"); case Decl::Function: // void X(); @@ -92,6 +97,7 @@ void CodeGenFunction::EmitDecl(const Decl &D) { case Decl::Label: // __label__ x; case Decl::Import: case Decl::OMPThreadPrivate: + case Decl::OMPCapturedExpr: case Decl::Empty: // None of these decls require codegen support. return; @@ -115,6 +121,9 @@ void CodeGenFunction::EmitDecl(const Decl &D) { return EmitVarDecl(VD); } + case Decl::OMPDeclareReduction: + return CGM.EmitOMPDeclareReduction(cast<OMPDeclareReductionDecl>(&D), this); + case Decl::Typedef: // typedef int X; case Decl::TypeAlias: { // using X = int; [C++0x] const TypedefNameDecl &TD = cast<TypedefNameDecl>(D); @@ -363,8 +372,15 @@ void CodeGenFunction::EmitStaticVarDecl(const VarDecl &D, llvm::GlobalVariable *var = cast<llvm::GlobalVariable>(addr->stripPointerCasts()); + + // CUDA's local and local static __shared__ variables should not + // have any non-empty initializers. This is ensured by Sema. + // Whatever initializer such variable may have when it gets here is + // a no-op and should not be emitted. + bool isCudaSharedVar = getLangOpts().CUDA && getLangOpts().CUDAIsDevice && + D.hasAttr<CUDASharedAttr>(); // If this value has an initializer, emit it. - if (D.getInit()) + if (D.getInit() && !isCudaSharedVar) var = AddInitializerToStaticVarDecl(D, var); var->setAlignment(alignment.getQuantity()); @@ -394,7 +410,7 @@ void CodeGenFunction::EmitStaticVarDecl(const VarDecl &D, // Emit global variable debug descriptor for static vars. CGDebugInfo *DI = getDebugInfo(); if (DI && - CGM.getCodeGenOpts().getDebugInfo() >= CodeGenOptions::LimitedDebugInfo) { + CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo) { DI->setLocation(D.getLocation()); DI->EmitGlobalVariable(var, &D); } @@ -513,20 +529,7 @@ namespace { CGF.EmitCall(FnInfo, CleanupFn, ReturnValueSlot(), Args); } }; - - /// A cleanup to call @llvm.lifetime.end. 
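The isCudaSharedVar special case above covers device-side __shared__ locals: Sema already guarantees they carry no meaningful initializer, so none is emitted. A source-level illustration of the declaration kind involved (CUDA C++, illustrative only; builds with nvcc or clang in CUDA mode):

__global__ void kernel(float *out) {
  // Local static-duration __shared__ variable: any (empty) initializer still
  // attached to it at CodeGen time is a no-op and is skipped.
  __shared__ float tile[256];
  tile[threadIdx.x] = out[threadIdx.x];
  __syncthreads();
  out[threadIdx.x] = tile[255 - threadIdx.x];
}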
- class CallLifetimeEnd final : public EHScopeStack::Cleanup { - llvm::Value *Addr; - llvm::Value *Size; - public: - CallLifetimeEnd(Address addr, llvm::Value *size) - : Addr(addr.getPointer()), Size(size) {} - - void Emit(CodeGenFunction &CGF, Flags flags) override { - CGF.EmitLifetimeEnd(Size, Addr); - } - }; -} +} // end anonymous namespace /// EmitAutoVarWithLifetime - Does the setup required for an automatic /// variable with lifetime. @@ -644,7 +647,6 @@ static bool tryEmitARCCopyWeakInit(CodeGenFunction &CGF, } init = castExpr->getSubExpr(); - continue; } return false; } @@ -665,10 +667,10 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, const ValueDecl *D, EmitStoreThroughLValue(RValue::get(value), lvalue, true); return; } - + if (const CXXDefaultInitExpr *DIE = dyn_cast<CXXDefaultInitExpr>(init)) init = DIE->getExpr(); - + // If we're emitting a value with lifetime, we have to do the // initialization *before* we leave the cleanup scopes. if (const ExprWithCleanups *ewc = dyn_cast<ExprWithCleanups>(init)) { @@ -715,8 +717,7 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, const ValueDecl *D, llvm_unreachable("present but none"); case Qualifiers::OCL_ExplicitNone: - // nothing to do - value = EmitScalarExpr(init); + value = EmitARCUnsafeUnretainedScalarExpr(init); break; case Qualifiers::OCL_Strong: { @@ -819,7 +820,7 @@ static bool canEmitInitWithFewStoresAfterMemset(llvm::Constant *Init, } return true; } - + if (llvm::ConstantDataSequential *CDS = dyn_cast<llvm::ConstantDataSequential>(Init)) { for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { @@ -848,9 +849,9 @@ static void emitStoresForInitAfterMemset(llvm::Constant *Init, llvm::Value *Loc, Builder.CreateDefaultAlignedStore(Init, Loc, isVolatile); return; } - - if (llvm::ConstantDataSequential *CDS = - dyn_cast<llvm::ConstantDataSequential>(Init)) { + + if (llvm::ConstantDataSequential *CDS = + dyn_cast<llvm::ConstantDataSequential>(Init)) { for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { llvm::Constant *Elt = CDS->getElementAsConstant(i); @@ -877,7 +878,6 @@ static void emitStoresForInitAfterMemset(llvm::Constant *Init, llvm::Value *Loc, } } - /// shouldUseMemSetPlusStoresToInitialize - Decide whether we should use memset /// plus some stores to initialize a local variable instead of using a memcpy /// from a constant global. It is beneficial to use memset if the global is all @@ -907,18 +907,29 @@ void CodeGenFunction::EmitAutoVarDecl(const VarDecl &D) { EmitAutoVarCleanups(emission); } +/// shouldEmitLifetimeMarkers - Decide whether we need emit the life-time +/// markers. +static bool shouldEmitLifetimeMarkers(const CodeGenOptions &CGOpts, + const LangOptions &LangOpts) { + // Asan uses markers for use-after-scope checks. + if (CGOpts.SanitizeAddressUseAfterScope) + return true; + + // Disable lifetime markers in msan builds. + // FIXME: Remove this when msan works with lifetime markers. + if (LangOpts.Sanitize.has(SanitizerKind::Memory)) + return false; + + // For now, only in optimized builds. + return CGOpts.OptimizationLevel != 0; +} + /// Emit a lifetime.begin marker if some criteria are satisfied. /// \return a pointer to the temporary size Value if a marker was emitted, null /// otherwise llvm::Value *CodeGenFunction::EmitLifetimeStart(uint64_t Size, llvm::Value *Addr) { - // For now, only in optimized builds. - if (CGM.getCodeGenOpts().OptimizationLevel == 0) - return nullptr; - - // Disable lifetime markers in msan builds. 
- // FIXME: Remove this when msan works with lifetime markers. - if (getLangOpts().Sanitize.has(SanitizerKind::Memory)) + if (!shouldEmitLifetimeMarkers(CGM.getCodeGenOpts(), getLangOpts())) return nullptr; llvm::Value *SizeV = llvm::ConstantInt::get(Int64Ty, Size); @@ -1086,8 +1097,8 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { // Emit debug info for local var declaration. if (HaveInsertPoint()) if (CGDebugInfo *DI = getDebugInfo()) { - if (CGM.getCodeGenOpts().getDebugInfo() - >= CodeGenOptions::LimitedDebugInfo) { + if (CGM.getCodeGenOpts().getDebugInfo() >= + codegenoptions::LimitedDebugInfo) { DI->setLocation(D.getLocation()); DI->EmitDeclareOfAutoVariable(&D, address.getPointer(), Builder); } @@ -1163,6 +1174,7 @@ bool CodeGenFunction::isTrivialInitializer(const Expr *Init) { return false; } + void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) { assert(emission.Variable && "emission was not valid!"); @@ -1250,7 +1262,7 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) { llvm::GlobalValue::PrivateLinkage, constant, Name); GV->setAlignment(Loc.getAlignment().getQuantity()); - GV->setUnnamedAddr(true); + GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); Address SrcPtr = Address(GV, Loc.getAlignment()); if (SrcPtr.getType() != BP) @@ -1381,13 +1393,10 @@ void CodeGenFunction::EmitAutoVarCleanups(const AutoVarEmission &emission) { // Make sure we call @llvm.lifetime.end. This needs to happen // *last*, so the cleanup needs to be pushed *first*. - if (emission.useLifetimeMarkers()) { - EHStack.pushCleanup<CallLifetimeEnd>(NormalCleanup, + if (emission.useLifetimeMarkers()) + EHStack.pushCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker, emission.getAllocatedAddress(), emission.getSizeForLifetimeMarkers()); - EHCleanupScope &cleanup = cast<EHCleanupScope>(*EHStack.begin()); - cleanup.setLifetimeMarker(); - } // Check the type for a cleanup. if (QualType::DestructionKind dtorKind = D.getType().isDestructedType()) @@ -1662,7 +1671,7 @@ namespace { ElementType, ElementAlign, Destroyer); } }; -} +} // end anonymous namespace /// pushIrregularPartialArrayCleanup - Push an EH cleanup to destroy /// already-constructed elements of the given array. The cleanup @@ -1731,7 +1740,7 @@ namespace { CGF.EmitARCRelease(Param, Precise); } }; -} +} // end anonymous namespace /// Emit an alloca (or GlobalValue depending on target) /// for the specified parameter and set up LocalDeclMap. @@ -1852,8 +1861,8 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg, // Emit debug info for param declaration. 
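The cleanup reordering above relies on the EH cleanup stack being LIFO: the lifetime.end cleanup is pushed before the destructor cleanup so that it runs after it. A minimal standalone model of that ordering (plain C++, only to show the LIFO behaviour; EHScopeStack itself is far richer):

#include <cstdio>
#include <functional>
#include <vector>

struct CleanupStack {
  std::vector<std::function<void()>> cleanups;
  void push(std::function<void()> fn) { cleanups.push_back(std::move(fn)); }
  ~CleanupStack() {
    // Run cleanups in reverse order of pushes, like scope exit does.
    for (auto it = cleanups.rbegin(); it != cleanups.rend(); ++it)
      (*it)();
  }
};

int main() {
  CleanupStack scope;
  scope.push([] { std::puts("lifetime.end"); }); // pushed first...
  scope.push([] { std::puts("run dtor"); });     // ...so the dtor runs first
}                                                // prints: run dtor, lifetime.end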
if (CGDebugInfo *DI = getDebugInfo()) { - if (CGM.getCodeGenOpts().getDebugInfo() - >= CodeGenOptions::LimitedDebugInfo) { + if (CGM.getCodeGenOpts().getDebugInfo() >= + codegenoptions::LimitedDebugInfo) { DI->EmitDeclareOfArgVariable(&D, DeclPtr.getPointer(), ArgNo, Builder); } } @@ -1861,3 +1870,10 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg, if (D.hasAttr<AnnotateAttr>()) EmitVarAnnotations(&D, DeclPtr.getPointer()); } + +void CodeGenModule::EmitOMPDeclareReduction(const OMPDeclareReductionDecl *D, + CodeGenFunction *CGF) { + if (!LangOpts.OpenMP || (!LangOpts.EmitAllDecls && !D->isUsed())) + return; + getOpenMPRuntime().emitUserDefinedReduction(CGF, D); +} diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGDeclCXX.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGDeclCXX.cpp index adba731..89d142e 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGDeclCXX.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGDeclCXX.cpp @@ -86,13 +86,21 @@ static void EmitDeclDestroy(CodeGenFunction &CGF, const VarDecl &D, llvm::Constant *function; llvm::Constant *argument; - // Special-case non-array C++ destructors, where there's a function - // with the right signature that we can just call. - const CXXRecordDecl *record = nullptr; - if (dtorKind == QualType::DK_cxx_destructor && - (record = type->getAsCXXRecordDecl())) { - assert(!record->hasTrivialDestructor()); - CXXDestructorDecl *dtor = record->getDestructor(); + // Special-case non-array C++ destructors, if they have the right signature. + // Under some ABIs, destructors return this instead of void, and cannot be + // passed directly to __cxa_atexit if the target does not allow this mismatch. + const CXXRecordDecl *Record = type->getAsCXXRecordDecl(); + bool CanRegisterDestructor = + Record && (!CGM.getCXXABI().HasThisReturn( + GlobalDecl(Record->getDestructor(), Dtor_Complete)) || + CGM.getCXXABI().canCallMismatchedFunctionType()); + // If __cxa_atexit is disabled via a flag, a different helper function is + // generated elsewhere which uses atexit instead, and it takes the destructor + // directly. + bool UsingExternalHelper = !CGM.getCodeGenOpts().CXAAtExit; + if (Record && (CanRegisterDestructor || UsingExternalHelper)) { + assert(!Record->hasTrivialDestructor()); + CXXDestructorDecl *dtor = Record->getDestructor(); function = CGM.getAddrOfCXXStructor(dtor, StructorType::Complete); argument = llvm::ConstantExpr::getBitCast( @@ -304,6 +312,17 @@ void CodeGenModule::EmitCXXGlobalVarDeclInitFunc(const VarDecl *D, llvm::GlobalVariable *Addr, bool PerformInit) { + + // According to E.2.3.1 in CUDA-7.5 Programming guide: __device__, + // __constant__ and __shared__ variables defined in namespace scope, + // that are of class type, cannot have a non-empty constructor. All + // the checks have been done in Sema by now. Whatever initializers + // are allowed are empty and we just need to ignore them here. + if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice && + (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>() || + D->hasAttr<CUDASharedAttr>())) + return; + // Check if we've already initialized this decl. 
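EmitDeclDestroy above decides whether a destructor can be handed straight to __cxa_atexit or must go through an atexit-style helper (for example when the ABI makes destructors return 'this'). A minimal standalone sketch of the helper-style registration, using std::atexit; the real lowering prefers __cxa_atexit with __dso_handle when available:

#include <cstdio>
#include <cstdlib>
#include <new>

struct Guard {
  ~Guard() { std::puts("Guard destroyed at exit"); }
};

alignas(Guard) static unsigned char storage[sizeof(Guard)];

static void destroy_guard() {
  reinterpret_cast<Guard *>(storage)->~Guard();
}

int main() {
  new (storage) Guard();       // dynamic initialization of the static object
  std::atexit(destroy_guard);  // register teardown, analogous to __cxa_atexit
}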
auto I = DelayedCXXInitPosition.find(D); if (I != DelayedCXXInitPosition.end() && I->second == ~0U) @@ -587,8 +606,8 @@ llvm::Function *CodeGenFunction::generateDestroyHelper( getContext().VoidPtrTy); args.push_back(&dst); - const CGFunctionInfo &FI = CGM.getTypes().arrangeFreeFunctionDeclaration( - getContext().VoidTy, args, FunctionType::ExtInfo(), /*variadic=*/false); + const CGFunctionInfo &FI = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(getContext().VoidTy, args); llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); llvm::Function *fn = CGM.CreateGlobalInitOrDestructFunction( FTy, "__cxx_global_array_dtor", FI, VD->getLocation()); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGException.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGException.cpp index fce2e75..4a7dc42 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGException.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGException.cpp @@ -686,8 +686,10 @@ llvm::BasicBlock *CodeGenFunction::getInvokeDestImpl() { assert(EHStack.requiresLandingPad()); assert(!EHStack.empty()); - // If exceptions are disabled, there are usually no landingpads. However, when - // SEH is enabled, functions using SEH still get landingpads. + // If exceptions are disabled and SEH is not in use, then there is no invoke + // destination. SEH "works" even if exceptions are off. In practice, this + // means that C++ destructors and other EH cleanups don't run, which is + // consistent with MSVC's behavior. const LangOptions &LO = CGM.getLangOpts(); if (!LO.Exceptions) { if (!LO.Borland && !LO.MicrosoftExt) @@ -1326,11 +1328,13 @@ llvm::BasicBlock *CodeGenFunction::getTerminateHandler() { TerminateHandler = createBasicBlock("terminate.handler"); Builder.SetInsertPoint(TerminateHandler); llvm::Value *Exn = nullptr; + SaveAndRestore<llvm::Instruction *> RestoreCurrentFuncletPad( + CurrentFuncletPad); if (EHPersonality::get(*this).usesFuncletPads()) { llvm::Value *ParentPad = CurrentFuncletPad; if (!ParentPad) ParentPad = llvm::ConstantTokenNone::get(CGM.getLLVMContext()); - Builder.CreateCleanupPad(ParentPad); + CurrentFuncletPad = Builder.CreateCleanupPad(ParentPad); } else { if (getLangOpts().CPlusPlus) Exn = getExceptionFromSlot(); @@ -1422,12 +1426,8 @@ struct PerformSEHFinally final : EHScopeStack::Cleanup { Args.add(RValue::get(FP), ArgTys[1]); // Arrange a two-arg function info and type. 
- FunctionProtoType::ExtProtoInfo EPI; - const auto *FPT = cast<FunctionProtoType>( - Context.getFunctionType(Context.VoidTy, ArgTys, EPI)); const CGFunctionInfo &FnInfo = - CGM.getTypes().arrangeFreeFunctionCall(Args, FPT, - /*chainCall=*/false); + CGM.getTypes().arrangeBuiltinFunctionCall(Context.VoidTy, Args); CGF.EmitCall(FnInfo, OutlinedFinally, ReturnValueSlot(), Args); } @@ -1623,14 +1623,13 @@ void CodeGenFunction::startOutlinedSEHHelper(CodeGenFunction &ParentCGF, SmallString<128> Name; { llvm::raw_svector_ostream OS(Name); - const Decl *ParentCodeDecl = ParentCGF.CurCodeDecl; - const NamedDecl *Parent = dyn_cast_or_null<NamedDecl>(ParentCodeDecl); - assert(Parent && "FIXME: handle unnamed decls (lambdas, blocks) with SEH"); + const FunctionDecl *ParentSEHFn = ParentCGF.CurSEHParent; + assert(ParentSEHFn && "No CurSEHParent!"); MangleContext &Mangler = CGM.getCXXABI().getMangleContext(); if (IsFilter) - Mangler.mangleSEHFilterExpression(Parent, OS); + Mangler.mangleSEHFilterExpression(ParentSEHFn, OS); else - Mangler.mangleSEHFinallyBlock(Parent, OS); + Mangler.mangleSEHFinallyBlock(ParentSEHFn, OS); } FunctionArgList Args; @@ -1656,8 +1655,8 @@ void CodeGenFunction::startOutlinedSEHHelper(CodeGenFunction &ParentCGF, QualType RetTy = IsFilter ? getContext().LongTy : getContext().VoidTy; llvm::Function *ParentFn = ParentCGF.CurFn; - const CGFunctionInfo &FnInfo = CGM.getTypes().arrangeFreeFunctionDeclaration( - RetTy, Args, FunctionType::ExtInfo(), /*isVariadic=*/false); + const CGFunctionInfo &FnInfo = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(RetTy, Args); llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); llvm::Function *Fn = llvm::Function::Create( @@ -1677,6 +1676,7 @@ void CodeGenFunction::startOutlinedSEHHelper(CodeGenFunction &ParentCGF, StartFunction(GlobalDecl(), RetTy, Fn, FnInfo, Args, OutlinedStmt->getLocStart(), OutlinedStmt->getLocStart()); + CurSEHParent = ParentCGF.CurSEHParent; CGM.SetLLVMFunctionAttributes(nullptr, FnInfo, CurFn); EmitCapturedLocals(ParentCGF, OutlinedStmt, IsFilter); @@ -1708,12 +1708,6 @@ CodeGenFunction::GenerateSEHFinallyFunction(CodeGenFunction &ParentCGF, const Stmt *FinallyBlock = Finally.getBlock(); startOutlinedSEHHelper(ParentCGF, false, FinallyBlock); - // Mark finally block calls as nounwind and noinline to make LLVM's job a - // little easier. - // FIXME: Remove these restrictions in the future. - CurFn->addFnAttr(llvm::Attribute::NoUnwind); - CurFn->addFnAttr(llvm::Attribute::NoInline); - // Emit the original filter expression, convert to i32, and return. 
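The helpers above outline SEH filter and __finally blocks into separate functions, now mangled from CurSEHParent rather than the nearest NamedDecl. A source-level illustration of the construct being outlined (Microsoft SEH extension; builds with MSVC or clang in MS-compatibility mode; do_work is an assumed external function):

extern "C" void do_work();   // may fault or return early

void demo() {
  __try {
    do_work();
  } __finally {
    // This block is what gets outlined into a separate "finally" helper so it
    // can be invoked on both the normal path and the exceptional unwind path.
  }
}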
EmitStmt(FinallyBlock); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGExpr.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGExpr.cpp index 507ce3d..5f3b290 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGExpr.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGExpr.cpp @@ -11,13 +11,14 @@ // //===----------------------------------------------------------------------===// -#include "CodeGenFunction.h" #include "CGCXXABI.h" #include "CGCall.h" +#include "CGCleanup.h" #include "CGDebugInfo.h" #include "CGObjCRuntime.h" #include "CGOpenMPRuntime.h" #include "CGRecordLayout.h" +#include "CodeGenFunction.h" #include "CodeGenModule.h" #include "TargetInfo.h" #include "clang/AST/ASTContext.h" @@ -32,6 +33,8 @@ #include "llvm/IR/MDBuilder.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/Path.h" +#include "llvm/Transforms/Utils/SanitizerStats.h" using namespace clang; using namespace CodeGen; @@ -65,8 +68,6 @@ Address CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, CharUnits Align, /// block. llvm::AllocaInst *CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, const Twine &Name) { - if (!Builder.isNamePreserving()) - return new llvm::AllocaInst(Ty, nullptr, "", AllocaInsertPt); return new llvm::AllocaInst(Ty, nullptr, Name, AllocaInsertPt); } @@ -361,9 +362,16 @@ EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) { ConvertTypeForMem(E->getType()) ->getPointerTo(Object.getAddressSpace())), Object.getAlignment()); - // We should not have emitted the initializer for this temporary as a - // constant. - assert(!Var->hasInitializer()); + + // createReferenceTemporary will promote the temporary to a global with a + // constant initializer if it can. It can only do this to a value of + // ARC-manageable type if the value is global and therefore "immune" to + // ref-counting operations. Therefore we have no need to emit either a + // dynamic initialization or a cleanup and we can just return the address + // of the temporary. 
+ if (Var->hasInitializer()) + return MakeAddrLValue(Object, M->getType(), AlignmentSource::Decl); + Var->setInitializer(CGM.EmitNullConstant(E->getType())); } LValue RefTempDst = MakeAddrLValue(Object, M->getType(), @@ -416,6 +424,23 @@ EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) { EmitAnyExprToMem(E, Object, Qualifiers(), /*IsInit*/true); } } else { + switch (M->getStorageDuration()) { + case SD_Automatic: + case SD_FullExpression: + if (auto *Size = EmitLifetimeStart( + CGM.getDataLayout().getTypeAllocSize(Object.getElementType()), + Object.getPointer())) { + if (M->getStorageDuration() == SD_Automatic) + pushCleanupAfterFullExpr<CallLifetimeEnd>(NormalEHLifetimeMarker, + Object, Size); + else + pushFullExprCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker, Object, + Size); + } + break; + default: + break; + } EmitAnyExprToMem(E, Object, Qualifiers(), /*IsInit*/true); } pushTemporaryCleanup(*this, M, E, Object); @@ -577,7 +602,7 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc, if (Checks.size() > 0) { llvm::Constant *StaticData[] = { - EmitCheckSourceLocation(Loc), + EmitCheckSourceLocation(Loc), EmitCheckTypeDescriptor(Ty), llvm::ConstantInt::get(SizeTy, AlignVal), llvm::ConstantInt::get(Int8Ty, TCK) @@ -824,7 +849,8 @@ Address CodeGenFunction::EmitPointerWithAlignment(const Expr *E, getNaturalPointeeTypeAlignment(E->getType(), Source)); } - if (SanOpts.has(SanitizerKind::CFIUnrelatedCast)) { + if (SanOpts.has(SanitizerKind::CFIUnrelatedCast) && + CE->getCastKind() == CK_BitCast) { if (auto PT = E->getType()->getAs<PointerType>()) EmitVTablePtrCheckForCast(PT->getPointeeType(), Addr.getPointer(), /*MayBeNull=*/true, @@ -1265,10 +1291,10 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile, } // Atomic operations have to be done on integral types. 
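The switch over getStorageDuration() above distinguishes temporaries that die at the end of the full-expression from reference-lifetime-extended ones, and places the lifetime.end cleanup accordingly. A source-level illustration of the two cases (types and functions are illustrative):

struct Big { int data[32]; };

static Big make() { return Big(); }
static int use(const Big &b) { return b.data[0]; }

int demo() {
  // SD_FullExpression: the temporary dies at the ';', so lifetime.end is a
  // full-expression cleanup.
  int a = use(make());

  // SD_Automatic: binding to a reference extends the temporary to the end of
  // the enclosing scope, so lifetime.end is pushed as a scope cleanup.
  const Big &b = make();
  return a + b.data[0];
}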
- if (Ty->isAtomicType() || typeIsSuitableForInlineAtomic(Ty, Volatile)) { - LValue lvalue = + LValue AtomicLValue = LValue::MakeAddr(Addr, Ty, getContext(), AlignSource, TBAAInfo); - return EmitAtomicLoad(lvalue, Loc).getScalarVal(); + if (Ty->isAtomicType() || LValueIsSuitableForInlineAtomic(AtomicLValue)) { + return EmitAtomicLoad(AtomicLValue, Loc).getScalarVal(); } llvm::LoadInst *Load = Builder.CreateLoad(Addr, Volatile); @@ -1376,12 +1402,11 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr, Value = EmitToMemory(Value, Ty); + LValue AtomicLValue = + LValue::MakeAddr(Addr, Ty, getContext(), AlignSource, TBAAInfo); if (Ty->isAtomicType() || - (!isInit && typeIsSuitableForInlineAtomic(Ty, Volatile))) { - EmitAtomicStore(RValue::get(Value), - LValue::MakeAddr(Addr, Ty, getContext(), - AlignSource, TBAAInfo), - isInit); + (!isInit && LValueIsSuitableForInlineAtomic(AtomicLValue))) { + EmitAtomicStore(RValue::get(Value), AtomicLValue, isInit); return; } @@ -1733,8 +1758,7 @@ void CodeGenFunction::EmitStoreThroughExtVectorComponentLValue(RValue Src, if (const VectorType *VTy = Dst.getType()->getAs<VectorType>()) { unsigned NumSrcElts = VTy->getNumElements(); - unsigned NumDstElts = - cast<llvm::VectorType>(Vec->getType())->getNumElements(); + unsigned NumDstElts = Vec->getType()->getVectorNumElements(); if (NumDstElts == NumSrcElts) { // Use shuffle vector is the src and destination are the same number of // elements and restore the vector mask since it is on the side it will be @@ -1947,6 +1971,21 @@ LValue CodeGenFunction::EmitLoadOfReferenceLValue(Address RefAddr, return MakeAddrLValue(Addr, RefTy->getPointeeType(), Source); } +Address CodeGenFunction::EmitLoadOfPointer(Address Ptr, + const PointerType *PtrTy, + AlignmentSource *Source) { + llvm::Value *Addr = Builder.CreateLoad(Ptr); + return Address(Addr, getNaturalTypeAlignment(PtrTy->getPointeeType(), Source, + /*forPointeeType=*/true)); +} + +LValue CodeGenFunction::EmitLoadOfPointerLValue(Address PtrAddr, + const PointerType *PtrTy) { + AlignmentSource Source; + Address Addr = EmitLoadOfPointer(PtrAddr, PtrTy, &Source); + return MakeAddrLValue(Addr, PtrTy->getPointeeType(), Source); +} + static LValue EmitGlobalVarDeclLValue(CodeGenFunction &CGF, const Expr *E, const VarDecl *VD) { QualType T = E->getType(); @@ -2066,12 +2105,11 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) { if (auto *FD = LambdaCaptureFields.lookup(VD)) return EmitCapturedFieldLValue(*this, FD, CXXABIThisValue); else if (CapturedStmtInfo) { - auto it = LocalDeclMap.find(VD); - if (it != LocalDeclMap.end()) { - if (auto RefTy = VD->getType()->getAs<ReferenceType>()) { - return EmitLoadOfReferenceLValue(it->second, RefTy); - } - return MakeAddrLValue(it->second, T); + auto I = LocalDeclMap.find(VD); + if (I != LocalDeclMap.end()) { + if (auto RefTy = VD->getType()->getAs<ReferenceType>()) + return EmitLoadOfReferenceLValue(I->second, RefTy); + return MakeAddrLValue(I->second, T); } LValue CapLVal = EmitCapturedFieldLValue(*this, CapturedStmtInfo->lookup(VD), @@ -2210,13 +2248,15 @@ LValue CodeGenFunction::EmitUnaryOpLValue(const UnaryOperator *E) { return LV; } - assert(E->getSubExpr()->getType()->isAnyComplexType()); + QualType T = ExprTy->castAs<ComplexType>()->getElementType(); Address Component = (E->getOpcode() == UO_Real ? 
emitAddrOfRealComponent(LV.getAddress(), LV.getType()) : emitAddrOfImagComponent(LV.getAddress(), LV.getType())); - return MakeAddrLValue(Component, ExprTy, LV.getAlignmentSource()); + LValue ElemLV = MakeAddrLValue(Component, T, LV.getAlignmentSource()); + ElemLV.getQuals().addQualifiers(LV.getQuals()); + return ElemLV; } case UO_PreInc: case UO_PreDec: { @@ -2302,7 +2342,7 @@ llvm::Constant *CodeGenFunction::EmitCheckTypeDescriptor(QualType T) { auto *GV = new llvm::GlobalVariable( CGM.getModule(), Descriptor->getType(), /*isConstant=*/true, llvm::GlobalVariable::PrivateLinkage, Descriptor); - GV->setUnnamedAddr(true); + GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); CGM.getSanitizerMetadata()->disableSanitizerForGlobal(GV); // Remember the descriptor for this type. @@ -2352,7 +2392,33 @@ llvm::Constant *CodeGenFunction::EmitCheckSourceLocation(SourceLocation Loc) { PresumedLoc PLoc = getContext().getSourceManager().getPresumedLoc(Loc); if (PLoc.isValid()) { - auto FilenameGV = CGM.GetAddrOfConstantCString(PLoc.getFilename(), ".src"); + StringRef FilenameString = PLoc.getFilename(); + + int PathComponentsToStrip = + CGM.getCodeGenOpts().EmitCheckPathComponentsToStrip; + if (PathComponentsToStrip < 0) { + assert(PathComponentsToStrip != INT_MIN); + int PathComponentsToKeep = -PathComponentsToStrip; + auto I = llvm::sys::path::rbegin(FilenameString); + auto E = llvm::sys::path::rend(FilenameString); + while (I != E && --PathComponentsToKeep) + ++I; + + FilenameString = FilenameString.substr(I - E); + } else if (PathComponentsToStrip > 0) { + auto I = llvm::sys::path::begin(FilenameString); + auto E = llvm::sys::path::end(FilenameString); + while (I != E && PathComponentsToStrip--) + ++I; + + if (I != E) + FilenameString = + FilenameString.substr(I - llvm::sys::path::begin(FilenameString)); + else + FilenameString = llvm::sys::path::filename(FilenameString); + } + + auto FilenameGV = CGM.GetAddrOfConstantCString(FilenameString, ".src"); CGM.getSanitizerMetadata()->disableSanitizerForGlobal( cast<llvm::GlobalVariable>(FilenameGV.getPointer())); Filename = FilenameGV.getPointer(); @@ -2483,24 +2549,26 @@ void CodeGenFunction::EmitCheck( Branch->setMetadata(llvm::LLVMContext::MD_prof, Node); EmitBlock(Handlers); - // Emit handler arguments and create handler function type. - llvm::Constant *Info = llvm::ConstantStruct::getAnon(StaticArgs); - auto *InfoPtr = - new llvm::GlobalVariable(CGM.getModule(), Info->getType(), false, - llvm::GlobalVariable::PrivateLinkage, Info); - InfoPtr->setUnnamedAddr(true); - CGM.getSanitizerMetadata()->disableSanitizerForGlobal(InfoPtr); - + // Handler functions take an i8* pointing to the (handler-specific) static + // information block, followed by a sequence of intptr_t arguments + // representing operand values. SmallVector<llvm::Value *, 4> Args; SmallVector<llvm::Type *, 4> ArgTypes; Args.reserve(DynamicArgs.size() + 1); ArgTypes.reserve(DynamicArgs.size() + 1); - // Handler functions take an i8* pointing to the (handler-specific) static - // information block, followed by a sequence of intptr_t arguments - // representing operand values. - Args.push_back(Builder.CreateBitCast(InfoPtr, Int8PtrTy)); - ArgTypes.push_back(Int8PtrTy); + // Emit handler arguments and create handler function type. 
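EmitCheckSourceLocation above now honours a strip-path-components option: a positive count drops that many leading components (falling back to the bare filename if everything would be dropped), a negative count keeps only that many trailing components. A simplified, self-contained model of that behaviour using plain '/'-separated strings (the real code walks llvm::sys::path iterators, so exact component boundaries may differ):

#include <algorithm>
#include <cstdio>
#include <string>
#include <vector>

static std::string stripPathComponents(const std::string &path, int n) {
  std::vector<std::string> parts;
  std::string piece;
  for (char c : path) {
    if (c == '/') { if (!piece.empty()) parts.push_back(piece); piece.clear(); }
    else piece += c;
  }
  if (!piece.empty()) parts.push_back(piece);
  if (parts.empty()) return path;

  size_t first = 0;
  if (n < 0) {
    size_t keep = std::min(parts.size(), static_cast<size_t>(-n));
    first = parts.size() - keep;            // keep the last |n| components
  } else if (n > 0) {
    first = std::min(parts.size() - 1,      // keep at least the filename
                     static_cast<size_t>(n));
  }

  std::string out;
  for (size_t i = first; i < parts.size(); ++i)
    out += (out.empty() ? "" : "/") + parts[i];
  return out;
}

int main() {
  std::printf("%s\n", stripPathComponents("src/lib/foo.c", 1).c_str());  // lib/foo.c
  std::printf("%s\n", stripPathComponents("src/lib/foo.c", -1).c_str()); // foo.c
}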
+ if (!StaticArgs.empty()) { + llvm::Constant *Info = llvm::ConstantStruct::getAnon(StaticArgs); + auto *InfoPtr = + new llvm::GlobalVariable(CGM.getModule(), Info->getType(), false, + llvm::GlobalVariable::PrivateLinkage, Info); + InfoPtr->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); + CGM.getSanitizerMetadata()->disableSanitizerForGlobal(InfoPtr); + Args.push_back(Builder.CreateBitCast(InfoPtr, Int8PtrTy)); + ArgTypes.push_back(Int8PtrTy); + } + for (size_t i = 0, n = DynamicArgs.size(); i != n; ++i) { Args.push_back(EmitCheckValue(DynamicArgs[i])); ArgTypes.push_back(IntPtrTy); @@ -2532,10 +2600,9 @@ void CodeGenFunction::EmitCheck( EmitBlock(Cont); } -void CodeGenFunction::EmitCfiSlowPathCheck(llvm::Value *Cond, - llvm::ConstantInt *TypeId, - llvm::Value *Ptr) { - auto &Ctx = getLLVMContext(); +void CodeGenFunction::EmitCfiSlowPathCheck( + SanitizerMask Kind, llvm::Value *Cond, llvm::ConstantInt *TypeId, + llvm::Value *Ptr, ArrayRef<llvm::Constant *> StaticArgs) { llvm::BasicBlock *Cont = createBasicBlock("cfi.cont"); llvm::BasicBlock *CheckBB = createBasicBlock("cfi.slowpath"); @@ -2547,19 +2614,122 @@ void CodeGenFunction::EmitCfiSlowPathCheck(llvm::Value *Cond, EmitBlock(CheckBB); - llvm::Constant *SlowPathFn = CGM.getModule().getOrInsertFunction( - "__cfi_slowpath", - llvm::FunctionType::get( - llvm::Type::getVoidTy(Ctx), - {llvm::Type::getInt64Ty(Ctx), - llvm::PointerType::getUnqual(llvm::Type::getInt8Ty(Ctx))}, - false)); - llvm::CallInst *CheckCall = Builder.CreateCall(SlowPathFn, {TypeId, Ptr}); + bool WithDiag = !CGM.getCodeGenOpts().SanitizeTrap.has(Kind); + + llvm::CallInst *CheckCall; + if (WithDiag) { + llvm::Constant *Info = llvm::ConstantStruct::getAnon(StaticArgs); + auto *InfoPtr = + new llvm::GlobalVariable(CGM.getModule(), Info->getType(), false, + llvm::GlobalVariable::PrivateLinkage, Info); + InfoPtr->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); + CGM.getSanitizerMetadata()->disableSanitizerForGlobal(InfoPtr); + + llvm::Constant *SlowPathDiagFn = CGM.getModule().getOrInsertFunction( + "__cfi_slowpath_diag", + llvm::FunctionType::get(VoidTy, {Int64Ty, Int8PtrTy, Int8PtrTy}, + false)); + CheckCall = Builder.CreateCall( + SlowPathDiagFn, + {TypeId, Ptr, Builder.CreateBitCast(InfoPtr, Int8PtrTy)}); + } else { + llvm::Constant *SlowPathFn = CGM.getModule().getOrInsertFunction( + "__cfi_slowpath", + llvm::FunctionType::get(VoidTy, {Int64Ty, Int8PtrTy}, false)); + CheckCall = Builder.CreateCall(SlowPathFn, {TypeId, Ptr}); + } + CheckCall->setDoesNotThrow(); EmitBlock(Cont); } +// This function is basically a switch over the CFI failure kind, which is +// extracted from CFICheckFailData (1st function argument). Each case is either +// llvm.trap or a call to one of the two runtime handlers, based on +// -fsanitize-trap and -fsanitize-recover settings. Default case (invalid +// failure kind) traps, but this should really never happen. CFICheckFailData +// can be nullptr if the calling module has -fsanitize-trap behavior for this +// check kind; in this case __cfi_check_fail traps as well. 
+void CodeGenFunction::EmitCfiCheckFail() { + SanitizerScope SanScope(this); + FunctionArgList Args; + ImplicitParamDecl ArgData(getContext(), nullptr, SourceLocation(), nullptr, + getContext().VoidPtrTy); + ImplicitParamDecl ArgAddr(getContext(), nullptr, SourceLocation(), nullptr, + getContext().VoidPtrTy); + Args.push_back(&ArgData); + Args.push_back(&ArgAddr); + + const CGFunctionInfo &FI = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(getContext().VoidTy, Args); + + llvm::Function *F = llvm::Function::Create( + llvm::FunctionType::get(VoidTy, {VoidPtrTy, VoidPtrTy}, false), + llvm::GlobalValue::WeakODRLinkage, "__cfi_check_fail", &CGM.getModule()); + F->setVisibility(llvm::GlobalValue::HiddenVisibility); + + StartFunction(GlobalDecl(), CGM.getContext().VoidTy, F, FI, Args, + SourceLocation()); + + llvm::Value *Data = + EmitLoadOfScalar(GetAddrOfLocalVar(&ArgData), /*Volatile=*/false, + CGM.getContext().VoidPtrTy, ArgData.getLocation()); + llvm::Value *Addr = + EmitLoadOfScalar(GetAddrOfLocalVar(&ArgAddr), /*Volatile=*/false, + CGM.getContext().VoidPtrTy, ArgAddr.getLocation()); + + // Data == nullptr means the calling module has trap behaviour for this check. + llvm::Value *DataIsNotNullPtr = + Builder.CreateICmpNE(Data, llvm::ConstantPointerNull::get(Int8PtrTy)); + EmitTrapCheck(DataIsNotNullPtr); + + llvm::StructType *SourceLocationTy = + llvm::StructType::get(VoidPtrTy, Int32Ty, Int32Ty, nullptr); + llvm::StructType *CfiCheckFailDataTy = + llvm::StructType::get(Int8Ty, SourceLocationTy, VoidPtrTy, nullptr); + + llvm::Value *V = Builder.CreateConstGEP2_32( + CfiCheckFailDataTy, + Builder.CreatePointerCast(Data, CfiCheckFailDataTy->getPointerTo(0)), 0, + 0); + Address CheckKindAddr(V, getIntAlign()); + llvm::Value *CheckKind = Builder.CreateLoad(CheckKindAddr); + + llvm::Value *AllVtables = llvm::MetadataAsValue::get( + CGM.getLLVMContext(), + llvm::MDString::get(CGM.getLLVMContext(), "all-vtables")); + llvm::Value *ValidVtable = Builder.CreateZExt( + Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::type_test), + {Addr, AllVtables}), + IntPtrTy); + + const std::pair<int, SanitizerMask> CheckKinds[] = { + {CFITCK_VCall, SanitizerKind::CFIVCall}, + {CFITCK_NVCall, SanitizerKind::CFINVCall}, + {CFITCK_DerivedCast, SanitizerKind::CFIDerivedCast}, + {CFITCK_UnrelatedCast, SanitizerKind::CFIUnrelatedCast}, + {CFITCK_ICall, SanitizerKind::CFIICall}}; + + SmallVector<std::pair<llvm::Value *, SanitizerMask>, 5> Checks; + for (auto CheckKindMaskPair : CheckKinds) { + int Kind = CheckKindMaskPair.first; + SanitizerMask Mask = CheckKindMaskPair.second; + llvm::Value *Cond = + Builder.CreateICmpNE(CheckKind, llvm::ConstantInt::get(Int8Ty, Kind)); + if (CGM.getLangOpts().Sanitize.has(Mask)) + EmitCheck(std::make_pair(Cond, Mask), "cfi_check_fail", {}, + {Data, Addr, ValidVtable}); + else + EmitTrapCheck(Cond); + } + + FinishFunction(); + // The only reference to this function will be created during LTO link. + // Make sure it survives until then. 
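The generated __cfi_check_fail above is, conceptually, a null check on the static data followed by a per-kind dispatch that either traps or calls a diagnostic handler, depending on the -fsanitize-trap/-fsanitize-recover settings. A rough standalone model of that control flow (handle_cfi_failure is a hypothetical stand-in for the runtime handlers; the real function is emitted as IR, not written as source, and folds the sanitizer settings into each case):

extern "C" void handle_cfi_failure(int kind, void *addr);  // hypothetical

enum CfiTypeCheckKind {
  CFITCK_VCall = 0,
  CFITCK_NVCall,
  CFITCK_DerivedCast,
  CFITCK_UnrelatedCast,
  CFITCK_ICall,
};

struct CfiCheckFailData {
  unsigned char CheckKind;
  // source location and type descriptor follow in the real layout
};

extern "C" void cfi_check_fail_model(CfiCheckFailData *Data, void *Addr) {
  // Data == nullptr means the caller was built to trap for this check kind.
  if (!Data)
    __builtin_trap();

  switch (Data->CheckKind) {
  case CFITCK_VCall:
  case CFITCK_NVCall:
  case CFITCK_DerivedCast:
  case CFITCK_UnrelatedCast:
  case CFITCK_ICall:
    handle_cfi_failure(Data->CheckKind, Addr);  // diagnose (and maybe recover)
    break;
  default:
    __builtin_trap();                           // invalid kind: should not happen
  }
}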
+ CGM.addUsedGlobal(F); +} + void CodeGenFunction::EmitTrapCheck(llvm::Value *Checked) { llvm::BasicBlock *Cont = createBasicBlock("cont"); @@ -2827,22 +2997,55 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, return LV; } +static Address emitOMPArraySectionBase(CodeGenFunction &CGF, const Expr *Base, + AlignmentSource &AlignSource, + QualType BaseTy, QualType ElTy, + bool IsLowerBound) { + LValue BaseLVal; + if (auto *ASE = dyn_cast<OMPArraySectionExpr>(Base->IgnoreParenImpCasts())) { + BaseLVal = CGF.EmitOMPArraySectionExpr(ASE, IsLowerBound); + if (BaseTy->isArrayType()) { + Address Addr = BaseLVal.getAddress(); + AlignSource = BaseLVal.getAlignmentSource(); + + // If the array type was an incomplete type, we need to make sure + // the decay ends up being the right type. + llvm::Type *NewTy = CGF.ConvertType(BaseTy); + Addr = CGF.Builder.CreateElementBitCast(Addr, NewTy); + + // Note that VLA pointers are always decayed, so we don't need to do + // anything here. + if (!BaseTy->isVariableArrayType()) { + assert(isa<llvm::ArrayType>(Addr.getElementType()) && + "Expected pointer to array"); + Addr = CGF.Builder.CreateStructGEP(Addr, 0, CharUnits::Zero(), + "arraydecay"); + } + + return CGF.Builder.CreateElementBitCast(Addr, + CGF.ConvertTypeForMem(ElTy)); + } + CharUnits Align = CGF.getNaturalTypeAlignment(ElTy, &AlignSource); + return Address(CGF.Builder.CreateLoad(BaseLVal.getAddress()), Align); + } + return CGF.EmitPointerWithAlignment(Base, &AlignSource); +} + LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E, bool IsLowerBound) { - LValue Base; + QualType BaseTy; if (auto *ASE = dyn_cast<OMPArraySectionExpr>(E->getBase()->IgnoreParenImpCasts())) - Base = EmitOMPArraySectionExpr(ASE, IsLowerBound); + BaseTy = OMPArraySectionExpr::getBaseOriginalType(ASE); else - Base = EmitLValue(E->getBase()); - QualType BaseTy = Base.getType(); - llvm::Value *Idx = nullptr; + BaseTy = E->getBase()->getType(); QualType ResultExprTy; if (auto *AT = getContext().getAsArrayType(BaseTy)) ResultExprTy = AT->getElementType(); else ResultExprTy = BaseTy->getPointeeType(); - if (IsLowerBound || (!IsLowerBound && E->getColonLoc().isInvalid())) { + llvm::Value *Idx = nullptr; + if (IsLowerBound || E->getColonLoc().isInvalid()) { // Requesting lower bound or upper bound, but without provided length and // without ':' symbol for the default length -> length = 1. // Idx = LowerBound ?: 0; @@ -2853,9 +3056,9 @@ LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E, } else Idx = llvm::ConstantInt::getNullValue(IntPtrTy); } else { - // Try to emit length or lower bound as constant. If this is possible, 1 is - // subtracted from constant length or lower bound. Otherwise, emit LLVM IR - // (LB + Len) - 1. + // Try to emit length or lower bound as constant. If this is possible, 1 + // is subtracted from constant length or lower bound. Otherwise, emit LLVM + // IR (LB + Len) - 1. auto &C = CGM.getContext(); auto *Length = E->getLength(); llvm::APSInt ConstLength; @@ -2901,12 +3104,15 @@ LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E, Idx = llvm::ConstantInt::get(IntPtrTy, ConstLength + ConstLowerBound); } else { // Idx = ArraySize - 1; - if (auto *VAT = C.getAsVariableArrayType(BaseTy)) { + QualType ArrayTy = BaseTy->isPointerType() + ? 
E->getBase()->IgnoreParenImpCasts()->getType() + : BaseTy; + if (auto *VAT = C.getAsVariableArrayType(ArrayTy)) { Length = VAT->getSizeExpr(); if (Length->isIntegerConstantExpr(ConstLength, C)) Length = nullptr; } else { - auto *CAT = C.getAsConstantArrayType(BaseTy); + auto *CAT = C.getAsConstantArrayType(ArrayTy); ConstLength = CAT->getSize(); } if (Length) { @@ -2925,52 +3131,56 @@ LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E, } assert(Idx); - llvm::Value *EltPtr; - QualType FixedSizeEltType = ResultExprTy; + Address EltPtr = Address::invalid(); + AlignmentSource AlignSource; if (auto *VLA = getContext().getAsVariableArrayType(ResultExprTy)) { + // The base must be a pointer, which is not an aggregate. Emit + // it. It needs to be emitted first in case it's what captures + // the VLA bounds. + Address Base = + emitOMPArraySectionBase(*this, E->getBase(), AlignSource, BaseTy, + VLA->getElementType(), IsLowerBound); // The element count here is the total number of non-VLA elements. - llvm::Value *numElements = getVLASize(VLA).first; - FixedSizeEltType = getFixedSizeElementType(getContext(), VLA); + llvm::Value *NumElements = getVLASize(VLA).first; // Effectively, the multiply by the VLA size is part of the GEP. // GEP indexes are signed, and scaling an index isn't permitted to // signed-overflow, so we use the same semantics for our explicit // multiply. We suppress this if overflow is not undefined behavior. - if (getLangOpts().isSignedOverflowDefined()) { - Idx = Builder.CreateMul(Idx, numElements); - EltPtr = Builder.CreateGEP(Base.getPointer(), Idx, "arrayidx"); - } else { - Idx = Builder.CreateNSWMul(Idx, numElements); - EltPtr = Builder.CreateInBoundsGEP(Base.getPointer(), Idx, "arrayidx"); - } - } else if (BaseTy->isConstantArrayType()) { - llvm::Value *ArrayPtr = Base.getPointer(); - llvm::Value *Zero = llvm::ConstantInt::getNullValue(IntPtrTy); - llvm::Value *Args[] = {Zero, Idx}; - if (getLangOpts().isSignedOverflowDefined()) - EltPtr = Builder.CreateGEP(ArrayPtr, Args, "arrayidx"); + Idx = Builder.CreateMul(Idx, NumElements); else - EltPtr = Builder.CreateInBoundsGEP(ArrayPtr, Args, "arrayidx"); - } else { - // The base must be a pointer, which is not an aggregate. Emit it. - if (getLangOpts().isSignedOverflowDefined()) - EltPtr = Builder.CreateGEP(Base.getPointer(), Idx, "arrayidx"); + Idx = Builder.CreateNSWMul(Idx, NumElements); + EltPtr = emitArraySubscriptGEP(*this, Base, Idx, VLA->getElementType(), + !getLangOpts().isSignedOverflowDefined()); + } else if (const Expr *Array = isSimpleArrayDecayOperand(E->getBase())) { + // If this is A[i] where A is an array, the frontend will have decayed the + // base to be a ArrayToPointerDecay implicit cast. While correct, it is + // inefficient at -O0 to emit a "gep A, 0, 0" when codegen'ing it, then a + // "gep x, i" here. Emit one "gep A, 0, i". + assert(Array->getType()->isArrayType() && + "Array to pointer decay must have array source type!"); + LValue ArrayLV; + // For simple multidimensional array indexing, set the 'accessed' flag for + // better bounds-checking of the base expression. + if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Array)) + ArrayLV = EmitArraySubscriptExpr(ASE, /*Accessed*/ true); else - EltPtr = Builder.CreateInBoundsGEP(Base.getPointer(), Idx, "arrayidx"); - } - - CharUnits EltAlign = - Base.getAlignment().alignmentOfArrayElement( - getContext().getTypeSizeInChars(FixedSizeEltType)); - - // Limit the alignment to that of the result type. 
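The array-section lowering above handles OpenMP sections over pointers, constant arrays and VLAs, including the defaulted lower-bound/length cases folded into the Idx computation. A source-level illustration of the sections involved (requires -fopenmp; illustrative only):

void scale(int *p, int n) {
  // p[0:n] is an array section: lower bound 0, length n.
#pragma omp target map(tofrom : p[0:n])
  for (int i = 0; i < n; ++i)
    p[i] *= 2;
}

void demo() {
  int a[100] = {};
  // a[10:20] covers elements a[10] .. a[29].
#pragma omp target map(tofrom : a[10:20])
  for (int i = 10; i < 30; ++i)
    a[i] *= 2;
}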
- LValue LV = MakeAddrLValue(Address(EltPtr, EltAlign), ResultExprTy, - Base.getAlignmentSource()); + ArrayLV = EmitLValue(Array); - LV.getQuals().setAddressSpace(BaseTy.getAddressSpace()); + // Propagate the alignment from the array itself to the result. + EltPtr = emitArraySubscriptGEP( + *this, ArrayLV.getAddress(), {CGM.getSize(CharUnits::Zero()), Idx}, + ResultExprTy, !getLangOpts().isSignedOverflowDefined()); + AlignSource = ArrayLV.getAlignmentSource(); + } else { + Address Base = emitOMPArraySectionBase(*this, E->getBase(), AlignSource, + BaseTy, ResultExprTy, IsLowerBound); + EltPtr = emitArraySubscriptGEP(*this, Base, Idx, ResultExprTy, + !getLangOpts().isSignedOverflowDefined()); + } - return LV; + return MakeAddrLValue(EltPtr, ResultExprTy, AlignSource); } LValue CodeGenFunction:: @@ -3508,6 +3718,10 @@ RValue CodeGenFunction::EmitRValueForField(LValue LV, case TEK_Aggregate: return FieldLV.asAggregateRValue(); case TEK_Scalar: + // This routine is used to load fields one-by-one to perform a copy, so + // don't load reference fields. + if (FD->getType()->isReferenceType()) + return RValue::get(FieldLV.getPointer()); return EmitLoadOfLValue(FieldLV, Loc); } llvm_unreachable("bad evaluation kind"); @@ -3851,25 +4065,28 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, llvm::Value *Callee, if (SanOpts.has(SanitizerKind::CFIICall) && (!TargetDecl || !isa<FunctionDecl>(TargetDecl))) { SanitizerScope SanScope(this); + EmitSanitizerStatReport(llvm::SanStat_CFI_ICall); llvm::Metadata *MD = CGM.CreateMetadataIdentifierForType(QualType(FnType, 0)); - llvm::Value *BitSetName = llvm::MetadataAsValue::get(getLLVMContext(), MD); + llvm::Value *TypeId = llvm::MetadataAsValue::get(getLLVMContext(), MD); llvm::Value *CastedCallee = Builder.CreateBitCast(Callee, Int8PtrTy); - llvm::Value *BitSetTest = - Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::bitset_test), - {CastedCallee, BitSetName}); + llvm::Value *TypeTest = Builder.CreateCall( + CGM.getIntrinsic(llvm::Intrinsic::type_test), {CastedCallee, TypeId}); - auto TypeId = CGM.CreateCfiIdForTypeMetadata(MD); - if (CGM.getCodeGenOpts().SanitizeCfiCrossDso && TypeId) { - EmitCfiSlowPathCheck(BitSetTest, TypeId, CastedCallee); + auto CrossDsoTypeId = CGM.CreateCrossDsoCfiTypeId(MD); + llvm::Constant *StaticData[] = { + llvm::ConstantInt::get(Int8Ty, CFITCK_ICall), + EmitCheckSourceLocation(E->getLocStart()), + EmitCheckTypeDescriptor(QualType(FnType, 0)), + }; + if (CGM.getCodeGenOpts().SanitizeCfiCrossDso && CrossDsoTypeId) { + EmitCfiSlowPathCheck(SanitizerKind::CFIICall, TypeTest, CrossDsoTypeId, + CastedCallee, StaticData); } else { - llvm::Constant *StaticData[] = { - EmitCheckSourceLocation(E->getLocStart()), - EmitCheckTypeDescriptor(QualType(FnType, 0)), - }; - EmitCheck(std::make_pair(BitSetTest, SanitizerKind::CFIICall), - "cfi_bad_icall", StaticData, CastedCallee); + EmitCheck(std::make_pair(TypeTest, SanitizerKind::CFIICall), + "cfi_check_fail", StaticData, + {CastedCallee, llvm::UndefValue::get(IntPtrTy)}); } } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGExprAgg.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGExprAgg.cpp index a4547a9..6d18843 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGExprAgg.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGExprAgg.cpp @@ -175,6 +175,7 @@ public: } void VisitCXXBindTemporaryExpr(CXXBindTemporaryExpr *E); void VisitCXXConstructExpr(const CXXConstructExpr *E); + void VisitCXXInheritedCtorInitExpr(const CXXInheritedCtorInitExpr *E); void VisitLambdaExpr(LambdaExpr *E); void 
VisitCXXStdInitializerListExpr(CXXStdInitializerListExpr *E); void VisitExprWithCleanups(ExprWithCleanups *E); @@ -967,12 +968,9 @@ void AggExprEmitter::VisitVAArgExpr(VAArgExpr *VE) { Address ArgValue = Address::invalid(); Address ArgPtr = CGF.EmitVAArg(VE, ArgValue); + // If EmitVAArg fails, emit an error. if (!ArgPtr.isValid()) { - // If EmitVAArg fails, we fall back to the LLVM instruction. - llvm::Value *Val = Builder.CreateVAArg(ArgValue.getPointer(), - CGF.ConvertType(VE->getType())); - if (!Dest.isIgnored()) - Builder.CreateStore(Val, Dest.getAddress()); + CGF.ErrorUnsupported(VE, "aggregate va_arg expression"); return; } @@ -1001,6 +999,14 @@ AggExprEmitter::VisitCXXConstructExpr(const CXXConstructExpr *E) { CGF.EmitCXXConstructExpr(E, Slot); } +void AggExprEmitter::VisitCXXInheritedCtorInitExpr( + const CXXInheritedCtorInitExpr *E) { + AggValueSlot Slot = EnsureSlot(E->getType()); + CGF.EmitInheritedCXXConstructorCall( + E->getConstructor(), E->constructsVBase(), Slot.getAddress(), + E->inheritedFromVBase(), E); +} + void AggExprEmitter::VisitLambdaExpr(LambdaExpr *E) { AggValueSlot Slot = EnsureSlot(E->getType()); @@ -1174,6 +1180,38 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) { unsigned NumInitElements = E->getNumInits(); RecordDecl *record = E->getType()->castAs<RecordType>()->getDecl(); + // We'll need to enter cleanup scopes in case any of the element + // initializers throws an exception. + SmallVector<EHScopeStack::stable_iterator, 16> cleanups; + llvm::Instruction *cleanupDominator = nullptr; + + unsigned curInitIndex = 0; + + // Emit initialization of base classes. + if (auto *CXXRD = dyn_cast<CXXRecordDecl>(record)) { + assert(E->getNumInits() >= CXXRD->getNumBases() && + "missing initializer for base class"); + for (auto &Base : CXXRD->bases()) { + assert(!Base.isVirtual() && "should not see vbases here"); + auto *BaseRD = Base.getType()->getAsCXXRecordDecl(); + Address V = CGF.GetAddressOfDirectBaseInCompleteClass( + Dest.getAddress(), CXXRD, BaseRD, + /*isBaseVirtual*/ false); + AggValueSlot AggSlot = + AggValueSlot::forAddr(V, Qualifiers(), + AggValueSlot::IsDestructed, + AggValueSlot::DoesNotNeedGCBarriers, + AggValueSlot::IsNotAliased); + CGF.EmitAggExpr(E->getInit(curInitIndex++), AggSlot); + + if (QualType::DestructionKind dtorKind = + Base.getType().isDestructedType()) { + CGF.pushDestroy(dtorKind, V, Base.getType()); + cleanups.push_back(CGF.EHStack.stable_begin()); + } + } + } + // Prepare a 'this' for CXXDefaultInitExprs. CodeGenFunction::FieldConstructionScope FCS(CGF, Dest.getAddress()); @@ -1207,14 +1245,8 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) { return; } - // We'll need to enter cleanup scopes in case any of the member - // initializers throw an exception. - SmallVector<EHScopeStack::stable_iterator, 16> cleanups; - llvm::Instruction *cleanupDominator = nullptr; - // Here we iterate over the fields; this makes it simpler to both // default-initialize fields and skip over unnamed fields. - unsigned curInitIndex = 0; for (const auto *field : record->fields()) { // We're done once we hit the flexible array member. 
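The new loop above emits initializers for the base-class subobjects of an aggregate before its fields, and registers destructor cleanups for bases already constructed in case a later initializer throws. A source-level illustration of the init lists this applies to (aggregate initialization with bases, accepted under -std=c++1z/C++17):

struct Base {
  int b;
};

struct Derived : Base {   // still an aggregate in C++17
  int d;
};

// The braced list provides one initializer for the Base subobject, then the
// fields; the emitter walks the bases first, in exactly this order.
Derived d = { {1}, 2 };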
if (field->getType()->isIncompleteArrayType()) @@ -1320,6 +1352,10 @@ static CharUnits GetNumNonZeroBytesInInit(const Expr *E, CodeGenFunction &CGF) { CharUnits NumNonZeroBytes = CharUnits::Zero(); unsigned ILEElement = 0; + if (auto *CXXRD = dyn_cast<CXXRecordDecl>(SD)) + while (ILEElement != CXXRD->getNumBases()) + NumNonZeroBytes += + GetNumNonZeroBytesInInit(ILE->getInit(ILEElement++), CGF); for (const auto *Field : SD->fields()) { // We're done once we hit the flexible array member or run out of // InitListExpr elements. diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGExprCXX.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGExprCXX.cpp index 604cde7..eec2ace 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGExprCXX.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGExprCXX.cpp @@ -24,10 +24,11 @@ using namespace clang; using namespace CodeGen; -static RequiredArgs commonEmitCXXMemberOrOperatorCall( - CodeGenFunction &CGF, const CXXMethodDecl *MD, llvm::Value *Callee, - ReturnValueSlot ReturnValue, llvm::Value *This, llvm::Value *ImplicitParam, - QualType ImplicitParamTy, const CallExpr *CE, CallArgList &Args) { +static RequiredArgs +commonEmitCXXMemberOrOperatorCall(CodeGenFunction &CGF, const CXXMethodDecl *MD, + llvm::Value *This, llvm::Value *ImplicitParam, + QualType ImplicitParamTy, const CallExpr *CE, + CallArgList &Args) { assert(CE == nullptr || isa<CXXMemberCallExpr>(CE) || isa<CXXOperatorCallExpr>(CE)); assert(MD->isInstance() && @@ -53,7 +54,7 @@ static RequiredArgs commonEmitCXXMemberOrOperatorCall( } const FunctionProtoType *FPT = MD->getType()->castAs<FunctionProtoType>(); - RequiredArgs required = RequiredArgs::forPrototypePlus(FPT, Args.size()); + RequiredArgs required = RequiredArgs::forPrototypePlus(FPT, Args.size(), MD); // And the rest of the call args. 
if (CE) { @@ -76,21 +77,20 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorCall( const FunctionProtoType *FPT = MD->getType()->castAs<FunctionProtoType>(); CallArgList Args; RequiredArgs required = commonEmitCXXMemberOrOperatorCall( - *this, MD, Callee, ReturnValue, This, ImplicitParam, ImplicitParamTy, CE, - Args); + *this, MD, This, ImplicitParam, ImplicitParamTy, CE, Args); return EmitCall(CGM.getTypes().arrangeCXXMethodCall(Args, FPT, required), Callee, ReturnValue, Args, MD); } -RValue CodeGenFunction::EmitCXXStructorCall( - const CXXMethodDecl *MD, llvm::Value *Callee, ReturnValueSlot ReturnValue, - llvm::Value *This, llvm::Value *ImplicitParam, QualType ImplicitParamTy, - const CallExpr *CE, StructorType Type) { +RValue CodeGenFunction::EmitCXXDestructorCall( + const CXXDestructorDecl *DD, llvm::Value *Callee, llvm::Value *This, + llvm::Value *ImplicitParam, QualType ImplicitParamTy, const CallExpr *CE, + StructorType Type) { CallArgList Args; - commonEmitCXXMemberOrOperatorCall(*this, MD, Callee, ReturnValue, This, - ImplicitParam, ImplicitParamTy, CE, Args); - return EmitCall(CGM.getTypes().arrangeCXXStructorDeclaration(MD, Type), - Callee, ReturnValue, Args, MD); + commonEmitCXXMemberOrOperatorCall(*this, DD, This, ImplicitParam, + ImplicitParamTy, CE, Args); + return EmitCall(CGM.getTypes().arrangeCXXStructorDeclaration(DD, Type), + Callee, ReturnValueSlot(), Args, DD); } static CXXRecordDecl *getCXXRecord(const Expr *E) { @@ -259,7 +259,8 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( if (SanOpts.has(SanitizerKind::CFINVCall) && MD->getParent()->isDynamicClass()) { llvm::Value *VTable = GetVTablePtr(This, Int8PtrTy, MD->getParent()); - EmitVTablePtrCheckForCall(MD, VTable, CFITCK_NVCall, CE->getLocStart()); + EmitVTablePtrCheckForCall(MD->getParent(), VTable, CFITCK_NVCall, + CE->getLocStart()); } if (getLangOpts().AppleKext && MD->isVirtual() && HasQualifier) @@ -273,7 +274,7 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( if (MD->isVirtual()) { This = CGM.getCXXABI().adjustThisArgumentForVirtualFunctionCall( - *this, MD, This, UseVirtualCall); + *this, CalleeDecl, This, UseVirtualCall); } return EmitCXXMemberOrOperatorCall(MD, Callee, ReturnValue, This.getPointer(), @@ -323,10 +324,11 @@ CodeGenFunction::EmitCXXMemberPointerCallExpr(const CXXMemberCallExpr *E, // Push the this ptr. Args.add(RValue::get(ThisPtrForCall), ThisType); - RequiredArgs required = RequiredArgs::forPrototypePlus(FPT, 1); - + RequiredArgs required = + RequiredArgs::forPrototypePlus(FPT, 1, /*FD=*/nullptr); + // And the rest of the call args - EmitCallArgs(Args, FPT, E->arguments(), E->getDirectCallee()); + EmitCallArgs(Args, FPT, E->arguments()); return EmitCall(CGM.getTypes().arrangeCXXMethodCall(Args, FPT, required), Callee, ReturnValue, Args); } @@ -369,6 +371,9 @@ static void EmitNullBaseClassInitialization(CodeGenFunction &CGF, std::vector<CharUnits> VBPtrOffsets = CGF.CGM.getCXXABI().getVBPtrOffsets(Base); for (CharUnits VBPtrOffset : VBPtrOffsets) { + // Stop before we hit any virtual base pointers located in virtual bases. 
+ if (VBPtrOffset >= NVSize) + break; std::pair<CharUnits, CharUnits> LastStore = Stores.pop_back_val(); CharUnits LastStoreOffset = LastStore.first; CharUnits LastStoreSize = LastStore.second; @@ -471,8 +476,8 @@ CodeGenFunction::EmitCXXConstructExpr(const CXXConstructExpr *E, } } - if (const ConstantArrayType *arrayType - = getContext().getAsConstantArrayType(E->getType())) { + if (const ArrayType *arrayType + = getContext().getAsArrayType(E->getType())) { EmitCXXAggrConstructorCall(CD, arrayType, Dest.getAddress(), E); } else { CXXCtorType Type = Ctor_Complete; @@ -1010,15 +1015,18 @@ void CodeGenFunction::EmitNewArrayInitializer( if (auto *ILE = dyn_cast<InitListExpr>(Init)) { if (const RecordType *RType = ILE->getType()->getAs<RecordType>()) { if (RType->getDecl()->isStruct()) { - unsigned NumFields = 0; + unsigned NumElements = 0; + if (auto *CXXRD = dyn_cast<CXXRecordDecl>(RType->getDecl())) + NumElements = CXXRD->getNumBases(); for (auto *Field : RType->getDecl()->fields()) if (!Field->isUnnamedBitfield()) - ++NumFields; - if (ILE->getNumInits() == NumFields) + ++NumElements; + // FIXME: Recurse into nested InitListExprs. + if (ILE->getNumInits() == NumElements) for (unsigned i = 0, e = ILE->getNumInits(); i != e; ++i) if (!isa<ImplicitValueInitExpr>(ILE->getInit(i))) - --NumFields; - if (ILE->getNumInits() == NumFields && TryMemsetInitialization()) + --NumElements; + if (ILE->getNumInits() == NumElements && TryMemsetInitialization()) return; } } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGExprConstant.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGExprConstant.cpp index ee049f1..803b399 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGExprConstant.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGExprConstant.cpp @@ -111,7 +111,7 @@ AppendBytes(CharUnits FieldOffsetInChars, llvm::Constant *InitCst) { // Round up the field offset to the alignment of the field type. CharUnits AlignedNextFieldOffsetInChars = - NextFieldOffsetInChars.RoundUpToAlignment(FieldAlignment); + NextFieldOffsetInChars.alignTo(FieldAlignment); if (AlignedNextFieldOffsetInChars < FieldOffsetInChars) { // We need to append padding. @@ -121,7 +121,7 @@ AppendBytes(CharUnits FieldOffsetInChars, llvm::Constant *InitCst) { "Did not add enough padding!"); AlignedNextFieldOffsetInChars = - NextFieldOffsetInChars.RoundUpToAlignment(FieldAlignment); + NextFieldOffsetInChars.alignTo(FieldAlignment); } if (AlignedNextFieldOffsetInChars > FieldOffsetInChars) { @@ -162,8 +162,8 @@ void ConstStructBuilder::AppendBitField(const FieldDecl *Field, if (FieldOffset > NextFieldOffsetInBits) { // We need to add padding. CharUnits PadSize = Context.toCharUnitsFromBits( - llvm::RoundUpToAlignment(FieldOffset - NextFieldOffsetInBits, - Context.getTargetInfo().getCharAlign())); + llvm::alignTo(FieldOffset - NextFieldOffsetInBits, + Context.getTargetInfo().getCharAlign())); AppendPadding(PadSize); } @@ -334,7 +334,7 @@ void ConstStructBuilder::ConvertStructToPacked() { CharUnits ElementAlign = CharUnits::fromQuantity( CGM.getDataLayout().getABITypeAlignment(C->getType())); CharUnits AlignedElementOffsetInChars = - ElementOffsetInChars.RoundUpToAlignment(ElementAlign); + ElementOffsetInChars.alignTo(ElementAlign); if (AlignedElementOffsetInChars > ElementOffsetInChars) { // We need some padding. @@ -368,7 +368,14 @@ bool ConstStructBuilder::Build(InitListExpr *ILE) { unsigned FieldNo = 0; unsigned ElementNo = 0; - + + // Bail out if we have base classes. 
We could support these, but they only + // arise in C++1z where we will have already constant folded most interesting + // cases. FIXME: There are still a few more cases we can handle this way. + if (auto *CXXRD = dyn_cast<CXXRecordDecl>(RD)) + if (CXXRD->getNumBases()) + return false; + for (RecordDecl::field_iterator Field = RD->field_begin(), FieldEnd = RD->field_end(); Field != FieldEnd; ++Field, ++FieldNo) { // If this is a union, skip all the fields that aren't being initialized. @@ -508,13 +515,12 @@ llvm::Constant *ConstStructBuilder::Finalize(QualType Ty) { } else { // Append tail padding if necessary. CharUnits LLVMSizeInChars = - NextFieldOffsetInChars.RoundUpToAlignment(LLVMStructAlignment); + NextFieldOffsetInChars.alignTo(LLVMStructAlignment); if (LLVMSizeInChars != LayoutSizeInChars) AppendTailPadding(LayoutSizeInChars); - LLVMSizeInChars = - NextFieldOffsetInChars.RoundUpToAlignment(LLVMStructAlignment); + LLVMSizeInChars = NextFieldOffsetInChars.alignTo(LLVMStructAlignment); // Check if we need to convert the struct to a packed struct. if (NextFieldOffsetInChars <= LayoutSizeInChars && @@ -526,8 +532,7 @@ llvm::Constant *ConstStructBuilder::Finalize(QualType Ty) { "Converting to packed did not help!"); } - LLVMSizeInChars = - NextFieldOffsetInChars.RoundUpToAlignment(LLVMStructAlignment); + LLVMSizeInChars = NextFieldOffsetInChars.alignTo(LLVMStructAlignment); assert(LayoutSizeInChars == LLVMSizeInChars && "Tail padding mismatch!"); @@ -546,8 +551,9 @@ llvm::Constant *ConstStructBuilder::Finalize(QualType Ty) { llvm::Constant *Result = llvm::ConstantStruct::get(STy, Elements); - assert(NextFieldOffsetInChars.RoundUpToAlignment(getAlignment(Result)) == - getSizeInChars(Result) && "Size mismatch!"); + assert(NextFieldOffsetInChars.alignTo(getAlignment(Result)) == + getSizeInChars(Result) && + "Size mismatch!"); return Result; } @@ -758,6 +764,12 @@ public: return Visit(DIE->getExpr()); } + llvm::Constant *VisitExprWithCleanups(ExprWithCleanups *E) { + if (!E->cleanupsHaveSideEffects()) + return Visit(E->getSubExpr()); + return nullptr; + } + llvm::Constant *VisitMaterializeTemporaryExpr(MaterializeTemporaryExpr *E) { return Visit(E->GetTemporaryExpr()); } @@ -1125,6 +1137,13 @@ bool ConstStructBuilder::Build(ConstExprEmitter *Emitter, unsigned FieldNo = -1; unsigned ElementNo = 0; + // Bail out if we have base classes. We could support these, but they only + // arise in C++1z where we will have already constant folded most interesting + // cases. FIXME: There are still a few more cases we can handle this way. + if (auto *CXXRD = dyn_cast<CXXRecordDecl>(RD)) + if (CXXRD->getNumBases()) + return false; + for (FieldDecl *Field : RD->fields()) { ++FieldNo; @@ -1301,8 +1320,14 @@ llvm::Constant *CodeGenModule::EmitConstantValue(const APValue &Value, // Convert to the appropriate type; this could be an lvalue for // an integer. - if (isa<llvm::PointerType>(DestTy)) + if (isa<llvm::PointerType>(DestTy)) { + // Convert the integer to a pointer-sized integer before converting it + // to a pointer. + C = llvm::ConstantExpr::getIntegerCast( + C, getDataLayout().getIntPtrType(DestTy), + /*isSigned=*/false); return llvm::ConstantExpr::getIntToPtr(C, DestTy); + } // If the types don't match this should only be a truncate. 
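Several hunks in CGExprConstant.cpp above are a mechanical rename from CharUnits::RoundUpToAlignment to CharUnits::alignTo (and llvm::RoundUpToAlignment to llvm::alignTo). A standalone sketch of the rounding both spellings perform, for readers checking that the rename is behavior-preserving (this helper is illustrative, not the LLVM implementation):

#include <cstdint>

// Round value up to the next multiple of align (align must be non-zero).
constexpr uint64_t alignTo(uint64_t value, uint64_t align) {
  return (value + align - 1) / align * align;
}

static_assert(alignTo(13, 8) == 16, "13 rounds up to the next 8-byte boundary");
static_assert(alignTo(16, 8) == 16, "already-aligned values are unchanged");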
if (C->getType() != DestTy) diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGExprScalar.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGExprScalar.cpp index 268e796..120dacf 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGExprScalar.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGExprScalar.cpp @@ -818,7 +818,7 @@ Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType, "Splatted expr doesn't match with vector element type?"); // Splat the element across to all elements - unsigned NumElements = cast<llvm::VectorType>(DstTy)->getNumElements(); + unsigned NumElements = DstTy->getVectorNumElements(); return Builder.CreateVectorSplat(NumElements, Src, "splat"); } @@ -984,8 +984,7 @@ Value *ScalarExprEmitter::VisitExpr(Expr *E) { Value *ScalarExprEmitter::VisitShuffleVectorExpr(ShuffleVectorExpr *E) { // Vector Mask Case - if (E->getNumSubExprs() == 2 || - (E->getNumSubExprs() == 3 && E->getExpr(2)->getType()->isVectorType())) { + if (E->getNumSubExprs() == 2) { Value *LHS = CGF.EmitScalarExpr(E->getExpr(0)); Value *RHS = CGF.EmitScalarExpr(E->getExpr(1)); Value *Mask; @@ -993,22 +992,7 @@ Value *ScalarExprEmitter::VisitShuffleVectorExpr(ShuffleVectorExpr *E) { llvm::VectorType *LTy = cast<llvm::VectorType>(LHS->getType()); unsigned LHSElts = LTy->getNumElements(); - if (E->getNumSubExprs() == 3) { - Mask = CGF.EmitScalarExpr(E->getExpr(2)); - - // Shuffle LHS & RHS into one input vector. - SmallVector<llvm::Constant*, 32> concat; - for (unsigned i = 0; i != LHSElts; ++i) { - concat.push_back(Builder.getInt32(2*i)); - concat.push_back(Builder.getInt32(2*i+1)); - } - - Value* CV = llvm::ConstantVector::get(concat); - LHS = Builder.CreateShuffleVector(LHS, RHS, CV, "concat"); - LHSElts *= 2; - } else { - Mask = RHS; - } + Mask = RHS; llvm::VectorType *MTy = cast<llvm::VectorType>(Mask->getType()); @@ -1366,8 +1350,9 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { QualType DestTy = CE->getType(); CastKind Kind = CE->getCastKind(); - if (!DestTy->isVoidType()) - TestAndClearIgnoreResultAssign(); + // These cases are generally not written to ignore the result of + // evaluating their sub-expressions, so we clear this now. + bool Ignored = TestAndClearIgnoreResultAssign(); // Since almost all cast kinds apply to scalars, this switch doesn't have // a default case, so the compiler will warn on a missing case. The cases @@ -1410,7 +1395,10 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { } case CK_AddressSpaceConversion: { Value *Src = Visit(const_cast<Expr*>(E)); - return Builder.CreateAddrSpaceCast(Src, ConvertType(DestTy)); + // Since target may map different address spaces in AST to the same address + // space, an address space conversion may end up as a bitcast. 
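A sketch of the distinction the address-space comment above is making, in plain IRBuilder terms (the helper name is invented): an addrspacecast between identical address spaces is invalid IR, so when the target maps two source-language address spaces onto one LLVM address space the conversion must degrade to a bitcast, which is exactly the choice CreatePointerBitCastOrAddrSpaceCast makes.

#include "llvm/IR/IRBuilder.h"

// Illustrative helper (not clang code): pick the right cast for a pointer
// conversion whose source and destination address spaces may or may not
// differ after the target's address-space mapping has been applied.
llvm::Value *castAcrossAddressSpaces(llvm::IRBuilder<> &Builder,
                                     llvm::Value *Src,
                                     llvm::PointerType *DstTy) {
  // Emits a bitcast when the address spaces match, addrspacecast otherwise.
  return Builder.CreatePointerBitCastOrAddrSpaceCast(Src, DstTy, "conv");
}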
+ return Builder.CreatePointerBitCastOrAddrSpaceCast(Src, + ConvertType(DestTy)); } case CK_AtomicToNonAtomic: case CK_NonAtomicToAtomic: @@ -1494,11 +1482,8 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { return CGF.EmitARCRetainScalarExpr(E); case CK_ARCConsumeObject: return CGF.EmitObjCConsumeObject(E->getType(), Visit(E)); - case CK_ARCReclaimReturnedObject: { - llvm::Value *value = Visit(E); - value = CGF.EmitARCRetainAutoreleasedReturnValue(value); - return CGF.EmitObjCConsumeObject(E->getType(), value); - } + case CK_ARCReclaimReturnedObject: + return CGF.EmitARCReclaimReturnedObject(E, /*allowUnsafe*/ Ignored); case CK_ARCExtendBlockObject: return CGF.EmitARCExtendBlockObject(E); @@ -1544,7 +1529,7 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { llvm::Type *DstTy = ConvertType(DestTy); Value *Elt = Visit(const_cast<Expr*>(E)); // Splat the element across to all elements - unsigned NumElements = cast<llvm::VectorType>(DstTy)->getNumElements(); + unsigned NumElements = DstTy->getVectorNumElements(); return Builder.CreateVectorSplat(NumElements, Elt, "splat"); } @@ -1654,13 +1639,14 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, llvm::Value *True = CGF.EmitToMemory(Builder.getTrue(), type); if (isPre) { Builder.CreateStore(True, LV.getAddress(), LV.isVolatileQualified()) - ->setAtomic(llvm::SequentiallyConsistent); + ->setAtomic(llvm::AtomicOrdering::SequentiallyConsistent); return Builder.getTrue(); } // For atomic bool increment, we just store true and return it for // preincrement, do an atomic swap with true for postincrement - return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, - LV.getPointer(), True, llvm::SequentiallyConsistent); + return Builder.CreateAtomicRMW( + llvm::AtomicRMWInst::Xchg, LV.getPointer(), True, + llvm::AtomicOrdering::SequentiallyConsistent); } // Special case for atomic increment / decrement on integers, emit // atomicrmw instructions. We skip this if we want to be doing overflow @@ -1677,7 +1663,7 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, llvm::Value *amt = CGF.EmitToMemory( llvm::ConstantInt::get(ConvertType(type), 1, true), type); llvm::Value *old = Builder.CreateAtomicRMW(aop, - LV.getPointer(), amt, llvm::SequentiallyConsistent); + LV.getPointer(), amt, llvm::AtomicOrdering::SequentiallyConsistent); return isPre ? Builder.CreateBinOp(op, old, amt) : old; } value = EmitLoadOfLValue(LV, E->getExprLoc()); @@ -1794,15 +1780,19 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, amt = llvm::ConstantFP::get(VMContext, llvm::APFloat(static_cast<double>(amount))); else { - // Remaining types are either Half or LongDouble. Convert from float. + // Remaining types are Half, LongDouble or __float128. Convert from float. llvm::APFloat F(static_cast<float>(amount)); bool ignored; + const llvm::fltSemantics *FS; // Don't use getFloatTypeSemantics because Half isn't // necessarily represented using the "half" LLVM type. - F.convert(value->getType()->isHalfTy() - ? CGF.getTarget().getHalfFormat() - : CGF.getTarget().getLongDoubleFormat(), - llvm::APFloat::rmTowardZero, &ignored); + if (value->getType()->isFP128Ty()) + FS = &CGF.getTarget().getFloat128Format(); + else if (value->getType()->isHalfTy()) + FS = &CGF.getTarget().getHalfFormat(); + else + FS = &CGF.getTarget().getLongDoubleFormat(); + F.convert(*FS, llvm::APFloat::rmTowardZero, &ignored); amt = llvm::ConstantFP::get(VMContext, F); } value = Builder.CreateFAdd(value, amt, isInc ? 
"inc" : "dec"); @@ -2159,7 +2149,7 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue( E->getExprLoc()), LHSTy); Builder.CreateAtomicRMW(aop, LHSLV.getPointer(), amt, - llvm::SequentiallyConsistent); + llvm::AtomicOrdering::SequentiallyConsistent); return LHSLV; } } @@ -2716,7 +2706,8 @@ Value *ScalarExprEmitter::EmitShl(const BinOpInfo &Ops) { RHS = Builder.CreateIntCast(RHS, Ops.LHS->getType(), false, "sh_prom"); bool SanitizeBase = CGF.SanOpts.has(SanitizerKind::ShiftBase) && - Ops.Ty->hasSignedIntegerRepresentation(); + Ops.Ty->hasSignedIntegerRepresentation() && + !CGF.getLangOpts().isSignedOverflowDefined(); bool SanitizeExponent = CGF.SanOpts.has(SanitizerKind::ShiftExponent); // OpenCL 6.3j: shift values are effectively % word size of LHS. if (CGF.getLangOpts().OpenCL) @@ -2993,15 +2984,17 @@ Value *ScalarExprEmitter::VisitBinAssign(const BinaryOperator *E) { std::tie(LHS, RHS) = CGF.EmitARCStoreAutoreleasing(E); break; + case Qualifiers::OCL_ExplicitNone: + std::tie(LHS, RHS) = CGF.EmitARCStoreUnsafeUnretained(E, Ignore); + break; + case Qualifiers::OCL_Weak: RHS = Visit(E->getRHS()); LHS = EmitCheckedLValue(E->getLHS(), CodeGenFunction::TCK_Store); RHS = CGF.EmitARCStoreWeak(LHS.getAddress(), RHS, Ignore); break; - // No reason to do any of these differently. case Qualifiers::OCL_None: - case Qualifiers::OCL_ExplicitNone: // __block variables need to have the rhs evaluated first, plus // this should improve codegen just a little. RHS = Visit(E->getRHS()); @@ -3366,9 +3359,11 @@ Value *ScalarExprEmitter::VisitVAArgExpr(VAArgExpr *VE) { llvm::Type *ArgTy = ConvertType(VE->getType()); - // If EmitVAArg fails, we fall back to the LLVM instruction. - if (!ArgPtr.isValid()) - return Builder.CreateVAArg(ArgValue.getPointer(), ArgTy); + // If EmitVAArg fails, emit an error. + if (!ArgPtr.isValid()) { + CGF.ErrorUnsupported(VE, "va_arg expression"); + return llvm::UndefValue::get(ArgTy); + } // FIXME Volatility. llvm::Value *Val = Builder.CreateLoad(ArgPtr); @@ -3388,50 +3383,48 @@ Value *ScalarExprEmitter::VisitBlockExpr(const BlockExpr *block) { return CGF.EmitBlockLiteral(block); } +// Convert a vec3 to vec4, or vice versa. +static Value *ConvertVec3AndVec4(CGBuilderTy &Builder, CodeGenFunction &CGF, + Value *Src, unsigned NumElementsDst) { + llvm::Value *UnV = llvm::UndefValue::get(Src->getType()); + SmallVector<llvm::Constant*, 4> Args; + Args.push_back(Builder.getInt32(0)); + Args.push_back(Builder.getInt32(1)); + Args.push_back(Builder.getInt32(2)); + if (NumElementsDst == 4) + Args.push_back(llvm::UndefValue::get(CGF.Int32Ty)); + llvm::Constant *Mask = llvm::ConstantVector::get(Args); + return Builder.CreateShuffleVector(Src, UnV, Mask); +} + Value *ScalarExprEmitter::VisitAsTypeExpr(AsTypeExpr *E) { Value *Src = CGF.EmitScalarExpr(E->getSrcExpr()); llvm::Type *DstTy = ConvertType(E->getType()); - // Going from vec4->vec3 or vec3->vec4 is a special case and requires - // a shuffle vector instead of a bitcast. llvm::Type *SrcTy = Src->getType(); - if (isa<llvm::VectorType>(DstTy) && isa<llvm::VectorType>(SrcTy)) { - unsigned numElementsDst = cast<llvm::VectorType>(DstTy)->getNumElements(); - unsigned numElementsSrc = cast<llvm::VectorType>(SrcTy)->getNumElements(); - if ((numElementsDst == 3 && numElementsSrc == 4) - || (numElementsDst == 4 && numElementsSrc == 3)) { - - - // In the case of going from int4->float3, a bitcast is needed before - // doing a shuffle. 
- llvm::Type *srcElemTy = - cast<llvm::VectorType>(SrcTy)->getElementType(); - llvm::Type *dstElemTy = - cast<llvm::VectorType>(DstTy)->getElementType(); - - if ((srcElemTy->isIntegerTy() && dstElemTy->isFloatTy()) - || (srcElemTy->isFloatTy() && dstElemTy->isIntegerTy())) { - // Create a float type of the same size as the source or destination. - llvm::VectorType *newSrcTy = llvm::VectorType::get(dstElemTy, - numElementsSrc); - - Src = Builder.CreateBitCast(Src, newSrcTy, "astypeCast"); - } - - llvm::Value *UnV = llvm::UndefValue::get(Src->getType()); - - SmallVector<llvm::Constant*, 3> Args; - Args.push_back(Builder.getInt32(0)); - Args.push_back(Builder.getInt32(1)); - Args.push_back(Builder.getInt32(2)); - - if (numElementsDst == 4) - Args.push_back(llvm::UndefValue::get(CGF.Int32Ty)); - - llvm::Constant *Mask = llvm::ConstantVector::get(Args); + unsigned NumElementsSrc = isa<llvm::VectorType>(SrcTy) ? + cast<llvm::VectorType>(SrcTy)->getNumElements() : 0; + unsigned NumElementsDst = isa<llvm::VectorType>(DstTy) ? + cast<llvm::VectorType>(DstTy)->getNumElements() : 0; + + // Going from vec3 to non-vec3 is a special case and requires a shuffle + // vector to get a vec4, then a bitcast if the target type is different. + if (NumElementsSrc == 3 && NumElementsDst != 3) { + Src = ConvertVec3AndVec4(Builder, CGF, Src, 4); + Src = Builder.CreateBitCast(Src, DstTy); + Src->setName("astype"); + return Src; + } - return Builder.CreateShuffleVector(Src, UnV, Mask, "astype"); - } + // Going from non-vec3 to vec3 is a special case and requires a bitcast + // to vec4 if the original type is not vec4, then a shuffle vector to + // get a vec3. + if (NumElementsSrc != 3 && NumElementsDst == 3) { + auto Vec4Ty = llvm::VectorType::get(DstTy->getVectorElementType(), 4); + Src = Builder.CreateBitCast(Src, Vec4Ty); + Src = ConvertVec3AndVec4(Builder, CGF, Src, 3); + Src->setName("astype"); + return Src; } return Builder.CreateBitCast(Src, DstTy, "astype"); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGLoopInfo.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGLoopInfo.cpp index 0afe7db..51474f1 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGLoopInfo.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGLoopInfo.cpp @@ -19,12 +19,15 @@ using namespace clang::CodeGen; using namespace llvm; -static MDNode *createMetadata(LLVMContext &Ctx, const LoopAttributes &Attrs) { +static MDNode *createMetadata(LLVMContext &Ctx, const LoopAttributes &Attrs, + llvm::DebugLoc Location) { if (!Attrs.IsParallel && Attrs.VectorizeWidth == 0 && Attrs.InterleaveCount == 0 && Attrs.UnrollCount == 0 && Attrs.VectorizeEnable == LoopAttributes::Unspecified && - Attrs.UnrollEnable == LoopAttributes::Unspecified) + Attrs.UnrollEnable == LoopAttributes::Unspecified && + Attrs.DistributeEnable == LoopAttributes::Unspecified && + !Location) return nullptr; SmallVector<Metadata *, 4> Args; @@ -32,6 +35,10 @@ static MDNode *createMetadata(LLVMContext &Ctx, const LoopAttributes &Attrs) { auto TempNode = MDNode::getTemporary(Ctx, None); Args.push_back(TempNode.get()); + // If we have a valid debug location for the loop, add it. 
+ if (Location) + Args.push_back(Location.getAsMDNode()); + // Setting vectorize.width if (Attrs.VectorizeWidth > 0) { Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.vectorize.width"), @@ -78,6 +85,14 @@ static MDNode *createMetadata(LLVMContext &Ctx, const LoopAttributes &Attrs) { Args.push_back(MDNode::get(Ctx, Vals)); } + if (Attrs.DistributeEnable != LoopAttributes::Unspecified) { + Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.distribute.enable"), + ConstantAsMetadata::get(ConstantInt::get( + Type::getInt1Ty(Ctx), (Attrs.DistributeEnable == + LoopAttributes::Enable)))}; + Args.push_back(MDNode::get(Ctx, Vals)); + } + // Set the first operand to itself. MDNode *LoopID = MDNode::get(Ctx, Args); LoopID->replaceOperandWith(0, LoopID); @@ -87,7 +102,8 @@ static MDNode *createMetadata(LLVMContext &Ctx, const LoopAttributes &Attrs) { LoopAttributes::LoopAttributes(bool IsParallel) : IsParallel(IsParallel), VectorizeEnable(LoopAttributes::Unspecified), UnrollEnable(LoopAttributes::Unspecified), VectorizeWidth(0), - InterleaveCount(0), UnrollCount(0) {} + InterleaveCount(0), UnrollCount(0), + DistributeEnable(LoopAttributes::Unspecified) {} void LoopAttributes::clear() { IsParallel = false; @@ -98,37 +114,60 @@ void LoopAttributes::clear() { UnrollEnable = LoopAttributes::Unspecified; } -LoopInfo::LoopInfo(BasicBlock *Header, const LoopAttributes &Attrs) +LoopInfo::LoopInfo(BasicBlock *Header, const LoopAttributes &Attrs, + llvm::DebugLoc Location) : LoopID(nullptr), Header(Header), Attrs(Attrs) { - LoopID = createMetadata(Header->getContext(), Attrs); + LoopID = createMetadata(Header->getContext(), Attrs, Location); } -void LoopInfoStack::push(BasicBlock *Header) { - Active.push_back(LoopInfo(Header, StagedAttrs)); +void LoopInfoStack::push(BasicBlock *Header, llvm::DebugLoc Location) { + Active.push_back(LoopInfo(Header, StagedAttrs, Location)); // Clear the attributes so nested loops do not inherit them. StagedAttrs.clear(); } void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, - ArrayRef<const clang::Attr *> Attrs) { + ArrayRef<const clang::Attr *> Attrs, + llvm::DebugLoc Location) { // Identify loop hint attributes from Attrs. for (const auto *Attr : Attrs) { const LoopHintAttr *LH = dyn_cast<LoopHintAttr>(Attr); + const OpenCLUnrollHintAttr *OpenCLHint = + dyn_cast<OpenCLUnrollHintAttr>(Attr); // Skip non loop hint attributes - if (!LH) + if (!LH && !OpenCLHint) { continue; + } - auto *ValueExpr = LH->getValue(); + LoopHintAttr::OptionType Option = LoopHintAttr::Unroll; + LoopHintAttr::LoopHintState State = LoopHintAttr::Disable; unsigned ValueInt = 1; - if (ValueExpr) { - llvm::APSInt ValueAPS = ValueExpr->EvaluateKnownConstInt(Ctx); - ValueInt = ValueAPS.getSExtValue(); - } + // Translate opencl_unroll_hint attribute argument to + // equivalent LoopHintAttr enums. + // OpenCL v2.0 s6.11.5: + // 0 - full unroll (no argument). + // 1 - disable unroll. + // other positive integer n - unroll by n. 
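A standalone restatement of the mapping described in the comment above (a hypothetical helper; the real translation into LoopHintAttr enums follows in the hunk below):

// opencl_unroll_hint argument -> requested unrolling behaviour:
//   0 -> full unroll, 1 -> disable unrolling, n > 1 -> unroll by n.
enum class UnrollRequest { Full, Disable, Count };

struct OpenCLUnrollTranslation {
  UnrollRequest request;
  unsigned count;   // only meaningful when request == UnrollRequest::Count
};

OpenCLUnrollTranslation translateOpenCLUnrollHint(unsigned hint) {
  if (hint == 0) return {UnrollRequest::Full, 0};
  if (hint == 1) return {UnrollRequest::Disable, 0};
  return {UnrollRequest::Count, hint};
}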
+ if (OpenCLHint) { + ValueInt = OpenCLHint->getUnrollHint(); + if (ValueInt == 0) { + State = LoopHintAttr::Full; + } else if (ValueInt != 1) { + Option = LoopHintAttr::UnrollCount; + State = LoopHintAttr::Numeric; + } + } else if (LH) { + auto *ValueExpr = LH->getValue(); + if (ValueExpr) { + llvm::APSInt ValueAPS = ValueExpr->EvaluateKnownConstInt(Ctx); + ValueInt = ValueAPS.getSExtValue(); + } - LoopHintAttr::OptionType Option = LH->getOption(); - LoopHintAttr::LoopHintState State = LH->getState(); + Option = LH->getOption(); + State = LH->getState(); + } switch (State) { case LoopHintAttr::Disable: switch (Option) { @@ -143,6 +182,9 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, case LoopHintAttr::Unroll: setUnrollState(LoopAttributes::Disable); break; + case LoopHintAttr::Distribute: + setDistributeState(false); + break; case LoopHintAttr::UnrollCount: case LoopHintAttr::VectorizeWidth: case LoopHintAttr::InterleaveCount: @@ -159,6 +201,9 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, case LoopHintAttr::Unroll: setUnrollState(LoopAttributes::Enable); break; + case LoopHintAttr::Distribute: + setDistributeState(true); + break; case LoopHintAttr::UnrollCount: case LoopHintAttr::VectorizeWidth: case LoopHintAttr::InterleaveCount: @@ -178,6 +223,7 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, case LoopHintAttr::UnrollCount: case LoopHintAttr::VectorizeWidth: case LoopHintAttr::InterleaveCount: + case LoopHintAttr::Distribute: llvm_unreachable("Options cannot be used to assume mem safety."); break; } @@ -192,6 +238,7 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, case LoopHintAttr::UnrollCount: case LoopHintAttr::VectorizeWidth: case LoopHintAttr::InterleaveCount: + case LoopHintAttr::Distribute: llvm_unreachable("Options cannot be used with 'full' hint."); break; } @@ -210,6 +257,7 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, case LoopHintAttr::Unroll: case LoopHintAttr::Vectorize: case LoopHintAttr::Interleave: + case LoopHintAttr::Distribute: llvm_unreachable("Options cannot be assigned a value."); break; } @@ -218,7 +266,7 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, } /// Stage the attributes. - push(Header); + push(Header, Location); } void LoopInfoStack::pop() { @@ -237,7 +285,7 @@ void LoopInfoStack::InsertHelper(Instruction *I) const { if (TerminatorInst *TI = dyn_cast<TerminatorInst>(I)) { for (unsigned i = 0, ie = TI->getNumSuccessors(); i < ie; ++i) if (TI->getSuccessor(i) == L.getHeader()) { - TI->setMetadata("llvm.loop", L.getLoopID()); + TI->setMetadata(llvm::LLVMContext::MD_loop, L.getLoopID()); break; } return; diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGLoopInfo.h b/contrib/llvm/tools/clang/lib/CodeGen/CGLoopInfo.h index ec33906..a0111ed 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGLoopInfo.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGLoopInfo.h @@ -18,6 +18,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/IR/Value.h" #include "llvm/Support/Compiler.h" @@ -57,13 +58,17 @@ struct LoopAttributes { /// \brief llvm.unroll. unsigned UnrollCount; + + /// \brief Value for llvm.loop.distribute.enable metadata. + LVEnableState DistributeEnable; }; /// \brief Information used when generating a structured loop. class LoopInfo { public: /// \brief Construct a new LoopInfo for the loop with entry Header. 
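The new LoopHintAttr::Distribute cases above feed the llvm.loop.distribute.enable metadata added earlier in this file. A user-level example of the source construct they correspond to, assuming the "#pragma clang loop distribute" spelling that these hint attributes are parsed from (the example itself is not from the patch):

// Hypothetical user code: requests loop distribution, which CodeGen records
// as llvm.loop.distribute.enable = true on the loop's branch metadata.
void saxpy_into(float *x, float *y, float *z, float a, int n) {
#pragma clang loop distribute(enable)
  for (int i = 0; i < n; ++i) {
    z[i] = a * x[i] + y[i];
  }
}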
- LoopInfo(llvm::BasicBlock *Header, const LoopAttributes &Attrs); + LoopInfo(llvm::BasicBlock *Header, const LoopAttributes &Attrs, + llvm::DebugLoc Location); /// \brief Get the loop id metadata for this loop. llvm::MDNode *getLoopID() const { return LoopID; } @@ -95,12 +100,14 @@ public: /// \brief Begin a new structured loop. The set of staged attributes will be /// applied to the loop and then cleared. - void push(llvm::BasicBlock *Header); + void push(llvm::BasicBlock *Header, + llvm::DebugLoc Location = llvm::DebugLoc()); /// \brief Begin a new structured loop. Stage attributes from the Attrs list. /// The staged attributes are applied to the loop and then cleared. void push(llvm::BasicBlock *Header, clang::ASTContext &Ctx, - llvm::ArrayRef<const Attr *> Attrs); + llvm::ArrayRef<const Attr *> Attrs, + llvm::DebugLoc Location = llvm::DebugLoc()); /// \brief End the current loop. void pop(); @@ -126,6 +133,12 @@ public: Enable ? LoopAttributes::Enable : LoopAttributes::Disable; } + /// \brief Set the next pushed loop as a distribution candidate. + void setDistributeState(bool Enable = true) { + StagedAttrs.DistributeEnable = + Enable ? LoopAttributes::Enable : LoopAttributes::Disable; + } + /// \brief Set the next pushed loop unroll state. void setUnrollState(const LoopAttributes::LVEnableState &State) { StagedAttrs.UnrollEnable = State; diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGObjC.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGObjC.cpp index 2d5991b..db894ce 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGObjC.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGObjC.cpp @@ -590,9 +590,7 @@ static void emitStructGetterCall(CodeGenFunction &CGF, ObjCIvarDecl *ivar, args.add(RValue::get(CGF.Builder.getInt1(hasStrong)), Context.BoolTy); llvm::Value *fn = CGF.CGM.getObjCRuntime().GetGetStructFunction(); - CGF.EmitCall(CGF.getTypes().arrangeFreeFunctionCall(Context.VoidTy, args, - FunctionType::ExtInfo(), - RequiredArgs::All), + CGF.EmitCall(CGF.getTypes().arrangeBuiltinFunctionCall(Context.VoidTy, args), fn, ReturnValueSlot(), args); } @@ -856,10 +854,8 @@ static void emitCPPObjectAtomicGetterCall(CodeGenFunction &CGF, llvm::Value *copyCppAtomicObjectFn = CGF.CGM.getObjCRuntime().GetCppAtomicObjectGetFunction(); - CGF.EmitCall(CGF.getTypes().arrangeFreeFunctionCall(CGF.getContext().VoidTy, - args, - FunctionType::ExtInfo(), - RequiredArgs::All), + CGF.EmitCall( + CGF.getTypes().arrangeBuiltinFunctionCall(CGF.getContext().VoidTy, args), copyCppAtomicObjectFn, ReturnValueSlot(), args); } @@ -901,21 +897,29 @@ CodeGenFunction::generateObjCGetterBody(const ObjCImplementationDecl *classImpl, // Currently, all atomic accesses have to be through integer // types, so there's no point in trying to pick a prettier type. - llvm::Type *bitcastType = - llvm::Type::getIntNTy(getLLVMContext(), - getContext().toBits(strategy.getIvarSize())); + uint64_t ivarSize = getContext().toBits(strategy.getIvarSize()); + llvm::Type *bitcastType = llvm::Type::getIntNTy(getLLVMContext(), ivarSize); bitcastType = bitcastType->getPointerTo(); // addrspace 0 okay // Perform an atomic load. This does not impose ordering constraints. Address ivarAddr = LV.getAddress(); ivarAddr = Builder.CreateBitCast(ivarAddr, bitcastType); llvm::LoadInst *load = Builder.CreateLoad(ivarAddr, "load"); - load->setAtomic(llvm::Unordered); + load->setAtomic(llvm::AtomicOrdering::Unordered); // Store that value into the return address. 
Doing this with a // bitcast is likely to produce some pretty ugly IR, but it's not // the *most* terrible thing in the world. - Builder.CreateStore(load, Builder.CreateBitCast(ReturnValue, bitcastType)); + llvm::Type *retTy = ConvertType(getterMethod->getReturnType()); + uint64_t retTySize = CGM.getDataLayout().getTypeSizeInBits(retTy); + llvm::Value *ivarVal = load; + if (ivarSize > retTySize) { + llvm::Type *newTy = llvm::Type::getIntNTy(getLLVMContext(), retTySize); + ivarVal = Builder.CreateTrunc(load, newTy); + bitcastType = newTy->getPointerTo(); + } + Builder.CreateStore(ivarVal, + Builder.CreateBitCast(ReturnValue, bitcastType)); // Make sure we don't do an autorelease. AutoreleaseResult = false; @@ -950,8 +954,7 @@ CodeGenFunction::generateObjCGetterBody(const ObjCImplementationDecl *classImpl, // runtime already should have computed it to build the function. llvm::Instruction *CallInstruction; RValue RV = EmitCall( - getTypes().arrangeFreeFunctionCall( - propType, args, FunctionType::ExtInfo(), RequiredArgs::All), + getTypes().arrangeBuiltinFunctionCall(propType, args), getPropertyFn, ReturnValueSlot(), args, CGCalleeInfo(), &CallInstruction); if (llvm::CallInst *call = dyn_cast<llvm::CallInst>(CallInstruction)) @@ -1015,7 +1018,6 @@ CodeGenFunction::generateObjCGetterBody(const ObjCImplementationDecl *classImpl, AutoreleaseResult = false; } - value = Builder.CreateBitCast(value, ConvertType(propType)); value = Builder.CreateBitCast( value, ConvertType(GetterMethodDecl->getReturnType())); } @@ -1067,10 +1069,8 @@ static void emitStructSetterCall(CodeGenFunction &CGF, ObjCMethodDecl *OMD, args.add(RValue::get(CGF.Builder.getFalse()), CGF.getContext().BoolTy); llvm::Value *copyStructFn = CGF.CGM.getObjCRuntime().GetSetStructFunction(); - CGF.EmitCall(CGF.getTypes().arrangeFreeFunctionCall(CGF.getContext().VoidTy, - args, - FunctionType::ExtInfo(), - RequiredArgs::All), + CGF.EmitCall( + CGF.getTypes().arrangeBuiltinFunctionCall(CGF.getContext().VoidTy, args), copyStructFn, ReturnValueSlot(), args); } @@ -1105,10 +1105,8 @@ static void emitCPPObjectAtomicSetterCall(CodeGenFunction &CGF, llvm::Value *copyCppAtomicObjectFn = CGF.CGM.getObjCRuntime().GetCppAtomicObjectSetFunction(); - CGF.EmitCall(CGF.getTypes().arrangeFreeFunctionCall(CGF.getContext().VoidTy, - args, - FunctionType::ExtInfo(), - RequiredArgs::All), + CGF.EmitCall( + CGF.getTypes().arrangeBuiltinFunctionCall(CGF.getContext().VoidTy, args), copyCppAtomicObjectFn, ReturnValueSlot(), args); } @@ -1192,7 +1190,7 @@ CodeGenFunction::generateObjCSetterBody(const ObjCImplementationDecl *classImpl, // Perform an atomic store. There are no memory ordering requirements. 
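Throughout this patch (here and in CGExprScalar.cpp above) the bare llvm::Unordered / llvm::SequentiallyConsistent constants are respelled through the scoped llvm::AtomicOrdering enum. A minimal IRBuilder sketch of the new spelling used by the atomic property accessors (an illustrative helper, not clang code):

#include "llvm/IR/IRBuilder.h"

// Emit a store and mark it atomic with unordered semantics: atomicity without
// imposing any memory-ordering constraints, matching the accessor code above.
llvm::StoreInst *emitUnorderedAtomicStore(llvm::IRBuilder<> &Builder,
                                          llvm::Value *Val, llvm::Value *Addr) {
  llvm::StoreInst *Store = Builder.CreateStore(Val, Addr);
  Store->setAtomic(llvm::AtomicOrdering::Unordered);
  return Store;
}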
llvm::StoreInst *store = Builder.CreateStore(load, ivarAddr); - store->setAtomic(llvm::Unordered); + store->setAtomic(llvm::AtomicOrdering::Unordered); return; } @@ -1238,9 +1236,7 @@ CodeGenFunction::generateObjCSetterBody(const ObjCImplementationDecl *classImpl, if (setOptimizedPropertyFn) { args.add(RValue::get(arg), getContext().getObjCIdType()); args.add(RValue::get(ivarOffset), getContext().getPointerDiffType()); - EmitCall(getTypes().arrangeFreeFunctionCall(getContext().VoidTy, args, - FunctionType::ExtInfo(), - RequiredArgs::All), + EmitCall(getTypes().arrangeBuiltinFunctionCall(getContext().VoidTy, args), setOptimizedPropertyFn, ReturnValueSlot(), args); } else { args.add(RValue::get(ivarOffset), getContext().getPointerDiffType()); @@ -1251,9 +1247,7 @@ CodeGenFunction::generateObjCSetterBody(const ObjCImplementationDecl *classImpl, getContext().BoolTy); // FIXME: We shouldn't need to get the function info here, the runtime // already should have computed it to build the function. - EmitCall(getTypes().arrangeFreeFunctionCall(getContext().VoidTy, args, - FunctionType::ExtInfo(), - RequiredArgs::All), + EmitCall(getTypes().arrangeBuiltinFunctionCall(getContext().VoidTy, args), setPropertyFn, ReturnValueSlot(), args); } @@ -1498,6 +1492,8 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){ ArrayType::Normal, 0); Address ItemsPtr = CreateMemTemp(ItemsTy, "items.ptr"); + RunCleanupsScope ForScope(*this); + // Emit the collection pointer. In ARC, we do a retain. llvm::Value *Collection; if (getLangOpts().ObjCAutoRefCount) { @@ -1610,9 +1606,8 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){ Args2.add(RValue::get(V), getContext().getObjCIdType()); // FIXME: We shouldn't need to get the function info here, the runtime already // should have computed it to build the function. - EmitCall(CGM.getTypes().arrangeFreeFunctionCall(getContext().VoidTy, Args2, - FunctionType::ExtInfo(), - RequiredArgs::All), + EmitCall( + CGM.getTypes().arrangeBuiltinFunctionCall(getContext().VoidTy, Args2), EnumerationMutationFn, ReturnValueSlot(), Args2); // Otherwise, or if the mutation function returns, just continue. @@ -1739,10 +1734,7 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){ if (DI) DI->EmitLexicalBlockEnd(Builder, S.getSourceRange().getEnd()); - // Leave the cleanup we entered in ARC. - if (getLangOpts().ObjCAutoRefCount) - PopCleanupBlock(); - + ForScope.ForceCleanup(); EmitBlock(LoopEnd.getBlock()); } @@ -1980,20 +1972,14 @@ llvm::Value *CodeGenFunction::EmitARCRetainBlock(llvm::Value *value, return result; } -/// Retain the given object which is the result of a function call. -/// call i8* \@objc_retainAutoreleasedReturnValue(i8* %value) -/// -/// Yes, this function name is one character away from a different -/// call with completely different semantics. -llvm::Value * -CodeGenFunction::EmitARCRetainAutoreleasedReturnValue(llvm::Value *value) { +static void emitAutoreleasedReturnValueMarker(CodeGenFunction &CGF) { // Fetch the void(void) inline asm which marks that we're going to - // retain the autoreleased return value. + // do something with the autoreleased return value. 
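emitAutoreleasedReturnValueMarker, factored out here so that both the retain and the new unsafe-claim entry points can share it, either calls the marker asm directly at -O0 or, as the rest of this hunk shows, records the assembly string once as module-level named metadata. A standalone sketch of that metadata path (the helper name is invented):

#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"

// Record the target's marker assembly once per module so the optimizer and
// backend can recover it without a marker call at every site.
void recordRetainAutoreleasedRVMarker(llvm::Module &M, llvm::StringRef Assembly) {
  llvm::LLVMContext &Ctx = M.getContext();
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata(
      "clang.arc.retainAutoreleasedReturnValueMarker");
  if (MD->getNumOperands() == 0)
    MD->addOperand(
        llvm::MDNode::get(Ctx, llvm::MDString::get(Ctx, Assembly)));
}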
llvm::InlineAsm *&marker - = CGM.getObjCEntrypoints().retainAutoreleasedReturnValueMarker; + = CGF.CGM.getObjCEntrypoints().retainAutoreleasedReturnValueMarker; if (!marker) { StringRef assembly - = CGM.getTargetCodeGenInfo() + = CGF.CGM.getTargetCodeGenInfo() .getARCRetainAutoreleasedReturnValueMarker(); // If we have an empty assembly string, there's nothing to do. @@ -2001,9 +1987,9 @@ CodeGenFunction::EmitARCRetainAutoreleasedReturnValue(llvm::Value *value) { // Otherwise, at -O0, build an inline asm that we're going to call // in a moment. - } else if (CGM.getCodeGenOpts().OptimizationLevel == 0) { + } else if (CGF.CGM.getCodeGenOpts().OptimizationLevel == 0) { llvm::FunctionType *type = - llvm::FunctionType::get(VoidTy, /*variadic*/false); + llvm::FunctionType::get(CGF.VoidTy, /*variadic*/false); marker = llvm::InlineAsm::get(type, assembly, "", /*sideeffects*/ true); @@ -2012,25 +1998,50 @@ CodeGenFunction::EmitARCRetainAutoreleasedReturnValue(llvm::Value *value) { // optimizer to pick up. } else { llvm::NamedMDNode *metadata = - CGM.getModule().getOrInsertNamedMetadata( + CGF.CGM.getModule().getOrInsertNamedMetadata( "clang.arc.retainAutoreleasedReturnValueMarker"); assert(metadata->getNumOperands() <= 1); if (metadata->getNumOperands() == 0) { - metadata->addOperand(llvm::MDNode::get( - getLLVMContext(), llvm::MDString::get(getLLVMContext(), assembly))); + auto &ctx = CGF.getLLVMContext(); + metadata->addOperand(llvm::MDNode::get(ctx, + llvm::MDString::get(ctx, assembly))); } } } // Call the marker asm if we made one, which we do only at -O0. if (marker) - Builder.CreateCall(marker); + CGF.Builder.CreateCall(marker); +} +/// Retain the given object which is the result of a function call. +/// call i8* \@objc_retainAutoreleasedReturnValue(i8* %value) +/// +/// Yes, this function name is one character away from a different +/// call with completely different semantics. +llvm::Value * +CodeGenFunction::EmitARCRetainAutoreleasedReturnValue(llvm::Value *value) { + emitAutoreleasedReturnValueMarker(*this); return emitARCValueOperation(*this, value, - CGM.getObjCEntrypoints().objc_retainAutoreleasedReturnValue, + CGM.getObjCEntrypoints().objc_retainAutoreleasedReturnValue, "objc_retainAutoreleasedReturnValue"); } +/// Claim a possibly-autoreleased return value at +0. This is only +/// valid to do in contexts which do not rely on the retain to keep +/// the object valid for for all of its uses; for example, when +/// the value is ignored, or when it is being assigned to an +/// __unsafe_unretained variable. +/// +/// call i8* \@objc_unsafeClaimAutoreleasedReturnValue(i8* %value) +llvm::Value * +CodeGenFunction::EmitARCUnsafeClaimAutoreleasedReturnValue(llvm::Value *value) { + emitAutoreleasedReturnValueMarker(*this); + return emitARCValueOperation(*this, value, + CGM.getObjCEntrypoints().objc_unsafeClaimAutoreleasedReturnValue, + "objc_unsafeClaimAutoreleasedReturnValue"); +} + /// Release the given object. /// call void \@objc_release(i8* %value) void CodeGenFunction::EmitARCRelease(llvm::Value *value, @@ -2446,25 +2457,22 @@ static TryEmitResult tryEmitARCRetainLoadOfScalar(CodeGenFunction &CGF, return tryEmitARCRetainLoadOfScalar(CGF, CGF.EmitLValue(e), type); } -static llvm::Value *emitARCRetainAfterCall(CodeGenFunction &CGF, - llvm::Value *value); +typedef llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, + llvm::Value *value)> + ValueTransform; -/// Given that the given expression is some sort of call (which does -/// not return retained), emit a retain following it. 
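The ValueTransform typedef above threads small callables into emitARCOperationAfterCall via llvm::function_ref, a non-owning callable reference that is cheap to pass and safe for arguments that do not outlive the call. A minimal standalone illustration of the pattern (the names here are invented, not clang's):

#include "llvm/ADT/STLExtras.h"   // llvm::function_ref

// Apply one of two caller-supplied transforms, mirroring how
// emitARCOperationAfterCall picks between "doAfterCall" and "doFallback".
using IntTransform = llvm::function_ref<int(int)>;

int transformResult(int value, bool isCallResult, IntTransform doAfterCall,
                    IntTransform doFallback) {
  return isCallResult ? doAfterCall(value) : doFallback(value);
}

// Usage: transformResult(7, true, [](int v) { return v + 1; },
//                        [](int v) { return v; });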
-static llvm::Value *emitARCRetainCall(CodeGenFunction &CGF, const Expr *e) { - llvm::Value *value = CGF.EmitScalarExpr(e); - return emitARCRetainAfterCall(CGF, value); -} - -static llvm::Value *emitARCRetainAfterCall(CodeGenFunction &CGF, - llvm::Value *value) { +/// Insert code immediately after a call. +static llvm::Value *emitARCOperationAfterCall(CodeGenFunction &CGF, + llvm::Value *value, + ValueTransform doAfterCall, + ValueTransform doFallback) { if (llvm::CallInst *call = dyn_cast<llvm::CallInst>(value)) { CGBuilderTy::InsertPoint ip = CGF.Builder.saveIP(); // Place the retain immediately following the call. CGF.Builder.SetInsertPoint(call->getParent(), ++llvm::BasicBlock::iterator(call)); - value = CGF.EmitARCRetainAutoreleasedReturnValue(value); + value = doAfterCall(CGF, value); CGF.Builder.restoreIP(ip); return value; @@ -2474,7 +2482,7 @@ static llvm::Value *emitARCRetainAfterCall(CodeGenFunction &CGF, // Place the retain at the beginning of the normal destination block. llvm::BasicBlock *BB = invoke->getNormalDest(); CGF.Builder.SetInsertPoint(BB, BB->begin()); - value = CGF.EmitARCRetainAutoreleasedReturnValue(value); + value = doAfterCall(CGF, value); CGF.Builder.restoreIP(ip); return value; @@ -2483,7 +2491,7 @@ static llvm::Value *emitARCRetainAfterCall(CodeGenFunction &CGF, // the operand. } else if (llvm::BitCastInst *bitcast = dyn_cast<llvm::BitCastInst>(value)) { llvm::Value *operand = bitcast->getOperand(0); - operand = emitARCRetainAfterCall(CGF, operand); + operand = emitARCOperationAfterCall(CGF, operand, doAfterCall, doFallback); bitcast->setOperand(0, operand); return bitcast; @@ -2491,7 +2499,46 @@ static llvm::Value *emitARCRetainAfterCall(CodeGenFunction &CGF, } else { // Retain using the non-block variant: we never need to do a copy // of a block that's been returned to us. - return CGF.EmitARCRetainNonBlock(value); + return doFallback(CGF, value); + } +} + +/// Given that the given expression is some sort of call (which does +/// not return retained), emit a retain following it. +static llvm::Value *emitARCRetainCallResult(CodeGenFunction &CGF, + const Expr *e) { + llvm::Value *value = CGF.EmitScalarExpr(e); + return emitARCOperationAfterCall(CGF, value, + [](CodeGenFunction &CGF, llvm::Value *value) { + return CGF.EmitARCRetainAutoreleasedReturnValue(value); + }, + [](CodeGenFunction &CGF, llvm::Value *value) { + return CGF.EmitARCRetainNonBlock(value); + }); +} + +/// Given that the given expression is some sort of call (which does +/// not return retained), perform an unsafeClaim following it. +static llvm::Value *emitARCUnsafeClaimCallResult(CodeGenFunction &CGF, + const Expr *e) { + llvm::Value *value = CGF.EmitScalarExpr(e); + return emitARCOperationAfterCall(CGF, value, + [](CodeGenFunction &CGF, llvm::Value *value) { + return CGF.EmitARCUnsafeClaimAutoreleasedReturnValue(value); + }, + [](CodeGenFunction &CGF, llvm::Value *value) { + return value; + }); +} + +llvm::Value *CodeGenFunction::EmitARCReclaimReturnedObject(const Expr *E, + bool allowUnsafeClaim) { + if (allowUnsafeClaim && + CGM.getLangOpts().ObjCRuntime.hasARCUnsafeClaimAutoreleasedReturnValue()) { + return emitARCUnsafeClaimCallResult(*this, E); + } else { + llvm::Value *value = emitARCRetainCallResult(*this, E); + return EmitObjCConsumeObject(E->getType(), value); } } @@ -2531,17 +2578,52 @@ static bool shouldEmitSeparateBlockRetain(const Expr *e) { return true; } -/// Try to emit a PseudoObjectExpr at +1. 
+namespace { +/// A CRTP base class for emitting expressions of retainable object +/// pointer type in ARC. +template <typename Impl, typename Result> class ARCExprEmitter { +protected: + CodeGenFunction &CGF; + Impl &asImpl() { return *static_cast<Impl*>(this); } + + ARCExprEmitter(CodeGenFunction &CGF) : CGF(CGF) {} + +public: + Result visit(const Expr *e); + Result visitCastExpr(const CastExpr *e); + Result visitPseudoObjectExpr(const PseudoObjectExpr *e); + Result visitBinaryOperator(const BinaryOperator *e); + Result visitBinAssign(const BinaryOperator *e); + Result visitBinAssignUnsafeUnretained(const BinaryOperator *e); + Result visitBinAssignAutoreleasing(const BinaryOperator *e); + Result visitBinAssignWeak(const BinaryOperator *e); + Result visitBinAssignStrong(const BinaryOperator *e); + + // Minimal implementation: + // Result visitLValueToRValue(const Expr *e) + // Result visitConsumeObject(const Expr *e) + // Result visitExtendBlockObject(const Expr *e) + // Result visitReclaimReturnedObject(const Expr *e) + // Result visitCall(const Expr *e) + // Result visitExpr(const Expr *e) + // + // Result emitBitCast(Result result, llvm::Type *resultType) + // llvm::Value *getValueOfResult(Result result) +}; +} + +/// Try to emit a PseudoObjectExpr under special ARC rules. /// /// This massively duplicates emitPseudoObjectRValue. -static TryEmitResult tryEmitARCRetainPseudoObject(CodeGenFunction &CGF, - const PseudoObjectExpr *E) { +template <typename Impl, typename Result> +Result +ARCExprEmitter<Impl,Result>::visitPseudoObjectExpr(const PseudoObjectExpr *E) { SmallVector<CodeGenFunction::OpaqueValueMappingData, 4> opaques; // Find the result expression. const Expr *resultExpr = E->getResultExpr(); assert(resultExpr); - TryEmitResult result; + Result result; for (PseudoObjectExpr::const_semantics_iterator i = E->semantics_begin(), e = E->semantics_end(); i != e; ++i) { @@ -2557,8 +2639,9 @@ static TryEmitResult tryEmitARCRetainPseudoObject(CodeGenFunction &CGF, // expression, try to evaluate the source as +1. if (ov == resultExpr) { assert(!OVMA::shouldBindAsLValue(ov)); - result = tryEmitARCRetainScalarExpr(CGF, ov->getSourceExpr()); - opaqueData = OVMA::bind(CGF, ov, RValue::get(result.getPointer())); + result = asImpl().visit(ov->getSourceExpr()); + opaqueData = OVMA::bind(CGF, ov, + RValue::get(asImpl().getValueOfResult(result))); // Otherwise, just bind it. } else { @@ -2569,7 +2652,7 @@ static TryEmitResult tryEmitARCRetainPseudoObject(CodeGenFunction &CGF, // Otherwise, if the expression is the result, evaluate it // and remember the result. } else if (semantic == resultExpr) { - result = tryEmitARCRetainScalarExpr(CGF, semantic); + result = asImpl().visit(semantic); // Otherwise, evaluate the expression in an ignored context. } else { @@ -2584,146 +2667,240 @@ static TryEmitResult tryEmitARCRetainPseudoObject(CodeGenFunction &CGF, return result; } -static TryEmitResult -tryEmitARCRetainScalarExpr(CodeGenFunction &CGF, const Expr *e) { +template <typename Impl, typename Result> +Result ARCExprEmitter<Impl,Result>::visitCastExpr(const CastExpr *e) { + switch (e->getCastKind()) { + + // No-op casts don't change the type, so we just ignore them. + case CK_NoOp: + return asImpl().visit(e->getSubExpr()); + + // These casts can change the type. 
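ARCExprEmitter above uses the curiously recurring template pattern: the base class owns the shared traversal (visit, visitCastExpr, visitBinAssign, ...) and reaches the derived emitter's customization points through asImpl(), so the +1 retain emitter and the later +0 unsafe-unretained emitter share one walker with no virtual dispatch. A self-contained sketch of that structure (names invented):

#include <iostream>

template <typename Impl, typename Result> class ExprWalkerBase {
protected:
  Impl &asImpl() { return *static_cast<Impl *>(this); }

public:
  // Shared driver: the dispatch logic lives once, in the base.
  Result visit(bool isCall) {
    return isCall ? asImpl().visitCall() : asImpl().visitExpr();
  }
};

// A concrete emitter supplies just the leaf behaviours.
struct RetainWalker : ExprWalkerBase<RetainWalker, int> {
  int visitCall() { return 1; }   // e.g. "result is retained"
  int visitExpr() { return 0; }   // e.g. "result is not retained"
};

int main() {
  RetainWalker W;
  std::cout << W.visit(true) << ' ' << W.visit(false) << '\n';  // prints "1 0"
}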
+ case CK_CPointerToObjCPointerCast: + case CK_BlockPointerToObjCPointerCast: + case CK_AnyPointerToBlockPointerCast: + case CK_BitCast: { + llvm::Type *resultType = CGF.ConvertType(e->getType()); + assert(e->getSubExpr()->getType()->hasPointerRepresentation()); + Result result = asImpl().visit(e->getSubExpr()); + return asImpl().emitBitCast(result, resultType); + } + + // Handle some casts specially. + case CK_LValueToRValue: + return asImpl().visitLValueToRValue(e->getSubExpr()); + case CK_ARCConsumeObject: + return asImpl().visitConsumeObject(e->getSubExpr()); + case CK_ARCExtendBlockObject: + return asImpl().visitExtendBlockObject(e->getSubExpr()); + case CK_ARCReclaimReturnedObject: + return asImpl().visitReclaimReturnedObject(e->getSubExpr()); + + // Otherwise, use the default logic. + default: + return asImpl().visitExpr(e); + } +} + +template <typename Impl, typename Result> +Result +ARCExprEmitter<Impl,Result>::visitBinaryOperator(const BinaryOperator *e) { + switch (e->getOpcode()) { + case BO_Comma: + CGF.EmitIgnoredExpr(e->getLHS()); + CGF.EnsureInsertPoint(); + return asImpl().visit(e->getRHS()); + + case BO_Assign: + return asImpl().visitBinAssign(e); + + default: + return asImpl().visitExpr(e); + } +} + +template <typename Impl, typename Result> +Result ARCExprEmitter<Impl,Result>::visitBinAssign(const BinaryOperator *e) { + switch (e->getLHS()->getType().getObjCLifetime()) { + case Qualifiers::OCL_ExplicitNone: + return asImpl().visitBinAssignUnsafeUnretained(e); + + case Qualifiers::OCL_Weak: + return asImpl().visitBinAssignWeak(e); + + case Qualifiers::OCL_Autoreleasing: + return asImpl().visitBinAssignAutoreleasing(e); + + case Qualifiers::OCL_Strong: + return asImpl().visitBinAssignStrong(e); + + case Qualifiers::OCL_None: + return asImpl().visitExpr(e); + } + llvm_unreachable("bad ObjC ownership qualifier"); +} + +/// The default rule for __unsafe_unretained emits the RHS recursively, +/// stores into the unsafe variable, and propagates the result outward. +template <typename Impl, typename Result> +Result ARCExprEmitter<Impl,Result>:: + visitBinAssignUnsafeUnretained(const BinaryOperator *e) { + // Recursively emit the RHS. + // For __block safety, do this before emitting the LHS. + Result result = asImpl().visit(e->getRHS()); + + // Perform the store. + LValue lvalue = + CGF.EmitCheckedLValue(e->getLHS(), CodeGenFunction::TCK_Store); + CGF.EmitStoreThroughLValue(RValue::get(asImpl().getValueOfResult(result)), + lvalue); + + return result; +} + +template <typename Impl, typename Result> +Result +ARCExprEmitter<Impl,Result>::visitBinAssignAutoreleasing(const BinaryOperator *e) { + return asImpl().visitExpr(e); +} + +template <typename Impl, typename Result> +Result +ARCExprEmitter<Impl,Result>::visitBinAssignWeak(const BinaryOperator *e) { + return asImpl().visitExpr(e); +} + +template <typename Impl, typename Result> +Result +ARCExprEmitter<Impl,Result>::visitBinAssignStrong(const BinaryOperator *e) { + return asImpl().visitExpr(e); +} + +/// The general expression-emission logic. +template <typename Impl, typename Result> +Result ARCExprEmitter<Impl,Result>::visit(const Expr *e) { // We should *never* see a nested full-expression here, because if // we fail to emit at +1, our caller must not retain after we close - // out the full-expression. + // out the full-expression. This isn't as important in the unsafe + // emitter. assert(!isa<ExprWithCleanups>(e)); - // The desired result type, if it differs from the type of the - // ultimate opaque expression. 
- llvm::Type *resultType = nullptr; - - while (true) { - e = e->IgnoreParens(); - - // There's a break at the end of this if-chain; anything - // that wants to keep looping has to explicitly continue. - if (const CastExpr *ce = dyn_cast<CastExpr>(e)) { - switch (ce->getCastKind()) { - // No-op casts don't change the type, so we just ignore them. - case CK_NoOp: - e = ce->getSubExpr(); - continue; - - case CK_LValueToRValue: { - TryEmitResult loadResult - = tryEmitARCRetainLoadOfScalar(CGF, ce->getSubExpr()); - if (resultType) { - llvm::Value *value = loadResult.getPointer(); - value = CGF.Builder.CreateBitCast(value, resultType); - loadResult.setPointer(value); - } - return loadResult; - } + // Look through parens, __extension__, generic selection, etc. + e = e->IgnoreParens(); - // These casts can change the type, so remember that and - // soldier on. We only need to remember the outermost such - // cast, though. - case CK_CPointerToObjCPointerCast: - case CK_BlockPointerToObjCPointerCast: - case CK_AnyPointerToBlockPointerCast: - case CK_BitCast: - if (!resultType) - resultType = CGF.ConvertType(ce->getType()); - e = ce->getSubExpr(); - assert(e->getType()->hasPointerRepresentation()); - continue; - - // For consumptions, just emit the subexpression and thus elide - // the retain/release pair. - case CK_ARCConsumeObject: { - llvm::Value *result = CGF.EmitScalarExpr(ce->getSubExpr()); - if (resultType) result = CGF.Builder.CreateBitCast(result, resultType); - return TryEmitResult(result, true); - } + // Handle certain kinds of casts. + if (const CastExpr *ce = dyn_cast<CastExpr>(e)) { + return asImpl().visitCastExpr(ce); - // Block extends are net +0. Naively, we could just recurse on - // the subexpression, but actually we need to ensure that the - // value is copied as a block, so there's a little filter here. - case CK_ARCExtendBlockObject: { - llvm::Value *result; // will be a +0 value + // Handle the comma operator. + } else if (auto op = dyn_cast<BinaryOperator>(e)) { + return asImpl().visitBinaryOperator(op); - // If we can't safely assume the sub-expression will produce a - // block-copied value, emit the sub-expression at +0. - if (shouldEmitSeparateBlockRetain(ce->getSubExpr())) { - result = CGF.EmitScalarExpr(ce->getSubExpr()); + // TODO: handle conditional operators here - // Otherwise, try to emit the sub-expression at +1 recursively. - } else { - TryEmitResult subresult - = tryEmitARCRetainScalarExpr(CGF, ce->getSubExpr()); - result = subresult.getPointer(); - - // If that produced a retained value, just use that, - // possibly casting down. - if (subresult.getInt()) { - if (resultType) - result = CGF.Builder.CreateBitCast(result, resultType); - return TryEmitResult(result, true); - } + // For calls and message sends, use the retained-call logic. + // Delegate inits are a special case in that they're the only + // returns-retained expression that *isn't* surrounded by + // a consume. + } else if (isa<CallExpr>(e) || + (isa<ObjCMessageExpr>(e) && + !cast<ObjCMessageExpr>(e)->isDelegateInitCall())) { + return asImpl().visitCall(e); - // Otherwise it's +0. - } + // Look through pseudo-object expressions. + } else if (const PseudoObjectExpr *pseudo = dyn_cast<PseudoObjectExpr>(e)) { + return asImpl().visitPseudoObjectExpr(pseudo); + } - // Retain the object as a block, then cast down. 
- result = CGF.EmitARCRetainBlock(result, /*mandatory*/ true); - if (resultType) result = CGF.Builder.CreateBitCast(result, resultType); - return TryEmitResult(result, true); - } + return asImpl().visitExpr(e); +} - // For reclaims, emit the subexpression as a retained call and - // skip the consumption. - case CK_ARCReclaimReturnedObject: { - llvm::Value *result = emitARCRetainCall(CGF, ce->getSubExpr()); - if (resultType) result = CGF.Builder.CreateBitCast(result, resultType); - return TryEmitResult(result, true); - } +namespace { - default: - break; - } +/// An emitter for +1 results. +struct ARCRetainExprEmitter : + public ARCExprEmitter<ARCRetainExprEmitter, TryEmitResult> { - // Skip __extension__. - } else if (const UnaryOperator *op = dyn_cast<UnaryOperator>(e)) { - if (op->getOpcode() == UO_Extension) { - e = op->getSubExpr(); - continue; - } + ARCRetainExprEmitter(CodeGenFunction &CGF) : ARCExprEmitter(CGF) {} + + llvm::Value *getValueOfResult(TryEmitResult result) { + return result.getPointer(); + } - // For calls and message sends, use the retained-call logic. - // Delegate inits are a special case in that they're the only - // returns-retained expression that *isn't* surrounded by - // a consume. - } else if (isa<CallExpr>(e) || - (isa<ObjCMessageExpr>(e) && - !cast<ObjCMessageExpr>(e)->isDelegateInitCall())) { - llvm::Value *result = emitARCRetainCall(CGF, e); - if (resultType) result = CGF.Builder.CreateBitCast(result, resultType); - return TryEmitResult(result, true); - - // Look through pseudo-object expressions. - } else if (const PseudoObjectExpr *pseudo = dyn_cast<PseudoObjectExpr>(e)) { - TryEmitResult result - = tryEmitARCRetainPseudoObject(CGF, pseudo); - if (resultType) { - llvm::Value *value = result.getPointer(); - value = CGF.Builder.CreateBitCast(value, resultType); - result.setPointer(value); + TryEmitResult emitBitCast(TryEmitResult result, llvm::Type *resultType) { + llvm::Value *value = result.getPointer(); + value = CGF.Builder.CreateBitCast(value, resultType); + result.setPointer(value); + return result; + } + + TryEmitResult visitLValueToRValue(const Expr *e) { + return tryEmitARCRetainLoadOfScalar(CGF, e); + } + + /// For consumptions, just emit the subexpression and thus elide + /// the retain/release pair. + TryEmitResult visitConsumeObject(const Expr *e) { + llvm::Value *result = CGF.EmitScalarExpr(e); + return TryEmitResult(result, true); + } + + /// Block extends are net +0. Naively, we could just recurse on + /// the subexpression, but actually we need to ensure that the + /// value is copied as a block, so there's a little filter here. + TryEmitResult visitExtendBlockObject(const Expr *e) { + llvm::Value *result; // will be a +0 value + + // If we can't safely assume the sub-expression will produce a + // block-copied value, emit the sub-expression at +0. + if (shouldEmitSeparateBlockRetain(e)) { + result = CGF.EmitScalarExpr(e); + + // Otherwise, try to emit the sub-expression at +1 recursively. + } else { + TryEmitResult subresult = asImpl().visit(e); + + // If that produced a retained value, just use that. + if (subresult.getInt()) { + return subresult; } - return result; + + // Otherwise it's +0. + result = subresult.getPointer(); } - // Conservatively halt the search at any other expression kind. - break; + // Retain the object as a block. 
+ result = CGF.EmitARCRetainBlock(result, /*mandatory*/ true); + return TryEmitResult(result, true); } - // We didn't find an obvious production, so emit what we've got and - // tell the caller that we didn't manage to retain. - llvm::Value *result = CGF.EmitScalarExpr(e); - if (resultType) result = CGF.Builder.CreateBitCast(result, resultType); - return TryEmitResult(result, false); + /// For reclaims, emit the subexpression as a retained call and + /// skip the consumption. + TryEmitResult visitReclaimReturnedObject(const Expr *e) { + llvm::Value *result = emitARCRetainCallResult(CGF, e); + return TryEmitResult(result, true); + } + + /// When we have an undecorated call, retroactively do a claim. + TryEmitResult visitCall(const Expr *e) { + llvm::Value *result = emitARCRetainCallResult(CGF, e); + return TryEmitResult(result, true); + } + + // TODO: maybe special-case visitBinAssignWeak? + + TryEmitResult visitExpr(const Expr *e) { + // We didn't find an obvious production, so emit what we've got and + // tell the caller that we didn't manage to retain. + llvm::Value *result = CGF.EmitScalarExpr(e); + return TryEmitResult(result, false); + } +}; +} + +static TryEmitResult +tryEmitARCRetainScalarExpr(CodeGenFunction &CGF, const Expr *e) { + return ARCRetainExprEmitter(CGF).visit(e); } static llvm::Value *emitARCRetainLoadOfScalar(CodeGenFunction &CGF, @@ -2807,6 +2984,96 @@ llvm::Value *CodeGenFunction::EmitObjCThrowOperand(const Expr *expr) { return EmitScalarExpr(expr); } +namespace { + +/// An emitter for assigning into an __unsafe_unretained context. +struct ARCUnsafeUnretainedExprEmitter : + public ARCExprEmitter<ARCUnsafeUnretainedExprEmitter, llvm::Value*> { + + ARCUnsafeUnretainedExprEmitter(CodeGenFunction &CGF) : ARCExprEmitter(CGF) {} + + llvm::Value *getValueOfResult(llvm::Value *value) { + return value; + } + + llvm::Value *emitBitCast(llvm::Value *value, llvm::Type *resultType) { + return CGF.Builder.CreateBitCast(value, resultType); + } + + llvm::Value *visitLValueToRValue(const Expr *e) { + return CGF.EmitScalarExpr(e); + } + + /// For consumptions, just emit the subexpression and perform the + /// consumption like normal. + llvm::Value *visitConsumeObject(const Expr *e) { + llvm::Value *value = CGF.EmitScalarExpr(e); + return CGF.EmitObjCConsumeObject(e->getType(), value); + } + + /// No special logic for block extensions. (This probably can't + /// actually happen in this emitter, though.) + llvm::Value *visitExtendBlockObject(const Expr *e) { + return CGF.EmitARCExtendBlockObject(e); + } + + /// For reclaims, perform an unsafeClaim if that's enabled. + llvm::Value *visitReclaimReturnedObject(const Expr *e) { + return CGF.EmitARCReclaimReturnedObject(e, /*unsafe*/ true); + } + + /// When we have an undecorated call, just emit it without adding + /// the unsafeClaim. + llvm::Value *visitCall(const Expr *e) { + return CGF.EmitScalarExpr(e); + } + + /// Just do normal scalar emission in the default case. + llvm::Value *visitExpr(const Expr *e) { + return CGF.EmitScalarExpr(e); + } +}; +} + +static llvm::Value *emitARCUnsafeUnretainedScalarExpr(CodeGenFunction &CGF, + const Expr *e) { + return ARCUnsafeUnretainedExprEmitter(CGF).visit(e); +} + +/// EmitARCUnsafeUnretainedScalarExpr - Semantically equivalent to +/// immediately releasing the resut of EmitARCRetainScalarExpr, but +/// avoiding any spurious retains, including by performing reclaims +/// with objc_unsafeClaimAutoreleasedReturnValue. 
+llvm::Value *CodeGenFunction::EmitARCUnsafeUnretainedScalarExpr(const Expr *e) { + // Look through full-expressions. + if (const ExprWithCleanups *cleanups = dyn_cast<ExprWithCleanups>(e)) { + enterFullExpression(cleanups); + RunCleanupsScope scope(*this); + return emitARCUnsafeUnretainedScalarExpr(*this, cleanups->getSubExpr()); + } + + return emitARCUnsafeUnretainedScalarExpr(*this, e); +} + +std::pair<LValue,llvm::Value*> +CodeGenFunction::EmitARCStoreUnsafeUnretained(const BinaryOperator *e, + bool ignored) { + // Evaluate the RHS first. If we're ignoring the result, assume + // that we can emit at an unsafe +0. + llvm::Value *value; + if (ignored) { + value = EmitARCUnsafeUnretainedScalarExpr(e->getRHS()); + } else { + value = EmitScalarExpr(e->getRHS()); + } + + // Emit the LHS and perform the store. + LValue lvalue = EmitLValue(e->getLHS()); + EmitStoreOfScalar(value, lvalue); + + return std::pair<LValue,llvm::Value*>(std::move(lvalue), value); +} + std::pair<LValue,llvm::Value*> CodeGenFunction::EmitARCStoreStrong(const BinaryOperator *e, bool ignored) { @@ -2935,8 +3202,8 @@ CodeGenFunction::GenerateObjCAtomicSetterCopyHelperFunction( ImplicitParamDecl srcDecl(getContext(), FD, SourceLocation(), nullptr, SrcTy); args.push_back(&srcDecl); - const CGFunctionInfo &FI = CGM.getTypes().arrangeFreeFunctionDeclaration( - C.VoidTy, args, FunctionType::ExtInfo(), RequiredArgs::All); + const CGFunctionInfo &FI = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, args); llvm::FunctionType *LTy = CGM.getTypes().GetFunctionType(FI); @@ -3016,8 +3283,8 @@ CodeGenFunction::GenerateObjCAtomicGetterCopyHelperFunction( ImplicitParamDecl srcDecl(getContext(), FD, SourceLocation(), nullptr, SrcTy); args.push_back(&srcDecl); - const CGFunctionInfo &FI = CGM.getTypes().arrangeFreeFunctionDeclaration( - C.VoidTy, args, FunctionType::ExtInfo(), RequiredArgs::All); + const CGFunctionInfo &FI = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, args); llvm::FunctionType *LTy = CGM.getTypes().GetFunctionType(FI); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGObjCGNU.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGObjCGNU.cpp index f0af3e9..caafef8 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGObjCGNU.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGObjCGNU.cpp @@ -35,11 +35,9 @@ #include "llvm/Support/Compiler.h" #include <cstdarg> - using namespace clang; using namespace CodeGen; - namespace { /// Class that lazily initialises the runtime function. Avoids inserting the /// types and the function declaration into a module if they're not used, and @@ -161,6 +159,7 @@ protected: /// runtime provides some LLVM passes that can use this to do things like /// automatic IMP caching and speculative inlining. unsigned msgSendMDKind; + /// Helper function that generates a constant string and returns a pointer to /// the start of the string. The result of this function can be used anywhere /// where the C code specifies const char*. @@ -170,6 +169,7 @@ protected: return llvm::ConstantExpr::getGetElementPtr(Array.getElementType(), Array.getPointer(), Zeros); } + /// Emits a linkonce_odr string, whose name is the prefix followed by the /// string value. This allows the linker to combine the strings between /// different modules. Used for EH typeinfo names, selector strings, and a @@ -186,6 +186,7 @@ protected: return llvm::ConstantExpr::getGetElementPtr(ConstStr->getValueType(), ConstStr, Zeros); } + /// Generates a global structure, initialized by the elements in the vector. 
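[Editorial illustration, not part of the patch: the new ARCRetainExprEmitter and ARCUnsafeUnretainedExprEmitter above share one expression walk through the CRTP base ARCExprEmitter, whose visit() defers each node to the derived class via asImpl(). A minimal sketch of that dispatch shape, using simplified stand-in types rather than the real clang classes:]

    // Minimal stand-in for an expression node kind.
    enum class ExprKind { Call, Other };

    template <typename Impl, typename Result>
    struct ExprEmitterBase {
      // CRTP: the base owns the traversal, the derived class owns the per-node work.
      Impl &asImpl() { return *static_cast<Impl *>(this); }

      Result visit(ExprKind k) {
        if (k == ExprKind::Call)
          return asImpl().visitCall(k);
        return asImpl().visitExpr(k);   // conservative fallback
      }
    };

    // One derived emitter produces "+1" (retained) results ...
    struct RetainEmitterSketch : ExprEmitterBase<RetainEmitterSketch, bool> {
      bool visitCall(ExprKind) { return true; }   // calls yield a retained value
      bool visitExpr(ExprKind) { return false; }  // anything else stays +0
    };

[The payoff visible in the diff is that the +1 emitter and the __unsafe_unretained emitter differ only in their visit methods, while the cast/comma/call/pseudo-object classification lives in a single place.]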
/// The element types must match the types of the structure elements in the /// first argument. @@ -201,6 +202,7 @@ protected: GV->setAlignment(Align.getQuantity()); return GV; } + /// Generates a global array. The vector must contain the same number of /// elements that the array type declares, of the type specified as the array /// element type. @@ -216,6 +218,7 @@ protected: GV->setAlignment(Align.getQuantity()); return GV; } + /// Generates a global array, inferring the array type from the specified /// element type and the size of the initialiser. llvm::GlobalVariable *MakeGlobalArray(llvm::Type *Ty, @@ -227,6 +230,7 @@ protected: llvm::ArrayType *ArrayTy = llvm::ArrayType::get(Ty, V.size()); return MakeGlobal(ArrayTy, V, Align, Name, linkage); } + /// Returns a property name and encoding string. llvm::Constant *MakePropertyEncodingString(const ObjCPropertyDecl *PD, const Decl *Container) { @@ -245,6 +249,7 @@ protected: } return MakeConstantString(PD->getNameAsString()); } + /// Push the property attributes into two structure fields. void PushPropertyAttributes(std::vector<llvm::Constant*> &Fields, ObjCPropertyDecl *property, bool isSynthesized=true, bool @@ -273,6 +278,7 @@ protected: Fields.push_back(llvm::ConstantInt::get(Int8Ty, 0)); Fields.push_back(llvm::ConstantInt::get(Int8Ty, 0)); } + /// Ensures that the value has the required type, by inserting a bitcast if /// required. This function lets us avoid inserting bitcasts that are /// redundant. @@ -284,12 +290,14 @@ protected: if (V.getType() == Ty) return V; return B.CreateBitCast(V, Ty); } + // Some zeros used for GEPs in lots of places. llvm::Constant *Zeros[2]; /// Null pointer value. Mainly used as a terminator in various arrays. llvm::Constant *NULLPtr; /// LLVM context. llvm::LLVMContext &VMContext; + private: /// Placeholder for the class. Lots of things refer to the class before we've /// actually emitted it. We use this alias as a placeholder, and then replace @@ -360,7 +368,6 @@ protected: LazyRuntimeFunction SyncExitFn; private: - /// Function called if fast enumeration detects that the collection is /// modified during the update. LazyRuntimeFunction EnumerationMutationFn; @@ -385,7 +392,7 @@ private: /// Objective-C 1 property structures when targeting the GCC runtime or it /// will abort. const int ProtocolVersion; -private: + /// Generates an instance variable list structure. This is a structure /// containing a size and an array of structures containing instance variable /// metadata. This is used purely for introspection in the fragile ABI. In @@ -393,6 +400,7 @@ private: llvm::Constant *GenerateIvarList(ArrayRef<llvm::Constant *> IvarNames, ArrayRef<llvm::Constant *> IvarTypes, ArrayRef<llvm::Constant *> IvarOffsets); + /// Generates a method list structure. This is a structure containing a size /// and an array of structures containing method metadata. /// @@ -403,23 +411,28 @@ private: ArrayRef<Selector> MethodSels, ArrayRef<llvm::Constant *> MethodTypes, bool isClassMethodList); + /// Emits an empty protocol. This is used for \@protocol() where no protocol /// is found. The runtime will (hopefully) fix up the pointer to refer to the /// real protocol. llvm::Constant *GenerateEmptyProtocol(const std::string &ProtocolName); + /// Generates a list of property metadata structures. This follows the same /// pattern as method and instance variable metadata lists. 
llvm::Constant *GeneratePropertyList(const ObjCImplementationDecl *OID, SmallVectorImpl<Selector> &InstanceMethodSels, SmallVectorImpl<llvm::Constant*> &InstanceMethodTypes); + /// Generates a list of referenced protocols. Classes, categories, and /// protocols all use this structure. llvm::Constant *GenerateProtocolList(ArrayRef<std::string> Protocols); + /// To ensure that all protocols are seen by the runtime, we add a category on /// a class defined in the runtime, declaring no methods, but adopting the /// protocols. This is a horribly ugly hack, but it allows us to collect all /// of the protocols without changing the ABI. void GenerateProtocolHolderCategory(); + /// Generates a class structure. llvm::Constant *GenerateClassStructure( llvm::Constant *MetaClass, @@ -436,25 +449,31 @@ private: llvm::Constant *StrongIvarBitmap, llvm::Constant *WeakIvarBitmap, bool isMeta=false); + /// Generates a method list. This is used by protocols to define the required /// and optional methods. llvm::Constant *GenerateProtocolMethodList( ArrayRef<llvm::Constant *> MethodNames, ArrayRef<llvm::Constant *> MethodTypes); + /// Returns a selector with the specified type encoding. An empty string is /// used to return an untyped selector (with the types field set to NULL). llvm::Value *GetSelector(CodeGenFunction &CGF, Selector Sel, const std::string &TypeEncoding); + /// Returns the variable used to store the offset of an instance variable. llvm::GlobalVariable *ObjCIvarOffsetVariable(const ObjCInterfaceDecl *ID, const ObjCIvarDecl *Ivar); /// Emits a reference to a class. This allows the linker to object if there /// is no class of the matching name. + protected: void EmitClassRef(const std::string &className); + /// Emits a pointer to the named class virtual llvm::Value *GetClassNamed(CodeGenFunction &CGF, const std::string &Name, bool isWeak); + /// Looks up the method for sending a message to the specified object. This /// mechanism differs between the GCC and GNU runtimes, so this method must be /// overridden in subclasses. @@ -463,6 +482,7 @@ protected: llvm::Value *cmd, llvm::MDNode *node, MessageSendInfo &MSI) = 0; + /// Looks up the method for sending a message to a superclass. This /// mechanism differs between the GCC and GNU runtimes, so this method must /// be overridden in subclasses. @@ -470,6 +490,7 @@ protected: Address ObjCSuper, llvm::Value *cmd, MessageSendInfo &MSI) = 0; + /// Libobjc2 uses a bitfield representation where small(ish) bitfields are /// stored in a 64-bit value with the low bit set to 1 and the remaining 63 /// bits set to their values, LSB first, while larger ones are stored in a @@ -482,6 +503,7 @@ protected: /// a bitfield with the 64th bit set will be (int64_t)&{ 2, [0, 1<<31] }, /// while a bitfield / with the 63rd bit set will be 1<<64. llvm::Constant *MakeBitField(ArrayRef<bool> bits); + public: CGObjCGNU(CodeGenModule &cgm, unsigned runtimeABIVersion, unsigned protocolClassVersion); @@ -569,11 +591,12 @@ public: return NULLPtr; } - llvm::GlobalVariable *GetClassGlobal(const std::string &Name, + llvm::GlobalVariable *GetClassGlobal(StringRef Name, bool Weak = false) override { return nullptr; } }; + /// Class representing the legacy GCC Objective-C ABI. This is the default when /// -fobjc-nonfragile-abi is not specified. /// @@ -590,6 +613,7 @@ class CGObjCGCC : public CGObjCGNU { /// structure describing the receiver and the class, and a selector as /// arguments. Returns the IMP for the corresponding method. 
LazyRuntimeFunction MsgLookupSuperFn; + protected: llvm::Value *LookupIMP(CodeGenFunction &CGF, llvm::Value *&Receiver, llvm::Value *cmd, llvm::MDNode *node, @@ -602,23 +626,26 @@ protected: imp->setMetadata(msgSendMDKind, node); return imp.getInstruction(); } + llvm::Value *LookupIMPSuper(CodeGenFunction &CGF, Address ObjCSuper, llvm::Value *cmd, MessageSendInfo &MSI) override { - CGBuilderTy &Builder = CGF.Builder; - llvm::Value *lookupArgs[] = {EnforceType(Builder, ObjCSuper, - PtrToObjCSuperTy).getPointer(), cmd}; - return CGF.EmitNounwindRuntimeCall(MsgLookupSuperFn, lookupArgs); - } - public: - CGObjCGCC(CodeGenModule &Mod) : CGObjCGNU(Mod, 8, 2) { - // IMP objc_msg_lookup(id, SEL); - MsgLookupFn.init(&CGM, "objc_msg_lookup", IMPTy, IdTy, SelectorTy, - nullptr); - // IMP objc_msg_lookup_super(struct objc_super*, SEL); - MsgLookupSuperFn.init(&CGM, "objc_msg_lookup_super", IMPTy, - PtrToObjCSuperTy, SelectorTy, nullptr); - } + CGBuilderTy &Builder = CGF.Builder; + llvm::Value *lookupArgs[] = {EnforceType(Builder, ObjCSuper, + PtrToObjCSuperTy).getPointer(), cmd}; + return CGF.EmitNounwindRuntimeCall(MsgLookupSuperFn, lookupArgs); + } + +public: + CGObjCGCC(CodeGenModule &Mod) : CGObjCGNU(Mod, 8, 2) { + // IMP objc_msg_lookup(id, SEL); + MsgLookupFn.init(&CGM, "objc_msg_lookup", IMPTy, IdTy, SelectorTy, + nullptr); + // IMP objc_msg_lookup_super(struct objc_super*, SEL); + MsgLookupSuperFn.init(&CGM, "objc_msg_lookup_super", IMPTy, + PtrToObjCSuperTy, SelectorTy, nullptr); + } }; + /// Class used when targeting the new GNUstep runtime ABI. class CGObjCGNUstep : public CGObjCGNU { /// The slot lookup function. Returns a pointer to a cacheable structure @@ -646,8 +673,10 @@ class CGObjCGNUstep : public CGObjCGNU { /// Type of an slot structure pointer. This is returned by the various /// lookup functions. llvm::Type *SlotTy; + public: llvm::Constant *GetEHType(QualType T) override; + protected: llvm::Value *LookupIMP(CodeGenFunction &CGF, llvm::Value *&Receiver, llvm::Value *cmd, llvm::MDNode *node, @@ -689,6 +718,7 @@ class CGObjCGNUstep : public CGObjCGNU { Receiver = Builder.CreateLoad(ReceiverPtr, true); return imp; } + llvm::Value *LookupIMPSuper(CodeGenFunction &CGF, Address ObjCSuper, llvm::Value *cmd, MessageSendInfo &MSI) override { @@ -702,6 +732,7 @@ class CGObjCGNUstep : public CGObjCGNU { return Builder.CreateAlignedLoad(Builder.CreateStructGEP(nullptr, slot, 4), CGF.getPointerAlign()); } + public: CGObjCGNUstep(CodeGenModule &Mod) : CGObjCGNU(Mod, 9, 3) { const ObjCRuntime &R = CGM.getLangOpts().ObjCRuntime; @@ -753,6 +784,7 @@ class CGObjCGNUstep : public CGObjCGNU { CxxAtomicObjectGetFn.init(&CGM, "objc_getCppObjectAtomic", VoidTy, PtrTy, PtrTy, PtrTy, nullptr); } + llvm::Constant *GetCppAtomicObjectGetFunction() override { // The optimised functions were added in version 1.7 of the GNUstep // runtime. @@ -760,6 +792,7 @@ class CGObjCGNUstep : public CGObjCGNU { VersionTuple(1, 7)); return CxxAtomicObjectGetFn; } + llvm::Constant *GetCppAtomicObjectSetFunction() override { // The optimised functions were added in version 1.7 of the GNUstep // runtime. 
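[Editorial illustration, not part of the patch: LookupIMP/LookupIMPSuper above model the GCC/GNU runtime's two-step dispatch, where objc_msg_lookup(id, SEL) returns an IMP that the caller then invokes. Roughly, at the source level (opaque typedefs added only so the sketch compiles on its own):]

    typedef struct objc_object *id;
    typedef struct objc_selector *SEL;
    typedef id (*IMP)(id, SEL, ...);

    extern "C" IMP objc_msg_lookup(id receiver, SEL op);

    id send(id receiver, SEL sel) {
      IMP imp = objc_msg_lookup(receiver, sel); // first call: find the method
      return imp(receiver, sel);                // second call: invoke the IMP
    }

[objc_msg_lookup_super follows the same pattern, except that it takes a struct objc_super* describing both the receiver and the class to start the search from.]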
@@ -767,6 +800,7 @@ class CGObjCGNUstep : public CGObjCGNU { VersionTuple(1, 7)); return CxxAtomicObjectSetFn; } + llvm::Constant *GetOptimizedPropertySetFunction(bool atomic, bool copy) override { // The optimised property functions omit the GC check, and so are not @@ -821,32 +855,29 @@ protected: llvm::Value *LookupIMPSuper(CodeGenFunction &CGF, Address ObjCSuper, llvm::Value *cmd, MessageSendInfo &MSI) override { - CGBuilderTy &Builder = CGF.Builder; - llvm::Value *lookupArgs[] = {EnforceType(Builder, ObjCSuper.getPointer(), - PtrToObjCSuperTy), cmd}; + CGBuilderTy &Builder = CGF.Builder; + llvm::Value *lookupArgs[] = { + EnforceType(Builder, ObjCSuper.getPointer(), PtrToObjCSuperTy), cmd, + }; - if (CGM.ReturnTypeUsesSRet(MSI.CallInfo)) - return CGF.EmitNounwindRuntimeCall(MsgLookupSuperFnSRet, lookupArgs); - else - return CGF.EmitNounwindRuntimeCall(MsgLookupSuperFn, lookupArgs); - } + if (CGM.ReturnTypeUsesSRet(MSI.CallInfo)) + return CGF.EmitNounwindRuntimeCall(MsgLookupSuperFnSRet, lookupArgs); + else + return CGF.EmitNounwindRuntimeCall(MsgLookupSuperFn, lookupArgs); + } - llvm::Value *GetClassNamed(CodeGenFunction &CGF, - const std::string &Name, bool isWeak) override { + llvm::Value *GetClassNamed(CodeGenFunction &CGF, const std::string &Name, + bool isWeak) override { if (isWeak) return CGObjCGNU::GetClassNamed(CGF, Name, isWeak); EmitClassRef(Name); - std::string SymbolName = "_OBJC_CLASS_" + Name; - llvm::GlobalVariable *ClassSymbol = TheModule.getGlobalVariable(SymbolName); - if (!ClassSymbol) ClassSymbol = new llvm::GlobalVariable(TheModule, LongTy, false, llvm::GlobalValue::ExternalLinkage, nullptr, SymbolName); - return ClassSymbol; } @@ -865,7 +896,6 @@ public: }; } // end anonymous namespace - /// Emits a reference to a dummy variable which is emitted with each class. /// This ensures that a linker error will be generated when trying to link /// together modules where a referenced class is not defined. @@ -1021,8 +1051,7 @@ CGObjCGNU::CGObjCGNU(CodeGenModule &cgm, unsigned runtimeABIVersion, } llvm::Value *CGObjCGNU::GetClassNamed(CodeGenFunction &CGF, - const std::string &Name, - bool isWeak) { + const std::string &Name, bool isWeak) { llvm::Constant *ClassName = MakeConstantString(Name); // With the incompatible ABI, this will need to be replaced with a direct // reference to the class symbol. For the compatible nonfragile ABI we are @@ -1044,15 +1073,48 @@ llvm::Value *CGObjCGNU::GetClassNamed(CodeGenFunction &CGF, // techniques can modify the name -> class mapping. 
llvm::Value *CGObjCGNU::GetClass(CodeGenFunction &CGF, const ObjCInterfaceDecl *OID) { - return GetClassNamed(CGF, OID->getNameAsString(), OID->isWeakImported()); + auto *Value = + GetClassNamed(CGF, OID->getNameAsString(), OID->isWeakImported()); + if (CGM.getTriple().isOSBinFormatCOFF()) { + if (auto *ClassSymbol = dyn_cast<llvm::GlobalVariable>(Value)) { + auto DLLStorage = llvm::GlobalValue::DefaultStorageClass; + if (OID->hasAttr<DLLExportAttr>()) + DLLStorage = llvm::GlobalValue::DLLExportStorageClass; + else if (OID->hasAttr<DLLImportAttr>()) + DLLStorage = llvm::GlobalValue::DLLImportStorageClass; + ClassSymbol->setDLLStorageClass(DLLStorage); + } + } + return Value; } + llvm::Value *CGObjCGNU::EmitNSAutoreleasePoolClassRef(CodeGenFunction &CGF) { - return GetClassNamed(CGF, "NSAutoreleasePool", false); + auto *Value = GetClassNamed(CGF, "NSAutoreleasePool", false); + if (CGM.getTriple().isOSBinFormatCOFF()) { + if (auto *ClassSymbol = dyn_cast<llvm::GlobalVariable>(Value)) { + IdentifierInfo &II = CGF.CGM.getContext().Idents.get("NSAutoreleasePool"); + TranslationUnitDecl *TUDecl = CGM.getContext().getTranslationUnitDecl(); + DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl); + + const VarDecl *VD = nullptr; + for (const auto &Result : DC->lookup(&II)) + if ((VD = dyn_cast<VarDecl>(Result))) + break; + + auto DLLStorage = llvm::GlobalValue::DefaultStorageClass; + if (!VD || VD->hasAttr<DLLImportAttr>()) + DLLStorage = llvm::GlobalValue::DLLImportStorageClass; + else if (VD->hasAttr<DLLExportAttr>()) + DLLStorage = llvm::GlobalValue::DLLExportStorageClass; + + ClassSymbol->setDLLStorageClass(DLLStorage); + } + } + return Value; } llvm::Value *CGObjCGNU::GetSelector(CodeGenFunction &CGF, Selector Sel, const std::string &TypeEncoding) { - SmallVectorImpl<TypedSelector> &Types = SelectorTable[Sel]; llvm::GlobalAlias *SelValue = nullptr; @@ -1247,8 +1309,6 @@ CGObjCGNU::GenerateMessageSendSuper(CodeGenFunction &CGF, } llvm::Value *cmd = GetSelector(CGF, Sel); - - CallArgList ActualArgs; ActualArgs.add(RValue::get(EnforceType(Builder, Receiver, IdTy)), ASTIdTy); @@ -1497,21 +1557,17 @@ GenerateMethodList(StringRef ClassName, IMPTy, //Method pointer nullptr); std::vector<llvm::Constant*> Methods; - std::vector<llvm::Constant*> Elements; for (unsigned int i = 0, e = MethodTypes.size(); i < e; ++i) { - Elements.clear(); llvm::Constant *Method = TheModule.getFunction(SymbolNameForMethod(ClassName, CategoryName, MethodSels[i], isClassMethodList)); assert(Method && "Can't generate metadata for method that doesn't exist"); llvm::Constant *C = MakeConstantString(MethodSels[i].getAsString()); - Elements.push_back(C); - Elements.push_back(MethodTypes[i]); Method = llvm::ConstantExpr::getBitCast(Method, IMPTy); - Elements.push_back(Method); - Methods.push_back(llvm::ConstantStruct::get(ObjCMethodTy, Elements)); + Methods.push_back( + llvm::ConstantStruct::get(ObjCMethodTy, {C, MethodTypes[i], Method})); } // Array of method structures @@ -1554,23 +1610,18 @@ GenerateIvarList(ArrayRef<llvm::Constant *> IvarNames, IntTy, nullptr); std::vector<llvm::Constant*> Ivars; - std::vector<llvm::Constant*> Elements; for (unsigned int i = 0, e = IvarNames.size() ; i < e ; i++) { - Elements.clear(); - Elements.push_back(IvarNames[i]); - Elements.push_back(IvarTypes[i]); - Elements.push_back(IvarOffsets[i]); - Ivars.push_back(llvm::ConstantStruct::get(ObjCIvarTy, Elements)); + Ivars.push_back(llvm::ConstantStruct::get( + ObjCIvarTy, {IvarNames[i], IvarTypes[i], IvarOffsets[i]})); } // Array of 
method structures llvm::ArrayType *ObjCIvarArrayTy = llvm::ArrayType::get(ObjCIvarTy, IvarNames.size()); - - Elements.clear(); - Elements.push_back(llvm::ConstantInt::get(IntTy, (int)IvarNames.size())); - Elements.push_back(llvm::ConstantArray::get(ObjCIvarArrayTy, Ivars)); + llvm::Constant *Elements[] = { + llvm::ConstantInt::get(IntTy, (int)IvarNames.size()), + llvm::ConstantArray::get(ObjCIvarArrayTy, Ivars)}; // Structure containing array and array count llvm::StructType *ObjCIvarListTy = llvm::StructType::get(IntTy, ObjCIvarArrayTy, @@ -1682,12 +1733,9 @@ GenerateProtocolMethodList(ArrayRef<llvm::Constant *> MethodNames, PtrToInt8Ty, nullptr); std::vector<llvm::Constant*> Methods; - std::vector<llvm::Constant*> Elements; for (unsigned int i = 0, e = MethodTypes.size() ; i < e ; i++) { - Elements.clear(); - Elements.push_back(MethodNames[i]); - Elements.push_back(MethodTypes[i]); - Methods.push_back(llvm::ConstantStruct::get(ObjCMethodDescTy, Elements)); + Methods.push_back(llvm::ConstantStruct::get( + ObjCMethodDescTy, {MethodNames[i], MethodTypes[i]})); } llvm::ArrayType *ObjCMethodArrayTy = llvm::ArrayType::get(ObjCMethodDescTy, MethodNames.size()); @@ -1762,17 +1810,13 @@ llvm::Constant *CGObjCGNU::GenerateEmptyProtocol( MethodList->getType(), MethodList->getType(), nullptr); - std::vector<llvm::Constant*> Elements; // The isa pointer must be set to a magic number so the runtime knows it's // the correct layout. - Elements.push_back(llvm::ConstantExpr::getIntToPtr( - llvm::ConstantInt::get(Int32Ty, ProtocolVersion), IdTy)); - Elements.push_back(MakeConstantString(ProtocolName, ".objc_protocol_name")); - Elements.push_back(ProtocolList); - Elements.push_back(MethodList); - Elements.push_back(MethodList); - Elements.push_back(MethodList); - Elements.push_back(MethodList); + llvm::Constant *Elements[] = { + llvm::ConstantExpr::getIntToPtr( + llvm::ConstantInt::get(Int32Ty, ProtocolVersion), IdTy), + MakeConstantString(ProtocolName, ".objc_protocol_name"), ProtocolList, + MethodList, MethodList, MethodList, MethodList}; return MakeGlobal(ProtocolTy, Elements, CGM.getPointerAlign(), ".objc_protocol"); } @@ -1849,7 +1893,7 @@ void CGObjCGNU::GenerateProtocol(const ObjCProtocolDecl *PD) { // Add all of the property methods need adding to the method list and to the // property metadata list. - for (auto *property : PD->properties()) { + for (auto *property : PD->instance_properties()) { std::vector<llvm::Constant*> Fields; Fields.push_back(MakePropertyEncodingString(property, nullptr)); @@ -1920,19 +1964,14 @@ void CGObjCGNU::GenerateProtocol(const ObjCProtocolDecl *PD) { PropertyList->getType(), OptionalPropertyList->getType(), nullptr); - std::vector<llvm::Constant*> Elements; // The isa pointer must be set to a magic number so the runtime knows it's // the correct layout. 
- Elements.push_back(llvm::ConstantExpr::getIntToPtr( - llvm::ConstantInt::get(Int32Ty, ProtocolVersion), IdTy)); - Elements.push_back(MakeConstantString(ProtocolName, ".objc_protocol_name")); - Elements.push_back(ProtocolList); - Elements.push_back(InstanceMethodList); - Elements.push_back(ClassMethodList); - Elements.push_back(OptionalInstanceMethodList); - Elements.push_back(OptionalClassMethodList); - Elements.push_back(PropertyList); - Elements.push_back(OptionalPropertyList); + llvm::Constant *Elements[] = { + llvm::ConstantExpr::getIntToPtr( + llvm::ConstantInt::get(Int32Ty, ProtocolVersion), IdTy), + MakeConstantString(ProtocolName, ".objc_protocol_name"), ProtocolList, + InstanceMethodList, ClassMethodList, OptionalInstanceMethodList, + OptionalClassMethodList, PropertyList, OptionalPropertyList}; ExistingProtocols[ProtocolName] = llvm::ConstantExpr::getBitCast(MakeGlobal(ProtocolTy, Elements, CGM.getPointerAlign(), ".objc_protocol"), IdTy); @@ -2058,20 +2097,20 @@ void CGObjCGNU::GenerateCategory(const ObjCCategoryImplDecl *OCD) { E = Protos.end(); I != E; ++I) Protocols.push_back((*I)->getNameAsString()); - std::vector<llvm::Constant*> Elements; - Elements.push_back(MakeConstantString(CategoryName)); - Elements.push_back(MakeConstantString(ClassName)); - // Instance method list - Elements.push_back(llvm::ConstantExpr::getBitCast(GenerateMethodList( - ClassName, CategoryName, InstanceMethodSels, InstanceMethodTypes, - false), PtrTy)); - // Class method list - Elements.push_back(llvm::ConstantExpr::getBitCast(GenerateMethodList( - ClassName, CategoryName, ClassMethodSels, ClassMethodTypes, true), - PtrTy)); - // Protocol list - Elements.push_back(llvm::ConstantExpr::getBitCast( - GenerateProtocolList(Protocols), PtrTy)); + llvm::Constant *Elements[] = { + MakeConstantString(CategoryName), MakeConstantString(ClassName), + // Instance method list + llvm::ConstantExpr::getBitCast( + GenerateMethodList(ClassName, CategoryName, InstanceMethodSels, + InstanceMethodTypes, false), + PtrTy), + // Class method list + llvm::ConstantExpr::getBitCast(GenerateMethodList(ClassName, CategoryName, + ClassMethodSels, + ClassMethodTypes, true), + PtrTy), + // Protocol list + llvm::ConstantExpr::getBitCast(GenerateProtocolList(Protocols), PtrTy)}; Categories.push_back(llvm::ConstantExpr::getBitCast( MakeGlobal(llvm::StructType::get(PtrToInt8Ty, PtrToInt8Ty, PtrTy, PtrTy, PtrTy, nullptr), Elements, CGM.getPointerAlign()), @@ -2167,18 +2206,19 @@ void CGObjCGNU::GenerateClass(const ObjCImplementationDecl *OID) { // Get the class name ObjCInterfaceDecl *ClassDecl = - const_cast<ObjCInterfaceDecl *>(OID->getClassInterface()); + const_cast<ObjCInterfaceDecl *>(OID->getClassInterface()); std::string ClassName = ClassDecl->getNameAsString(); + // Emit the symbol that is used to generate linker errors if this class is // referenced in other modules but not declared. std::string classSymbolName = "__objc_class_name_" + ClassName; - if (llvm::GlobalVariable *symbol = - TheModule.getGlobalVariable(classSymbolName)) { + if (auto *symbol = TheModule.getGlobalVariable(classSymbolName)) { symbol->setInitializer(llvm::ConstantInt::get(LongTy, 0)); } else { new llvm::GlobalVariable(TheModule, LongTy, false, - llvm::GlobalValue::ExternalLinkage, llvm::ConstantInt::get(LongTy, 0), - classSymbolName); + llvm::GlobalValue::ExternalLinkage, + llvm::ConstantInt::get(LongTy, 0), + classSymbolName); } // Get the size of instances. 
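[Editorial illustration, not part of the patch: the Elements[] initializer above now builds the GNU runtime's protocol record in one shot. An approximate C++ view of the layout it populates, with descriptive field names of my own rather than names taken from the runtime headers; the isa slot deliberately carries the integer ProtocolVersion so the runtime can recognize the layout:]

    struct ObjCProtocolRecordSketch {
      void *isa;                       // really (id)(uintptr_t)ProtocolVersion
      const char *name;                // the ".objc_protocol_name" string
      void *protocol_list;             // protocols this protocol adopts
      void *instance_methods;          // required instance methods
      void *class_methods;             // required class methods
      void *optional_instance_methods;
      void *optional_class_methods;
      void *properties;                // PropertyList
      void *optional_properties;       // OptionalPropertyList
    };

[GenerateEmptyProtocol emits the shorter seven-field form, reusing a single empty method list for all four method-list slots.]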
@@ -2256,7 +2296,6 @@ void CGObjCGNU::GenerateClass(const ObjCImplementationDecl *OID) { MakeGlobalArray(PtrToIntTy, IvarOffsetValues, CGM.getPointerAlign(), ".ivar.offsets"); - // Collect information about instance methods SmallVector<Selector, 16> InstanceMethodSels; SmallVector<llvm::Constant*, 16> InstanceMethodTypes; @@ -2270,7 +2309,6 @@ void CGObjCGNU::GenerateClass(const ObjCImplementationDecl *OID) { llvm::Constant *Properties = GeneratePropertyList(OID, InstanceMethodSels, InstanceMethodTypes); - // Collect information about class methods SmallVector<Selector, 16> ClassMethodSels; SmallVector<llvm::Constant*, 16> ClassMethodTypes; @@ -2343,19 +2381,35 @@ void CGObjCGNU::GenerateClass(const ObjCImplementationDecl *OID) { ++ivarIndex; } llvm::Constant *ZeroPtr = llvm::ConstantInt::get(IntPtrTy, 0); + //Generate metaclass for class methods - llvm::Constant *MetaClassStruct = GenerateClassStructure(NULLPtr, - NULLPtr, 0x12L, ClassName.c_str(), nullptr, Zeros[0], GenerateIvarList( - empty, empty, empty), ClassMethodList, NULLPtr, - NULLPtr, NULLPtr, ZeroPtr, ZeroPtr, true); + llvm::Constant *MetaClassStruct = GenerateClassStructure( + NULLPtr, NULLPtr, 0x12L, ClassName.c_str(), nullptr, Zeros[0], + GenerateIvarList(empty, empty, empty), ClassMethodList, NULLPtr, NULLPtr, + NULLPtr, ZeroPtr, ZeroPtr, true); + if (CGM.getTriple().isOSBinFormatCOFF()) { + auto Storage = llvm::GlobalValue::DefaultStorageClass; + if (OID->getClassInterface()->hasAttr<DLLImportAttr>()) + Storage = llvm::GlobalValue::DLLImportStorageClass; + else if (OID->getClassInterface()->hasAttr<DLLExportAttr>()) + Storage = llvm::GlobalValue::DLLExportStorageClass; + cast<llvm::GlobalValue>(MetaClassStruct)->setDLLStorageClass(Storage); + } // Generate the class structure - llvm::Constant *ClassStruct = - GenerateClassStructure(MetaClassStruct, SuperClass, 0x11L, - ClassName.c_str(), nullptr, - llvm::ConstantInt::get(LongTy, instanceSize), IvarList, - MethodList, GenerateProtocolList(Protocols), IvarOffsetArray, - Properties, StrongIvarBitmap, WeakIvarBitmap); + llvm::Constant *ClassStruct = GenerateClassStructure( + MetaClassStruct, SuperClass, 0x11L, ClassName.c_str(), nullptr, + llvm::ConstantInt::get(LongTy, instanceSize), IvarList, MethodList, + GenerateProtocolList(Protocols), IvarOffsetArray, Properties, + StrongIvarBitmap, WeakIvarBitmap); + if (CGM.getTriple().isOSBinFormatCOFF()) { + auto Storage = llvm::GlobalValue::DefaultStorageClass; + if (OID->getClassInterface()->hasAttr<DLLImportAttr>()) + Storage = llvm::GlobalValue::DLLImportStorageClass; + else if (OID->getClassInterface()->hasAttr<DLLExportAttr>()) + Storage = llvm::GlobalValue::DLLExportStorageClass; + cast<llvm::GlobalValue>(ClassStruct)->setDLLStorageClass(Storage); + } // Resolve the class aliases, if they exist. 
if (ClassPtrAlias) { @@ -2376,7 +2430,6 @@ void CGObjCGNU::GenerateClass(const ObjCImplementationDecl *OID) { Classes.push_back(ClassStruct); } - llvm::Function *CGObjCGNU::ModuleInitFunction() { // Only emit an ObjC load function if no Objective-C stuff has been called if (Classes.empty() && Categories.empty() && ConstantStrings.empty() && @@ -2651,12 +2704,15 @@ llvm::Constant *CGObjCGNU::GetOptimizedPropertySetFunction(bool atomic, llvm::Constant *CGObjCGNU::GetGetStructFunction() { return GetStructPropertyFn; } + llvm::Constant *CGObjCGNU::GetSetStructFunction() { return SetStructPropertyFn; } + llvm::Constant *CGObjCGNU::GetCppAtomicObjectGetFunction() { return nullptr; } + llvm::Constant *CGObjCGNU::GetCppAtomicObjectSetFunction() { return nullptr; } @@ -2685,7 +2741,6 @@ void CGObjCGNU::EmitTryStmt(CodeGenFunction &CGF, // In Objective-C++ mode, we actually emit something equivalent to the C++ // exception handler. EmitTryCatchStmt(CGF, S, EnterCatchFn, ExitCatchFn, ExceptionReThrowFn); - return ; } void CGObjCGNU::EmitThrowStmt(CodeGenFunction &CGF, @@ -2800,7 +2855,7 @@ llvm::GlobalVariable *CGObjCGNU::ObjCIvarOffsetVariable( // to replace it with the real version for a library. In non-PIC code you // must compile with the fragile ABI if you want to use ivars from a // GCC-compiled class. - if (CGM.getLangOpts().PICLevel || CGM.getLangOpts().PIELevel) { + if (CGM.getLangOpts().PICLevel) { llvm::GlobalVariable *IvarOffsetGV = new llvm::GlobalVariable(TheModule, Int32Ty, false, llvm::GlobalValue::PrivateLinkage, OffsetGuess, Name+".guess"); @@ -2848,7 +2903,12 @@ llvm::Value *CGObjCGNU::EmitIvarOffset(CodeGenFunction &CGF, const ObjCIvarDecl *Ivar) { if (CGM.getLangOpts().ObjCRuntime.isNonFragile()) { Interface = FindIvarInterface(CGM.getContext(), Interface, Ivar); - if (RuntimeVersion < 10) + + // The MSVC linker cannot have a single global defined as LinkOnceAnyLinkage + // and ExternalLinkage, so create a reference to the ivar global and rely on + // the definition being created as part of GenerateClass. 
+ if (RuntimeVersion < 10 || + CGF.CGM.getTarget().getTriple().isKnownWindowsMSVCEnvironment()) return CGF.Builder.CreateZExtOrBitCast( CGF.Builder.CreateDefaultAlignedLoad(CGF.Builder.CreateAlignedLoad( ObjCIvarOffsetVariable(Interface, Ivar), diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGObjCMac.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGObjCMac.cpp index e30b287..5ab9fc4 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGObjCMac.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGObjCMac.cpp @@ -236,17 +236,14 @@ public: CodeGen::CodeGenTypes &Types = CGM.getTypes(); ASTContext &Ctx = CGM.getContext(); // id objc_getProperty (id, SEL, ptrdiff_t, bool) - SmallVector<CanQualType,4> Params; CanQualType IdType = Ctx.getCanonicalParamType(Ctx.getObjCIdType()); CanQualType SelType = Ctx.getCanonicalParamType(Ctx.getObjCSelType()); - Params.push_back(IdType); - Params.push_back(SelType); - Params.push_back(Ctx.getPointerDiffType()->getCanonicalTypeUnqualified()); - Params.push_back(Ctx.BoolTy); + CanQualType Params[] = { + IdType, SelType, + Ctx.getPointerDiffType()->getCanonicalTypeUnqualified(), Ctx.BoolTy}; llvm::FunctionType *FTy = - Types.GetFunctionType(Types.arrangeLLVMFunctionInfo( - IdType, false, false, Params, FunctionType::ExtInfo(), - RequiredArgs::All)); + Types.GetFunctionType( + Types.arrangeBuiltinFunctionDeclaration(IdType, Params)); return CGM.CreateRuntimeFunction(FTy, "objc_getProperty"); } @@ -254,19 +251,18 @@ public: CodeGen::CodeGenTypes &Types = CGM.getTypes(); ASTContext &Ctx = CGM.getContext(); // void objc_setProperty (id, SEL, ptrdiff_t, id, bool, bool) - SmallVector<CanQualType,6> Params; CanQualType IdType = Ctx.getCanonicalParamType(Ctx.getObjCIdType()); CanQualType SelType = Ctx.getCanonicalParamType(Ctx.getObjCSelType()); - Params.push_back(IdType); - Params.push_back(SelType); - Params.push_back(Ctx.getPointerDiffType()->getCanonicalTypeUnqualified()); - Params.push_back(IdType); - Params.push_back(Ctx.BoolTy); - Params.push_back(Ctx.BoolTy); + CanQualType Params[] = { + IdType, + SelType, + Ctx.getPointerDiffType()->getCanonicalTypeUnqualified(), + IdType, + Ctx.BoolTy, + Ctx.BoolTy}; llvm::FunctionType *FTy = - Types.GetFunctionType(Types.arrangeLLVMFunctionInfo( - Ctx.VoidTy, false, false, Params, FunctionType::ExtInfo(), - RequiredArgs::All)); + Types.GetFunctionType( + Types.arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Params)); return CGM.CreateRuntimeFunction(FTy, "objc_setProperty"); } @@ -290,9 +286,8 @@ public: Params.push_back(IdType); Params.push_back(Ctx.getPointerDiffType()->getCanonicalTypeUnqualified()); llvm::FunctionType *FTy = - Types.GetFunctionType(Types.arrangeLLVMFunctionInfo( - Ctx.VoidTy, false, false, Params, FunctionType::ExtInfo(), - RequiredArgs::All)); + Types.GetFunctionType( + Types.arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Params)); const char *name; if (atomic && copy) name = "objc_setProperty_atomic_copy"; @@ -317,9 +312,8 @@ public: Params.push_back(Ctx.BoolTy); Params.push_back(Ctx.BoolTy); llvm::FunctionType *FTy = - Types.GetFunctionType(Types.arrangeLLVMFunctionInfo( - Ctx.VoidTy, false, false, Params, FunctionType::ExtInfo(), - RequiredArgs::All)); + Types.GetFunctionType( + Types.arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Params)); return CGM.CreateRuntimeFunction(FTy, "objc_copyStruct"); } @@ -336,10 +330,8 @@ public: Params.push_back(Ctx.VoidPtrTy); Params.push_back(Ctx.VoidPtrTy); llvm::FunctionType *FTy = - Types.GetFunctionType(Types.arrangeLLVMFunctionInfo(Ctx.VoidTy, false, false, - Params, - 
FunctionType::ExtInfo(), - RequiredArgs::All)); + Types.GetFunctionType( + Types.arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Params)); return CGM.CreateRuntimeFunction(FTy, "objc_copyCppObjectAtomic"); } @@ -350,12 +342,25 @@ public: SmallVector<CanQualType,1> Params; Params.push_back(Ctx.getCanonicalParamType(Ctx.getObjCIdType())); llvm::FunctionType *FTy = - Types.GetFunctionType(Types.arrangeLLVMFunctionInfo( - Ctx.VoidTy, false, false, Params, FunctionType::ExtInfo(), - RequiredArgs::All)); + Types.GetFunctionType( + Types.arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Params)); return CGM.CreateRuntimeFunction(FTy, "objc_enumerationMutation"); } + llvm::Constant *getLookUpClassFn() { + CodeGen::CodeGenTypes &Types = CGM.getTypes(); + ASTContext &Ctx = CGM.getContext(); + // Class objc_lookUpClass (const char *) + SmallVector<CanQualType,1> Params; + Params.push_back( + Ctx.getCanonicalType(Ctx.getPointerType(Ctx.CharTy.withConst()))); + llvm::FunctionType *FTy = + Types.GetFunctionType(Types.arrangeBuiltinFunctionDeclaration( + Ctx.getCanonicalType(Ctx.getObjCClassType()), + Params)); + return CGM.CreateRuntimeFunction(FTy, "objc_lookUpClass"); + } + /// GcReadWeakFn -- LLVM objc_read_weak (id *src) function. llvm::Constant *getGcReadWeakFn() { // id objc_read_weak (id *) @@ -576,7 +581,6 @@ public: return CGM.CreateRuntimeFunction( llvm::FunctionType::get(CGM.Int32Ty, params, false), "objc_exception_match"); - } /// SetJmpFn - LLVM _setjmp function. @@ -600,7 +604,6 @@ public: /// modern abi class ObjCNonFragileABITypesHelper : public ObjCCommonTypesHelper { public: - // MethodListnfABITy - LLVM for struct _method_list_t llvm::StructType *MethodListnfABITy; @@ -967,7 +970,8 @@ protected: llvm::Constant *EmitPropertyList(Twine Name, const Decl *Container, const ObjCContainerDecl *OCD, - const ObjCCommonTypesHelper &ObjCTypes); + const ObjCCommonTypesHelper &ObjCTypes, + bool IsClassProperty); /// EmitProtocolMethodTypes - Generate the array of extended method type /// strings. The return value has type Int8PtrPtrTy. @@ -981,13 +985,20 @@ protected: SmallVectorImpl<llvm::Constant*> &Properties, const Decl *Container, const ObjCProtocolDecl *Proto, - const ObjCCommonTypesHelper &ObjCTypes); + const ObjCCommonTypesHelper &ObjCTypes, + bool IsClassProperty); /// GetProtocolRef - Return a reference to the internal protocol /// description, creating an empty one if it has not been /// defined. The return value has type ProtocolPtrTy. llvm::Constant *GetProtocolRef(const ObjCProtocolDecl *PD); + /// Return a reference to the given Class using runtime calls rather than + /// by a symbol reference. + llvm::Value *EmitClassRefViaRuntime(CodeGenFunction &CGF, + const ObjCInterfaceDecl *ID, + ObjCCommonTypesHelper &ObjCTypes); + public: /// CreateMetadataVar - Create a global variable with internal /// linkage for use by the Objective-C runtime. @@ -1079,7 +1090,8 @@ private: /// has type ClassExtensionPtrTy. llvm::Constant *EmitClassExtension(const ObjCImplementationDecl *ID, CharUnits instanceSize, - bool hasMRCWeakIvars); + bool hasMRCWeakIvars, + bool isClassProperty); /// EmitClassRef - Return a Value*, of type ObjCTypes.ClassPtrTy, /// for the given class. @@ -1119,9 +1131,8 @@ private: /// EmitMethodList - Emit the method list for the given /// implementation. The return value has type MethodListPtrTy. 
- llvm::Constant *EmitMethodList(Twine Name, - const char *Section, - ArrayRef<llvm::Constant*> Methods); + llvm::Constant *EmitMethodList(Twine Name, StringRef Section, + ArrayRef<llvm::Constant *> Methods); /// EmitMethodDescList - Emit a method description list for a list of /// method declarations. @@ -1134,9 +1145,8 @@ private: /// - begin, end: The method list to output. /// /// The return value has type MethodDescriptionListPtrTy. - llvm::Constant *EmitMethodDescList(Twine Name, - const char *Section, - ArrayRef<llvm::Constant*> Methods); + llvm::Constant *EmitMethodDescList(Twine Name, StringRef Section, + ArrayRef<llvm::Constant *> Methods); /// GetOrEmitProtocol - Get the protocol object for the given /// declaration, emitting it if necessary. The return value has type @@ -1255,7 +1265,7 @@ public: /// GetClassGlobal - Return the global variable for the Objective-C /// class of the given name. - llvm::GlobalVariable *GetClassGlobal(const std::string &Name, + llvm::GlobalVariable *GetClassGlobal(StringRef Name, bool Weak = false) override { llvm_unreachable("CGObjCMac::GetClassGlobal"); } @@ -1293,9 +1303,8 @@ private: /// AddModuleClassList - Add the given list of class pointers to the /// module with the provided symbol and section names. - void AddModuleClassList(ArrayRef<llvm::GlobalValue*> Container, - const char *SymbolName, - const char *SectionName); + void AddModuleClassList(ArrayRef<llvm::GlobalValue *> Container, + StringRef SymbolName, StringRef SectionName); llvm::GlobalVariable * BuildClassRoTInitializer(unsigned flags, unsigned InstanceStart, @@ -1314,9 +1323,8 @@ private: /// EmitMethodList - Emit the method list for the given /// implementation. The return value has type MethodListnfABITy. - llvm::Constant *EmitMethodList(Twine Name, - const char *Section, - ArrayRef<llvm::Constant*> Methods); + llvm::Constant *EmitMethodList(Twine Name, StringRef Section, + ArrayRef<llvm::Constant *> Methods); /// EmitIvarList - Emit the ivar list for the given /// implementation. If ForClass is true the list of class ivars /// (i.e. metaclass ivars) is emitted, otherwise the list of @@ -1357,7 +1365,7 @@ private: /// GetClassGlobal - Return the global variable for the Objective-C /// class of the given name. 
- llvm::GlobalVariable *GetClassGlobal(const std::string &Name, + llvm::GlobalVariable *GetClassGlobal(StringRef Name, bool Weak = false) override; /// EmitClassRef - Return a Value*, of type ObjCTypes.ClassPtrTy, @@ -1398,13 +1406,9 @@ private: llvm::Constant *GetInterfaceEHType(const ObjCInterfaceDecl *ID, bool ForDefinition); - const char *getMetaclassSymbolPrefix() const { - return "OBJC_METACLASS_$_"; - } + StringRef getMetaclassSymbolPrefix() const { return "OBJC_METACLASS_$_"; } - const char *getClassSymbolPrefix() const { - return "OBJC_CLASS_$_"; - } + StringRef getClassSymbolPrefix() const { return "OBJC_CLASS_$_"; } void GetClassSizeInfo(const ObjCImplementationDecl *OID, uint32_t &InstanceStart, @@ -1506,12 +1510,15 @@ public: llvm::Constant *GetSetStructFunction() override { return ObjCTypes.getCopyStructFn(); } + llvm::Constant *GetGetStructFunction() override { return ObjCTypes.getCopyStructFn(); } + llvm::Constant *GetCppAtomicObjectSetFunction() override { return ObjCTypes.getCppAtomicObjectFunction(); } + llvm::Constant *GetCppAtomicObjectGetFunction() override { return ObjCTypes.getCppAtomicObjectFunction(); } @@ -1934,7 +1941,7 @@ CGObjCCommonMac::EmitMessageSend(CodeGen::CodeGenFunction &CGF, // Emit a null-check if there's a consumed argument other than the receiver. bool RequiresNullCheck = false; if (ReceiverCanBeNull && CGM.getLangOpts().ObjCAutoRefCount && Method) { - for (const auto *ParamDecl : Method->params()) { + for (const auto *ParamDecl : Method->parameters()) { if (ParamDecl->hasAttr<NSConsumedAttr>()) { if (!nullReturn.NullBB) nullReturn.init(CGF, Arg0); @@ -2027,6 +2034,7 @@ namespace { bool IsDisordered = false; llvm::SmallVector<IvarInfo, 8> IvarsInfo; + public: IvarLayoutBuilder(CodeGenModule &CGM, CharUnits instanceBegin, CharUnits instanceEnd, bool forStrongLayout) @@ -2062,7 +2070,7 @@ namespace { printf("\n"); } }; -} +} // end anonymous namespace llvm::Constant *CGObjCCommonMac::BuildGCBlockLayout(CodeGenModule &CGM, const CGBlockInfo &blockInfo) { @@ -2141,7 +2149,6 @@ void IvarLayoutBuilder::visitBlock(const CGBlockInfo &blockInfo) { } } - /// getBlockCaptureLifetime - This routine returns life time of the captured /// block variable for the purpose of block layout meta-data generation. FQT is /// the type of the variable captured in the block. 
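[Editorial illustration, not part of the patch: the getters above now arrange their types through arrangeBuiltinFunctionDeclaration, but the runtime entry points they declare are unchanged. Plain prototypes matching the comments in the surrounding code; the parameter names are mine, and the opaque typedefs exist only so the sketch compiles on its own:]

    #include <stddef.h>

    typedef struct objc_object *id;
    typedef struct objc_selector *SEL;
    typedef struct objc_class *Class;

    extern "C" id objc_getProperty(id self, SEL _cmd, ptrdiff_t offset, bool atomic);
    extern "C" void objc_setProperty(id self, SEL _cmd, ptrdiff_t offset, id value,
                                     bool atomic, bool copy);
    extern "C" Class objc_lookUpClass(const char *name);

[getLookUpClassFn is the new addition here: declaring class lookup by name is what later lets a class reference be resolved through a runtime call instead of a symbol reference.]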
@@ -2629,7 +2636,6 @@ llvm::Constant *CGObjCCommonMac::BuildRCBlockLayout(CodeGenModule &CGM, return getBitmapBlockLayout(false); } - llvm::Constant *CGObjCCommonMac::BuildByrefLayout(CodeGen::CodeGenModule &CGM, QualType T) { assert(CGM.getLangOpts().getGC() == LangOptions::NonGC); @@ -2677,6 +2683,25 @@ llvm::Constant *CGObjCCommonMac::GetProtocolRef(const ObjCProtocolDecl *PD) { return GetOrEmitProtocolRef(PD); } +llvm::Value *CGObjCCommonMac::EmitClassRefViaRuntime( + CodeGenFunction &CGF, + const ObjCInterfaceDecl *ID, + ObjCCommonTypesHelper &ObjCTypes) { + llvm::Constant *lookUpClassFn = ObjCTypes.getLookUpClassFn(); + + llvm::Value *className = + CGF.CGM.GetAddrOfConstantCString(ID->getObjCRuntimeNameAsString()) + .getPointer(); + ASTContext &ctx = CGF.CGM.getContext(); + className = + CGF.Builder.CreateBitCast(className, + CGF.ConvertType( + ctx.getPointerType(ctx.CharTy.withConst()))); + llvm::CallInst *call = CGF.Builder.CreateCall(lookUpClassFn, className); + call->setDoesNotThrow(); + return call; +} + /* // Objective-C 1.0 extensions struct _objc_protocol { @@ -2798,6 +2823,7 @@ llvm::Constant *CGObjCMac::GetOrEmitProtocolRef(const ObjCProtocolDecl *PD) { struct objc_method_description_list *optional_class_methods; struct objc_property_list *instance_properties; const char ** extendedMethodTypes; + struct objc_property_list *class_properties; }; */ llvm::Constant * @@ -2816,13 +2842,16 @@ CGObjCMac::EmitProtocolExtension(const ObjCProtocolDecl *PD, "__OBJC,__cat_cls_meth,regular,no_dead_strip", OptClassMethods), EmitPropertyList("OBJC_$_PROP_PROTO_LIST_" + PD->getName(), nullptr, PD, - ObjCTypes), + ObjCTypes, false), EmitProtocolMethodTypes("OBJC_PROTOCOL_METHOD_TYPES_" + PD->getName(), - MethodTypesExt, ObjCTypes)}; + MethodTypesExt, ObjCTypes), + EmitPropertyList("OBJC_$_CLASS_PROP_PROTO_LIST_" + PD->getName(), nullptr, + PD, ObjCTypes, true)}; // Return null if no extension bits are used. if (Values[1]->isNullValue() && Values[2]->isNullValue() && - Values[3]->isNullValue() && Values[4]->isNullValue()) + Values[3]->isNullValue() && Values[4]->isNullValue() && + Values[5]->isNullValue()) return llvm::Constant::getNullValue(ObjCTypes.ProtocolExtensionPtrTy); llvm::Constant *Init = @@ -2878,10 +2907,15 @@ PushProtocolProperties(llvm::SmallPtrSet<const IdentifierInfo*,16> &PropertySet, SmallVectorImpl<llvm::Constant *> &Properties, const Decl *Container, const ObjCProtocolDecl *Proto, - const ObjCCommonTypesHelper &ObjCTypes) { + const ObjCCommonTypesHelper &ObjCTypes, + bool IsClassProperty) { for (const auto *P : Proto->protocols()) - PushProtocolProperties(PropertySet, Properties, Container, P, ObjCTypes); + PushProtocolProperties(PropertySet, Properties, Container, P, ObjCTypes, + IsClassProperty); + for (const auto *PD : Proto->properties()) { + if (IsClassProperty != PD->isClassProperty()) + continue; if (!PropertySet.insert(PD->getIdentifier()).second) continue; llvm::Constant *Prop[] = { @@ -2907,7 +2941,17 @@ PushProtocolProperties(llvm::SmallPtrSet<const IdentifierInfo*,16> &PropertySet, llvm::Constant *CGObjCCommonMac::EmitPropertyList(Twine Name, const Decl *Container, const ObjCContainerDecl *OCD, - const ObjCCommonTypesHelper &ObjCTypes) { + const ObjCCommonTypesHelper &ObjCTypes, + bool IsClassProperty) { + if (IsClassProperty) { + // Make this entry NULL for OS X with deployment target < 10.11, for iOS + // with deployment target < 9.0. 
+ const llvm::Triple &Triple = CGM.getTarget().getTriple(); + if ((Triple.isMacOSX() && Triple.isMacOSXVersionLT(10, 11)) || + (Triple.isiOS() && Triple.isOSVersionLT(9))) + return llvm::Constant::getNullValue(ObjCTypes.PropertyListPtrTy); + } + SmallVector<llvm::Constant *, 16> Properties; llvm::SmallPtrSet<const IdentifierInfo*, 16> PropertySet; @@ -2919,10 +2963,15 @@ llvm::Constant *CGObjCCommonMac::EmitPropertyList(Twine Name, if (const ObjCInterfaceDecl *OID = dyn_cast<ObjCInterfaceDecl>(OCD)) for (const ObjCCategoryDecl *ClassExt : OID->known_extensions()) for (auto *PD : ClassExt->properties()) { + if (IsClassProperty != PD->isClassProperty()) + continue; PropertySet.insert(PD->getIdentifier()); AddProperty(PD); } + for (const auto *PD : OCD->properties()) { + if (IsClassProperty != PD->isClassProperty()) + continue; // Don't emit duplicate metadata for properties that were already in a // class extension. if (!PropertySet.insert(PD->getIdentifier()).second) @@ -2932,11 +2981,13 @@ llvm::Constant *CGObjCCommonMac::EmitPropertyList(Twine Name, if (const ObjCInterfaceDecl *OID = dyn_cast<ObjCInterfaceDecl>(OCD)) { for (const auto *P : OID->all_referenced_protocols()) - PushProtocolProperties(PropertySet, Properties, Container, P, ObjCTypes); + PushProtocolProperties(PropertySet, Properties, Container, P, ObjCTypes, + IsClassProperty); } else if (const ObjCCategoryDecl *CD = dyn_cast<ObjCCategoryDecl>(OCD)) { for (const auto *P : CD->protocols()) - PushProtocolProperties(PropertySet, Properties, Container, P, ObjCTypes); + PushProtocolProperties(PropertySet, Properties, Container, P, ObjCTypes, + IsClassProperty); } // Return null for empty list. @@ -3001,8 +3052,8 @@ CGObjCMac::GetMethodDescriptionConstant(const ObjCMethodDecl *MD) { } llvm::Constant * -CGObjCMac::EmitMethodDescList(Twine Name, const char *Section, - ArrayRef<llvm::Constant*> Methods) { +CGObjCMac::EmitMethodDescList(Twine Name, StringRef Section, + ArrayRef<llvm::Constant *> Methods) { // Return null for empty list. if (Methods.empty()) return llvm::Constant::getNullValue(ObjCTypes.MethodDescriptionListPtrTy); @@ -3029,6 +3080,7 @@ CGObjCMac::EmitMethodDescList(Twine Name, const char *Section, struct _objc_protocol_list *protocols; uint32_t size; // <rdar://4585769> struct _objc_property_list *instance_properties; + struct _objc_property_list *class_properties; }; */ void CGObjCMac::GenerateCategory(const ObjCCategoryImplDecl *OCD) { @@ -3055,7 +3107,7 @@ void CGObjCMac::GenerateCategory(const ObjCCategoryImplDecl *OCD) { // Class methods should always be defined. ClassMethods.push_back(GetMethodConstant(I)); - llvm::Constant *Values[7]; + llvm::Constant *Values[8]; Values[0] = GetClassName(OCD->getName()); Values[1] = GetClassName(Interface->getObjCRuntimeNameAsString()); LazySymbols.insert(Interface->getIdentifier()); @@ -3077,9 +3129,12 @@ void CGObjCMac::GenerateCategory(const ObjCCategoryImplDecl *OCD) { // If there is no category @interface then there can be no properties. 
if (Category) { Values[6] = EmitPropertyList("\01l_OBJC_$_PROP_LIST_" + ExtName.str(), - OCD, Category, ObjCTypes); + OCD, Category, ObjCTypes, false); + Values[7] = EmitPropertyList("\01l_OBJC_$_CLASS_PROP_LIST_" + ExtName.str(), + OCD, Category, ObjCTypes, true); } else { Values[6] = llvm::Constant::getNullValue(ObjCTypes.PropertyListPtrTy); + Values[7] = llvm::Constant::getNullValue(ObjCTypes.PropertyListPtrTy); } llvm::Constant *Init = llvm::ConstantStruct::get(ObjCTypes.CategoryTy, @@ -3274,7 +3329,8 @@ void CGObjCMac::GenerateClass(const ObjCImplementationDecl *ID) { Values[ 8] = llvm::Constant::getNullValue(ObjCTypes.CachePtrTy); Values[ 9] = Protocols; Values[10] = BuildStrongIvarLayout(ID, CharUnits::Zero(), Size); - Values[11] = EmitClassExtension(ID, Size, hasMRCWeak); + Values[11] = EmitClassExtension(ID, Size, hasMRCWeak, + false/*isClassProperty*/); llvm::Constant *Init = llvm::ConstantStruct::get(ObjCTypes.ClassTy, Values); std::string Name("OBJC_CLASS_"); @@ -3338,8 +3394,9 @@ llvm::Constant *CGObjCMac::EmitMetaClass(const ObjCImplementationDecl *ID, Values[ 9] = Protocols; // ivar_layout for metaclass is always NULL. Values[10] = llvm::Constant::getNullValue(ObjCTypes.Int8PtrTy); - // The class extension is always unused for metaclasses. - Values[11] = llvm::Constant::getNullValue(ObjCTypes.ClassExtensionPtrTy); + // The class extension is used to store class properties for metaclasses. + Values[11] = EmitClassExtension(ID, CharUnits::Zero(), false/*hasMRCWeak*/, + true/*isClassProperty*/); llvm::Constant *Init = llvm::ConstantStruct::get(ObjCTypes.ClassTy, Values); @@ -3413,19 +3470,28 @@ llvm::Value *CGObjCMac::EmitSuperClassRef(const ObjCInterfaceDecl *ID) { */ llvm::Constant * CGObjCMac::EmitClassExtension(const ObjCImplementationDecl *ID, - CharUnits InstanceSize, bool hasMRCWeakIvars) { + CharUnits InstanceSize, bool hasMRCWeakIvars, + bool isClassProperty) { uint64_t Size = CGM.getDataLayout().getTypeAllocSize(ObjCTypes.ClassExtensionTy); llvm::Constant *Values[3]; Values[0] = llvm::ConstantInt::get(ObjCTypes.IntTy, Size); - Values[1] = BuildWeakIvarLayout(ID, CharUnits::Zero(), InstanceSize, - hasMRCWeakIvars); - Values[2] = EmitPropertyList("\01l_OBJC_$_PROP_LIST_" + ID->getName(), - ID, ID->getClassInterface(), ObjCTypes); + if (isClassProperty) { + llvm::Type *PtrTy = CGM.Int8PtrTy; + Values[1] = llvm::Constant::getNullValue(PtrTy); + } else + Values[1] = BuildWeakIvarLayout(ID, CharUnits::Zero(), InstanceSize, + hasMRCWeakIvars); + if (isClassProperty) + Values[2] = EmitPropertyList("\01l_OBJC_$_CLASS_PROP_LIST_" + ID->getName(), + ID, ID->getClassInterface(), ObjCTypes, true); + else + Values[2] = EmitPropertyList("\01l_OBJC_$_PROP_LIST_" + ID->getName(), + ID, ID->getClassInterface(), ObjCTypes, false); // Return null if no extension bits are used. - if (Values[1]->isNullValue() && Values[2]->isNullValue()) + if ((!Values[1] || Values[1]->isNullValue()) && Values[2]->isNullValue()) return llvm::Constant::getNullValue(ObjCTypes.ClassExtensionPtrTy); llvm::Constant *Init = @@ -3530,9 +3596,8 @@ llvm::Constant *CGObjCMac::GetMethodConstant(const ObjCMethodDecl *MD) { return llvm::ConstantStruct::get(ObjCTypes.MethodTy, Method); } -llvm::Constant *CGObjCMac::EmitMethodList(Twine Name, - const char *Section, - ArrayRef<llvm::Constant*> Methods) { +llvm::Constant *CGObjCMac::EmitMethodList(Twine Name, StringRef Section, + ArrayRef<llvm::Constant *> Methods) { // Return null for empty list. 
if (Methods.empty()) return llvm::Constant::getNullValue(ObjCTypes.MethodListPtrTy); @@ -3607,6 +3672,7 @@ llvm::Constant *CGObjCMac::GetOptimizedPropertySetFunction(bool atomic, llvm::Constant *CGObjCMac::GetGetStructFunction() { return ObjCTypes.getCopyStructFn(); } + llvm::Constant *CGObjCMac::GetSetStructFunction() { return ObjCTypes.getCopyStructFn(); } @@ -3614,6 +3680,7 @@ llvm::Constant *CGObjCMac::GetSetStructFunction() { llvm::Constant *CGObjCMac::GetCppAtomicObjectGetFunction() { return ObjCTypes.getCppAtomicObjectFunction(); } + llvm::Constant *CGObjCMac::GetCppAtomicObjectSetFunction() { return ObjCTypes.getCppAtomicObjectFunction(); } @@ -3711,7 +3778,7 @@ namespace { void emitWriteHazard(); void emitHazardsInNewBlocks(); }; -} +} // end anonymous namespace /// Create the fragile-ABI read and write hazards based on the current /// state of the function, which is presumed to be immediately prior @@ -4332,7 +4399,6 @@ void CGObjCMac::EmitObjCWeakAssign(CodeGen::CodeGenFunction &CGF, llvm::Value *args[] = { src, dst.getPointer() }; CGF.EmitNounwindRuntimeCall(ObjCTypes.getGcAssignWeakFn(), args, "weakassign"); - return; } /// EmitObjCGlobalAssign - Code gen for assigning to a __strong object. @@ -4358,7 +4424,6 @@ void CGObjCMac::EmitObjCGlobalAssign(CodeGen::CodeGenFunction &CGF, else CGF.EmitNounwindRuntimeCall(ObjCTypes.getGcAssignThreadLocalFn(), args, "threadlocalassign"); - return; } /// EmitObjCIvarAssign - Code gen for assigning to a __strong object. @@ -4380,7 +4445,6 @@ void CGObjCMac::EmitObjCIvarAssign(CodeGen::CodeGenFunction &CGF, dst = CGF.Builder.CreateBitCast(dst, ObjCTypes.PtrObjectPtrTy); llvm::Value *args[] = { src, dst.getPointer(), ivarOffset }; CGF.EmitNounwindRuntimeCall(ObjCTypes.getGcAssignIvarFn(), args); - return; } /// EmitObjCStrongCastAssign - Code gen for assigning to a __strong cast object. @@ -4401,7 +4465,6 @@ void CGObjCMac::EmitObjCStrongCastAssign(CodeGen::CodeGenFunction &CGF, llvm::Value *args[] = { src, dst.getPointer() }; CGF.EmitNounwindRuntimeCall(ObjCTypes.getGcAssignStrongCastFn(), args, "strongassign"); - return; } void CGObjCMac::EmitGCMemmoveCollectable(CodeGen::CodeGenFunction &CGF, @@ -4455,7 +4518,8 @@ enum ImageInfoFlags { // A flag indicating that the module has no instances of a @synthesize of a // superclass variable. <rdar://problem/6803242> eImageInfo_CorrectedSynthesize = (1 << 4), // This flag is no longer set by clang. - eImageInfo_ImageIsSimulated = (1 << 5) + eImageInfo_ImageIsSimulated = (1 << 5), + eImageInfo_ClassProperties = (1 << 6) }; void CGObjCCommonMac::EmitImageInfo() { @@ -4507,6 +4571,10 @@ void CGObjCCommonMac::EmitImageInfo() { Triple.getArch() == llvm::Triple::x86_64)) Mod.addModuleFlag(llvm::Module::Error, "Objective-C Is Simulated", eImageInfo_ImageIsSimulated); + + // Indicate whether we are generating class properties. + Mod.addModuleFlag(llvm::Module::Error, "Objective-C Class Properties", + eImageInfo_ClassProperties); } // struct objc_module { @@ -4602,6 +4670,11 @@ llvm::Value *CGObjCMac::EmitClassRefFromId(CodeGenFunction &CGF, llvm::Value *CGObjCMac::EmitClassRef(CodeGenFunction &CGF, const ObjCInterfaceDecl *ID) { + // If the class has the objc_runtime_visible attribute, we need to + // use the Objective-C runtime to get the class. 
+ if (ID->hasAttr<ObjCRuntimeVisibleAttr>()) + return EmitClassRefViaRuntime(CGF, ID, ObjCTypes); + return EmitClassRefFromId(CGF, ID->getIdentifier()); } @@ -4954,7 +5027,7 @@ CGObjCCommonMac::BuildIvarLayout(const ObjCImplementationDecl *OMD, baseOffset = CharUnits::Zero(); } - baseOffset = baseOffset.RoundUpToAlignment(CGM.getPointerAlign()); + baseOffset = baseOffset.alignTo(CGM.getPointerAlign()); } else { CGM.getContext().DeepCollectObjCIvars(OI, true, ivars); @@ -5131,9 +5204,8 @@ void CGObjCMac::FinishModule() { } CGObjCNonFragileABIMac::CGObjCNonFragileABIMac(CodeGen::CodeGenModule &cgm) - : CGObjCCommonMac(cgm), - ObjCTypes(cgm) { - ObjCEmptyCacheVar = ObjCEmptyVtableVar = nullptr; + : CGObjCCommonMac(cgm), ObjCTypes(cgm), ObjCEmptyCacheVar(nullptr), + ObjCEmptyVtableVar(nullptr) { ObjCABI = 2; } @@ -5223,7 +5295,6 @@ ObjCCommonTypesHelper::ObjCCommonTypesHelper(CodeGen::CodeGenModule &cgm) // struct _objc_cache * CacheTy = llvm::StructType::create(VMContext, "struct._objc_cache"); CachePtrTy = llvm::PointerType::getUnqual(CacheTy); - } ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm) @@ -5256,12 +5327,13 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm) // struct _objc_method_description_list *optional_class_methods; // struct _objc_property_list *instance_properties; // const char ** extendedMethodTypes; + // struct _objc_property_list *class_properties; // } ProtocolExtensionTy = llvm::StructType::create("struct._objc_protocol_extension", IntTy, MethodDescriptionListPtrTy, MethodDescriptionListPtrTy, PropertyListPtrTy, - Int8PtrPtrTy, nullptr); + Int8PtrPtrTy, PropertyListPtrTy, nullptr); // struct _objc_protocol_extension * ProtocolExtensionPtrTy = llvm::PointerType::getUnqual(ProtocolExtensionTy); @@ -5359,14 +5431,17 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm) // char *class_name; // struct _objc_method_list *instance_method; // struct _objc_method_list *class_method; + // struct _objc_protocol_list *protocols; // uint32_t size; // sizeof(struct _objc_category) // struct _objc_property_list *instance_properties;// category's @property + // struct _objc_property_list *class_properties; // } CategoryTy = llvm::StructType::create("struct._objc_category", Int8PtrTy, Int8PtrTy, MethodListPtrTy, MethodListPtrTy, ProtocolListPtrTy, - IntTy, PropertyListPtrTy, nullptr); + IntTy, PropertyListPtrTy, PropertyListPtrTy, + nullptr); // Global metadata structures @@ -5405,7 +5480,6 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm) llvm::StructType::create("struct._objc_exception_data", llvm::ArrayType::get(CGM.Int32Ty,SetJmpBufferSize), StackPtrTy, nullptr); - } ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModule &cgm) @@ -5434,6 +5508,7 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul // const uint32_t flags; // = 0 // const char ** extendedMethodTypes; // const char *demangledName; + // const struct _prop_list_t * class_properties; // } // Holder for struct _protocol_list_t * @@ -5446,7 +5521,7 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul MethodListnfABIPtrTy, MethodListnfABIPtrTy, MethodListnfABIPtrTy, MethodListnfABIPtrTy, PropertyListPtrTy, IntTy, IntTy, Int8PtrPtrTy, - Int8PtrTy, + Int8PtrTy, PropertyListPtrTy, nullptr); // struct _protocol_t* @@ -5539,6 +5614,8 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul // const struct _method_list_t * const class_methods; // const struct 
_protocol_list_t * const protocols; // const struct _prop_list_t * const properties; + // const struct _prop_list_t * const class_properties; + // const uint32_t size; // } CategorynfABITy = llvm::StructType::create("struct._category_t", Int8PtrTy, ClassnfABIPtrTy, @@ -5546,6 +5623,8 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul MethodListnfABIPtrTy, ProtocolListnfABIPtrTy, PropertyListPtrTy, + PropertyListPtrTy, + IntTy, nullptr); // New types for nonfragile abi messaging. @@ -5609,10 +5688,9 @@ llvm::Function *CGObjCNonFragileABIMac::ModuleInitFunction() { return nullptr; } -void CGObjCNonFragileABIMac:: -AddModuleClassList(ArrayRef<llvm::GlobalValue*> Container, - const char *SymbolName, - const char *SectionName) { +void CGObjCNonFragileABIMac::AddModuleClassList( + ArrayRef<llvm::GlobalValue *> Container, StringRef SymbolName, + StringRef SectionName) { unsigned NumClasses = Container.size(); if (!NumClasses) @@ -5814,13 +5892,16 @@ llvm::GlobalVariable * CGObjCNonFragileABIMac::BuildClassRoTInitializer( if (flags & NonFragileABI_Class_Meta) { Values[ 7] = llvm::Constant::getNullValue(ObjCTypes.IvarListnfABIPtrTy); Values[ 8] = GetIvarLayoutName(nullptr, ObjCTypes); - Values[ 9] = llvm::Constant::getNullValue(ObjCTypes.PropertyListPtrTy); + Values[ 9] = EmitPropertyList( + "\01l_OBJC_$_CLASS_PROP_LIST_" + ID->getObjCRuntimeNameAsString(), + ID, ID->getClassInterface(), ObjCTypes, true); } else { Values[ 7] = EmitIvarList(ID); Values[ 8] = BuildWeakIvarLayout(ID, beginInstance, endInstance, hasMRCWeak); - Values[ 9] = EmitPropertyList("\01l_OBJC_$_PROP_LIST_" + ID->getObjCRuntimeNameAsString(), - ID, ID->getClassInterface(), ObjCTypes); + Values[ 9] = EmitPropertyList( + "\01l_OBJC_$_PROP_LIST_" + ID->getObjCRuntimeNameAsString(), + ID, ID->getClassInterface(), ObjCTypes, false); } llvm::Constant *Init = llvm::ConstantStruct::get(ObjCTypes.ClassRonfABITy, Values); @@ -5870,8 +5951,9 @@ llvm::GlobalVariable *CGObjCNonFragileABIMac::BuildClassMetaData( GV->setSection("__DATA, __objc_data"); GV->setAlignment( CGM.getDataLayout().getABITypeAlignment(ObjCTypes.ClassnfABITy)); - if (HiddenVisibility) - GV->setVisibility(llvm::GlobalValue::HiddenVisibility); + if (!CGM.getTriple().isOSBinFormatCOFF()) + if (HiddenVisibility) + GV->setVisibility(llvm::GlobalValue::HiddenVisibility); return GV; } @@ -5896,49 +5978,60 @@ void CGObjCNonFragileABIMac::GetClassSizeInfo(const ObjCImplementationDecl *OID, InstanceStart = RL.getFieldOffset(0) / CGM.getContext().getCharWidth(); } +static llvm::GlobalValue::DLLStorageClassTypes getStorage(CodeGenModule &CGM, + StringRef Name) { + IdentifierInfo &II = CGM.getContext().Idents.get(Name); + TranslationUnitDecl *TUDecl = CGM.getContext().getTranslationUnitDecl(); + DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl); + + const VarDecl *VD = nullptr; + for (const auto &Result : DC->lookup(&II)) + if ((VD = dyn_cast<VarDecl>(Result))) + break; + + if (!VD) + return llvm::GlobalValue::DLLImportStorageClass; + if (VD->hasAttr<DLLExportAttr>()) + return llvm::GlobalValue::DLLExportStorageClass; + if (VD->hasAttr<DLLImportAttr>()) + return llvm::GlobalValue::DLLImportStorageClass; + return llvm::GlobalValue::DefaultStorageClass; +} + void CGObjCNonFragileABIMac::GenerateClass(const ObjCImplementationDecl *ID) { - std::string ClassName = ID->getObjCRuntimeNameAsString(); if (!ObjCEmptyCacheVar) { - ObjCEmptyCacheVar = new llvm::GlobalVariable( - CGM.getModule(), - ObjCTypes.CacheTy, - false, - 
llvm::GlobalValue::ExternalLinkage, - nullptr, - "_objc_empty_cache"); - - // Make this entry NULL for any iOS device target, any iOS simulator target, - // OS X with deployment target 10.9 or later. + ObjCEmptyCacheVar = + new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.CacheTy, false, + llvm::GlobalValue::ExternalLinkage, nullptr, + "_objc_empty_cache"); + if (CGM.getTriple().isOSBinFormatCOFF()) + ObjCEmptyCacheVar->setDLLStorageClass(getStorage(CGM, "_objc_empty_cache")); + + // Only OS X with deployment version <10.9 use the empty vtable symbol const llvm::Triple &Triple = CGM.getTarget().getTriple(); - if (Triple.isiOS() || Triple.isWatchOS() || - (Triple.isMacOSX() && !Triple.isMacOSXVersionLT(10, 9))) - // This entry will be null. - ObjCEmptyVtableVar = nullptr; - else - ObjCEmptyVtableVar = new llvm::GlobalVariable( - CGM.getModule(), - ObjCTypes.ImpnfABITy, - false, - llvm::GlobalValue::ExternalLinkage, - nullptr, - "_objc_empty_vtable"); - } - assert(ID->getClassInterface() && - "CGObjCNonFragileABIMac::GenerateClass - class is 0"); + if (Triple.isMacOSX() && Triple.isMacOSXVersionLT(10, 9)) + ObjCEmptyVtableVar = + new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.ImpnfABITy, false, + llvm::GlobalValue::ExternalLinkage, nullptr, + "_objc_empty_vtable"); + } + // FIXME: Is this correct (that meta class size is never computed)? uint32_t InstanceStart = CGM.getDataLayout().getTypeAllocSize(ObjCTypes.ClassnfABITy); uint32_t InstanceSize = InstanceStart; uint32_t flags = NonFragileABI_Class_Meta; - llvm::SmallString<64> ObjCMetaClassName(getMetaclassSymbolPrefix()); - llvm::SmallString<64> ObjCClassName(getClassSymbolPrefix()); - llvm::SmallString<64> TClassName; llvm::GlobalVariable *SuperClassGV, *IsAGV; + StringRef ClassName = ID->getObjCRuntimeNameAsString(); + const auto *CI = ID->getClassInterface(); + assert(CI && "CGObjCNonFragileABIMac::GenerateClass - class is 0"); + // Build the flags for the metaclass. - bool classIsHidden = - ID->getClassInterface()->getVisibility() == HiddenVisibility; + bool classIsHidden = (CGM.getTriple().isOSBinFormatCOFF()) + ? 
!CI->hasAttr<DLLExportAttr>() + : CI->getVisibility() == HiddenVisibility; if (classIsHidden) flags |= NonFragileABI_Class_Hidden; @@ -5947,45 +6040,59 @@ void CGObjCNonFragileABIMac::GenerateClass(const ObjCImplementationDecl *ID) { if (ID->hasNonZeroConstructors() || ID->hasDestructors()) { flags |= NonFragileABI_Class_HasCXXStructors; if (!ID->hasNonZeroConstructors()) - flags |= NonFragileABI_Class_HasCXXDestructorOnly; + flags |= NonFragileABI_Class_HasCXXDestructorOnly; } - if (!ID->getClassInterface()->getSuperClass()) { + if (!CI->getSuperClass()) { // class is root flags |= NonFragileABI_Class_Root; - TClassName = ObjCClassName; - TClassName += ClassName; - SuperClassGV = GetClassGlobal(TClassName.str(), - ID->getClassInterface()->isWeakImported()); - TClassName = ObjCMetaClassName; - TClassName += ClassName; - IsAGV = GetClassGlobal(TClassName.str(), - ID->getClassInterface()->isWeakImported()); + + SuperClassGV = GetClassGlobal((getClassSymbolPrefix() + ClassName).str(), + CI->isWeakImported()); + if (CGM.getTriple().isOSBinFormatCOFF()) + if (CI->hasAttr<DLLImportAttr>()) + SuperClassGV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); + + IsAGV = GetClassGlobal((getMetaclassSymbolPrefix() + ClassName).str(), + CI->isWeakImported()); + if (CGM.getTriple().isOSBinFormatCOFF()) + if (CI->hasAttr<DLLImportAttr>()) + IsAGV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); } else { // Has a root. Current class is not a root. const ObjCInterfaceDecl *Root = ID->getClassInterface(); while (const ObjCInterfaceDecl *Super = Root->getSuperClass()) Root = Super; - TClassName = ObjCMetaClassName ; - TClassName += Root->getObjCRuntimeNameAsString(); - IsAGV = GetClassGlobal(TClassName.str(), + + const auto *Super = CI->getSuperClass(); + StringRef RootClassName = Root->getObjCRuntimeNameAsString(); + StringRef SuperClassName = Super->getObjCRuntimeNameAsString(); + + IsAGV = GetClassGlobal((getMetaclassSymbolPrefix() + RootClassName).str(), Root->isWeakImported()); + if (CGM.getTriple().isOSBinFormatCOFF()) + if (Root->hasAttr<DLLImportAttr>()) + IsAGV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); // work on super class metadata symbol. 
- TClassName = ObjCMetaClassName; - TClassName += ID->getClassInterface()->getSuperClass()->getObjCRuntimeNameAsString(); - SuperClassGV = GetClassGlobal( - TClassName.str(), - ID->getClassInterface()->getSuperClass()->isWeakImported()); - } - llvm::GlobalVariable *CLASS_RO_GV = BuildClassRoTInitializer(flags, - InstanceStart, - InstanceSize,ID); - TClassName = ObjCMetaClassName; - TClassName += ClassName; - llvm::GlobalVariable *MetaTClass = BuildClassMetaData( - TClassName.str(), IsAGV, SuperClassGV, CLASS_RO_GV, classIsHidden, - ID->getClassInterface()->isWeakImported()); + SuperClassGV = + GetClassGlobal((getMetaclassSymbolPrefix() + SuperClassName).str(), + Super->isWeakImported()); + if (CGM.getTriple().isOSBinFormatCOFF()) + if (Super->hasAttr<DLLImportAttr>()) + SuperClassGV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); + } + + llvm::GlobalVariable *CLASS_RO_GV = + BuildClassRoTInitializer(flags, InstanceStart, InstanceSize, ID); + + llvm::GlobalVariable *MetaTClass = + BuildClassMetaData((getMetaclassSymbolPrefix() + ClassName).str(), IsAGV, + SuperClassGV, CLASS_RO_GV, classIsHidden, + CI->isWeakImported()); + if (CGM.getTriple().isOSBinFormatCOFF()) + if (CI->hasAttr<DLLExportAttr>()) + MetaTClass->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); DefinedMetaClasses.push_back(MetaTClass); // Metadata for the class @@ -6006,34 +6113,38 @@ void CGObjCNonFragileABIMac::GenerateClass(const ObjCImplementationDecl *ID) { flags |= NonFragileABI_Class_HasCXXDestructorOnly; } - if (hasObjCExceptionAttribute(CGM.getContext(), ID->getClassInterface())) + if (hasObjCExceptionAttribute(CGM.getContext(), CI)) flags |= NonFragileABI_Class_Exception; - if (!ID->getClassInterface()->getSuperClass()) { + if (!CI->getSuperClass()) { flags |= NonFragileABI_Class_Root; SuperClassGV = nullptr; } else { // Has a root. Current class is not a root. - TClassName = ObjCClassName; - TClassName += ID->getClassInterface()->getSuperClass()->getObjCRuntimeNameAsString(); - SuperClassGV = GetClassGlobal( - TClassName.str(), - ID->getClassInterface()->getSuperClass()->isWeakImported()); + const auto *Super = CI->getSuperClass(); + StringRef SuperClassName = Super->getObjCRuntimeNameAsString(); + + SuperClassGV = + GetClassGlobal((getClassSymbolPrefix() + SuperClassName).str(), + Super->isWeakImported()); + if (CGM.getTriple().isOSBinFormatCOFF()) + if (Super->hasAttr<DLLImportAttr>()) + SuperClassGV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); } + GetClassSizeInfo(ID, InstanceStart, InstanceSize); - CLASS_RO_GV = BuildClassRoTInitializer(flags, - InstanceStart, - InstanceSize, - ID); + CLASS_RO_GV = + BuildClassRoTInitializer(flags, InstanceStart, InstanceSize, ID); - TClassName = ObjCClassName; - TClassName += ClassName; llvm::GlobalVariable *ClassMD = - BuildClassMetaData(TClassName.str(), MetaTClass, SuperClassGV, CLASS_RO_GV, - classIsHidden, - ID->getClassInterface()->isWeakImported()); + BuildClassMetaData((getClassSymbolPrefix() + ClassName).str(), MetaTClass, + SuperClassGV, CLASS_RO_GV, classIsHidden, + CI->isWeakImported()); + if (CGM.getTriple().isOSBinFormatCOFF()) + if (CI->hasAttr<DLLExportAttr>()) + ClassMD->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); DefinedClasses.push_back(ClassMD); - ImplementedClasses.push_back(ID->getClassInterface()); + ImplementedClasses.push_back(CI); // Determine if this class is also "non-lazy". 
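On COFF targets the GenerateClass changes above repeat the same small pattern for each metadata global: a DLL storage class is only set when the object format is COFF and the referenced interface carries a dllimport (or dllexport) attribute. Pulled out as a standalone helper it would look roughly like this; a hypothetical refactoring sketch, not code from the diff:

    #include "clang/AST/Attr.h"
    #include "clang/AST/DeclObjC.h"
    #include "llvm/ADT/Triple.h"
    #include "llvm/IR/GlobalVariable.h"

    // Mark a metadata global dllimport when the interface it refers to is
    // itself dllimport and we are emitting for a COFF target.
    static void markImportedForCOFF(llvm::GlobalVariable *GV,
                                    const clang::ObjCInterfaceDecl *CI,
                                    const llvm::Triple &Triple) {
      if (Triple.isOSBinFormatCOFF() && CI->hasAttr<clang::DLLImportAttr>())
        GV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass);
    }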
if (ImplementationIsNonLazy(ID)) @@ -6041,7 +6152,7 @@ void CGObjCNonFragileABIMac::GenerateClass(const ObjCImplementationDecl *ID) { // Force the definition of the EHType if necessary. if (flags & NonFragileABI_Class_Exception) - GetInterfaceEHType(ID->getClassInterface(), true); + GetInterfaceEHType(CI, true); // Make sure method definition entries are all clear for next implementation. MethodDefinitions.clear(); } @@ -6093,6 +6204,8 @@ llvm::Value *CGObjCNonFragileABIMac::GenerateProtocolRef(CodeGenFunction &CGF, /// const struct _method_list_t * const class_methods; /// const struct _protocol_list_t * const protocols; /// const struct _prop_list_t * const properties; +/// const struct _prop_list_t * const class_properties; +/// const uint32_t size; /// } /// void CGObjCNonFragileABIMac::GenerateCategory(const ObjCCategoryImplDecl *OCD) { @@ -6107,7 +6220,7 @@ void CGObjCNonFragileABIMac::GenerateCategory(const ObjCCategoryImplDecl *OCD) { llvm::SmallString<64> ExtClassName(getClassSymbolPrefix()); ExtClassName += Interface->getObjCRuntimeNameAsString(); - llvm::Constant *Values[6]; + llvm::Constant *Values[8]; Values[0] = GetClassName(OCD->getIdentifier()->getName()); // meta-class entry symbol llvm::GlobalVariable *ClassGV = @@ -6156,12 +6269,18 @@ void CGObjCNonFragileABIMac::GenerateCategory(const ObjCCategoryImplDecl *OCD) { Category->protocol_begin(), Category->protocol_end()); Values[5] = EmitPropertyList("\01l_OBJC_$_PROP_LIST_" + ExtName.str(), - OCD, Category, ObjCTypes); + OCD, Category, ObjCTypes, false); + Values[6] = EmitPropertyList("\01l_OBJC_$_CLASS_PROP_LIST_" + ExtName.str(), + OCD, Category, ObjCTypes, true); } else { Values[4] = llvm::Constant::getNullValue(ObjCTypes.ProtocolListnfABIPtrTy); Values[5] = llvm::Constant::getNullValue(ObjCTypes.PropertyListPtrTy); + Values[6] = llvm::Constant::getNullValue(ObjCTypes.PropertyListPtrTy); } + unsigned Size = CGM.getDataLayout().getTypeAllocSize(ObjCTypes.CategorynfABITy); + Values[7] = llvm::ConstantInt::get(ObjCTypes.IntTy, Size); + llvm::Constant *Init = llvm::ConstantStruct::get(ObjCTypes.CategorynfABITy, Values); @@ -6210,9 +6329,8 @@ llvm::Constant *CGObjCNonFragileABIMac::GetMethodConstant( /// } /// llvm::Constant * -CGObjCNonFragileABIMac::EmitMethodList(Twine Name, - const char *Section, - ArrayRef<llvm::Constant*> Methods) { +CGObjCNonFragileABIMac::EmitMethodList(Twine Name, StringRef Section, + ArrayRef<llvm::Constant *> Methods) { // Return null for empty list. 
if (Methods.empty()) return llvm::Constant::getNullValue(ObjCTypes.MethodListnfABIPtrTy); @@ -6242,18 +6360,28 @@ CGObjCNonFragileABIMac::EmitMethodList(Twine Name, llvm::GlobalVariable * CGObjCNonFragileABIMac::ObjCIvarOffsetVariable(const ObjCInterfaceDecl *ID, const ObjCIvarDecl *Ivar) { - const ObjCInterfaceDecl *Container = Ivar->getContainingInterface(); llvm::SmallString<64> Name("OBJC_IVAR_$_"); Name += Container->getObjCRuntimeNameAsString(); Name += "."; Name += Ivar->getName(); - llvm::GlobalVariable *IvarOffsetGV = - CGM.getModule().getGlobalVariable(Name); - if (!IvarOffsetGV) - IvarOffsetGV = new llvm::GlobalVariable( - CGM.getModule(), ObjCTypes.IvarOffsetVarTy, false, - llvm::GlobalValue::ExternalLinkage, nullptr, Name.str()); + llvm::GlobalVariable *IvarOffsetGV = CGM.getModule().getGlobalVariable(Name); + if (!IvarOffsetGV) { + IvarOffsetGV = + new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.IvarOffsetVarTy, + false, llvm::GlobalValue::ExternalLinkage, + nullptr, Name.str()); + if (CGM.getTriple().isOSBinFormatCOFF()) { + bool IsPrivateOrPackage = + Ivar->getAccessControl() == ObjCIvarDecl::Private || + Ivar->getAccessControl() == ObjCIvarDecl::Package; + + if (ID->hasAttr<DLLExportAttr>() && !IsPrivateOrPackage) + IvarOffsetGV->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); + else if (ID->hasAttr<DLLImportAttr>()) + IvarOffsetGV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); + } + } return IvarOffsetGV; } @@ -6267,14 +6395,17 @@ CGObjCNonFragileABIMac::EmitIvarOffsetVar(const ObjCInterfaceDecl *ID, IvarOffsetGV->setAlignment( CGM.getDataLayout().getABITypeAlignment(ObjCTypes.IvarOffsetVarTy)); - // FIXME: This matches gcc, but shouldn't the visibility be set on the use as - // well (i.e., in ObjCIvarOffsetVariable). - if (Ivar->getAccessControl() == ObjCIvarDecl::Private || - Ivar->getAccessControl() == ObjCIvarDecl::Package || - ID->getVisibility() == HiddenVisibility) - IvarOffsetGV->setVisibility(llvm::GlobalValue::HiddenVisibility); - else - IvarOffsetGV->setVisibility(llvm::GlobalValue::DefaultVisibility); + if (!CGM.getTriple().isOSBinFormatCOFF()) { + // FIXME: This matches gcc, but shouldn't the visibility be set on the use + // as well (i.e., in ObjCIvarOffsetVariable). + if (Ivar->getAccessControl() == ObjCIvarDecl::Private || + Ivar->getAccessControl() == ObjCIvarDecl::Package || + ID->getVisibility() == HiddenVisibility) + IvarOffsetGV->setVisibility(llvm::GlobalValue::HiddenVisibility); + else + IvarOffsetGV->setVisibility(llvm::GlobalValue::DefaultVisibility); + } + IvarOffsetGV->setSection("__DATA, __objc_ivar"); return IvarOffsetGV; } @@ -6361,7 +6492,7 @@ llvm::Constant *CGObjCNonFragileABIMac::GetOrEmitProtocolRef( const ObjCProtocolDecl *PD) { llvm::GlobalVariable *&Entry = Protocols[PD->getIdentifier()]; - if (!Entry) { + if (!Entry) // We use the initializer as a marker of whether this is a forward // reference or not. At module finalization we add the empty // contents for protocols which were referenced but never defined. 
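The COFF logic added to ObjCIvarOffsetVariable encodes a small rule: an exported class's OBJC_IVAR_$_ offset variables are themselves dllexported unless the ivar is private or package, while an imported class's offset variables are dllimported. The same decision restated as a standalone predicate, purely as a distillation of the code above:

    enum class OffsetVarStorage { Export, Import, Default };

    OffsetVarStorage storageForIvarOffset(bool classIsDLLExport,
                                          bool classIsDLLImport,
                                          bool ivarIsPrivateOrPackage) {
      if (classIsDLLExport && !ivarIsPrivateOrPackage)
        return OffsetVarStorage::Export;
      if (classIsDLLImport)
        return OffsetVarStorage::Import;
      return OffsetVarStorage::Default;
    }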
@@ -6370,8 +6501,6 @@ llvm::Constant *CGObjCNonFragileABIMac::GetOrEmitProtocolRef( false, llvm::GlobalValue::ExternalLinkage, nullptr, "\01l_OBJC_PROTOCOL_$_" + PD->getObjCRuntimeNameAsString()); - Entry->setSection("__DATA,__datacoal_nt,coalesced"); - } return Entry; } @@ -6391,6 +6520,7 @@ llvm::Constant *CGObjCNonFragileABIMac::GetOrEmitProtocolRef( /// const uint32_t flags; // = 0 /// const char ** extendedMethodTypes; /// const char *demangledName; +/// const struct _prop_list_t * class_properties; /// } /// @endcode /// @@ -6442,7 +6572,7 @@ llvm::Constant *CGObjCNonFragileABIMac::GetOrEmitProtocol( MethodTypesExt.insert(MethodTypesExt.end(), OptMethodTypesExt.begin(), OptMethodTypesExt.end()); - llvm::Constant *Values[12]; + llvm::Constant *Values[13]; // isa is NULL Values[0] = llvm::Constant::getNullValue(ObjCTypes.ObjectPtrTy); Values[1] = GetClassName(PD->getObjCRuntimeNameAsString()); @@ -6466,8 +6596,9 @@ llvm::Constant *CGObjCNonFragileABIMac::GetOrEmitProtocol( + PD->getObjCRuntimeNameAsString(), "__DATA, __objc_const", OptClassMethods); - Values[7] = EmitPropertyList("\01l_OBJC_$_PROP_LIST_" + PD->getObjCRuntimeNameAsString(), - nullptr, PD, ObjCTypes); + Values[7] = EmitPropertyList( + "\01l_OBJC_$_PROP_LIST_" + PD->getObjCRuntimeNameAsString(), + nullptr, PD, ObjCTypes, false); uint32_t Size = CGM.getDataLayout().getTypeAllocSize(ObjCTypes.ProtocolnfABITy); Values[8] = llvm::ConstantInt::get(ObjCTypes.IntTy, Size); @@ -6477,6 +6608,10 @@ llvm::Constant *CGObjCNonFragileABIMac::GetOrEmitProtocol( MethodTypesExt, ObjCTypes); // const char *demangledName; Values[11] = llvm::Constant::getNullValue(ObjCTypes.Int8PtrTy); + + Values[12] = EmitPropertyList( + "\01l_OBJC_$_CLASS_PROP_LIST_" + PD->getObjCRuntimeNameAsString(), + nullptr, PD, ObjCTypes, true); llvm::Constant *Init = llvm::ConstantStruct::get(ObjCTypes.ProtocolnfABITy, Values); @@ -6492,7 +6627,6 @@ llvm::Constant *CGObjCNonFragileABIMac::GetOrEmitProtocol( "\01l_OBJC_PROTOCOL_$_" + PD->getObjCRuntimeNameAsString()); Entry->setAlignment( CGM.getDataLayout().getABITypeAlignment(ObjCTypes.ProtocolnfABITy)); - Entry->setSection("__DATA,__datacoal_nt,coalesced"); Protocols[PD->getIdentifier()] = Entry; } @@ -6640,7 +6774,7 @@ static void appendSelectorForMessageRefTable(std::string &buffer, } } -/// Emit a "v-table" message send. We emit a weak hidden-visibility +/// Emit a "vtable" message send. We emit a weak hidden-visibility /// struct, initially containing the selector pointer and a pointer to /// a "fixup" variant of the appropriate objc_msgSend. To call, we /// load and call the function pointer, passing the address of the @@ -6734,7 +6868,7 @@ CGObjCNonFragileABIMac::EmitVTableMessageSend(CodeGenFunction &CGF, bool requiresnullCheck = false; if (CGM.getLangOpts().ObjCAutoRefCount && method) - for (const auto *ParamDecl : method->params()) { + for (const auto *ParamDecl : method->parameters()) { if (ParamDecl->hasAttr<NSConsumedAttr>()) { if (!nullReturn.NullBB) nullReturn.init(CGF, arg0); @@ -6783,7 +6917,7 @@ CGObjCNonFragileABIMac::GenerateMessageSend(CodeGen::CodeGenFunction &CGF, } llvm::GlobalVariable * -CGObjCNonFragileABIMac::GetClassGlobal(const std::string &Name, bool Weak) { +CGObjCNonFragileABIMac::GetClassGlobal(StringRef Name, bool Weak) { llvm::GlobalValue::LinkageTypes L = Weak ? 
llvm::GlobalValue::ExternalWeakLinkage : llvm::GlobalValue::ExternalLinkage; @@ -6806,9 +6940,8 @@ llvm::Value *CGObjCNonFragileABIMac::EmitClassRefFromId(CodeGenFunction &CGF, llvm::GlobalVariable *&Entry = ClassReferences[II]; if (!Entry) { - std::string ClassName( - getClassSymbolPrefix() + - (ID ? ID->getObjCRuntimeNameAsString() : II->getName()).str()); + StringRef Name = ID ? ID->getObjCRuntimeNameAsString() : II->getName(); + std::string ClassName = (getClassSymbolPrefix() + Name).str(); llvm::GlobalVariable *ClassGV = GetClassGlobal(ClassName, Weak); Entry = new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.ClassnfABIPtrTy, false, llvm::GlobalValue::PrivateLinkage, @@ -6822,6 +6955,11 @@ llvm::Value *CGObjCNonFragileABIMac::EmitClassRefFromId(CodeGenFunction &CGF, llvm::Value *CGObjCNonFragileABIMac::EmitClassRef(CodeGenFunction &CGF, const ObjCInterfaceDecl *ID) { + // If the class has the objc_runtime_visible attribute, we need to + // use the Objective-C runtime to get the class. + if (ID->hasAttr<ObjCRuntimeVisibleAttr>()) + return EmitClassRefViaRuntime(CGF, ID, ObjCTypes); + return EmitClassRefFromId(CGF, ID->getIdentifier(), ID->isWeakImported(), ID); } @@ -7100,27 +7238,28 @@ CGObjCNonFragileABIMac::EmitSynchronizedStmt(CodeGen::CodeGenFunction &CGF, llvm::Constant * CGObjCNonFragileABIMac::GetEHType(QualType T) { // There's a particular fixed type info for 'id'. - if (T->isObjCIdType() || - T->isObjCQualifiedIdType()) { - llvm::Constant *IDEHType = - CGM.getModule().getGlobalVariable("OBJC_EHTYPE_id"); - if (!IDEHType) + if (T->isObjCIdType() || T->isObjCQualifiedIdType()) { + auto *IDEHType = CGM.getModule().getGlobalVariable("OBJC_EHTYPE_id"); + if (!IDEHType) { IDEHType = - new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.EHTypeTy, - false, - llvm::GlobalValue::ExternalLinkage, - nullptr, "OBJC_EHTYPE_id"); + new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.EHTypeTy, false, + llvm::GlobalValue::ExternalLinkage, nullptr, + "OBJC_EHTYPE_id"); + if (CGM.getTriple().isOSBinFormatCOFF()) + IDEHType->setDLLStorageClass(getStorage(CGM, "OBJC_EHTYPE_id")); + } return IDEHType; } // All other types should be Objective-C interface pointer types. - const ObjCObjectPointerType *PT = - T->getAs<ObjCObjectPointerType>(); + const ObjCObjectPointerType *PT = T->getAs<ObjCObjectPointerType>(); assert(PT && "Invalid @catch type."); + const ObjCInterfaceType *IT = PT->getInterfaceType(); assert(IT && "Invalid @catch type."); + return GetInterfaceEHType(IT->getDecl(), false); -} +} void CGObjCNonFragileABIMac::EmitTryStmt(CodeGen::CodeGenFunction &CGF, const ObjCAtTryStmt &S) { @@ -7153,6 +7292,7 @@ llvm::Constant * CGObjCNonFragileABIMac::GetInterfaceEHType(const ObjCInterfaceDecl *ID, bool ForDefinition) { llvm::GlobalVariable * &Entry = EHTypeReferences[ID->getIdentifier()]; + StringRef ClassName = ID->getObjCRuntimeNameAsString(); // If we don't need a definition, return the entry if found or check // if we use an external reference. @@ -7162,38 +7302,43 @@ CGObjCNonFragileABIMac::GetInterfaceEHType(const ObjCInterfaceDecl *ID, // If this type (or a super class) has the __objc_exception__ // attribute, emit an external reference. 
- if (hasObjCExceptionAttribute(CGM.getContext(), ID)) - return Entry = - new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.EHTypeTy, false, - llvm::GlobalValue::ExternalLinkage, - nullptr, - ("OBJC_EHTYPE_$_" + - ID->getObjCRuntimeNameAsString())); + if (hasObjCExceptionAttribute(CGM.getContext(), ID)) { + std::string EHTypeName = ("OBJC_EHTYPE_$_" + ClassName).str(); + Entry = new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.EHTypeTy, + false, llvm::GlobalValue::ExternalLinkage, + nullptr, EHTypeName); + if (CGM.getTriple().isOSBinFormatCOFF()) { + if (ID->hasAttr<DLLExportAttr>()) + Entry->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); + else if (ID->hasAttr<DLLImportAttr>()) + Entry->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); + } + return Entry; + } } - // Otherwise we need to either make a new entry or fill in the - // initializer. + // Otherwise we need to either make a new entry or fill in the initializer. assert((!Entry || !Entry->hasInitializer()) && "Duplicate EHType definition"); - llvm::SmallString<64> ClassName(getClassSymbolPrefix()); - ClassName += ID->getObjCRuntimeNameAsString(); + std::string VTableName = "objc_ehtype_vtable"; - llvm::GlobalVariable *VTableGV = - CGM.getModule().getGlobalVariable(VTableName); - if (!VTableGV) - VTableGV = new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.Int8PtrTy, - false, - llvm::GlobalValue::ExternalLinkage, - nullptr, VTableName); + auto *VTableGV = CGM.getModule().getGlobalVariable(VTableName); + if (!VTableGV) { + VTableGV = + new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.Int8PtrTy, false, + llvm::GlobalValue::ExternalLinkage, nullptr, + VTableName); + if (CGM.getTriple().isOSBinFormatCOFF()) + VTableGV->setDLLStorageClass(getStorage(CGM, VTableName)); + } llvm::Value *VTableIdx = llvm::ConstantInt::get(CGM.Int32Ty, 2); - llvm::Constant *Values[] = { llvm::ConstantExpr::getGetElementPtr(VTableGV->getValueType(), VTableGV, VTableIdx), GetClassName(ID->getObjCRuntimeNameAsString()), - GetClassGlobal(ClassName.str())}; - llvm::Constant *Init = - llvm::ConstantStruct::get(ObjCTypes.EHTypeTy, Values); + GetClassGlobal((getClassSymbolPrefix() + ClassName).str()), + }; + llvm::Constant *Init = llvm::ConstantStruct::get(ObjCTypes.EHTypeTy, Values); llvm::GlobalValue::LinkageTypes L = ForDefinition ? 
llvm::GlobalValue::ExternalLinkage @@ -7201,24 +7346,25 @@ CGObjCNonFragileABIMac::GetInterfaceEHType(const ObjCInterfaceDecl *ID, if (Entry) { Entry->setInitializer(Init); } else { - llvm::SmallString<64> EHTYPEName("OBJC_EHTYPE_$_"); - EHTYPEName += ID->getObjCRuntimeNameAsString(); - Entry = new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.EHTypeTy, false, - L, - Init, - EHTYPEName.str()); + Entry = + new llvm::GlobalVariable(CGM.getModule(), ObjCTypes.EHTypeTy, false, L, + Init, ("OBJC_EHTYPE_$_" + ClassName).str()); + if (CGM.getTriple().isOSBinFormatCOFF()) + if (hasObjCExceptionAttribute(CGM.getContext(), ID)) + if (ID->hasAttr<DLLExportAttr>()) + Entry->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); } assert(Entry->getLinkage() == L); - if (ID->getVisibility() == HiddenVisibility) - Entry->setVisibility(llvm::GlobalValue::HiddenVisibility); - Entry->setAlignment(CGM.getDataLayout().getABITypeAlignment( - ObjCTypes.EHTypeTy)); + if (!CGM.getTriple().isOSBinFormatCOFF()) + if (ID->getVisibility() == HiddenVisibility) + Entry->setVisibility(llvm::GlobalValue::HiddenVisibility); + + const auto &DL = CGM.getDataLayout(); + Entry->setAlignment(DL.getABITypeAlignment(ObjCTypes.EHTypeTy)); if (ForDefinition) Entry->setSection("__DATA,__objc_const"); - else - Entry->setSection("__DATA,__datacoal_nt,coalesced"); return Entry; } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGObjCRuntime.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGObjCRuntime.cpp index 7be9ae9..0caf6d9 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGObjCRuntime.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGObjCRuntime.cpp @@ -120,9 +120,8 @@ LValue CGObjCRuntime::EmitValueForIvarAtOffset(CodeGen::CodeGenFunction &CGF, uint64_t BitOffset = FieldBitOffset % CGF.CGM.getContext().getCharWidth(); uint64_t AlignmentBits = CGF.CGM.getTarget().getCharAlign(); uint64_t BitFieldSize = Ivar->getBitWidthValue(CGF.getContext()); - CharUnits StorageSize = - CGF.CGM.getContext().toCharUnitsFromBits( - llvm::RoundUpToAlignment(BitOffset + BitFieldSize, AlignmentBits)); + CharUnits StorageSize = CGF.CGM.getContext().toCharUnitsFromBits( + llvm::alignTo(BitOffset + BitFieldSize, AlignmentBits)); CharUnits Alignment = CGF.CGM.getContext().toCharUnitsFromBits(AlignmentBits); // Allocate a new CGBitFieldInfo object to describe this access. @@ -364,25 +363,15 @@ CGObjCRuntime::getMessageSendInfo(const ObjCMethodDecl *method, llvm::PointerType *signatureType = CGM.getTypes().GetFunctionType(signature)->getPointerTo(); - // If that's not variadic, there's no need to recompute the ABI - // arrangement. - if (!signature.isVariadic()) - return MessageSendInfo(signature, signatureType); + const CGFunctionInfo &signatureForCall = + CGM.getTypes().arrangeCall(signature, callArgs); - // Otherwise, there is. - FunctionType::ExtInfo einfo = signature.getExtInfo(); - const CGFunctionInfo &argsInfo = - CGM.getTypes().arrangeFreeFunctionCall(resultType, callArgs, einfo, - signature.getRequiredArgs()); - - return MessageSendInfo(argsInfo, signatureType); + return MessageSendInfo(signatureForCall, signatureType); } // There's no method; just use a default CC. const CGFunctionInfo &argsInfo = - CGM.getTypes().arrangeFreeFunctionCall(resultType, callArgs, - FunctionType::ExtInfo(), - RequiredArgs::All); + CGM.getTypes().arrangeUnprototypedObjCMessageSend(resultType, callArgs); // Derive the signature to call from that. 
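These hunks also pick up the LLVM rename of RoundUpToAlignment to alignTo (and CharUnits::RoundUpToAlignment to CharUnits::alignTo); the semantics are unchanged, the value is still rounded up to the next multiple of the alignment. A minimal check of that behaviour with illustrative values:

    #include <cassert>
    #include "llvm/Support/MathExtras.h"

    int main() {
      assert(llvm::alignTo(13, 8) == 16);   // rounds up to the next multiple of 8
      assert(llvm::alignTo(16, 8) == 16);   // already-aligned values are unchanged
      return 0;
    }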
llvm::PointerType *signatureType = diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGObjCRuntime.h b/contrib/llvm/tools/clang/lib/CodeGen/CGObjCRuntime.h index 28d88dd..6c33059 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGObjCRuntime.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGObjCRuntime.h @@ -280,7 +280,7 @@ public: virtual llvm::Constant *BuildByrefLayout(CodeGen::CodeGenModule &CGM, QualType T) = 0; - virtual llvm::GlobalVariable *GetClassGlobal(const std::string &Name, + virtual llvm::GlobalVariable *GetClassGlobal(StringRef Name, bool Weak = false) = 0; struct MessageSendInfo { diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenCLRuntime.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenCLRuntime.cpp index 6866789..38aebea 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenCLRuntime.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenCLRuntime.cpp @@ -40,46 +40,12 @@ llvm::Type *CGOpenCLRuntime::convertOpenCLSpecificType(const Type *T) { default: llvm_unreachable("Unexpected opencl builtin type!"); return nullptr; - case BuiltinType::OCLImage1d: - return llvm::PointerType::get(llvm::StructType::create( - Ctx, "opencl.image1d_t"), ImgAddrSpc); - case BuiltinType::OCLImage1dArray: - return llvm::PointerType::get(llvm::StructType::create( - Ctx, "opencl.image1d_array_t"), ImgAddrSpc); - case BuiltinType::OCLImage1dBuffer: - return llvm::PointerType::get(llvm::StructType::create( - Ctx, "opencl.image1d_buffer_t"), ImgAddrSpc); - case BuiltinType::OCLImage2d: - return llvm::PointerType::get(llvm::StructType::create( - Ctx, "opencl.image2d_t"), ImgAddrSpc); - case BuiltinType::OCLImage2dArray: - return llvm::PointerType::get(llvm::StructType::create( - Ctx, "opencl.image2d_array_t"), ImgAddrSpc); - case BuiltinType::OCLImage2dDepth: - return llvm::PointerType::get( - llvm::StructType::create(Ctx, "opencl.image2d_depth_t"), ImgAddrSpc); - case BuiltinType::OCLImage2dArrayDepth: - return llvm::PointerType::get( - llvm::StructType::create(Ctx, "opencl.image2d_array_depth_t"), +#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \ + case BuiltinType::Id: \ + return llvm::PointerType::get( \ + llvm::StructType::create(Ctx, "opencl." #ImgType "_" #Suffix "_t"), \ ImgAddrSpc); - case BuiltinType::OCLImage2dMSAA: - return llvm::PointerType::get( - llvm::StructType::create(Ctx, "opencl.image2d_msaa_t"), ImgAddrSpc); - case BuiltinType::OCLImage2dArrayMSAA: - return llvm::PointerType::get( - llvm::StructType::create(Ctx, "opencl.image2d_array_msaa_t"), - ImgAddrSpc); - case BuiltinType::OCLImage2dMSAADepth: - return llvm::PointerType::get( - llvm::StructType::create(Ctx, "opencl.image2d_msaa_depth_t"), - ImgAddrSpc); - case BuiltinType::OCLImage2dArrayMSAADepth: - return llvm::PointerType::get( - llvm::StructType::create(Ctx, "opencl.image2d_array_msaa_depth_t"), - ImgAddrSpc); - case BuiltinType::OCLImage3d: - return llvm::PointerType::get(llvm::StructType::create( - Ctx, "opencl.image3d_t"), ImgAddrSpc); +#include "clang/Basic/OpenCLImageTypes.def" case BuiltinType::OCLSampler: return llvm::IntegerType::get(Ctx, 32); case BuiltinType::OCLEvent: diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 5cfacac..6a0edbe 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -72,6 +72,8 @@ public: /// \return LValue for thread id variable. This LValue always has type int32*. 
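In the CGOpenCLRuntime.cpp hunk above, the long per-image-type switch collapses into a single IMAGE_TYPE macro expanded by including clang/Basic/OpenCLImageTypes.def. The same X-macro technique in miniature, with the .def contents inlined and the entry names invented for the example:

    #include <cstdio>

    // Stand-in for the .def file: just a list of IMAGE_TYPE(...) invocations.
    #define EXAMPLE_IMAGE_TYPES \
      IMAGE_TYPE(image1d, OCLImage1dRO, OCLImage1dROTy, read_only, ro) \
      IMAGE_TYPE(image2d, OCLImage2dWO, OCLImage2dWOTy, write_only, wo)

    // Stand-in for the switch above: each entry becomes one "case".
    #define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
      std::puts("opencl." #ImgType "_" #Suffix "_t");

    int main() {
      EXAMPLE_IMAGE_TYPES   // prints opencl.image1d_ro_t and opencl.image2d_wo_t
      return 0;
    }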
virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); + virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {} + CGOpenMPRegionKind getRegionKind() const { return RegionKind; } OpenMPDirectiveKind getDirectiveKind() const { return Kind; } @@ -82,6 +84,8 @@ public: return Info->getKind() == CR_OpenMP; } + ~CGOpenMPRegionInfo() override = default; + protected: CGOpenMPRegionKind RegionKind; RegionCodeGenTy CodeGen; @@ -90,7 +94,7 @@ protected: }; /// \brief API for captured statement code generation in OpenMP constructs. -class CGOpenMPOutlinedRegionInfo : public CGOpenMPRegionInfo { +class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo { public: CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, const RegionCodeGenTy &CodeGen, @@ -100,6 +104,7 @@ public: ThreadIDVar(ThreadIDVar) { assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); } + /// \brief Get a variable or parameter for storing global thread id /// inside OpenMP construct. const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } @@ -120,16 +125,65 @@ private: }; /// \brief API for captured statement code generation in OpenMP constructs. -class CGOpenMPTaskOutlinedRegionInfo : public CGOpenMPRegionInfo { +class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo { public: + class UntiedTaskActionTy final : public PrePostActionTy { + bool Untied; + const VarDecl *PartIDVar; + const RegionCodeGenTy UntiedCodeGen; + llvm::SwitchInst *UntiedSwitch = nullptr; + + public: + UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar, + const RegionCodeGenTy &UntiedCodeGen) + : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {} + void Enter(CodeGenFunction &CGF) override { + if (Untied) { + // Emit task switching point. 
+ auto PartIdLVal = CGF.EmitLoadOfPointerLValue( + CGF.GetAddrOfLocalVar(PartIDVar), + PartIDVar->getType()->castAs<PointerType>()); + auto *Res = CGF.EmitLoadOfScalar(PartIdLVal, SourceLocation()); + auto *DoneBB = CGF.createBasicBlock(".untied.done."); + UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB); + CGF.EmitBlock(DoneBB); + CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); + CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); + UntiedSwitch->addCase(CGF.Builder.getInt32(0), + CGF.Builder.GetInsertBlock()); + emitUntiedSwitch(CGF); + } + } + void emitUntiedSwitch(CodeGenFunction &CGF) const { + if (Untied) { + auto PartIdLVal = CGF.EmitLoadOfPointerLValue( + CGF.GetAddrOfLocalVar(PartIDVar), + PartIDVar->getType()->castAs<PointerType>()); + CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), + PartIdLVal); + UntiedCodeGen(CGF); + CodeGenFunction::JumpDest CurPoint = + CGF.getJumpDestInCurrentScope(".untied.next."); + CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); + CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); + UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), + CGF.Builder.GetInsertBlock()); + CGF.EmitBranchThroughCleanup(CurPoint); + CGF.EmitBlock(CurPoint.getBlock()); + } + } + unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); } + }; CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, const RegionCodeGenTy &CodeGen, - OpenMPDirectiveKind Kind, bool HasCancel) + OpenMPDirectiveKind Kind, bool HasCancel, + const UntiedTaskActionTy &Action) : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), - ThreadIDVar(ThreadIDVar) { + ThreadIDVar(ThreadIDVar), Action(Action) { assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); } + /// \brief Get a variable or parameter for storing global thread id /// inside OpenMP construct. const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } @@ -140,6 +194,10 @@ public: /// \brief Get the name of the capture helper. StringRef getHelperName() const override { return ".omp_outlined."; } + void emitUntiedSwitch(CodeGenFunction &CGF) override { + Action.emitUntiedSwitch(CGF); + } + static bool classof(const CGCapturedStmtInfo *Info) { return CGOpenMPRegionInfo::classof(Info) && cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == @@ -150,6 +208,8 @@ private: /// \brief A variable or parameter storing global thread id for OpenMP /// constructs. const VarDecl *ThreadIDVar; + /// Action for emitting code for untied tasks. + const UntiedTaskActionTy &Action; }; /// \brief API for inlined captured statement code generation in OpenMP @@ -162,12 +222,14 @@ public: : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel), OldCSI(OldCSI), OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} + // \brief Retrieve the value of the context parameter. llvm::Value *getContextValue() const override { if (OuterRegionInfo) return OuterRegionInfo->getContextValue(); llvm_unreachable("No context value for inlined OpenMP region"); } + void setContextValue(llvm::Value *V) override { if (OuterRegionInfo) { OuterRegionInfo->setContextValue(V); @@ -175,6 +237,7 @@ public: } llvm_unreachable("No context value for inlined OpenMP region"); } + /// \brief Lookup the captured field decl for a variable. const FieldDecl *lookup(const VarDecl *VD) const override { if (OuterRegionInfo) @@ -183,11 +246,13 @@ public: // captured variables, we can use the original one. 
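UntiedTaskActionTy gives an untied task a resumable body: on entry the stored part id is switched on, and at every task scheduling point emitUntiedSwitch records the next part id and returns to the runtime so the task can later resume at that point. Roughly, the outlined task entry ends up shaped like this; a sketch with simplified types, not the literal generated IR:

    struct TaskPartId { int part_id; };   // simplified stand-in for the real task data

    void untiedTaskEntrySketch(TaskPartId *task) {
      switch (task->part_id) {     // the switch emitted in Enter() (".untied.jmp." cases)
      case 0: break;               // first execution starts at the beginning
      case 1: goto part1;
      default: return;             // ".untied.done."
      }

      /* ... task body up to the first task scheduling point ... */

      task->part_id = 1;           // emitUntiedSwitch(): remember where to resume
      return;                      // hand control back to the runtime

    part1:
      /* ... remainder of the task body ... */
      return;
    }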
return nullptr; } + FieldDecl *getThisFieldDecl() const override { if (OuterRegionInfo) return OuterRegionInfo->getThisFieldDecl(); return nullptr; } + /// \brief Get a variable or parameter for storing global thread id /// inside OpenMP construct. const VarDecl *getThreadIDVariable() const override { @@ -203,6 +268,11 @@ public: llvm_unreachable("No helper name for inlined OpenMP construct"); } + void emitUntiedSwitch(CodeGenFunction &CGF) override { + if (OuterRegionInfo) + OuterRegionInfo->emitUntiedSwitch(CGF); + } + CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } static bool classof(const CGCapturedStmtInfo *Info) { @@ -210,6 +280,8 @@ public: cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion; } + ~CGOpenMPInlinedRegionInfo() override = default; + private: /// \brief CodeGen info about outer OpenMP region. CodeGenFunction::CGCapturedStmtInfo *OldCSI; @@ -221,7 +293,7 @@ private: /// captured fields. The name of the target region has to be unique in a given /// application so it is provided by the client, because only the client has /// the information to generate that. -class CGOpenMPTargetRegionInfo : public CGOpenMPRegionInfo { +class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo { public: CGOpenMPTargetRegionInfo(const CapturedStmt &CS, const RegionCodeGenTy &CodeGen, StringRef HelperName) @@ -245,9 +317,75 @@ private: StringRef HelperName; }; +static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) { + llvm_unreachable("No codegen for expressions"); +} +/// \brief API for generation of expressions captured in a innermost OpenMP +/// region. +class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo { +public: + CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS) + : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen, + OMPD_unknown, + /*HasCancel=*/false), + PrivScope(CGF) { + // Make sure the globals captured in the provided statement are local by + // using the privatization logic. We assume the same variable is not + // captured more than once. + for (auto &C : CS.captures()) { + if (!C.capturesVariable() && !C.capturesVariableByCopy()) + continue; + + const VarDecl *VD = C.getCapturedVar(); + if (VD->isLocalVarDeclOrParm()) + continue; + + DeclRefExpr DRE(const_cast<VarDecl *>(VD), + /*RefersToEnclosingVariableOrCapture=*/false, + VD->getType().getNonReferenceType(), VK_LValue, + SourceLocation()); + PrivScope.addPrivate(VD, [&CGF, &DRE]() -> Address { + return CGF.EmitLValue(&DRE).getAddress(); + }); + } + (void)PrivScope.Privatize(); + } + + /// \brief Lookup the captured field decl for a variable. + const FieldDecl *lookup(const VarDecl *VD) const override { + if (auto *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) + return FD; + return nullptr; + } + + /// \brief Emit the captured statement body. + void EmitBody(CodeGenFunction &CGF, const Stmt *S) override { + llvm_unreachable("No body for expressions"); + } + + /// \brief Get a variable or parameter for storing global thread id + /// inside OpenMP construct. + const VarDecl *getThreadIDVariable() const override { + llvm_unreachable("No thread id for expressions"); + } + + /// \brief Get the name of the capture helper. + StringRef getHelperName() const override { + llvm_unreachable("No helper name for expressions"); + } + + static bool classof(const CGCapturedStmtInfo *Info) { return false; } + +private: + /// Private scope to capture global variables. 
+ CodeGenFunction::OMPPrivateScope PrivScope; +}; + /// \brief RAII for emitting code of OpenMP constructs. class InlinedOpenMPRegionRAII { CodeGenFunction &CGF; + llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields; + FieldDecl *LambdaThisCaptureField = nullptr; public: /// \brief Constructs region for combined constructs. @@ -260,30 +398,306 @@ public: // Start emission for the construct. CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); + std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); + LambdaThisCaptureField = CGF.LambdaThisCaptureField; + CGF.LambdaThisCaptureField = nullptr; } + ~InlinedOpenMPRegionRAII() { // Restore original CapturedStmtInfo only if we're done with code emission. auto *OldCSI = cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); delete CGF.CapturedStmtInfo; CGF.CapturedStmtInfo = OldCSI; + std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); + CGF.LambdaThisCaptureField = LambdaThisCaptureField; + } +}; + +/// \brief Values for bit flags used in the ident_t to describe the fields. +/// All enumeric elements are named and described in accordance with the code +/// from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h +enum OpenMPLocationFlags { + /// \brief Use trampoline for internal microtask. + OMP_IDENT_IMD = 0x01, + /// \brief Use c-style ident structure. + OMP_IDENT_KMPC = 0x02, + /// \brief Atomic reduction option for kmpc_reduce. + OMP_ATOMIC_REDUCE = 0x10, + /// \brief Explicit 'barrier' directive. + OMP_IDENT_BARRIER_EXPL = 0x20, + /// \brief Implicit barrier in code. + OMP_IDENT_BARRIER_IMPL = 0x40, + /// \brief Implicit barrier in 'for' directive. + OMP_IDENT_BARRIER_IMPL_FOR = 0x40, + /// \brief Implicit barrier in 'sections' directive. + OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, + /// \brief Implicit barrier in 'single' directive. + OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140 +}; + +/// \brief Describes ident structure that describes a source location. +/// All descriptions are taken from +/// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h +/// Original structure: +/// typedef struct ident { +/// kmp_int32 reserved_1; /**< might be used in Fortran; +/// see above */ +/// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; +/// KMP_IDENT_KMPC identifies this union +/// member */ +/// kmp_int32 reserved_2; /**< not really used in Fortran any more; +/// see above */ +///#if USE_ITT_BUILD +/// /* but currently used for storing +/// region-specific ITT */ +/// /* contextual information. */ +///#endif /* USE_ITT_BUILD */ +/// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for +/// C++ */ +/// char const *psource; /**< String describing the source location. +/// The string is composed of semi-colon separated +// fields which describe the source file, +/// the function and a pair of line numbers that +/// delimit the construct. +/// */ +/// } ident_t; +enum IdentFieldIndex { + /// \brief might be used in Fortran + IdentField_Reserved_1, + /// \brief OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member. + IdentField_Flags, + /// \brief Not really used in Fortran any more + IdentField_Reserved_2, + /// \brief Source[4] in Fortran, do not use for C++ + IdentField_Reserved_3, + /// \brief String describing the source location. The string is composed of + /// semi-colon separated fields which describe the source file, the function + /// and a pair of line numbers that delimit the construct. 
+ IdentField_PSource +}; + +/// \brief Schedule types for 'omp for' loops (these enumerators are taken from +/// the enum sched_type in kmp.h). +enum OpenMPSchedType { + /// \brief Lower bound for default (unordered) versions. + OMP_sch_lower = 32, + OMP_sch_static_chunked = 33, + OMP_sch_static = 34, + OMP_sch_dynamic_chunked = 35, + OMP_sch_guided_chunked = 36, + OMP_sch_runtime = 37, + OMP_sch_auto = 38, + /// static with chunk adjustment (e.g., simd) + OMP_sch_static_balanced_chunked = 45, + /// \brief Lower bound for 'ordered' versions. + OMP_ord_lower = 64, + OMP_ord_static_chunked = 65, + OMP_ord_static = 66, + OMP_ord_dynamic_chunked = 67, + OMP_ord_guided_chunked = 68, + OMP_ord_runtime = 69, + OMP_ord_auto = 70, + OMP_sch_default = OMP_sch_static, + /// \brief dist_schedule types + OMP_dist_sch_static_chunked = 91, + OMP_dist_sch_static = 92, + /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. + /// Set if the monotonic schedule modifier was present. + OMP_sch_modifier_monotonic = (1 << 29), + /// Set if the nonmonotonic schedule modifier was present. + OMP_sch_modifier_nonmonotonic = (1 << 30), +}; + +enum OpenMPRTLFunction { + /// \brief Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, + /// kmpc_micro microtask, ...); + OMPRTL__kmpc_fork_call, + /// \brief Call to void *__kmpc_threadprivate_cached(ident_t *loc, + /// kmp_int32 global_tid, void *data, size_t size, void ***cache); + OMPRTL__kmpc_threadprivate_cached, + /// \brief Call to void __kmpc_threadprivate_register( ident_t *, + /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); + OMPRTL__kmpc_threadprivate_register, + // Call to __kmpc_int32 kmpc_global_thread_num(ident_t *loc); + OMPRTL__kmpc_global_thread_num, + // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, + // kmp_critical_name *crit); + OMPRTL__kmpc_critical, + // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 + // global_tid, kmp_critical_name *crit, uintptr_t hint); + OMPRTL__kmpc_critical_with_hint, + // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, + // kmp_critical_name *crit); + OMPRTL__kmpc_end_critical, + // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 + // global_tid); + OMPRTL__kmpc_cancel_barrier, + // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); + OMPRTL__kmpc_barrier, + // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); + OMPRTL__kmpc_for_static_fini, + // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 + // global_tid); + OMPRTL__kmpc_serialized_parallel, + // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 + // global_tid); + OMPRTL__kmpc_end_serialized_parallel, + // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, + // kmp_int32 num_threads); + OMPRTL__kmpc_push_num_threads, + // Call to void __kmpc_flush(ident_t *loc); + OMPRTL__kmpc_flush, + // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid); + OMPRTL__kmpc_master, + // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid); + OMPRTL__kmpc_end_master, + // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, + // int end_part); + OMPRTL__kmpc_omp_taskyield, + // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid); + OMPRTL__kmpc_single, + // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid); + OMPRTL__kmpc_end_single, + // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, + 
// kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, + // kmp_routine_entry_t *task_entry); + OMPRTL__kmpc_omp_task_alloc, + // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t * + // new_task); + OMPRTL__kmpc_omp_task, + // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, + // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), + // kmp_int32 didit); + OMPRTL__kmpc_copyprivate, + // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, + // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void + // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); + OMPRTL__kmpc_reduce, + // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 + // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, + // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name + // *lck); + OMPRTL__kmpc_reduce_nowait, + // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, + // kmp_critical_name *lck); + OMPRTL__kmpc_end_reduce, + // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, + // kmp_critical_name *lck); + OMPRTL__kmpc_end_reduce_nowait, + // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, + // kmp_task_t * new_task); + OMPRTL__kmpc_omp_task_begin_if0, + // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, + // kmp_task_t * new_task); + OMPRTL__kmpc_omp_task_complete_if0, + // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); + OMPRTL__kmpc_ordered, + // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); + OMPRTL__kmpc_end_ordered, + // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 + // global_tid); + OMPRTL__kmpc_omp_taskwait, + // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid); + OMPRTL__kmpc_taskgroup, + // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); + OMPRTL__kmpc_end_taskgroup, + // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, + // int proc_bind); + OMPRTL__kmpc_push_proc_bind, + // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 + // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t + // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); + OMPRTL__kmpc_omp_task_with_deps, + // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 + // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 + // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); + OMPRTL__kmpc_omp_wait_deps, + // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 + // global_tid, kmp_int32 cncl_kind); + OMPRTL__kmpc_cancellationpoint, + // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, + // kmp_int32 cncl_kind); + OMPRTL__kmpc_cancel, + // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, + // kmp_int32 num_teams, kmp_int32 thread_limit); + OMPRTL__kmpc_push_num_teams, + // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro + // microtask, ...); + OMPRTL__kmpc_fork_teams, + // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int + // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int + // sched, kmp_uint64 grainsize, void *task_dup); + OMPRTL__kmpc_taskloop, + // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 + // num_dims, struct kmp_dim *dims); + 
OMPRTL__kmpc_doacross_init, + // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); + OMPRTL__kmpc_doacross_fini, + // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64 + // *vec); + OMPRTL__kmpc_doacross_post, + // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 + // *vec); + OMPRTL__kmpc_doacross_wait, + + // + // Offloading related calls + // + // Call to int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t + // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t + // *arg_types); + OMPRTL__tgt_target, + // Call to int32_t __tgt_target_teams(int32_t device_id, void *host_ptr, + // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, + // int32_t *arg_types, int32_t num_teams, int32_t thread_limit); + OMPRTL__tgt_target_teams, + // Call to void __tgt_register_lib(__tgt_bin_desc *desc); + OMPRTL__tgt_register_lib, + // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc); + OMPRTL__tgt_unregister_lib, + // Call to void __tgt_target_data_begin(int32_t device_id, int32_t arg_num, + // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); + OMPRTL__tgt_target_data_begin, + // Call to void __tgt_target_data_end(int32_t device_id, int32_t arg_num, + // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); + OMPRTL__tgt_target_data_end, + // Call to void __tgt_target_data_update(int32_t device_id, int32_t arg_num, + // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); + OMPRTL__tgt_target_data_update, +}; + +/// A basic class for pre|post-action for advanced codegen sequence for OpenMP +/// region. +class CleanupTy final : public EHScopeStack::Cleanup { + PrePostActionTy *Action; + +public: + explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {} + void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { + if (!CGF.HaveInsertPoint()) + return; + Action->Exit(CGF); } }; } // anonymous namespace -static LValue emitLoadOfPointerLValue(CodeGenFunction &CGF, Address PtrAddr, - QualType Ty) { - AlignmentSource Source; - CharUnits Align = CGF.getNaturalPointeeTypeAlignment(Ty, &Source); - return CGF.MakeAddrLValue(Address(CGF.Builder.CreateLoad(PtrAddr), Align), - Ty->getPointeeType(), Source); +void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { + CodeGenFunction::RunCleanupsScope Scope(CGF); + if (PrePostAction) { + CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction); + Callback(CodeGen, CGF, *PrePostAction); + } else { + PrePostActionTy Action; + Callback(CodeGen, CGF, Action); + } } LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { - return emitLoadOfPointerLValue(CGF, - CGF.GetAddrOfLocalVar(getThreadIDVariable()), - getThreadIDVariable()->getType()); + return CGF.EmitLoadOfPointerLValue( + CGF.GetAddrOfLocalVar(getThreadIDVariable()), + getThreadIDVariable()->getType()->castAs<PointerType>()); } void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) { @@ -295,10 +709,7 @@ void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) { // The point of exit cannot be a branch out of the structured block. // longjmp() and throw() must not violate the entry/exit criteria. 
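The OpenMPRTLFunction list above enumerates the libomp entry points this file emits calls to. The first of them, __kmpc_fork_call, is how a parallel region is launched: the region body is outlined into a kmpc_micro-style function and the captured variables are forwarded as trailing arguments. A self-contained sketch of that shape, with the ident_t argument elided and the integer types simplified; it is not meant to be linked against a real runtime:

    #include <cstdarg>

    typedef void kmpc_micro(int *global_tid, int *bound_tid, ...);
    extern "C" void __kmpc_fork_call(void *loc, int argc, kmpc_micro *microtask, ...);

    // Outlined body of '#pragma omp parallel shared(n)'.
    static void outlinedParallelBody(int *global_tid, int *bound_tid, ...) {
      va_list ap;
      va_start(ap, bound_tid);
      int *n = va_arg(ap, int *);   // the single shared variable
      va_end(ap);
      (void)global_tid; (void)bound_tid;
      ++*n;                         // stand-in for the real region body
    }

    void emitParallelSketch(int *n) {
      // What the runtime library call conceptually looks like for the directive:
      __kmpc_fork_call(/*loc=*/nullptr, /*argc=*/1, outlinedParallelBody, n);
    }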
CGF.EHStack.pushTerminate(); - { - CodeGenFunction::RunCleanupsScope Scope(CGF); - CodeGen(CGF); - } + CodeGen(CGF); CGF.EHStack.popTerminate(); } @@ -310,16 +721,11 @@ LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( } CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM) - : CGM(CGM), DefaultOpenMPPSource(nullptr), KmpRoutineEntryPtrTy(nullptr), - OffloadEntriesInfoManager(CGM) { + : CGM(CGM), OffloadEntriesInfoManager(CGM) { IdentTy = llvm::StructType::create( "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */, CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */, CGM.Int8PtrTy /* psource */, nullptr); - // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) - llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), - llvm::PointerType::getUnqual(CGM.Int32Ty)}; - Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); loadOffloadInfoMetadata(); @@ -329,6 +735,90 @@ void CGOpenMPRuntime::clear() { InternalVars.clear(); } +static llvm::Function * +emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, + const Expr *CombinerInitializer, const VarDecl *In, + const VarDecl *Out, bool IsCombiner) { + // void .omp_combiner.(Ty *in, Ty *out); + auto &C = CGM.getContext(); + QualType PtrTy = C.getPointerType(Ty).withRestrict(); + FunctionArgList Args; + ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(), + /*Id=*/nullptr, PtrTy); + ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(), + /*Id=*/nullptr, PtrTy); + Args.push_back(&OmpOutParm); + Args.push_back(&OmpInParm); + auto &FnInfo = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); + auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo); + auto *Fn = llvm::Function::Create( + FnTy, llvm::GlobalValue::InternalLinkage, + IsCombiner ? ".omp_combiner." : ".omp_initializer.", &CGM.getModule()); + CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo); + Fn->addFnAttr(llvm::Attribute::AlwaysInline); + CodeGenFunction CGF(CGM); + // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions. + // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions. 
+ CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args); + CodeGenFunction::OMPPrivateScope Scope(CGF); + Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm); + Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() -> Address { + return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>()) + .getAddress(); + }); + Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm); + Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() -> Address { + return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>()) + .getAddress(); + }); + (void)Scope.Privatize(); + CGF.EmitIgnoredExpr(CombinerInitializer); + Scope.ForceCleanup(); + CGF.FinishFunction(); + return Fn; +} + +void CGOpenMPRuntime::emitUserDefinedReduction( + CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) { + if (UDRMap.count(D) > 0) + return; + auto &C = CGM.getContext(); + if (!In || !Out) { + In = &C.Idents.get("omp_in"); + Out = &C.Idents.get("omp_out"); + } + llvm::Function *Combiner = emitCombinerOrInitializer( + CGM, D->getType(), D->getCombiner(), cast<VarDecl>(D->lookup(In).front()), + cast<VarDecl>(D->lookup(Out).front()), + /*IsCombiner=*/true); + llvm::Function *Initializer = nullptr; + if (auto *Init = D->getInitializer()) { + if (!Priv || !Orig) { + Priv = &C.Idents.get("omp_priv"); + Orig = &C.Idents.get("omp_orig"); + } + Initializer = emitCombinerOrInitializer( + CGM, D->getType(), Init, cast<VarDecl>(D->lookup(Orig).front()), + cast<VarDecl>(D->lookup(Priv).front()), + /*IsCombiner=*/false); + } + UDRMap.insert(std::make_pair(D, std::make_pair(Combiner, Initializer))); + if (CGF) { + auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn); + Decls.second.push_back(D); + } +} + +std::pair<llvm::Function *, llvm::Function *> +CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) { + auto I = UDRMap.find(D); + if (I != UDRMap.end()) + return I->second; + emitUserDefinedReduction(/*CGF=*/nullptr, D); + return UDRMap.lookup(D); +} + // Layout information for ident_t. static CharUnits getIdentAlign(CodeGenModule &CGM) { return CGM.getPointerAlign(); @@ -337,18 +827,18 @@ static CharUnits getIdentSize(CodeGenModule &CGM) { assert((4 * CGM.getPointerSize()).isMultipleOf(CGM.getPointerAlign())); return CharUnits::fromQuantity(16) + CGM.getPointerSize(); } -static CharUnits getOffsetOfIdentField(CGOpenMPRuntime::IdentFieldIndex Field) { +static CharUnits getOffsetOfIdentField(IdentFieldIndex Field) { // All the fields except the last are i32, so this works beautifully. 
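As a usage reference (plain OpenMP 4.0 syntax, not generated code), this is the kind of declare-reduction construct that emitUserDefinedReduction and emitCombinerOrInitializer above lower: the combiner and initializer expressions become internal .omp_combiner./.omp_initializer. functions in which omp_in/omp_out (and omp_orig/omp_priv) are remapped to pointer parameters. The reduction identifier "plus" and the function below are illustrative.

#pragma omp declare reduction(plus : long : omp_out += omp_in) \
    initializer(omp_priv = 0)

long sum(const long *v, int n) {
  long s = 0;
#pragma omp parallel for reduction(plus : s)
  for (int i = 0; i < n; ++i)
    s += v[i];
  return s;
}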
return unsigned(Field) * CharUnits::fromQuantity(4); } static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr, - CGOpenMPRuntime::IdentFieldIndex Field, + IdentFieldIndex Field, const llvm::Twine &Name = "") { auto Offset = getOffsetOfIdentField(Field); return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name); } -llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction( +llvm::Value *CGOpenMPRuntime::emitParallelOrTeamsOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { assert(ThreadIDVar->getType()->isPointerType() && @@ -370,19 +860,39 @@ llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction( llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, - OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { + const VarDecl *PartIDVar, const VarDecl *TaskTVar, + OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, + bool Tied, unsigned &NumberOfParts) { + auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF, + PrePostActionTy &) { + auto *ThreadID = getThreadID(CGF, D.getLocStart()); + auto *UpLoc = emitUpdateLocation(CGF, D.getLocStart()); + llvm::Value *TaskArgs[] = { + UpLoc, ThreadID, + CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), + TaskTVar->getType()->castAs<PointerType>()) + .getPointer()}; + CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs); + }; + CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar, + UntiedCodeGen); + CodeGen.setAction(Action); assert(!ThreadIDVar->getType()->isPointerType() && "thread id variable must be of type kmp_int32 for tasks"); auto *CS = cast<CapturedStmt>(D.getAssociatedStmt()); + auto *TD = dyn_cast<OMPTaskDirective>(&D); CodeGenFunction CGF(CGM, true); CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, - cast<OMPTaskDirective>(D).hasCancel()); + TD ? TD->hasCancel() : false, Action); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); - return CGF.GenerateCapturedStmtFunction(*CS); + auto *Res = CGF.GenerateCapturedStmtFunction(*CS); + if (!Tied) + NumberOfParts = Action.getNumberOfParts(); + return Res; } -Address CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) { +Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) { CharUnits Align = getIdentAlign(CGM); llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags); if (!Entry) { @@ -399,7 +909,7 @@ Address CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) { auto DefaultOpenMPLocation = new llvm::GlobalVariable( CGM.getModule(), IdentTy, /*isConstant*/ true, llvm::GlobalValue::PrivateLinkage, /*Initializer*/ nullptr); - DefaultOpenMPLocation->setUnnamedAddr(true); + DefaultOpenMPLocation->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); DefaultOpenMPLocation->setAlignment(Align.getQuantity()); llvm::Constant *Zero = llvm::ConstantInt::get(CGM.Int32Ty, 0, true); @@ -415,9 +925,10 @@ Address CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) { llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, - OpenMPLocationFlags Flags) { + unsigned Flags) { + Flags |= OMP_IDENT_KMPC; // If no debug info is generated - return global default location. 
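A standalone sketch (field order taken from the ident_t fields built in the CGOpenMPRuntime constructor above; the C struct itself is an assumption for illustration) of why getOffsetOfIdentField can return Field * 4: every field before psource is a 32-bit integer, matching getIdentSize's "16 bytes plus one pointer".

#include <cstddef>
#include <cstdint>
#include <cstdio>

struct ident_t {
  int32_t reserved_1;
  int32_t flags;
  int32_t reserved_2;
  int32_t reserved_3;
  const char *psource;  // source location string
};

int main() {
  std::printf("flags at %zu, psource at %zu, size %zu\n",
              offsetof(ident_t, flags), offsetof(ident_t, psource),
              sizeof(ident_t));  // 4, 16, and 16 + pointer size on LP64
}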
- if (CGM.getCodeGenOpts().getDebugInfo() == CodeGenOptions::NoDebugInfo || + if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo || Loc.isInvalid()) return getOrCreateDefaultLocation(Flags).getPointer(); @@ -517,20 +1028,34 @@ void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { assert(CGF.CurFn && "No function in current CodeGenFunction."); if (OpenMPLocThreadIDMap.count(CGF.CurFn)) OpenMPLocThreadIDMap.erase(CGF.CurFn); + if (FunctionUDRMap.count(CGF.CurFn) > 0) { + for(auto *D : FunctionUDRMap[CGF.CurFn]) { + UDRMap.erase(D); + } + FunctionUDRMap.erase(CGF.CurFn); + } } llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { + if (!IdentTy) { + } return llvm::PointerType::getUnqual(IdentTy); } llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { + if (!Kmpc_MicroTy) { + // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) + llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), + llvm::PointerType::getUnqual(CGM.Int32Ty)}; + Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); + } return llvm::PointerType::getUnqual(Kmpc_MicroTy); } llvm::Constant * -CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) { +CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { llvm::Constant *RTLFn = nullptr; - switch (Function) { + switch (static_cast<OpenMPRTLFunction>(Function)) { case OMPRTL__kmpc_fork_call: { // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro // microtask, ...); @@ -927,6 +1452,86 @@ CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) { RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel"); break; } + case OMPRTL__kmpc_push_num_teams: { + // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid, + // kmp_int32 num_teams, kmp_int32 num_threads) + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, + CGM.Int32Ty}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams"); + break; + } + case OMPRTL__kmpc_fork_teams: { + // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro + // microtask, ...); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, + getKmpc_MicroPointerTy()}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams"); + break; + } + case OMPRTL__kmpc_taskloop: { + // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int + // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int + // sched, kmp_uint64 grainsize, void *task_dup); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), + CGM.IntTy, + CGM.VoidPtrTy, + CGM.IntTy, + CGM.Int64Ty->getPointerTo(), + CGM.Int64Ty->getPointerTo(), + CGM.Int64Ty, + CGM.IntTy, + CGM.IntTy, + CGM.Int64Ty, + CGM.VoidPtrTy}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop"); + break; + } + case OMPRTL__kmpc_doacross_init: { + // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 + // num_dims, struct kmp_dim *dims); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), + CGM.Int32Ty, + CGM.Int32Ty, + CGM.VoidPtrTy}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); + RTLFn = 
CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init"); + break; + } + case OMPRTL__kmpc_doacross_fini: { + // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini"); + break; + } + case OMPRTL__kmpc_doacross_post: { + // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64 + // *vec); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, + CGM.Int64Ty->getPointerTo()}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post"); + break; + } + case OMPRTL__kmpc_doacross_wait: { + // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 + // *vec); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, + CGM.Int64Ty->getPointerTo()}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait"); + break; + } case OMPRTL__tgt_target: { // Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t @@ -943,6 +1548,24 @@ CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) { RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target"); break; } + case OMPRTL__tgt_target_teams: { + // Build int32_t __tgt_target_teams(int32_t device_id, void *host_ptr, + // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, + // int32_t *arg_types, int32_t num_teams, int32_t thread_limit); + llvm::Type *TypeParams[] = {CGM.Int32Ty, + CGM.VoidPtrTy, + CGM.Int32Ty, + CGM.VoidPtrPtrTy, + CGM.VoidPtrPtrTy, + CGM.SizeTy->getPointerTo(), + CGM.Int32Ty->getPointerTo(), + CGM.Int32Ty, + CGM.Int32Ty}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams"); + break; + } case OMPRTL__tgt_register_lib: { // Build void __tgt_register_lib(__tgt_bin_desc *desc); QualType ParamTy = @@ -963,30 +1586,53 @@ CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) { RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib"); break; } + case OMPRTL__tgt_target_data_begin: { + // Build void __tgt_target_data_begin(int32_t device_id, int32_t arg_num, + // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); + llvm::Type *TypeParams[] = {CGM.Int32Ty, + CGM.Int32Ty, + CGM.VoidPtrPtrTy, + CGM.VoidPtrPtrTy, + CGM.SizeTy->getPointerTo(), + CGM.Int32Ty->getPointerTo()}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin"); + break; + } + case OMPRTL__tgt_target_data_end: { + // Build void __tgt_target_data_end(int32_t device_id, int32_t arg_num, + // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); + llvm::Type *TypeParams[] = {CGM.Int32Ty, + CGM.Int32Ty, + CGM.VoidPtrPtrTy, + CGM.VoidPtrPtrTy, + CGM.SizeTy->getPointerTo(), + CGM.Int32Ty->getPointerTo()}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end"); + 
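For context (standard OpenMP 4.5 device constructs; the function and variable names are illustrative), this is the source-level pattern that ends up calling the __tgt_target_data_begin/__tgt_target_data_end and __tgt_target_teams entry points declared above:

void scale(float *a, int n) {
#pragma omp target data map(tofrom : a[0:n])      // __tgt_target_data_begin / _end
  {
#pragma omp target teams distribute parallel for  // __tgt_target_teams
    for (int i = 0; i < n; ++i)
      a[i] *= 2.0f;
  }
}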
break; + } + case OMPRTL__tgt_target_data_update: { + // Build void __tgt_target_data_update(int32_t device_id, int32_t arg_num, + // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types); + llvm::Type *TypeParams[] = {CGM.Int32Ty, + CGM.Int32Ty, + CGM.VoidPtrPtrTy, + CGM.VoidPtrPtrTy, + CGM.SizeTy->getPointerTo(), + CGM.Int32Ty->getPointerTo()}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update"); + break; + } } + assert(RTLFn && "Unable to find OpenMP runtime function"); return RTLFn; } -static llvm::Value *getTypeSize(CodeGenFunction &CGF, QualType Ty) { - auto &C = CGF.getContext(); - llvm::Value *Size = nullptr; - auto SizeInChars = C.getTypeSizeInChars(Ty); - if (SizeInChars.isZero()) { - // getTypeSizeInChars() returns 0 for a VLA. - while (auto *VAT = C.getAsVariableArrayType(Ty)) { - llvm::Value *ArraySize; - std::tie(ArraySize, Ty) = CGF.getVLASize(VAT); - Size = Size ? CGF.Builder.CreateNUWMul(Size, ArraySize) : ArraySize; - } - SizeInChars = C.getTypeSizeInChars(Ty); - assert(!SizeInChars.isZero()); - Size = CGF.Builder.CreateNUWMul( - Size, llvm::ConstantInt::get(CGF.SizeTy, SizeInChars.getQuantity())); - } else - Size = llvm::ConstantInt::get(CGF.SizeTy, SizeInChars.getQuantity()); - return Size; -} - llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) { assert((IVSize == 32 || IVSize == 64) && @@ -1144,9 +1790,8 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( /*Id=*/nullptr, CGM.getContext().VoidPtrTy); Args.push_back(&Dst); - auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration( - CGM.getContext().VoidPtrTy, Args, FunctionType::ExtInfo(), - /*isVariadic=*/false); + auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( + CGM.getContext().VoidPtrTy, Args); auto FTy = CGM.getTypes().GetFunctionType(FI); auto Fn = CGM.CreateGlobalInitOrDestructFunction( FTy, ".__kmpc_global_ctor_.", FI, Loc); @@ -1176,14 +1821,16 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( /*Id=*/nullptr, CGM.getContext().VoidPtrTy); Args.push_back(&Dst); - auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration( - CGM.getContext().VoidTy, Args, FunctionType::ExtInfo(), - /*isVariadic=*/false); + auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( + CGM.getContext().VoidTy, Args); auto FTy = CGM.getTypes().GetFunctionType(FI); auto Fn = CGM.CreateGlobalInitOrDestructFunction( FTy, ".__kmpc_global_dtor_.", FI, Loc); + auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, SourceLocation()); + // Create a scope with an artificial location for the body of this function. + auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); auto ArgVal = DtorCGF.EmitLoadOfScalar( DtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); @@ -1251,12 +1898,10 @@ static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, // the condition and the dead arm of the if/else. bool CondConstant; if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { - CodeGenFunction::RunCleanupsScope Scope(CGF); - if (CondConstant) { + if (CondConstant) ThenGen(CGF); - } else { + else ElseGen(CGF); - } return; } @@ -1269,26 +1914,16 @@ static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, // Emit the 'then' code. 
CGF.EmitBlock(ThenBlock); - { - CodeGenFunction::RunCleanupsScope ThenScope(CGF); - ThenGen(CGF); - } + ThenGen(CGF); CGF.EmitBranch(ContBlock); // Emit the 'else' code if present. - { - // There is no need to emit line number for unconditional branch. - auto NL = ApplyDebugLocation::CreateEmpty(CGF); - CGF.EmitBlock(ElseBlock); - } - { - CodeGenFunction::RunCleanupsScope ThenScope(CGF); - ElseGen(CGF); - } - { - // There is no need to emit line number for unconditional branch. - auto NL = ApplyDebugLocation::CreateEmpty(CGF); - CGF.EmitBranch(ContBlock); - } + // There is no need to emit line number for unconditional branch. + (void)ApplyDebugLocation::CreateEmpty(CGF); + CGF.EmitBlock(ElseBlock); + ElseGen(CGF); + // There is no need to emit line number for unconditional branch. + (void)ApplyDebugLocation::CreateEmpty(CGF); + CGF.EmitBranch(ContBlock); // Emit the continuation block for code after the if. CGF.EmitBlock(ContBlock, /*IsFinished=*/true); } @@ -1300,34 +1935,36 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, if (!CGF.HaveInsertPoint()) return; auto *RTLoc = emitUpdateLocation(CGF, Loc); - auto &&ThenGen = [this, OutlinedFn, CapturedVars, - RTLoc](CodeGenFunction &CGF) { + auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF, + PrePostActionTy &) { // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); + auto &RT = CGF.CGM.getOpenMPRuntime(); llvm::Value *Args[] = { RTLoc, CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars - CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; + CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())}; llvm::SmallVector<llvm::Value *, 16> RealArgs; RealArgs.append(std::begin(Args), std::end(Args)); RealArgs.append(CapturedVars.begin(), CapturedVars.end()); - auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_call); + auto RTLFn = RT.createRuntimeFunction(OMPRTL__kmpc_fork_call); CGF.EmitRuntimeCall(RTLFn, RealArgs); }; - auto &&ElseGen = [this, OutlinedFn, CapturedVars, RTLoc, - Loc](CodeGenFunction &CGF) { - auto ThreadID = getThreadID(CGF, Loc); + auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF, + PrePostActionTy &) { + auto &RT = CGF.CGM.getOpenMPRuntime(); + auto ThreadID = RT.getThreadID(CGF, Loc); // Build calls: // __kmpc_serialized_parallel(&Loc, GTid); llvm::Value *Args[] = {RTLoc, ThreadID}; - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), - Args); + CGF.EmitRuntimeCall( + RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args); // OutlinedFn(>id, &zero, CapturedStruct); - auto ThreadIDAddr = emitThreadIDAddress(CGF, Loc); + auto ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc); Address ZeroAddr = - CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4), - /*Name*/ ".zero.addr"); + CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4), + /*Name*/ ".zero.addr"); CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); @@ -1336,15 +1973,16 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs); // __kmpc_end_serialized_parallel(&Loc, GTid); - llvm::Value *EndArgs[] = {emitUpdateLocation(CGF, Loc), ThreadID}; + llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID}; CGF.EmitRuntimeCall( - createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), 
EndArgs); + RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), + EndArgs); }; - if (IfCond) { + if (IfCond) emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); - } else { - CodeGenFunction::RunCleanupsScope Scope(CGF); - ThenGen(CGF); + else { + RegionCodeGenTy ThenRCG(ThenGen); + ThenRCG(CGF); } } @@ -1397,20 +2035,39 @@ llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { } namespace { -template <size_t N> class CallEndCleanup final : public EHScopeStack::Cleanup { - llvm::Value *Callee; - llvm::Value *Args[N]; +/// Common pre(post)-action for different OpenMP constructs. +class CommonActionTy final : public PrePostActionTy { + llvm::Value *EnterCallee; + ArrayRef<llvm::Value *> EnterArgs; + llvm::Value *ExitCallee; + ArrayRef<llvm::Value *> ExitArgs; + bool Conditional; + llvm::BasicBlock *ContBlock = nullptr; public: - CallEndCleanup(llvm::Value *Callee, ArrayRef<llvm::Value *> CleanupArgs) - : Callee(Callee) { - assert(CleanupArgs.size() == N); - std::copy(CleanupArgs.begin(), CleanupArgs.end(), std::begin(Args)); + CommonActionTy(llvm::Value *EnterCallee, ArrayRef<llvm::Value *> EnterArgs, + llvm::Value *ExitCallee, ArrayRef<llvm::Value *> ExitArgs, + bool Conditional = false) + : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), + ExitArgs(ExitArgs), Conditional(Conditional) {} + void Enter(CodeGenFunction &CGF) override { + llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); + if (Conditional) { + llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); + auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); + ContBlock = CGF.createBasicBlock("omp_if.end"); + // Generate the branch (If-stmt) + CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); + CGF.EmitBlock(ThenBlock); + } } - void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { - if (!CGF.HaveInsertPoint()) - return; - CGF.EmitRuntimeCall(Callee, Args); + void Done(CodeGenFunction &CGF) { + // Emit the rest of blocks/branches + CGF.EmitBranch(ContBlock); + CGF.EmitBlock(ContBlock, true); + } + void Exit(CodeGenFunction &CGF) override { + CGF.EmitRuntimeCall(ExitCallee, ExitArgs); } }; } // anonymous namespace @@ -1425,45 +2082,22 @@ void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, // Prepare arguments and build a call to __kmpc_critical if (!CGF.HaveInsertPoint()) return; - CodeGenFunction::RunCleanupsScope Scope(CGF); llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), getCriticalRegionLock(CriticalName)}; + llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), + std::end(Args)); if (Hint) { - llvm::SmallVector<llvm::Value *, 8> ArgsWithHint(std::begin(Args), - std::end(Args)); - auto *HintVal = CGF.EmitScalarExpr(Hint); - ArgsWithHint.push_back( - CGF.Builder.CreateIntCast(HintVal, CGM.IntPtrTy, /*isSigned=*/false)); - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical_with_hint), - ArgsWithHint); - } else - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical), Args); - // Build a call to __kmpc_end_critical - CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>( - NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_critical), - llvm::makeArrayRef(Args)); + EnterArgs.push_back(CGF.Builder.CreateIntCast( + CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false)); + } + CommonActionTy Action( + createRuntimeFunction(Hint ? 
OMPRTL__kmpc_critical_with_hint + : OMPRTL__kmpc_critical), + EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args); + CriticalOpGen.setAction(Action); emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen); } -static void emitIfStmt(CodeGenFunction &CGF, llvm::Value *IfCond, - OpenMPDirectiveKind Kind, SourceLocation Loc, - const RegionCodeGenTy &BodyOpGen) { - llvm::Value *CallBool = CGF.EmitScalarConversion( - IfCond, - CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true), - CGF.getContext().BoolTy, Loc); - - auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); - auto *ContBlock = CGF.createBasicBlock("omp_if.end"); - // Generate the branch (If-stmt) - CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); - CGF.EmitBlock(ThenBlock); - CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, Kind, BodyOpGen); - // Emit the rest of bblocks/branches - CGF.EmitBranch(ContBlock); - CGF.EmitBlock(ContBlock, true); -} - void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, SourceLocation Loc) { @@ -1475,18 +2109,12 @@ void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, // } // Prepare arguments and build a call to __kmpc_master llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; - auto *IsMaster = - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_master), Args); - typedef CallEndCleanup<std::extent<decltype(Args)>::value> - MasterCallEndCleanup; - emitIfStmt( - CGF, IsMaster, OMPD_master, Loc, [&](CodeGenFunction &CGF) -> void { - CodeGenFunction::RunCleanupsScope Scope(CGF); - CGF.EHStack.pushCleanup<MasterCallEndCleanup>( - NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_master), - llvm::makeArrayRef(Args)); - MasterOpGen(CGF); - }); + CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args, + createRuntimeFunction(OMPRTL__kmpc_end_master), Args, + /*Conditional=*/true); + MasterOpGen.setAction(Action); + emitInlinedDirective(CGF, OMPD_master, MasterOpGen); + Action.Done(CGF); } void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, @@ -1498,6 +2126,8 @@ void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args); + if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) + Region->emitUntiedSwitch(CGF); } void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, @@ -1509,16 +2139,12 @@ void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, // TaskgroupOpGen(); // __kmpc_end_taskgroup(ident_t *, gtid); // Prepare arguments and build a call to __kmpc_taskgroup - { - CodeGenFunction::RunCleanupsScope Scope(CGF); - llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args); - // Build a call to __kmpc_end_taskgroup - CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>( - NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_taskgroup), - llvm::makeArrayRef(Args)); - emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); - } + llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; + CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args, + createRuntimeFunction(OMPRTL__kmpc_end_taskgroup), + Args); + TaskgroupOpGen.setAction(Action); + 
emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); } /// Given an array of pointers to variables, project the address of a @@ -1549,9 +2175,7 @@ static llvm::Value *emitCopyprivateCopyFunction( C.VoidPtrTy); Args.push_back(&LHSArg); Args.push_back(&RHSArg); - FunctionType::ExtInfo EI; - auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration( - C.VoidTy, Args, EI, /*isVariadic=*/false); + auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); auto *Fn = llvm::Function::Create( CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, ".omp.copyprivate.copy_func", &CGM.getModule()); @@ -1616,22 +2240,16 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, } // Prepare arguments and build a call to __kmpc_single llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; - auto *IsSingle = - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_single), Args); - typedef CallEndCleanup<std::extent<decltype(Args)>::value> - SingleCallEndCleanup; - emitIfStmt( - CGF, IsSingle, OMPD_single, Loc, [&](CodeGenFunction &CGF) -> void { - CodeGenFunction::RunCleanupsScope Scope(CGF); - CGF.EHStack.pushCleanup<SingleCallEndCleanup>( - NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_single), - llvm::makeArrayRef(Args)); - SingleOpGen(CGF); - if (DidIt.isValid()) { - // did_it = 1; - CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt); - } - }); + CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args, + createRuntimeFunction(OMPRTL__kmpc_end_single), Args, + /*Conditional=*/true); + SingleOpGen.setAction(Action); + emitInlinedDirective(CGF, OMPD_single, SingleOpGen); + if (DidIt.isValid()) { + // did_it = 1; + CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt); + } + Action.Done(CGF); // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, // <copy_func>, did_it); if (DidIt.isValid()) { @@ -1655,7 +2273,7 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, auto *CpyFn = emitCopyprivateCopyFunction( CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(), CopyprivateVars, SrcExprs, DstExprs, AssignmentOps); - auto *BufSize = getTypeSize(CGF, CopyprivateArrayTy); + auto *BufSize = CGF.getTypeSize(CopyprivateArrayTy); Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList, CGF.VoidPtrTy); @@ -1681,14 +2299,14 @@ void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, // OrderedOpGen(); // __kmpc_end_ordered(ident_t *, gtid); // Prepare arguments and build a call to __kmpc_ordered - CodeGenFunction::RunCleanupsScope Scope(CGF); if (IsThreads) { llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_ordered), Args); - // Build a call to __kmpc_end_ordered - CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>( - NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_ordered), - llvm::makeArrayRef(Args)); + CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args, + createRuntimeFunction(OMPRTL__kmpc_end_ordered), + Args); + OrderedOpGen.setAction(Action); + emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); + return; } emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); } @@ -1700,21 +2318,17 @@ void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, return; // Build call __kmpc_cancel_barrier(loc, thread_id); // Build call __kmpc_barrier(loc, thread_id); - OpenMPLocationFlags 
Flags = OMP_IDENT_KMPC; - if (Kind == OMPD_for) { - Flags = - static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_FOR); - } else if (Kind == OMPD_sections) { - Flags = static_cast<OpenMPLocationFlags>(Flags | - OMP_IDENT_BARRIER_IMPL_SECTIONS); - } else if (Kind == OMPD_single) { - Flags = - static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_SINGLE); - } else if (Kind == OMPD_barrier) { - Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_EXPL); - } else { - Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL); - } + unsigned Flags; + if (Kind == OMPD_for) + Flags = OMP_IDENT_BARRIER_IMPL_FOR; + else if (Kind == OMPD_sections) + Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS; + else if (Kind == OMPD_single) + Flags = OMP_IDENT_BARRIER_IMPL_SINGLE; + else if (Kind == OMPD_barrier) + Flags = OMP_IDENT_BARRIER_EXPL; + else + Flags = OMP_IDENT_BARRIER_IMPL; // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, // thread_id); llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), @@ -1745,28 +2359,6 @@ void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args); } -/// \brief Schedule types for 'omp for' loops (these enumerators are taken from -/// the enum sched_type in kmp.h). -enum OpenMPSchedType { - /// \brief Lower bound for default (unordered) versions. - OMP_sch_lower = 32, - OMP_sch_static_chunked = 33, - OMP_sch_static = 34, - OMP_sch_dynamic_chunked = 35, - OMP_sch_guided_chunked = 36, - OMP_sch_runtime = 37, - OMP_sch_auto = 38, - /// \brief Lower bound for 'ordered' versions. - OMP_ord_lower = 64, - OMP_ord_static_chunked = 65, - OMP_ord_static = 66, - OMP_ord_dynamic_chunked = 67, - OMP_ord_guided_chunked = 68, - OMP_ord_runtime = 69, - OMP_ord_auto = 70, - OMP_sch_default = OMP_sch_static, -}; - /// \brief Map the OpenMP loop schedule to the runtime enumeration. static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, bool Chunked, bool Ordered) { @@ -1789,12 +2381,26 @@ static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, llvm_unreachable("Unexpected runtime schedule"); } +/// \brief Map the OpenMP distribute schedule to the runtime enumeration. +static OpenMPSchedType +getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { + // only static is allowed for dist_schedule + return Chunked ? 
OMP_dist_sch_static_chunked : OMP_dist_sch_static; +} + bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const { auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); return Schedule == OMP_sch_static; } +bool CGOpenMPRuntime::isStaticNonchunked( + OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { + auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked); + return Schedule == OMP_dist_sch_static; +} + + bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { auto Schedule = getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); @@ -1802,19 +2408,57 @@ bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { return Schedule != OMP_sch_static; } +static int addMonoNonMonoModifier(OpenMPSchedType Schedule, + OpenMPScheduleClauseModifier M1, + OpenMPScheduleClauseModifier M2) { + int Modifier = 0; + switch (M1) { + case OMPC_SCHEDULE_MODIFIER_monotonic: + Modifier = OMP_sch_modifier_monotonic; + break; + case OMPC_SCHEDULE_MODIFIER_nonmonotonic: + Modifier = OMP_sch_modifier_nonmonotonic; + break; + case OMPC_SCHEDULE_MODIFIER_simd: + if (Schedule == OMP_sch_static_chunked) + Schedule = OMP_sch_static_balanced_chunked; + break; + case OMPC_SCHEDULE_MODIFIER_last: + case OMPC_SCHEDULE_MODIFIER_unknown: + break; + } + switch (M2) { + case OMPC_SCHEDULE_MODIFIER_monotonic: + Modifier = OMP_sch_modifier_monotonic; + break; + case OMPC_SCHEDULE_MODIFIER_nonmonotonic: + Modifier = OMP_sch_modifier_nonmonotonic; + break; + case OMPC_SCHEDULE_MODIFIER_simd: + if (Schedule == OMP_sch_static_chunked) + Schedule = OMP_sch_static_balanced_chunked; + break; + case OMPC_SCHEDULE_MODIFIER_last: + case OMPC_SCHEDULE_MODIFIER_unknown: + break; + } + return Schedule | Modifier; +} + void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, - OpenMPScheduleClauseKind ScheduleKind, + const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, llvm::Value *UB, llvm::Value *Chunk) { if (!CGF.HaveInsertPoint()) return; OpenMPSchedType Schedule = - getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered); + getRuntimeSchedule(ScheduleKind.Schedule, Chunk != nullptr, Ordered); assert(Ordered || (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && - Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked)); + Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && + Schedule != OMP_sch_static_balanced_chunked)); // Call __kmpc_dispatch_init( // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, // kmp_int[32|64] lower, kmp_int[32|64] upper, @@ -1824,59 +2468,94 @@ void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF, if (Chunk == nullptr) Chunk = CGF.Builder.getIntN(IVSize, 1); llvm::Value *Args[] = { - emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), - getThreadID(CGF, Loc), - CGF.Builder.getInt32(Schedule), // Schedule type - CGF.Builder.getIntN(IVSize, 0), // Lower - UB, // Upper - CGF.Builder.getIntN(IVSize, 1), // Stride - Chunk // Chunk + emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), + CGF.Builder.getInt32(addMonoNonMonoModifier( + Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type + CGF.Builder.getIntN(IVSize, 0), // Lower + UB, // Upper + CGF.Builder.getIntN(IVSize, 1), // Stride + Chunk // Chunk }; CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); } +static void emitForStaticInitCall( + CodeGenFunction &CGF, llvm::Value 
*UpdateLocation, llvm::Value *ThreadId, + llvm::Constant *ForStaticInitFunction, OpenMPSchedType Schedule, + OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, + unsigned IVSize, bool Ordered, Address IL, Address LB, Address UB, + Address ST, llvm::Value *Chunk) { + if (!CGF.HaveInsertPoint()) + return; + + assert(!Ordered); + assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || + Schedule == OMP_sch_static_balanced_chunked || + Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || + Schedule == OMP_dist_sch_static || + Schedule == OMP_dist_sch_static_chunked); + + // Call __kmpc_for_static_init( + // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, + // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, + // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, + // kmp_int[32|64] incr, kmp_int[32|64] chunk); + if (Chunk == nullptr) { + assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || + Schedule == OMP_dist_sch_static) && + "expected static non-chunked schedule"); + // If the Chunk was not specified in the clause - use default value 1. + Chunk = CGF.Builder.getIntN(IVSize, 1); + } else { + assert((Schedule == OMP_sch_static_chunked || + Schedule == OMP_sch_static_balanced_chunked || + Schedule == OMP_ord_static_chunked || + Schedule == OMP_dist_sch_static_chunked) && + "expected static chunked schedule"); + } + llvm::Value *Args[] = { + UpdateLocation, ThreadId, CGF.Builder.getInt32(addMonoNonMonoModifier( + Schedule, M1, M2)), // Schedule type + IL.getPointer(), // &isLastIter + LB.getPointer(), // &LB + UB.getPointer(), // &UB + ST.getPointer(), // &Stride + CGF.Builder.getIntN(IVSize, 1), // Incr + Chunk // Chunk + }; + CGF.EmitRuntimeCall(ForStaticInitFunction, Args); +} + void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc, - OpenMPScheduleClauseKind ScheduleKind, + const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, Address IL, Address LB, Address UB, Address ST, llvm::Value *Chunk) { - if (!CGF.HaveInsertPoint()) - return; - OpenMPSchedType Schedule = - getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered); - assert(!Ordered); - assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || - Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked); - - // Call __kmpc_for_static_init( - // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, - // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, - // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, - // kmp_int[32|64] incr, kmp_int[32|64] chunk); - if (Chunk == nullptr) { - assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static) && - "expected static non-chunked schedule"); - // If the Chunk was not specified in the clause - use default value 1. 
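For reference (plain OpenMP 4.5 schedule-clause syntax; the loop bodies are illustrative), these are the source-level schedule clauses whose kinds and monotonic/nonmonotonic/simd modifiers addMonoNonMonoModifier above folds into the runtime schedule value; the simd modifier is what switches a static chunked schedule to the balanced-chunked variant.

void loops(float *a, int n) {
#pragma omp parallel
  {
#pragma omp for schedule(nonmonotonic : dynamic, 64)
    for (int i = 0; i < n; ++i)
      a[i] += 1.0f;
#pragma omp for schedule(simd : static, 8)
    for (int i = 0; i < n; ++i)
      a[i] *= 2.0f;
  }
}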
- Chunk = CGF.Builder.getIntN(IVSize, 1); - } else { - assert((Schedule == OMP_sch_static_chunked || - Schedule == OMP_ord_static_chunked) && - "expected static chunked schedule"); - } - llvm::Value *Args[] = { - emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), - getThreadID(CGF, Loc), - CGF.Builder.getInt32(Schedule), // Schedule type - IL.getPointer(), // &isLastIter - LB.getPointer(), // &LB - UB.getPointer(), // &UB - ST.getPointer(), // &Stride - CGF.Builder.getIntN(IVSize, 1), // Incr - Chunk // Chunk - }; - CGF.EmitRuntimeCall(createForStaticInitFunction(IVSize, IVSigned), Args); + OpenMPSchedType ScheduleNum = + getRuntimeSchedule(ScheduleKind.Schedule, Chunk != nullptr, Ordered); + auto *UpdatedLocation = emitUpdateLocation(CGF, Loc); + auto *ThreadId = getThreadID(CGF, Loc); + auto *StaticInitFunction = createForStaticInitFunction(IVSize, IVSigned); + emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, + ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, IVSize, + Ordered, IL, LB, UB, ST, Chunk); +} + +void CGOpenMPRuntime::emitDistributeStaticInit( + CodeGenFunction &CGF, SourceLocation Loc, + OpenMPDistScheduleClauseKind SchedKind, unsigned IVSize, bool IVSigned, + bool Ordered, Address IL, Address LB, Address UB, Address ST, + llvm::Value *Chunk) { + OpenMPSchedType ScheduleNum = getRuntimeSchedule(SchedKind, Chunk != nullptr); + auto *UpdatedLocation = emitUpdateLocation(CGF, Loc); + auto *ThreadId = getThreadID(CGF, Loc); + auto *StaticInitFunction = createForStaticInitFunction(IVSize, IVSigned); + emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, + ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, + OMPC_SCHEDULE_MODIFIER_unknown, IVSize, Ordered, IL, LB, + UB, ST, Chunk); } void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, @@ -1884,8 +2563,7 @@ void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, if (!CGF.HaveInsertPoint()) return; // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); - llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), - getThreadID(CGF, Loc)}; + llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini), Args); } @@ -1897,8 +2575,7 @@ void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, if (!CGF.HaveInsertPoint()) return; // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); - llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), - getThreadID(CGF, Loc)}; + llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); } @@ -1912,7 +2589,8 @@ llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, // kmp_int[32|64] *p_stride); llvm::Value *Args[] = { - emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), getThreadID(CGF, Loc), + emitUpdateLocation(CGF, Loc), + getThreadID(CGF, Loc), IL.getPointer(), // &isLastIter LB.getPointer(), // &Lower UB.getPointer(), // &Upper @@ -1991,8 +2669,18 @@ enum KmpTaskTFields { KmpTaskTRoutine, /// \brief Partition id for the untied tasks. KmpTaskTPartId, - /// \brief Function with call of destructors for private variables. - KmpTaskTDestructors, + /// Function with call of destructors for private variables. + Data1, + /// Task priority. + Data2, + /// (Taskloops only) Lower bound. + KmpTaskTLowerBound, + /// (Taskloops only) Upper bound. 
+ KmpTaskTUpperBound, + /// (Taskloops only) Stride. + KmpTaskTStride, + /// (Taskloops only) Is last iteration flag. + KmpTaskTLastIter, }; } // anonymous namespace @@ -2005,11 +2693,11 @@ bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, - unsigned ColNum, unsigned Order) { + unsigned Order) { assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " "only required for the device " "code generation."); - OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum][ColNum] = + OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr); ++OffloadingEntriesNum; } @@ -2017,30 +2705,27 @@ void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, - unsigned ColNum, llvm::Constant *Addr, - llvm::Constant *ID) { + llvm::Constant *Addr, llvm::Constant *ID) { // If we are emitting code for a target, the entry is already initialized, // only has to be registered. if (CGM.getLangOpts().OpenMPIsDevice) { - assert(hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum, - ColNum) && + assert(hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) && "Entry must exist."); - auto &Entry = OffloadEntriesTargetRegion[DeviceID][FileID][ParentName] - [LineNum][ColNum]; + auto &Entry = + OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; assert(Entry.isValid() && "Entry not initialized!"); Entry.setAddress(Addr); Entry.setID(ID); return; } else { OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum++, Addr, ID); - OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum][ColNum] = - Entry; + OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; } } bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( - unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, - unsigned ColNum) const { + unsigned DeviceID, unsigned FileID, StringRef ParentName, + unsigned LineNum) const { auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); if (PerDevice == OffloadEntriesTargetRegion.end()) return false; @@ -2053,11 +2738,8 @@ bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( auto PerLine = PerParentName->second.find(LineNum); if (PerLine == PerParentName->second.end()) return false; - auto PerColumn = PerLine->second.find(ColNum); - if (PerColumn == PerLine->second.end()) - return false; // Fail if this entry is already registered. 
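As a usage reference (OpenMP 4.5 taskloop syntax; grainsize value and function name are illustrative), this is the construct behind the taskloop-only kmp_task_t fields (lower bound, upper bound, stride, last-iteration flag) listed in the KmpTaskTFields enum above and the __kmpc_taskloop entry point:

void tl(float *a, int n) {
#pragma omp parallel
#pragma omp single
#pragma omp taskloop grainsize(64)
  for (int i = 0; i < n; ++i)
    a[i] += 1.0f;
}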
- if (PerColumn->second.getAddress() || PerColumn->second.getID()) + if (PerLine->second.getAddress() || PerLine->second.getID()) return false; return true; } @@ -2069,8 +2751,7 @@ void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( for (auto &F : D.second) for (auto &P : F.second) for (auto &L : P.second) - for (auto &C : L.second) - Action(D.first, F.first, P.first(), L.first, C.first, C.second); + Action(D.first, F.first, P.first(), L.first, L.second); } /// \brief Create a Ctor/Dtor-like function whose body is emitted through @@ -2087,9 +2768,7 @@ createOffloadingBinaryDescriptorFunction(CodeGenModule &CGM, StringRef Name, CodeGenFunction CGF(CGM); GlobalDecl(); - auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration( - C.VoidTy, Args, FunctionType::ExtInfo(), - /*isVariadic=*/false); + auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); auto FTy = CGM.getTypes().GetFunctionType(FI); auto *Fn = CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, SourceLocation()); @@ -2123,11 +2802,11 @@ CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy()); llvm::GlobalVariable *HostEntriesBegin = new llvm::GlobalVariable( M, OffloadEntryTy, /*isConstant=*/true, - llvm::GlobalValue::ExternalLinkage, /*Initializer=*/0, + llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr, ".omp_offloading.entries_begin"); llvm::GlobalVariable *HostEntriesEnd = new llvm::GlobalVariable( M, OffloadEntryTy, /*isConstant=*/true, - llvm::GlobalValue::ExternalLinkage, /*Initializer=*/0, + llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr, ".omp_offloading.entries_end"); // Create all device images @@ -2139,10 +2818,11 @@ CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { StringRef T = Devices[i].getTriple(); auto *ImgBegin = new llvm::GlobalVariable( M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, - /*Initializer=*/0, Twine(".omp_offloading.img_start.") + Twine(T)); + /*Initializer=*/nullptr, + Twine(".omp_offloading.img_start.") + Twine(T)); auto *ImgEnd = new llvm::GlobalVariable( M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, - /*Initializer=*/0, Twine(".omp_offloading.img_end.") + Twine(T)); + /*Initializer=*/nullptr, Twine(".omp_offloading.img_end.") + Twine(T)); llvm::Constant *Dev = llvm::ConstantStruct::get(DeviceImageTy, ImgBegin, ImgEnd, @@ -2160,7 +2840,7 @@ CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { M, DeviceImagesInitTy, /*isConstant=*/true, llvm::GlobalValue::InternalLinkage, DeviceImagesInit, ".omp_offloading.device_images"); - DeviceImages->setUnnamedAddr(true); + DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); // This is a Zero array to be used in the creation of the constant expressions llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty), @@ -2190,12 +2870,14 @@ CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { IdentInfo, C.CharTy); auto *UnRegFn = createOffloadingBinaryDescriptorFunction( - CGM, ".omp_offloading.descriptor_unreg", [&](CodeGenFunction &CGF) { + CGM, ".omp_offloading.descriptor_unreg", + [&](CodeGenFunction &CGF, PrePostActionTy &) { CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_unregister_lib), Desc); }); auto *RegFn = createOffloadingBinaryDescriptorFunction( - CGM, ".omp_offloading.descriptor_reg", [&](CodeGenFunction &CGF) { + CGM, ".omp_offloading.descriptor_reg", + [&](CodeGenFunction &CGF, 
PrePostActionTy &) { CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_register_lib), Desc); CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc); @@ -2203,15 +2885,16 @@ CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { return RegFn; } -void CGOpenMPRuntime::createOffloadEntry(llvm::Constant *Addr, StringRef Name, - uint64_t Size) { +void CGOpenMPRuntime::createOffloadEntry(llvm::Constant *ID, + llvm::Constant *Addr, uint64_t Size) { + StringRef Name = Addr->getName(); auto *TgtOffloadEntryType = cast<llvm::StructType>( CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy())); llvm::LLVMContext &C = CGM.getModule().getContext(); llvm::Module &M = CGM.getModule(); // Make sure the address has the right type. - llvm::Constant *AddrPtr = llvm::ConstantExpr::getBitCast(Addr, CGM.VoidPtrTy); + llvm::Constant *AddrPtr = llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy); // Create constant string with the name. llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name); @@ -2220,7 +2903,7 @@ void CGOpenMPRuntime::createOffloadEntry(llvm::Constant *Addr, StringRef Name, new llvm::GlobalVariable(M, StrPtrInit->getType(), /*isConstant=*/true, llvm::GlobalValue::InternalLinkage, StrPtrInit, ".omp_offloading.entry_name"); - Str->setUnnamedAddr(true); + Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); llvm::Constant *StrPtr = llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy); // Create the entry struct. @@ -2236,7 +2919,6 @@ void CGOpenMPRuntime::createOffloadEntry(llvm::Constant *Addr, StringRef Name, // We can't have any padding between symbols, so we need to have 1-byte // alignment. Entry->setAlignment(1); - return; } void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { @@ -2272,7 +2954,6 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { // Create function that emits metadata for each target region entry; auto &&TargetRegionMetadataEmitter = [&]( unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned Line, - unsigned Column, OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { llvm::SmallVector<llvm::Metadata *, 32> Ops; // Generate metadata for target regions. Each entry of this metadata @@ -2282,15 +2963,13 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { // - Entry 2 -> File ID of the file where the entry was identified. // - Entry 3 -> Mangled name of the function where the entry was identified. // - Entry 4 -> Line in the file where the entry was identified. - // - Entry 5 -> Column in the file where the entry was identified. - // - Entry 6 -> Order the entry was created. + // - Entry 5 -> Order the entry was created. // The first element of the metadata node is the kind. Ops.push_back(getMDInt(E.getKind())); Ops.push_back(getMDInt(DeviceID)); Ops.push_back(getMDInt(FileID)); Ops.push_back(getMDString(ParentName)); Ops.push_back(getMDInt(Line)); - Ops.push_back(getMDInt(Column)); Ops.push_back(getMDInt(E.getOrder())); // Save this entry in the right position of the ordered entries array. 
@@ -2310,7 +2989,7 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { E)) { assert(CE->getID() && CE->getAddress() && "Entry ID and Addr are invalid!"); - createOffloadEntry(CE->getID(), CE->getAddress()->getName(), /*Size=*/0); + createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0); } else llvm_unreachable("Unsupported entry kind."); } @@ -2365,7 +3044,7 @@ void CGOpenMPRuntime::loadOffloadInfoMetadata() { OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( /*DeviceID=*/getMDInt(1), /*FileID=*/getMDInt(2), /*ParentName=*/getMDString(3), /*Line=*/getMDInt(4), - /*Column=*/getMDInt(5), /*Order=*/getMDInt(6)); + /*Order=*/getMDInt(5)); break; } } @@ -2509,21 +3188,45 @@ createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { } static RecordDecl * -createKmpTaskTRecordDecl(CodeGenModule &CGM, QualType KmpInt32Ty, +createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, + QualType KmpInt32Ty, QualType KmpRoutineEntryPointerQTy) { auto &C = CGM.getContext(); // Build struct kmp_task_t { // void * shareds; // kmp_routine_entry_t routine; // kmp_int32 part_id; - // kmp_routine_entry_t destructors; + // kmp_cmplrdata_t data1; + // kmp_cmplrdata_t data2; + // For taskloops additional fields: + // kmp_uint64 lb; + // kmp_uint64 ub; + // kmp_int64 st; + // kmp_int32 liter; // }; + auto *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); + UD->startDefinition(); + addFieldToRecordDecl(C, UD, KmpInt32Ty); + addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); + UD->completeDefinition(); + QualType KmpCmplrdataTy = C.getRecordType(UD); auto *RD = C.buildImplicitRecord("kmp_task_t"); RD->startDefinition(); addFieldToRecordDecl(C, RD, C.VoidPtrTy); addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); addFieldToRecordDecl(C, RD, KmpInt32Ty); - addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); + addFieldToRecordDecl(C, RD, KmpCmplrdataTy); + addFieldToRecordDecl(C, RD, KmpCmplrdataTy); + if (isOpenMPTaskLoopDirective(Kind)) { + QualType KmpUInt64Ty = + CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); + QualType KmpInt64Ty = + CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); + addFieldToRecordDecl(C, RD, KmpUInt64Ty); + addFieldToRecordDecl(C, RD, KmpUInt64Ty); + addFieldToRecordDecl(C, RD, KmpInt64Ty); + addFieldToRecordDecl(C, RD, KmpInt32Ty); + } RD->completeDefinition(); return RD; } @@ -2550,14 +3253,17 @@ createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, /// argument. 
/// \code /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { -/// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, +/// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, +/// For taskloops: +/// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, /// tt->shareds); /// return 0; /// } /// \endcode static llvm::Value * emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, - QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, + OpenMPDirectiveKind Kind, QualType KmpInt32Ty, + QualType KmpTaskTWithPrivatesPtrQTy, QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, QualType SharedsPtrTy, llvm::Value *TaskFunction, llvm::Value *TaskPrivatesMap) { @@ -2569,10 +3275,8 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, KmpTaskTWithPrivatesPtrQTy.withRestrict()); Args.push_back(&GtidArg); Args.push_back(&TaskTypeArg); - FunctionType::ExtInfo Info; auto &TaskEntryFnInfo = - CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info, - /*isVariadic=*/false); + CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo); auto *TaskEntry = llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage, @@ -2583,11 +3287,15 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args); // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, + // tt, + // For taskloops: + // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, // tt->task_data.shareds); auto *GtidParam = CGF.EmitLoadOfScalar( CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); - LValue TDBase = emitLoadOfPointerLValue( - CGF, CGF.GetAddrOfLocalVar(&TaskTypeArg), KmpTaskTWithPrivatesPtrQTy); + LValue TDBase = CGF.EmitLoadOfPointerLValue( + CGF.GetAddrOfLocalVar(&TaskTypeArg), + KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); auto *KmpTaskTWithPrivatesQTyRD = cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); LValue Base = @@ -2595,7 +3303,7 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); - auto *PartidParam = CGF.EmitLoadOfLValue(PartIdLVal, Loc).getScalarVal(); + auto *PartidParam = PartIdLVal.getPointer(); auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); @@ -2609,12 +3317,37 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( PrivatesLVal.getPointer(), CGF.VoidPtrTy); - } else { + } else PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); + + llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam, + TaskPrivatesMap, + CGF.Builder + .CreatePointerBitCastOrAddrSpaceCast( + TDBase.getAddress(), CGF.VoidPtrTy) + .getPointer()}; + SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), + std::end(CommonArgs)); + if (isOpenMPTaskLoopDirective(Kind)) { + auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); + auto LBLVal = CGF.EmitLValueForField(Base, *LBFI); + auto *LBParam = CGF.EmitLoadOfLValue(LBLVal, 
Loc).getScalarVal(); + auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); + auto UBLVal = CGF.EmitLValueForField(Base, *UBFI); + auto *UBParam = CGF.EmitLoadOfLValue(UBLVal, Loc).getScalarVal(); + auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); + auto StLVal = CGF.EmitLValueForField(Base, *StFI); + auto *StParam = CGF.EmitLoadOfLValue(StLVal, Loc).getScalarVal(); + auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); + auto LILVal = CGF.EmitLValueForField(Base, *LIFI); + auto *LIParam = CGF.EmitLoadOfLValue(LILVal, Loc).getScalarVal(); + CallArgs.push_back(LBParam); + CallArgs.push_back(UBParam); + CallArgs.push_back(StParam); + CallArgs.push_back(LIParam); } + CallArgs.push_back(SharedsParam); - llvm::Value *CallArgs[] = {GtidParam, PartidParam, PrivatesParam, - TaskPrivatesMap, SharedsParam}; CGF.EmitCallOrInvoke(TaskFunction, CallArgs); CGF.EmitStoreThroughLValue( RValue::get(CGF.Builder.getInt32(/*C=*/0)), @@ -2638,8 +3371,7 @@ static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, Args.push_back(&TaskTypeArg); FunctionType::ExtInfo Info; auto &DestructorFnInfo = - CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info, - /*isVariadic=*/false); + CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo); auto *DestructorFn = llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, @@ -2651,8 +3383,9 @@ static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, Args); - LValue Base = emitLoadOfPointerLValue( - CGF, CGF.GetAddrOfLocalVar(&TaskTypeArg), KmpTaskTWithPrivatesPtrQTy); + LValue Base = CGF.EmitLoadOfPointerLValue( + CGF.GetAddrOfLocalVar(&TaskTypeArg), + KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); auto *KmpTaskTWithPrivatesQTyRD = cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); @@ -2682,6 +3415,7 @@ static llvm::Value * emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, ArrayRef<const Expr *> PrivateVars, ArrayRef<const Expr *> FirstprivateVars, + ArrayRef<const Expr *> LastprivateVars, QualType PrivatesQTy, ArrayRef<PrivateDataTy> Privates) { auto &C = CGM.getContext(); @@ -2712,10 +3446,18 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, PrivateVarsPos[VD] = Counter; ++Counter; } - FunctionType::ExtInfo Info; + for (auto *E: LastprivateVars) { + Args.push_back(ImplicitParamDecl::Create( + C, /*DC=*/nullptr, Loc, + /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType())) + .withConst() + .withRestrict())); + auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); + PrivateVarsPos[VD] = Counter; + ++Counter; + } auto &TaskPrivatesMapFnInfo = - CGM.getTypes().arrangeFreeFunctionDeclaration(C.VoidTy, Args, Info, - /*isVariadic=*/false); + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); auto *TaskPrivatesMapTy = CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); auto *TaskPrivatesMap = llvm::Function::Create( @@ -2730,16 +3472,17 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, TaskPrivatesMapFnInfo, Args); // *privi = &.privates.privi; - LValue Base = emitLoadOfPointerLValue( - CGF, CGF.GetAddrOfLocalVar(&TaskPrivatesArg), TaskPrivatesArg.getType()); + LValue Base = CGF.EmitLoadOfPointerLValue( + CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 
+ TaskPrivatesArg.getType()->castAs<PointerType>()); auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); Counter = 0; for (auto *Field : PrivatesQTyRD->fields()) { auto FieldLVal = CGF.EmitLValueForField(Base, Field); auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; auto RefLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); - auto RefLoadLVal = - emitLoadOfPointerLValue(CGF, RefLVal.getAddress(), RefLVal.getType()); + auto RefLoadLVal = CGF.EmitLoadOfPointerLValue( + RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>()); CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal); ++Counter; } @@ -2752,23 +3495,199 @@ static int array_pod_sort_comparator(const PrivateDataTy *P1, return P1->first < P2->first ? 1 : (P2->first < P1->first ? -1 : 0); } -void CGOpenMPRuntime::emitTaskCall( - CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, - bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final, - llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds, - const Expr *IfCond, ArrayRef<const Expr *> PrivateVars, - ArrayRef<const Expr *> PrivateCopies, - ArrayRef<const Expr *> FirstprivateVars, - ArrayRef<const Expr *> FirstprivateCopies, - ArrayRef<const Expr *> FirstprivateInits, - ArrayRef<std::pair<OpenMPDependClauseKind, const Expr *>> Dependences) { - if (!CGF.HaveInsertPoint()) - return; +/// Emit initialization for private variables in task-based directives. +static void emitPrivatesInit(CodeGenFunction &CGF, + const OMPExecutableDirective &D, + Address KmpTaskSharedsPtr, LValue TDBase, + const RecordDecl *KmpTaskTWithPrivatesQTyRD, + QualType SharedsTy, QualType SharedsPtrTy, + const OMPTaskDataTy &Data, + ArrayRef<PrivateDataTy> Privates, bool ForDup) { + auto &C = CGF.getContext(); + auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); + LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); + LValue SrcBase; + if (!Data.FirstprivateVars.empty()) { + SrcBase = CGF.MakeAddrLValue( + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), + SharedsTy); + } + CodeGenFunction::CGCapturedStmtInfo CapturesInfo( + cast<CapturedStmt>(*D.getAssociatedStmt())); + FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); + for (auto &&Pair : Privates) { + auto *VD = Pair.second.PrivateCopy; + auto *Init = VD->getAnyInitializer(); + if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) && + !CGF.isTrivialInitializer(Init)))) { + LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); + if (auto *Elem = Pair.second.PrivateElemInit) { + auto *OriginalVD = Pair.second.Original; + auto *SharedField = CapturesInfo.lookup(OriginalVD); + auto SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); + SharedRefLValue = CGF.MakeAddrLValue( + Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)), + SharedRefLValue.getType(), AlignmentSource::Decl); + QualType Type = OriginalVD->getType(); + if (Type->isArrayType()) { + // Initialize firstprivate array. + if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) { + // Perform simple memcpy. + CGF.EmitAggregateAssign(PrivateLValue.getAddress(), + SharedRefLValue.getAddress(), Type); + } else { + // Initialize firstprivate array using element-by-element + // intialization. 
+ CGF.EmitOMPAggregateAssign( + PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type, + [&CGF, Elem, Init, &CapturesInfo](Address DestElement, + Address SrcElement) { + // Clean up any temporaries needed by the initialization. + CodeGenFunction::OMPPrivateScope InitScope(CGF); + InitScope.addPrivate( + Elem, [SrcElement]() -> Address { return SrcElement; }); + (void)InitScope.Privatize(); + // Emit initialization for single element. + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( + CGF, &CapturesInfo); + CGF.EmitAnyExprToMem(Init, DestElement, + Init->getType().getQualifiers(), + /*IsInitializer=*/false); + }); + } + } else { + CodeGenFunction::OMPPrivateScope InitScope(CGF); + InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address { + return SharedRefLValue.getAddress(); + }); + (void)InitScope.Privatize(); + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); + CGF.EmitExprAsInit(Init, VD, PrivateLValue, + /*capturedByInit=*/false); + } + } else + CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); + } + ++FI; + } +} + +/// Check if duplication function is required for taskloops. +static bool checkInitIsRequired(CodeGenFunction &CGF, + ArrayRef<PrivateDataTy> Privates) { + bool InitRequired = false; + for (auto &&Pair : Privates) { + auto *VD = Pair.second.PrivateCopy; + auto *Init = VD->getAnyInitializer(); + InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) && + !CGF.isTrivialInitializer(Init)); + } + return InitRequired; +} + + +/// Emit task_dup function (for initialization of +/// private/firstprivate/lastprivate vars and last_iter flag) +/// \code +/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int +/// lastpriv) { +/// // setup lastprivate flag +/// task_dst->last = lastpriv; +/// // could be constructor calls here... 
+/// } +/// \endcode +static llvm::Value * +emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, + const OMPExecutableDirective &D, + QualType KmpTaskTWithPrivatesPtrQTy, + const RecordDecl *KmpTaskTWithPrivatesQTyRD, + const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, + QualType SharedsPtrTy, const OMPTaskDataTy &Data, + ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { + auto &C = CGM.getContext(); + FunctionArgList Args; + ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, + /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy); + ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, + /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy); + ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, + /*Id=*/nullptr, C.IntTy); + Args.push_back(&DstArg); + Args.push_back(&SrcArg); + Args.push_back(&LastprivArg); + auto &TaskDupFnInfo = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); + auto *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); + auto *TaskDup = + llvm::Function::Create(TaskDupTy, llvm::GlobalValue::InternalLinkage, + ".omp_task_dup.", &CGM.getModule()); + CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskDup, TaskDupFnInfo); + CodeGenFunction CGF(CGM); + CGF.disableDebugInfo(); + CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args); + + LValue TDBase = CGF.EmitLoadOfPointerLValue( + CGF.GetAddrOfLocalVar(&DstArg), + KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); + // task_dst->liter = lastpriv; + if (WithLastIter) { + auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); + LValue Base = CGF.EmitLValueForField( + TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); + LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); + llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( + CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); + CGF.EmitStoreOfScalar(Lastpriv, LILVal); + } + + // Emit initial values for private copies (if any). + assert(!Privates.empty()); + Address KmpTaskSharedsPtr = Address::invalid(); + if (!Data.FirstprivateVars.empty()) { + LValue TDBase = CGF.EmitLoadOfPointerLValue( + CGF.GetAddrOfLocalVar(&SrcArg), + KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); + LValue Base = CGF.EmitLValueForField( + TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); + KmpTaskSharedsPtr = Address( + CGF.EmitLoadOfScalar(CGF.EmitLValueForField( + Base, *std::next(KmpTaskTQTyRD->field_begin(), + KmpTaskTShareds)), + Loc), + CGF.getNaturalTypeAlignment(SharedsTy)); + } + emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, + SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); + CGF.FinishFunction(); + return TaskDup; +} + +/// Checks if destructor function is required to be generated. +/// \return true if cleanups are required, false otherwise. 
+static bool +checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) { + bool NeedsCleanup = false; + auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); + auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl()); + for (auto *FD : PrivateRD->fields()) { + NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType(); + if (NeedsCleanup) + break; + } + return NeedsCleanup; +} + +CGOpenMPRuntime::TaskResultTy +CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, + const OMPExecutableDirective &D, + llvm::Value *TaskFunction, QualType SharedsTy, + Address Shareds, const OMPTaskDataTy &Data) { auto &C = CGM.getContext(); - llvm::SmallVector<PrivateDataTy, 8> Privates; + llvm::SmallVector<PrivateDataTy, 4> Privates; // Aggregate privates and sort them by the alignment. - auto I = PrivateCopies.begin(); - for (auto *E : PrivateVars) { + auto I = Data.PrivateCopies.begin(); + for (auto *E : Data.PrivateVars) { auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); Privates.push_back(std::make_pair( C.getDeclAlign(VD), @@ -2776,16 +3695,26 @@ void CGOpenMPRuntime::emitTaskCall( /*PrivateElemInit=*/nullptr))); ++I; } - I = FirstprivateCopies.begin(); - auto IElemInitRef = FirstprivateInits.begin(); - for (auto *E : FirstprivateVars) { + I = Data.FirstprivateCopies.begin(); + auto IElemInitRef = Data.FirstprivateInits.begin(); + for (auto *E : Data.FirstprivateVars) { auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); Privates.push_back(std::make_pair( C.getDeclAlign(VD), PrivateHelpersTy( VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())))); - ++I, ++IElemInitRef; + ++I; + ++IElemInitRef; + } + I = Data.LastprivateCopies.begin(); + for (auto *E : Data.LastprivateVars) { + auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); + Privates.push_back(std::make_pair( + C.getDeclAlign(VD), + PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), + /*PrivateElemInit=*/nullptr))); + ++I; } llvm::array_pod_sort(Privates.begin(), Privates.end(), array_pod_sort_comparator); @@ -2794,8 +3723,8 @@ void CGOpenMPRuntime::emitTaskCall( emitKmpRoutineEntryT(KmpInt32Ty); // Build type kmp_task_t (if not built yet). if (KmpTaskTQTy.isNull()) { - KmpTaskTQTy = C.getRecordType( - createKmpTaskTRecordDecl(CGM, KmpInt32Ty, KmpRoutineEntryPtrQTy)); + KmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( + CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); } auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); // Build particular struct kmp_task_t for the given task. @@ -2806,7 +3735,7 @@ void CGOpenMPRuntime::emitTaskCall( C.getPointerType(KmpTaskTWithPrivatesQTy); auto *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); auto *KmpTaskTWithPrivatesPtrTy = KmpTaskTWithPrivatesTy->getPointerTo(); - auto *KmpTaskTWithPrivatesTySize = getTypeSize(CGF, KmpTaskTWithPrivatesQTy); + auto *KmpTaskTWithPrivatesTySize = CGF.getTypeSize(KmpTaskTWithPrivatesQTy); QualType SharedsPtrTy = C.getPointerType(SharedsTy); // Emit initial values for private copies (if any). 
@@ -2818,7 +3747,8 @@ void CGOpenMPRuntime::emitTaskCall( if (!Privates.empty()) { auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); TaskPrivatesMap = emitTaskPrivateMappingFunction( - CGM, Loc, PrivateVars, FirstprivateVars, FI->getType(), Privates); + CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars, + FI->getType(), Privates); TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( TaskPrivatesMap, TaskPrivatesMapTy); } else { @@ -2828,8 +3758,9 @@ void CGOpenMPRuntime::emitTaskCall( // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, // kmp_task_t *tt); auto *TaskEntry = emitProxyTaskFunction( - CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTy, - KmpTaskTQTy, SharedsPtrTy, TaskFunction, TaskPrivatesMap); + CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, + KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction, + TaskPrivatesMap); // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, @@ -2837,15 +3768,27 @@ void CGOpenMPRuntime::emitTaskCall( // Task flags. Format is taken from // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h, // description of kmp_tasking_flags struct. - const unsigned TiedFlag = 0x1; - const unsigned FinalFlag = 0x2; - unsigned Flags = Tied ? TiedFlag : 0; + enum { + TiedFlag = 0x1, + FinalFlag = 0x2, + DestructorsFlag = 0x8, + PriorityFlag = 0x20 + }; + unsigned Flags = Data.Tied ? TiedFlag : 0; + bool NeedsCleanup = false; + if (!Privates.empty()) { + NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD); + if (NeedsCleanup) + Flags = Flags | DestructorsFlag; + } + if (Data.Priority.getInt()) + Flags = Flags | PriorityFlag; auto *TaskFlags = - Final.getPointer() - ? CGF.Builder.CreateSelect(Final.getPointer(), + Data.Final.getPointer() + ? CGF.Builder.CreateSelect(Data.Final.getPointer(), CGF.Builder.getInt32(FinalFlag), CGF.Builder.getInt32(/*C=*/0)) - : CGF.Builder.getInt32(Final.getInt() ? FinalFlag : 0); + : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0); TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); auto *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy)); llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc), @@ -2875,96 +3818,71 @@ void CGOpenMPRuntime::emitTaskCall( CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy); } // Emit initial values for private copies (if any). 
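The flags word handed to __kmpc_omp_task_alloc is assembled from the bits introduced in the enum above. A minimal sketch of that combination, assuming the 'final' value is known at compile time (the generated code also handles the dynamic case with a select); the helper name is purely illustrative:

// Illustrative helper (not part of the patch); bit values match the enum above
// and the kmp_tasking_flags description referenced in kmp.h.
enum TaskFlagBits {
  TiedFlag        = 0x1,   // tied task
  FinalFlag       = 0x2,   // 'final' clause evaluated to true
  DestructorsFlag = 0x8,   // private copies need the destructor thunk
  PriorityFlag    = 0x20,  // 'priority' clause present
};

static unsigned buildTaskAllocFlags(bool Tied, bool Final, bool NeedsCleanup,
                                    bool HasPriority) {
  unsigned Flags = Tied ? TiedFlag : 0;
  if (Final)
    Flags |= FinalFlag;
  if (NeedsCleanup)
    Flags |= DestructorsFlag;
  if (HasPriority)
    Flags |= PriorityFlag;
  return Flags;
}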
- bool NeedsCleanup = false; + TaskResultTy Result; if (!Privates.empty()) { - auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); - auto PrivatesBase = CGF.EmitLValueForField(Base, *FI); - FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); - LValue SharedsBase; - if (!FirstprivateVars.empty()) { - SharedsBase = CGF.MakeAddrLValue( - CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), - SharedsTy); - } - CodeGenFunction::CGCapturedStmtInfo CapturesInfo( - cast<CapturedStmt>(*D.getAssociatedStmt())); - for (auto &&Pair : Privates) { - auto *VD = Pair.second.PrivateCopy; - auto *Init = VD->getAnyInitializer(); - LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); - if (Init) { - if (auto *Elem = Pair.second.PrivateElemInit) { - auto *OriginalVD = Pair.second.Original; - auto *SharedField = CapturesInfo.lookup(OriginalVD); - auto SharedRefLValue = - CGF.EmitLValueForField(SharedsBase, SharedField); - SharedRefLValue = CGF.MakeAddrLValue( - Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)), - SharedRefLValue.getType(), AlignmentSource::Decl); - QualType Type = OriginalVD->getType(); - if (Type->isArrayType()) { - // Initialize firstprivate array. - if (!isa<CXXConstructExpr>(Init) || - CGF.isTrivialInitializer(Init)) { - // Perform simple memcpy. - CGF.EmitAggregateAssign(PrivateLValue.getAddress(), - SharedRefLValue.getAddress(), Type); - } else { - // Initialize firstprivate array using element-by-element - // intialization. - CGF.EmitOMPAggregateAssign( - PrivateLValue.getAddress(), SharedRefLValue.getAddress(), - Type, [&CGF, Elem, Init, &CapturesInfo]( - Address DestElement, Address SrcElement) { - // Clean up any temporaries needed by the initialization. - CodeGenFunction::OMPPrivateScope InitScope(CGF); - InitScope.addPrivate(Elem, [SrcElement]() -> Address { - return SrcElement; - }); - (void)InitScope.Privatize(); - // Emit initialization for single element. - CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( - CGF, &CapturesInfo); - CGF.EmitAnyExprToMem(Init, DestElement, - Init->getType().getQualifiers(), - /*IsInitializer=*/false); - }); - } - } else { - CodeGenFunction::OMPPrivateScope InitScope(CGF); - InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address { - return SharedRefLValue.getAddress(); - }); - (void)InitScope.Privatize(); - CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); - CGF.EmitExprAsInit(Init, VD, PrivateLValue, - /*capturedByInit=*/false); - } - } else { - CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); - } - } - NeedsCleanup = NeedsCleanup || FI->getType().isDestructedType(); - ++FI; + emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD, + SharedsTy, SharedsPtrTy, Data, Privates, + /*ForDup=*/false); + if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) && + (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) { + Result.TaskDupFn = emitTaskDupFunction( + CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD, + KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates, + /*WithLastIter=*/!Data.LastprivateVars.empty()); } } + // Fields of union "kmp_cmplrdata_t" for destructors and priority. + enum { Priority = 0, Destructors = 1 }; // Provide pointer to function with destructors for privates. - llvm::Value *DestructorFn = - NeedsCleanup ? 
emitDestructorsFunction(CGM, Loc, KmpInt32Ty, - KmpTaskTWithPrivatesPtrQTy, - KmpTaskTWithPrivatesQTy) - : llvm::ConstantPointerNull::get( - cast<llvm::PointerType>(KmpRoutineEntryPtrTy)); - LValue Destructor = CGF.EmitLValueForField( - TDBase, *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTDestructors)); - CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - DestructorFn, KmpRoutineEntryPtrTy), - Destructor); + auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1); + auto *KmpCmplrdataUD = (*FI)->getType()->getAsUnionType()->getDecl(); + if (NeedsCleanup) { + llvm::Value *DestructorFn = emitDestructorsFunction( + CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, + KmpTaskTWithPrivatesQTy); + LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI); + LValue DestructorsLV = CGF.EmitLValueForField( + Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors)); + CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + DestructorFn, KmpRoutineEntryPtrTy), + DestructorsLV); + } + // Set priority. + if (Data.Priority.getInt()) { + LValue Data2LV = CGF.EmitLValueForField( + TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2)); + LValue PriorityLV = CGF.EmitLValueForField( + Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority)); + CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV); + } + Result.NewTask = NewTask; + Result.TaskEntry = TaskEntry; + Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy; + Result.TDBase = TDBase; + Result.KmpTaskTQTyRD = KmpTaskTQTyRD; + return Result; +} + +void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, + const OMPExecutableDirective &D, + llvm::Value *TaskFunction, + QualType SharedsTy, Address Shareds, + const Expr *IfCond, + const OMPTaskDataTy &Data) { + if (!CGF.HaveInsertPoint()) + return; + TaskResultTy Result = + emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); + llvm::Value *NewTask = Result.NewTask; + llvm::Value *TaskEntry = Result.TaskEntry; + llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; + LValue TDBase = Result.TDBase; + RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; + auto &C = CGM.getContext(); // Process list of dependences. Address DependenciesArray = Address::invalid(); - unsigned NumDependencies = Dependences.size(); + unsigned NumDependencies = Data.Dependences.size(); if (NumDependencies) { // Dependence kind for RTL. 
enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3 }; @@ -2981,18 +3899,18 @@ void CGOpenMPRuntime::emitTaskCall( addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); KmpDependInfoRD->completeDefinition(); KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); - } else { + } else KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); - } CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy); // Define type kmp_depend_info[<Dependences.size()>]; QualType KmpDependInfoArrayTy = C.getConstantArrayType( KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), ArrayType::Normal, /*IndexTypeQuals=*/0); // kmp_depend_info[<Dependences.size()>] deps; - DependenciesArray = CGF.CreateMemTemp(KmpDependInfoArrayTy); + DependenciesArray = + CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); for (unsigned i = 0; i < NumDependencies; ++i) { - const Expr *E = Dependences[i].second; + const Expr *E = Data.Dependences[i].second; auto Addr = CGF.EmitLValue(E); llvm::Value *Size; QualType Ty = E->getType(); @@ -3006,7 +3924,7 @@ void CGOpenMPRuntime::emitTaskCall( llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy); Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); } else - Size = getTypeSize(CGF, Ty); + Size = CGF.getTypeSize(Ty); auto Base = CGF.MakeAddrLValue( CGF.Builder.CreateConstArrayGEP(DependenciesArray, i, DependencySize), KmpDependInfoTy); @@ -3022,7 +3940,7 @@ void CGOpenMPRuntime::emitTaskCall( CGF.EmitStoreOfScalar(Size, LenLVal); // deps[i].flags = <Dependences[i].first>; RTLDependenceKindTy DepKind; - switch (Dependences[i].first) { + switch (Data.Dependences[i].first) { case OMPC_DEPEND_in: DepKind = DepIn; break; @@ -3048,8 +3966,6 @@ void CGOpenMPRuntime::emitTaskCall( // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc() // libcall. - // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t - // *new_task); // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence @@ -3067,19 +3983,26 @@ void CGOpenMPRuntime::emitTaskCall( DepTaskArgs[5] = CGF.Builder.getInt32(0); DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); } - auto &&ThenCodeGen = [this, NumDependencies, - &TaskArgs, &DepTaskArgs](CodeGenFunction &CGF) { - // TODO: add check for untied tasks. 
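Each element of the ".dep.arr.addr" dependence array built above is filled with an address, a byte length, and a kind flag. A hedged C view of that record follows; the field names come from the deps[i].* comments, while the concrete integer types are assumptions rather than a quote of the runtime's kmp.h.

#include <cstddef>
#include <cstdint>

// Hedged sketch of one kmp_depend_info element as populated above.
enum RTLDependenceKindSketch : uint8_t {
  DepIn    = 0x01,  // depend(in: ...)
  DepInOut = 0x03,  // depend(out: ...) / depend(inout: ...), per the switch above
};

struct kmp_depend_info_sketch {
  intptr_t base_addr;  // deps[i].base_addr = address of the dependence item
  size_t   len;        // deps[i].len = byte size of the dependence item
  uint8_t  flags;      // deps[i].flags = DepIn or DepInOut
};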
+ auto &&ThenCodeGen = [this, Loc, &Data, TDBase, KmpTaskTQTyRD, + NumDependencies, &TaskArgs, + &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { + if (!Data.Tied) { + auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); + auto PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); + CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal); + } if (NumDependencies) { - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), - DepTaskArgs); + CGF.EmitRuntimeCall( + createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs); } else { CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs); } + // Check if parent region is untied and build return for untied task; + if (auto *Region = + dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) + Region->emitUntiedSwitch(CGF); }; - typedef CallEndCleanup<std::extent<decltype(TaskArgs)>::value> - IfCallEndCleanup; llvm::Value *DepWaitTaskArgs[6]; if (NumDependencies) { @@ -3090,40 +4013,111 @@ void CGOpenMPRuntime::emitTaskCall( DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); } - auto &&ElseCodeGen = [this, &TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry, - NumDependencies, &DepWaitTaskArgs](CodeGenFunction &CGF) { + auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry, + NumDependencies, &DepWaitTaskArgs](CodeGenFunction &CGF, + PrePostActionTy &) { + auto &RT = CGF.CGM.getOpenMPRuntime(); CodeGenFunction::RunCleanupsScope LocalScope(CGF); // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info // is specified. 
if (NumDependencies) - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps), + CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps), DepWaitTaskArgs); + // Call proxy_task_entry(gtid, new_task); + auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy]( + CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); + llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; + CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs); + }; + // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, // kmp_task_t *new_task); - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), - TaskArgs); // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, // kmp_task_t *new_task); - CGF.EHStack.pushCleanup<IfCallEndCleanup>( - NormalAndEHCleanup, - createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), - llvm::makeArrayRef(TaskArgs)); - - // Call proxy_task_entry(gtid, new_task); - llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; - CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs); + RegionCodeGenTy RCG(CodeGen); + CommonActionTy Action( + RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs, + RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs); + RCG.setAction(Action); + RCG(CGF); }; - if (IfCond) { + if (IfCond) emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); - } else { - CodeGenFunction::RunCleanupsScope Scope(CGF); - ThenCodeGen(CGF); + else { + RegionCodeGenTy ThenRCG(ThenCodeGen); + ThenRCG(CGF); } } +void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, + const OMPLoopDirective &D, + llvm::Value *TaskFunction, + QualType SharedsTy, Address Shareds, + const Expr *IfCond, + const OMPTaskDataTy &Data) { + if (!CGF.HaveInsertPoint()) + return; + TaskResultTy Result = + emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); + // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc() + // libcall. 
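For readability, the runtime entry point whose argument list emitTaskLoopCall assembles can be written out as the prototype below. It is transcribed from the signature comment that follows; the opaque struct declarations and fixed-width typedefs are added only to make the sketch self-contained and may not match the runtime's own headers exactly.

#include <cstdint>

// Hedged transcription of the __kmpc_taskloop signature described in the
// comment below; ident_t and kmp_task_t are the runtime's opaque types.
struct ident_t;
struct kmp_task_t;
typedef uint64_t kmp_uint64;
typedef int64_t  kmp_int64;

extern "C" void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task,
                                int if_val, kmp_uint64 *lb, kmp_uint64 *ub,
                                kmp_int64 st, int nogroup,
                                int sched,  // 0 = none, 1 = grainsize, 2 = num_tasks
                                kmp_uint64 grainsize, void *task_dup);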
+ // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int + // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int + // sched, kmp_uint64 grainsize, void *task_dup); + llvm::Value *ThreadID = getThreadID(CGF, Loc); + llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); + llvm::Value *IfVal; + if (IfCond) { + IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, + /*isSigned=*/true); + } else + IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); + + LValue LBLVal = CGF.EmitLValueForField( + Result.TDBase, + *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); + auto *LBVar = + cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); + CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(), + /*IsInitializer=*/true); + LValue UBLVal = CGF.EmitLValueForField( + Result.TDBase, + *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); + auto *UBVar = + cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); + CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(), + /*IsInitializer=*/true); + LValue StLVal = CGF.EmitLValueForField( + Result.TDBase, + *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); + auto *StVar = + cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); + CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(), + /*IsInitializer=*/true); + enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; + llvm::Value *TaskArgs[] = { + UpLoc, ThreadID, Result.NewTask, IfVal, LBLVal.getPointer(), + UBLVal.getPointer(), CGF.EmitLoadOfScalar(StLVal, SourceLocation()), + llvm::ConstantInt::getSigned(CGF.IntTy, Data.Nogroup ? 1 : 0), + llvm::ConstantInt::getSigned( + CGF.IntTy, Data.Schedule.getPointer() + ? Data.Schedule.getInt() ? NumTasks : Grainsize + : NoSchedule), + Data.Schedule.getPointer() + ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, + /*isSigned=*/false) + : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0), + Result.TaskDupFn + ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Result.TaskDupFn, + CGF.VoidPtrTy) + : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; + CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs); +} + /// \brief Emit reduction operation for each element of array (required for /// array sections) LHS op = RHS. /// \param Type Type of array. @@ -3204,6 +4198,26 @@ static void EmitOMPAggregateReduction( CGF.EmitBlock(DoneBB, /*IsFinished=*/true); } +/// Emit reduction combiner. If the combiner is a simple expression emit it as +/// is, otherwise consider it as combiner of UDR decl and emit it as a call of +/// UDR combiner function. 
+static void emitReductionCombiner(CodeGenFunction &CGF, + const Expr *ReductionOp) { + if (auto *CE = dyn_cast<CallExpr>(ReductionOp)) + if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) + if (auto *DRE = + dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) + if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { + std::pair<llvm::Function *, llvm::Function *> Reduction = + CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); + RValue Func = RValue::get(Reduction.first); + CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); + CGF.EmitIgnoredExpr(ReductionOp); + return; + } + CGF.EmitIgnoredExpr(ReductionOp); +} + static llvm::Value *emitReductionFunction(CodeGenModule &CGM, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates, @@ -3220,9 +4234,7 @@ static llvm::Value *emitReductionFunction(CodeGenModule &CGM, C.VoidPtrTy); Args.push_back(&LHSArg); Args.push_back(&RHSArg); - FunctionType::ExtInfo EI; - auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration( - C.VoidTy, Args, EI, /*isVariadic=*/false); + auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); auto *Fn = llvm::Function::Create( CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, ".omp.reduction.reduction_func", &CGM.getModule()); @@ -3255,17 +4267,16 @@ static llvm::Value *emitReductionFunction(CodeGenModule &CGM, return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar); }); QualType PrivTy = (*IPriv)->getType(); - if (PrivTy->isArrayType()) { + if (PrivTy->isVariablyModifiedType()) { // Get array size and emit VLA type. ++Idx; Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx, CGF.getPointerSize()); llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem); + auto *VLA = CGF.getContext().getAsVariableArrayType(PrivTy); + auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr()); CodeGenFunction::OpaqueValueMapping OpaqueMap( - CGF, - cast<OpaqueValueExpr>( - CGF.getContext().getAsVariableArrayType(PrivTy)->getSizeExpr()), - RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy))); + CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy))); CGF.EmitVariablyModifiedType(PrivTy); } } @@ -3278,20 +4289,42 @@ static llvm::Value *emitReductionFunction(CodeGenModule &CGM, // Emit reduction for array section. auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); - EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, - [=](CodeGenFunction &CGF, const Expr *, - const Expr *, - const Expr *) { CGF.EmitIgnoredExpr(E); }); + EmitOMPAggregateReduction( + CGF, (*IPriv)->getType(), LHSVar, RHSVar, + [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { + emitReductionCombiner(CGF, E); + }); } else // Emit reduction for array subscript or single variable. - CGF.EmitIgnoredExpr(E); - ++IPriv, ++ILHS, ++IRHS; + emitReductionCombiner(CGF, E); + ++IPriv; + ++ILHS; + ++IRHS; } Scope.ForceCleanup(); CGF.FinishFunction(); return Fn; } +static void emitSingleReductionCombiner(CodeGenFunction &CGF, + const Expr *ReductionOp, + const Expr *PrivateRef, + const DeclRefExpr *LHS, + const DeclRefExpr *RHS) { + if (PrivateRef->getType()->isArrayType()) { + // Emit reduction for array section. 
+ auto *LHSVar = cast<VarDecl>(LHS->getDecl()); + auto *RHSVar = cast<VarDecl>(RHS->getDecl()); + EmitOMPAggregateReduction( + CGF, PrivateRef->getType(), LHSVar, RHSVar, + [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { + emitReductionCombiner(CGF, ReductionOp); + }); + } else + // Emit reduction for array subscript or single variable. + emitReductionCombiner(CGF, ReductionOp); +} + void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs, @@ -3343,16 +4376,11 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, auto ILHS = LHSExprs.begin(); auto IRHS = RHSExprs.begin(); for (auto *E : ReductionOps) { - if ((*IPriv)->getType()->isArrayType()) { - auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); - auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); - EmitOMPAggregateReduction( - CGF, (*IPriv)->getType(), LHSVar, RHSVar, - [=](CodeGenFunction &CGF, const Expr *, const Expr *, - const Expr *) { CGF.EmitIgnoredExpr(E); }); - } else - CGF.EmitIgnoredExpr(E); - ++IPriv, ++ILHS, ++IRHS; + emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), + cast<DeclRefExpr>(*IRHS)); + ++IPriv; + ++ILHS; + ++IRHS; } return; } @@ -3361,7 +4389,7 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; auto Size = RHSExprs.size(); for (auto *E : Privates) { - if (E->getType()->isArrayType()) + if (E->getType()->isVariablyModifiedType()) // Reserve place for array size. ++Size; } @@ -3380,20 +4408,18 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy), Elem); - if ((*IPriv)->getType()->isArrayType()) { + if ((*IPriv)->getType()->isVariablyModifiedType()) { // Store array size. ++Idx; Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, CGF.getPointerSize()); - CGF.Builder.CreateStore( - CGF.Builder.CreateIntToPtr( - CGF.Builder.CreateIntCast( - CGF.getVLASize(CGF.getContext().getAsVariableArrayType( - (*IPriv)->getType())) - .first, - CGF.SizeTy, /*isSigned=*/false), - CGF.VoidPtrTy), - Elem); + llvm::Value *Size = CGF.Builder.CreateIntCast( + CGF.getVLASize( + CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) + .first, + CGF.SizeTy, /*isSigned=*/false); + CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), + Elem); } } @@ -3407,11 +4433,9 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, // 4. 
Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), // RedList, reduce_func, &<lock>); - auto *IdentTLoc = emitUpdateLocation( - CGF, Loc, - static_cast<OpenMPLocationFlags>(OMP_IDENT_KMPC | OMP_ATOMIC_REDUCE)); + auto *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE); auto *ThreadId = getThreadID(CGF, Loc); - auto *ReductionArrayTySize = getTypeSize(CGF, ReductionArrayTy); + auto *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); auto *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList.getPointer(), CGF.VoidPtrTy); @@ -3443,38 +4467,33 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); CGF.EmitBlock(Case1BB); - { - CodeGenFunction::RunCleanupsScope Scope(CGF); - // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); - llvm::Value *EndArgs[] = { - IdentTLoc, // ident_t *<loc> - ThreadId, // i32 <gtid> - Lock // kmp_critical_name *&<lock> - }; - CGF.EHStack - .pushCleanup<CallEndCleanup<std::extent<decltype(EndArgs)>::value>>( - NormalAndEHCleanup, - createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait - : OMPRTL__kmpc_end_reduce), - llvm::makeArrayRef(EndArgs)); + // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); + llvm::Value *EndArgs[] = { + IdentTLoc, // ident_t *<loc> + ThreadId, // i32 <gtid> + Lock // kmp_critical_name *&<lock> + }; + auto &&CodeGen = [&Privates, &LHSExprs, &RHSExprs, &ReductionOps]( + CodeGenFunction &CGF, PrePostActionTy &Action) { auto IPriv = Privates.begin(); auto ILHS = LHSExprs.begin(); auto IRHS = RHSExprs.begin(); for (auto *E : ReductionOps) { - if ((*IPriv)->getType()->isArrayType()) { - // Emit reduction for array section. - auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); - auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); - EmitOMPAggregateReduction( - CGF, (*IPriv)->getType(), LHSVar, RHSVar, - [=](CodeGenFunction &CGF, const Expr *, const Expr *, - const Expr *) { CGF.EmitIgnoredExpr(E); }); - } else - // Emit reduction for array subscript or single variable. - CGF.EmitIgnoredExpr(E); - ++IPriv, ++ILHS, ++IRHS; + emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), + cast<DeclRefExpr>(*IRHS)); + ++IPriv; + ++ILHS; + ++IRHS; } - } + }; + RegionCodeGenTy RCG(CodeGen); + CommonActionTy Action( + nullptr, llvm::None, + createRuntimeFunction(WithNowait ? 
OMPRTL__kmpc_end_reduce_nowait + : OMPRTL__kmpc_end_reduce), + EndArgs); + RCG.setAction(Action); + RCG(CGF); CGF.EmitBranch(DefaultBB); @@ -3487,103 +4506,113 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); CGF.EmitBlock(Case2BB); - { - CodeGenFunction::RunCleanupsScope Scope(CGF); - if (!WithNowait) { - // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); - llvm::Value *EndArgs[] = { - IdentTLoc, // ident_t *<loc> - ThreadId, // i32 <gtid> - Lock // kmp_critical_name *&<lock> - }; - CGF.EHStack - .pushCleanup<CallEndCleanup<std::extent<decltype(EndArgs)>::value>>( - NormalAndEHCleanup, - createRuntimeFunction(OMPRTL__kmpc_end_reduce), - llvm::makeArrayRef(EndArgs)); - } + auto &&AtomicCodeGen = [Loc, &Privates, &LHSExprs, &RHSExprs, &ReductionOps]( + CodeGenFunction &CGF, PrePostActionTy &Action) { auto ILHS = LHSExprs.begin(); auto IRHS = RHSExprs.begin(); auto IPriv = Privates.begin(); for (auto *E : ReductionOps) { - const Expr *XExpr = nullptr; - const Expr *EExpr = nullptr; - const Expr *UpExpr = nullptr; - BinaryOperatorKind BO = BO_Comma; - if (auto *BO = dyn_cast<BinaryOperator>(E)) { - if (BO->getOpcode() == BO_Assign) { - XExpr = BO->getLHS(); - UpExpr = BO->getRHS(); - } + const Expr *XExpr = nullptr; + const Expr *EExpr = nullptr; + const Expr *UpExpr = nullptr; + BinaryOperatorKind BO = BO_Comma; + if (auto *BO = dyn_cast<BinaryOperator>(E)) { + if (BO->getOpcode() == BO_Assign) { + XExpr = BO->getLHS(); + UpExpr = BO->getRHS(); } - // Try to emit update expression as a simple atomic. - auto *RHSExpr = UpExpr; - if (RHSExpr) { - // Analyze RHS part of the whole expression. - if (auto *ACO = dyn_cast<AbstractConditionalOperator>( - RHSExpr->IgnoreParenImpCasts())) { - // If this is a conditional operator, analyze its condition for - // min/max reduction operator. - RHSExpr = ACO->getCond(); - } - if (auto *BORHS = - dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { - EExpr = BORHS->getRHS(); - BO = BORHS->getOpcode(); - } + } + // Try to emit update expression as a simple atomic. + auto *RHSExpr = UpExpr; + if (RHSExpr) { + // Analyze RHS part of the whole expression. + if (auto *ACO = dyn_cast<AbstractConditionalOperator>( + RHSExpr->IgnoreParenImpCasts())) { + // If this is a conditional operator, analyze its condition for + // min/max reduction operator. + RHSExpr = ACO->getCond(); } - if (XExpr) { - auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); - auto &&AtomicRedGen = [this, BO, VD, IPriv, - Loc](CodeGenFunction &CGF, const Expr *XExpr, - const Expr *EExpr, const Expr *UpExpr) { - LValue X = CGF.EmitLValue(XExpr); - RValue E; - if (EExpr) - E = CGF.EmitAnyExpr(EExpr); - CGF.EmitOMPAtomicSimpleUpdateExpr( - X, E, BO, /*IsXLHSInRHSPart=*/true, llvm::Monotonic, Loc, - [&CGF, UpExpr, VD, IPriv, Loc](RValue XRValue) { - CodeGenFunction::OMPPrivateScope PrivateScope(CGF); - PrivateScope.addPrivate( - VD, [&CGF, VD, XRValue, Loc]() -> Address { - Address LHSTemp = CGF.CreateMemTemp(VD->getType()); - CGF.emitOMPSimpleStore( - CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, - VD->getType().getNonReferenceType(), Loc); - return LHSTemp; - }); - (void)PrivateScope.Privatize(); - return CGF.EmitAnyExpr(UpExpr); - }); - }; - if ((*IPriv)->getType()->isArrayType()) { - // Emit atomic reduction for array section. 
- auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); - EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, - AtomicRedGen, XExpr, EExpr, UpExpr); - } else - // Emit atomic reduction for array subscript or single variable. - AtomicRedGen(CGF, XExpr, EExpr, UpExpr); - } else { - // Emit as a critical region. - auto &&CritRedGen = [this, E, Loc](CodeGenFunction &CGF, const Expr *, - const Expr *, const Expr *) { - emitCriticalRegion( - CGF, ".atomic_reduction", - [E](CodeGenFunction &CGF) { CGF.EmitIgnoredExpr(E); }, Loc); - }; - if ((*IPriv)->getType()->isArrayType()) { - auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); - auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); - EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, - CritRedGen); - } else - CritRedGen(CGF, nullptr, nullptr, nullptr); + if (auto *BORHS = + dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { + EExpr = BORHS->getRHS(); + BO = BORHS->getOpcode(); } - ++ILHS, ++IRHS, ++IPriv; + } + if (XExpr) { + auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); + auto &&AtomicRedGen = [BO, VD, IPriv, + Loc](CodeGenFunction &CGF, const Expr *XExpr, + const Expr *EExpr, const Expr *UpExpr) { + LValue X = CGF.EmitLValue(XExpr); + RValue E; + if (EExpr) + E = CGF.EmitAnyExpr(EExpr); + CGF.EmitOMPAtomicSimpleUpdateExpr( + X, E, BO, /*IsXLHSInRHSPart=*/true, + llvm::AtomicOrdering::Monotonic, Loc, + [&CGF, UpExpr, VD, IPriv, Loc](RValue XRValue) { + CodeGenFunction::OMPPrivateScope PrivateScope(CGF); + PrivateScope.addPrivate( + VD, [&CGF, VD, XRValue, Loc]() -> Address { + Address LHSTemp = CGF.CreateMemTemp(VD->getType()); + CGF.emitOMPSimpleStore( + CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, + VD->getType().getNonReferenceType(), Loc); + return LHSTemp; + }); + (void)PrivateScope.Privatize(); + return CGF.EmitAnyExpr(UpExpr); + }); + }; + if ((*IPriv)->getType()->isArrayType()) { + // Emit atomic reduction for array section. + auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); + EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, + AtomicRedGen, XExpr, EExpr, UpExpr); + } else + // Emit atomic reduction for array subscript or single variable. + AtomicRedGen(CGF, XExpr, EExpr, UpExpr); + } else { + // Emit as a critical region. 
+ auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *, + const Expr *, const Expr *) { + auto &RT = CGF.CGM.getOpenMPRuntime(); + RT.emitCriticalRegion( + CGF, ".atomic_reduction", + [=](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); + emitReductionCombiner(CGF, E); + }, + Loc); + }; + if ((*IPriv)->getType()->isArrayType()) { + auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); + auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); + EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, + CritRedGen); + } else + CritRedGen(CGF, nullptr, nullptr, nullptr); + } + ++ILHS; + ++IRHS; + ++IPriv; } - } + }; + RegionCodeGenTy AtomicRCG(AtomicCodeGen); + if (!WithNowait) { + // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); + llvm::Value *EndArgs[] = { + IdentTLoc, // ident_t *<loc> + ThreadId, // i32 <gtid> + Lock // kmp_critical_name *&<lock> + }; + CommonActionTy Action(nullptr, llvm::None, + createRuntimeFunction(OMPRTL__kmpc_end_reduce), + EndArgs); + AtomicRCG.setAction(Action); + AtomicRCG(CGF); + } else + AtomicRCG(CGF); CGF.EmitBranch(DefaultBB); CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); @@ -3598,6 +4627,8 @@ void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; // Ignore return result until untied tasks are supported. CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args); + if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) + Region->emitUntiedSwitch(CGF); } void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, @@ -3618,7 +4649,7 @@ enum RTCancelKind { CancelSections = 3, CancelTaskgroup = 4 }; -} +} // anonymous namespace static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { RTCancelKind CancelKind = CancelNoreq; @@ -3680,14 +4711,15 @@ void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, // kmp_int32 cncl_kind); if (auto *OMPRegionInfo = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { - auto &&ThenGen = [this, Loc, CancelRegion, - OMPRegionInfo](CodeGenFunction &CGF) { + auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF, + PrePostActionTy &) { + auto &RT = CGF.CGM.getOpenMPRuntime(); llvm::Value *Args[] = { - emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), + RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc), CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; // Ignore return result until untied tasks are supported. 
- auto *Result = - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_cancel), Args); + auto *Result = CGF.EmitRuntimeCall( + RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args); // if (__kmpc_cancel()) { // __kmpc_cancel_barrier(); // exit from construct; @@ -3698,7 +4730,7 @@ void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); CGF.EmitBlock(ExitBB); // __kmpc_cancel_barrier(); - emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false); + RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false); // exit from construct; auto CancelDest = CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); @@ -3706,18 +4738,21 @@ void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, CGF.EmitBlock(ContBB, /*IsFinished=*/true); }; if (IfCond) - emitOMPIfClause(CGF, IfCond, ThenGen, [](CodeGenFunction &) {}); - else - ThenGen(CGF); + emitOMPIfClause(CGF, IfCond, ThenGen, + [](CodeGenFunction &, PrePostActionTy &) {}); + else { + RegionCodeGenTy ThenRCG(ThenGen); + ThenRCG(CGF); + } } } /// \brief Obtain information that uniquely identifies a target entry. This -/// consists of the file and device IDs as well as line and column numbers -/// associated with the relevant entry source location. +/// consists of the file and device IDs as well as line number associated with +/// the relevant entry source location. static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, unsigned &DeviceID, unsigned &FileID, - unsigned &LineNum, unsigned &ColumnNum) { + unsigned &LineNum) { auto &SM = C.getSourceManager(); @@ -3737,49 +4772,45 @@ static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, DeviceID = ID.getDevice(); FileID = ID.getFile(); LineNum = PLoc.getLine(); - ColumnNum = PLoc.getColumn(); - return; } void CGOpenMPRuntime::emitTargetOutlinedFunction( const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, - bool IsOffloadEntry) { - + bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { assert(!ParentName.empty() && "Invalid target region parent name!"); - const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); - - // Emit target region as a standalone region. - auto &&CodeGen = [&CS](CodeGenFunction &CGF) { - CGF.EmitStmt(CS.getCapturedStmt()); - }; + emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, + IsOffloadEntry, CodeGen); +} - // Create a unique name for the proxy/entry function that using the source - // location information of the current target region. The name will be - // something like: +void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( + const OMPExecutableDirective &D, StringRef ParentName, + llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, + bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { + // Create a unique name for the entry function using the source location + // information of the current target region. The name will be something like: // - // .omp_offloading.DD_FFFF.PP.lBB.cCC + // __omp_offloading_DD_FFFF_PP_lBB // // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the - // mangled name of the function that encloses the target region, BB is the - // line number of the target region, and CC is the column number of the target - // region. + // mangled name of the function that encloses the target region and BB is the + // line number of the target region. 
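As a concrete illustration of the naming scheme just described: with hypothetical values DeviceID=0x23, FileID=0xbeef, ParentName="_Z3foov", and Line=42, the generated entry function would be named __omp_offloading_23_beef__Z3foov_l42. A minimal standalone sketch of the same formatting:

#include <cstdio>
#include <string>

// Illustrative only: mirrors the "__omp_offloading_DD_FFFF_PP_lBB" format with
// made-up IDs; the real values come from getTargetEntryUniqueInfo().
std::string sampleOffloadEntryName() {
  char Buf[128];
  std::snprintf(Buf, sizeof(Buf), "__omp_offloading_%x_%x_%s_l%u",
                0x23u, 0xbeefu, "_Z3foov", 42u);
  return std::string(Buf);
}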
unsigned DeviceID; unsigned FileID; unsigned Line; - unsigned Column; getTargetEntryUniqueInfo(CGM.getContext(), D.getLocStart(), DeviceID, FileID, - Line, Column); + Line); SmallString<64> EntryFnName; { llvm::raw_svector_ostream OS(EntryFnName); - OS << ".omp_offloading" << llvm::format(".%x", DeviceID) - << llvm::format(".%x.", FileID) << ParentName << ".l" << Line << ".c" - << Column; + OS << "__omp_offloading" << llvm::format("_%x", DeviceID) + << llvm::format("_%x_", FileID) << ParentName << "_l" << Line; } + const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); + CodeGenFunction CGF(CGM, true); CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); @@ -3813,18 +4844,122 @@ void CGOpenMPRuntime::emitTargetOutlinedFunction( // Register the information for the entry associated with this target region. OffloadEntriesInfoManager.registerTargetRegionEntryInfo( - DeviceID, FileID, ParentName, Line, Column, OutlinedFn, OutlinedFnID); - return; + DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID); } -void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, - const OMPExecutableDirective &D, - llvm::Value *OutlinedFn, - llvm::Value *OutlinedFnID, - const Expr *IfCond, const Expr *Device, - ArrayRef<llvm::Value *> CapturedVars) { - if (!CGF.HaveInsertPoint()) - return; +/// discard all CompoundStmts intervening between two constructs +static const Stmt *ignoreCompoundStmts(const Stmt *Body) { + while (auto *CS = dyn_cast_or_null<CompoundStmt>(Body)) + Body = CS->body_front(); + + return Body; +} + +/// \brief Emit the num_teams clause of an enclosed teams directive at the +/// target region scope. If there is no teams directive associated with the +/// target directive, or if there is no num_teams clause associated with the +/// enclosed teams directive, return nullptr. +static llvm::Value * +emitNumTeamsClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime, + CodeGenFunction &CGF, + const OMPExecutableDirective &D) { + + assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the " + "teams directive expected to be " + "emitted only for the host!"); + + // FIXME: For the moment we do not support combined directives with target and + // teams, so we do not expect to get any num_teams clause in the provided + // directive. Once we support that, this assertion can be replaced by the + // actual emission of the clause expression. + assert(D.getSingleClause<OMPNumTeamsClause>() == nullptr && + "Not expecting clause in directive."); + + // If the current target region has a teams region enclosed, we need to get + // the number of teams to pass to the runtime function call. This is done + // by generating the expression in a inlined region. This is required because + // the expression is captured in the enclosing target environment when the + // teams directive is not combined with target. + + const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); + + // FIXME: Accommodate other combined directives with teams when they become + // available. 
+ if (auto *TeamsDir = dyn_cast_or_null<OMPTeamsDirective>( + ignoreCompoundStmts(CS.getCapturedStmt()))) { + if (auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) { + CGOpenMPInnerExprInfo CGInfo(CGF, CS); + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); + llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams()); + return CGF.Builder.CreateIntCast(NumTeams, CGF.Int32Ty, + /*IsSigned=*/true); + } + + // If we have an enclosed teams directive but no num_teams clause we use + // the default value 0. + return CGF.Builder.getInt32(0); + } + + // No teams associated with the directive. + return nullptr; +} + +/// \brief Emit the thread_limit clause of an enclosed teams directive at the +/// target region scope. If there is no teams directive associated with the +/// target directive, or if there is no thread_limit clause associated with the +/// enclosed teams directive, return nullptr. +static llvm::Value * +emitThreadLimitClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime, + CodeGenFunction &CGF, + const OMPExecutableDirective &D) { + + assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the " + "teams directive expected to be " + "emitted only for the host!"); + + // FIXME: For the moment we do not support combined directives with target and + // teams, so we do not expect to get any thread_limit clause in the provided + // directive. Once we support that, this assertion can be replaced by the + // actual emission of the clause expression. + assert(D.getSingleClause<OMPThreadLimitClause>() == nullptr && + "Not expecting clause in directive."); + + // If the current target region has a teams region enclosed, we need to get + // the thread limit to pass to the runtime function call. This is done + // by generating the expression in a inlined region. This is required because + // the expression is captured in the enclosing target environment when the + // teams directive is not combined with target. + + const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); + + // FIXME: Accommodate other combined directives with teams when they become + // available. + if (auto *TeamsDir = dyn_cast_or_null<OMPTeamsDirective>( + ignoreCompoundStmts(CS.getCapturedStmt()))) { + if (auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) { + CGOpenMPInnerExprInfo CGInfo(CGF, CS); + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); + llvm::Value *ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit()); + return CGF.Builder.CreateIntCast(ThreadLimit, CGF.Int32Ty, + /*IsSigned=*/true); + } + + // If we have an enclosed teams directive but no thread_limit clause we use + // the default value 0. + return CGF.Builder.getInt32(0); + } + + // No teams associated with the directive. + return nullptr; +} + +namespace { +// \brief Utility to handle information from clauses associated with a given +// construct that use mappable expressions (e.g. 'map' clause, 'to' clause). +// It provides a convenient interface to obtain the information and generate +// code for that information. +class MappableExprsHandler { +public: /// \brief Values for bit flags used to specify the mapping type for /// offloading. enum OpenMPOffloadMappingFlags { @@ -3832,105 +4967,806 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, OMP_MAP_TO = 0x01, /// \brief Allocate memory on the device and move data from device to host. OMP_MAP_FROM = 0x02, - /// \brief The element passed to the device is a pointer. 
- OMP_MAP_PTR = 0x20, + /// \brief Always perform the requested mapping action on the element, even + /// if it was already mapped before. + OMP_MAP_ALWAYS = 0x04, + /// \brief Delete the element from the device environment, ignoring the + /// current reference count associated with the element. + OMP_MAP_DELETE = 0x08, + /// \brief The element being mapped is a pointer, therefore the pointee + /// should be mapped as well. + OMP_MAP_IS_PTR = 0x10, + /// \brief This flags signals that an argument is the first one relating to + /// a map/private clause expression. For some cases a single + /// map/privatization results in multiple arguments passed to the runtime + /// library. + OMP_MAP_FIRST_REF = 0x20, + /// \brief This flag signals that the reference being passed is a pointer to + /// private data. + OMP_MAP_PRIVATE_PTR = 0x80, /// \brief Pass the element to the device by value. - OMP_MAP_BYCOPY = 0x80, + OMP_MAP_PRIVATE_VAL = 0x100, }; - enum OpenMPOffloadingReservedDeviceIDs { - /// \brief Device ID if the device was not defined, runtime should get it - /// from environment variables in the spec. - OMP_DEVICEID_UNDEF = -1, - }; + typedef SmallVector<llvm::Value *, 16> MapValuesArrayTy; + typedef SmallVector<unsigned, 16> MapFlagsArrayTy; + +private: + /// \brief Directive from where the map clauses were extracted. + const OMPExecutableDirective &Directive; + + /// \brief Function the directive is being generated for. + CodeGenFunction &CGF; + + /// \brief Set of all first private variables in the current directive. + llvm::SmallPtrSet<const VarDecl *, 8> FirstPrivateDecls; + + llvm::Value *getExprTypeSize(const Expr *E) const { + auto ExprTy = E->getType().getCanonicalType(); + + // Reference types are ignored for mapping purposes. + if (auto *RefTy = ExprTy->getAs<ReferenceType>()) + ExprTy = RefTy->getPointeeType().getCanonicalType(); + + // Given that an array section is considered a built-in type, we need to + // do the calculation based on the length of the section instead of relying + // on CGF.getTypeSize(E->getType()). + if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) { + QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType( + OAE->getBase()->IgnoreParenImpCasts()) + .getCanonicalType(); + + // If there is no length associated with the expression, that means we + // are using the whole length of the base. + if (!OAE->getLength() && OAE->getColonLoc().isValid()) + return CGF.getTypeSize(BaseTy); + + llvm::Value *ElemSize; + if (auto *PTy = BaseTy->getAs<PointerType>()) + ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType()); + else { + auto *ATy = cast<ArrayType>(BaseTy.getTypePtr()); + assert(ATy && "Expecting array type if not a pointer type."); + ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType()); + } + + // If we don't have a length at this point, that is because we have an + // array section with a single element. + if (!OAE->getLength()) + return ElemSize; + + auto *LengthVal = CGF.EmitScalarExpr(OAE->getLength()); + LengthVal = + CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false); + return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); + } + return CGF.getTypeSize(ExprTy); + } + + /// \brief Return the corresponding bits for a given map clause modifier. Add + /// a flag marking the map as a pointer if requested. Add a flag marking the + /// map as the first one of a series of maps that relate to the same map + /// expression. 
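As a quick sanity check of how these bits combine (a standalone mirror of the constants above, not the enum itself), consider a hypothetical map(always, tofrom: x): the first entry emitted for the capture of x carries the to/from/always bits plus the first-reference marker, while a follow-up entry reached through a pointer in the same expression carries the pointer bit instead.

enum : unsigned {
  MAP_TO = 0x01,        // mirrors OMP_MAP_TO
  MAP_FROM = 0x02,      // mirrors OMP_MAP_FROM
  MAP_ALWAYS = 0x04,    // mirrors OMP_MAP_ALWAYS
  MAP_IS_PTR = 0x10,    // mirrors OMP_MAP_IS_PTR
  MAP_FIRST_REF = 0x20, // mirrors OMP_MAP_FIRST_REF
};
// map(always, tofrom: x), first entry for the capture of 'x':
static_assert((MAP_TO | MAP_FROM | MAP_ALWAYS | MAP_FIRST_REF) == 0x27,
              "to|from|always|first-ref");
// Later entry of the same expression reached through a pointer:
static_assert((MAP_TO | MAP_FROM | MAP_ALWAYS | MAP_IS_PTR) == 0x17,
              "to|from|always|is-ptr");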
+ unsigned getMapTypeBits(OpenMPMapClauseKind MapType, + OpenMPMapClauseKind MapTypeModifier, bool AddPtrFlag, + bool AddIsFirstFlag) const { + unsigned Bits = 0u; + switch (MapType) { + case OMPC_MAP_alloc: + case OMPC_MAP_release: + // alloc and release is the default behavior in the runtime library, i.e. + // if we don't pass any bits alloc/release that is what the runtime is + // going to do. Therefore, we don't need to signal anything for these two + // type modifiers. + break; + case OMPC_MAP_to: + Bits = OMP_MAP_TO; + break; + case OMPC_MAP_from: + Bits = OMP_MAP_FROM; + break; + case OMPC_MAP_tofrom: + Bits = OMP_MAP_TO | OMP_MAP_FROM; + break; + case OMPC_MAP_delete: + Bits = OMP_MAP_DELETE; + break; + default: + llvm_unreachable("Unexpected map type!"); + break; + } + if (AddPtrFlag) + Bits |= OMP_MAP_IS_PTR; + if (AddIsFirstFlag) + Bits |= OMP_MAP_FIRST_REF; + if (MapTypeModifier == OMPC_MAP_always) + Bits |= OMP_MAP_ALWAYS; + return Bits; + } + + /// \brief Return true if the provided expression is a final array section. A + /// final array section, is one whose length can't be proved to be one. + bool isFinalArraySectionExpression(const Expr *E) const { + auto *OASE = dyn_cast<OMPArraySectionExpr>(E); + + // It is not an array section and therefore not a unity-size one. + if (!OASE) + return false; + + // An array section with no colon always refer to a single element. + if (OASE->getColonLoc().isInvalid()) + return false; + + auto *Length = OASE->getLength(); + + // If we don't have a length we have to check if the array has size 1 + // for this dimension. Also, we should always expect a length if the + // base type is pointer. + if (!Length) { + auto BaseQTy = OMPArraySectionExpr::getBaseOriginalType( + OASE->getBase()->IgnoreParenImpCasts()) + .getCanonicalType(); + if (auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) + return ATy->getSize().getSExtValue() != 1; + // If we don't have a constant dimension length, we have to consider + // the current section as having any size, so it is not necessarily + // unitary. If it happen to be unity size, that's user fault. + return true; + } + + // Check if the length evaluates to 1. + llvm::APSInt ConstLength; + if (!Length->EvaluateAsInt(ConstLength, CGF.getContext())) + return true; // Can have more that size 1. + + return ConstLength.getSExtValue() != 1; + } + + /// \brief Generate the base pointers, section pointers, sizes and map type + /// bits for the provided map type, map modifier, and expression components. + /// \a IsFirstComponent should be set to true if the provided set of + /// components is the first associated with a capture. + void generateInfoForComponentList( + OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier, + OMPClauseMappableExprCommon::MappableExprComponentListRef Components, + MapValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, + MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, + bool IsFirstComponentList) const { + + // The following summarizes what has to be generated for each map and the + // types bellow. The generated information is expressed in this order: + // base pointer, section pointer, size, flags + // (to add to the ones that come from the map type and modifier). 
+ // + // double d; + // int i[100]; + // float *p; + // + // struct S1 { + // int i; + // float f[50]; + // } + // struct S2 { + // int i; + // float f[50]; + // S1 s; + // double *p; + // struct S2 *ps; + // } + // S2 s; + // S2 *ps; + // + // map(d) + // &d, &d, sizeof(double), noflags + // + // map(i) + // &i, &i, 100*sizeof(int), noflags + // + // map(i[1:23]) + // &i(=&i[0]), &i[1], 23*sizeof(int), noflags + // + // map(p) + // &p, &p, sizeof(float*), noflags + // + // map(p[1:24]) + // p, &p[1], 24*sizeof(float), noflags + // + // map(s) + // &s, &s, sizeof(S2), noflags + // + // map(s.i) + // &s, &(s.i), sizeof(int), noflags + // + // map(s.s.f) + // &s, &(s.i.f), 50*sizeof(int), noflags + // + // map(s.p) + // &s, &(s.p), sizeof(double*), noflags + // + // map(s.p[:22], s.a s.b) + // &s, &(s.p), sizeof(double*), noflags + // &(s.p), &(s.p[0]), 22*sizeof(double), ptr_flag + extra_flag + // + // map(s.ps) + // &s, &(s.ps), sizeof(S2*), noflags + // + // map(s.ps->s.i) + // &s, &(s.ps), sizeof(S2*), noflags + // &(s.ps), &(s.ps->s.i), sizeof(int), ptr_flag + extra_flag + // + // map(s.ps->ps) + // &s, &(s.ps), sizeof(S2*), noflags + // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag + // + // map(s.ps->ps->ps) + // &s, &(s.ps), sizeof(S2*), noflags + // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag + // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag + // + // map(s.ps->ps->s.f[:22]) + // &s, &(s.ps), sizeof(S2*), noflags + // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag + // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), ptr_flag + extra_flag + // + // map(ps) + // &ps, &ps, sizeof(S2*), noflags + // + // map(ps->i) + // ps, &(ps->i), sizeof(int), noflags + // + // map(ps->s.f) + // ps, &(ps->s.f[0]), 50*sizeof(float), noflags + // + // map(ps->p) + // ps, &(ps->p), sizeof(double*), noflags + // + // map(ps->p[:22]) + // ps, &(ps->p), sizeof(double*), noflags + // &(ps->p), &(ps->p[0]), 22*sizeof(double), ptr_flag + extra_flag + // + // map(ps->ps) + // ps, &(ps->ps), sizeof(S2*), noflags + // + // map(ps->ps->s.i) + // ps, &(ps->ps), sizeof(S2*), noflags + // &(ps->ps), &(ps->ps->s.i), sizeof(int), ptr_flag + extra_flag + // + // map(ps->ps->ps) + // ps, &(ps->ps), sizeof(S2*), noflags + // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag + // + // map(ps->ps->ps->ps) + // ps, &(ps->ps), sizeof(S2*), noflags + // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag + // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag + // + // map(ps->ps->ps->s.f[:22]) + // ps, &(ps->ps), sizeof(S2*), noflags + // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag + // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), ptr_flag + + // extra_flag + + // Track if the map information being generated is the first for a capture. + bool IsCaptureFirstInfo = IsFirstComponentList; + + // Scan the components from the base to the complete expression. + auto CI = Components.rbegin(); + auto CE = Components.rend(); + auto I = CI; + + // Track if the map information being generated is the first for a list of + // components. + bool IsExpressionFirstInfo = true; + llvm::Value *BP = nullptr; + + if (auto *ME = dyn_cast<MemberExpr>(I->getAssociatedExpression())) { + // The base is the 'this' pointer. The content of the pointer is going + // to be the base of the field being mapped. + BP = CGF.EmitScalarExpr(ME->getBase()); + } else { + // The base is the reference to the variable. + // BP = &Var. 
+ BP = CGF.EmitLValue(cast<DeclRefExpr>(I->getAssociatedExpression())) + .getPointer(); + + // If the variable is a pointer and is being dereferenced (i.e. is not + // the last component), the base has to be the pointer itself, not its + // reference. + if (I->getAssociatedDeclaration()->getType()->isAnyPointerType() && + std::next(I) != CE) { + auto PtrAddr = CGF.MakeNaturalAlignAddrLValue( + BP, I->getAssociatedDeclaration()->getType()); + BP = CGF.EmitLoadOfPointerLValue(PtrAddr.getAddress(), + I->getAssociatedDeclaration() + ->getType() + ->getAs<PointerType>()) + .getPointer(); + + // We do not need to generate individual map information for the + // pointer, it can be associated with the combined storage. + ++I; + } + } + + for (; I != CE; ++I) { + auto Next = std::next(I); + + // We need to generate the addresses and sizes if this is the last + // component, if the component is a pointer or if it is an array section + // whose length can't be proved to be one. If this is a pointer, it + // becomes the base address for the following components. + + // A final array section, is one whose length can't be proved to be one. + bool IsFinalArraySection = + isFinalArraySectionExpression(I->getAssociatedExpression()); + + // Get information on whether the element is a pointer. Have to do a + // special treatment for array sections given that they are built-in + // types. + const auto *OASE = + dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression()); + bool IsPointer = + (OASE && + OMPArraySectionExpr::getBaseOriginalType(OASE) + .getCanonicalType() + ->isAnyPointerType()) || + I->getAssociatedExpression()->getType()->isAnyPointerType(); + + if (Next == CE || IsPointer || IsFinalArraySection) { + + // If this is not the last component, we expect the pointer to be + // associated with an array expression or member expression. + assert((Next == CE || + isa<MemberExpr>(Next->getAssociatedExpression()) || + isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || + isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) && + "Unexpected expression"); + + // Save the base we are currently using. + BasePointers.push_back(BP); + + auto *LB = CGF.EmitLValue(I->getAssociatedExpression()).getPointer(); + auto *Size = getExprTypeSize(I->getAssociatedExpression()); + + Pointers.push_back(LB); + Sizes.push_back(Size); + // We need to add a pointer flag for each map that comes from the + // same expression except for the first one. We also need to signal + // this map is the first one that relates with the current capture + // (there is a set of entries for each capture). + Types.push_back(getMapTypeBits(MapType, MapTypeModifier, + !IsExpressionFirstInfo, + IsCaptureFirstInfo)); + + // If we have a final array section, we are done with this expression. + if (IsFinalArraySection) + break; + + // The pointer becomes the base for the next element. + if (Next != CE) + BP = LB; + + IsExpressionFirstInfo = false; + IsCaptureFirstInfo = false; + continue; + } + } + } + + /// \brief Return the adjusted map modifiers if the declaration a capture + /// refers to appears in a first-private clause. This is expected to be used + /// only with directives that start with 'target'. + unsigned adjustMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap, + unsigned CurrentModifiers) { + assert(Cap.capturesVariable() && "Expected capture by reference only!"); + + // A first private variable captured by reference will use only the + // 'private ptr' and 'map to' flag. 
Return the right flags if the captured + // declaration is known as first-private in this handler. + if (FirstPrivateDecls.count(Cap.getCapturedVar())) + return MappableExprsHandler::OMP_MAP_PRIVATE_PTR | + MappableExprsHandler::OMP_MAP_TO; + + // We didn't modify anything. + return CurrentModifiers; + } + +public: + MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) + : Directive(Dir), CGF(CGF) { + // Extract firstprivate clause information. + for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) + for (const auto *D : C->varlists()) + FirstPrivateDecls.insert( + cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl()); + } + + /// \brief Generate all the base pointers, section pointers, sizes and map + /// types for the extracted mappable expressions. + void generateAllInfo(MapValuesArrayTy &BasePointers, + MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, + MapFlagsArrayTy &Types) const { + BasePointers.clear(); + Pointers.clear(); + Sizes.clear(); + Types.clear(); + + struct MapInfo { + OMPClauseMappableExprCommon::MappableExprComponentListRef Components; + OpenMPMapClauseKind MapType; + OpenMPMapClauseKind MapTypeModifier; + }; + + // We have to process the component lists that relate with the same + // declaration in a single chunk so that we can generate the map flags + // correctly. Therefore, we organize all lists in a map. + llvm::DenseMap<const ValueDecl *, SmallVector<MapInfo, 8>> Info; + + // Helper function to fill the information map for the different supported + // clauses. + auto &&InfoGen = + [&Info](const ValueDecl *D, + OMPClauseMappableExprCommon::MappableExprComponentListRef L, + OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapModifier) { + const ValueDecl *VD = + D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr; + Info[VD].push_back({L, MapType, MapModifier}); + }; + + for (auto *C : Directive.getClausesOfKind<OMPMapClause>()) + for (auto L : C->component_lists()) + InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifier()); + for (auto *C : Directive.getClausesOfKind<OMPToClause>()) + for (auto L : C->component_lists()) + InfoGen(L.first, L.second, OMPC_MAP_to, OMPC_MAP_unknown); + for (auto *C : Directive.getClausesOfKind<OMPFromClause>()) + for (auto L : C->component_lists()) + InfoGen(L.first, L.second, OMPC_MAP_from, OMPC_MAP_unknown); + + for (auto &M : Info) { + // We need to know when we generate information for the first component + // associated with a capture, because the mapping flags depend on it. + bool IsFirstComponentList = true; + for (MapInfo &L : M.second) { + assert(!L.Components.empty() && + "Not expecting declaration with no component lists."); + generateInfoForComponentList(L.MapType, L.MapTypeModifier, L.Components, + BasePointers, Pointers, Sizes, Types, + IsFirstComponentList); + IsFirstComponentList = false; + } + } + } + + /// \brief Generate the base pointers, section pointers, sizes and map types + /// associated to a given capture. + void generateInfoForCapture(const CapturedStmt::Capture *Cap, + MapValuesArrayTy &BasePointers, + MapValuesArrayTy &Pointers, + MapValuesArrayTy &Sizes, + MapFlagsArrayTy &Types) const { + assert(!Cap->capturesVariableArrayType() && + "Not expecting to generate map info for a variable array type!"); + + BasePointers.clear(); + Pointers.clear(); + Sizes.clear(); + Types.clear(); + + const ValueDecl *VD = + Cap->capturesThis() + ? 
nullptr + : cast<ValueDecl>(Cap->getCapturedVar()->getCanonicalDecl()); + + // We need to know when we generating information for the first component + // associated with a capture, because the mapping flags depend on it. + bool IsFirstComponentList = true; + for (auto *C : Directive.getClausesOfKind<OMPMapClause>()) + for (auto L : C->decl_component_lists(VD)) { + assert(L.first == VD && + "We got information for the wrong declaration??"); + assert(!L.second.empty() && + "Not expecting declaration with no component lists."); + generateInfoForComponentList(C->getMapType(), C->getMapTypeModifier(), + L.second, BasePointers, Pointers, Sizes, + Types, IsFirstComponentList); + IsFirstComponentList = false; + } + + return; + } + + /// \brief Generate the default map information for a given capture \a CI, + /// record field declaration \a RI and captured value \a CV. + void generateDefaultMapInfo( + const CapturedStmt::Capture &CI, const FieldDecl &RI, llvm::Value *CV, + MappableExprsHandler::MapValuesArrayTy &CurBasePointers, + MappableExprsHandler::MapValuesArrayTy &CurPointers, + MappableExprsHandler::MapValuesArrayTy &CurSizes, + MappableExprsHandler::MapFlagsArrayTy &CurMapTypes) { + + // Do the default mapping. + if (CI.capturesThis()) { + CurBasePointers.push_back(CV); + CurPointers.push_back(CV); + const PointerType *PtrTy = cast<PointerType>(RI.getType().getTypePtr()); + CurSizes.push_back(CGF.getTypeSize(PtrTy->getPointeeType())); + // Default map type. + CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_TO | + MappableExprsHandler::OMP_MAP_FROM); + } else if (CI.capturesVariableByCopy()) { + CurBasePointers.push_back(CV); + CurPointers.push_back(CV); + if (!RI.getType()->isAnyPointerType()) { + // We have to signal to the runtime captures passed by value that are + // not pointers. + CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_PRIVATE_VAL); + CurSizes.push_back(CGF.getTypeSize(RI.getType())); + } else { + // Pointers are implicitly mapped with a zero size and no flags + // (other than first map that is added for all implicit maps). + CurMapTypes.push_back(0u); + CurSizes.push_back(llvm::Constant::getNullValue(CGF.SizeTy)); + } + } else { + assert(CI.capturesVariable() && "Expected captured reference."); + CurBasePointers.push_back(CV); + CurPointers.push_back(CV); + + const ReferenceType *PtrTy = + cast<ReferenceType>(RI.getType().getTypePtr()); + QualType ElementType = PtrTy->getPointeeType(); + CurSizes.push_back(CGF.getTypeSize(ElementType)); + // The default map type for a scalar/complex type is 'to' because by + // default the value doesn't have to be retrieved. For an aggregate + // type, the default is 'tofrom'. + CurMapTypes.push_back(ElementType->isAggregateType() + ? (MappableExprsHandler::OMP_MAP_TO | + MappableExprsHandler::OMP_MAP_FROM) + : MappableExprsHandler::OMP_MAP_TO); + + // If we have a capture by reference we may need to add the private + // pointer flag if the base declaration shows in some first-private + // clause. + CurMapTypes.back() = + adjustMapModifiersForPrivateClauses(CI, CurMapTypes.back()); + } + // Every default map produces a single argument, so, it is always the + // first one. + CurMapTypes.back() |= MappableExprsHandler::OMP_MAP_FIRST_REF; + } +}; + +enum OpenMPOffloadingReservedDeviceIDs { + /// \brief Device ID if the device was not defined, runtime should get it + /// from environment variables in the spec. 
+ OMP_DEVICEID_UNDEF = -1, +}; +} // anonymous namespace + +/// \brief Emit the arrays used to pass the captures and map information to the +/// offloading runtime library. If there is no map or capture information, +/// return nullptr by reference. +static void +emitOffloadingArrays(CodeGenFunction &CGF, llvm::Value *&BasePointersArray, + llvm::Value *&PointersArray, llvm::Value *&SizesArray, + llvm::Value *&MapTypesArray, + MappableExprsHandler::MapValuesArrayTy &BasePointers, + MappableExprsHandler::MapValuesArrayTy &Pointers, + MappableExprsHandler::MapValuesArrayTy &Sizes, + MappableExprsHandler::MapFlagsArrayTy &MapTypes) { + auto &CGM = CGF.CGM; + auto &Ctx = CGF.getContext(); + + BasePointersArray = PointersArray = SizesArray = MapTypesArray = nullptr; + + if (unsigned PointerNumVal = BasePointers.size()) { + // Detect if we have any capture size requiring runtime evaluation of the + // size so that a constant array could be eventually used. + bool hasRuntimeEvaluationCaptureSize = false; + for (auto *S : Sizes) + if (!isa<llvm::Constant>(S)) { + hasRuntimeEvaluationCaptureSize = true; + break; + } + + llvm::APInt PointerNumAP(32, PointerNumVal, /*isSigned=*/true); + QualType PointerArrayType = + Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal, + /*IndexTypeQuals=*/0); + + BasePointersArray = + CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); + PointersArray = + CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); + + // If we don't have any VLA types or other types that require runtime + // evaluation, we can use a constant array for the map sizes, otherwise we + // need to fill up the arrays as we do for the pointers. + if (hasRuntimeEvaluationCaptureSize) { + QualType SizeArrayType = Ctx.getConstantArrayType( + Ctx.getSizeType(), PointerNumAP, ArrayType::Normal, + /*IndexTypeQuals=*/0); + SizesArray = + CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); + } else { + // We expect all the sizes to be constant, so we collect them to create + // a constant array. + SmallVector<llvm::Constant *, 16> ConstSizes; + for (auto S : Sizes) + ConstSizes.push_back(cast<llvm::Constant>(S)); + + auto *SizesArrayInit = llvm::ConstantArray::get( + llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes); + auto *SizesArrayGbl = new llvm::GlobalVariable( + CGM.getModule(), SizesArrayInit->getType(), + /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, + SizesArrayInit, ".offload_sizes"); + SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); + SizesArray = SizesArrayGbl; + } + + // The map types are always constant so we don't need to generate code to + // fill arrays. Instead, we create an array constant. 
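To make the constant-vs-runtime size distinction concrete, a hypothetical caller: only when at least one mapped size cannot be folded to a constant (here the dst[0:n] section) does this path fill a stack-allocated sizes array with stores; with only fixed-size captures, .offload_sizes becomes a private constant global instead.

void copy_out(int n, float *dst) {
  float fixed[64];
  #pragma omp target map(to: fixed) map(from: dst[0:n])
  {
    dst[0] = fixed[0];
  }
  // sizeof(fixed) is a compile-time constant, but the dst[0:n] section is
  // n * sizeof(float), so the sizes array must be materialized at run time.
}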
+ llvm::Constant *MapTypesArrayInit = + llvm::ConstantDataArray::get(CGF.Builder.getContext(), MapTypes); + auto *MapTypesArrayGbl = new llvm::GlobalVariable( + CGM.getModule(), MapTypesArrayInit->getType(), + /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, + MapTypesArrayInit, ".offload_maptypes"); + MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); + MapTypesArray = MapTypesArrayGbl; + + for (unsigned i = 0; i < PointerNumVal; ++i) { + llvm::Value *BPVal = BasePointers[i]; + if (BPVal->getType()->isPointerTy()) + BPVal = CGF.Builder.CreateBitCast(BPVal, CGM.VoidPtrTy); + else { + assert(BPVal->getType()->isIntegerTy() && + "If not a pointer, the value type must be an integer."); + BPVal = CGF.Builder.CreateIntToPtr(BPVal, CGM.VoidPtrTy); + } + llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( + llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), BasePointersArray, + 0, i); + Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); + CGF.Builder.CreateStore(BPVal, BPAddr); + + llvm::Value *PVal = Pointers[i]; + if (PVal->getType()->isPointerTy()) + PVal = CGF.Builder.CreateBitCast(PVal, CGM.VoidPtrTy); + else { + assert(PVal->getType()->isIntegerTy() && + "If not a pointer, the value type must be an integer."); + PVal = CGF.Builder.CreateIntToPtr(PVal, CGM.VoidPtrTy); + } + llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( + llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), PointersArray, 0, + i); + Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); + CGF.Builder.CreateStore(PVal, PAddr); + + if (hasRuntimeEvaluationCaptureSize) { + llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( + llvm::ArrayType::get(CGM.SizeTy, PointerNumVal), SizesArray, + /*Idx0=*/0, + /*Idx1=*/i); + Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType())); + CGF.Builder.CreateStore( + CGF.Builder.CreateIntCast(Sizes[i], CGM.SizeTy, /*isSigned=*/true), + SAddr); + } + } + } +} +/// \brief Emit the arguments to be passed to the runtime library based on the +/// arrays of pointers, sizes and map types. 
+static void emitOffloadingArraysArgument( + CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, + llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, + llvm::Value *&MapTypesArrayArg, llvm::Value *BasePointersArray, + llvm::Value *PointersArray, llvm::Value *SizesArray, + llvm::Value *MapTypesArray, unsigned NumElems) { + auto &CGM = CGF.CGM; + if (NumElems) { + BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( + llvm::ArrayType::get(CGM.VoidPtrTy, NumElems), BasePointersArray, + /*Idx0=*/0, /*Idx1=*/0); + PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( + llvm::ArrayType::get(CGM.VoidPtrTy, NumElems), PointersArray, + /*Idx0=*/0, + /*Idx1=*/0); + SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( + llvm::ArrayType::get(CGM.SizeTy, NumElems), SizesArray, + /*Idx0=*/0, /*Idx1=*/0); + MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( + llvm::ArrayType::get(CGM.Int32Ty, NumElems), MapTypesArray, + /*Idx0=*/0, + /*Idx1=*/0); + } else { + BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); + PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); + SizesArrayArg = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo()); + MapTypesArrayArg = + llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()); + } +} + +void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, + const OMPExecutableDirective &D, + llvm::Value *OutlinedFn, + llvm::Value *OutlinedFnID, + const Expr *IfCond, const Expr *Device, + ArrayRef<llvm::Value *> CapturedVars) { + if (!CGF.HaveInsertPoint()) + return; assert(OutlinedFn && "Invalid outlined function!"); auto &Ctx = CGF.getContext(); - // Fill up the arrays with the all the captured variables. - SmallVector<llvm::Value *, 16> BasePointers; - SmallVector<llvm::Value *, 16> Pointers; - SmallVector<llvm::Value *, 16> Sizes; - SmallVector<unsigned, 16> MapTypes; + // Fill up the arrays with all the captured variables. + MappableExprsHandler::MapValuesArrayTy KernelArgs; + MappableExprsHandler::MapValuesArrayTy BasePointers; + MappableExprsHandler::MapValuesArrayTy Pointers; + MappableExprsHandler::MapValuesArrayTy Sizes; + MappableExprsHandler::MapFlagsArrayTy MapTypes; - bool hasVLACaptures = false; + MappableExprsHandler::MapValuesArrayTy CurBasePointers; + MappableExprsHandler::MapValuesArrayTy CurPointers; + MappableExprsHandler::MapValuesArrayTy CurSizes; + MappableExprsHandler::MapFlagsArrayTy CurMapTypes; + + // Get mappable expression information. + MappableExprsHandler MEHandler(D, CGF); const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); auto RI = CS.getCapturedRecordDecl()->field_begin(); - // auto II = CS.capture_init_begin(); auto CV = CapturedVars.begin(); for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), CE = CS.capture_end(); CI != CE; ++CI, ++RI, ++CV) { StringRef Name; QualType Ty; - llvm::Value *BasePointer; - llvm::Value *Pointer; - llvm::Value *Size; - unsigned MapType; - // VLA sizes are passed to the outlined region by copy. + CurBasePointers.clear(); + CurPointers.clear(); + CurSizes.clear(); + CurMapTypes.clear(); + + // VLA sizes are passed to the outlined region by copy and do not have map + // information associated. if (CI->capturesVariableArrayType()) { - BasePointer = Pointer = *CV; - Size = getTypeSize(CGF, RI->getType()); + CurBasePointers.push_back(*CV); + CurPointers.push_back(*CV); + CurSizes.push_back(CGF.getTypeSize(RI->getType())); // Copy to the device as an argument. No need to retrieve it. 
- MapType = OMP_MAP_BYCOPY; - hasVLACaptures = true; - } else if (CI->capturesThis()) { - BasePointer = Pointer = *CV; - const PointerType *PtrTy = cast<PointerType>(RI->getType().getTypePtr()); - Size = getTypeSize(CGF, PtrTy->getPointeeType()); - // Default map type. - MapType = OMP_MAP_TO | OMP_MAP_FROM; - } else if (CI->capturesVariableByCopy()) { - MapType = OMP_MAP_BYCOPY; - if (!RI->getType()->isAnyPointerType()) { - // If the field is not a pointer, we need to save the actual value and - // load it as a void pointer. - auto DstAddr = CGF.CreateMemTemp( - Ctx.getUIntPtrType(), - Twine(CI->getCapturedVar()->getName()) + ".casted"); - LValue DstLV = CGF.MakeAddrLValue(DstAddr, Ctx.getUIntPtrType()); - - auto *SrcAddrVal = CGF.EmitScalarConversion( - DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()), - Ctx.getPointerType(RI->getType()), SourceLocation()); - LValue SrcLV = - CGF.MakeNaturalAlignAddrLValue(SrcAddrVal, RI->getType()); - - // Store the value using the source type pointer. - CGF.EmitStoreThroughLValue(RValue::get(*CV), SrcLV); - - // Load the value using the destination type pointer. - BasePointer = Pointer = - CGF.EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal(); - } else { - MapType |= OMP_MAP_PTR; - BasePointer = Pointer = *CV; - } - Size = getTypeSize(CGF, RI->getType()); + CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_PRIVATE_VAL | + MappableExprsHandler::OMP_MAP_FIRST_REF); } else { - assert(CI->capturesVariable() && "Expected captured reference."); - BasePointer = Pointer = *CV; - - const ReferenceType *PtrTy = - cast<ReferenceType>(RI->getType().getTypePtr()); - QualType ElementType = PtrTy->getPointeeType(); - Size = getTypeSize(CGF, ElementType); - // The default map type for a scalar/complex type is 'to' because by - // default the value doesn't have to be retrieved. For an aggregate type, - // the default is 'tofrom'. - MapType = ElementType->isAggregateType() ? (OMP_MAP_TO | OMP_MAP_FROM) - : OMP_MAP_TO; - if (ElementType->isAnyPointerType()) - MapType |= OMP_MAP_PTR; + // If we have any information in the map clause, we use it, otherwise we + // just do a default mapping. + MEHandler.generateInfoForCapture(CI, CurBasePointers, CurPointers, + CurSizes, CurMapTypes); + if (CurBasePointers.empty()) + MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers, + CurPointers, CurSizes, CurMapTypes); } - - BasePointers.push_back(BasePointer); - Pointers.push_back(Pointer); - Sizes.push_back(Size); - MapTypes.push_back(MapType); + // We expect to have at least an element of information for this capture. + assert(!CurBasePointers.empty() && "Non-existing map pointer for capture!"); + assert(CurBasePointers.size() == CurPointers.size() && + CurBasePointers.size() == CurSizes.size() && + CurBasePointers.size() == CurMapTypes.size() && + "Inconsistent map information sizes!"); + + // The kernel args are always the first elements of the base pointers + // associated with a capture. + KernelArgs.push_back(CurBasePointers.front()); + // We need to append the results of this capture to what we already have. + BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); + Pointers.append(CurPointers.begin(), CurPointers.end()); + Sizes.append(CurSizes.begin(), CurSizes.end()); + MapTypes.append(CurMapTypes.begin(), CurMapTypes.end()); } // Keep track on whether the host function has to be executed. 
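A hypothetical region with no map clauses, annotated with the defaults that generateDefaultMapInfo above would pick for each capture (every default entry also gets the first-reference bit, since it is the only entry for its capture):

struct Point { double x, y, z; };

void kernel(Point &p, float *buf) {
  int scale = 2;
  // No map clauses: default mapping applies.
  #pragma omp target
  {
    // scale : captured by copy, non-pointer     -> passed as a private value
    // buf   : captured by copy, pointer         -> zero size, no extra flags
    // p     : captured by reference, aggregate  -> mapped to + from
    p.x = scale + buf[0];
  }
}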
@@ -3943,128 +5779,22 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, OffloadError); // Fill up the pointer arrays and transfer execution to the device. - auto &&ThenGen = [this, &Ctx, &BasePointers, &Pointers, &Sizes, &MapTypes, - hasVLACaptures, Device, OutlinedFnID, OffloadError, - OffloadErrorQType](CodeGenFunction &CGF) { - unsigned PointerNumVal = BasePointers.size(); - llvm::Value *PointerNum = CGF.Builder.getInt32(PointerNumVal); + auto &&ThenGen = [&Ctx, &BasePointers, &Pointers, &Sizes, &MapTypes, Device, + OutlinedFnID, OffloadError, OffloadErrorQType, + &D](CodeGenFunction &CGF, PrePostActionTy &) { + auto &RT = CGF.CGM.getOpenMPRuntime(); + // Emit the offloading arrays. llvm::Value *BasePointersArray; llvm::Value *PointersArray; llvm::Value *SizesArray; llvm::Value *MapTypesArray; - - if (PointerNumVal) { - llvm::APInt PointerNumAP(32, PointerNumVal, /*isSigned=*/true); - QualType PointerArrayType = Ctx.getConstantArrayType( - Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal, - /*IndexTypeQuals=*/0); - - BasePointersArray = - CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); - PointersArray = - CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); - - // If we don't have any VLA types, we can use a constant array for the map - // sizes, otherwise we need to fill up the arrays as we do for the - // pointers. - if (hasVLACaptures) { - QualType SizeArrayType = Ctx.getConstantArrayType( - Ctx.getSizeType(), PointerNumAP, ArrayType::Normal, - /*IndexTypeQuals=*/0); - SizesArray = - CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); - } else { - // We expect all the sizes to be constant, so we collect them to create - // a constant array. - SmallVector<llvm::Constant *, 16> ConstSizes; - for (auto S : Sizes) - ConstSizes.push_back(cast<llvm::Constant>(S)); - - auto *SizesArrayInit = llvm::ConstantArray::get( - llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes); - auto *SizesArrayGbl = new llvm::GlobalVariable( - CGM.getModule(), SizesArrayInit->getType(), - /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, - SizesArrayInit, ".offload_sizes"); - SizesArrayGbl->setUnnamedAddr(true); - SizesArray = SizesArrayGbl; - } - - // The map types are always constant so we don't need to generate code to - // fill arrays. Instead, we create an array constant. 
- llvm::Constant *MapTypesArrayInit = - llvm::ConstantDataArray::get(CGF.Builder.getContext(), MapTypes); - auto *MapTypesArrayGbl = new llvm::GlobalVariable( - CGM.getModule(), MapTypesArrayInit->getType(), - /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, - MapTypesArrayInit, ".offload_maptypes"); - MapTypesArrayGbl->setUnnamedAddr(true); - MapTypesArray = MapTypesArrayGbl; - - for (unsigned i = 0; i < PointerNumVal; ++i) { - - llvm::Value *BPVal = BasePointers[i]; - if (BPVal->getType()->isPointerTy()) - BPVal = CGF.Builder.CreateBitCast(BPVal, CGM.VoidPtrTy); - else { - assert(BPVal->getType()->isIntegerTy() && - "If not a pointer, the value type must be an integer."); - BPVal = CGF.Builder.CreateIntToPtr(BPVal, CGM.VoidPtrTy); - } - llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( - llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), - BasePointersArray, 0, i); - Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); - CGF.Builder.CreateStore(BPVal, BPAddr); - - llvm::Value *PVal = Pointers[i]; - if (PVal->getType()->isPointerTy()) - PVal = CGF.Builder.CreateBitCast(PVal, CGM.VoidPtrTy); - else { - assert(PVal->getType()->isIntegerTy() && - "If not a pointer, the value type must be an integer."); - PVal = CGF.Builder.CreateIntToPtr(PVal, CGM.VoidPtrTy); - } - llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( - llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), PointersArray, - 0, i); - Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); - CGF.Builder.CreateStore(PVal, PAddr); - - if (hasVLACaptures) { - llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( - llvm::ArrayType::get(CGM.SizeTy, PointerNumVal), SizesArray, - /*Idx0=*/0, - /*Idx1=*/i); - Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType())); - CGF.Builder.CreateStore(CGF.Builder.CreateIntCast( - Sizes[i], CGM.SizeTy, /*isSigned=*/true), - SAddr); - } - } - - BasePointersArray = CGF.Builder.CreateConstInBoundsGEP2_32( - llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), BasePointersArray, - /*Idx0=*/0, /*Idx1=*/0); - PointersArray = CGF.Builder.CreateConstInBoundsGEP2_32( - llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), PointersArray, - /*Idx0=*/0, - /*Idx1=*/0); - SizesArray = CGF.Builder.CreateConstInBoundsGEP2_32( - llvm::ArrayType::get(CGM.SizeTy, PointerNumVal), SizesArray, - /*Idx0=*/0, /*Idx1=*/0); - MapTypesArray = CGF.Builder.CreateConstInBoundsGEP2_32( - llvm::ArrayType::get(CGM.Int32Ty, PointerNumVal), MapTypesArray, - /*Idx0=*/0, - /*Idx1=*/0); - - } else { - BasePointersArray = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); - PointersArray = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); - SizesArray = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo()); - MapTypesArray = - llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()); - } + emitOffloadingArrays(CGF, BasePointersArray, PointersArray, SizesArray, + MapTypesArray, BasePointers, Pointers, Sizes, + MapTypes); + emitOffloadingArraysArgument(CGF, BasePointersArray, PointersArray, + SizesArray, MapTypesArray, BasePointersArray, + PointersArray, SizesArray, MapTypesArray, + BasePointers.size()); // On top of the arrays that were filled up, the target offloading call // takes as arguments the device id as well as the host pointer. 
The host @@ -4082,23 +5812,48 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, llvm::Value *DeviceID; if (Device) DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), - CGM.Int32Ty, /*isSigned=*/true); + CGF.Int32Ty, /*isSigned=*/true); else DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); - llvm::Value *OffloadingArgs[] = { - DeviceID, OutlinedFnID, PointerNum, BasePointersArray, - PointersArray, SizesArray, MapTypesArray}; - auto Return = CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target), - OffloadingArgs); + // Emit the number of elements in the offloading arrays. + llvm::Value *PointerNum = CGF.Builder.getInt32(BasePointers.size()); + + // Return value of the runtime offloading call. + llvm::Value *Return; + + auto *NumTeams = emitNumTeamsClauseForTargetDirective(RT, CGF, D); + auto *ThreadLimit = emitThreadLimitClauseForTargetDirective(RT, CGF, D); + + // If we have NumTeams defined this means that we have an enclosed teams + // region. Therefore we also expect to have ThreadLimit defined. These two + // values should be defined in the presence of a teams directive, regardless + // of having any clauses associated. If the user is using teams but no + // clauses, these two values will be the default that should be passed to + // the runtime library - a 32-bit integer with the value zero. + if (NumTeams) { + assert(ThreadLimit && "Thread limit expression should be available along " + "with number of teams."); + llvm::Value *OffloadingArgs[] = { + DeviceID, OutlinedFnID, PointerNum, + BasePointersArray, PointersArray, SizesArray, + MapTypesArray, NumTeams, ThreadLimit}; + Return = CGF.EmitRuntimeCall( + RT.createRuntimeFunction(OMPRTL__tgt_target_teams), OffloadingArgs); + } else { + llvm::Value *OffloadingArgs[] = { + DeviceID, OutlinedFnID, PointerNum, BasePointersArray, + PointersArray, SizesArray, MapTypesArray}; + Return = CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target), + OffloadingArgs); + } CGF.EmitStoreOfScalar(Return, OffloadError); }; // Notify that the host version must be executed. - auto &&ElseGen = [this, OffloadError, - OffloadErrorQType](CodeGenFunction &CGF) { - CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/-1u), + auto &&ElseGen = [OffloadError](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.Int32Ty, /*V=*/-1u), OffloadError); }; @@ -4107,15 +5862,15 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, // regardless of the conditional in the if clause if, e.g., the user do not // specify target triples. if (OutlinedFnID) { - if (IfCond) { + if (IfCond) emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); - } else { - CodeGenFunction::RunCleanupsScope Scope(CGF); - ThenGen(CGF); + else { + RegionCodeGenTy ThenRCG(ThenGen); + ThenRCG(CGF); } } else { - CodeGenFunction::RunCleanupsScope Scope(CGF); - ElseGen(CGF); + RegionCodeGenTy ElseRCG(ElseGen); + ElseRCG(CGF); } // Check the error code and execute the host version if required. 
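To tie this lambda together, a hypothetical source-level view of the host code generated for a target region with if and device clauses; err, entry_id, n_args, host_outlined_fn and kernel_args below are placeholder names, and a nested teams construct would switch the launch to __tgt_target_teams with num_teams and thread_limit appended as two extra trailing arguments.

void run(int n, float *v, bool use_gpu) {
  #pragma omp target if(use_gpu) device(1) map(tofrom: v[0:n])
  for (int i = 0; i < n; ++i)
    v[i] *= 2.0f;
  // Host-side control flow sketched above (pseudo-code in comments):
  //   if (use_gpu)
  //     err = __tgt_target(/*device=*/1, entry_id, n_args,
  //                        baseptrs, ptrs, sizes, maptypes);
  //   else
  //     err = -1;                      // skip the offload attempt entirely
  //   if (err != 0)
  //     host_outlined_fn(kernel_args); // fall back to the host version
}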
@@ -4126,11 +5881,10 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); CGF.EmitBlock(OffloadFailedBlock); - CGF.Builder.CreateCall(OutlinedFn, BasePointers); + CGF.Builder.CreateCall(OutlinedFn, KernelArgs); CGF.EmitBranch(OffloadContBlock); CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); - return; } void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, @@ -4148,26 +5902,27 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, unsigned DeviceID; unsigned FileID; unsigned Line; - unsigned Column; getTargetEntryUniqueInfo(CGM.getContext(), E->getLocStart(), DeviceID, - FileID, Line, Column); + FileID, Line); // Is this a target region that should not be emitted as an entry point? If // so just signal we are done with this target region. - if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo( - DeviceID, FileID, ParentName, Line, Column)) + if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID, + ParentName, Line)) return; llvm::Function *Fn; llvm::Constant *Addr; - emitTargetOutlinedFunction(*E, ParentName, Fn, Addr, - /*isOffloadEntry=*/true); + std::tie(Fn, Addr) = + CodeGenFunction::EmitOMPTargetDirectiveOutlinedFunction( + CGM, cast<OMPTargetDirective>(*E), ParentName, + /*isOffloadEntry=*/true); assert(Fn && Addr && "Target region emission failed."); return; } if (const OMPExecutableDirective *E = dyn_cast<OMPExecutableDirective>(S)) { - if (!E->getAssociatedStmt()) + if (!E->hasAssociatedStmt()) return; scanForTargetRegionsFunctions( @@ -4183,8 +5938,6 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, // Keep looking for target regions recursively. for (auto *II : S->children()) scanForTargetRegionsFunctions(II, ParentName); - - return; } bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { @@ -4249,3 +6002,594 @@ llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() { // compilation unit. return createOffloadingBinaryDescriptorRegistration(); } + +void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, + const OMPExecutableDirective &D, + SourceLocation Loc, + llvm::Value *OutlinedFn, + ArrayRef<llvm::Value *> CapturedVars) { + if (!CGF.HaveInsertPoint()) + return; + + auto *RTLoc = emitUpdateLocation(CGF, Loc); + CodeGenFunction::RunCleanupsScope Scope(CGF); + + // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn); + llvm::Value *Args[] = { + RTLoc, + CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars + CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; + llvm::SmallVector<llvm::Value *, 16> RealArgs; + RealArgs.append(std::begin(Args), std::end(Args)); + RealArgs.append(CapturedVars.begin(), CapturedVars.end()); + + auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams); + CGF.EmitRuntimeCall(RTLFn, RealArgs); +} + +void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF, + const Expr *NumTeams, + const Expr *ThreadLimit, + SourceLocation Loc) { + if (!CGF.HaveInsertPoint()) + return; + + auto *RTLoc = emitUpdateLocation(CGF, Loc); + + llvm::Value *NumTeamsVal = + (NumTeams) + ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams), + CGF.CGM.Int32Ty, /* isSigned = */ true) + : CGF.Builder.getInt32(0); + + llvm::Value *ThreadLimitVal = + (ThreadLimit) + ? 
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit), + CGF.CGM.Int32Ty, /* isSigned = */ true) + : CGF.Builder.getInt32(0); + + // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit) + llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal, + ThreadLimitVal}; + CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams), + PushNumTeamsArgs); +} + +void CGOpenMPRuntime::emitTargetDataCalls(CodeGenFunction &CGF, + const OMPExecutableDirective &D, + const Expr *IfCond, + const Expr *Device, + const RegionCodeGenTy &CodeGen) { + + if (!CGF.HaveInsertPoint()) + return; + + llvm::Value *BasePointersArray = nullptr; + llvm::Value *PointersArray = nullptr; + llvm::Value *SizesArray = nullptr; + llvm::Value *MapTypesArray = nullptr; + unsigned NumOfPtrs = 0; + + // Generate the code for the opening of the data environment. Capture all the + // arguments of the runtime call by reference because they are used in the + // closing of the region. + auto &&BeginThenGen = [&D, &CGF, &BasePointersArray, &PointersArray, + &SizesArray, &MapTypesArray, Device, + &NumOfPtrs](CodeGenFunction &CGF, PrePostActionTy &) { + // Fill up the arrays with all the mapped variables. + MappableExprsHandler::MapValuesArrayTy BasePointers; + MappableExprsHandler::MapValuesArrayTy Pointers; + MappableExprsHandler::MapValuesArrayTy Sizes; + MappableExprsHandler::MapFlagsArrayTy MapTypes; + + // Get map clause information. + MappableExprsHandler MCHandler(D, CGF); + MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); + NumOfPtrs = BasePointers.size(); + + // Fill up the arrays and create the arguments. + emitOffloadingArrays(CGF, BasePointersArray, PointersArray, SizesArray, + MapTypesArray, BasePointers, Pointers, Sizes, + MapTypes); + + llvm::Value *BasePointersArrayArg = nullptr; + llvm::Value *PointersArrayArg = nullptr; + llvm::Value *SizesArrayArg = nullptr; + llvm::Value *MapTypesArrayArg = nullptr; + emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, + SizesArrayArg, MapTypesArrayArg, + BasePointersArray, PointersArray, SizesArray, + MapTypesArray, NumOfPtrs); + + // Emit device ID if any. + llvm::Value *DeviceID = nullptr; + if (Device) + DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), + CGF.Int32Ty, /*isSigned=*/true); + else + DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); + + // Emit the number of elements in the offloading arrays. + auto *PointerNum = CGF.Builder.getInt32(NumOfPtrs); + + llvm::Value *OffloadingArgs[] = { + DeviceID, PointerNum, BasePointersArrayArg, + PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; + auto &RT = CGF.CGM.getOpenMPRuntime(); + CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target_data_begin), + OffloadingArgs); + }; + + // Generate code for the closing of the data region. 
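An illustrative source-level view of the bracketing generated here (device id and argument counts are hypothetical): the begin callback maps the listed sections, the enclosed code is emitted inline, and the matching end callback is issued with the same arrays.

void stage(int n, float *a, float *b) {
  #pragma omp target data device(0) map(to: a[0:n]) map(from: b[0:n])
  {
    // On entry: __tgt_target_data_begin(0, 2, baseptrs, ptrs, sizes, maptypes)
    #pragma omp target map(to: a[0:n]) map(from: b[0:n])
    for (int i = 0; i < n; ++i)
      b[i] = a[i] + 1.0f;
    // On exit:  __tgt_target_data_end(0, 2, baseptrs, ptrs, sizes, maptypes)
  }
}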
+ auto &&EndThenGen = [&CGF, &BasePointersArray, &PointersArray, &SizesArray, + &MapTypesArray, Device, + &NumOfPtrs](CodeGenFunction &CGF, PrePostActionTy &) { + assert(BasePointersArray && PointersArray && SizesArray && MapTypesArray && + NumOfPtrs && "Invalid data environment closing arguments."); + + llvm::Value *BasePointersArrayArg = nullptr; + llvm::Value *PointersArrayArg = nullptr; + llvm::Value *SizesArrayArg = nullptr; + llvm::Value *MapTypesArrayArg = nullptr; + emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, + SizesArrayArg, MapTypesArrayArg, + BasePointersArray, PointersArray, SizesArray, + MapTypesArray, NumOfPtrs); + + // Emit device ID if any. + llvm::Value *DeviceID = nullptr; + if (Device) + DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), + CGF.Int32Ty, /*isSigned=*/true); + else + DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); + + // Emit the number of elements in the offloading arrays. + auto *PointerNum = CGF.Builder.getInt32(NumOfPtrs); + + llvm::Value *OffloadingArgs[] = { + DeviceID, PointerNum, BasePointersArrayArg, + PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; + auto &RT = CGF.CGM.getOpenMPRuntime(); + CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target_data_end), + OffloadingArgs); + }; + + // In the event we get an if clause, we don't have to take any action on the + // else side. + auto &&ElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; + + if (IfCond) { + emitOMPIfClause(CGF, IfCond, BeginThenGen, ElseGen); + } else { + RegionCodeGenTy BeginThenRCG(BeginThenGen); + BeginThenRCG(CGF); + } + + CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data, CodeGen); + + if (IfCond) { + emitOMPIfClause(CGF, IfCond, EndThenGen, ElseGen); + } else { + RegionCodeGenTy EndThenRCG(EndThenGen); + EndThenRCG(CGF); + } +} + +void CGOpenMPRuntime::emitTargetDataStandAloneCall( + CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, + const Expr *Device) { + if (!CGF.HaveInsertPoint()) + return; + + assert((isa<OMPTargetEnterDataDirective>(D) || + isa<OMPTargetExitDataDirective>(D) || + isa<OMPTargetUpdateDirective>(D)) && + "Expecting either target enter, exit data, or update directives."); + + // Generate the code for the opening of the data environment. + auto &&ThenGen = [&D, &CGF, Device](CodeGenFunction &CGF, PrePostActionTy &) { + // Fill up the arrays with all the mapped variables. + MappableExprsHandler::MapValuesArrayTy BasePointers; + MappableExprsHandler::MapValuesArrayTy Pointers; + MappableExprsHandler::MapValuesArrayTy Sizes; + MappableExprsHandler::MapFlagsArrayTy MapTypes; + + // Get map clause information. + MappableExprsHandler MEHandler(D, CGF); + MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); + + llvm::Value *BasePointersArrayArg = nullptr; + llvm::Value *PointersArrayArg = nullptr; + llvm::Value *SizesArrayArg = nullptr; + llvm::Value *MapTypesArrayArg = nullptr; + + // Fill up the arrays and create the arguments. + emitOffloadingArrays(CGF, BasePointersArrayArg, PointersArrayArg, + SizesArrayArg, MapTypesArrayArg, BasePointers, + Pointers, Sizes, MapTypes); + emitOffloadingArraysArgument( + CGF, BasePointersArrayArg, PointersArrayArg, SizesArrayArg, + MapTypesArrayArg, BasePointersArrayArg, PointersArrayArg, SizesArrayArg, + MapTypesArrayArg, BasePointers.size()); + + // Emit device ID if any. 
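For the standalone data directives handled by this function, a short source-form translation table (hypothetical example; the comments name the runtime entry point selected by the directive-kind switch below):

void phases(int n, double *grid) {
  #pragma omp target enter data map(to: grid[0:n])   // __tgt_target_data_begin
  // ... target regions reusing the device copy of 'grid' ...
  #pragma omp target update from(grid[0:n])          // __tgt_target_data_update
  #pragma omp target exit data map(from: grid[0:n])  // __tgt_target_data_end
}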
+ llvm::Value *DeviceID = nullptr; + if (Device) + DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), + CGF.Int32Ty, /*isSigned=*/true); + else + DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); + + // Emit the number of elements in the offloading arrays. + auto *PointerNum = CGF.Builder.getInt32(BasePointers.size()); + + llvm::Value *OffloadingArgs[] = { + DeviceID, PointerNum, BasePointersArrayArg, + PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; + + auto &RT = CGF.CGM.getOpenMPRuntime(); + // Select the right runtime function call for each expected standalone + // directive. + OpenMPRTLFunction RTLFn; + switch (D.getDirectiveKind()) { + default: + llvm_unreachable("Unexpected standalone target data directive."); + break; + case OMPD_target_enter_data: + RTLFn = OMPRTL__tgt_target_data_begin; + break; + case OMPD_target_exit_data: + RTLFn = OMPRTL__tgt_target_data_end; + break; + case OMPD_target_update: + RTLFn = OMPRTL__tgt_target_data_update; + break; + } + CGF.EmitRuntimeCall(RT.createRuntimeFunction(RTLFn), OffloadingArgs); + }; + + // In the event we get an if clause, we don't have to take any action on the + // else side. + auto &&ElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; + + if (IfCond) { + emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); + } else { + RegionCodeGenTy ThenGenRCG(ThenGen); + ThenGenRCG(CGF); + } +} + +namespace { + /// Kind of parameter in a function with 'declare simd' directive. + enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector }; + /// Attribute set of the parameter. + struct ParamAttrTy { + ParamKindTy Kind = Vector; + llvm::APSInt StrideOrArg; + llvm::APSInt Alignment; + }; +} // namespace + +static unsigned evaluateCDTSize(const FunctionDecl *FD, + ArrayRef<ParamAttrTy> ParamAttrs) { + // Every vector variant of a SIMD-enabled function has a vector length (VLEN). + // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument + // of that clause. The VLEN value must be power of 2. + // In other case the notion of the function`s "characteristic data type" (CDT) + // is used to compute the vector length. + // CDT is defined in the following order: + // a) For non-void function, the CDT is the return type. + // b) If the function has any non-uniform, non-linear parameters, then the + // CDT is the type of the first such parameter. + // c) If the CDT determined by a) or b) above is struct, union, or class + // type which is pass-by-value (except for the type that maps to the + // built-in complex data type), the characteristic data type is int. + // d) If none of the above three cases is applicable, the CDT is int. + // The VLEN is then determined based on the CDT and the size of vector + // register of that ISA for which current vector version is generated. The + // VLEN is computed using the formula below: + // VLEN = sizeof(vector_register) / sizeof(CDT), + // where vector register size specified in section 3.2.1 Registers and the + // Stack Frame of original AMD64 ABI document. 
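A worked example of the x86 vector-variant mangling implemented just below (the declaration and clause values are hypothetical; C linkage is assumed so the IR name matches the source name):

extern "C" {
#pragma omp declare simd simdlen(4) notinbranch uniform(p) linear(i:2)
float scale(float *p, int i);
}
// 'notinbranch' keeps only the unmasked ('N') variants, so the definition is
// tagged with one attribute per ISA:
//   _ZGVbN4ul2_scale   (SSE,    128-bit registers)
//   _ZGVcN4ul2_scale   (AVX,    256-bit registers)
//   _ZGVdN4ul2_scale   (AVX2,   256-bit registers)
//   _ZGVeN4ul2_scale   (AVX512, 512-bit registers)
// where 'u' encodes the uniform pointer, 'l2' the linear parameter with
// stride 2, and 4 is the simdlen value (so no CDT-based VLEN is computed).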
+ QualType RetType = FD->getReturnType(); + if (RetType.isNull()) + return 0; + ASTContext &C = FD->getASTContext(); + QualType CDT; + if (!RetType.isNull() && !RetType->isVoidType()) + CDT = RetType; + else { + unsigned Offset = 0; + if (auto *MD = dyn_cast<CXXMethodDecl>(FD)) { + if (ParamAttrs[Offset].Kind == Vector) + CDT = C.getPointerType(C.getRecordType(MD->getParent())); + ++Offset; + } + if (CDT.isNull()) { + for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { + if (ParamAttrs[I + Offset].Kind == Vector) { + CDT = FD->getParamDecl(I)->getType(); + break; + } + } + } + } + if (CDT.isNull()) + CDT = C.IntTy; + CDT = CDT->getCanonicalTypeUnqualified(); + if (CDT->isRecordType() || CDT->isUnionType()) + CDT = C.IntTy; + return C.getTypeSize(CDT); +} + +static void +emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, + llvm::APSInt VLENVal, + ArrayRef<ParamAttrTy> ParamAttrs, + OMPDeclareSimdDeclAttr::BranchStateTy State) { + struct ISADataTy { + char ISA; + unsigned VecRegSize; + }; + ISADataTy ISAData[] = { + { + 'b', 128 + }, // SSE + { + 'c', 256 + }, // AVX + { + 'd', 256 + }, // AVX2 + { + 'e', 512 + }, // AVX512 + }; + llvm::SmallVector<char, 2> Masked; + switch (State) { + case OMPDeclareSimdDeclAttr::BS_Undefined: + Masked.push_back('N'); + Masked.push_back('M'); + break; + case OMPDeclareSimdDeclAttr::BS_Notinbranch: + Masked.push_back('N'); + break; + case OMPDeclareSimdDeclAttr::BS_Inbranch: + Masked.push_back('M'); + break; + } + for (auto Mask : Masked) { + for (auto &Data : ISAData) { + SmallString<256> Buffer; + llvm::raw_svector_ostream Out(Buffer); + Out << "_ZGV" << Data.ISA << Mask; + if (!VLENVal) { + Out << llvm::APSInt::getUnsigned(Data.VecRegSize / + evaluateCDTSize(FD, ParamAttrs)); + } else + Out << VLENVal; + for (auto &ParamAttr : ParamAttrs) { + switch (ParamAttr.Kind){ + case LinearWithVarStride: + Out << 's' << ParamAttr.StrideOrArg; + break; + case Linear: + Out << 'l'; + if (!!ParamAttr.StrideOrArg) + Out << ParamAttr.StrideOrArg; + break; + case Uniform: + Out << 'u'; + break; + case Vector: + Out << 'v'; + break; + } + if (!!ParamAttr.Alignment) + Out << 'a' << ParamAttr.Alignment; + } + Out << '_' << Fn->getName(); + Fn->addFnAttr(Out.str()); + } + } +} + +void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, + llvm::Function *Fn) { + ASTContext &C = CGM.getContext(); + FD = FD->getCanonicalDecl(); + // Map params to their positions in function decl. + llvm::DenseMap<const Decl *, unsigned> ParamPositions; + if (isa<CXXMethodDecl>(FD)) + ParamPositions.insert({FD, 0}); + unsigned ParamPos = ParamPositions.size(); + for (auto *P : FD->parameters()) { + ParamPositions.insert({P->getCanonicalDecl(), ParamPos}); + ++ParamPos; + } + for (auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { + llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); + // Mark uniform parameters. + for (auto *E : Attr->uniforms()) { + E = E->IgnoreParenImpCasts(); + unsigned Pos; + if (isa<CXXThisExpr>(E)) + Pos = ParamPositions[FD]; + else { + auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) + ->getCanonicalDecl(); + Pos = ParamPositions[PVD]; + } + ParamAttrs[Pos].Kind = Uniform; + } + // Get alignment info. 
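// Worked example of the resulting attributes (hypothetical function, assuming
// C linkage so that Fn->getName() is plain "foo"):
//   #pragma omp declare simd uniform(n)
//   float foo(float x, int n);
// The CDT is float (32 bits) and no branch state is given, so the loop in
// emitX86DeclareSimdFunction above adds both unmasked ('N') and masked ('M')
// variants for every ISA, e.g.
//   _ZGVbN4vu_foo   _ZGVbM4vu_foo    (SSE,    VLEN = 128/32 = 4)
//   _ZGVcN8vu_foo   _ZGVcM8vu_foo    (AVX,    VLEN = 256/32 = 8)
//   _ZGVdN8vu_foo   _ZGVdM8vu_foo    (AVX2,   VLEN = 256/32 = 8)
//   _ZGVeN16vu_foo  _ZGVeM16vu_foo   (AVX512, VLEN = 512/32 = 16)
// with 'v' encoding the vector parameter x and 'u' the uniform parameter n.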
+ auto NI = Attr->alignments_begin(); + for (auto *E : Attr->aligneds()) { + E = E->IgnoreParenImpCasts(); + unsigned Pos; + QualType ParmTy; + if (isa<CXXThisExpr>(E)) { + Pos = ParamPositions[FD]; + ParmTy = E->getType(); + } else { + auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) + ->getCanonicalDecl(); + Pos = ParamPositions[PVD]; + ParmTy = PVD->getType(); + } + ParamAttrs[Pos].Alignment = + (*NI) ? (*NI)->EvaluateKnownConstInt(C) + : llvm::APSInt::getUnsigned( + C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) + .getQuantity()); + ++NI; + } + // Mark linear parameters. + auto SI = Attr->steps_begin(); + auto MI = Attr->modifiers_begin(); + for (auto *E : Attr->linears()) { + E = E->IgnoreParenImpCasts(); + unsigned Pos; + if (isa<CXXThisExpr>(E)) + Pos = ParamPositions[FD]; + else { + auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) + ->getCanonicalDecl(); + Pos = ParamPositions[PVD]; + } + auto &ParamAttr = ParamAttrs[Pos]; + ParamAttr.Kind = Linear; + if (*SI) { + if (!(*SI)->EvaluateAsInt(ParamAttr.StrideOrArg, C, + Expr::SE_AllowSideEffects)) { + if (auto *DRE = cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { + if (auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { + ParamAttr.Kind = LinearWithVarStride; + ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( + ParamPositions[StridePVD->getCanonicalDecl()]); + } + } + } + } + ++SI; + ++MI; + } + llvm::APSInt VLENVal; + if (const Expr *VLEN = Attr->getSimdlen()) + VLENVal = VLEN->EvaluateKnownConstInt(C); + OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); + if (CGM.getTriple().getArch() == llvm::Triple::x86 || + CGM.getTriple().getArch() == llvm::Triple::x86_64) + emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); + } +} + +namespace { +/// Cleanup action for doacross support. +class DoacrossCleanupTy final : public EHScopeStack::Cleanup { +public: + static const int DoacrossFinArgs = 2; + +private: + llvm::Value *RTLFn; + llvm::Value *Args[DoacrossFinArgs]; + +public: + DoacrossCleanupTy(llvm::Value *RTLFn, ArrayRef<llvm::Value *> CallArgs) + : RTLFn(RTLFn) { + assert(CallArgs.size() == DoacrossFinArgs); + std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); + } + void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { + if (!CGF.HaveInsertPoint()) + return; + CGF.EmitRuntimeCall(RTLFn, Args); + } +}; +} // namespace + +void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, + const OMPLoopDirective &D) { + if (!CGF.HaveInsertPoint()) + return; + + ASTContext &C = CGM.getContext(); + QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); + RecordDecl *RD; + if (KmpDimTy.isNull()) { + // Build struct kmp_dim { // loop bounds info casted to kmp_int64 + // kmp_int64 lo; // lower + // kmp_int64 up; // upper + // kmp_int64 st; // stride + // }; + RD = C.buildImplicitRecord("kmp_dim"); + RD->startDefinition(); + addFieldToRecordDecl(C, RD, Int64Ty); + addFieldToRecordDecl(C, RD, Int64Ty); + addFieldToRecordDecl(C, RD, Int64Ty); + RD->completeDefinition(); + KmpDimTy = C.getRecordType(RD); + } else + RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); + + Address DimsAddr = CGF.CreateMemTemp(KmpDimTy, "dims"); + CGF.EmitNullInitialization(DimsAddr, KmpDimTy); + enum { LowerFD = 0, UpperFD, StrideFD }; + // Fill dims with data. 
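// Source-level sketch of the construct this supports (illustrative; standard
// OpenMP 4.5 doacross syntax, one loop dimension as emitted below):
//   #pragma omp for ordered(1)
//   for (int i = 1; i < n; ++i) {
//     #pragma omp ordered depend(sink: i - 1)  // __kmpc_doacross_wait (see
//                                              // emitDoacrossOrdered below)
//     a[i] = a[i - 1] + 1;
//     #pragma omp ordered depend(source)       // __kmpc_doacross_post
//   }
// emitDoacrossInit brackets the loop with __kmpc_doacross_init and
// __kmpc_doacross_fini and passes the loop bounds (upper = number of
// iterations, stride = 1) in the kmp_dim descriptor filled in here.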
+ LValue DimsLVal = CGF.MakeAddrLValue(DimsAddr, KmpDimTy); + // dims.upper = num_iterations; + LValue UpperLVal = + CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), UpperFD)); + llvm::Value *NumIterVal = CGF.EmitScalarConversion( + CGF.EmitScalarExpr(D.getNumIterations()), D.getNumIterations()->getType(), + Int64Ty, D.getNumIterations()->getExprLoc()); + CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); + // dims.stride = 1; + LValue StrideLVal = + CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), StrideFD)); + CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), + StrideLVal); + + // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, + // kmp_int32 num_dims, struct kmp_dim * dims); + llvm::Value *Args[] = {emitUpdateLocation(CGF, D.getLocStart()), + getThreadID(CGF, D.getLocStart()), + llvm::ConstantInt::getSigned(CGM.Int32Ty, 1), + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + DimsAddr.getPointer(), CGM.VoidPtrTy)}; + + llvm::Value *RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_init); + CGF.EmitRuntimeCall(RTLFn, Args); + llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { + emitUpdateLocation(CGF, D.getLocEnd()), getThreadID(CGF, D.getLocEnd())}; + llvm::Value *FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_fini); + CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, + llvm::makeArrayRef(FiniArgs)); +} + +void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, + const OMPDependClause *C) { + QualType Int64Ty = + CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); + const Expr *CounterVal = C->getCounterValue(); + assert(CounterVal); + llvm::Value *CntVal = CGF.EmitScalarConversion(CGF.EmitScalarExpr(CounterVal), + CounterVal->getType(), Int64Ty, + CounterVal->getExprLoc()); + Address CntAddr = CGF.CreateMemTemp(Int64Ty, ".cnt.addr"); + CGF.EmitStoreOfScalar(CntVal, CntAddr, /*Volatile=*/false, Int64Ty); + llvm::Value *Args[] = {emitUpdateLocation(CGF, C->getLocStart()), + getThreadID(CGF, C->getLocStart()), + CntAddr.getPointer()}; + llvm::Value *RTLFn; + if (C->getDependencyKind() == OMPC_DEPEND_source) + RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post); + else { + assert(C->getDependencyKind() == OMPC_DEPEND_sink); + RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait); + } + CGF.EmitRuntimeCall(RTLFn, Args); +} + diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.h b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.h index b325637..270de8d 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -14,18 +14,19 @@ #ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIME_H #define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIME_H +#include "CGValue.h" #include "clang/AST/Type.h" #include "clang/Basic/OpenMPKinds.h" #include "clang/Basic/SourceLocation.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringMap.h" +#include "llvm/IR/Function.h" #include "llvm/IR/ValueHandle.h" namespace llvm { class ArrayType; class Constant; -class Function; class FunctionType; class GlobalVariable; class StructType; @@ -36,222 +37,116 @@ class Value; namespace clang { class Expr; class GlobalDecl; +class OMPDependClause; class OMPExecutableDirective; +class OMPLoopDirective; class VarDecl; +class OMPDeclareReductionDecl; +class IdentifierInfo; namespace CodeGen { class Address; class CodeGenFunction; class 
CodeGenModule; -typedef llvm::function_ref<void(CodeGenFunction &)> RegionCodeGenTy; +/// A basic class for pre|post-action for advanced codegen sequence for OpenMP +/// region. +class PrePostActionTy { +public: + explicit PrePostActionTy() {} + virtual void Enter(CodeGenFunction &CGF) {} + virtual void Exit(CodeGenFunction &CGF) {} + virtual ~PrePostActionTy() {} +}; -class CGOpenMPRuntime { -private: - enum OpenMPRTLFunction { - /// \brief Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, - /// kmpc_micro microtask, ...); - OMPRTL__kmpc_fork_call, - /// \brief Call to void *__kmpc_threadprivate_cached(ident_t *loc, - /// kmp_int32 global_tid, void *data, size_t size, void ***cache); - OMPRTL__kmpc_threadprivate_cached, - /// \brief Call to void __kmpc_threadprivate_register( ident_t *, - /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); - OMPRTL__kmpc_threadprivate_register, - // Call to __kmpc_int32 kmpc_global_thread_num(ident_t *loc); - OMPRTL__kmpc_global_thread_num, - // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, - // kmp_critical_name *crit); - OMPRTL__kmpc_critical, - // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 - // global_tid, kmp_critical_name *crit, uintptr_t hint); - OMPRTL__kmpc_critical_with_hint, - // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, - // kmp_critical_name *crit); - OMPRTL__kmpc_end_critical, - // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 - // global_tid); - OMPRTL__kmpc_cancel_barrier, - // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); - OMPRTL__kmpc_barrier, - // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); - OMPRTL__kmpc_for_static_fini, - // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 - // global_tid); - OMPRTL__kmpc_serialized_parallel, - // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 - // global_tid); - OMPRTL__kmpc_end_serialized_parallel, - // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, - // kmp_int32 num_threads); - OMPRTL__kmpc_push_num_threads, - // Call to void __kmpc_flush(ident_t *loc); - OMPRTL__kmpc_flush, - // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid); - OMPRTL__kmpc_master, - // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid); - OMPRTL__kmpc_end_master, - // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, - // int end_part); - OMPRTL__kmpc_omp_taskyield, - // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid); - OMPRTL__kmpc_single, - // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid); - OMPRTL__kmpc_end_single, - // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, - // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, - // kmp_routine_entry_t *task_entry); - OMPRTL__kmpc_omp_task_alloc, - // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t * - // new_task); - OMPRTL__kmpc_omp_task, - // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, - // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), - // kmp_int32 didit); - OMPRTL__kmpc_copyprivate, - // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, - // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void - // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); - OMPRTL__kmpc_reduce, - // Call to kmp_int32 
__kmpc_reduce_nowait(ident_t *loc, kmp_int32 - // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, - // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name - // *lck); - OMPRTL__kmpc_reduce_nowait, - // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, - // kmp_critical_name *lck); - OMPRTL__kmpc_end_reduce, - // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, - // kmp_critical_name *lck); - OMPRTL__kmpc_end_reduce_nowait, - // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, - // kmp_task_t * new_task); - OMPRTL__kmpc_omp_task_begin_if0, - // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, - // kmp_task_t * new_task); - OMPRTL__kmpc_omp_task_complete_if0, - // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); - OMPRTL__kmpc_ordered, - // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); - OMPRTL__kmpc_end_ordered, - // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 - // global_tid); - OMPRTL__kmpc_omp_taskwait, - // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid); - OMPRTL__kmpc_taskgroup, - // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); - OMPRTL__kmpc_end_taskgroup, - // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, - // int proc_bind); - OMPRTL__kmpc_push_proc_bind, - // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 - // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t - // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); - OMPRTL__kmpc_omp_task_with_deps, - // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 - // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 - // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); - OMPRTL__kmpc_omp_wait_deps, - // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 - // global_tid, kmp_int32 cncl_kind); - OMPRTL__kmpc_cancellationpoint, - // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, - // kmp_int32 cncl_kind); - OMPRTL__kmpc_cancel, - - // - // Offloading related calls - // - // Call to int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t - // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t - // *arg_types); - OMPRTL__tgt_target, - // Call to void __tgt_register_lib(__tgt_bin_desc *desc); - OMPRTL__tgt_register_lib, - // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc); - OMPRTL__tgt_unregister_lib, - }; +/// Class provides a way to call simple version of codegen for OpenMP region, or +/// an advanced with possible pre|post-actions in codegen. +class RegionCodeGenTy final { + intptr_t CodeGen; + typedef void (*CodeGenTy)(intptr_t, CodeGenFunction &, PrePostActionTy &); + CodeGenTy Callback; + mutable PrePostActionTy *PrePostAction; + RegionCodeGenTy() = delete; + RegionCodeGenTy &operator=(const RegionCodeGenTy &) = delete; + template <typename Callable> + static void CallbackFn(intptr_t CodeGen, CodeGenFunction &CGF, + PrePostActionTy &Action) { + return (*reinterpret_cast<Callable *>(CodeGen))(CGF, Action); + } - /// \brief Values for bit flags used in the ident_t to describe the fields. - /// All enumeric elements are named and described in accordance with the code - /// from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h - enum OpenMPLocationFlags { - /// \brief Use trampoline for internal microtask. 
- OMP_IDENT_IMD = 0x01, - /// \brief Use c-style ident structure. - OMP_IDENT_KMPC = 0x02, - /// \brief Atomic reduction option for kmpc_reduce. - OMP_ATOMIC_REDUCE = 0x10, - /// \brief Explicit 'barrier' directive. - OMP_IDENT_BARRIER_EXPL = 0x20, - /// \brief Implicit barrier in code. - OMP_IDENT_BARRIER_IMPL = 0x40, - /// \brief Implicit barrier in 'for' directive. - OMP_IDENT_BARRIER_IMPL_FOR = 0x40, - /// \brief Implicit barrier in 'sections' directive. - OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, - /// \brief Implicit barrier in 'single' directive. - OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140 - }; +public: + template <typename Callable> + RegionCodeGenTy( + Callable &&CodeGen, + typename std::enable_if< + !std::is_same<typename std::remove_reference<Callable>::type, + RegionCodeGenTy>::value>::type * = nullptr) + : CodeGen(reinterpret_cast<intptr_t>(&CodeGen)), + Callback(CallbackFn<typename std::remove_reference<Callable>::type>), + PrePostAction(nullptr) {} + void setAction(PrePostActionTy &Action) const { PrePostAction = &Action; } + void operator()(CodeGenFunction &CGF) const; +}; + +struct OMPTaskDataTy final { + SmallVector<const Expr *, 4> PrivateVars; + SmallVector<const Expr *, 4> PrivateCopies; + SmallVector<const Expr *, 4> FirstprivateVars; + SmallVector<const Expr *, 4> FirstprivateCopies; + SmallVector<const Expr *, 4> FirstprivateInits; + SmallVector<const Expr *, 4> LastprivateVars; + SmallVector<const Expr *, 4> LastprivateCopies; + SmallVector<std::pair<OpenMPDependClauseKind, const Expr *>, 4> Dependences; + llvm::PointerIntPair<llvm::Value *, 1, bool> Final; + llvm::PointerIntPair<llvm::Value *, 1, bool> Schedule; + llvm::PointerIntPair<llvm::Value *, 1, bool> Priority; + unsigned NumberOfParts = 0; + bool Tied = true; + bool Nogroup = false; +}; + +class CGOpenMPRuntime { +protected: CodeGenModule &CGM; + + /// \brief Creates offloading entry for the provided entry ID \a ID, + /// address \a Addr and size \a Size. + virtual void createOffloadEntry(llvm::Constant *ID, llvm::Constant *Addr, + uint64_t Size); + + /// \brief Helper to emit outlined function for 'target' directive. + /// \param D Directive to emit. + /// \param ParentName Name of the function that encloses the target region. + /// \param OutlinedFn Outlined function value to be defined by this call. + /// \param OutlinedFnID Outlined function ID value to be defined by this call. + /// \param IsOffloadEntry True if the outlined function is an offload entry. + /// \param CodeGen Lambda codegen specific to an accelerator device. + /// An oulined function may not be an entry if, e.g. the if clause always + /// evaluates to false. + virtual void emitTargetOutlinedFunctionHelper(const OMPExecutableDirective &D, + StringRef ParentName, + llvm::Function *&OutlinedFn, + llvm::Constant *&OutlinedFnID, + bool IsOffloadEntry, + const RegionCodeGenTy &CodeGen); + +private: /// \brief Default const ident_t object used for initialization of all other /// ident_t objects. - llvm::Constant *DefaultOpenMPPSource; + llvm::Constant *DefaultOpenMPPSource = nullptr; /// \brief Map of flags and corresponding default locations. typedef llvm::DenseMap<unsigned, llvm::Value *> OpenMPDefaultLocMapTy; OpenMPDefaultLocMapTy OpenMPDefaultLocMap; - Address getOrCreateDefaultLocation(OpenMPLocationFlags Flags); + Address getOrCreateDefaultLocation(unsigned Flags); -public: - /// \brief Describes ident structure that describes a source location. 
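// Minimal usage sketch for the RegionCodeGenTy/PrePostActionTy pair introduced
// above (hypothetical call site; it mirrors how CGOpenMPRuntime.cpp uses it):
//   auto &&CodeGen = [](CodeGenFunction &CGF, PrePostActionTy &Action) {
//     // ... emit the body of the region ...
//   };
//   RegionCodeGenTy RCG(CodeGen);  // wraps the lambda without allocation
//   RCG.setAction(SomeAction);     // optionally attach Enter/Exit callbacks
//   RCG(CGF);                      // invokes the wrapped callback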
- /// All descriptions are taken from - /// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h - /// Original structure: - /// typedef struct ident { - /// kmp_int32 reserved_1; /**< might be used in Fortran; - /// see above */ - /// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; - /// KMP_IDENT_KMPC identifies this union - /// member */ - /// kmp_int32 reserved_2; /**< not really used in Fortran any more; - /// see above */ - ///#if USE_ITT_BUILD - /// /* but currently used for storing - /// region-specific ITT */ - /// /* contextual information. */ - ///#endif /* USE_ITT_BUILD */ - /// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for - /// C++ */ - /// char const *psource; /**< String describing the source location. - /// The string is composed of semi-colon separated - // fields which describe the source file, - /// the function and a pair of line numbers that - /// delimit the construct. - /// */ - /// } ident_t; - enum IdentFieldIndex { - /// \brief might be used in Fortran - IdentField_Reserved_1, - /// \brief OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member. - IdentField_Flags, - /// \brief Not really used in Fortran any more - IdentField_Reserved_2, - /// \brief Source[4] in Fortran, do not use for C++ - IdentField_Reserved_3, - /// \brief String describing the source location. The string is composed of - /// semi-colon separated fields which describe the source file, the function - /// and a pair of line numbers that delimit the construct. - IdentField_PSource - }; -private: - llvm::StructType *IdentTy; + llvm::StructType *IdentTy = nullptr; /// \brief Map for SourceLocation and OpenMP runtime library debug locations. typedef llvm::DenseMap<unsigned, llvm::Value *> OpenMPDebugLocMapTy; OpenMPDebugLocMapTy OpenMPDebugLocMap; /// \brief The type for a microtask which gets passed to __kmpc_fork_call(). /// Original representation is: /// typedef void (kmpc_micro)(kmp_int32 global_tid, kmp_int32 bound_tid,...); - llvm::FunctionType *Kmpc_MicroTy; + llvm::FunctionType *Kmpc_MicroTy = nullptr; /// \brief Stores debug location and ThreadID for the function. struct DebugLocThreadIdTy { llvm::Value *DebugLoc; @@ -261,6 +156,20 @@ private: typedef llvm::DenseMap<llvm::Function *, DebugLocThreadIdTy> OpenMPLocThreadIDMapTy; OpenMPLocThreadIDMapTy OpenMPLocThreadIDMap; + /// Map of UDRs and corresponding combiner/initializer. + typedef llvm::DenseMap<const OMPDeclareReductionDecl *, + std::pair<llvm::Function *, llvm::Function *>> + UDRMapTy; + UDRMapTy UDRMap; + /// Map of functions and locally defined UDRs. 
+ typedef llvm::DenseMap<llvm::Function *, + SmallVector<const OMPDeclareReductionDecl *, 4>> + FunctionUDRMapTy; + FunctionUDRMapTy FunctionUDRMap; + IdentifierInfo *In = nullptr; + IdentifierInfo *Out = nullptr; + IdentifierInfo *Priv = nullptr; + IdentifierInfo *Orig = nullptr; /// \brief Type kmp_critical_name, originally defined as typedef kmp_int32 /// kmp_critical_name[8]; llvm::ArrayType *KmpCriticalNameTy; @@ -272,7 +181,7 @@ private: llvm::StringMap<llvm::AssertingVH<llvm::Constant>, llvm::BumpPtrAllocator> InternalVars; /// \brief Type typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); - llvm::Type *KmpRoutineEntryPtrTy; + llvm::Type *KmpRoutineEntryPtrTy = nullptr; QualType KmpRoutineEntryPtrQTy; /// \brief Type typedef struct kmp_task { /// void * shareds; /**< pointer to block of pointers to @@ -293,6 +202,12 @@ private: /// } flags; /// } kmp_depend_info_t; QualType KmpDependInfoTy; + /// struct kmp_dim { // loop bounds info casted to kmp_int64 + /// kmp_int64 lo; // lower + /// kmp_int64 up; // upper + /// kmp_int64 st; // stride + /// }; + QualType KmpDimTy; /// \brief Type struct __tgt_offload_entry{ /// void *addr; // Pointer to the offload entry info. /// // (function or global) @@ -402,30 +317,27 @@ private: /// \brief Initialize target region entry. void initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, - unsigned ColNum, unsigned Order); + unsigned Order); /// \brief Register target region entry. void registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, - unsigned ColNum, llvm::Constant *Addr, + llvm::Constant *Addr, llvm::Constant *ID); /// \brief Return true if a target region entry with the provided /// information exists. bool hasTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, - StringRef ParentName, unsigned LineNum, - unsigned ColNum) const; + StringRef ParentName, unsigned LineNum) const; /// brief Applies action \a Action on all registered entries. typedef llvm::function_ref<void(unsigned, unsigned, StringRef, unsigned, - unsigned, OffloadEntryInfoTargetRegion &)> + OffloadEntryInfoTargetRegion &)> OffloadTargetRegionEntryInfoActTy; void actOnTargetRegionEntriesInfo( const OffloadTargetRegionEntryInfoActTy &Action); private: // Storage for target region entries kind. The storage is to be indexed by - // file ID, device ID, parent function name, lane number, and column number. + // file ID, device ID, parent function name and line number. typedef llvm::DenseMap<unsigned, OffloadEntryInfoTargetRegion> - OffloadEntriesTargetRegionPerColumn; - typedef llvm::DenseMap<unsigned, OffloadEntriesTargetRegionPerColumn> OffloadEntriesTargetRegionPerLine; typedef llvm::StringMap<OffloadEntriesTargetRegionPerLine> OffloadEntriesTargetRegionPerParentName; @@ -442,10 +354,6 @@ private: /// compilation unit. The function that does the registration is returned. llvm::Function *createOffloadingBinaryDescriptorRegistration(); - /// \brief Creates offloading entry for the provided address \a Addr, - /// name \a Name and size \a Size. - void createOffloadEntry(llvm::Constant *Addr, StringRef Name, uint64_t Size); - /// \brief Creates all the offload entries in the current compilation unit /// along with the associated metadata. void createOffloadEntriesAndInfoMetadata(); @@ -476,7 +384,7 @@ private: /// \param Flags Flags for OpenMP location. 
/// llvm::Value *emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, - OpenMPLocationFlags Flags = OMP_IDENT_KMPC); + unsigned Flags = 0); /// \brief Returns pointer to ident_t type. llvm::Type *getIdentTyPointerTy(); @@ -487,7 +395,7 @@ private: /// \brief Returns specified OpenMP runtime function. /// \param Function OpenMP runtime function. /// \return Specified function. - llvm::Constant *createRuntimeFunction(OpenMPRTLFunction Function); + llvm::Constant *createRuntimeFunction(unsigned Function); /// \brief Returns __kmpc_for_static_init_* runtime function for the specified /// size \a IVSize and sign \a IVSigned. @@ -530,7 +438,7 @@ private: const llvm::Twine &Name); /// \brief Set of threadprivate variables with the generated initializer. - llvm::DenseSet<const VarDecl *> ThreadPrivateWithDefinition; + llvm::SmallPtrSet<const VarDecl *, 4> ThreadPrivateWithDefinition; /// \brief Emits initialization code for the threadprivate variables. /// \param VDAddr Address of the global variable \a VD. @@ -549,11 +457,52 @@ private: /// llvm::Value *getCriticalRegionLock(StringRef CriticalName); + struct TaskResultTy { + llvm::Value *NewTask = nullptr; + llvm::Value *TaskEntry = nullptr; + llvm::Value *NewTaskNewTaskTTy = nullptr; + LValue TDBase; + RecordDecl *KmpTaskTQTyRD = nullptr; + llvm::Value *TaskDupFn = nullptr; + }; + /// Emit task region for the task directive. The task region is emitted in + /// several steps: + /// 1. Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 + /// gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, + /// kmp_routine_entry_t *task_entry). Here task_entry is a pointer to the + /// function: + /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { + /// TaskFunction(gtid, tt->part_id, tt->shareds); + /// return 0; + /// } + /// 2. Copy a list of shared variables to field shareds of the resulting + /// structure kmp_task_t returned by the previous call (if any). + /// 3. Copy a pointer to destructions function to field destructions of the + /// resulting structure kmp_task_t. + /// \param D Current task directive. + /// \param TaskFunction An LLVM function with type void (*)(i32 /*gtid*/, i32 + /// /*part_id*/, captured_struct */*__context*/); + /// \param SharedsTy A type which contains references the shared variables. + /// \param Shareds Context with the list of shared variables from the \p + /// TaskFunction. + /// \param Data Additional data for task generation like tiednsee, final + /// state, list of privates etc. + TaskResultTy emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, + const OMPExecutableDirective &D, + llvm::Value *TaskFunction, QualType SharedsTy, + Address Shareds, const OMPTaskDataTy &Data); + public: explicit CGOpenMPRuntime(CodeGenModule &CGM); virtual ~CGOpenMPRuntime() {} virtual void clear(); + /// Emit code for the specified user defined reduction construct. + virtual void emitUserDefinedReduction(CodeGenFunction *CGF, + const OMPDeclareReductionDecl *D); + /// Get combiner/initializer for the specified user-defined reduction, if any. + virtual std::pair<llvm::Function *, llvm::Function *> + getUserDefinedReduction(const OMPDeclareReductionDecl *D); /// \brief Emits outlined function for the specified OpenMP parallel directive /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID, /// kmp_int32 BoundID, struct context_vars*). 
@@ -562,22 +511,30 @@ public: /// \param InnermostKind Kind of innermost directive (for simple directives it /// is a directive itself, for combined - its innermost directive). /// \param CodeGen Code generation sequence for the \a D directive. - virtual llvm::Value *emitParallelOutlinedFunction( + virtual llvm::Value *emitParallelOrTeamsOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen); /// \brief Emits outlined function for the OpenMP task directive \a D. This - /// outlined function has type void(*)(kmp_int32 ThreadID, kmp_int32 - /// PartID, struct context_vars*). + /// outlined function has type void(*)(kmp_int32 ThreadID, struct task_t* + /// TaskT). /// \param D OpenMP directive. /// \param ThreadIDVar Variable for thread id in the current OpenMP region. + /// \param PartIDVar Variable for partition id in the current OpenMP untied + /// task region. + /// \param TaskTVar Variable for task_t argument. /// \param InnermostKind Kind of innermost directive (for simple directives it /// is a directive itself, for combined - its innermost directive). /// \param CodeGen Code generation sequence for the \a D directive. + /// \param Tied true if task is generated for tied task, false otherwise. + /// \param NumberOfParts Number of parts in untied task. Ignored for tied + /// tasks. /// virtual llvm::Value *emitTaskOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, - OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen); + const VarDecl *PartIDVar, const VarDecl *TaskTVar, + OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, + bool Tied, unsigned &NumberOfParts); /// \brief Cleans up references to the objects in finished function. /// @@ -664,6 +621,14 @@ public: virtual bool isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const; + /// \brief Check if the specified \a ScheduleKind is static non-chunked. + /// This kind of distribute directive is emitted without outer loop. + /// \param ScheduleKind Schedule kind specified in the 'dist_schedule' clause. + /// \param Chunked True if chunk is specified in the clause. + /// + virtual bool isStaticNonchunked(OpenMPDistScheduleClauseKind ScheduleKind, + bool Chunked) const; + /// \brief Check if the specified \a ScheduleKind is dynamic. /// This kind of worksharing directive is emitted without outer loop. /// \param ScheduleKind Schedule Kind specified in the 'schedule' clause. @@ -671,9 +636,9 @@ public: virtual bool isDynamic(OpenMPScheduleClauseKind ScheduleKind) const; virtual void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, - OpenMPScheduleClauseKind SchedKind, - unsigned IVSize, bool IVSigned, - bool Ordered, llvm::Value *UB, + const OpenMPScheduleTy &ScheduleKind, + unsigned IVSize, bool IVSigned, bool Ordered, + llvm::Value *UB, llvm::Value *Chunk = nullptr); /// \brief Call the appropriate runtime routine to initialize it before start @@ -685,7 +650,7 @@ public: /// /// \param CGF Reference to current CodeGenFunction. /// \param Loc Clang source location. - /// \param SchedKind Schedule kind, specified by the 'schedule' clause. + /// \param ScheduleKind Schedule kind, specified by the 'schedule' clause. /// \param IVSize Size of the iteration variable in bits. /// \param IVSigned Sign of the interation variable. /// \param Ordered true if loop is ordered, false otherwise. 
@@ -701,12 +666,36 @@ public: /// For the default (nullptr) value, the chunk 1 will be used. /// virtual void emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc, - OpenMPScheduleClauseKind SchedKind, + const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, - Address IL, Address LB, - Address UB, Address ST, + Address IL, Address LB, Address UB, Address ST, llvm::Value *Chunk = nullptr); + /// + /// \param CGF Reference to current CodeGenFunction. + /// \param Loc Clang source location. + /// \param SchedKind Schedule kind, specified by the 'dist_schedule' clause. + /// \param IVSize Size of the iteration variable in bits. + /// \param IVSigned Sign of the interation variable. + /// \param Ordered true if loop is ordered, false otherwise. + /// \param IL Address of the output variable in which the flag of the + /// last iteration is returned. + /// \param LB Address of the output variable in which the lower iteration + /// number is returned. + /// \param UB Address of the output variable in which the upper iteration + /// number is returned. + /// \param ST Address of the output variable in which the stride value is + /// returned nesessary to generated the static_chunked scheduled loop. + /// \param Chunk Value of the chunk for the static_chunked scheduled loop. + /// For the default (nullptr) value, the chunk 1 will be used. + /// + virtual void emitDistributeStaticInit(CodeGenFunction &CGF, SourceLocation Loc, + OpenMPDistScheduleClauseKind SchedKind, + unsigned IVSize, bool IVSigned, + bool Ordered, Address IL, Address LB, + Address UB, Address ST, + llvm::Value *Chunk = nullptr); + /// \brief Call the appropriate runtime routine to notify that we finished /// iteration of the ordered loop with the dynamic scheduling. /// @@ -807,12 +796,6 @@ public: /// kmp_task_t *new_task), where new_task is a resulting structure from /// previous items. /// \param D Current task directive. - /// \param Tied true if the task is tied (the task is tied to the thread that - /// can suspend its task region), false - untied (the task is not tied to any - /// thread). - /// \param Final Contains either constant bool value, or llvm::Value * of i1 - /// type for final clause. If the value is true, the task forces all of its - /// child tasks to become final and included tasks. /// \param TaskFunction An LLVM function with type void (*)(i32 /*gtid*/, i32 /// /*part_id*/, captured_struct */*__context*/); /// \param SharedsTy A type which contains references the shared variables. @@ -820,29 +803,47 @@ public: /// TaskFunction. /// \param IfCond Not a nullptr if 'if' clause was specified, nullptr /// otherwise. - /// \param PrivateVars List of references to private variables for the task - /// directive. - /// \param PrivateCopies List of private copies for each private variable in - /// \p PrivateVars. - /// \param FirstprivateVars List of references to private variables for the - /// task directive. - /// \param FirstprivateCopies List of private copies for each private variable - /// in \p FirstprivateVars. - /// \param FirstprivateInits List of references to auto generated variables - /// used for initialization of a single array element. Used if firstprivate - /// variable is of array type. - /// \param Dependences List of dependences for the 'task' construct, including - /// original expression and dependency type. 
- virtual void emitTaskCall( - CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, - bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final, + /// \param Data Additional data for task generation like tiednsee, final + /// state, list of privates etc. + virtual void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, + const OMPExecutableDirective &D, + llvm::Value *TaskFunction, QualType SharedsTy, + Address Shareds, const Expr *IfCond, + const OMPTaskDataTy &Data); + + /// Emit task region for the taskloop directive. The taskloop region is + /// emitted in several steps: + /// 1. Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 + /// gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, + /// kmp_routine_entry_t *task_entry). Here task_entry is a pointer to the + /// function: + /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { + /// TaskFunction(gtid, tt->part_id, tt->shareds); + /// return 0; + /// } + /// 2. Copy a list of shared variables to field shareds of the resulting + /// structure kmp_task_t returned by the previous call (if any). + /// 3. Copy a pointer to destructions function to field destructions of the + /// resulting structure kmp_task_t. + /// 4. Emit a call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t + /// *task, int if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int + /// nogroup, int sched, kmp_uint64 grainsize, void *task_dup ), where new_task + /// is a resulting structure from + /// previous items. + /// \param D Current task directive. + /// \param TaskFunction An LLVM function with type void (*)(i32 /*gtid*/, i32 + /// /*part_id*/, captured_struct */*__context*/); + /// \param SharedsTy A type which contains references the shared variables. + /// \param Shareds Context with the list of shared variables from the \p + /// TaskFunction. + /// \param IfCond Not a nullptr if 'if' clause was specified, nullptr + /// otherwise. + /// \param Data Additional data for task generation like tiednsee, final + /// state, list of privates etc. + virtual void emitTaskLoopCall( + CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds, - const Expr *IfCond, ArrayRef<const Expr *> PrivateVars, - ArrayRef<const Expr *> PrivateCopies, - ArrayRef<const Expr *> FirstprivateVars, - ArrayRef<const Expr *> FirstprivateCopies, - ArrayRef<const Expr *> FirstprivateInits, - ArrayRef<std::pair<OpenMPDependClauseKind, const Expr *>> Dependences); + const Expr *IfCond, const OMPTaskDataTy &Data); /// \brief Emit code for the directive that does not require outlining. /// @@ -926,13 +927,15 @@ public: /// \param OutlinedFn Outlined function value to be defined by this call. /// \param OutlinedFnID Outlined function ID value to be defined by this call. /// \param IsOffloadEntry True if the outlined function is an offload entry. + /// \param CodeGen Code generation sequence for the \a D directive. /// An oulined function may not be an entry if, e.g. the if clause always /// evaluates to false. virtual void emitTargetOutlinedFunction(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, - bool IsOffloadEntry); + bool IsOffloadEntry, + const RegionCodeGenTy &CodeGen); /// \brief Emit the target offloading code associated with \a D. 
The emitted /// code attempts offloading the execution to the device, an the event of @@ -972,6 +975,68 @@ public: /// was emitted in the current module and return the function that registers /// it. virtual llvm::Function *emitRegistrationFunction(); + + /// \brief Emits code for teams call of the \a OutlinedFn with + /// variables captured in a record which address is stored in \a + /// CapturedStruct. + /// \param OutlinedFn Outlined function to be run by team masters. Type of + /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*). + /// \param CapturedVars A pointer to the record with the references to + /// variables used in \a OutlinedFn function. + /// + virtual void emitTeamsCall(CodeGenFunction &CGF, + const OMPExecutableDirective &D, + SourceLocation Loc, llvm::Value *OutlinedFn, + ArrayRef<llvm::Value *> CapturedVars); + + /// \brief Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 + /// global_tid, kmp_int32 num_teams, kmp_int32 thread_limit) to generate code + /// for num_teams clause. + /// \param NumTeams An integer expression of teams. + /// \param ThreadLimit An integer expression of threads. + virtual void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, + const Expr *ThreadLimit, SourceLocation Loc); + + /// \brief Emit the target data mapping code associated with \a D. + /// \param D Directive to emit. + /// \param IfCond Expression evaluated in if clause associated with the target + /// directive, or null if no if clause is used. + /// \param Device Expression evaluated in device clause associated with the + /// target directive, or null if no device clause is used. + /// \param CodeGen Function that emits the enclosed region. + virtual void emitTargetDataCalls(CodeGenFunction &CGF, + const OMPExecutableDirective &D, + const Expr *IfCond, const Expr *Device, + const RegionCodeGenTy &CodeGen); + + /// \brief Emit the data mapping/movement code associated with the directive + /// \a D that should be of the form 'target [{enter|exit} data | update]'. + /// \param D Directive to emit. + /// \param IfCond Expression evaluated in if clause associated with the target + /// directive, or null if no if clause is used. + /// \param Device Expression evaluated in device clause associated with the + /// target directive, or null if no device clause is used. + virtual void emitTargetDataStandAloneCall(CodeGenFunction &CGF, + const OMPExecutableDirective &D, + const Expr *IfCond, + const Expr *Device); + + /// Marks function \a Fn with properly mangled versions of vector functions. + /// \param FD Function marked as 'declare simd'. + /// \param Fn LLVM function that must be marked with 'declare simd' + /// attributes. + virtual void emitDeclareSimdFunction(const FunctionDecl *FD, + llvm::Function *Fn); + + /// Emit initialization for doacross loop nesting support. + /// \param D Loop-based construct used in doacross nesting construct. + virtual void emitDoacrossInit(CodeGenFunction &CGF, + const OMPLoopDirective &D); + + /// Emit code for doacross ordered directive with 'depend' clause. + /// \param C 'depend' clause with 'sink|source' dependency kind. 
+ virtual void emitDoacrossOrdered(CodeGenFunction &CGF, + const OMPDependClause *C); }; } // namespace CodeGen diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp new file mode 100644 index 0000000..d64f6df --- /dev/null +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -0,0 +1,396 @@ +//===---- CGOpenMPRuntimeNVPTX.cpp - Interface to OpenMP NVPTX Runtimes ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This provides a class for OpenMP runtime code generation specialized to NVPTX +// targets. +// +//===----------------------------------------------------------------------===// + +#include "CGOpenMPRuntimeNVPTX.h" +#include "clang/AST/DeclOpenMP.h" +#include "CodeGenFunction.h" +#include "clang/AST/StmtOpenMP.h" + +using namespace clang; +using namespace CodeGen; + +/// \brief Get the GPU warp size. +llvm::Value *CGOpenMPRuntimeNVPTX::getNVPTXWarpSize(CodeGenFunction &CGF) { + CGBuilderTy &Bld = CGF.Builder; + return Bld.CreateCall( + llvm::Intrinsic::getDeclaration( + &CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_warpsize), + llvm::None, "nvptx_warp_size"); +} + +/// \brief Get the id of the current thread on the GPU. +llvm::Value *CGOpenMPRuntimeNVPTX::getNVPTXThreadID(CodeGenFunction &CGF) { + CGBuilderTy &Bld = CGF.Builder; + return Bld.CreateCall( + llvm::Intrinsic::getDeclaration( + &CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x), + llvm::None, "nvptx_tid"); +} + +// \brief Get the maximum number of threads in a block of the GPU. +llvm::Value *CGOpenMPRuntimeNVPTX::getNVPTXNumThreads(CodeGenFunction &CGF) { + CGBuilderTy &Bld = CGF.Builder; + return Bld.CreateCall( + llvm::Intrinsic::getDeclaration( + &CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_ntid_x), + llvm::None, "nvptx_num_threads"); +} + +/// \brief Get barrier to synchronize all threads in a block. +void CGOpenMPRuntimeNVPTX::getNVPTXCTABarrier(CodeGenFunction &CGF) { + CGBuilderTy &Bld = CGF.Builder; + Bld.CreateCall(llvm::Intrinsic::getDeclaration( + &CGM.getModule(), llvm::Intrinsic::nvvm_barrier0)); +} + +// \brief Synchronize all GPU threads in a block. +void CGOpenMPRuntimeNVPTX::syncCTAThreads(CodeGenFunction &CGF) { + getNVPTXCTABarrier(CGF); +} + +/// \brief Get the thread id of the OMP master thread. +/// The master thread id is the first thread (lane) of the last warp in the +/// GPU block. Warp size is assumed to be some power of 2. +/// Thread id is 0 indexed. +/// E.g: If NumThreads is 33, master id is 32. +/// If NumThreads is 64, master id is 32. +/// If NumThreads is 1024, master id is 992. +llvm::Value *CGOpenMPRuntimeNVPTX::getMasterThreadID(CodeGenFunction &CGF) { + CGBuilderTy &Bld = CGF.Builder; + llvm::Value *NumThreads = getNVPTXNumThreads(CGF); + + // We assume that the warp size is a power of 2. 
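// Equivalently, in scalar form: master_tid = (num_threads - 1) & ~(warp_size - 1).
// With a warp size of 32 (the value reported by current NVPTX targets) this
// reproduces the examples in the comment above:
//   (33 - 1)   & ~31 == 32
//   (64 - 1)   & ~31 == 32
//   (1024 - 1) & ~31 == 992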
+ llvm::Value *Mask = Bld.CreateSub(getNVPTXWarpSize(CGF), Bld.getInt32(1)); + + return Bld.CreateAnd(Bld.CreateSub(NumThreads, Bld.getInt32(1)), + Bld.CreateNot(Mask), "master_tid"); +} + +namespace { +enum OpenMPRTLFunctionNVPTX { + /// \brief Call to void __kmpc_kernel_init(kmp_int32 omp_handle, + /// kmp_int32 thread_limit); + OMPRTL_NVPTX__kmpc_kernel_init, +}; + +// NVPTX Address space +enum ADDRESS_SPACE { + ADDRESS_SPACE_SHARED = 3, +}; +} // namespace + +CGOpenMPRuntimeNVPTX::WorkerFunctionState::WorkerFunctionState( + CodeGenModule &CGM) + : WorkerFn(nullptr), CGFI(nullptr) { + createWorkerFunction(CGM); +} + +void CGOpenMPRuntimeNVPTX::WorkerFunctionState::createWorkerFunction( + CodeGenModule &CGM) { + // Create an worker function with no arguments. + CGFI = &CGM.getTypes().arrangeNullaryFunction(); + + WorkerFn = llvm::Function::Create( + CGM.getTypes().GetFunctionType(*CGFI), llvm::GlobalValue::InternalLinkage, + /* placeholder */ "_worker", &CGM.getModule()); + CGM.SetInternalFunctionAttributes(/*D=*/nullptr, WorkerFn, *CGFI); + WorkerFn->setLinkage(llvm::GlobalValue::InternalLinkage); + WorkerFn->addFnAttr(llvm::Attribute::NoInline); +} + +void CGOpenMPRuntimeNVPTX::initializeEnvironment() { + // + // Initialize master-worker control state in shared memory. + // + + auto DL = CGM.getDataLayout(); + ActiveWorkers = new llvm::GlobalVariable( + CGM.getModule(), CGM.Int32Ty, /*isConstant=*/false, + llvm::GlobalValue::CommonLinkage, + llvm::Constant::getNullValue(CGM.Int32Ty), "__omp_num_threads", 0, + llvm::GlobalVariable::NotThreadLocal, ADDRESS_SPACE_SHARED); + ActiveWorkers->setAlignment(DL.getPrefTypeAlignment(CGM.Int32Ty)); + + WorkID = new llvm::GlobalVariable( + CGM.getModule(), CGM.Int64Ty, /*isConstant=*/false, + llvm::GlobalValue::CommonLinkage, + llvm::Constant::getNullValue(CGM.Int64Ty), "__tgt_work_id", 0, + llvm::GlobalVariable::NotThreadLocal, ADDRESS_SPACE_SHARED); + WorkID->setAlignment(DL.getPrefTypeAlignment(CGM.Int64Ty)); +} + +void CGOpenMPRuntimeNVPTX::emitWorkerFunction(WorkerFunctionState &WST) { + auto &Ctx = CGM.getContext(); + + CodeGenFunction CGF(CGM, /*suppressNewContext=*/true); + CGF.StartFunction(GlobalDecl(), Ctx.VoidTy, WST.WorkerFn, *WST.CGFI, {}); + emitWorkerLoop(CGF, WST); + CGF.FinishFunction(); +} + +void CGOpenMPRuntimeNVPTX::emitWorkerLoop(CodeGenFunction &CGF, + WorkerFunctionState &WST) { + // + // The workers enter this loop and wait for parallel work from the master. + // When the master encounters a parallel region it sets up the work + variable + // arguments, and wakes up the workers. The workers first check to see if + // they are required for the parallel region, i.e., within the # of requested + // parallel threads. The activated workers load the variable arguments and + // execute the parallel work. + // + + CGBuilderTy &Bld = CGF.Builder; + + llvm::BasicBlock *AwaitBB = CGF.createBasicBlock(".await.work"); + llvm::BasicBlock *SelectWorkersBB = CGF.createBasicBlock(".select.workers"); + llvm::BasicBlock *ExecuteBB = CGF.createBasicBlock(".execute.parallel"); + llvm::BasicBlock *TerminateBB = CGF.createBasicBlock(".terminate.parallel"); + llvm::BasicBlock *BarrierBB = CGF.createBasicBlock(".barrier.parallel"); + llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".exit"); + + CGF.EmitBranch(AwaitBB); + + // Workers wait for work from master. + CGF.EmitBlock(AwaitBB); + // Wait for parallel work + syncCTAThreads(CGF); + // On termination condition (workid == 0), exit loop. 
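// In outline, the control flow emitted below behaves like the following
// illustrative pseudocode, where __tgt_work_id and __omp_num_threads are the
// shared globals created in initializeEnvironment:
//   for (;;) {
//     barrier();                        // wait for work from the master
//     if (__tgt_work_id == 0) break;    // master signalled termination
//     if (tid < __omp_num_threads) {
//       // execute the parallel work (still a TODO at this point)
//     }
//     barrier();                        // rejoin before the next round
//   }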
+ llvm::Value *ShouldTerminate = Bld.CreateICmpEQ( + Bld.CreateAlignedLoad(WorkID, WorkID->getAlignment()), + llvm::Constant::getNullValue(WorkID->getType()->getElementType()), + "should_terminate"); + Bld.CreateCondBr(ShouldTerminate, ExitBB, SelectWorkersBB); + + // Activate requested workers. + CGF.EmitBlock(SelectWorkersBB); + llvm::Value *ThreadID = getNVPTXThreadID(CGF); + llvm::Value *ActiveThread = Bld.CreateICmpSLT( + ThreadID, + Bld.CreateAlignedLoad(ActiveWorkers, ActiveWorkers->getAlignment()), + "active_thread"); + Bld.CreateCondBr(ActiveThread, ExecuteBB, BarrierBB); + + // Signal start of parallel region. + CGF.EmitBlock(ExecuteBB); + // TODO: Add parallel work. + + // Signal end of parallel region. + CGF.EmitBlock(TerminateBB); + CGF.EmitBranch(BarrierBB); + + // All active and inactive workers wait at a barrier after parallel region. + CGF.EmitBlock(BarrierBB); + // Barrier after parallel region. + syncCTAThreads(CGF); + CGF.EmitBranch(AwaitBB); + + // Exit target region. + CGF.EmitBlock(ExitBB); +} + +// Setup NVPTX threads for master-worker OpenMP scheme. +void CGOpenMPRuntimeNVPTX::emitEntryHeader(CodeGenFunction &CGF, + EntryFunctionState &EST, + WorkerFunctionState &WST) { + CGBuilderTy &Bld = CGF.Builder; + + // Get the master thread id. + llvm::Value *MasterID = getMasterThreadID(CGF); + // Current thread's identifier. + llvm::Value *ThreadID = getNVPTXThreadID(CGF); + + // Setup BBs in entry function. + llvm::BasicBlock *WorkerCheckBB = CGF.createBasicBlock(".check.for.worker"); + llvm::BasicBlock *WorkerBB = CGF.createBasicBlock(".worker"); + llvm::BasicBlock *MasterBB = CGF.createBasicBlock(".master"); + EST.ExitBB = CGF.createBasicBlock(".exit"); + + // The head (master thread) marches on while its body of companion threads in + // the warp go to sleep. + llvm::Value *ShouldDie = + Bld.CreateICmpUGT(ThreadID, MasterID, "excess_in_master_warp"); + Bld.CreateCondBr(ShouldDie, EST.ExitBB, WorkerCheckBB); + + // Select worker threads... + CGF.EmitBlock(WorkerCheckBB); + llvm::Value *IsWorker = Bld.CreateICmpULT(ThreadID, MasterID, "is_worker"); + Bld.CreateCondBr(IsWorker, WorkerBB, MasterBB); + + // ... and send to worker loop, awaiting parallel invocation. + CGF.EmitBlock(WorkerBB); + CGF.EmitCallOrInvoke(WST.WorkerFn, llvm::None); + CGF.EmitBranch(EST.ExitBB); + + // Only master thread executes subsequent serial code. + CGF.EmitBlock(MasterBB); + + // First action in sequential region: + // Initialize the state of the OpenMP runtime library on the GPU. + llvm::Value *Args[] = {Bld.getInt32(/*OmpHandle=*/0), getNVPTXThreadID(CGF)}; + CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_init), + Args); +} + +void CGOpenMPRuntimeNVPTX::emitEntryFooter(CodeGenFunction &CGF, + EntryFunctionState &EST) { + CGBuilderTy &Bld = CGF.Builder; + llvm::BasicBlock *TerminateBB = CGF.createBasicBlock(".termination.notifier"); + CGF.EmitBranch(TerminateBB); + + CGF.EmitBlock(TerminateBB); + // Signal termination condition. + Bld.CreateAlignedStore( + llvm::Constant::getNullValue(WorkID->getType()->getElementType()), WorkID, + WorkID->getAlignment()); + // Barrier to terminate worker threads. + syncCTAThreads(CGF); + // Master thread jumps to exit point. + CGF.EmitBranch(EST.ExitBB); + + CGF.EmitBlock(EST.ExitBB); +} + +/// \brief Returns specified OpenMP runtime function for the current OpenMP +/// implementation. Specialized for the NVPTX device. +/// \param Function OpenMP runtime function. +/// \return Specified function. 
+llvm::Constant * +CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) { + llvm::Constant *RTLFn = nullptr; + switch (static_cast<OpenMPRTLFunctionNVPTX>(Function)) { + case OMPRTL_NVPTX__kmpc_kernel_init: { + // Build void __kmpc_kernel_init(kmp_int32 omp_handle, + // kmp_int32 thread_limit); + llvm::Type *TypeParams[] = {CGM.Int32Ty, CGM.Int32Ty}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_init"); + break; + } + } + return RTLFn; +} + +void CGOpenMPRuntimeNVPTX::createOffloadEntry(llvm::Constant *ID, + llvm::Constant *Addr, + uint64_t Size) { + auto *F = dyn_cast<llvm::Function>(Addr); + // TODO: Add support for global variables on the device after declare target + // support. + if (!F) + return; + llvm::Module *M = F->getParent(); + llvm::LLVMContext &Ctx = M->getContext(); + + // Get "nvvm.annotations" metadata node + llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations"); + + llvm::Metadata *MDVals[] = { + llvm::ConstantAsMetadata::get(F), llvm::MDString::get(Ctx, "kernel"), + llvm::ConstantAsMetadata::get( + llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))}; + // Append metadata to nvvm.annotations + MD->addOperand(llvm::MDNode::get(Ctx, MDVals)); +} + +void CGOpenMPRuntimeNVPTX::emitTargetOutlinedFunction( + const OMPExecutableDirective &D, StringRef ParentName, + llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, + bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { + if (!IsOffloadEntry) // Nothing to do. + return; + + assert(!ParentName.empty() && "Invalid target region parent name!"); + + EntryFunctionState EST; + WorkerFunctionState WST(CGM); + + // Emit target region as a standalone region. + class NVPTXPrePostActionTy : public PrePostActionTy { + CGOpenMPRuntimeNVPTX &RT; + CGOpenMPRuntimeNVPTX::EntryFunctionState &EST; + CGOpenMPRuntimeNVPTX::WorkerFunctionState &WST; + + public: + NVPTXPrePostActionTy(CGOpenMPRuntimeNVPTX &RT, + CGOpenMPRuntimeNVPTX::EntryFunctionState &EST, + CGOpenMPRuntimeNVPTX::WorkerFunctionState &WST) + : RT(RT), EST(EST), WST(WST) {} + void Enter(CodeGenFunction &CGF) override { + RT.emitEntryHeader(CGF, EST, WST); + } + void Exit(CodeGenFunction &CGF) override { RT.emitEntryFooter(CGF, EST); } + } Action(*this, EST, WST); + CodeGen.setAction(Action); + emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, + IsOffloadEntry, CodeGen); + + // Create the worker function + emitWorkerFunction(WST); + + // Now change the name of the worker function to correspond to this target + // region's entry function. + WST.WorkerFn->setName(OutlinedFn->getName() + "_worker"); +} + +CGOpenMPRuntimeNVPTX::CGOpenMPRuntimeNVPTX(CodeGenModule &CGM) + : CGOpenMPRuntime(CGM), ActiveWorkers(nullptr), WorkID(nullptr) { + if (!CGM.getLangOpts().OpenMPIsDevice) + llvm_unreachable("OpenMP NVPTX can only handle device code."); + + // Called once per module during initialization. 
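// For reference, the annotation added by createOffloadEntry above appears in
// textual IR roughly as follows (illustrative; @outlined_target_fn is a
// placeholder for the outlined entry's actual symbol and signature):
//   !nvvm.annotations = !{!0}
//   !0 = !{void ()* @outlined_target_fn, !"kernel", i32 1}
// which is how the NVPTX backend recognizes the outlined entry as a kernel.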
+ initializeEnvironment(); +} + +void CGOpenMPRuntimeNVPTX::emitNumTeamsClause(CodeGenFunction &CGF, + const Expr *NumTeams, + const Expr *ThreadLimit, + SourceLocation Loc) {} + +llvm::Value *CGOpenMPRuntimeNVPTX::emitParallelOrTeamsOutlinedFunction( + const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, + OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { + + llvm::Function *OutlinedFun = nullptr; + if (isa<OMPTeamsDirective>(D)) { + llvm::Value *OutlinedFunVal = + CGOpenMPRuntime::emitParallelOrTeamsOutlinedFunction( + D, ThreadIDVar, InnermostKind, CodeGen); + OutlinedFun = cast<llvm::Function>(OutlinedFunVal); + OutlinedFun->addFnAttr(llvm::Attribute::AlwaysInline); + } else + llvm_unreachable("parallel directive is not yet supported for nvptx " + "backend."); + + return OutlinedFun; +} + +void CGOpenMPRuntimeNVPTX::emitTeamsCall(CodeGenFunction &CGF, + const OMPExecutableDirective &D, + SourceLocation Loc, + llvm::Value *OutlinedFn, + ArrayRef<llvm::Value *> CapturedVars) { + if (!CGF.HaveInsertPoint()) + return; + + Address ZeroAddr = + CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4), + /*Name*/ ".zero.addr"); + CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); + llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; + OutlinedFnArgs.push_back(ZeroAddr.getPointer()); + OutlinedFnArgs.push_back(ZeroAddr.getPointer()); + OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); + CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs); +} diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h new file mode 100644 index 0000000..a6c64b2 --- /dev/null +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h @@ -0,0 +1,179 @@ +//===----- CGOpenMPRuntimeNVPTX.h - Interface to OpenMP NVPTX Runtimes ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This provides a class for OpenMP runtime code generation specialized to NVPTX +// targets. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H +#define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H + +#include "CGOpenMPRuntime.h" +#include "CodeGenFunction.h" +#include "clang/AST/StmtOpenMP.h" +#include "llvm/IR/CallSite.h" + +namespace clang { +namespace CodeGen { + +class CGOpenMPRuntimeNVPTX : public CGOpenMPRuntime { +public: + class EntryFunctionState { + public: + llvm::BasicBlock *ExitBB; + + EntryFunctionState() : ExitBB(nullptr){}; + }; + + class WorkerFunctionState { + public: + llvm::Function *WorkerFn; + const CGFunctionInfo *CGFI; + + WorkerFunctionState(CodeGenModule &CGM); + + private: + void createWorkerFunction(CodeGenModule &CGM); + }; + + /// \brief Helper for target entry function. Guide the master and worker + /// threads to their respective locations. + void emitEntryHeader(CodeGenFunction &CGF, EntryFunctionState &EST, + WorkerFunctionState &WST); + + /// \brief Signal termination of OMP execution. + void emitEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST); + +private: + // + // NVPTX calls. + // + + /// \brief Get the GPU warp size. + llvm::Value *getNVPTXWarpSize(CodeGenFunction &CGF); + + /// \brief Get the id of the current thread on the GPU. 
+  llvm::Value *getNVPTXThreadID(CodeGenFunction &CGF);
+
+  /// \brief Get the maximum number of threads in a block of the GPU.
+  llvm::Value *getNVPTXNumThreads(CodeGenFunction &CGF);
+
+  /// \brief Get barrier to synchronize all threads in a block.
+  void getNVPTXCTABarrier(CodeGenFunction &CGF);
+
+  /// \brief Synchronize all GPU threads in a block.
+  void syncCTAThreads(CodeGenFunction &CGF);
+
+  //
+  // OMP calls.
+  //
+
+  /// \brief Get the thread id of the OMP master thread.
+  /// The master thread id is the first thread (lane) of the last warp in the
+  /// GPU block. Warp size is assumed to be some power of 2.
+  /// Thread id is 0-indexed.
+  /// E.g.: If NumThreads is 33, master id is 32.
+  ///       If NumThreads is 64, master id is 32.
+  ///       If NumThreads is 1024, master id is 992.
+  llvm::Value *getMasterThreadID(CodeGenFunction &CGF);
+
+  //
+  // Private state and methods.
+  //
+
+  // Master-worker control state.
+  // Number of requested OMP threads in parallel region.
+  llvm::GlobalVariable *ActiveWorkers;
+  // Outlined function for the workers to execute.
+  llvm::GlobalVariable *WorkID;
+
+  /// \brief Initialize master-worker control state.
+  void initializeEnvironment();
+
+  /// \brief Emit the worker function for the current target region.
+  void emitWorkerFunction(WorkerFunctionState &WST);
+
+  /// \brief Helper for worker function. Emit body of worker loop.
+  void emitWorkerLoop(CodeGenFunction &CGF, WorkerFunctionState &WST);
+
+  /// \brief Returns the specified OpenMP runtime function for the current
+  /// OpenMP implementation, specialized for the NVPTX device.
+  /// \param Function OpenMP runtime function.
+  /// \return Specified function.
+  llvm::Constant *createNVPTXRuntimeFunction(unsigned Function);
+
+  //
+  // Base class overrides.
+  //
+
+  /// \brief Creates offloading entry for the provided entry ID \a ID,
+  /// address \a Addr and size \a Size.
+  void createOffloadEntry(llvm::Constant *ID, llvm::Constant *Addr,
+                          uint64_t Size) override;
+
+  /// \brief Emit outlined function for 'target' directive on the NVPTX
+  /// device.
+  /// \param D Directive to emit.
+  /// \param ParentName Name of the function that encloses the target region.
+  /// \param OutlinedFn Outlined function value to be defined by this call.
+  /// \param OutlinedFnID Outlined function ID value to be defined by this call.
+  /// \param IsOffloadEntry True if the outlined function is an offload entry.
+  /// An outlined function may not be an entry if, e.g., the 'if' clause always
+  /// evaluates to false.
+  void emitTargetOutlinedFunction(const OMPExecutableDirective &D,
+                                  StringRef ParentName,
+                                  llvm::Function *&OutlinedFn,
+                                  llvm::Constant *&OutlinedFnID,
+                                  bool IsOffloadEntry,
+                                  const RegionCodeGenTy &CodeGen) override;
+
+public:
+  explicit CGOpenMPRuntimeNVPTX(CodeGenModule &CGM);
+
+  /// \brief In the general case this function would emit a call to the OpenMP
+  /// runtime function kmpc_push_num_teams. In the NVPTX backend it is not
+  /// needed, as these numbers are obtained through the PTX grid and block
+  /// configuration.
+  /// \param NumTeams An integer expression of teams.
+  /// \param ThreadLimit An integer expression of threads.
+  void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams,
+                          const Expr *ThreadLimit, SourceLocation Loc) override;
+
+  /// \brief Emits an outlined function for the specified OpenMP parallel or
+  /// teams directive \a D; for 'teams' the outlined function is marked
+  /// always-inline. This outlined function has type void(*)(kmp_int32 *ThreadID,
+  /// kmp_int32 BoundID, struct context_vars*).
+ /// \param D OpenMP directive. + /// \param ThreadIDVar Variable for thread id in the current OpenMP region. + /// \param InnermostKind Kind of innermost directive (for simple directives it + /// is a directive itself, for combined - its innermost directive). + /// \param CodeGen Code generation sequence for the \a D directive. + llvm::Value * + emitParallelOrTeamsOutlinedFunction(const OMPExecutableDirective &D, + const VarDecl *ThreadIDVar, + OpenMPDirectiveKind InnermostKind, + const RegionCodeGenTy &CodeGen) override; + + /// \brief Emits code for teams call of the \a OutlinedFn with + /// variables captured in a record which address is stored in \a + /// CapturedStruct. + /// \param OutlinedFn Outlined function to be run by team masters. Type of + /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*). + /// \param CapturedVars A pointer to the record with the references to + /// variables used in \a OutlinedFn function. + /// + void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, + SourceLocation Loc, llvm::Value *OutlinedFn, + ArrayRef<llvm::Value *> CapturedVars) override; +}; + +} // CodeGen namespace. +} // clang namespace. + +#endif // LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGRecordLayout.h b/contrib/llvm/tools/clang/lib/CodeGen/CGRecordLayout.h index d4ad33e..7b9c27d 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGRecordLayout.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGRecordLayout.h @@ -11,7 +11,7 @@ #define LLVM_CLANG_LIB_CODEGEN_CGRECORDLAYOUT_H #include "clang/AST/CharUnits.h" -#include "clang/AST/Decl.h" +#include "clang/AST/DeclCXX.h" #include "clang/Basic/LLVM.h" #include "llvm/ADT/DenseMap.h" #include "llvm/IR/DerivedTypes.h" diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp index 375b59c..7d530a2 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp @@ -121,7 +121,7 @@ struct CGRecordLowering { /// \brief Wraps llvm::Type::getIntNTy with some implicit arguments. llvm::Type *getIntNType(uint64_t NumBits) { return llvm::Type::getIntNTy(Types.getLLVMContext(), - (unsigned)llvm::RoundUpToAlignment(NumBits, 8)); + (unsigned)llvm::alignTo(NumBits, 8)); } /// \brief Gets an llvm type of size NumBytes and alignment 1. llvm::Type *getByteArrayType(CharUnits NumBytes) { @@ -555,7 +555,7 @@ void CGRecordLowering::clipTailPadding() { if (Member->Offset < Tail) { assert(Prior->Kind == MemberInfo::Field && !Prior->FD && "Only storage fields have tail padding!"); - Prior->Data = getByteArrayType(bitsToCharUnits(llvm::RoundUpToAlignment( + Prior->Data = getByteArrayType(bitsToCharUnits(llvm::alignTo( cast<llvm::IntegerType>(Prior->Data)->getIntegerBitWidth(), 8))); } if (Member->Data) @@ -609,8 +609,8 @@ void CGRecordLowering::insertPadding() { CharUnits Offset = Member->Offset; assert(Offset >= Size); // Insert padding if we need to. - if (Offset != Size.RoundUpToAlignment(Packed ? CharUnits::One() : - getAlignment(Member->Data))) + if (Offset != + Size.alignTo(Packed ? 
CharUnits::One() : getAlignment(Member->Data))) Padding.push_back(std::make_pair(Size, Offset - Size)); Size = Offset + getSize(Member->Data); } @@ -842,7 +842,7 @@ void CGRecordLayout::print(raw_ostream &OS) const { OS << "]>\n"; } -void CGRecordLayout::dump() const { +LLVM_DUMP_METHOD void CGRecordLayout::dump() const { print(llvm::errs()); } @@ -855,6 +855,6 @@ void CGBitFieldInfo::print(raw_ostream &OS) const { << " StorageOffset:" << StorageOffset.getQuantity() << ">"; } -void CGBitFieldInfo::dump() const { +LLVM_DUMP_METHOD void CGBitFieldInfo::dump() const { print(llvm::errs()); } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGStmt.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGStmt.cpp index cc4fa2e..d815863 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGStmt.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGStmt.cpp @@ -256,15 +256,45 @@ void CodeGenFunction::EmitStmt(const Stmt *S) { case Stmt::OMPTargetDataDirectiveClass: EmitOMPTargetDataDirective(cast<OMPTargetDataDirective>(*S)); break; + case Stmt::OMPTargetEnterDataDirectiveClass: + EmitOMPTargetEnterDataDirective(cast<OMPTargetEnterDataDirective>(*S)); + break; + case Stmt::OMPTargetExitDataDirectiveClass: + EmitOMPTargetExitDataDirective(cast<OMPTargetExitDataDirective>(*S)); + break; + case Stmt::OMPTargetParallelDirectiveClass: + EmitOMPTargetParallelDirective(cast<OMPTargetParallelDirective>(*S)); + break; + case Stmt::OMPTargetParallelForDirectiveClass: + EmitOMPTargetParallelForDirective(cast<OMPTargetParallelForDirective>(*S)); + break; case Stmt::OMPTaskLoopDirectiveClass: EmitOMPTaskLoopDirective(cast<OMPTaskLoopDirective>(*S)); break; case Stmt::OMPTaskLoopSimdDirectiveClass: EmitOMPTaskLoopSimdDirective(cast<OMPTaskLoopSimdDirective>(*S)); break; -case Stmt::OMPDistributeDirectiveClass: + case Stmt::OMPDistributeDirectiveClass: EmitOMPDistributeDirective(cast<OMPDistributeDirective>(*S)); - break; + break; + case Stmt::OMPTargetUpdateDirectiveClass: + EmitOMPTargetUpdateDirective(cast<OMPTargetUpdateDirective>(*S)); + break; + case Stmt::OMPDistributeParallelForDirectiveClass: + EmitOMPDistributeParallelForDirective( + cast<OMPDistributeParallelForDirective>(*S)); + break; + case Stmt::OMPDistributeParallelForSimdDirectiveClass: + EmitOMPDistributeParallelForSimdDirective( + cast<OMPDistributeParallelForSimdDirective>(*S)); + break; + case Stmt::OMPDistributeSimdDirectiveClass: + EmitOMPDistributeSimdDirective(cast<OMPDistributeSimdDirective>(*S)); + break; + case Stmt::OMPTargetParallelForSimdDirectiveClass: + EmitOMPTargetParallelForSimdDirective( + cast<OMPTargetParallelForSimdDirective>(*S)); + break; } } @@ -542,13 +572,17 @@ void CodeGenFunction::EmitIfStmt(const IfStmt &S) { // unequal to 0. The condition must be a scalar type. LexicalScope ConditionScope(*this, S.getCond()->getSourceRange()); + if (S.getInit()) + EmitStmt(S.getInit()); + if (S.getConditionVariable()) EmitAutoVarDecl(*S.getConditionVariable()); // If the condition constant folds and can be elided, try to avoid emitting // the condition and the dead arm of the if/else. bool CondConstant; - if (ConstantFoldsToSimpleInteger(S.getCond(), CondConstant)) { + if (ConstantFoldsToSimpleInteger(S.getCond(), CondConstant, + S.isConstexpr())) { // Figure out which block (then or else) is executed. const Stmt *Executed = S.getThen(); const Stmt *Skipped = S.getElse(); @@ -557,7 +591,7 @@ void CodeGenFunction::EmitIfStmt(const IfStmt &S) { // If the skipped block has no labels in it, just emit the executed block. 
// This avoids emitting dead code and simplifies the CFG substantially. - if (!ContainsLabel(Skipped)) { + if (S.isConstexpr() || !ContainsLabel(Skipped)) { if (CondConstant) incrementProfileCounter(&S); if (Executed) { @@ -617,7 +651,8 @@ void CodeGenFunction::EmitWhileStmt(const WhileStmt &S, JumpDest LoopHeader = getJumpDestInCurrentScope("while.cond"); EmitBlock(LoopHeader.getBlock()); - LoopStack.push(LoopHeader.getBlock(), CGM.getContext(), WhileAttrs); + LoopStack.push(LoopHeader.getBlock(), CGM.getContext(), WhileAttrs, + Builder.getCurrentDebugLocation()); // Create an exit block for when the condition fails, which will // also become the break target. @@ -708,7 +743,8 @@ void CodeGenFunction::EmitDoStmt(const DoStmt &S, // Emit the body of the loop. llvm::BasicBlock *LoopBody = createBasicBlock("do.body"); - LoopStack.push(LoopBody, CGM.getContext(), DoAttrs); + LoopStack.push(LoopBody, CGM.getContext(), DoAttrs, + Builder.getCurrentDebugLocation()); EmitBlockWithFallThrough(LoopBody, &S); { @@ -760,6 +796,8 @@ void CodeGenFunction::EmitForStmt(const ForStmt &S, LexicalScope ForScope(*this, S.getSourceRange()); + llvm::DebugLoc DL = Builder.getCurrentDebugLocation(); + // Evaluate the first part before the loop. if (S.getInit()) EmitStmt(S.getInit()); @@ -771,7 +809,7 @@ void CodeGenFunction::EmitForStmt(const ForStmt &S, llvm::BasicBlock *CondBlock = Continue.getBlock(); EmitBlock(CondBlock); - LoopStack.push(CondBlock, CGM.getContext(), ForAttrs); + LoopStack.push(CondBlock, CGM.getContext(), ForAttrs, DL); // If the for loop doesn't have an increment we can just use the // condition as the continue block. Otherwise we'll need to create @@ -856,9 +894,12 @@ CodeGenFunction::EmitCXXForRangeStmt(const CXXForRangeStmt &S, LexicalScope ForScope(*this, S.getSourceRange()); + llvm::DebugLoc DL = Builder.getCurrentDebugLocation(); + // Evaluate the first pieces before the loop. EmitStmt(S.getRangeStmt()); - EmitStmt(S.getBeginEndStmt()); + EmitStmt(S.getBeginStmt()); + EmitStmt(S.getEndStmt()); // Start the loop with a block that tests the condition. // If there's an increment, the continue scope will be overwritten @@ -866,7 +907,7 @@ CodeGenFunction::EmitCXXForRangeStmt(const CXXForRangeStmt &S, llvm::BasicBlock *CondBlock = createBasicBlock("for.cond"); EmitBlock(CondBlock); - LoopStack.push(CondBlock, CGM.getContext(), ForAttrs); + LoopStack.push(CondBlock, CGM.getContext(), ForAttrs, DL); // If there are any cleanups between here and the loop-exit scope, // create a block to stage a loop exit along. @@ -1147,7 +1188,7 @@ void CodeGenFunction::EmitCaseStmt(const CaseStmt &S) { // If the body of the case is just a 'break', try to not emit an empty block. // If we're profiling or we're not optimizing, leave the block in for better // debug and coverage analysis. 
- if (!CGM.getCodeGenOpts().ProfileInstrGenerate && + if (!CGM.getCodeGenOpts().hasProfileClangInstr() && CGM.getCodeGenOpts().OptimizationLevel > 0 && isa<BreakStmt>(S.getSubStmt())) { JumpDest Block = BreakContinueStack.back().BreakBlock; @@ -1194,7 +1235,7 @@ void CodeGenFunction::EmitCaseStmt(const CaseStmt &S) { if (SwitchWeights) SwitchWeights->push_back(getProfileCount(NextCase)); - if (CGM.getCodeGenOpts().ProfileInstrGenerate) { + if (CGM.getCodeGenOpts().hasProfileClangInstr()) { CaseDest = createBasicBlock("sw.bb"); EmitBlockWithFallThrough(CaseDest, &S); } @@ -1208,6 +1249,14 @@ void CodeGenFunction::EmitCaseStmt(const CaseStmt &S) { } void CodeGenFunction::EmitDefaultStmt(const DefaultStmt &S) { + // If there is no enclosing switch instance that we're aware of, then this + // default statement can be elided. This situation only happens when we've + // constant-folded the switch. + if (!SwitchInsn) { + EmitStmt(S.getSubStmt()); + return; + } + llvm::BasicBlock *DefaultBlock = SwitchInsn->getDefaultDest(); assert(DefaultBlock->empty() && "EmitDefaultStmt: Default block already defined?"); @@ -1274,6 +1323,10 @@ static CSFC_Result CollectStatementsForCase(const Stmt *S, // Handle this as two cases: we might be looking for the SwitchCase (if so // the skipped statements must be skippable) or we might already have it. CompoundStmt::const_body_iterator I = CS->body_begin(), E = CS->body_end(); + bool StartedInLiveCode = FoundCase; + unsigned StartSize = ResultStmts.size(); + + // If we've not found the case yet, scan through looking for it. if (Case) { // Keep track of whether we see a skipped declaration. The code could be // using the declaration even if it is skipped, so we can't optimize out @@ -1283,7 +1336,7 @@ static CSFC_Result CollectStatementsForCase(const Stmt *S, // If we're looking for the case, just see if we can skip each of the // substatements. for (; Case && I != E; ++I) { - HadSkippedDecl |= isa<DeclStmt>(*I); + HadSkippedDecl |= CodeGenFunction::mightAddDeclToScope(*I); switch (CollectStatementsForCase(*I, Case, FoundCase, ResultStmts)) { case CSFC_Failure: return CSFC_Failure; @@ -1319,11 +1372,19 @@ static CSFC_Result CollectStatementsForCase(const Stmt *S, break; } } + + if (!FoundCase) + return CSFC_Success; + + assert(!HadSkippedDecl && "fallthrough after skipping decl"); } // If we have statements in our range, then we know that the statements are // live and need to be added to the set of statements we're tracking. + bool AnyDecls = false; for (; I != E; ++I) { + AnyDecls |= CodeGenFunction::mightAddDeclToScope(*I); + switch (CollectStatementsForCase(*I, nullptr, FoundCase, ResultStmts)) { case CSFC_Failure: return CSFC_Failure; case CSFC_FallThrough: @@ -1341,7 +1402,24 @@ static CSFC_Result CollectStatementsForCase(const Stmt *S, } } - return Case ? CSFC_Success : CSFC_FallThrough; + // If we're about to fall out of a scope without hitting a 'break;', we + // can't perform the optimization if there were any decls in that scope + // (we'd lose their end-of-lifetime). + if (AnyDecls) { + // If the entire compound statement was live, there's one more thing we + // can try before giving up: emit the whole thing as a single statement. + // We can do that unless the statement contains a 'break;'. + // FIXME: Such a break must be at the end of a construct within this one. + // We could emit this by just ignoring the BreakStmts entirely. 
+ if (StartedInLiveCode && !CodeGenFunction::containsBreak(S)) { + ResultStmts.resize(StartSize); + ResultStmts.push_back(S); + } else { + return CSFC_Failure; + } + } + + return CSFC_FallThrough; } // Okay, this is some other statement that we don't handle explicitly, like a @@ -1438,6 +1516,9 @@ void CodeGenFunction::EmitSwitchStmt(const SwitchStmt &S) { incrementProfileCounter(Case); RunCleanupsScope ExecutedScope(*this); + if (S.getInit()) + EmitStmt(S.getInit()); + // Emit the condition variable if needed inside the entire cleanup scope // used by this special case for constant folded switches. if (S.getConditionVariable()) @@ -1465,6 +1546,10 @@ void CodeGenFunction::EmitSwitchStmt(const SwitchStmt &S) { JumpDest SwitchExit = getJumpDestInCurrentScope("sw.epilog"); RunCleanupsScope ConditionScope(*this); + + if (S.getInit()) + EmitStmt(S.getInit()); + if (S.getConditionVariable()) EmitAutoVarDecl(*S.getConditionVariable()); llvm::Value *CondV = EmitScalarExpr(S.getCond()); @@ -1537,16 +1622,13 @@ void CodeGenFunction::EmitSwitchStmt(const SwitchStmt &S) { // If the switch has a condition wrapped by __builtin_unpredictable, // create metadata that specifies that the switch is unpredictable. // Don't bother if not optimizing because that metadata would not be used. - if (CGM.getCodeGenOpts().OptimizationLevel != 0) { - if (const CallExpr *Call = dyn_cast<CallExpr>(S.getCond())) { - const Decl *TargetDecl = Call->getCalleeDecl(); - if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(TargetDecl)) { - if (FD->getBuiltinID() == Builtin::BI__builtin_unpredictable) { - llvm::MDBuilder MDHelper(getLLVMContext()); - SwitchInsn->setMetadata(llvm::LLVMContext::MD_unpredictable, - MDHelper.createUnpredictable()); - } - } + auto *Call = dyn_cast<CallExpr>(S.getCond()); + if (Call && CGM.getCodeGenOpts().OptimizationLevel != 0) { + auto *FD = dyn_cast_or_null<FunctionDecl>(Call->getCalleeDecl()); + if (FD && FD->getBuiltinID() == Builtin::BI__builtin_unpredictable) { + llvm::MDBuilder MDHelper(getLLVMContext()); + SwitchInsn->setMetadata(llvm::LLVMContext::MD_unpredictable, + MDHelper.createUnpredictable()); } } @@ -2035,6 +2117,14 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { llvm::ConstantAsMetadata::get(Loc))); } + if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) { + // Conservatively, mark all inline asm blocks in CUDA as convergent + // (meaning, they may call an intrinsically convergent op, such as bar.sync, + // and so can't have certain optimizations applied around them). + Result->addAttribute(llvm::AttributeSet::FunctionIndex, + llvm::Attribute::Convergent); + } + // Extract all of the register value results from the asm. std::vector<llvm::Value*> RegResults; if (ResultRegTypes.size() == 1) { @@ -2147,8 +2237,7 @@ CodeGenFunction::GenerateCapturedStmtFunction(const CapturedStmt &S) { // Create the function declaration. 
FunctionType::ExtInfo ExtInfo; const CGFunctionInfo &FuncInfo = - CGM.getTypes().arrangeFreeFunctionDeclaration(Ctx.VoidTy, Args, ExtInfo, - /*IsVariadic=*/false); + CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args); llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo); llvm::Function *F = diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp index d0ee891..d214340 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -11,15 +11,117 @@ // //===----------------------------------------------------------------------===// +#include "CGCleanup.h" #include "CGOpenMPRuntime.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "TargetInfo.h" #include "clang/AST/Stmt.h" #include "clang/AST/StmtOpenMP.h" +#include "clang/AST/DeclOpenMP.h" +#include "llvm/IR/CallSite.h" using namespace clang; using namespace CodeGen; +namespace { +/// Lexical scope for OpenMP executable constructs, that handles correct codegen +/// for captured expressions. +class OMPLexicalScope final : public CodeGenFunction::LexicalScope { + void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) { + for (const auto *C : S.clauses()) { + if (auto *CPI = OMPClauseWithPreInit::get(C)) { + if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) { + for (const auto *I : PreInit->decls()) { + if (!I->hasAttr<OMPCaptureNoInitAttr>()) + CGF.EmitVarDecl(cast<VarDecl>(*I)); + else { + CodeGenFunction::AutoVarEmission Emission = + CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); + CGF.EmitAutoVarCleanups(Emission); + } + } + } + } + } + } + CodeGenFunction::OMPPrivateScope InlinedShareds; + + static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) { + return CGF.LambdaCaptureFields.lookup(VD) || + (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) || + (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl)); + } + +public: + OMPLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S, + bool AsInlined = false) + : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()), + InlinedShareds(CGF) { + emitPreInitStmt(CGF, S); + if (AsInlined) { + if (S.hasAssociatedStmt()) { + auto *CS = cast<CapturedStmt>(S.getAssociatedStmt()); + for (auto &C : CS->captures()) { + if (C.capturesVariable() || C.capturesVariableByCopy()) { + auto *VD = C.getCapturedVar(); + DeclRefExpr DRE(const_cast<VarDecl *>(VD), + isCapturedVar(CGF, VD) || + (CGF.CapturedStmtInfo && + InlinedShareds.isGlobalVarCaptured(VD)), + VD->getType().getNonReferenceType(), VK_LValue, + SourceLocation()); + InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address { + return CGF.EmitLValue(&DRE).getAddress(); + }); + } + } + (void)InlinedShareds.Privatize(); + } + } + } +}; + +/// Private scope for OpenMP loop-based directives, that supports capturing +/// of used expression from loop statement. 
+class OMPLoopScope : public CodeGenFunction::RunCleanupsScope { + void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) { + if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) { + if (auto *PreInits = cast_or_null<DeclStmt>(LD->getPreInits())) { + for (const auto *I : PreInits->decls()) + CGF.EmitVarDecl(cast<VarDecl>(*I)); + } + } + } + +public: + OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S) + : CodeGenFunction::RunCleanupsScope(CGF) { + emitPreInitStmt(CGF, S); + } +}; + +} // namespace + +llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) { + auto &C = getContext(); + llvm::Value *Size = nullptr; + auto SizeInChars = C.getTypeSizeInChars(Ty); + if (SizeInChars.isZero()) { + // getTypeSizeInChars() returns 0 for a VLA. + while (auto *VAT = C.getAsVariableArrayType(Ty)) { + llvm::Value *ArraySize; + std::tie(ArraySize, Ty) = getVLASize(VAT); + Size = Size ? Builder.CreateNUWMul(Size, ArraySize) : ArraySize; + } + SizeInChars = C.getTypeSizeInChars(Ty); + if (SizeInChars.isZero()) + return llvm::ConstantInt::get(SizeTy, /*V=*/0); + Size = Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars)); + } else + Size = CGM.getSize(SizeInChars); + return Size; +} + void CodeGenFunction::GenerateOpenMPCapturedVars( const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) { const RecordDecl *RD = S.getCapturedRecordDecl(); @@ -34,10 +136,33 @@ void CodeGenFunction::GenerateOpenMPCapturedVars( CapturedVars.push_back(Val); } else if (CurCap->capturesThis()) CapturedVars.push_back(CXXThisValue); - else if (CurCap->capturesVariableByCopy()) - CapturedVars.push_back( - EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal()); - else { + else if (CurCap->capturesVariableByCopy()) { + llvm::Value *CV = + EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal(); + + // If the field is not a pointer, we need to save the actual value + // and load it as a void pointer. + if (!CurField->getType()->isAnyPointerType()) { + auto &Ctx = getContext(); + auto DstAddr = CreateMemTemp( + Ctx.getUIntPtrType(), + Twine(CurCap->getCapturedVar()->getName()) + ".casted"); + LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType()); + + auto *SrcAddrVal = EmitScalarConversion( + DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()), + Ctx.getPointerType(CurField->getType()), SourceLocation()); + LValue SrcLV = + MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType()); + + // Store the value using the source type pointer. + EmitStoreThroughLValue(RValue::get(CV), SrcLV); + + // Load the value using the destination type pointer. 
+ CV = EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal(); + } + CapturedVars.push_back(CV); + } else { assert(CurCap->capturesVariable() && "Expected capture by reference."); CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer()); } @@ -107,8 +232,15 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { assert(I->capturesVariableArrayType()); II = &getContext().Idents.get("vla"); } - if (ArgType->isVariablyModifiedType()) - ArgType = getContext().getVariableArrayDecayedType(ArgType); + if (ArgType->isVariablyModifiedType()) { + bool IsReference = ArgType->isLValueReferenceType(); + ArgType = + getContext().getCanonicalParamType(ArgType.getNonReferenceType()); + if (IsReference && !ArgType->isPointerType()) { + ArgType = getContext().getLValueReferenceType( + ArgType, /*SpelledAsLValue=*/false); + } + } Args.push_back(ImplicitParamDecl::Create(getContext(), nullptr, FD->getLocation(), II, ArgType)); ++I; @@ -120,8 +252,7 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { // Create the function declaration. FunctionType::ExtInfo ExtInfo; const CGFunctionInfo &FuncInfo = - CGM.getTypes().arrangeFreeFunctionDeclaration(Ctx.VoidTy, Args, ExtInfo, - /*IsVariadic=*/false); + CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args); llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo); llvm::Function *F = llvm::Function::Create( @@ -141,7 +272,8 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { // use the value that we get from the arguments. if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) { setAddrOfLocalVar(I->getCapturedVar(), GetAddrOfLocalVar(Args[Cnt])); - ++Cnt, ++I; + ++Cnt; + ++I; continue; } @@ -162,8 +294,14 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { QualType VarTy = Var->getType(); Address ArgAddr = ArgLVal.getAddress(); if (!VarTy->isReferenceType()) { - ArgAddr = EmitLoadOfReference( - ArgAddr, ArgLVal.getType()->castAs<ReferenceType>()); + if (ArgLVal.getType()->isLValueReferenceType()) { + ArgAddr = EmitLoadOfReference( + ArgAddr, ArgLVal.getType()->castAs<ReferenceType>()); + } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) { + assert(ArgLVal.getType()->isPointerType()); + ArgAddr = EmitLoadOfPointer( + ArgAddr, ArgLVal.getType()->castAs<PointerType>()); + } } setAddrOfLocalVar( Var, Address(ArgAddr.getPointer(), getContext().getDeclAlign(Var))); @@ -172,17 +310,17 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { "Not expecting a captured pointer."); auto *Var = I->getCapturedVar(); QualType VarTy = Var->getType(); - setAddrOfLocalVar(I->getCapturedVar(), - castValueFromUintptr(*this, FD->getType(), - Args[Cnt]->getName(), ArgLVal, - VarTy->isReferenceType())); + setAddrOfLocalVar(Var, castValueFromUintptr(*this, FD->getType(), + Args[Cnt]->getName(), ArgLVal, + VarTy->isReferenceType())); } else { // If 'this' is captured, load it into CXXThisValue. assert(I->capturesThis()); CXXThisValue = EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation()).getScalarVal(); } - ++Cnt, ++I; + ++Cnt; + ++I; } PGO.assignRegionCounters(GlobalDecl(CD), F); @@ -256,12 +394,77 @@ void CodeGenFunction::EmitOMPAggregateAssign( EmitBlock(DoneBB, /*IsFinished=*/true); } +/// Check if the combiner is a call to UDR combiner and if it is so return the +/// UDR decl used for reduction. 
+static const OMPDeclareReductionDecl * +getReductionInit(const Expr *ReductionOp) { + if (auto *CE = dyn_cast<CallExpr>(ReductionOp)) + if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) + if (auto *DRE = + dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) + if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) + return DRD; + return nullptr; +} + +static void emitInitWithReductionInitializer(CodeGenFunction &CGF, + const OMPDeclareReductionDecl *DRD, + const Expr *InitOp, + Address Private, Address Original, + QualType Ty) { + if (DRD->getInitializer()) { + std::pair<llvm::Function *, llvm::Function *> Reduction = + CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); + auto *CE = cast<CallExpr>(InitOp); + auto *OVE = cast<OpaqueValueExpr>(CE->getCallee()); + const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); + const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); + auto *LHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr()); + auto *RHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr()); + CodeGenFunction::OMPPrivateScope PrivateScope(CGF); + PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), + [=]() -> Address { return Private; }); + PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), + [=]() -> Address { return Original; }); + (void)PrivateScope.Privatize(); + RValue Func = RValue::get(Reduction.second); + CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); + CGF.EmitIgnoredExpr(InitOp); + } else { + llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty); + auto *GV = new llvm::GlobalVariable( + CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, + llvm::GlobalValue::PrivateLinkage, Init, ".init"); + LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty); + RValue InitRVal; + switch (CGF.getEvaluationKind(Ty)) { + case TEK_Scalar: + InitRVal = CGF.EmitLoadOfLValue(LV, SourceLocation()); + break; + case TEK_Complex: + InitRVal = + RValue::getComplex(CGF.EmitLoadOfComplex(LV, SourceLocation())); + break; + case TEK_Aggregate: + InitRVal = RValue::getAggregate(LV.getAddress()); + break; + } + OpaqueValueExpr OVE(SourceLocation(), Ty, VK_RValue); + CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal); + CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), + /*IsInitializer=*/false); + } +} + /// \brief Emit initialization of arrays of complex types. /// \param DestAddr Address of the array. /// \param Type Type of array. /// \param Init Initial expression of array. +/// \param SrcAddr Address of the original array. static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, - QualType Type, const Expr *Init) { + QualType Type, const Expr *Init, + Address SrcAddr = Address::invalid()) { + auto *DRD = getReductionInit(Init); // Perform element-by-element initialization. QualType ElementTy; @@ -270,7 +473,13 @@ static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); DestAddr = CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType()); + if (DRD) + SrcAddr = + CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); + llvm::Value *SrcBegin = nullptr; + if (DRD) + SrcBegin = SrcAddr.getPointer(); auto DestBegin = DestAddr.getPointer(); // Cast from pointer to array type to pointer to single element. 
auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements); @@ -287,6 +496,16 @@ static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); + llvm::PHINode *SrcElementPHI = nullptr; + Address SrcElementCurrent = Address::invalid(); + if (DRD) { + SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, + "omp.arraycpy.srcElementPast"); + SrcElementPHI->addIncoming(SrcBegin, EntryBB); + SrcElementCurrent = + Address(SrcElementPHI, + SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); + } llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); DestElementPHI->addIncoming(DestBegin, EntryBB); @@ -297,8 +516,19 @@ static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, // Emit copy. { CodeGenFunction::RunCleanupsScope InitScope(CGF); - CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), - /*IsInitializer=*/false); + if (DRD && (DRD->getInitializer() || !Init)) { + emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, + SrcElementCurrent, ElementTy); + } else + CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), + /*IsInitializer=*/false); + } + + if (DRD) { + // Shift the address forward by one element. + auto SrcElementNext = CGF.Builder.CreateConstGEP1_32( + SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); + SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); } // Shift the address forward by one element. @@ -356,24 +586,42 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) { if (!HaveInsertPoint()) return false; + bool FirstprivateIsLastprivate = false; + llvm::DenseSet<const VarDecl *> Lastprivates; + for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) { + for (const auto *D : C->varlists()) + Lastprivates.insert( + cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl()); + } llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate; + CGCapturedStmtInfo CapturesInfo(cast<CapturedStmt>(*D.getAssociatedStmt())); for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) { auto IRef = C->varlist_begin(); auto InitsRef = C->inits().begin(); for (auto IInit : C->private_copies()) { auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); - if (EmittedAsFirstprivate.count(OrigVD) == 0) { - EmittedAsFirstprivate.insert(OrigVD); + bool ThisFirstprivateIsLastprivate = + Lastprivates.count(OrigVD->getCanonicalDecl()) > 0; + auto *CapFD = CapturesInfo.lookup(OrigVD); + auto *FD = CapturedStmtInfo->lookup(OrigVD); + if (!ThisFirstprivateIsLastprivate && FD && (FD == CapFD) && + !FD->getType()->isReferenceType()) { + EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()); + ++IRef; + ++InitsRef; + continue; + } + FirstprivateIsLastprivate = + FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate; + if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) { auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl()); bool IsRegistered; - DeclRefExpr DRE( - const_cast<VarDecl *>(OrigVD), - /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup( - OrigVD) != nullptr, - (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); + DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), + /*RefersToEnclosingVariableOrCapture=*/FD != nullptr, + 
(*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); Address OriginalAddr = EmitLValue(&DRE).getAddress(); - QualType Type = OrigVD->getType(); + QualType Type = VD->getType(); if (Type->isArrayType()) { // Emit VarDecl with copy init for arrays. // Get the address of the original variable captured in current @@ -420,10 +668,11 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D, // Silence the warning about unused variable. (void)IsRegistered; } - ++IRef, ++InitsRef; + ++IRef; + ++InitsRef; } } - return !EmittedAsFirstprivate.empty(); + return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty(); } void CodeGenFunction::EmitOMPPrivateClause( @@ -470,7 +719,6 @@ bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) { auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); QualType Type = VD->getType(); if (CopiedVars.insert(VD->getCanonicalDecl()).second) { - // Get the address of the master variable. If we are emitting code with // TLS support, the address is passed from the master as field in the // captured declaration. @@ -525,15 +773,27 @@ bool CodeGenFunction::EmitOMPLastprivateClauseInit( if (!HaveInsertPoint()) return false; bool HasAtLeastOneLastprivate = false; + llvm::DenseSet<const VarDecl *> SIMDLCVs; + if (isOpenMPSimdDirective(D.getDirectiveKind())) { + auto *LoopDirective = cast<OMPLoopDirective>(&D); + for (auto *C : LoopDirective->counters()) { + SIMDLCVs.insert( + cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl()); + } + } llvm::DenseSet<const VarDecl *> AlreadyEmittedVars; for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) { HasAtLeastOneLastprivate = true; + if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) + break; auto IRef = C->varlist_begin(); auto IDestRef = C->destination_exprs().begin(); for (auto *IInit : C->private_copies()) { // Keep the address of the original variable for future update at the end // of the loop. auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); + // Taskloops do not require additional initialization, it is done in + // runtime support library. if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) { auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl()); PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address { @@ -547,27 +807,28 @@ bool CodeGenFunction::EmitOMPLastprivateClauseInit( // Check if the variable is also a firstprivate: in this case IInit is // not generated. Initialization of this variable will happen in codegen // for 'firstprivate' clause. - if (IInit) { + if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) { auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); - bool IsRegistered = - PrivateScope.addPrivate(OrigVD, [&]() -> Address { - // Emit private VarDecl with copy init. - EmitDecl(*VD); - return GetAddrOfLocalVar(VD); - }); + bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address { + // Emit private VarDecl with copy init. 
+ EmitDecl(*VD); + return GetAddrOfLocalVar(VD); + }); assert(IsRegistered && "lastprivate var already registered as private"); (void)IsRegistered; } } - ++IRef, ++IDestRef; + ++IRef; + ++IDestRef; } } return HasAtLeastOneLastprivate; } void CodeGenFunction::EmitOMPLastprivateClauseFinal( - const OMPExecutableDirective &D, llvm::Value *IsLastIterCond) { + const OMPExecutableDirective &D, bool NoFinals, + llvm::Value *IsLastIterCond) { if (!HaveInsertPoint()) return; // Emit following code: @@ -584,16 +845,20 @@ void CodeGenFunction::EmitOMPLastprivateClauseFinal( Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB); EmitBlock(ThenBB); } - llvm::DenseMap<const Decl *, const Expr *> LoopCountersAndUpdates; + llvm::DenseSet<const VarDecl *> AlreadyEmittedVars; + llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates; if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) { auto IC = LoopDirective->counters().begin(); for (auto F : LoopDirective->finals()) { - auto *D = cast<DeclRefExpr>(*IC)->getDecl()->getCanonicalDecl(); - LoopCountersAndUpdates[D] = F; + auto *D = + cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl(); + if (NoFinals) + AlreadyEmittedVars.insert(D); + else + LoopCountersAndUpdates[D] = F; ++IC; } } - llvm::DenseSet<const VarDecl *> AlreadyEmittedVars; for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) { auto IRef = C->varlist_begin(); auto ISrcRef = C->source_exprs().begin(); @@ -606,8 +871,8 @@ void CodeGenFunction::EmitOMPLastprivateClauseFinal( // If lastprivate variable is a loop control variable for loop-based // directive, update its value before copyin back to original // variable. - if (auto *UpExpr = LoopCountersAndUpdates.lookup(CanonicalVD)) - EmitIgnoredExpr(UpExpr); + if (auto *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD)) + EmitIgnoredExpr(FinalExpr); auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl()); auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl()); // Get the address of the original variable. 
@@ -624,11 +889,61 @@ void CodeGenFunction::EmitOMPLastprivateClauseFinal( ++ISrcRef; ++IDestRef; } + if (auto *PostUpdate = C->getPostUpdateExpr()) + EmitIgnoredExpr(PostUpdate); } if (IsLastIterCond) EmitBlock(DoneBB, /*IsFinished=*/true); } +static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, + LValue BaseLV, llvm::Value *Addr) { + Address Tmp = Address::invalid(); + Address TopTmp = Address::invalid(); + Address MostTopTmp = Address::invalid(); + BaseTy = BaseTy.getNonReferenceType(); + while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && + !CGF.getContext().hasSameType(BaseTy, ElTy)) { + Tmp = CGF.CreateMemTemp(BaseTy); + if (TopTmp.isValid()) + CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp); + else + MostTopTmp = Tmp; + TopTmp = Tmp; + BaseTy = BaseTy->getPointeeType(); + } + llvm::Type *Ty = BaseLV.getPointer()->getType(); + if (Tmp.isValid()) + Ty = Tmp.getElementType(); + Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty); + if (Tmp.isValid()) { + CGF.Builder.CreateStore(Addr, Tmp); + return MostTopTmp; + } + return Address(Addr, BaseLV.getAlignment()); +} + +static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, + LValue BaseLV) { + BaseTy = BaseTy.getNonReferenceType(); + while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && + !CGF.getContext().hasSameType(BaseTy, ElTy)) { + if (auto *PtrTy = BaseTy->getAs<PointerType>()) + BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy); + else { + BaseLV = CGF.EmitLoadOfReferenceLValue(BaseLV.getAddress(), + BaseTy->castAs<ReferenceType>()); + } + BaseTy = BaseTy->getPointeeType(); + } + return CGF.MakeAddrLValue( + Address( + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + BaseLV.getPointer(), CGF.ConvertTypeForMem(ElTy)->getPointerTo()), + BaseLV.getAlignment()), + BaseLV.getType(), BaseLV.getAlignmentSource()); +} + void CodeGenFunction::EmitOMPReductionClauseInit( const OMPExecutableDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) { @@ -638,10 +953,12 @@ void CodeGenFunction::EmitOMPReductionClauseInit( auto ILHS = C->lhs_exprs().begin(); auto IRHS = C->rhs_exprs().begin(); auto IPriv = C->privates().begin(); + auto IRed = C->reduction_ops().begin(); for (auto IRef : C->varlists()) { auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl()); + auto *DRD = getReductionInit(*IRed); if (auto *OASE = dyn_cast<OMPArraySectionExpr>(IRef)) { auto *Base = OASE->getBase()->IgnoreParenImpCasts(); while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) @@ -654,21 +971,9 @@ void CodeGenFunction::EmitOMPReductionClauseInit( auto OASELValueUB = EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); auto OriginalBaseLValue = EmitLValue(DE); - auto BaseLValue = OriginalBaseLValue; - auto *Zero = Builder.getInt64(/*C=*/0); - llvm::SmallVector<llvm::Value *, 4> Indexes; - Indexes.push_back(Zero); - auto *ItemTy = - OASELValueLB.getPointer()->getType()->getPointerElementType(); - auto *Ty = BaseLValue.getPointer()->getType()->getPointerElementType(); - while (Ty != ItemTy) { - Indexes.push_back(Zero); - Ty = Ty->getPointerElementType(); - } - BaseLValue = MakeAddrLValue( - Address(Builder.CreateInBoundsGEP(BaseLValue.getPointer(), Indexes), - OASELValueLB.getAlignment()), - OASELValueLB.getType(), OASELValueLB.getAlignmentSource()); + LValue BaseLValue = + loadToBegin(*this, 
OrigVD->getType(), OASELValueLB.getType(), + OriginalBaseLValue); // Store the address of the original variable associated with the LHS // implicit variable. PrivateScope.addPrivate(LHSVD, [this, OASELValueLB]() -> Address { @@ -676,8 +981,8 @@ void CodeGenFunction::EmitOMPReductionClauseInit( }); // Emit reduction copy. bool IsRegistered = PrivateScope.addPrivate( - OrigVD, [this, PrivateVD, BaseLValue, OASELValueLB, OASELValueUB, - OriginalBaseLValue]() -> Address { + OrigVD, [this, OrigVD, PrivateVD, BaseLValue, OASELValueLB, + OASELValueUB, OriginalBaseLValue, DRD, IRed]() -> Address { // Emit VarDecl with copy init for arrays. // Get the address of the original variable captured in current // captured region. @@ -695,15 +1000,17 @@ void CodeGenFunction::EmitOMPReductionClauseInit( auto Emission = EmitAutoVarAlloca(*PrivateVD); auto Addr = Emission.getAllocatedAddress(); auto *Init = PrivateVD->getInit(); - EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(), Init); + EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(), + DRD ? *IRed : Init, + OASELValueLB.getAddress()); EmitAutoVarCleanups(Emission); // Emit private VarDecl with reduction init. auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(), OASELValueLB.getPointer()); auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset); - Ptr = Builder.CreatePointerBitCastOrAddrSpaceCast( - Ptr, OriginalBaseLValue.getPointer()->getType()); - return Address(Ptr, OriginalBaseLValue.getAlignment()); + return castToBase(*this, OrigVD->getType(), + OASELValueLB.getType(), OriginalBaseLValue, + Ptr); }); assert(IsRegistered && "private var already registered as private"); // Silence the warning about unused variable. @@ -719,21 +1026,8 @@ void CodeGenFunction::EmitOMPReductionClauseInit( auto *OrigVD = cast<VarDecl>(DE->getDecl()); auto ASELValue = EmitLValue(ASE); auto OriginalBaseLValue = EmitLValue(DE); - auto BaseLValue = OriginalBaseLValue; - auto *Zero = Builder.getInt64(/*C=*/0); - llvm::SmallVector<llvm::Value *, 4> Indexes; - Indexes.push_back(Zero); - auto *ItemTy = - ASELValue.getPointer()->getType()->getPointerElementType(); - auto *Ty = BaseLValue.getPointer()->getType()->getPointerElementType(); - while (Ty != ItemTy) { - Indexes.push_back(Zero); - Ty = Ty->getPointerElementType(); - } - BaseLValue = MakeAddrLValue( - Address(Builder.CreateInBoundsGEP(BaseLValue.getPointer(), Indexes), - ASELValue.getAlignment()), - ASELValue.getType(), ASELValue.getAlignmentSource()); + LValue BaseLValue = loadToBegin( + *this, OrigVD->getType(), ASELValue.getType(), OriginalBaseLValue); // Store the address of the original variable associated with the LHS // implicit variable. PrivateScope.addPrivate(LHSVD, [this, ASELValue]() -> Address { @@ -741,49 +1035,114 @@ void CodeGenFunction::EmitOMPReductionClauseInit( }); // Emit reduction copy. bool IsRegistered = PrivateScope.addPrivate( - OrigVD, [this, PrivateVD, BaseLValue, ASELValue, - OriginalBaseLValue]() -> Address { + OrigVD, [this, OrigVD, PrivateVD, BaseLValue, ASELValue, + OriginalBaseLValue, DRD, IRed]() -> Address { // Emit private VarDecl with reduction init. 
- EmitDecl(*PrivateVD); - auto Addr = GetAddrOfLocalVar(PrivateVD); + AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD); + auto Addr = Emission.getAllocatedAddress(); + if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { + emitInitWithReductionInitializer(*this, DRD, *IRed, Addr, + ASELValue.getAddress(), + ASELValue.getType()); + } else + EmitAutoVarInit(Emission); + EmitAutoVarCleanups(Emission); auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(), ASELValue.getPointer()); auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset); - Ptr = Builder.CreatePointerBitCastOrAddrSpaceCast( - Ptr, OriginalBaseLValue.getPointer()->getType()); - return Address(Ptr, OriginalBaseLValue.getAlignment()); + return castToBase(*this, OrigVD->getType(), ASELValue.getType(), + OriginalBaseLValue, Ptr); }); assert(IsRegistered && "private var already registered as private"); // Silence the warning about unused variable. (void)IsRegistered; - PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address { - return GetAddrOfLocalVar(PrivateVD); + PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address { + return Builder.CreateElementBitCast( + GetAddrOfLocalVar(PrivateVD), ConvertTypeForMem(RHSVD->getType()), + "rhs.begin"); }); } else { auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl()); - // Store the address of the original variable associated with the LHS - // implicit variable. - PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef]() -> Address { + QualType Type = PrivateVD->getType(); + if (getContext().getAsArrayType(Type)) { + // Store the address of the original variable associated with the LHS + // implicit variable. DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), CapturedStmtInfo->lookup(OrigVD) != nullptr, IRef->getType(), VK_LValue, IRef->getExprLoc()); - return EmitLValue(&DRE).getAddress(); - }); - // Emit reduction copy. - bool IsRegistered = - PrivateScope.addPrivate(OrigVD, [this, PrivateVD]() -> Address { - // Emit private VarDecl with reduction init. - EmitDecl(*PrivateVD); - return GetAddrOfLocalVar(PrivateVD); - }); - assert(IsRegistered && "private var already registered as private"); - // Silence the warning about unused variable. - (void)IsRegistered; - PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address { - return GetAddrOfLocalVar(PrivateVD); - }); + Address OriginalAddr = EmitLValue(&DRE).getAddress(); + PrivateScope.addPrivate(LHSVD, [this, &OriginalAddr, + LHSVD]() -> Address { + OriginalAddr = Builder.CreateElementBitCast( + OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin"); + return OriginalAddr; + }); + bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address { + if (Type->isVariablyModifiedType()) { + CodeGenFunction::OpaqueValueMapping OpaqueMap( + *this, cast<OpaqueValueExpr>( + getContext() + .getAsVariableArrayType(PrivateVD->getType()) + ->getSizeExpr()), + RValue::get( + getTypeSize(OrigVD->getType().getNonReferenceType()))); + EmitVariablyModifiedType(Type); + } + auto Emission = EmitAutoVarAlloca(*PrivateVD); + auto Addr = Emission.getAllocatedAddress(); + auto *Init = PrivateVD->getInit(); + EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(), + DRD ? *IRed : Init, OriginalAddr); + EmitAutoVarCleanups(Emission); + return Emission.getAllocatedAddress(); + }); + assert(IsRegistered && "private var already registered as private"); + // Silence the warning about unused variable. 
+ (void)IsRegistered; + PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address { + return Builder.CreateElementBitCast( + GetAddrOfLocalVar(PrivateVD), + ConvertTypeForMem(RHSVD->getType()), "rhs.begin"); + }); + } else { + // Store the address of the original variable associated with the LHS + // implicit variable. + Address OriginalAddr = Address::invalid(); + PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef, + &OriginalAddr]() -> Address { + DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), + CapturedStmtInfo->lookup(OrigVD) != nullptr, + IRef->getType(), VK_LValue, IRef->getExprLoc()); + OriginalAddr = EmitLValue(&DRE).getAddress(); + return OriginalAddr; + }); + // Emit reduction copy. + bool IsRegistered = PrivateScope.addPrivate( + OrigVD, [this, PrivateVD, OriginalAddr, DRD, IRed]() -> Address { + // Emit private VarDecl with reduction init. + AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD); + auto Addr = Emission.getAllocatedAddress(); + if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { + emitInitWithReductionInitializer(*this, DRD, *IRed, Addr, + OriginalAddr, + PrivateVD->getType()); + } else + EmitAutoVarInit(Emission); + EmitAutoVarCleanups(Emission); + return Addr; + }); + assert(IsRegistered && "private var already registered as private"); + // Silence the warning about unused variable. + (void)IsRegistered; + PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address { + return GetAddrOfLocalVar(PrivateVD); + }); + } } - ++ILHS, ++IRHS, ++IPriv; + ++ILHS; + ++IRHS; + ++IPriv; + ++IRed; } } } @@ -816,15 +1175,39 @@ void CodeGenFunction::EmitOMPReductionClauseFinal( } } +static void emitPostUpdateForReductionClause( + CodeGenFunction &CGF, const OMPExecutableDirective &D, + const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) { + if (!CGF.HaveInsertPoint()) + return; + llvm::BasicBlock *DoneBB = nullptr; + for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { + if (auto *PostUpdate = C->getPostUpdateExpr()) { + if (!DoneBB) { + if (auto *Cond = CondGen(CGF)) { + // If the first post-update expression is found, emit conditional + // block if it was requested. + auto *ThenBB = CGF.createBasicBlock(".omp.reduction.pu"); + DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done"); + CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB); + CGF.EmitBlock(ThenBB); + } + } + CGF.EmitIgnoredExpr(PostUpdate); + } + } + if (DoneBB) + CGF.EmitBlock(DoneBB, /*IsFinished=*/true); +} + static void emitCommonOMPParallelDirective(CodeGenFunction &CGF, const OMPExecutableDirective &S, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); - llvm::SmallVector<llvm::Value *, 16> CapturedVars; - CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); - auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction( - S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); + auto OutlinedFn = CGF.CGM.getOpenMPRuntime(). 
+ emitParallelOrTeamsOutlinedFunction(S, + *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) { CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(), @@ -833,7 +1216,7 @@ static void emitCommonOMPParallelDirective(CodeGenFunction &CGF, CGF, NumThreads, NumThreadsClause->getLocStart()); } if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) { - CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); + CodeGenFunction::RunCleanupsScope ProcBindScope(CGF); CGF.CGM.getOpenMPRuntime().emitProcBindClause( CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart()); } @@ -845,22 +1228,24 @@ static void emitCommonOMPParallelDirective(CodeGenFunction &CGF, break; } } + + OMPLexicalScope Scope(CGF, S); + llvm::SmallVector<llvm::Value *, 16> CapturedVars; + CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn, CapturedVars, IfCond); } void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { - LexicalScope Scope(*this, S.getSourceRange()); // Emit parallel region as a standalone region. - auto &&CodeGen = [&S](CodeGenFunction &CGF) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { OMPPrivateScope PrivateScope(CGF); bool Copyins = CGF.EmitOMPCopyinClause(S); - bool Firstprivates = CGF.EmitOMPFirstprivateClause(S, PrivateScope); - if (Copyins || Firstprivates) { + (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); + if (Copyins) { // Emit implicit barrier to synchronize threads and avoid data races on - // initialization of firstprivate variables or propagation master's thread - // values of threadprivate variables to local instances of that variables - // of all other implicit threads. + // propagation master's thread values of threadprivate variables to local + // instances of that variables of all other implicit threads. CGF.CGM.getOpenMPRuntime().emitBarrierCall( CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, /*ForceSimpleCall=*/true); @@ -872,6 +1257,8 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { CGF.EmitOMPReductionClauseFinal(S); }; emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen); + emitPostUpdateForReductionClause( + *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); } void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D, @@ -883,9 +1270,8 @@ void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D, } // Update the linear variables. for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { - for (auto U : C->updates()) { + for (auto *U : C->updates()) EmitIgnoredExpr(U); - } } // On a continue in the body, jump to the end. @@ -908,7 +1294,7 @@ void CodeGenFunction::EmitOMPInnerLoop( // Start the loop with a block that tests the condition. auto CondBlock = createBasicBlock("omp.inner.for.cond"); EmitBlock(CondBlock); - LoopStack.push(CondBlock); + LoopStack.push(CondBlock, Builder.getCurrentDebugLocation()); // If there are any cleanups between here and the loop-exit scope, // create a block to stage a loop exit along. @@ -950,19 +1336,21 @@ void CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) { return; // Emit inits for the linear variables. 
for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { - for (auto Init : C->inits()) { + for (auto *Init : C->inits()) { auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl()); - auto *OrigVD = cast<VarDecl>( - cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())->getDecl()); - DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), - CapturedStmtInfo->lookup(OrigVD) != nullptr, - VD->getInit()->getType(), VK_LValue, - VD->getInit()->getExprLoc()); - AutoVarEmission Emission = EmitAutoVarAlloca(*VD); - EmitExprAsInit(&DRE, VD, - MakeAddrLValue(Emission.getAllocatedAddress(), VD->getType()), - /*capturedByInit=*/false); - EmitAutoVarCleanups(Emission); + if (auto *Ref = dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) { + AutoVarEmission Emission = EmitAutoVarAlloca(*VD); + auto *OrigVD = cast<VarDecl>(Ref->getDecl()); + DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), + CapturedStmtInfo->lookup(OrigVD) != nullptr, + VD->getInit()->getType(), VK_LValue, + VD->getInit()->getExprLoc()); + EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(), + VD->getType()), + /*capturedByInit=*/false); + EmitAutoVarCleanups(Emission); + } else + EmitVarDecl(*VD); } // Emit the linear steps for the linear clauses. // If a step is not constant, it is pre-calculated before the loop. @@ -975,27 +1363,42 @@ void CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) { } } -static void emitLinearClauseFinal(CodeGenFunction &CGF, - const OMPLoopDirective &D) { - if (!CGF.HaveInsertPoint()) +void CodeGenFunction::EmitOMPLinearClauseFinal( + const OMPLoopDirective &D, + const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) { + if (!HaveInsertPoint()) return; + llvm::BasicBlock *DoneBB = nullptr; // Emit the final values of the linear variables. for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { auto IC = C->varlist_begin(); - for (auto F : C->finals()) { + for (auto *F : C->finals()) { + if (!DoneBB) { + if (auto *Cond = CondGen(*this)) { + // If the first post-update expression is found, emit conditional + // block if it was requested. 
+ auto *ThenBB = createBasicBlock(".omp.linear.pu"); + DoneBB = createBasicBlock(".omp.linear.pu.done"); + Builder.CreateCondBr(Cond, ThenBB, DoneBB); + EmitBlock(ThenBB); + } + } auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl()); DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), - CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr, + CapturedStmtInfo->lookup(OrigVD) != nullptr, (*IC)->getType(), VK_LValue, (*IC)->getExprLoc()); - Address OrigAddr = CGF.EmitLValue(&DRE).getAddress(); - CodeGenFunction::OMPPrivateScope VarScope(CGF); - VarScope.addPrivate(OrigVD, - [OrigAddr]() -> Address { return OrigAddr; }); + Address OrigAddr = EmitLValue(&DRE).getAddress(); + CodeGenFunction::OMPPrivateScope VarScope(*this); + VarScope.addPrivate(OrigVD, [OrigAddr]() -> Address { return OrigAddr; }); (void)VarScope.Privatize(); - CGF.EmitIgnoredExpr(F); + EmitIgnoredExpr(F); ++IC; } + if (auto *PostUpdate = C->getPostUpdateExpr()) + EmitIgnoredExpr(PostUpdate); } + if (DoneBB) + EmitBlock(DoneBB, /*IsFinished=*/true); } static void emitAlignedClause(CodeGenFunction &CGF, @@ -1031,25 +1434,34 @@ static void emitAlignedClause(CodeGenFunction &CGF, } } -static void emitPrivateLoopCounters(CodeGenFunction &CGF, - CodeGenFunction::OMPPrivateScope &LoopScope, - ArrayRef<Expr *> Counters, - ArrayRef<Expr *> PrivateCounters) { - if (!CGF.HaveInsertPoint()) +void CodeGenFunction::EmitOMPPrivateLoopCounters( + const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) { + if (!HaveInsertPoint()) return; - auto I = PrivateCounters.begin(); - for (auto *E : Counters) { + auto I = S.private_counters().begin(); + for (auto *E : S.counters()) { auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()); - Address Addr = Address::invalid(); - (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address { + (void)LoopScope.addPrivate(VD, [&]() -> Address { // Emit var without initialization. - auto VarEmission = CGF.EmitAutoVarAlloca(*PrivateVD); - CGF.EmitAutoVarCleanups(VarEmission); - Addr = VarEmission.getAllocatedAddress(); - return Addr; + if (!LocalDeclMap.count(PrivateVD)) { + auto VarEmission = EmitAutoVarAlloca(*PrivateVD); + EmitAutoVarCleanups(VarEmission); + } + DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD), + /*RefersToEnclosingVariableOrCapture=*/false, + (*I)->getType(), VK_LValue, (*I)->getExprLoc()); + return EmitLValue(&DRE).getAddress(); }); - (void)LoopScope.addPrivate(VD, [&]() -> Address { return Addr; }); + if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) || + VD->hasGlobalStorage()) { + (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address { + DeclRefExpr DRE(const_cast<VarDecl *>(VD), + LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD), + E->getType(), VK_LValue, E->getExprLoc()); + return EmitLValue(&DRE).getAddress(); + }); + } ++I; } } @@ -1061,8 +1473,7 @@ static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S, return; { CodeGenFunction::OMPPrivateScope PreCondScope(CGF); - emitPrivateLoopCounters(CGF, PreCondScope, S.counters(), - S.private_counters()); + CGF.EmitOMPPrivateLoopCounters(S, PreCondScope); (void)PreCondScope.Privatize(); // Get initial values of real counters. 
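
For reference, what the linear-clause finalization above means at the source level: after the loop, the original variable receives its final linear value. A small sketch, with step and trip count chosen arbitrarily (build with -fopenmp):

  #include <cstdio>

  int main() {
    int j = 10;
    #pragma omp simd linear(j : 2)
    for (int i = 0; i < 8; ++i) {
      // in iteration i the private j behaves as 10 + i * 2
    }
    // EmitOMPLinearClauseFinal is the piece that stores the final value back:
    std::printf("j = %d\n", j); // 26 with clang's finalization (10 + 8 * 2)
    return 0;
  }
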
for (auto I : S.inits()) { @@ -1073,25 +1484,35 @@ static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S, CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount); } -static void -emitPrivateLinearVars(CodeGenFunction &CGF, const OMPExecutableDirective &D, - CodeGenFunction::OMPPrivateScope &PrivateScope) { - if (!CGF.HaveInsertPoint()) +void CodeGenFunction::EmitOMPLinearClause( + const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) { + if (!HaveInsertPoint()) return; + llvm::DenseSet<const VarDecl *> SIMDLCVs; + if (isOpenMPSimdDirective(D.getDirectiveKind())) { + auto *LoopDirective = cast<OMPLoopDirective>(&D); + for (auto *C : LoopDirective->counters()) { + SIMDLCVs.insert( + cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl()); + } + } for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { auto CurPrivate = C->privates().begin(); for (auto *E : C->varlists()) { auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl()); - bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address { - // Emit private VarDecl with copy init. - CGF.EmitVarDecl(*PrivateVD); - return CGF.GetAddrOfLocalVar(PrivateVD); - }); - assert(IsRegistered && "linear var already registered as private"); - // Silence the warning about unused variable. - (void)IsRegistered; + if (!SIMDLCVs.count(VD->getCanonicalDecl())) { + bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address { + // Emit private VarDecl with copy init. + EmitVarDecl(*PrivateVD); + return GetAddrOfLocalVar(PrivateVD); + }); + assert(IsRegistered && "linear var already registered as private"); + // Silence the warning about unused variable. + (void)IsRegistered; + } else + EmitVarDecl(*PrivateVD); ++CurPrivate; } } @@ -1132,17 +1553,39 @@ void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D, emitSimdlenSafelenClause(*this, D, IsMonotonic); } -void CodeGenFunction::EmitOMPSimdFinal(const OMPLoopDirective &D) { +void CodeGenFunction::EmitOMPSimdFinal( + const OMPLoopDirective &D, + const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) { if (!HaveInsertPoint()) return; + llvm::BasicBlock *DoneBB = nullptr; auto IC = D.counters().begin(); + auto IPC = D.private_counters().begin(); for (auto F : D.finals()) { auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl()); - if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD)) { - DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), - CapturedStmtInfo->lookup(OrigVD) != nullptr, - (*IC)->getType(), VK_LValue, (*IC)->getExprLoc()); - Address OrigAddr = EmitLValue(&DRE).getAddress(); + auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl()); + auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD); + if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) || + OrigVD->hasGlobalStorage() || CED) { + if (!DoneBB) { + if (auto *Cond = CondGen(*this)) { + // If the first post-update expression is found, emit conditional + // block if it was requested. 
+ auto *ThenBB = createBasicBlock(".omp.final.then"); + DoneBB = createBasicBlock(".omp.final.done"); + Builder.CreateCondBr(Cond, ThenBB, DoneBB); + EmitBlock(ThenBB); + } + } + Address OrigAddr = Address::invalid(); + if (CED) + OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress(); + else { + DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD), + /*RefersToEnclosingVariableOrCapture=*/false, + (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc()); + OrigAddr = EmitLValue(&DRE).getAddress(); + } OMPPrivateScope VarScope(*this); VarScope.addPrivate(OrigVD, [OrigAddr]() -> Address { return OrigAddr; }); @@ -1150,12 +1593,15 @@ void CodeGenFunction::EmitOMPSimdFinal(const OMPLoopDirective &D) { EmitIgnoredExpr(F); } ++IC; + ++IPC; } - emitLinearClauseFinal(*this, D); + if (DoneBB) + EmitBlock(DoneBB, /*IsFinished=*/true); } void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { - auto &&CodeGen = [&S](CodeGenFunction &CGF) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + OMPLoopScope PreInitScope(CGF, S); // if (PreCond) { // for (IV in 0..LastIteration) BODY; // <Final counter/linear vars updates>; @@ -1198,15 +1644,14 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { emitAlignedClause(CGF, S); CGF.EmitOMPLinearClauseInit(S); - bool HasLastprivateClause; { OMPPrivateScope LoopScope(CGF); - emitPrivateLoopCounters(CGF, LoopScope, S.counters(), - S.private_counters()); - emitPrivateLinearVars(CGF, S, LoopScope); + CGF.EmitOMPPrivateLoopCounters(S, LoopScope); + CGF.EmitOMPLinearClause(S, LoopScope); CGF.EmitOMPPrivateClause(S, LoopScope); CGF.EmitOMPReductionClauseInit(S, LoopScope); - HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); + bool HasLastprivateClause = + CGF.EmitOMPLastprivateClauseInit(S, LoopScope); (void)LoopScope.Privatize(); CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(), @@ -1215,104 +1660,42 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { CGF.EmitStopPoint(&S); }, [](CodeGenFunction &) {}); + CGF.EmitOMPSimdFinal( + S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); // Emit final copy of the lastprivate variables at the end of loops. - if (HasLastprivateClause) { - CGF.EmitOMPLastprivateClauseFinal(S); - } + if (HasLastprivateClause) + CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true); CGF.EmitOMPReductionClauseFinal(S); + emitPostUpdateForReductionClause( + CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); } - CGF.EmitOMPSimdFinal(S); + CGF.EmitOMPLinearClauseFinal( + S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); // Emit: if (PreCond) - end. if (ContBlock) { CGF.EmitBranch(ContBlock); CGF.EmitBlock(ContBlock, true); } }; + OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); } -void CodeGenFunction::EmitOMPForOuterLoop( - OpenMPScheduleClauseKind ScheduleKind, bool IsMonotonic, +void CodeGenFunction::EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) { auto &RT = CGM.getOpenMPRuntime(); - // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime). 
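
A hand-written sketch, in plain C++ rather than clang API calls, of the control-flow shape the EmitOMPSimdDirective lowering above produces for something like '#pragma omp simd lastprivate(x)': the loop body and the final copies are both guarded by the precondition block:

  #include <cstdio>

  int main() {
    const int n = 8;
    int x = 0;
    int x_priv = 0;                   // stands in for the privatized lastprivate copy
    if (0 < n) {                      // omp.precond.then
      for (int iv = 0; iv < n; ++iv)  // omp.inner.for
        x_priv = iv;
      x = x_priv;                     // final copy-out (EmitOMPLastprivateClauseFinal)
    }                                 // omp.precond.end
    std::printf("x = %d\n", x);       // prints 7
    return 0;
  }
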
- const bool DynamicOrOrdered = Ordered || RT.isDynamic(ScheduleKind); - - assert((Ordered || - !RT.isStaticNonchunked(ScheduleKind, /*Chunked=*/Chunk != nullptr)) && - "static non-chunked schedule does not need outer loop"); - - // Emit outer loop. - // - // OpenMP [2.7.1, Loop Construct, Description, table 2-1] - // When schedule(dynamic,chunk_size) is specified, the iterations are - // distributed to threads in the team in chunks as the threads request them. - // Each thread executes a chunk of iterations, then requests another chunk, - // until no chunks remain to be distributed. Each chunk contains chunk_size - // iterations, except for the last chunk to be distributed, which may have - // fewer iterations. When no chunk_size is specified, it defaults to 1. - // - // When schedule(guided,chunk_size) is specified, the iterations are assigned - // to threads in the team in chunks as the executing threads request them. - // Each thread executes a chunk of iterations, then requests another chunk, - // until no chunks remain to be assigned. For a chunk_size of 1, the size of - // each chunk is proportional to the number of unassigned iterations divided - // by the number of threads in the team, decreasing to 1. For a chunk_size - // with value k (greater than 1), the size of each chunk is determined in the - // same way, with the restriction that the chunks do not contain fewer than k - // iterations (except for the last chunk to be assigned, which may have fewer - // than k iterations). - // - // When schedule(auto) is specified, the decision regarding scheduling is - // delegated to the compiler and/or runtime system. The programmer gives the - // implementation the freedom to choose any possible mapping of iterations to - // threads in the team. - // - // When schedule(runtime) is specified, the decision regarding scheduling is - // deferred until run time, and the schedule and chunk size are taken from the - // run-sched-var ICV. If the ICV is set to auto, the schedule is - // implementation defined - // - // while(__kmpc_dispatch_next(&LB, &UB)) { - // idx = LB; - // while (idx <= UB) { BODY; ++idx; - // __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only. - // } // inner loop - // } - // - // OpenMP [2.7.1, Loop Construct, Description, table 2-1] - // When schedule(static, chunk_size) is specified, iterations are divided into - // chunks of size chunk_size, and the chunks are assigned to the threads in - // the team in a round-robin fashion in the order of the thread number. - // - // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) { - // while (idx <= UB) { BODY; ++idx; } // inner loop - // LB = LB + ST; - // UB = UB + ST; - // } - // - const Expr *IVExpr = S.getIterationVariable(); const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); - if (DynamicOrOrdered) { - llvm::Value *UBVal = EmitScalarExpr(S.getLastIteration()); - RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, - IVSize, IVSigned, Ordered, UBVal, Chunk); - } else { - RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, - IVSize, IVSigned, Ordered, IL, LB, UB, ST, Chunk); - } - auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end"); // Start the loop with a block that tests the condition. 
auto CondBlock = createBasicBlock("omp.dispatch.cond"); EmitBlock(CondBlock); - LoopStack.push(CondBlock); + LoopStack.push(CondBlock, Builder.getCurrentDebugLocation()); llvm::Value *BoolCondVal = nullptr; if (!DynamicOrOrdered) { @@ -1323,8 +1706,8 @@ void CodeGenFunction::EmitOMPForOuterLoop( // IV < UB BoolCondVal = EvaluateExprAsBool(S.getCond()); } else { - BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, - IL, LB, UB, ST); + BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, IL, + LB, UB, ST); } // If there are any cleanups between here and the loop-exit scope, @@ -1384,8 +1767,167 @@ void CodeGenFunction::EmitOMPForOuterLoop( EmitBlock(LoopExit.getBlock()); // Tell the runtime we are done. - if (!DynamicOrOrdered) - RT.emitForStaticFinish(*this, S.getLocEnd()); + auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) { + if (!DynamicOrOrdered) + CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd()); + }; + OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); +} + +void CodeGenFunction::EmitOMPForOuterLoop( + const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic, + const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, + Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) { + auto &RT = CGM.getOpenMPRuntime(); + + // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime). + const bool DynamicOrOrdered = + Ordered || RT.isDynamic(ScheduleKind.Schedule); + + assert((Ordered || + !RT.isStaticNonchunked(ScheduleKind.Schedule, + /*Chunked=*/Chunk != nullptr)) && + "static non-chunked schedule does not need outer loop"); + + // Emit outer loop. + // + // OpenMP [2.7.1, Loop Construct, Description, table 2-1] + // When schedule(dynamic,chunk_size) is specified, the iterations are + // distributed to threads in the team in chunks as the threads request them. + // Each thread executes a chunk of iterations, then requests another chunk, + // until no chunks remain to be distributed. Each chunk contains chunk_size + // iterations, except for the last chunk to be distributed, which may have + // fewer iterations. When no chunk_size is specified, it defaults to 1. + // + // When schedule(guided,chunk_size) is specified, the iterations are assigned + // to threads in the team in chunks as the executing threads request them. + // Each thread executes a chunk of iterations, then requests another chunk, + // until no chunks remain to be assigned. For a chunk_size of 1, the size of + // each chunk is proportional to the number of unassigned iterations divided + // by the number of threads in the team, decreasing to 1. For a chunk_size + // with value k (greater than 1), the size of each chunk is determined in the + // same way, with the restriction that the chunks do not contain fewer than k + // iterations (except for the last chunk to be assigned, which may have fewer + // than k iterations). + // + // When schedule(auto) is specified, the decision regarding scheduling is + // delegated to the compiler and/or runtime system. The programmer gives the + // implementation the freedom to choose any possible mapping of iterations to + // threads in the team. + // + // When schedule(runtime) is specified, the decision regarding scheduling is + // deferred until run time, and the schedule and chunk size are taken from the + // run-sched-var ICV. 
If the ICV is set to auto, the schedule is + // implementation defined + // + // while(__kmpc_dispatch_next(&LB, &UB)) { + // idx = LB; + // while (idx <= UB) { BODY; ++idx; + // __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only. + // } // inner loop + // } + // + // OpenMP [2.7.1, Loop Construct, Description, table 2-1] + // When schedule(static, chunk_size) is specified, iterations are divided into + // chunks of size chunk_size, and the chunks are assigned to the threads in + // the team in a round-robin fashion in the order of the thread number. + // + // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) { + // while (idx <= UB) { BODY; ++idx; } // inner loop + // LB = LB + ST; + // UB = UB + ST; + // } + // + + const Expr *IVExpr = S.getIterationVariable(); + const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); + const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); + + if (DynamicOrOrdered) { + llvm::Value *UBVal = EmitScalarExpr(S.getLastIteration()); + RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize, + IVSigned, Ordered, UBVal, Chunk); + } else { + RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, + Ordered, IL, LB, UB, ST, Chunk); + } + + EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, Ordered, LB, UB, + ST, IL, Chunk); +} + +void CodeGenFunction::EmitOMPDistributeOuterLoop( + OpenMPDistScheduleClauseKind ScheduleKind, + const OMPDistributeDirective &S, OMPPrivateScope &LoopScope, + Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) { + + auto &RT = CGM.getOpenMPRuntime(); + + // Emit outer loop. + // Same behavior as a OMPForOuterLoop, except that schedule cannot be + // dynamic + // + + const Expr *IVExpr = S.getIterationVariable(); + const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); + const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); + + RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, + IVSize, IVSigned, /* Ordered = */ false, + IL, LB, UB, ST, Chunk); + + EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, + S, LoopScope, /* Ordered = */ false, LB, UB, ST, IL, Chunk); +} + +void CodeGenFunction::EmitOMPDistributeParallelForDirective( + const OMPDistributeParallelForDirective &S) { + OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); + CGM.getOpenMPRuntime().emitInlinedDirective( + *this, OMPD_distribute_parallel_for, + [&S](CodeGenFunction &CGF, PrePostActionTy &) { + OMPLoopScope PreInitScope(CGF, S); + OMPCancelStackRAII CancelRegion(CGF, OMPD_distribute_parallel_for, + /*HasCancel=*/false); + CGF.EmitStmt( + cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); + }); +} + +void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective( + const OMPDistributeParallelForSimdDirective &S) { + OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); + CGM.getOpenMPRuntime().emitInlinedDirective( + *this, OMPD_distribute_parallel_for_simd, + [&S](CodeGenFunction &CGF, PrePostActionTy &) { + OMPLoopScope PreInitScope(CGF, S); + CGF.EmitStmt( + cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); + }); +} + +void CodeGenFunction::EmitOMPDistributeSimdDirective( + const OMPDistributeSimdDirective &S) { + OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); + CGM.getOpenMPRuntime().emitInlinedDirective( + *this, OMPD_distribute_simd, + [&S](CodeGenFunction &CGF, PrePostActionTy &) { + OMPLoopScope PreInitScope(CGF, S); + CGF.EmitStmt( + 
cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); + }); +} + +void CodeGenFunction::EmitOMPTargetParallelForSimdDirective( + const OMPTargetParallelForSimdDirective &S) { + OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); + CGM.getOpenMPRuntime().emitInlinedDirective( + *this, OMPD_target_parallel_for_simd, + [&S](CodeGenFunction &CGF, PrePostActionTy &) { + OMPLoopScope PreInitScope(CGF, S); + CGF.EmitStmt( + cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); + }); } /// \brief Emit a helper variable and return corresponding lvalue. @@ -1408,42 +1950,6 @@ namespace { }; } // namespace -static std::pair<llvm::Value * /*Chunk*/, ScheduleKindModifiersTy> -emitScheduleClause(CodeGenFunction &CGF, const OMPLoopDirective &S, - bool OuterRegion) { - // Detect the loop schedule kind and chunk. - auto ScheduleKind = OMPC_SCHEDULE_unknown; - OpenMPScheduleClauseModifier M1 = OMPC_SCHEDULE_MODIFIER_unknown; - OpenMPScheduleClauseModifier M2 = OMPC_SCHEDULE_MODIFIER_unknown; - llvm::Value *Chunk = nullptr; - if (const auto *C = S.getSingleClause<OMPScheduleClause>()) { - ScheduleKind = C->getScheduleKind(); - M1 = C->getFirstScheduleModifier(); - M2 = C->getSecondScheduleModifier(); - if (const auto *Ch = C->getChunkSize()) { - if (auto *ImpRef = cast_or_null<DeclRefExpr>(C->getHelperChunkSize())) { - if (OuterRegion) { - const VarDecl *ImpVar = cast<VarDecl>(ImpRef->getDecl()); - CGF.EmitVarDecl(*ImpVar); - CGF.EmitStoreThroughLValue( - CGF.EmitAnyExpr(Ch), - CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(ImpVar), - ImpVar->getType())); - } else { - Ch = ImpRef; - } - } - if (!C->getHelperChunkSize() || !OuterRegion) { - Chunk = CGF.EmitScalarExpr(Ch); - Chunk = CGF.EmitScalarConversion(Chunk, Ch->getType(), - S.getIterationVariable()->getType(), - S.getLocStart()); - } - } - } - return std::make_pair(Chunk, ScheduleKindModifiersTy(ScheduleKind, M1, M2)); -} - bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { // Emit the loop iteration variable. auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); @@ -1464,6 +1970,7 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { bool HasLastprivateClause; // Check pre-condition. { + OMPLoopScope PreInitScope(*this, S); // Skip the entire loop if we don't meet the precondition. // If the condition constant folds and can be elided, avoid emitting the // whole loop. @@ -1481,24 +1988,34 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { incrementProfileCounter(&S); } + bool Ordered = false; + if (auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) { + if (OrderedClause->getNumForLoops()) + RT.emitDoacrossInit(*this, S); + else + Ordered = true; + } + + llvm::DenseSet<const Expr *> EmittedFinals; emitAlignedClause(*this, S); EmitOMPLinearClauseInit(S); + // Emit helper vars inits. + LValue LB = + EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable())); + LValue UB = + EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable())); + LValue ST = + EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); + LValue IL = + EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable())); + // Emit 'then' code. { - // Emit helper vars inits. 
- LValue LB = - EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable())); - LValue UB = - EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable())); - LValue ST = - EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); - LValue IL = - EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable())); - OMPPrivateScope LoopScope(*this); if (EmitOMPFirstprivateClause(S, LoopScope)) { // Emit implicit barrier to synchronize threads and avoid data races on - // initialization of firstprivate variables. + // initialization of firstprivate variables and post-update of + // lastprivate variables. CGM.getOpenMPRuntime().emitBarrierCall( *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, /*ForceSimpleCall=*/true); @@ -1506,28 +2023,31 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { EmitOMPPrivateClause(S, LoopScope); HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); EmitOMPReductionClauseInit(S, LoopScope); - emitPrivateLoopCounters(*this, LoopScope, S.counters(), - S.private_counters()); - emitPrivateLinearVars(*this, S, LoopScope); + EmitOMPPrivateLoopCounters(S, LoopScope); + EmitOMPLinearClause(S, LoopScope); (void)LoopScope.Privatize(); // Detect the loop schedule kind and chunk. - llvm::Value *Chunk; - OpenMPScheduleClauseKind ScheduleKind; - auto ScheduleInfo = - emitScheduleClause(*this, S, /*OuterRegion=*/false); - Chunk = ScheduleInfo.first; - ScheduleKind = ScheduleInfo.second.Kind; - const OpenMPScheduleClauseModifier M1 = ScheduleInfo.second.M1; - const OpenMPScheduleClauseModifier M2 = ScheduleInfo.second.M2; + llvm::Value *Chunk = nullptr; + OpenMPScheduleTy ScheduleKind; + if (auto *C = S.getSingleClause<OMPScheduleClause>()) { + ScheduleKind.Schedule = C->getScheduleKind(); + ScheduleKind.M1 = C->getFirstScheduleModifier(); + ScheduleKind.M2 = C->getSecondScheduleModifier(); + if (const auto *Ch = C->getChunkSize()) { + Chunk = EmitScalarExpr(Ch); + Chunk = EmitScalarConversion(Chunk, Ch->getType(), + S.getIterationVariable()->getType(), + S.getLocStart()); + } + } const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); - const bool Ordered = S.getSingleClause<OMPOrderedClause>() != nullptr; // OpenMP 4.5, 2.7.1 Loop Construct, Description. // If the static schedule kind is specified or if the ordered clause is // specified, and if no monotonic modifier is specified, the effect will // be as if the monotonic modifier was specified. - if (RT.isStaticNonchunked(ScheduleKind, + if (RT.isStaticNonchunked(ScheduleKind.Schedule, /* Chunked */ Chunk != nullptr) && !Ordered) { if (isOpenMPSimdDirective(S.getDirectiveKind())) @@ -1557,28 +2077,46 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { [](CodeGenFunction &) {}); EmitBlock(LoopExit.getBlock()); // Tell the runtime we are done. 
- RT.emitForStaticFinish(*this, S.getLocStart()); + auto &&CodeGen = [&S](CodeGenFunction &CGF) { + CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd()); + }; + OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); } else { - const bool IsMonotonic = Ordered || - ScheduleKind == OMPC_SCHEDULE_static || - ScheduleKind == OMPC_SCHEDULE_unknown || - M1 == OMPC_SCHEDULE_MODIFIER_monotonic || - M2 == OMPC_SCHEDULE_MODIFIER_monotonic; + const bool IsMonotonic = + Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static || + ScheduleKind.Schedule == OMPC_SCHEDULE_unknown || + ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic || + ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic; // Emit the outer loop, which requests its work chunk [LB..UB] from // runtime and runs the inner loop to process it. EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered, LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(), Chunk); } + if (isOpenMPSimdDirective(S.getDirectiveKind())) { + EmitOMPSimdFinal(S, + [&](CodeGenFunction &CGF) -> llvm::Value * { + return CGF.Builder.CreateIsNotNull( + CGF.EmitLoadOfScalar(IL, S.getLocStart())); + }); + } EmitOMPReductionClauseFinal(S); + // Emit post-update of the reduction variables if IsLastIter != 0. + emitPostUpdateForReductionClause( + *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * { + return CGF.Builder.CreateIsNotNull( + CGF.EmitLoadOfScalar(IL, S.getLocStart())); + }); // Emit final copy of the lastprivate variables if IsLastIter != 0. if (HasLastprivateClause) EmitOMPLastprivateClauseFinal( - S, Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart()))); - } - if (isOpenMPSimdDirective(S.getDirectiveKind())) { - EmitOMPSimdFinal(S); + S, isOpenMPSimdDirective(S.getDirectiveKind()), + Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart()))); } + EmitOMPLinearClauseFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * { + return CGF.Builder.CreateIsNotNull( + CGF.EmitLoadOfScalar(IL, S.getLocStart())); + }); // We're now done with the loop, so jump to the continuation block. if (ContBlock) { EmitBranch(ContBlock); @@ -1589,13 +2127,17 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { } void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { - LexicalScope Scope(*this, S.getSourceRange()); bool HasLastprivates = false; - auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF) { + auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, + PrePostActionTy &) { + OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel()); HasLastprivates = CGF.EmitOMPWorksharingLoop(S); }; - CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen, - S.hasCancel()); + { + OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); + CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen, + S.hasCancel()); + } // Emit an implicit barrier at the end. 
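
A source-level loop that exercises the two paths above: a non-static (or chunked) schedule routes codegen through EmitOMPForOuterLoop's dispatch loop, and the lastprivate copy-out is guarded by the is-last-iteration flag (IL). Illustrative only; build with -fopenmp:

  #include <cstdio>

  int main() {
    const int n = 16;
    int last = -1;
    #pragma omp parallel for schedule(dynamic, 4) lastprivate(last)
    for (int i = 0; i < n; ++i)
      last = i; // only the thread that executed i == n-1 copies its value out
    std::printf("last = %d\n", last); // prints 15
    return 0;
  }
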
if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) { @@ -1604,12 +2146,15 @@ void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { } void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) { - LexicalScope Scope(*this, S.getSourceRange()); bool HasLastprivates = false; - auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF) { + auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, + PrePostActionTy &) { HasLastprivates = CGF.EmitOMPWorksharingLoop(S); }; - CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); + { + OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); + CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); + } // Emit an implicit barrier at the end. if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) { @@ -1626,12 +2171,12 @@ static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty, return LVal; } -OpenMPDirectiveKind -CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { +void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt(); auto *CS = dyn_cast<CompoundStmt>(Stmt); bool HasLastprivates = false; - auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF) { + auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF, + PrePostActionTy &) { auto &C = CGF.CGM.getContext(); auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); // Emit helper vars inits. @@ -1697,7 +2242,8 @@ CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { CodeGenFunction::OMPPrivateScope LoopScope(CGF); if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) { // Emit implicit barrier to synchronize threads and avoid data races on - // initialization of firstprivate variables. + // initialization of firstprivate variables and post-update of lastprivate + // variables. CGF.CGM.getOpenMPRuntime().emitBarrierCall( CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false, /*ForceSimpleCall=*/true); @@ -1708,8 +2254,10 @@ CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { (void)LoopScope.Privatize(); // Emit static non-chunked loop. + OpenMPScheduleTy ScheduleKind; + ScheduleKind.Schedule = OMPC_SCHEDULE_static; CGF.CGM.getOpenMPRuntime().emitForStaticInit( - CGF, S.getLocStart(), OMPC_SCHEDULE_static, /*IVSize=*/32, + CGF, S.getLocStart(), ScheduleKind, /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(), LB.getAddress(), UB.getAddress(), ST.getAddress()); // UB = min(UB, GlobalUB); @@ -1723,14 +2271,24 @@ CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen, [](CodeGenFunction &) {}); // Tell the runtime we are done. - CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocStart()); + auto &&CodeGen = [&S](CodeGenFunction &CGF) { + CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd()); + }; + CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen); CGF.EmitOMPReductionClauseFinal(S); + // Emit post-update of the reduction variables if IsLastIter != 0. + emitPostUpdateForReductionClause( + CGF, S, [&](CodeGenFunction &CGF) -> llvm::Value * { + return CGF.Builder.CreateIsNotNull( + CGF.EmitLoadOfScalar(IL, S.getLocStart())); + }); // Emit final copy of the lastprivate variables if IsLastIter != 0. 
if (HasLastprivates) CGF.EmitOMPLastprivateClauseFinal( - S, CGF.Builder.CreateIsNotNull( - CGF.EmitLoadOfScalar(IL, S.getLocStart()))); + S, /*NoFinals=*/false, + CGF.Builder.CreateIsNotNull( + CGF.EmitLoadOfScalar(IL, S.getLocStart()))); }; bool HasCancel = false; @@ -1738,6 +2296,7 @@ CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { HasCancel = OSD->hasCancel(); else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S)) HasCancel = OPSD->hasCancel(); + OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel); CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen, HasCancel); // Emit barrier for lastprivates only if 'sections' directive has 'nowait' @@ -1749,23 +2308,25 @@ CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_unknown); } - return OMPD_sections; } void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { - LexicalScope Scope(*this, S.getSourceRange()); - OpenMPDirectiveKind EmittedAs = EmitSections(S); + { + OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); + EmitSections(S); + } // Emit an implicit barrier at the end. if (!S.getSingleClause<OMPNowaitClause>()) { - CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), EmittedAs); + CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), + OMPD_sections); } } void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { - LexicalScope Scope(*this, S.getSourceRange()); - auto &&CodeGen = [&S](CodeGenFunction &CGF) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); }; + OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen, S.hasCancel()); } @@ -1776,8 +2337,7 @@ void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { llvm::SmallVector<const Expr *, 8> SrcExprs; llvm::SmallVector<const Expr *, 8> AssignmentOps; // Check if there are any 'copyprivate' clauses associated with this - // 'single' - // construct. + // 'single' construct. 
// Build a list of copyprivate variables along with helper expressions // (<source>, <destination>, <destination>=<source> expressions) for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) { @@ -1788,24 +2348,24 @@ void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { AssignmentOps.append(C->assignment_ops().begin(), C->assignment_ops().end()); } - LexicalScope Scope(*this, S.getSourceRange()); // Emit code for 'single' region along with 'copyprivate' clauses - bool HasFirstprivates; - auto &&CodeGen = [&S, &HasFirstprivates](CodeGenFunction &CGF) { - CodeGenFunction::OMPPrivateScope SingleScope(CGF); - HasFirstprivates = CGF.EmitOMPFirstprivateClause(S, SingleScope); + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); + OMPPrivateScope SingleScope(CGF); + (void)CGF.EmitOMPFirstprivateClause(S, SingleScope); CGF.EmitOMPPrivateClause(S, SingleScope); (void)SingleScope.Privatize(); - CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); }; - CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(), - CopyprivateVars, DestExprs, SrcExprs, - AssignmentOps); + { + OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); + CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(), + CopyprivateVars, DestExprs, + SrcExprs, AssignmentOps); + } // Emit an implicit barrier at the end (to avoid data race on firstprivate // init or if no 'nowait' clause was specified and no 'copyprivate' clause). - if ((!S.getSingleClause<OMPNowaitClause>() || HasFirstprivates) && - CopyprivateVars.empty()) { + if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) { CGM.getOpenMPRuntime().emitBarrierCall( *this, S.getLocStart(), S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single); @@ -1813,21 +2373,23 @@ void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { } void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { - LexicalScope Scope(*this, S.getSourceRange()); - auto &&CodeGen = [&S](CodeGenFunction &CGF) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); }; + OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart()); } void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { - LexicalScope Scope(*this, S.getSourceRange()); - auto &&CodeGen = [&S](CodeGenFunction &CGF) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); }; Expr *Hint = nullptr; if (auto *HintClause = S.getSingleClause<OMPHintClause>()) Hint = HintClause->getHint(); + OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); CGM.getOpenMPRuntime().emitCriticalRegion(*this, S.getDirectiveName().getAsString(), CodeGen, S.getLocStart(), Hint); @@ -1837,9 +2399,8 @@ void CodeGenFunction::EmitOMPParallelForDirective( const OMPParallelForDirective &S) { // Emit directive as a combined directive that consists of two implicit // directives: 'parallel' with 'for' directive. 
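
A usage sketch for the copyprivate plumbing above, i.e. the <source>, <destination>, <destination> = <source> triples collected for emitSingleRegion. Illustrative only; build with -fopenmp:

  #include <cstdio>

  int main() {
    int token = 0;
    #pragma omp parallel firstprivate(token)
    {
      #pragma omp single copyprivate(token)
      token = 42;                         // one thread produces the value...
      // ...and copyprivate broadcasts it into every thread's private copy.
      std::printf("token = %d\n", token); // each thread prints 42
    }
    return 0;
  }
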
- LexicalScope Scope(*this, S.getSourceRange()); - (void)emitScheduleClause(*this, S, /*OuterRegion=*/true); - auto &&CodeGen = [&S](CodeGenFunction &CGF) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel()); CGF.EmitOMPWorksharingLoop(S); }; emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen); @@ -1849,9 +2410,7 @@ void CodeGenFunction::EmitOMPParallelForSimdDirective( const OMPParallelForSimdDirective &S) { // Emit directive as a combined directive that consists of two implicit // directives: 'parallel' with 'for' directive. - LexicalScope Scope(*this, S.getSourceRange()); - (void)emitScheduleClause(*this, S, /*OuterRegion=*/true); - auto &&CodeGen = [&S](CodeGenFunction &CGF) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { CGF.EmitOMPWorksharingLoop(S); }; emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen); @@ -1861,93 +2420,148 @@ void CodeGenFunction::EmitOMPParallelSectionsDirective( const OMPParallelSectionsDirective &S) { // Emit directive as a combined directive that consists of two implicit // directives: 'parallel' with 'sections' directive. - LexicalScope Scope(*this, S.getSourceRange()); - auto &&CodeGen = [&S](CodeGenFunction &CGF) { - (void)CGF.EmitSections(S); + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitSections(S); }; emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen); } -void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { +void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, + const RegionCodeGenTy &BodyGen, + const TaskGenTy &TaskGen, + OMPTaskDataTy &Data) { // Emit outlined function for task construct. - LexicalScope Scope(*this, S.getSourceRange()); auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); - auto CapturedStruct = GenerateCapturedStmtArgument(*CS); auto *I = CS->getCapturedDecl()->param_begin(); auto *PartId = std::next(I); + auto *TaskT = std::next(I, 4); + // Check if the task is final + if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) { + // If the condition constant folds and can be elided, try to avoid emitting + // the condition and the dead arm of the if/else. + auto *Cond = Clause->getCondition(); + bool CondConstant; + if (ConstantFoldsToSimpleInteger(Cond, CondConstant)) + Data.Final.setInt(CondConstant); + else + Data.Final.setPointer(EvaluateExprAsBool(Cond)); + } else { + // By default the task is not final. + Data.Final.setInt(/*IntVal=*/false); + } + // Check if the task has 'priority' clause. + if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) { + // Runtime currently does not support codegen for priority clause argument. + // TODO: Add codegen for priority clause arg when runtime lib support it. + auto *Prio = Clause->getPriority(); + Data.Priority.setInt(Prio); + Data.Priority.setPointer(EmitScalarConversion( + EmitScalarExpr(Prio), Prio->getType(), + getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1), + Prio->getExprLoc())); + } // The first function argument for tasks is a thread id, the second one is a // part id (0 for tied tasks, >=0 for untied task). llvm::DenseSet<const VarDecl *> EmittedAsPrivate; // Get list of private variables. 
- llvm::SmallVector<const Expr *, 8> PrivateVars; - llvm::SmallVector<const Expr *, 8> PrivateCopies; for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { auto IRef = C->varlist_begin(); for (auto *IInit : C->private_copies()) { auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { - PrivateVars.push_back(*IRef); - PrivateCopies.push_back(IInit); + Data.PrivateVars.push_back(*IRef); + Data.PrivateCopies.push_back(IInit); } ++IRef; } } EmittedAsPrivate.clear(); // Get list of firstprivate variables. - llvm::SmallVector<const Expr *, 8> FirstprivateVars; - llvm::SmallVector<const Expr *, 8> FirstprivateCopies; - llvm::SmallVector<const Expr *, 8> FirstprivateInits; for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { auto IRef = C->varlist_begin(); auto IElemInitRef = C->inits().begin(); for (auto *IInit : C->private_copies()) { auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { - FirstprivateVars.push_back(*IRef); - FirstprivateCopies.push_back(IInit); - FirstprivateInits.push_back(*IElemInitRef); + Data.FirstprivateVars.push_back(*IRef); + Data.FirstprivateCopies.push_back(IInit); + Data.FirstprivateInits.push_back(*IElemInitRef); } - ++IRef, ++IElemInitRef; + ++IRef; + ++IElemInitRef; } } - // Build list of dependences. - llvm::SmallVector<std::pair<OpenMPDependClauseKind, const Expr *>, 8> - Dependences; - for (const auto *C : S.getClausesOfKind<OMPDependClause>()) { - for (auto *IRef : C->varlists()) { - Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef)); + // Get list of lastprivate variables (for taskloops). + llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs; + for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { + auto IRef = C->varlist_begin(); + auto ID = C->destination_exprs().begin(); + for (auto *IInit : C->private_copies()) { + auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); + if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { + Data.LastprivateVars.push_back(*IRef); + Data.LastprivateCopies.push_back(IInit); + } + LastprivateDstsOrigs.insert( + {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()), + cast<DeclRefExpr>(*IRef)}); + ++IRef; + ++ID; } } - auto &&CodeGen = [PartId, &S, &PrivateVars, &FirstprivateVars]( - CodeGenFunction &CGF) { + // Build list of dependences. + for (const auto *C : S.getClausesOfKind<OMPDependClause>()) + for (auto *IRef : C->varlists()) + Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef)); + auto &&CodeGen = [PartId, &S, &Data, CS, &BodyGen, &LastprivateDstsOrigs]( + CodeGenFunction &CGF, PrePostActionTy &Action) { // Set proper addresses for generated private copies. - auto *CS = cast<CapturedStmt>(S.getAssociatedStmt()); OMPPrivateScope Scope(CGF); - if (!PrivateVars.empty() || !FirstprivateVars.empty()) { + if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() || + !Data.LastprivateVars.empty()) { auto *CopyFn = CGF.Builder.CreateLoad( CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3))); auto *PrivatesPtr = CGF.Builder.CreateLoad( CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2))); // Map privates. 
- llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> - PrivatePtrs; + llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs; llvm::SmallVector<llvm::Value *, 16> CallArgs; CallArgs.push_back(PrivatesPtr); - for (auto *E : PrivateVars) { + for (auto *E : Data.PrivateVars) { + auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); + Address PrivatePtr = CGF.CreateMemTemp( + CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr"); + PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); + CallArgs.push_back(PrivatePtr.getPointer()); + } + for (auto *E : Data.FirstprivateVars) { auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); Address PrivatePtr = - CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType())); + CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), + ".firstpriv.ptr.addr"); PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); CallArgs.push_back(PrivatePtr.getPointer()); } - for (auto *E : FirstprivateVars) { + for (auto *E : Data.LastprivateVars) { auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); Address PrivatePtr = - CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType())); + CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), + ".lastpriv.ptr.addr"); PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); CallArgs.push_back(PrivatePtr.getPointer()); } CGF.EmitRuntimeCall(CopyFn, CallArgs); + for (auto &&Pair : LastprivateDstsOrigs) { + auto *OrigVD = cast<VarDecl>(Pair.second->getDecl()); + DeclRefExpr DRE( + const_cast<VarDecl *>(OrigVD), + /*RefersToEnclosingVariableOrCapture=*/CGF.CapturedStmtInfo->lookup( + OrigVD) != nullptr, + Pair.second->getType(), VK_LValue, Pair.second->getExprLoc()); + Scope.addPrivate(Pair.first, [&CGF, &DRE]() { + return CGF.EmitLValue(&DRE).getAddress(); + }); + } for (auto &&Pair : PrivatePtrs) { Address Replacement(CGF.Builder.CreateLoad(Pair.second), CGF.getContext().getDeclAlign(Pair.first)); @@ -1955,30 +2569,21 @@ void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { } } (void)Scope.Privatize(); - if (*PartId) { - // TODO: emit code for untied tasks. - } - CGF.EmitStmt(CS->getCapturedStmt()); + + Action.Enter(CGF); + BodyGen(CGF); }; - auto OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( - S, *I, OMPD_task, CodeGen); - // Check if we should emit tied or untied task. - bool Tied = !S.getSingleClause<OMPUntiedClause>(); - // Check if the task is final - llvm::PointerIntPair<llvm::Value *, 1, bool> Final; - if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) { - // If the condition constant folds and can be elided, try to avoid emitting - // the condition and the dead arm of the if/else. - auto *Cond = Clause->getCondition(); - bool CondConstant; - if (ConstantFoldsToSimpleInteger(Cond, CondConstant)) - Final.setInt(CondConstant); - else - Final.setPointer(EvaluateExprAsBool(Cond)); - } else { - // By default the task is not final. - Final.setInt(/*IntVal=*/false); - } + auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( + S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied, + Data.NumberOfParts); + OMPLexicalScope Scope(*this, S); + TaskGen(*this, OutlinedFn, Data); +} + +void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { + // Emit outlined function for task construct. 
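
The private-pointer remapping above (.priv.ptr.addr / .firstpriv.ptr.addr and the CopyFn call) is what backs the usual capture-at-creation behavior of task firstprivates. A small sketch; build with -fopenmp:

  #include <cstdio>

  int main() {
    int captured = 7;
    #pragma omp parallel
    #pragma omp single
    {
      #pragma omp task firstprivate(captured)
      std::printf("task sees %d\n", captured); // 7: copied when the task is created
      captured = 99;                           // later write; the task's copy is unaffected
      #pragma omp taskwait
    }
    return 0;
  }
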
+ auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); + auto CapturedStruct = GenerateCapturedStmtArgument(*CS); auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); const Expr *IfCond = nullptr; for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { @@ -1988,10 +2593,21 @@ void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { break; } } - CGM.getOpenMPRuntime().emitTaskCall( - *this, S.getLocStart(), S, Tied, Final, OutlinedFn, SharedsTy, - CapturedStruct, IfCond, PrivateVars, PrivateCopies, FirstprivateVars, - FirstprivateCopies, FirstprivateInits, Dependences); + + OMPTaskDataTy Data; + // Check if we should emit tied or untied task. + Data.Tied = !S.getSingleClause<OMPUntiedClause>(); + auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitStmt(CS->getCapturedStmt()); + }; + auto &&TaskGen = [&S, SharedsTy, CapturedStruct, + IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn, + const OMPTaskDataTy &Data) { + CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getLocStart(), S, OutlinedFn, + SharedsTy, CapturedStruct, IfCond, + Data); + }; + EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data); } void CodeGenFunction::EmitOMPTaskyieldDirective( @@ -2009,10 +2625,11 @@ void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) { void CodeGenFunction::EmitOMPTaskgroupDirective( const OMPTaskgroupDirective &S) { - LexicalScope Scope(*this, S.getSourceRange()); - auto &&CodeGen = [&S](CodeGenFunction &CGF) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); }; + OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart()); } @@ -2026,9 +2643,130 @@ void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) { }(), S.getLocStart()); } +void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) { + // Emit the loop iteration variable. + auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); + auto IVDecl = cast<VarDecl>(IVExpr->getDecl()); + EmitVarDecl(*IVDecl); + + // Emit the iterations count variable. + // If it is not a variable, Sema decided to calculate iterations count on each + // iteration (e.g., it is foldable into a constant). + if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { + EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); + // Emit calculation of the iterations count. + EmitIgnoredExpr(S.getCalcLastIteration()); + } + + auto &RT = CGM.getOpenMPRuntime(); + + // Check pre-condition. + { + OMPLoopScope PreInitScope(*this, S); + // Skip the entire loop if we don't meet the precondition. + // If the condition constant folds and can be elided, avoid emitting the + // whole loop. + bool CondConstant; + llvm::BasicBlock *ContBlock = nullptr; + if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { + if (!CondConstant) + return; + } else { + auto *ThenBlock = createBasicBlock("omp.precond.then"); + ContBlock = createBasicBlock("omp.precond.end"); + emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock, + getProfileCount(&S)); + EmitBlock(ThenBlock); + incrementProfileCounter(&S); + } + + // Emit 'then' code. + { + // Emit helper vars inits. 
+ LValue LB = + EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable())); + LValue UB = + EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable())); + LValue ST = + EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); + LValue IL = + EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable())); + + OMPPrivateScope LoopScope(*this); + EmitOMPPrivateLoopCounters(S, LoopScope); + (void)LoopScope.Privatize(); + + // Detect the distribute schedule kind and chunk. + llvm::Value *Chunk = nullptr; + OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown; + if (auto *C = S.getSingleClause<OMPDistScheduleClause>()) { + ScheduleKind = C->getDistScheduleKind(); + if (const auto *Ch = C->getChunkSize()) { + Chunk = EmitScalarExpr(Ch); + Chunk = EmitScalarConversion(Chunk, Ch->getType(), + S.getIterationVariable()->getType(), + S.getLocStart()); + } + } + const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); + const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); + + // OpenMP [2.10.8, distribute Construct, Description] + // If dist_schedule is specified, kind must be static. If specified, + // iterations are divided into chunks of size chunk_size, chunks are + // assigned to the teams of the league in a round-robin fashion in the + // order of the team number. When no chunk_size is specified, the + // iteration space is divided into chunks that are approximately equal + // in size, and at most one chunk is distributed to each team of the + // league. The size of the chunks is unspecified in this case. + if (RT.isStaticNonchunked(ScheduleKind, + /* Chunked */ Chunk != nullptr)) { + RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, + IVSize, IVSigned, /* Ordered = */ false, + IL.getAddress(), LB.getAddress(), + UB.getAddress(), ST.getAddress()); + auto LoopExit = + getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); + // UB = min(UB, GlobalUB); + EmitIgnoredExpr(S.getEnsureUpperBound()); + // IV = LB; + EmitIgnoredExpr(S.getInit()); + // while (idx <= UB) { BODY; ++idx; } + EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), + S.getInc(), + [&S, LoopExit](CodeGenFunction &CGF) { + CGF.EmitOMPLoopBody(S, LoopExit); + CGF.EmitStopPoint(&S); + }, + [](CodeGenFunction &) {}); + EmitBlock(LoopExit.getBlock()); + // Tell the runtime we are done. + RT.emitForStaticFinish(*this, S.getLocStart()); + } else { + // Emit the outer loop, which requests its work chunk [LB..UB] from + // runtime and runs the inner loop to process it. + EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, + LB.getAddress(), UB.getAddress(), ST.getAddress(), + IL.getAddress(), Chunk); + } + } + + // We're now done with the loop, so jump to the continuation block. 
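
For reference, the dist_schedule handling above corresponds to source like the following, where chunks of iterations are handed to the teams of the league round-robin. A sketch only; whether it runs on a device or falls back to the host depends on the toolchain (build with -fopenmp):

  #include <cstdio>

  int main() {
    const int n = 16;
    int out[16] = {0};
    #pragma omp target teams distribute dist_schedule(static, 4) map(tofrom : out)
    for (int i = 0; i < n; ++i)
      out[i] = i * i; // each team fills the 4-iteration chunks it was assigned
    std::printf("out[15] = %d\n", out[15]); // prints 225
    return 0;
  }
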
+ if (ContBlock) { + EmitBranch(ContBlock); + EmitBlock(ContBlock, true); + } + } +} + void CodeGenFunction::EmitOMPDistributeDirective( const OMPDistributeDirective &S) { - llvm_unreachable("CodeGen for 'omp distribute' is not supported yet."); + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitOMPDistributeLoop(S); + }; + OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); + CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen, + false); } static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM, @@ -2042,11 +2780,14 @@ static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM, } void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { - if (!S.getAssociatedStmt()) + if (!S.getAssociatedStmt()) { + for (const auto *DC : S.getClausesOfKind<OMPDependClause>()) + CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC); return; - LexicalScope Scope(*this, S.getSourceRange()); + } auto *C = S.getSingleClause<OMPSIMDClause>(); - auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF) { + auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF, + PrePostActionTy &Action) { if (C) { auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); llvm::SmallVector<llvm::Value *, 16> CapturedVars; @@ -2054,10 +2795,12 @@ void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS); CGF.EmitNounwindRuntimeCall(OutlinedFn, CapturedVars); } else { + Action.Enter(CGF); CGF.EmitStmt( cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); } }; + OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C); } @@ -2104,8 +2847,9 @@ static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst, if (LVal.isGlobalReg()) { CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal); } else { - CGF.EmitAtomicStore(RVal, LVal, IsSeqCst ? llvm::SequentiallyConsistent - : llvm::Monotonic, + CGF.EmitAtomicStore(RVal, LVal, + IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent + : llvm::AtomicOrdering::Monotonic, LVal.isVolatile(), /*IsInit=*/false); } } @@ -2138,10 +2882,11 @@ static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst, LValue VLValue = CGF.EmitLValue(V); RValue Res = XLValue.isGlobalReg() ? CGF.EmitLoadOfLValue(XLValue, Loc) - : CGF.EmitAtomicLoad(XLValue, Loc, - IsSeqCst ? llvm::SequentiallyConsistent - : llvm::Monotonic, - XLValue.isVolatile()); + : CGF.EmitAtomicLoad( + XLValue, Loc, + IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent + : llvm::AtomicOrdering::Monotonic, + XLValue.isVolatile()); // OpenMP, 2.12.6, atomic Construct // Any atomic construct with a seq_cst clause forces the atomically // performed operation to include an implicit flush operation without a @@ -2297,7 +3042,8 @@ static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst, assert(X->isLValue() && "X of 'omp atomic update' is not lvalue"); LValue XLValue = CGF.EmitLValue(X); RValue ExprRValue = CGF.EmitAnyExpr(E); - auto AO = IsSeqCst ? llvm::SequentiallyConsistent : llvm::Monotonic; + auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent + : llvm::AtomicOrdering::Monotonic; auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts()); auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts()); auto *XRValExpr = IsXLHSInRHSPart ? 
LHS : RHS; @@ -2346,7 +3092,8 @@ static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst, LValue VLValue = CGF.EmitLValue(V); LValue XLValue = CGF.EmitLValue(X); RValue ExprRValue = CGF.EmitAnyExpr(E); - auto AO = IsSeqCst ? llvm::SequentiallyConsistent : llvm::Monotonic; + auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent + : llvm::AtomicOrdering::Monotonic; QualType NewVValType; if (UE) { // 'x' is updated with some additional value. @@ -2472,6 +3219,13 @@ static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, case OMPC_nogroup: case OMPC_num_tasks: case OMPC_hint: + case OMPC_dist_schedule: + case OMPC_defaultmap: + case OMPC_uniform: + case OMPC_to: + case OMPC_from: + case OMPC_use_device_ptr: + case OMPC_is_device_ptr: llvm_unreachable("Clause is not allowed in 'omp atomic'."); } } @@ -2501,18 +3255,39 @@ void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) { } } - LexicalScope Scope(*this, S.getSourceRange()); - auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF) { + auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF, + PrePostActionTy &) { CGF.EmitStopPoint(CS); EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(), S.getV(), S.getExpr(), S.getUpdateExpr(), S.isXLHSInRHSPart(), S.getLocStart()); }; + OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen); } +std::pair<llvm::Function * /*OutlinedFn*/, llvm::Constant * /*OutlinedFnID*/> +CodeGenFunction::EmitOMPTargetDirectiveOutlinedFunction( + CodeGenModule &CGM, const OMPTargetDirective &S, StringRef ParentName, + bool IsOffloadEntry) { + llvm::Function *OutlinedFn = nullptr; + llvm::Constant *OutlinedFnID = nullptr; + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + OMPPrivateScope PrivateScope(CGF); + (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); + CGF.EmitOMPPrivateClause(S, PrivateScope); + (void)PrivateScope.Privatize(); + + Action.Enter(CGF); + CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); + }; + // Emit target region as a standalone region. + CGM.getOpenMPRuntime().emitTargetOutlinedFunction( + S, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, CodeGen); + return std::make_pair(OutlinedFn, OutlinedFnID); +} + void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) { - LexicalScope Scope(*this, S.getSourceRange()); const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt()); llvm::SmallVector<llvm::Value *, 16> CapturedVars; @@ -2558,15 +3333,50 @@ void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) { ParentName = CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CurFuncDecl))); - CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID, - IsOffloadEntry); - + std::tie(Fn, FnID) = EmitOMPTargetDirectiveOutlinedFunction( + CGM, S, ParentName, IsOffloadEntry); + OMPLexicalScope Scope(*this, S); CGM.getOpenMPRuntime().emitTargetCall(*this, S, Fn, FnID, IfCond, Device, CapturedVars); } -void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &) { - llvm_unreachable("CodeGen for 'omp teams' is not supported yet."); +static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF, + const OMPExecutableDirective &S, + OpenMPDirectiveKind InnermostKind, + const RegionCodeGenTy &CodeGen) { + auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); + auto OutlinedFn = CGF.CGM.getOpenMPRuntime(). 
+ emitParallelOrTeamsOutlinedFunction(S, + *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); + + const OMPTeamsDirective &TD = *dyn_cast<OMPTeamsDirective>(&S); + const OMPNumTeamsClause *NT = TD.getSingleClause<OMPNumTeamsClause>(); + const OMPThreadLimitClause *TL = TD.getSingleClause<OMPThreadLimitClause>(); + if (NT || TL) { + Expr *NumTeams = (NT) ? NT->getNumTeams() : nullptr; + Expr *ThreadLimit = (TL) ? TL->getThreadLimit() : nullptr; + + CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit, + S.getLocStart()); + } + + OMPLexicalScope Scope(CGF, S); + llvm::SmallVector<llvm::Value *, 16> CapturedVars; + CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); + CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn, + CapturedVars); +} + +void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) { + // Emit parallel region as a standalone region. + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + OMPPrivateScope PrivateScope(CGF); + (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); + CGF.EmitOMPPrivateClause(S, PrivateScope); + (void)PrivateScope.Privatize(); + CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); + }; + emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen); } void CodeGenFunction::EmitOMPCancellationPointDirective( @@ -2590,37 +3400,261 @@ void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) { CodeGenFunction::JumpDest CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) { - if (Kind == OMPD_parallel || Kind == OMPD_task) + if (Kind == OMPD_parallel || Kind == OMPD_task || + Kind == OMPD_target_parallel) return ReturnBlock; assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections || - Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for); - return BreakContinueStack.back().BreakBlock; + Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for || + Kind == OMPD_distribute_parallel_for || + Kind == OMPD_target_parallel_for); + return OMPCancelStack.getExitBlock(); } // Generate the instructions for '#pragma omp target data' directive. void CodeGenFunction::EmitOMPTargetDataDirective( const OMPTargetDataDirective &S) { - // emit the code inside the construct for now + // The target data enclosed region is implemented just by emitting the + // statement. + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); + }; + + // If we don't have target devices, don't bother emitting the data mapping + // code. + if (CGM.getLangOpts().OMPTargetTriples.empty()) { + OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); + + CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_target_data, + CodeGen); + return; + } + + // Check if we have any if clause associated with the directive. + const Expr *IfCond = nullptr; + if (auto *C = S.getSingleClause<OMPIfClause>()) + IfCond = C->getCondition(); + + // Check if we have any device clause associated with the directive. + const Expr *Device = nullptr; + if (auto *C = S.getSingleClause<OMPDeviceClause>()) + Device = C->getDevice(); + + CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, CodeGen); +} + +void CodeGenFunction::EmitOMPTargetEnterDataDirective( + const OMPTargetEnterDataDirective &S) { + // If we don't have target devices, don't bother emitting the data mapping + // code. 
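emitCommonOMPTeamsDirective above only forwards num_teams and thread_limit to the runtime when one of the clauses is present. A hedged, source-level sketch of the directive it handles (identifiers below are invented for illustration):

    // Illustrative sketch only: the num_teams/thread_limit expressions are the
    // ones read via getSingleClause<OMPNumTeamsClause>() and
    // getSingleClause<OMPThreadLimitClause>() above.
    void init(int *a, int n) {
    #pragma omp target teams num_teams(8) thread_limit(64) map(tofrom: a[0:n])
    #pragma omp distribute
      for (int i = 0; i < n; ++i)
        a[i] = 0;
    }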
+ if (CGM.getLangOpts().OMPTargetTriples.empty()) + return; + + // Check if we have any if clause associated with the directive. + const Expr *IfCond = nullptr; + if (auto *C = S.getSingleClause<OMPIfClause>()) + IfCond = C->getCondition(); + + // Check if we have any device clause associated with the directive. + const Expr *Device = nullptr; + if (auto *C = S.getSingleClause<OMPDeviceClause>()) + Device = C->getDevice(); + + CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); +} + +void CodeGenFunction::EmitOMPTargetExitDataDirective( + const OMPTargetExitDataDirective &S) { + // If we don't have target devices, don't bother emitting the data mapping + // code. + if (CGM.getLangOpts().OMPTargetTriples.empty()) + return; + + // Check if we have any if clause associated with the directive. + const Expr *IfCond = nullptr; + if (auto *C = S.getSingleClause<OMPIfClause>()) + IfCond = C->getCondition(); + + // Check if we have any device clause associated with the directive. + const Expr *Device = nullptr; + if (auto *C = S.getSingleClause<OMPDeviceClause>()) + Device = C->getDevice(); + + CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); +} + +void CodeGenFunction::EmitOMPTargetParallelDirective( + const OMPTargetParallelDirective &S) { + // TODO: codegen for target parallel. +} + +void CodeGenFunction::EmitOMPTargetParallelForDirective( + const OMPTargetParallelForDirective &S) { + // TODO: codegen for target parallel for. +} + +/// Emit a helper variable and return corresponding lvalue. +static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper, + const ImplicitParamDecl *PVD, + CodeGenFunction::OMPPrivateScope &Privates) { + auto *VDecl = cast<VarDecl>(Helper->getDecl()); + Privates.addPrivate( + VDecl, [&CGF, PVD]() -> Address { return CGF.GetAddrOfLocalVar(PVD); }); +} + +void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) { + assert(isOpenMPTaskLoopDirective(S.getDirectiveKind())); + // Emit outlined function for task construct. auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); - CGM.getOpenMPRuntime().emitInlinedDirective( - *this, OMPD_target_data, - [&CS](CodeGenFunction &CGF) { CGF.EmitStmt(CS->getCapturedStmt()); }); + auto CapturedStruct = GenerateCapturedStmtArgument(*CS); + auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); + const Expr *IfCond = nullptr; + for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { + if (C->getNameModifier() == OMPD_unknown || + C->getNameModifier() == OMPD_taskloop) { + IfCond = C->getCondition(); + break; + } + } + + OMPTaskDataTy Data; + // Check if taskloop must be emitted without taskgroup. + Data.Nogroup = S.getSingleClause<OMPNogroupClause>(); + // TODO: Check if we should emit tied or untied task. + Data.Tied = true; + // Set scheduling for taskloop + if (const auto* Clause = S.getSingleClause<OMPGrainsizeClause>()) { + // grainsize clause + Data.Schedule.setInt(/*IntVal=*/false); + Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize())); + } else if (const auto* Clause = S.getSingleClause<OMPNumTasksClause>()) { + // num_tasks clause + Data.Schedule.setInt(/*IntVal=*/true); + Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks())); + } + + auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) { + // if (PreCond) { + // for (IV in 0..LastIteration) BODY; + // <Final counter/linear vars updates>; + // } + // + + // Emit: if (PreCond) - begin. 
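The enter/exit data handlers are no-ops when no offload targets are registered and otherwise hand the optional if and device expressions to a standalone runtime call. A sketch of the matching user-level directives (function and variable names are illustrative only):

    // Illustrative sketch only: standalone data-mapping directives of the kind
    // emitTargetDataStandAloneCall receives, together with the optional
    // if()/device() expressions extracted above.
    void process(double *buf, int n, int dev) {
    #pragma omp target enter data map(to: buf[0:n]) device(dev) if(n > 1024)
      // ... target regions using buf execute here ...
    #pragma omp target exit data map(from: buf[0:n]) device(dev) if(n > 1024)
    }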
+ // If the condition constant folds and can be elided, avoid emitting the + // whole loop. + bool CondConstant; + llvm::BasicBlock *ContBlock = nullptr; + OMPLoopScope PreInitScope(CGF, S); + if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { + if (!CondConstant) + return; + } else { + auto *ThenBlock = CGF.createBasicBlock("taskloop.if.then"); + ContBlock = CGF.createBasicBlock("taskloop.if.end"); + emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock, + CGF.getProfileCount(&S)); + CGF.EmitBlock(ThenBlock); + CGF.incrementProfileCounter(&S); + } + + if (isOpenMPSimdDirective(S.getDirectiveKind())) + CGF.EmitOMPSimdInit(S); + + OMPPrivateScope LoopScope(CGF); + // Emit helper vars inits. + enum { LowerBound = 5, UpperBound, Stride, LastIter }; + auto *I = CS->getCapturedDecl()->param_begin(); + auto *LBP = std::next(I, LowerBound); + auto *UBP = std::next(I, UpperBound); + auto *STP = std::next(I, Stride); + auto *LIP = std::next(I, LastIter); + mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP, + LoopScope); + mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP, + LoopScope); + mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope); + mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP, + LoopScope); + CGF.EmitOMPPrivateLoopCounters(S, LoopScope); + bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); + (void)LoopScope.Privatize(); + // Emit the loop iteration variable. + const Expr *IVExpr = S.getIterationVariable(); + const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl()); + CGF.EmitVarDecl(*IVDecl); + CGF.EmitIgnoredExpr(S.getInit()); + + // Emit the iterations count variable. + // If it is not a variable, Sema decided to calculate iterations count on + // each iteration (e.g., it is foldable into a constant). + if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { + CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); + // Emit calculation of the iterations count. + CGF.EmitIgnoredExpr(S.getCalcLastIteration()); + } + + CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), + S.getInc(), + [&S](CodeGenFunction &CGF) { + CGF.EmitOMPLoopBody(S, JumpDest()); + CGF.EmitStopPoint(&S); + }, + [](CodeGenFunction &) {}); + // Emit: if (PreCond) - end. + if (ContBlock) { + CGF.EmitBranch(ContBlock); + CGF.EmitBlock(ContBlock, true); + } + // Emit final copy of the lastprivate variables if IsLastIter != 0. 
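For taskloop, Data.Schedule is filled from a grainsize or num_tasks clause (the two are mutually exclusive), and the guarded epilogue above copies lastprivate values back only from the task that executed the last iteration. A rough source-level example (names invented):

    // Illustrative sketch only.
    void tail(const int *v, int n, int *out) {
      int last = 0;
    #pragma omp taskloop grainsize(256) lastprivate(last)
      for (int i = 0; i < n; ++i)
        last = v[i];        // final value survives via the lastprivate copy
      *out = last;
    }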
+ if (HasLastprivateClause) { + CGF.EmitOMPLastprivateClauseFinal( + S, isOpenMPSimdDirective(S.getDirectiveKind()), + CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar( + CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false, + (*LIP)->getType(), S.getLocStart()))); + } + }; + auto &&TaskGen = [&S, SharedsTy, CapturedStruct, + IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn, + const OMPTaskDataTy &Data) { + auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &) { + OMPLoopScope PreInitScope(CGF, S); + CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getLocStart(), S, + OutlinedFn, SharedsTy, + CapturedStruct, IfCond, Data); + }; + CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop, + CodeGen); + }; + EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data); } void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) { - // emit the code inside the construct for now - auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); - CGM.getOpenMPRuntime().emitInlinedDirective( - *this, OMPD_taskloop, - [&CS](CodeGenFunction &CGF) { CGF.EmitStmt(CS->getCapturedStmt()); }); + EmitOMPTaskLoopBasedDirective(S); } void CodeGenFunction::EmitOMPTaskLoopSimdDirective( const OMPTaskLoopSimdDirective &S) { - // emit the code inside the construct for now - auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); - CGM.getOpenMPRuntime().emitInlinedDirective( - *this, OMPD_taskloop_simd, - [&CS](CodeGenFunction &CGF) { CGF.EmitStmt(CS->getCapturedStmt()); }); + EmitOMPTaskLoopBasedDirective(S); } +// Generate the instructions for '#pragma omp target update' directive. +void CodeGenFunction::EmitOMPTargetUpdateDirective( + const OMPTargetUpdateDirective &S) { + // If we don't have target devices, don't bother emitting the data mapping + // code. + if (CGM.getLangOpts().OMPTargetTriples.empty()) + return; + + // Check if we have any if clause associated with the directive. + const Expr *IfCond = nullptr; + if (auto *C = S.getSingleClause<OMPIfClause>()) + IfCond = C->getCondition(); + + // Check if we have any device clause associated with the directive. 
+ const Expr *Device = nullptr; + if (auto *C = S.getSingleClause<OMPDeviceClause>()) + Device = C->getDevice(); + + CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); +} diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGVTT.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGVTT.cpp index 4fb76710..5b90ee6 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGVTT.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGVTT.cpp @@ -44,7 +44,7 @@ CodeGenVTables::EmitVTTDefinition(llvm::GlobalVariable *VTT, const CXXRecordDecl *RD) { VTTBuilder Builder(CGM.getContext(), RD, /*GenerateDefinition=*/true); - llvm::Type *Int8PtrTy = CGM.Int8PtrTy, *Int64Ty = CGM.Int64Ty; + llvm::Type *Int8PtrTy = CGM.Int8PtrTy, *Int32Ty = CGM.Int32Ty; llvm::ArrayType *ArrayType = llvm::ArrayType::get(Int8PtrTy, Builder.getVTTComponents().size()); @@ -75,8 +75,8 @@ CodeGenVTables::EmitVTTDefinition(llvm::GlobalVariable *VTT, } llvm::Value *Idxs[] = { - llvm::ConstantInt::get(Int64Ty, 0), - llvm::ConstantInt::get(Int64Ty, AddressPoint) + llvm::ConstantInt::get(Int32Ty, 0), + llvm::ConstantInt::get(Int32Ty, AddressPoint) }; llvm::Constant *Init = llvm::ConstantExpr::getInBoundsGetElementPtr( @@ -121,7 +121,7 @@ llvm::GlobalVariable *CodeGenVTables::GetAddrOfVTT(const CXXRecordDecl *RD) { llvm::GlobalVariable *GV = CGM.CreateOrReplaceCXXRuntimeVariable(Name, ArrayType, llvm::GlobalValue::ExternalLinkage); - GV->setUnnamedAddr(true); + GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); return GV; } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGVTables.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGVTables.cpp index a40aab2..9570550 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGVTables.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGVTables.cpp @@ -156,9 +156,7 @@ CodeGenFunction::GenerateVarArgsThunk(llvm::Function *Fn, // Clone to thunk. llvm::ValueToValueMapTy VMap; - llvm::Function *NewFn = llvm::CloneFunction(BaseFn, VMap, - /*ModuleLevelChanges=*/false); - CGM.getModule().getFunctionList().push_back(NewFn); + llvm::Function *NewFn = llvm::CloneFunction(BaseFn, VMap); Fn->replaceAllUsesWith(NewFn); NewFn->takeName(Fn); Fn->eraseFromParent(); @@ -286,15 +284,14 @@ void CodeGenFunction::EmitCallAndReturnForThunk(llvm::Value *Callee, CGM.getCXXABI().adjustCallArgsForDestructorThunk(*this, CurGD, CallArgs); // Add the rest of the arguments. 
- for (const ParmVarDecl *PD : MD->params()) + for (const ParmVarDecl *PD : MD->parameters()) EmitDelegateCallArg(CallArgs, PD, PD->getLocStart()); const FunctionProtoType *FPT = MD->getType()->getAs<FunctionProtoType>(); #ifndef NDEBUG - const CGFunctionInfo &CallFnInfo = - CGM.getTypes().arrangeCXXMethodCall(CallArgs, FPT, - RequiredArgs::forPrototypePlus(FPT, 1)); + const CGFunctionInfo &CallFnInfo = CGM.getTypes().arrangeCXXMethodCall( + CallArgs, FPT, RequiredArgs::forPrototypePlus(FPT, 1, MD)); assert(CallFnInfo.getRegParm() == CurFnInfo->getRegParm() && CallFnInfo.isNoReturn() == CurFnInfo->isNoReturn() && CallFnInfo.getCallingConvention() == CurFnInfo->getCallingConvention()); @@ -607,6 +604,8 @@ llvm::Constant *CodeGenVTables::CreateVTableInitializer( llvm::FunctionType::get(CGM.VoidTy, /*isVarArg=*/false); StringRef PureCallName = CGM.getCXXABI().GetPureVirtualCallName(); PureVirtualFn = CGM.CreateRuntimeFunction(Ty, PureCallName); + if (auto *F = dyn_cast<llvm::Function>(PureVirtualFn)) + F->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); PureVirtualFn = llvm::ConstantExpr::getBitCast(PureVirtualFn, CGM.Int8PtrTy); } @@ -618,6 +617,8 @@ llvm::Constant *CodeGenVTables::CreateVTableInitializer( StringRef DeletedCallName = CGM.getCXXABI().GetDeletedVirtualCallName(); DeletedVirtualFn = CGM.CreateRuntimeFunction(Ty, DeletedCallName); + if (auto *F = dyn_cast<llvm::Function>(DeletedVirtualFn)) + F->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); DeletedVirtualFn = llvm::ConstantExpr::getBitCast(DeletedVirtualFn, CGM.Int8PtrTy); } @@ -696,7 +697,7 @@ CodeGenVTables::GenerateConstructionVTable(const CXXRecordDecl *RD, CGM.setGlobalVisibility(VTable, RD); // V-tables are always unnamed_addr. - VTable->setUnnamedAddr(true); + VTable->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); llvm::Constant *RTTI = CGM.GetAddrOfRTTIDescriptor( CGM.getContext().getTagDeclType(Base.getBase())); @@ -708,7 +709,7 @@ CodeGenVTables::GenerateConstructionVTable(const CXXRecordDecl *RD, VTLayout->getNumVTableThunks(), RTTI); VTable->setInitializer(Init); - CGM.EmitVTableBitSetEntries(VTable, *VTLayout.get()); + CGM.EmitVTableTypeMetadata(VTable, *VTLayout.get()); return VTable; } @@ -719,7 +720,7 @@ static bool shouldEmitAvailableExternallyVTable(const CodeGenModule &CGM, CGM.getCXXABI().canSpeculativelyEmitVTable(RD); } -/// Compute the required linkage of the v-table for the given class. +/// Compute the required linkage of the vtable for the given class. /// /// Note that we only call this at the end of the translation unit. llvm::GlobalVariable::LinkageTypes @@ -793,6 +794,10 @@ CodeGenModule::getVTableLinkage(const CXXRecordDecl *RD) { return DiscardableODRLinkage; case TSK_ExplicitInstantiationDeclaration: + // Explicit instantiations in MSVC do not provide vtables, so we must emit + // our own. + if (getTarget().getCXXABI().isMicrosoft()) + return DiscardableODRLinkage; return shouldEmitAvailableExternallyVTable(*this, RD) ? llvm::GlobalVariable::AvailableExternallyLinkage : llvm::GlobalVariable::ExternalLinkage; @@ -804,7 +809,7 @@ CodeGenModule::getVTableLinkage(const CXXRecordDecl *RD) { llvm_unreachable("Invalid TemplateSpecializationKind!"); } -/// This is a callback from Sema to tell us that that a particular v-table is +/// This is a callback from Sema to tell us that that a particular vtable is /// required to be emitted in this translation unit. 
/// /// This is only called for vtables that _must_ be emitted (mainly due to key @@ -832,38 +837,43 @@ CodeGenVTables::GenerateClassData(const CXXRecordDecl *RD) { /// the translation unit. /// /// The only semantic restriction here is that the object file should -/// not contain a v-table definition when that v-table is defined +/// not contain a vtable definition when that vtable is defined /// strongly elsewhere. Otherwise, we'd just like to avoid emitting -/// v-tables when unnecessary. +/// vtables when unnecessary. bool CodeGenVTables::isVTableExternal(const CXXRecordDecl *RD) { assert(RD->isDynamicClass() && "Non-dynamic classes have no VTable."); + // We always synthesize vtables if they are needed in the MS ABI. MSVC doesn't + // emit them even if there is an explicit template instantiation. + if (CGM.getTarget().getCXXABI().isMicrosoft()) + return false; + // If we have an explicit instantiation declaration (and not a - // definition), the v-table is defined elsewhere. + // definition), the vtable is defined elsewhere. TemplateSpecializationKind TSK = RD->getTemplateSpecializationKind(); if (TSK == TSK_ExplicitInstantiationDeclaration) return true; // Otherwise, if the class is an instantiated template, the - // v-table must be defined here. + // vtable must be defined here. if (TSK == TSK_ImplicitInstantiation || TSK == TSK_ExplicitInstantiationDefinition) return false; // Otherwise, if the class doesn't have a key function (possibly - // anymore), the v-table must be defined here. + // anymore), the vtable must be defined here. const CXXMethodDecl *keyFunction = CGM.getContext().getCurrentKeyFunction(RD); if (!keyFunction) return false; // Otherwise, if we don't have a definition of the key function, the - // v-table must be defined somewhere else. + // vtable must be defined somewhere else. return !keyFunction->hasBody(); } /// Given that we're currently at the end of the translation unit, and -/// we've emitted a reference to the v-table for this class, should -/// we define that v-table? +/// we've emitted a reference to the vtable for this class, should +/// we define that vtable? static bool shouldEmitVTableAtEndOfTranslationUnit(CodeGenModule &CGM, const CXXRecordDecl *RD) { // If vtable is internal then it has to be done. @@ -875,7 +885,7 @@ static bool shouldEmitVTableAtEndOfTranslationUnit(CodeGenModule &CGM, } /// Given that at some point we emitted a reference to one or more -/// v-tables, and that we are now at the end of the translation unit, +/// vtables, and that we are now at the end of the translation unit, /// decide whether we should emit them. 
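The explicit-instantiation rule in isVTableExternal corresponds to the usual C++ pattern below; under the Itanium ABI the vtable is expected from the translation unit holding the explicit instantiation definition, while the new Microsoft ABI check makes Clang synthesize it locally instead. The class and function names are invented for illustration:

    // Illustrative sketch only.
    template <typename T> struct Widget {
      virtual ~Widget() {}
      virtual T get() const { return T(); }
    };
    extern template struct Widget<int>;   // explicit instantiation declaration:
                                          // Itanium ABI treats the vtable as external
    Widget<int> *make() { return new Widget<int>(); }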
void CodeGenModule::EmitDeferredVTables() { #ifndef NDEBUG @@ -889,25 +899,47 @@ void CodeGenModule::EmitDeferredVTables() { VTables.GenerateClassData(RD); assert(savedSize == DeferredVTables.size() && - "deferred extra v-tables during v-table emission?"); + "deferred extra vtables during vtable emission?"); DeferredVTables.clear(); } -bool CodeGenModule::IsCFIBlacklistedRecord(const CXXRecordDecl *RD) { - if (RD->hasAttr<UuidAttr>() && - getContext().getSanitizerBlacklist().isBlacklistedType("attr:uuid")) +bool CodeGenModule::HasHiddenLTOVisibility(const CXXRecordDecl *RD) { + LinkageInfo LV = RD->getLinkageAndVisibility(); + if (!isExternallyVisible(LV.getLinkage())) return true; - return getContext().getSanitizerBlacklist().isBlacklistedType( - RD->getQualifiedNameAsString()); + if (RD->hasAttr<LTOVisibilityPublicAttr>() || RD->hasAttr<UuidAttr>()) + return false; + + if (getTriple().isOSBinFormatCOFF()) { + if (RD->hasAttr<DLLExportAttr>() || RD->hasAttr<DLLImportAttr>()) + return false; + } else { + if (LV.getVisibility() != HiddenVisibility) + return false; + } + + if (getCodeGenOpts().LTOVisibilityPublicStd) { + const DeclContext *DC = RD; + while (1) { + auto *D = cast<Decl>(DC); + DC = DC->getParent(); + if (isa<TranslationUnitDecl>(DC->getRedeclContext())) { + if (auto *ND = dyn_cast<NamespaceDecl>(D)) + if (const IdentifierInfo *II = ND->getIdentifier()) + if (II->isStr("std") || II->isStr("stdext")) + return false; + break; + } + } + } + + return true; } -void CodeGenModule::EmitVTableBitSetEntries(llvm::GlobalVariable *VTable, - const VTableLayout &VTLayout) { - if (!LangOpts.Sanitize.has(SanitizerKind::CFIVCall) && - !LangOpts.Sanitize.has(SanitizerKind::CFINVCall) && - !LangOpts.Sanitize.has(SanitizerKind::CFIDerivedCast) && - !LangOpts.Sanitize.has(SanitizerKind::CFIUnrelatedCast)) +void CodeGenModule::EmitVTableTypeMetadata(llvm::GlobalVariable *VTable, + const VTableLayout &VTLayout) { + if (!getCodeGenOpts().PrepareForLTO) return; CharUnits PointerWidth = @@ -916,12 +948,8 @@ void CodeGenModule::EmitVTableBitSetEntries(llvm::GlobalVariable *VTable, typedef std::pair<const CXXRecordDecl *, unsigned> BSEntry; std::vector<BSEntry> BitsetEntries; // Create a bit set entry for each address point. - for (auto &&AP : VTLayout.getAddressPoints()) { - if (IsCFIBlacklistedRecord(AP.first.getBase())) - continue; - + for (auto &&AP : VTLayout.getAddressPoints()) BitsetEntries.push_back(std::make_pair(AP.first.getBase(), AP.second)); - } // Sort the bit set entries for determinism. std::sort(BitsetEntries.begin(), BitsetEntries.end(), @@ -949,10 +977,7 @@ void CodeGenModule::EmitVTableBitSetEntries(llvm::GlobalVariable *VTable, return E1.second < E2.second; }); - llvm::NamedMDNode *BitsetsMD = - getModule().getOrInsertNamedMetadata("llvm.bitsets"); for (auto BitsetEntry : BitsetEntries) - CreateVTableBitSetEntry(BitsetsMD, VTable, - PointerWidth * BitsetEntry.second, - BitsetEntry.first); + AddVTableTypeMetadata(VTable, PointerWidth * BitsetEntry.second, + BitsetEntry.first); } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGValue.h b/contrib/llvm/tools/clang/lib/CodeGen/CGValue.h index 3ccc4cd..53a376d 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGValue.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGValue.h @@ -445,7 +445,7 @@ class AggValueSlot { // Qualifiers Qualifiers Quals; - unsigned short Alignment; + unsigned Alignment; /// DestructedFlag - This is set to true if some external code is /// responsible for setting up a destructor for the slot. 
Otherwise diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenABITypes.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenABITypes.cpp index 643c996..166f44f 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenABITypes.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenABITypes.cpp @@ -26,45 +26,41 @@ using namespace clang; using namespace CodeGen; -CodeGenABITypes::CodeGenABITypes(ASTContext &C, llvm::Module &M, - CoverageSourceInfo *CoverageInfo) - : CGO(new CodeGenOptions), HSO(new HeaderSearchOptions), - PPO(new PreprocessorOptions), - CGM(new CodeGen::CodeGenModule(C, *HSO, *PPO, *CGO, M, C.getDiagnostics(), - CoverageInfo)) {} - -// Explicitly out-of-line because ~CodeGenModule() is private but -// CodeGenABITypes.h is part of clang's API. -CodeGenABITypes::~CodeGenABITypes() = default; - const CGFunctionInfo & -CodeGenABITypes::arrangeObjCMessageSendSignature(const ObjCMethodDecl *MD, - QualType receiverType) { - return CGM->getTypes().arrangeObjCMessageSendSignature(MD, receiverType); +CodeGen::arrangeObjCMessageSendSignature(CodeGenModule &CGM, + const ObjCMethodDecl *MD, + QualType receiverType) { + return CGM.getTypes().arrangeObjCMessageSendSignature(MD, receiverType); } const CGFunctionInfo & -CodeGenABITypes::arrangeFreeFunctionType(CanQual<FunctionProtoType> Ty, - const FunctionDecl *FD) { - return CGM->getTypes().arrangeFreeFunctionType(Ty, FD); +CodeGen::arrangeFreeFunctionType(CodeGenModule &CGM, + CanQual<FunctionProtoType> Ty, + const FunctionDecl *FD) { + return CGM.getTypes().arrangeFreeFunctionType(Ty, FD); } const CGFunctionInfo & -CodeGenABITypes::arrangeFreeFunctionType(CanQual<FunctionNoProtoType> Ty) { - return CGM->getTypes().arrangeFreeFunctionType(Ty); +CodeGen::arrangeFreeFunctionType(CodeGenModule &CGM, + CanQual<FunctionNoProtoType> Ty) { + return CGM.getTypes().arrangeFreeFunctionType(Ty); } const CGFunctionInfo & -CodeGenABITypes::arrangeCXXMethodType(const CXXRecordDecl *RD, - const FunctionProtoType *FTP, - const CXXMethodDecl *MD) { - return CGM->getTypes().arrangeCXXMethodType(RD, FTP, MD); +CodeGen::arrangeCXXMethodType(CodeGenModule &CGM, + const CXXRecordDecl *RD, + const FunctionProtoType *FTP, + const CXXMethodDecl *MD) { + return CGM.getTypes().arrangeCXXMethodType(RD, FTP, MD); } -const CGFunctionInfo &CodeGenABITypes::arrangeFreeFunctionCall( - CanQualType returnType, ArrayRef<CanQualType> argTypes, - FunctionType::ExtInfo info, RequiredArgs args) { - return CGM->getTypes().arrangeLLVMFunctionInfo( +const CGFunctionInfo & +CodeGen::arrangeFreeFunctionCall(CodeGenModule &CGM, + CanQualType returnType, + ArrayRef<CanQualType> argTypes, + FunctionType::ExtInfo info, + RequiredArgs args) { + return CGM.getTypes().arrangeLLVMFunctionInfo( returnType, /*IsInstanceMethod=*/false, /*IsChainCall=*/false, argTypes, - info, args); + info, {}, args); } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenAction.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenAction.cpp index 0a670ab..49738a2 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenAction.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenAction.cpp @@ -46,14 +46,13 @@ namespace clang { const CodeGenOptions &CodeGenOpts; const TargetOptions &TargetOpts; const LangOptions &LangOpts; - raw_pwrite_stream *AsmOutStream; + std::unique_ptr<raw_pwrite_stream> AsmOutStream; ASTContext *Context; Timer LLVMIRGeneration; std::unique_ptr<CodeGenerator> Gen; - std::unique_ptr<llvm::Module> TheModule; SmallVector<std::pair<unsigned, std::unique_ptr<llvm::Module>>, 4> LinkModules; 
@@ -69,11 +68,12 @@ namespace clang { const TargetOptions &TargetOpts, const LangOptions &LangOpts, bool TimePasses, const std::string &InFile, const SmallVectorImpl<std::pair<unsigned, llvm::Module *>> &LinkModules, - raw_pwrite_stream *OS, LLVMContext &C, + std::unique_ptr<raw_pwrite_stream> OS, LLVMContext &C, CoverageSourceInfo *CoverageInfo = nullptr) : Diags(Diags), Action(Action), CodeGenOpts(CodeGenOpts), - TargetOpts(TargetOpts), LangOpts(LangOpts), AsmOutStream(OS), - Context(nullptr), LLVMIRGeneration("LLVM IR Generation Time"), + TargetOpts(TargetOpts), LangOpts(LangOpts), + AsmOutStream(std::move(OS)), Context(nullptr), + LLVMIRGeneration("LLVM IR Generation Time"), Gen(CreateLLVMCodeGen(Diags, InFile, HeaderSearchOpts, PPOpts, CodeGenOpts, C, CoverageInfo)) { llvm::TimePassesIsEnabled = TimePasses; @@ -81,7 +81,10 @@ namespace clang { this->LinkModules.push_back( std::make_pair(I.first, std::unique_ptr<llvm::Module>(I.second))); } - std::unique_ptr<llvm::Module> takeModule() { return std::move(TheModule); } + llvm::Module *getModule() const { return Gen->GetModule(); } + std::unique_ptr<llvm::Module> takeModule() { + return std::unique_ptr<llvm::Module>(Gen->ReleaseModule()); + } void releaseLinkModules() { for (auto &I : LinkModules) I.second.release(); @@ -101,8 +104,6 @@ namespace clang { Gen->Initialize(Ctx); - TheModule.reset(Gen->GetModule()); - if (llvm::TimePassesIsEnabled) LLVMIRGeneration.stopTimer(); } @@ -123,14 +124,14 @@ namespace clang { return true; } - void HandleInlineMethodDefinition(CXXMethodDecl *D) override { + void HandleInlineFunctionDefinition(FunctionDecl *D) override { PrettyStackTraceDecl CrashInfo(D, SourceLocation(), Context->getSourceManager(), - "LLVM IR generation of inline method"); + "LLVM IR generation of inline function"); if (llvm::TimePassesIsEnabled) LLVMIRGeneration.startTimer(); - Gen->HandleInlineMethodDefinition(D); + Gen->HandleInlineFunctionDefinition(D); if (llvm::TimePassesIsEnabled) LLVMIRGeneration.stopTimer(); @@ -149,25 +150,12 @@ namespace clang { } // Silently ignore if we weren't initialized for some reason. - if (!TheModule) + if (!getModule()) return; - // Make sure IR generation is happy with the module. This is released by - // the module provider. - llvm::Module *M = Gen->ReleaseModule(); - if (!M) { - // The module has been released by IR gen on failures, do not double - // free. - TheModule.release(); - return; - } - - assert(TheModule.get() == M && - "Unexpected module change during IR generation"); - // Install an inline asm handler so that diagnostics get printed through // our diagnostics hooks. 
- LLVMContext &Ctx = TheModule->getContext(); + LLVMContext &Ctx = getModule()->getContext(); LLVMContext::InlineAsmDiagHandlerTy OldHandler = Ctx.getInlineAsmDiagnosticHandler(); void *OldContext = Ctx.getInlineAsmDiagnosticContext(); @@ -182,13 +170,15 @@ namespace clang { for (auto &I : LinkModules) { unsigned LinkFlags = I.first; CurLinkModule = I.second.get(); - if (Linker::linkModules(*M, std::move(I.second), LinkFlags)) + if (Linker::linkModules(*getModule(), std::move(I.second), LinkFlags)) return; } + EmbedBitcode(getModule(), CodeGenOpts, llvm::MemoryBufferRef()); + EmitBackendOutput(Diags, CodeGenOpts, TargetOpts, LangOpts, - C.getTargetInfo().getDataLayoutString(), - TheModule.get(), Action, AsmOutStream); + C.getTargetInfo().getDataLayout(), + getModule(), Action, std::move(AsmOutStream)); Ctx.setInlineAsmDiagnosticHandler(OldHandler, OldContext); @@ -210,21 +200,12 @@ namespace clang { Gen->CompleteTentativeDefinition(D); } - void HandleVTable(CXXRecordDecl *RD) override { - Gen->HandleVTable(RD); - } - - void HandleLinkerOptionPragma(llvm::StringRef Opts) override { - Gen->HandleLinkerOptionPragma(Opts); + void AssignInheritanceModel(CXXRecordDecl *RD) override { + Gen->AssignInheritanceModel(RD); } - void HandleDetectMismatch(llvm::StringRef Name, - llvm::StringRef Value) override { - Gen->HandleDetectMismatch(Name, Value); - } - - void HandleDependentLibrary(llvm::StringRef Opts) override { - Gen->HandleDependentLibrary(Opts); + void HandleVTable(CXXRecordDecl *RD) override { + Gen->HandleVTable(RD); } static void InlineAsmDiagHandler(const llvm::SMDiagnostic &SM,void *Context, @@ -238,6 +219,13 @@ namespace clang { ((BackendConsumer *)Context)->DiagnosticHandlerImpl(DI); } + /// Get the best possible source location to represent a diagnostic that + /// may have associated debug info. + const FullSourceLoc + getBestLocationFromDebugLoc(const llvm::DiagnosticInfoWithDebugLocBase &D, + bool &BadDebugInfo, StringRef &Filename, + unsigned &Line, unsigned &Column) const; + void InlineAsmDiagHandler2(const llvm::SMDiagnostic &, SourceLocation LocCookie); @@ -250,6 +238,8 @@ namespace clang { /// \return True if the diagnostic has been successfully reported, false /// otherwise. bool StackSizeDiagHandler(const llvm::DiagnosticInfoStackSize &D); + /// \brief Specialized handler for unsupported backend feature diagnostic. + void UnsupportedDiagHandler(const llvm::DiagnosticInfoUnsupported &D); /// \brief Specialized handlers for optimization remarks. /// Note that these handlers only accept remarks and they always handle /// them. @@ -426,25 +416,21 @@ BackendConsumer::StackSizeDiagHandler(const llvm::DiagnosticInfoStackSize &D) { return false; if (const Decl *ND = Gen->GetDeclForMangledName(D.getFunction().getName())) { + // FIXME: Shouldn't need to truncate to uint32_t Diags.Report(ND->getASTContext().getFullLoc(ND->getLocation()), diag::warn_fe_frame_larger_than) - << D.getStackSize() << Decl::castToDeclContext(ND); + << static_cast<uint32_t>(D.getStackSize()) << Decl::castToDeclContext(ND); return true; } return false; } -void BackendConsumer::EmitOptimizationMessage( - const llvm::DiagnosticInfoOptimizationBase &D, unsigned DiagID) { - // We only support warnings and remarks. 
- assert(D.getSeverity() == llvm::DS_Remark || - D.getSeverity() == llvm::DS_Warning); - +const FullSourceLoc BackendConsumer::getBestLocationFromDebugLoc( + const llvm::DiagnosticInfoWithDebugLocBase &D, bool &BadDebugInfo, StringRef &Filename, + unsigned &Line, unsigned &Column) const { SourceManager &SourceMgr = Context->getSourceManager(); FileManager &FileMgr = SourceMgr.getFileManager(); - StringRef Filename; - unsigned Line, Column; SourceLocation DILoc; if (D.isLocationAvailable()) { @@ -455,6 +441,7 @@ void BackendConsumer::EmitOptimizationMessage( // source manager, so pass 1 if Column is not set. DILoc = SourceMgr.translateFileLineCol(FE, Line, Column ? Column : 1); } + BadDebugInfo = DILoc.isInvalid(); } // If a location isn't available, try to approximate it using the associated @@ -463,18 +450,63 @@ void BackendConsumer::EmitOptimizationMessage( FullSourceLoc Loc(DILoc, SourceMgr); if (Loc.isInvalid()) if (const Decl *FD = Gen->GetDeclForMangledName(D.getFunction().getName())) - Loc = FD->getASTContext().getFullLoc(FD->getBodyRBrace()); + Loc = FD->getASTContext().getFullLoc(FD->getLocation()); + + if (DILoc.isInvalid() && D.isLocationAvailable()) + // If we were not able to translate the file:line:col information + // back to a SourceLocation, at least emit a note stating that + // we could not translate this location. This can happen in the + // case of #line directives. + Diags.Report(Loc, diag::note_fe_backend_invalid_loc) + << Filename << Line << Column; + + return Loc; +} + +void BackendConsumer::UnsupportedDiagHandler( + const llvm::DiagnosticInfoUnsupported &D) { + // We only support errors. + assert(D.getSeverity() == llvm::DS_Error); + + StringRef Filename; + unsigned Line, Column; + bool BadDebugInfo; + FullSourceLoc Loc = getBestLocationFromDebugLoc(D, BadDebugInfo, Filename, + Line, Column); + + Diags.Report(Loc, diag::err_fe_backend_unsupported) << D.getMessage().str(); + + if (BadDebugInfo) + // If we were not able to translate the file:line:col information + // back to a SourceLocation, at least emit a note stating that + // we could not translate this location. This can happen in the + // case of #line directives. + Diags.Report(Loc, diag::note_fe_backend_invalid_loc) + << Filename << Line << Column; +} + +void BackendConsumer::EmitOptimizationMessage( + const llvm::DiagnosticInfoOptimizationBase &D, unsigned DiagID) { + // We only support warnings and remarks. + assert(D.getSeverity() == llvm::DS_Remark || + D.getSeverity() == llvm::DS_Warning); + + StringRef Filename; + unsigned Line, Column; + bool BadDebugInfo = false; + FullSourceLoc Loc = getBestLocationFromDebugLoc(D, BadDebugInfo, Filename, + Line, Column); Diags.Report(Loc, DiagID) << AddFlagValue(D.getPassName() ? D.getPassName() : "") << D.getMsg().str(); - if (DILoc.isInvalid() && D.isLocationAvailable()) + if (BadDebugInfo) // If we were not able to translate the file:line:col information // back to a SourceLocation, at least emit a note stating that // we could not translate this location. This can happen in the // case of #line directives. - Diags.Report(Loc, diag::note_fe_backend_optimization_remark_invalid_loc) + Diags.Report(Loc, diag::note_fe_backend_invalid_loc) << Filename << Line << Column; } @@ -504,7 +536,7 @@ void BackendConsumer::OptimizationRemarkHandler( // llvm::DiagnosticInfo::AlwasyPrint or if the -Rpass-analysis flag has a // regular expression that matches the name of the pass name in \p D. 
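In practice these handlers fire either when a remark is marked always-print or when the user opts in with a matching pattern; for example, building with -Rpass-analysis=loop-vectorize (and, analogously, -Rpass=<regex> or -Rpass-missed=<regex>) turns the corresponding backend remarks into regular Clang diagnostics at the source locations recovered above.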
- if (D.getPassName() == llvm::DiagnosticInfo::AlwaysPrint || + if (D.shouldAlwaysPrint() || (CodeGenOpts.OptimizationRemarkAnalysisPattern && CodeGenOpts.OptimizationRemarkAnalysisPattern->match(D.getPassName()))) EmitOptimizationMessage( @@ -517,7 +549,7 @@ void BackendConsumer::OptimizationRemarkHandler( // llvm::DiagnosticInfo::AlwasyPrint or if the -Rpass-analysis flag has a // regular expression that matches the name of the pass name in \p D. - if (D.getPassName() == llvm::DiagnosticInfo::AlwaysPrint || + if (D.shouldAlwaysPrint() || (CodeGenOpts.OptimizationRemarkAnalysisPattern && CodeGenOpts.OptimizationRemarkAnalysisPattern->match(D.getPassName()))) EmitOptimizationMessage( @@ -530,7 +562,7 @@ void BackendConsumer::OptimizationRemarkHandler( // llvm::DiagnosticInfo::AlwasyPrint or if the -Rpass-analysis flag has a // regular expression that matches the name of the pass name in \p D. - if (D.getPassName() == llvm::DiagnosticInfo::AlwaysPrint || + if (D.shouldAlwaysPrint() || (CodeGenOpts.OptimizationRemarkAnalysisPattern && CodeGenOpts.OptimizationRemarkAnalysisPattern->match(D.getPassName()))) EmitOptimizationMessage( @@ -599,6 +631,9 @@ void BackendConsumer::DiagnosticHandlerImpl(const DiagnosticInfo &DI) { // handler. OptimizationFailureHandler(cast<DiagnosticInfoOptimizationFailure>(DI)); return; + case llvm::DK_Unsupported: + UnsupportedDiagHandler(cast<DiagnosticInfoUnsupported>(DI)); + return; default: // Plugin IDs are not bound to any value as they are set dynamically. ComputeDiagRemarkID(Severity, backend_plugin, DiagID); @@ -657,7 +692,7 @@ llvm::LLVMContext *CodeGenAction::takeLLVMContext() { return VMContext; } -static raw_pwrite_stream * +static std::unique_ptr<raw_pwrite_stream> GetOutputStream(CompilerInstance &CI, StringRef InFile, BackendAction Action) { switch (Action) { case Backend_EmitAssembly: @@ -680,7 +715,7 @@ GetOutputStream(CompilerInstance &CI, StringRef InFile, BackendAction Action) { std::unique_ptr<ASTConsumer> CodeGenAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) { BackendAction BA = static_cast<BackendAction>(Act); - raw_pwrite_stream *OS = GetOutputStream(CI, InFile, BA); + std::unique_ptr<raw_pwrite_stream> OS = GetOutputStream(CI, InFile, BA); if (BA != Backend_EmitNothing && !OS) return nullptr; @@ -720,7 +755,7 @@ CodeGenAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) { BA, CI.getDiagnostics(), CI.getHeaderSearchOpts(), CI.getPreprocessorOpts(), CI.getCodeGenOpts(), CI.getTargetOpts(), CI.getLangOpts(), CI.getFrontendOpts().ShowTimers, InFile, LinkModules, - OS, *VMContext, CoverageInfo)); + std::move(OS), *VMContext, CoverageInfo)); BEConsumer = Result.get(); return std::move(Result); } @@ -729,6 +764,22 @@ static void BitcodeInlineAsmDiagHandler(const llvm::SMDiagnostic &SM, void *Context, unsigned LocCookie) { SM.print(nullptr, llvm::errs()); + + auto Diags = static_cast<DiagnosticsEngine *>(Context); + unsigned DiagID; + switch (SM.getKind()) { + case llvm::SourceMgr::DK_Error: + DiagID = diag::err_fe_inline_asm; + break; + case llvm::SourceMgr::DK_Warning: + DiagID = diag::warn_fe_inline_asm; + break; + case llvm::SourceMgr::DK_Note: + DiagID = diag::note_fe_inline_asm; + break; + } + + Diags->Report(DiagID).AddString("cannot compile inline asm"); } void CodeGenAction::ExecuteAction() { @@ -736,7 +787,8 @@ void CodeGenAction::ExecuteAction() { if (getCurrentFileKind() == IK_LLVM_IR) { BackendAction BA = static_cast<BackendAction>(Act); CompilerInstance &CI = getCompilerInstance(); - raw_pwrite_stream 
*OS = GetOutputStream(CI, getCurrentFile(), BA); + std::unique_ptr<raw_pwrite_stream> OS = + GetOutputStream(CI, getCurrentFile(), BA); if (BA != Backend_EmitNothing && !OS) return; @@ -747,6 +799,11 @@ void CodeGenAction::ExecuteAction() { if (Invalid) return; + // For ThinLTO backend invocations, ensure that the context + // merges types based on ODR identifiers. + if (!CI.getCodeGenOpts().ThinLTOIndexFile.empty()) + VMContext->enableDebugTypeODRUniquing(); + llvm::SMDiagnostic Err; TheModule = parseIR(MainFile->getMemBufferRef(), Err, *VMContext); if (!TheModule) { @@ -779,11 +836,16 @@ void CodeGenAction::ExecuteAction() { TheModule->setTargetTriple(TargetOpts.Triple); } + EmbedBitcode(TheModule.get(), CI.getCodeGenOpts(), + MainFile->getMemBufferRef()); + LLVMContext &Ctx = TheModule->getContext(); - Ctx.setInlineAsmDiagnosticHandler(BitcodeInlineAsmDiagHandler); + Ctx.setInlineAsmDiagnosticHandler(BitcodeInlineAsmDiagHandler, + &CI.getDiagnostics()); + EmitBackendOutput(CI.getDiagnostics(), CI.getCodeGenOpts(), TargetOpts, - CI.getLangOpts(), CI.getTarget().getDataLayoutString(), - TheModule.get(), BA, OS); + CI.getLangOpts(), CI.getTarget().getDataLayout(), + TheModule.get(), BA, std::move(OS)); return; } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.cpp index e38ff0a..11e4ad9 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.cpp @@ -25,6 +25,7 @@ #include "clang/AST/Decl.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/StmtCXX.h" +#include "clang/AST/StmtObjC.h" #include "clang/Basic/Builtins.h" #include "clang/Basic/TargetInfo.h" #include "clang/CodeGen/CGFunctionInfo.h" @@ -397,10 +398,17 @@ bool CodeGenFunction::ShouldInstrumentFunction() { return true; } +/// ShouldXRayInstrument - Return true if the current function should be +/// instrumented with XRay nop sleds. +bool CodeGenFunction::ShouldXRayInstrumentFunction() const { + return CGM.getCodeGenOpts().XRayInstrumentFunctions; +} + /// EmitFunctionInstrumentation - Emit LLVM code to call the specified /// instrumentation function with the current function and the call site, if /// function instrumentation is enabled. void CodeGenFunction::EmitFunctionInstrumentation(const char *Fn) { + auto NL = ApplyDebugLocation::CreateArtificial(*this); // void __cyg_profile_func_{enter,exit} (void *this_fn, void *call_site); llvm::PointerType *PointerTy = Int8PtrTy; llvm::Type *ProfileFuncArgs[] = { PointerTy, PointerTy }; @@ -429,12 +437,28 @@ void CodeGenFunction::EmitMCountInstrumentation() { EmitNounwindRuntimeCall(MCountFn); } +// Returns the address space id that should be produced to the +// kernel_arg_addr_space metadata. This is always fixed to the ids +// as specified in the SPIR 2.0 specification in order to differentiate +// for example in clGetKernelArgInfo() implementation between the address +// spaces with targets without unique mapping to the OpenCL address spaces +// (basically all single AS CPUs). +static unsigned ArgInfoAddressSpace(unsigned LangAS) { + switch (LangAS) { + case LangAS::opencl_global: return 1; + case LangAS::opencl_constant: return 2; + case LangAS::opencl_local: return 3; + case LangAS::opencl_generic: return 4; // Not in SPIR 2.0 specs. + default: + return 0; // Assume private. + } +} + // OpenCL v1.2 s5.6.4.6 allows the compiler to store kernel argument // information in the program executable. 
The argument information stored // includes the argument name, its type, the address and access qualifiers used. static void GenOpenCLArgMetadata(const FunctionDecl *FD, llvm::Function *Fn, CodeGenModule &CGM, llvm::LLVMContext &Context, - SmallVector<llvm::Metadata *, 5> &kernelMDArgs, CGBuilderTy &Builder, ASTContext &ASTCtx) { // Create MDNodes that represent the kernel arg metadata. // Each MDNode is a list in the form of "key", N number of values which is @@ -444,28 +468,21 @@ static void GenOpenCLArgMetadata(const FunctionDecl *FD, llvm::Function *Fn, // MDNode for the kernel argument address space qualifiers. SmallVector<llvm::Metadata *, 8> addressQuals; - addressQuals.push_back(llvm::MDString::get(Context, "kernel_arg_addr_space")); // MDNode for the kernel argument access qualifiers (images only). SmallVector<llvm::Metadata *, 8> accessQuals; - accessQuals.push_back(llvm::MDString::get(Context, "kernel_arg_access_qual")); // MDNode for the kernel argument type names. SmallVector<llvm::Metadata *, 8> argTypeNames; - argTypeNames.push_back(llvm::MDString::get(Context, "kernel_arg_type")); // MDNode for the kernel argument base type names. SmallVector<llvm::Metadata *, 8> argBaseTypeNames; - argBaseTypeNames.push_back( - llvm::MDString::get(Context, "kernel_arg_base_type")); // MDNode for the kernel argument type qualifiers. SmallVector<llvm::Metadata *, 8> argTypeQuals; - argTypeQuals.push_back(llvm::MDString::get(Context, "kernel_arg_type_qual")); // MDNode for the kernel argument names. SmallVector<llvm::Metadata *, 8> argNames; - argNames.push_back(llvm::MDString::get(Context, "kernel_arg_name")); for (unsigned i = 0, e = FD->getNumParams(); i != e; ++i) { const ParmVarDecl *parm = FD->getParamDecl(i); @@ -477,7 +494,7 @@ static void GenOpenCLArgMetadata(const FunctionDecl *FD, llvm::Function *Fn, // Get address qualifier. addressQuals.push_back(llvm::ConstantAsMetadata::get(Builder.getInt32( - ASTCtx.getTargetAddressSpace(pointeeTy.getAddressSpace())))); + ArgInfoAddressSpace(pointeeTy.getAddressSpace())))); // Get argument type name. std::string typeName = @@ -514,8 +531,7 @@ static void GenOpenCLArgMetadata(const FunctionDecl *FD, llvm::Function *Fn, uint32_t AddrSpc = 0; bool isPipe = ty->isPipeType(); if (ty->isImageType() || isPipe) - AddrSpc = - CGM.getContext().getTargetAddressSpace(LangAS::opencl_global); + AddrSpc = ArgInfoAddressSpace(LangAS::opencl_global); addressQuals.push_back( llvm::ConstantAsMetadata::get(Builder.getInt32(AddrSpc))); @@ -523,7 +539,8 @@ static void GenOpenCLArgMetadata(const FunctionDecl *FD, llvm::Function *Fn, // Get argument type name. 
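As a rough illustration (OpenCL C, with a kernel and parameter names invented here), a kernel such as the one below would now carry the argument metadata directly on the function: kernel_arg_addr_space of 1 (global pointer), 3 (local pointer) and 1 again (images are reported in the global space by the code above), and kernel_arg_access_qual of "none", "none", "read_only".

    // Illustrative sketch only.
    kernel void blur(global float *dst,
                     local  float *tile,
                     read_only image2d_t src) {
      // ...
    }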
std::string typeName; if (isPipe) - typeName = cast<PipeType>(ty)->getElementType().getAsString(Policy); + typeName = ty.getCanonicalType()->getAs<PipeType>()->getElementType() + .getAsString(Policy); else typeName = ty.getUnqualifiedType().getAsString(Policy); @@ -536,8 +553,9 @@ static void GenOpenCLArgMetadata(const FunctionDecl *FD, llvm::Function *Fn, std::string baseTypeName; if (isPipe) - baseTypeName = - cast<PipeType>(ty)->getElementType().getCanonicalType().getAsString(Policy); + baseTypeName = ty.getCanonicalType()->getAs<PipeType>() + ->getElementType().getCanonicalType() + .getAsString(Policy); else baseTypeName = ty.getUnqualifiedType().getCanonicalType().getAsString(Policy); @@ -561,15 +579,14 @@ static void GenOpenCLArgMetadata(const FunctionDecl *FD, llvm::Function *Fn, argTypeQuals.push_back(llvm::MDString::get(Context, typeQuals)); // Get image and pipe access qualifier: - // FIXME: now image and pipe share the same access qualifier maybe we can - // refine it to OpenCL access qualifier and also handle write_read if (ty->isImageType()|| ty->isPipeType()) { - const OpenCLImageAccessAttr *A = parm->getAttr<OpenCLImageAccessAttr>(); + const OpenCLAccessAttr *A = parm->getAttr<OpenCLAccessAttr>(); if (A && A->isWriteOnly()) accessQuals.push_back(llvm::MDString::get(Context, "write_only")); + else if (A && A->isReadWrite()) + accessQuals.push_back(llvm::MDString::get(Context, "read_write")); else accessQuals.push_back(llvm::MDString::get(Context, "read_only")); - // FIXME: what about read_write? } else accessQuals.push_back(llvm::MDString::get(Context, "none")); @@ -577,13 +594,19 @@ static void GenOpenCLArgMetadata(const FunctionDecl *FD, llvm::Function *Fn, argNames.push_back(llvm::MDString::get(Context, parm->getName())); } - kernelMDArgs.push_back(llvm::MDNode::get(Context, addressQuals)); - kernelMDArgs.push_back(llvm::MDNode::get(Context, accessQuals)); - kernelMDArgs.push_back(llvm::MDNode::get(Context, argTypeNames)); - kernelMDArgs.push_back(llvm::MDNode::get(Context, argBaseTypeNames)); - kernelMDArgs.push_back(llvm::MDNode::get(Context, argTypeQuals)); + Fn->setMetadata("kernel_arg_addr_space", + llvm::MDNode::get(Context, addressQuals)); + Fn->setMetadata("kernel_arg_access_qual", + llvm::MDNode::get(Context, accessQuals)); + Fn->setMetadata("kernel_arg_type", + llvm::MDNode::get(Context, argTypeNames)); + Fn->setMetadata("kernel_arg_base_type", + llvm::MDNode::get(Context, argBaseTypeNames)); + Fn->setMetadata("kernel_arg_type_qual", + llvm::MDNode::get(Context, argTypeQuals)); if (CGM.getCodeGenOpts().EmitOpenCLArgMetadata) - kernelMDArgs.push_back(llvm::MDNode::get(Context, argNames)); + Fn->setMetadata("kernel_arg_name", + llvm::MDNode::get(Context, argNames)); } void CodeGenFunction::EmitOpenCLKernelMetadata(const FunctionDecl *FD, @@ -594,11 +617,7 @@ void CodeGenFunction::EmitOpenCLKernelMetadata(const FunctionDecl *FD, llvm::LLVMContext &Context = getLLVMContext(); - SmallVector<llvm::Metadata *, 5> kernelMDArgs; - kernelMDArgs.push_back(llvm::ConstantAsMetadata::get(Fn)); - - GenOpenCLArgMetadata(FD, Fn, CGM, Context, kernelMDArgs, Builder, - getContext()); + GenOpenCLArgMetadata(FD, Fn, CGM, Context, Builder, getContext()); if (const VecTypeHintAttr *A = FD->getAttr<VecTypeHintAttr>()) { QualType hintQTy = A->getTypeHint(); @@ -607,37 +626,29 @@ void CodeGenFunction::EmitOpenCLKernelMetadata(const FunctionDecl *FD, hintQTy->isSignedIntegerType() || (hintEltQTy && hintEltQTy->getElementType()->isSignedIntegerType()); llvm::Metadata *attrMDArgs[] = { - 
llvm::MDString::get(Context, "vec_type_hint"), llvm::ConstantAsMetadata::get(llvm::UndefValue::get( CGM.getTypes().ConvertType(A->getTypeHint()))), llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( llvm::IntegerType::get(Context, 32), llvm::APInt(32, (uint64_t)(isSignedInteger ? 1 : 0))))}; - kernelMDArgs.push_back(llvm::MDNode::get(Context, attrMDArgs)); + Fn->setMetadata("vec_type_hint", llvm::MDNode::get(Context, attrMDArgs)); } if (const WorkGroupSizeHintAttr *A = FD->getAttr<WorkGroupSizeHintAttr>()) { llvm::Metadata *attrMDArgs[] = { - llvm::MDString::get(Context, "work_group_size_hint"), llvm::ConstantAsMetadata::get(Builder.getInt32(A->getXDim())), llvm::ConstantAsMetadata::get(Builder.getInt32(A->getYDim())), llvm::ConstantAsMetadata::get(Builder.getInt32(A->getZDim()))}; - kernelMDArgs.push_back(llvm::MDNode::get(Context, attrMDArgs)); + Fn->setMetadata("work_group_size_hint", llvm::MDNode::get(Context, attrMDArgs)); } if (const ReqdWorkGroupSizeAttr *A = FD->getAttr<ReqdWorkGroupSizeAttr>()) { llvm::Metadata *attrMDArgs[] = { - llvm::MDString::get(Context, "reqd_work_group_size"), llvm::ConstantAsMetadata::get(Builder.getInt32(A->getXDim())), llvm::ConstantAsMetadata::get(Builder.getInt32(A->getYDim())), llvm::ConstantAsMetadata::get(Builder.getInt32(A->getZDim()))}; - kernelMDArgs.push_back(llvm::MDNode::get(Context, attrMDArgs)); + Fn->setMetadata("reqd_work_group_size", llvm::MDNode::get(Context, attrMDArgs)); } - - llvm::MDNode *kernelMDNode = llvm::MDNode::get(Context, kernelMDArgs); - llvm::NamedMDNode *OpenCLKernelMetadata = - CGM.getModule().getOrInsertNamedMetadata("opencl.kernels"); - OpenCLKernelMetadata->addOperand(kernelMDNode); } /// Determine whether the function F ends with a return stmt. @@ -670,6 +681,9 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, DidCallStackSave = false; CurCodeDecl = D; + if (const auto *FD = dyn_cast_or_null<FunctionDecl>(D)) + if (FD->usesSEHTry()) + CurSEHParent = FD; CurFuncDecl = (D ? D->getNonClosureContext() : nullptr); FnRetTy = RetTy; CurFn = Fn; @@ -695,20 +709,46 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, if (SanOpts.has(SanitizerKind::SafeStack)) Fn->addFnAttr(llvm::Attribute::SafeStack); + // Apply xray attributes to the function (as a string, for now) + if (D && ShouldXRayInstrumentFunction()) { + if (const auto *XRayAttr = D->getAttr<XRayInstrumentAttr>()) { + if (XRayAttr->alwaysXRayInstrument()) + Fn->addFnAttr("function-instrument", "xray-always"); + if (XRayAttr->neverXRayInstrument()) + Fn->addFnAttr("function-instrument", "xray-never"); + } else { + Fn->addFnAttr( + "xray-instruction-threshold", + llvm::itostr(CGM.getCodeGenOpts().XRayInstructionThreshold)); + } + } + // Pass inline keyword to optimizer if it appears explicitly on any // declaration. Also, in the case of -fno-inline attach NoInline - // attribute to all function that are not marked AlwaysInline. + // attribute to all functions that are not marked AlwaysInline, or + // to all functions that are not marked inline or implicitly inline + // in the case of -finline-hint-functions. 
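A hedged sketch of the source-level markers this logic reacts to (function names invented; the GNU attribute spelling is used). Under -fno-inline every function without always_inline gets NoInline; under -finline-hint-functions only functions declared (or implicitly) inline keep their inlining eligibility; and the XRay attribute is lowered to the "function-instrument"="xray-always" string attribute:

    // Illustrative sketch only.
    inline int fast_path(int x) { return x + 1; }   // inline redecl -> InlineHint
    int plain_path(int x) { return x - 1; }         // -finline-hint-functions -> NoInline
    __attribute__((xray_always_instrument))
    void on_request() {}                            // "function-instrument"="xray-always"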
if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) { - if (!CGM.getCodeGenOpts().NoInline) { + const CodeGenOptions& CodeGenOpts = CGM.getCodeGenOpts(); + if (!CodeGenOpts.NoInline) { for (auto RI : FD->redecls()) if (RI->isInlineSpecified()) { Fn->addFnAttr(llvm::Attribute::InlineHint); break; } + if (CodeGenOpts.getInlining() == CodeGenOptions::OnlyHintInlining && + !FD->isInlined() && !Fn->hasFnAttribute(llvm::Attribute::InlineHint)) + Fn->addFnAttr(llvm::Attribute::NoInline); } else if (!FD->hasAttr<AlwaysInlineAttr>()) Fn->addFnAttr(llvm::Attribute::NoInline); + if (CGM.getLangOpts().OpenMP && FD->hasAttr<OMPDeclareSimdDeclAttr>()) + CGM.getOpenMPRuntime().emitDeclareSimdFunction(FD, Fn); } + // Add no-jump-tables value. + Fn->addFnAttr("no-jump-tables", + llvm::toStringRef(CGM.getCodeGenOpts().NoUseJumpTables)); + if (getLangOpts().OpenCL) { // Add metadata for a kernel function. if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) @@ -745,9 +785,7 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, // later. Don't create this with the builder, because we don't want it // folded. llvm::Value *Undef = llvm::UndefValue::get(Int32Ty); - AllocaInsertPt = new llvm::BitCastInst(Undef, Int32Ty, "", EntryBB); - if (Builder.isNamePreserving()) - AllocaInsertPt->setName("allocapt"); + AllocaInsertPt = new llvm::BitCastInst(Undef, Int32Ty, "allocapt", EntryBB); ReturnBlock = getJumpDestInCurrentScope("return"); @@ -755,15 +793,18 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, // Emit subprogram debug descriptor. if (CGDebugInfo *DI = getDebugInfo()) { + // Reconstruct the type from the argument list so that implicit parameters, + // such as 'this' and 'vtt', show up in the debug info. Preserve the calling + // convention. + CallingConv CC = CallingConv::CC_C; + if (auto *FD = dyn_cast_or_null<FunctionDecl>(D)) + if (const auto *SrcFnTy = FD->getType()->getAs<FunctionType>()) + CC = SrcFnTy->getCallConv(); SmallVector<QualType, 16> ArgTypes; - for (FunctionArgList::const_iterator i = Args.begin(), e = Args.end(); - i != e; ++i) { - ArgTypes.push_back((*i)->getType()); - } - - QualType FnType = - getContext().getFunctionType(RetTy, ArgTypes, - FunctionProtoType::ExtProtoInfo()); + for (const VarDecl *VD : Args) + ArgTypes.push_back(VD->getType()); + QualType FnType = getContext().getFunctionType( + RetTy, ArgTypes, FunctionProtoType::ExtProtoInfo(CC)); DI->EmitFunctionStart(GD, Loc, StartLoc, FnType, CurFn, Builder); } @@ -823,10 +864,22 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, MD->getParent()->getCaptureFields(LambdaCaptureFields, LambdaThisCaptureField); if (LambdaThisCaptureField) { - // If this lambda captures this, load it. - LValue ThisLValue = EmitLValueForLambdaField(LambdaThisCaptureField); - CXXThisValue = EmitLoadOfLValue(ThisLValue, - SourceLocation()).getScalarVal(); + // If the lambda captures the object referred to by '*this' - either by + // value or by reference, make sure CXXThisValue points to the correct + // object. + + // Get the lvalue for the field (which is a copy of the enclosing object + // or contains the address of the enclosing object). + LValue ThisFieldLValue = EmitLValueForLambdaField(LambdaThisCaptureField); + if (!LambdaThisCaptureField->getType()->isPointerType()) { + // If the enclosing object was captured by value, just use its address. + CXXThisValue = ThisFieldLValue.getAddress().getPointer(); + } else { + // Load the lvalue pointed to by the field, since '*this' was captured + // by reference. 
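The two branches above correspond to the two ways a lambda can capture the enclosing object: C++17's [*this] stores a copy of the object in the closure (a non-pointer field, so its own address is used), while [this] stores the pointer, which must be loaded. A small sketch (type and member names invented):

    // Illustrative sketch only (requires C++17 for [*this]).
    struct Counter {
      int n = 0;
      auto byCopy() { return [*this] { return n; }; }  // field is Counter: use its address
      auto byRef()  { return [this]  { return n; }; }  // field is Counter*: load it
    };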
+ CXXThisValue = + EmitLoadOfLValue(ThisFieldLValue, SourceLocation()).getScalarVal(); + } } for (auto *FD : MD->getParent()->fields()) { if (FD->hasCapturedVLAType()) { @@ -883,7 +936,7 @@ void CodeGenFunction::EmitFunctionBody(FunctionArgList &Args, void CodeGenFunction::EmitBlockWithFallThrough(llvm::BasicBlock *BB, const Stmt *S) { llvm::BasicBlock *SkipCountBB = nullptr; - if (HaveInsertPoint() && CGM.getCodeGenOpts().ProfileInstrGenerate) { + if (HaveInsertPoint() && CGM.getCodeGenOpts().hasProfileClangInstr()) { // When instrumenting for profiling, the fallthrough to certain // statements needs to skip over the instrumentation code so that we // get an accurate count. @@ -904,7 +957,7 @@ void CodeGenFunction::EmitBlockWithFallThrough(llvm::BasicBlock *BB, static void TryMarkNoThrow(llvm::Function *F) { // LLVM treats 'nounwind' on a function as part of the type, so we // can't do this on functions that can be overwritten. - if (F->mayBeOverridden()) return; + if (F->isInterposable()) return; for (llvm::BasicBlock &BB : *F) for (llvm::Instruction &I : BB) @@ -914,18 +967,11 @@ static void TryMarkNoThrow(llvm::Function *F) { F->setDoesNotThrow(); } -void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn, - const CGFunctionInfo &FnInfo) { +QualType CodeGenFunction::BuildFunctionArgList(GlobalDecl GD, + FunctionArgList &Args) { const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl()); - - // Check if we should generate debug info for this function. - if (FD->hasAttr<NoDebugAttr>()) - DebugInfo = nullptr; // disable debug info indefinitely for this function - - FunctionArgList Args; QualType ResTy = FD->getReturnType(); - CurGD = GD; const CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(FD); if (MD && MD->isInstance()) { if (CGM.getCXXABI().HasThisReturn(GD)) @@ -935,22 +981,48 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn, CGM.getCXXABI().buildThisParam(*this, Args); } - for (auto *Param : FD->params()) { - Args.push_back(Param); - if (!Param->hasAttr<PassObjectSizeAttr>()) - continue; - - IdentifierInfo *NoID = nullptr; - auto *Implicit = ImplicitParamDecl::Create( - getContext(), Param->getDeclContext(), Param->getLocation(), NoID, - getContext().getSizeType()); - SizeArguments[Param] = Implicit; - Args.push_back(Implicit); + // The base version of an inheriting constructor whose constructed base is a + // virtual base is not passed any arguments (because it doesn't actually call + // the inherited constructor). 
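// Illustrative sketch, not part of the patch: the two capture forms the
// LambdaThisCaptureField code distinguishes. With [*this] (C++17) the capture
// field is a copy of the enclosing object, so its address becomes CXXThisValue
// directly; with [this] the field is a pointer and must be loaded first.
struct Widget {
  int n = 0;
  auto byCopy() { return [*this] { return n; }; } // field type: Widget
  auto byRef()  { return [this]  { return n; }; } // field type: Widget *
};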
+ bool PassedParams = true; + if (const CXXConstructorDecl *CD = dyn_cast<CXXConstructorDecl>(FD)) + if (auto Inherited = CD->getInheritedConstructor()) + PassedParams = + getTypes().inheritingCtorHasParams(Inherited, GD.getCtorType()); + + if (PassedParams) { + for (auto *Param : FD->parameters()) { + Args.push_back(Param); + if (!Param->hasAttr<PassObjectSizeAttr>()) + continue; + + IdentifierInfo *NoID = nullptr; + auto *Implicit = ImplicitParamDecl::Create( + getContext(), Param->getDeclContext(), Param->getLocation(), NoID, + getContext().getSizeType()); + SizeArguments[Param] = Implicit; + Args.push_back(Implicit); + } } if (MD && (isa<CXXConstructorDecl>(MD) || isa<CXXDestructorDecl>(MD))) CGM.getCXXABI().addImplicitStructorParams(*this, ResTy, Args); + return ResTy; +} + +void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn, + const CGFunctionInfo &FnInfo) { + const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl()); + CurGD = GD; + + FunctionArgList Args; + QualType ResTy = BuildFunctionArgList(GD, Args); + + // Check if we should generate debug info for this function. + if (FD->hasAttr<NoDebugAttr>()) + DebugInfo = nullptr; // disable debug info indefinitely for this function + SourceRange BodyRange; if (Stmt *Body = FD->getBody()) BodyRange = Body->getSourceRange(); CurEHLocation = BodyRange.getEnd(); @@ -1088,14 +1160,37 @@ bool CodeGenFunction::containsBreak(const Stmt *S) { return false; } +bool CodeGenFunction::mightAddDeclToScope(const Stmt *S) { + if (!S) return false; + + // Some statement kinds add a scope and thus never add a decl to the current + // scope. Note, this list is longer than the list of statements that might + // have an unscoped decl nested within them, but this way is conservatively + // correct even if more statement kinds are added. + if (isa<IfStmt>(S) || isa<SwitchStmt>(S) || isa<WhileStmt>(S) || + isa<DoStmt>(S) || isa<ForStmt>(S) || isa<CompoundStmt>(S) || + isa<CXXForRangeStmt>(S) || isa<CXXTryStmt>(S) || + isa<ObjCForCollectionStmt>(S) || isa<ObjCAtTryStmt>(S)) + return false; + + if (isa<DeclStmt>(S)) + return true; + + for (const Stmt *SubStmt : S->children()) + if (mightAddDeclToScope(SubStmt)) + return true; + + return false; +} /// ConstantFoldsToSimpleInteger - If the specified expression does not fold /// to a constant, or if it does but contains a label, return false. If it /// constant folds return true and set the boolean result in Result. bool CodeGenFunction::ConstantFoldsToSimpleInteger(const Expr *Cond, - bool &ResultBool) { + bool &ResultBool, + bool AllowLabels) { llvm::APSInt ResultInt; - if (!ConstantFoldsToSimpleInteger(Cond, ResultInt)) + if (!ConstantFoldsToSimpleInteger(Cond, ResultInt, AllowLabels)) return false; ResultBool = ResultInt.getBoolValue(); @@ -1105,15 +1200,16 @@ bool CodeGenFunction::ConstantFoldsToSimpleInteger(const Expr *Cond, /// ConstantFoldsToSimpleInteger - If the specified expression does not fold /// to a constant, or if it does but contains a label, return false. If it /// constant folds return true and set the folded value. -bool CodeGenFunction:: -ConstantFoldsToSimpleInteger(const Expr *Cond, llvm::APSInt &ResultInt) { +bool CodeGenFunction::ConstantFoldsToSimpleInteger(const Expr *Cond, + llvm::APSInt &ResultInt, + bool AllowLabels) { // FIXME: Rename and handle conversion of other evaluatable things // to bool. llvm::APSInt Int; if (!Cond->EvaluateAsInt(Int, getContext())) return false; // Not foldable, not integer or not fully evaluatable. 
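// Illustrative sketch, not part of the patch: a pass_object_size parameter,
// for which BuildFunctionArgList appends a hidden size argument (recorded in
// SizeArguments) immediately after the user-visible parameter.
void fill(char *buf __attribute__((pass_object_size(0))), char v);
// Callers implicitly pass __builtin_object_size(buf, 0) for the hidden
// argument.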
- if (CodeGenFunction::ContainsLabel(Cond)) + if (!AllowLabels && CodeGenFunction::ContainsLabel(Cond)) return false; // Contains a label. ResultInt = Int; @@ -1297,15 +1393,12 @@ void CodeGenFunction::EmitBranchOnBoolExpr(const Expr *Cond, // create metadata that specifies that the branch is unpredictable. // Don't bother if not optimizing because that metadata would not be used. llvm::MDNode *Unpredictable = nullptr; - if (CGM.getCodeGenOpts().OptimizationLevel != 0) { - if (const CallExpr *Call = dyn_cast<CallExpr>(Cond)) { - const Decl *TargetDecl = Call->getCalleeDecl(); - if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(TargetDecl)) { - if (FD->getBuiltinID() == Builtin::BI__builtin_unpredictable) { - llvm::MDBuilder MDHelper(getLLVMContext()); - Unpredictable = MDHelper.createUnpredictable(); - } - } + auto *Call = dyn_cast<CallExpr>(Cond); + if (Call && CGM.getCodeGenOpts().OptimizationLevel != 0) { + auto *FD = dyn_cast_or_null<FunctionDecl>(Call->getCalleeDecl()); + if (FD && FD->getBuiltinID() == Builtin::BI__builtin_unpredictable) { + llvm::MDBuilder MDHelper(getLLVMContext()); + Unpredictable = MDHelper.createUnpredictable(); } } @@ -1764,7 +1857,7 @@ void CodeGenFunction::EmitDeclRefExprDbgValue(const DeclRefExpr *E, llvm::Constant *Init) { assert (Init && "Invalid DeclRefExpr initializer!"); if (CGDebugInfo *Dbg = getDebugInfo()) - if (CGM.getCodeGenOpts().getDebugInfo() >= CodeGenOptions::LimitedDebugInfo) + if (CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo) Dbg->EmitGlobalVariable(E->getDecl(), Init); } @@ -1860,26 +1953,14 @@ void CodeGenFunction::InsertHelper(llvm::Instruction *I, CGM.getSanitizerMetadata()->disableSanitizerForInstruction(I); } -template <bool PreserveNames> -void CGBuilderInserter<PreserveNames>::InsertHelper( +void CGBuilderInserter::InsertHelper( llvm::Instruction *I, const llvm::Twine &Name, llvm::BasicBlock *BB, llvm::BasicBlock::iterator InsertPt) const { - llvm::IRBuilderDefaultInserter<PreserveNames>::InsertHelper(I, Name, BB, - InsertPt); + llvm::IRBuilderDefaultInserter::InsertHelper(I, Name, BB, InsertPt); if (CGF) CGF->InsertHelper(I, Name, BB, InsertPt); } -#ifdef NDEBUG -#define PreserveNames false -#else -#define PreserveNames true -#endif -template void CGBuilderInserter<PreserveNames>::InsertHelper( - llvm::Instruction *I, const llvm::Twine &Name, llvm::BasicBlock *BB, - llvm::BasicBlock::iterator InsertPt) const; -#undef PreserveNames - static bool hasRequiredFeatures(const SmallVectorImpl<StringRef> &ReqFeatures, CodeGenModule &CGM, const FunctionDecl *FD, std::string &FirstMissing) { @@ -1956,3 +2037,12 @@ void CodeGenFunction::checkTargetFeatures(const CallExpr *E, << FD->getDeclName() << TargetDecl->getDeclName() << MissingFeature; } } + +void CodeGenFunction::EmitSanitizerStatReport(llvm::SanitizerStatKind SSK) { + if (!CGM.getCodeGenOpts().SanitizeStats) + return; + + llvm::IRBuilder<> IRB(Builder.GetInsertBlock(), Builder.GetInsertPoint()); + IRB.SetCurrentDebugLocation(Builder.getCurrentDebugLocation()); + CGM.getSanStats().create(IRB, SSK); +} diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.h b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.h index 4803b13..fb19a26 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.h @@ -36,6 +36,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Support/Debug.h" +#include "llvm/Transforms/Utils/SanitizerStats.h" namespace 
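// Illustrative sketch, not part of the patch: the builtin recognized by
// EmitBranchOnBoolExpr; at -O1 and above the branch it guards is tagged with
// !unpredictable metadata.
int clamp_negative(int x) {
  if (__builtin_unpredictable(x < 0))
    return 0;
  return x;
}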
llvm { class BasicBlock; @@ -67,7 +68,6 @@ class ObjCMethodDecl; class ObjCImplementationDecl; class ObjCPropertyImplDecl; class TargetInfo; -class TargetCodeGenInfo; class VarDecl; class ObjCForCollectionStmt; class ObjCAtTryStmt; @@ -85,6 +85,9 @@ class BlockByrefHelpers; class BlockByrefInfo; class BlockFlags; class BlockFieldFlags; +class RegionCodeGenTy; +class TargetCodeGenInfo; +struct OMPTaskDataTy; /// The kind of evaluation to perform on values of a particular /// type. Basically, is the code in CGExprScalar, CGExprComplex, or @@ -188,6 +191,8 @@ public: CXXThisFieldDecl = *Field; else if (I->capturesVariable()) CaptureFields[I->getCapturedVar()] = *Field; + else if (I->capturesVariableByCopy()) + CaptureFields[I->getCapturedVar()] = *Field; } } @@ -275,6 +280,8 @@ public: /// potentially set the return value. bool SawAsmBlock; + const FunctionDecl *CurSEHParent = nullptr; + /// True if the current function is an outlined SEH helper. This can be a /// finally block or filter expression. bool IsOutlinedSEHHelper; @@ -295,6 +302,19 @@ public: llvm::Instruction *CurrentFuncletPad = nullptr; + class CallLifetimeEnd final : public EHScopeStack::Cleanup { + llvm::Value *Addr; + llvm::Value *Size; + + public: + CallLifetimeEnd(Address addr, llvm::Value *size) + : Addr(addr.getPointer()), Size(size) {} + + void Emit(CodeGenFunction &CGF, Flags flags) override { + CGF.EmitLifetimeEnd(Size, Addr); + } + }; + /// Header for data within LifetimeExtendedCleanupStack. struct LifetimeExtendedCleanupHeader { /// The size of the following cleanup object. @@ -637,6 +657,11 @@ public: ForceCleanup(); } + /// Checks if the global variable is captured in current function. + bool isGlobalVarCaptured(const VarDecl *VD) const { + return !VD->isLocalVarDeclOrParm() && CGF.LocalDeclMap.count(VD) > 0; + } + private: /// Copy all the entries in the source map over the corresponding /// entries in the destination, which must exist. @@ -940,6 +965,94 @@ private: }; SmallVector<BreakContinue, 8> BreakContinueStack; + /// Handles cancellation exit points in OpenMP-related constructs. + class OpenMPCancelExitStack { + /// Tracks cancellation exit point and join point for cancel-related exit + /// and normal exit. + struct CancelExit { + CancelExit() = default; + CancelExit(OpenMPDirectiveKind Kind, JumpDest ExitBlock, + JumpDest ContBlock) + : Kind(Kind), ExitBlock(ExitBlock), ContBlock(ContBlock) {} + OpenMPDirectiveKind Kind = OMPD_unknown; + /// true if the exit block has been emitted already by the special + /// emitExit() call, false if the default codegen is used. + bool HasBeenEmitted = false; + JumpDest ExitBlock; + JumpDest ContBlock; + }; + + SmallVector<CancelExit, 8> Stack; + + public: + OpenMPCancelExitStack() : Stack(1) {} + ~OpenMPCancelExitStack() = default; + /// Fetches the exit block for the current OpenMP construct. + JumpDest getExitBlock() const { return Stack.back().ExitBlock; } + /// Emits exit block with special codegen procedure specific for the related + /// OpenMP construct + emits code for normal construct cleanup. 
+ void emitExit(CodeGenFunction &CGF, OpenMPDirectiveKind Kind, + const llvm::function_ref<void(CodeGenFunction &)> &CodeGen) { + if (Stack.back().Kind == Kind && getExitBlock().isValid()) { + assert(CGF.getOMPCancelDestination(Kind).isValid()); + assert(CGF.HaveInsertPoint()); + assert(!Stack.back().HasBeenEmitted); + auto IP = CGF.Builder.saveAndClearIP(); + CGF.EmitBlock(Stack.back().ExitBlock.getBlock()); + CodeGen(CGF); + CGF.EmitBranchThroughCleanup(Stack.back().ContBlock); + CGF.Builder.restoreIP(IP); + Stack.back().HasBeenEmitted = true; + } + CodeGen(CGF); + } + /// Enter the cancel supporting \a Kind construct. + /// \param Kind OpenMP directive that supports cancel constructs. + /// \param HasCancel true, if the construct has inner cancel directive, + /// false otherwise. + void enter(CodeGenFunction &CGF, OpenMPDirectiveKind Kind, bool HasCancel) { + Stack.push_back({Kind, + HasCancel ? CGF.getJumpDestInCurrentScope("cancel.exit") + : JumpDest(), + HasCancel ? CGF.getJumpDestInCurrentScope("cancel.cont") + : JumpDest()}); + } + /// Emits default exit point for the cancel construct (if the special one + /// has not be used) + join point for cancel/normal exits. + void exit(CodeGenFunction &CGF) { + if (getExitBlock().isValid()) { + assert(CGF.getOMPCancelDestination(Stack.back().Kind).isValid()); + bool HaveIP = CGF.HaveInsertPoint(); + if (!Stack.back().HasBeenEmitted) { + if (HaveIP) + CGF.EmitBranchThroughCleanup(Stack.back().ContBlock); + CGF.EmitBlock(Stack.back().ExitBlock.getBlock()); + CGF.EmitBranchThroughCleanup(Stack.back().ContBlock); + } + CGF.EmitBlock(Stack.back().ContBlock.getBlock()); + if (!HaveIP) { + CGF.Builder.CreateUnreachable(); + CGF.Builder.ClearInsertionPoint(); + } + } + Stack.pop_back(); + } + }; + OpenMPCancelExitStack OMPCancelStack; + + /// Controls insertion of cancellation exit blocks in worksharing constructs. + class OMPCancelStackRAII { + CodeGenFunction &CGF; + + public: + OMPCancelStackRAII(CodeGenFunction &CGF, OpenMPDirectiveKind Kind, + bool HasCancel) + : CGF(CGF) { + CGF.OMPCancelStack.enter(CGF, Kind, HasCancel); + } + ~OMPCancelStackRAII() { CGF.OMPCancelStack.exit(CGF); } + }; + CodeGenPGO PGO; /// Calculate branch weights appropriate for PGO data @@ -951,7 +1064,7 @@ private: public: /// Increment the profiler's counter for the given statement. 
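// Illustrative sketch, not part of the patch: the source pattern
// OpenMPCancelExitStack models -- a worksharing loop with an inner
// cancellation point, lowered through the "cancel.exit"/"cancel.cont" blocks.
extern bool failed(int i);
extern void work(int i);
void run(int n) {
#pragma omp parallel for
  for (int i = 0; i < n; ++i) {
    if (failed(i)) {
#pragma omp cancel for
    }
    work(i);
  }
}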
void incrementProfileCounter(const Stmt *S) { - if (CGM.getCodeGenOpts().ProfileInstrGenerate) + if (CGM.getCodeGenOpts().hasProfileClangInstr()) PGO.emitCounterIncrement(Builder, S); PGO.setCurrentStmt(S); } @@ -1053,6 +1166,61 @@ public: CharUnits OldCXXThisAlignment; }; + class InlinedInheritingConstructorScope { + public: + InlinedInheritingConstructorScope(CodeGenFunction &CGF, GlobalDecl GD) + : CGF(CGF), OldCurGD(CGF.CurGD), OldCurFuncDecl(CGF.CurFuncDecl), + OldCurCodeDecl(CGF.CurCodeDecl), + OldCXXABIThisDecl(CGF.CXXABIThisDecl), + OldCXXABIThisValue(CGF.CXXABIThisValue), + OldCXXThisValue(CGF.CXXThisValue), + OldCXXABIThisAlignment(CGF.CXXABIThisAlignment), + OldCXXThisAlignment(CGF.CXXThisAlignment), + OldReturnValue(CGF.ReturnValue), OldFnRetTy(CGF.FnRetTy), + OldCXXInheritedCtorInitExprArgs( + std::move(CGF.CXXInheritedCtorInitExprArgs)) { + CGF.CurGD = GD; + CGF.CurFuncDecl = CGF.CurCodeDecl = + cast<CXXConstructorDecl>(GD.getDecl()); + CGF.CXXABIThisDecl = nullptr; + CGF.CXXABIThisValue = nullptr; + CGF.CXXThisValue = nullptr; + CGF.CXXABIThisAlignment = CharUnits(); + CGF.CXXThisAlignment = CharUnits(); + CGF.ReturnValue = Address::invalid(); + CGF.FnRetTy = QualType(); + CGF.CXXInheritedCtorInitExprArgs.clear(); + } + ~InlinedInheritingConstructorScope() { + CGF.CurGD = OldCurGD; + CGF.CurFuncDecl = OldCurFuncDecl; + CGF.CurCodeDecl = OldCurCodeDecl; + CGF.CXXABIThisDecl = OldCXXABIThisDecl; + CGF.CXXABIThisValue = OldCXXABIThisValue; + CGF.CXXThisValue = OldCXXThisValue; + CGF.CXXABIThisAlignment = OldCXXABIThisAlignment; + CGF.CXXThisAlignment = OldCXXThisAlignment; + CGF.ReturnValue = OldReturnValue; + CGF.FnRetTy = OldFnRetTy; + CGF.CXXInheritedCtorInitExprArgs = + std::move(OldCXXInheritedCtorInitExprArgs); + } + + private: + CodeGenFunction &CGF; + GlobalDecl OldCurGD; + const Decl *OldCurFuncDecl; + const Decl *OldCurCodeDecl; + ImplicitParamDecl *OldCXXABIThisDecl; + llvm::Value *OldCXXABIThisValue; + llvm::Value *OldCXXThisValue; + CharUnits OldCXXABIThisAlignment; + CharUnits OldCXXThisAlignment; + Address OldReturnValue; + QualType OldFnRetTy; + CallArgList OldCXXInheritedCtorInitExprArgs; + }; + private: /// CXXThisDecl - When generating code for a C++ member function, /// this will hold the implicit 'this' declaration. @@ -1066,6 +1234,10 @@ private: /// this expression. Address CXXDefaultInitExprThis = Address::invalid(); + /// The values of function arguments to use when evaluating + /// CXXInheritedCtorInitExprs within this context. + CallArgList CXXInheritedCtorInitExprArgs; + /// CXXStructorImplicitParamDecl - When generating code for a constructor or /// destructor, this will hold the implicit argument (e.g. VTT). ImplicitParamDecl *CXXStructorImplicitParamDecl; @@ -1149,10 +1321,7 @@ public: return getInvokeDestImpl(); } - bool currentFunctionUsesSEHTry() const { - const auto *FD = dyn_cast_or_null<FunctionDecl>(CurCodeDecl); - return FD && FD->usesSEHTry(); - } + bool currentFunctionUsesSEHTry() const { return CurSEHParent != nullptr; } const TargetInfo &getTarget() const { return Target; } llvm::LLVMContext &getLLVMContext() { return CGM.getLLVMContext(); } @@ -1292,6 +1461,8 @@ public: const BlockByrefInfo &getBlockByrefInfo(const VarDecl *var); + QualType BuildFunctionArgList(GlobalDecl GD, FunctionArgList &Args); + void GenerateCode(GlobalDecl GD, llvm::Function *Fn, const CGFunctionInfo &FnInfo); /// \brief Emit code for the start of a function. 
@@ -1388,6 +1559,7 @@ public: CFITCK_NVCall, CFITCK_DerivedCast, CFITCK_UnrelatedCast, + CFITCK_ICall, }; /// \brief Derived is the presumed address of an object of type T after a @@ -1399,14 +1571,29 @@ public: /// EmitVTablePtrCheckForCall - Virtual method MD is being called via VTable. /// If vptr CFI is enabled, emit a check that VTable is valid. - void EmitVTablePtrCheckForCall(const CXXMethodDecl *MD, llvm::Value *VTable, + void EmitVTablePtrCheckForCall(const CXXRecordDecl *RD, llvm::Value *VTable, CFITypeCheckKind TCK, SourceLocation Loc); /// EmitVTablePtrCheck - Emit a check that VTable is a valid virtual table for - /// RD using llvm.bitset.test. + /// RD using llvm.type.test. void EmitVTablePtrCheck(const CXXRecordDecl *RD, llvm::Value *VTable, CFITypeCheckKind TCK, SourceLocation Loc); + /// If whole-program virtual table optimization is enabled, emit an assumption + /// that VTable is a member of RD's type identifier. Or, if vptr CFI is + /// enabled, emit a check that VTable is a member of RD's type identifier. + void EmitTypeMetadataCodeForVCall(const CXXRecordDecl *RD, + llvm::Value *VTable, SourceLocation Loc); + + /// Returns whether we should perform a type checked load when loading a + /// virtual function for virtual calls to members of RD. This is generally + /// true when both vcall CFI and whole-program-vtables are enabled. + bool ShouldEmitVTableTypeCheckedLoad(const CXXRecordDecl *RD); + + /// Emit a type checked load from the given vtable. + llvm::Value *EmitVTableTypeCheckedLoad(const CXXRecordDecl *RD, llvm::Value *VTable, + uint64_t VTableByteOffset); + /// CanDevirtualizeMemberFunctionCalls - Checks whether virtual calls on given /// expr can be devirtualized. bool CanDevirtualizeMemberFunctionCall(const Expr *Base, @@ -1422,6 +1609,10 @@ public: /// instrumented with __cyg_profile_func_* calls bool ShouldInstrumentFunction(); + /// ShouldXRayInstrument - Return true if the current function should be + /// instrumented with XRay nop sleds. + bool ShouldXRayInstrumentFunction() const; + /// EmitFunctionInstrumentation - Emit LLVM code to call the specified /// instrumentation function with the current function and the call site, if /// function instrumentation is enabled. @@ -1572,6 +1763,10 @@ public: AlignmentSource *Source = nullptr); LValue EmitLoadOfReferenceLValue(Address Ref, const ReferenceType *RefTy); + Address EmitLoadOfPointer(Address Ptr, const PointerType *PtrTy, + AlignmentSource *Source = nullptr); + LValue EmitLoadOfPointerLValue(Address Ptr, const PointerType *PtrTy); + /// CreateTempAlloca - This creates a alloca and inserts it into the entry /// block. The caller is responsible for setting an appropriate alignment on /// the alloca. @@ -1845,10 +2040,32 @@ public: void EmitDelegatingCXXConstructorCall(const CXXConstructorDecl *Ctor, const FunctionArgList &Args); + /// Emit a call to an inheriting constructor (that is, one that invokes a + /// constructor inherited from a base class) by inlining its definition. This + /// is necessary if the ABI does not support forwarding the arguments to the + /// base class constructor (because they're variadic or similar). + void EmitInlinedInheritingCXXConstructorCall(const CXXConstructorDecl *Ctor, + CXXCtorType CtorType, + bool ForVirtualBase, + bool Delegating, + CallArgList &Args); + + /// Emit a call to a constructor inherited from a base class, passing the + /// current constructor's arguments along unmodified (without even making + /// a copy). 
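// Illustrative sketch, not part of the patch: a straightforward inherited
// constructor. When the ABI can forward the caller's arguments unchanged,
// the forwarding EmitInheritedCXXConstructorCall declared below applies;
// variadic or otherwise non-forwardable signatures go through the inlined
// EmitInlinedInheritingCXXConstructorCall path instead.
struct Base { Base(int a, double b); };
struct Derived : Base { using Base::Base; }; // Derived(int, double) inherited
void make() { Derived d(1, 2.0); }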
+ void EmitInheritedCXXConstructorCall(const CXXConstructorDecl *D, + bool ForVirtualBase, Address This, + bool InheritedFromVBase, + const CXXInheritedCtorInitExpr *E); + void EmitCXXConstructorCall(const CXXConstructorDecl *D, CXXCtorType Type, bool ForVirtualBase, bool Delegating, Address This, const CXXConstructExpr *E); + void EmitCXXConstructorCall(const CXXConstructorDecl *D, CXXCtorType Type, + bool ForVirtualBase, bool Delegating, + Address This, CallArgList &Args); + /// Emit assumption load for all bases. Requires to be be called only on /// most-derived class and not under construction of the object. void EmitVTableAssumptionLoads(const CXXRecordDecl *ClassDecl, Address This); @@ -1861,7 +2078,7 @@ public: const CXXConstructExpr *E); void EmitCXXAggrConstructorCall(const CXXConstructorDecl *D, - const ConstantArrayType *ArrayTy, + const ArrayType *ArrayTy, Address ArrayPtr, const CXXConstructExpr *E, bool ZeroInitialization = false); @@ -2204,6 +2421,8 @@ public: void EmitCXXForRangeStmt(const CXXForRangeStmt &S, ArrayRef<const Attr *> Attrs = None); + /// Returns calculated size of the specified type. + llvm::Value *getTypeSize(QualType Ty); LValue InitCapturedStruct(const CapturedStmt &S); llvm::Function *EmitCapturedStmt(const CapturedStmt &S, CapturedRegionKind K); llvm::Function *GenerateCapturedStmtFunction(const CapturedStmt &S); @@ -2294,7 +2513,17 @@ public: /// it is the last iteration of the loop code in associated directive, or to /// 'i1 false' otherwise. If this item is nullptr, no final check is required. void EmitOMPLastprivateClauseFinal(const OMPExecutableDirective &D, + bool NoFinals, llvm::Value *IsLastIterCond = nullptr); + /// Emit initial code for linear clauses. + void EmitOMPLinearClause(const OMPLoopDirective &D, + CodeGenFunction::OMPPrivateScope &PrivateScope); + /// Emit final code for linear clauses. + /// \param CondGen Optional conditional code for final part of codegen for + /// linear clause. + void EmitOMPLinearClauseFinal( + const OMPLoopDirective &D, + const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen); /// \brief Emit initial code for reduction variables. Creates reduction copies /// and initializes them with the values according to OpenMP standard. /// @@ -2315,6 +2544,14 @@ public: /// \param D Directive (possibly) with the 'linear' clause. 
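// Illustrative sketch, not part of the patch: a simd loop with a linear
// clause, the case served by the EmitOMPLinearClause* hooks above (a
// privatized copy of j inside the loop plus a guarded final copy-out).
void scale(float *a, int n) {
  int j = 0;
#pragma omp simd linear(j : 1)
  for (int i = 0; i < n; ++i) {
    a[j] *= 2.0f;
    j += 1;
  }
}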
void EmitOMPLinearClauseInit(const OMPLoopDirective &D); + typedef const llvm::function_ref<void(CodeGenFunction & /*CGF*/, + llvm::Value * /*OutlinedFn*/, + const OMPTaskDataTy & /*Data*/)> + TaskGenTy; + void EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, + const RegionCodeGenTy &BodyGen, + const TaskGenTy &TaskGen, OMPTaskDataTy &Data); + void EmitOMPParallelDirective(const OMPParallelDirective &S); void EmitOMPSimdDirective(const OMPSimdDirective &S); void EmitOMPForDirective(const OMPForDirective &S); @@ -2337,14 +2574,36 @@ public: void EmitOMPAtomicDirective(const OMPAtomicDirective &S); void EmitOMPTargetDirective(const OMPTargetDirective &S); void EmitOMPTargetDataDirective(const OMPTargetDataDirective &S); + void EmitOMPTargetEnterDataDirective(const OMPTargetEnterDataDirective &S); + void EmitOMPTargetExitDataDirective(const OMPTargetExitDataDirective &S); + void EmitOMPTargetUpdateDirective(const OMPTargetUpdateDirective &S); + void EmitOMPTargetParallelDirective(const OMPTargetParallelDirective &S); + void + EmitOMPTargetParallelForDirective(const OMPTargetParallelForDirective &S); void EmitOMPTeamsDirective(const OMPTeamsDirective &S); void EmitOMPCancellationPointDirective(const OMPCancellationPointDirective &S); void EmitOMPCancelDirective(const OMPCancelDirective &S); + void EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S); void EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S); void EmitOMPTaskLoopSimdDirective(const OMPTaskLoopSimdDirective &S); void EmitOMPDistributeDirective(const OMPDistributeDirective &S); - + void EmitOMPDistributeLoop(const OMPDistributeDirective &S); + void EmitOMPDistributeParallelForDirective( + const OMPDistributeParallelForDirective &S); + void EmitOMPDistributeParallelForSimdDirective( + const OMPDistributeParallelForSimdDirective &S); + void EmitOMPDistributeSimdDirective(const OMPDistributeSimdDirective &S); + void EmitOMPTargetParallelForSimdDirective( + const OMPTargetParallelForSimdDirective &S); + + /// Emit outlined function for the target directive. + static std::pair<llvm::Function * /*OutlinedFn*/, + llvm::Constant * /*OutlinedFnID*/> + EmitOMPTargetDirectiveOutlinedFunction(CodeGenModule &CGM, + const OMPTargetDirective &S, + StringRef ParentName, + bool IsOffloadEntry); /// \brief Emit inner loop of the worksharing/simd construct. /// /// \param S Directive, for which the inner loop must be emitted. @@ -2362,24 +2621,35 @@ public: const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen); JumpDest getOMPCancelDestination(OpenMPDirectiveKind Kind); + /// Emit initial code for loop counters of loop-based directives. + void EmitOMPPrivateLoopCounters(const OMPLoopDirective &S, + OMPPrivateScope &LoopScope); private: - /// Helpers for the OpenMP loop directives. void EmitOMPLoopBody(const OMPLoopDirective &D, JumpDest LoopExit); void EmitOMPSimdInit(const OMPLoopDirective &D, bool IsMonotonic = false); - void EmitOMPSimdFinal(const OMPLoopDirective &D); + void EmitOMPSimdFinal( + const OMPLoopDirective &D, + const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen); /// \brief Emit code for the worksharing loop-based directive. /// \return true, if this construct has any lastprivate clause, false - /// otherwise. 
bool EmitOMPWorksharingLoop(const OMPLoopDirective &S); - void EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind, + void EmitOMPOuterLoop(bool IsMonotonic, bool DynamicOrOrdered, + const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, + Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk); + void EmitOMPForOuterLoop(const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic, const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk); + void EmitOMPDistributeOuterLoop( + OpenMPDistScheduleClauseKind ScheduleKind, + const OMPDistributeDirective &S, OMPPrivateScope &LoopScope, + Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk); /// \brief Emit code for sections directive. - OpenMPDirectiveKind EmitSections(const OMPExecutableDirective &S); + void EmitSections(const OMPExecutableDirective &S); public: @@ -2430,7 +2700,6 @@ public: void EmitAtomicInit(Expr *E, LValue lvalue); bool LValueIsSuitableForInlineAtomic(LValue Src); - bool typeIsSuitableForInlineAtomic(QualType Ty, bool IsVolatile) const; RValue EmitAtomicLoad(LValue LV, SourceLocation SL, AggValueSlot Slot = AggValueSlot::ignored()); @@ -2446,8 +2715,10 @@ public: std::pair<RValue, llvm::Value *> EmitAtomicCompareExchange( LValue Obj, RValue Expected, RValue Desired, SourceLocation Loc, - llvm::AtomicOrdering Success = llvm::SequentiallyConsistent, - llvm::AtomicOrdering Failure = llvm::SequentiallyConsistent, + llvm::AtomicOrdering Success = + llvm::AtomicOrdering::SequentiallyConsistent, + llvm::AtomicOrdering Failure = + llvm::AtomicOrdering::SequentiallyConsistent, bool IsWeak = false, AggValueSlot Slot = AggValueSlot::ignored()); void EmitAtomicUpdate(LValue LVal, llvm::AtomicOrdering AO, @@ -2680,11 +2951,10 @@ public: ReturnValueSlot ReturnValue, llvm::Value *This, llvm::Value *ImplicitParam, QualType ImplicitParamTy, const CallExpr *E); - RValue EmitCXXStructorCall(const CXXMethodDecl *MD, llvm::Value *Callee, - ReturnValueSlot ReturnValue, llvm::Value *This, - llvm::Value *ImplicitParam, - QualType ImplicitParamTy, const CallExpr *E, - StructorType Type); + RValue EmitCXXDestructorCall(const CXXDestructorDecl *DD, llvm::Value *Callee, + llvm::Value *This, llvm::Value *ImplicitParam, + QualType ImplicitParamTy, const CallExpr *E, + StructorType Type); RValue EmitCXXMemberCallExpr(const CXXMemberCallExpr *E, ReturnValueSlot ReturnValue); RValue EmitCXXMemberOrOperatorMemberCallExpr(const CallExpr *CE, @@ -2708,6 +2978,8 @@ public: RValue EmitCUDAKernelCallExpr(const CUDAKernelCallExpr *E, ReturnValueSlot ReturnValue); + RValue EmitCUDADevicePrintfCallExpr(const CallExpr *E, + ReturnValueSlot ReturnValue); RValue EmitBuiltinExpr(const FunctionDecl *FD, unsigned BuiltinID, const CallExpr *E, @@ -2798,19 +3070,25 @@ public: llvm::Value *EmitARCAutoreleaseReturnValue(llvm::Value *value); llvm::Value *EmitARCRetainAutoreleaseReturnValue(llvm::Value *value); llvm::Value *EmitARCRetainAutoreleasedReturnValue(llvm::Value *value); + llvm::Value *EmitARCUnsafeClaimAutoreleasedReturnValue(llvm::Value *value); std::pair<LValue,llvm::Value*> EmitARCStoreAutoreleasing(const BinaryOperator *e); std::pair<LValue,llvm::Value*> EmitARCStoreStrong(const BinaryOperator *e, bool ignored); + std::pair<LValue,llvm::Value*> + EmitARCStoreUnsafeUnretained(const BinaryOperator *e, bool ignored); llvm::Value *EmitObjCThrowOperand(const Expr *expr); llvm::Value *EmitObjCConsumeObject(QualType T, llvm::Value *Ptr); llvm::Value 
*EmitObjCExtendObjectLifetime(QualType T, llvm::Value *Ptr); llvm::Value *EmitARCExtendBlockObject(const Expr *expr); + llvm::Value *EmitARCReclaimReturnedObject(const Expr *e, + bool allowUnsafeClaim); llvm::Value *EmitARCRetainScalarExpr(const Expr *expr); llvm::Value *EmitARCRetainAutoreleaseScalarExpr(const Expr *expr); + llvm::Value *EmitARCUnsafeUnretainedScalarExpr(const Expr *expr); void EmitARCIntrinsicUse(ArrayRef<llvm::Value*> values); @@ -2973,17 +3251,23 @@ public: /// If the statement (recursively) contains a switch or loop with a break /// inside of it, this is fine. static bool containsBreak(const Stmt *S); + + /// Determine if the given statement might introduce a declaration into the + /// current scope, by being a (possibly-labelled) DeclStmt. + static bool mightAddDeclToScope(const Stmt *S); /// ConstantFoldsToSimpleInteger - If the specified expression does not fold /// to a constant, or if it does but contains a label, return false. If it /// constant folds return true and set the boolean result in Result. - bool ConstantFoldsToSimpleInteger(const Expr *Cond, bool &Result); + bool ConstantFoldsToSimpleInteger(const Expr *Cond, bool &Result, + bool AllowLabels = false); /// ConstantFoldsToSimpleInteger - If the specified expression does not fold /// to a constant, or if it does but contains a label, return false. If it /// constant folds return true and set the folded value. - bool ConstantFoldsToSimpleInteger(const Expr *Cond, llvm::APSInt &Result); - + bool ConstantFoldsToSimpleInteger(const Expr *Cond, llvm::APSInt &Result, + bool AllowLabels = false); + /// EmitBranchOnBoolExpr - Emit a branch on a boolean condition (e.g. for an /// if statement) to the specified blocks. Based on the condition, this might /// try to simplify the codegen of the conditional based on the branch. @@ -3013,8 +3297,9 @@ public: /// \brief Emit a slow path cross-DSO CFI check which calls __cfi_slowpath /// if Cond if false. - void EmitCfiSlowPathCheck(llvm::Value *Cond, llvm::ConstantInt *TypeId, - llvm::Value *Ptr); + void EmitCfiSlowPathCheck(SanitizerMask Kind, llvm::Value *Cond, + llvm::ConstantInt *TypeId, llvm::Value *Ptr, + ArrayRef<llvm::Constant *> StaticArgs); /// \brief Create a basic block that will call the trap intrinsic, and emit a /// conditional branch to it, for the -ftrapv checks. @@ -3024,6 +3309,9 @@ public: /// "trap-func-name" if specified. llvm::CallInst *EmitTrapCall(llvm::Intrinsic::ID IntrID); + /// \brief Emit a cross-DSO CFI failure handling function. + void EmitCfiCheckFail(); + /// \brief Create a check for a function parameter that may potentially be /// declared as non-null. void EmitNonNullArgCheck(RValue RV, QualType ArgType, SourceLocation ArgLoc, @@ -3062,7 +3350,7 @@ private: /// /// \param AI - The first function argument of the expansion. 
void ExpandTypeFromArgs(QualType Ty, LValue Dst, - SmallVectorImpl<llvm::Argument *>::iterator &AI); + SmallVectorImpl<llvm::Value *>::iterator &AI); /// ExpandTypeToArgs - Expand an RValue \arg RV, with the LLVM type for \arg /// Ty, into individual arguments on the provided vector \arg IRCallArgs, @@ -3189,6 +3477,8 @@ public: Address EmitPointerWithAlignment(const Expr *Addr, AlignmentSource *Source = nullptr); + void EmitSanitizerStatReport(llvm::SanitizerStatKind SSK); + private: QualType getVarArgType(const Expr *Arg); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.cpp index 97b1662..0161cfb 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.cpp @@ -20,6 +20,7 @@ #include "CGObjCRuntime.h" #include "CGOpenCLRuntime.h" #include "CGOpenMPRuntime.h" +#include "CGOpenMPRuntimeNVPTX.h" #include "CodeGenFunction.h" #include "CodeGenPGO.h" #include "CodeGenTBAA.h" @@ -86,17 +87,8 @@ CodeGenModule::CodeGenModule(ASTContext &C, const HeaderSearchOptions &HSO, : Context(C), LangOpts(C.getLangOpts()), HeaderSearchOpts(HSO), PreprocessorOpts(PPO), CodeGenOpts(CGO), TheModule(M), Diags(diags), Target(C.getTargetInfo()), ABI(createCXXABI(*this)), - VMContext(M.getContext()), TBAA(nullptr), TheTargetCodeGenInfo(nullptr), - Types(*this), VTables(*this), ObjCRuntime(nullptr), - OpenCLRuntime(nullptr), OpenMPRuntime(nullptr), CUDARuntime(nullptr), - DebugInfo(nullptr), ObjCData(nullptr), - NoObjCARCExceptionsMetadata(nullptr), PGOReader(nullptr), - CFConstantStringClassRef(nullptr), ConstantStringClassRef(nullptr), - NSConstantStringType(nullptr), NSConcreteGlobalBlock(nullptr), - NSConcreteStackBlock(nullptr), BlockObjectAssign(nullptr), - BlockObjectDispose(nullptr), BlockDescriptorType(nullptr), - GenericBlockLiteralType(nullptr), LifetimeStartFn(nullptr), - LifetimeEndFn(nullptr), SanitizerMD(new SanitizerMetadata(*this)) { + VMContext(M.getContext()), Types(*this), VTables(*this), + SanitizerMD(new SanitizerMetadata(*this)) { // Initialize the type cache. llvm::LLVMContext &LLVMContext = M.getContext(); @@ -132,29 +124,30 @@ CodeGenModule::CodeGenModule(ASTContext &C, const HeaderSearchOptions &HSO, // Enable TBAA unless it's suppressed. ThreadSanitizer needs TBAA even at O0. if (LangOpts.Sanitize.has(SanitizerKind::Thread) || (!CodeGenOpts.RelaxedAliasing && CodeGenOpts.OptimizationLevel > 0)) - TBAA = new CodeGenTBAA(Context, VMContext, CodeGenOpts, getLangOpts(), - getCXXABI().getMangleContext()); + TBAA.reset(new CodeGenTBAA(Context, VMContext, CodeGenOpts, getLangOpts(), + getCXXABI().getMangleContext())); // If debug info or coverage generation is enabled, create the CGDebugInfo // object. 
- if (CodeGenOpts.getDebugInfo() != CodeGenOptions::NoDebugInfo || - CodeGenOpts.EmitGcovArcs || - CodeGenOpts.EmitGcovNotes) - DebugInfo = new CGDebugInfo(*this); + if (CodeGenOpts.getDebugInfo() != codegenoptions::NoDebugInfo || + CodeGenOpts.EmitGcovArcs || CodeGenOpts.EmitGcovNotes) + DebugInfo.reset(new CGDebugInfo(*this)); Block.GlobalUniqueCount = 0; if (C.getLangOpts().ObjC1) - ObjCData = new ObjCEntrypoints(); + ObjCData.reset(new ObjCEntrypoints()); - if (!CodeGenOpts.InstrProfileInput.empty()) { - auto ReaderOrErr = - llvm::IndexedInstrProfReader::create(CodeGenOpts.InstrProfileInput); - if (std::error_code EC = ReaderOrErr.getError()) { + if (CodeGenOpts.hasProfileClangUse()) { + auto ReaderOrErr = llvm::IndexedInstrProfReader::create( + CodeGenOpts.ProfileInstrumentUsePath); + if (auto E = ReaderOrErr.takeError()) { unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, "Could not read profile %0: %1"); - getDiags().Report(DiagID) << CodeGenOpts.InstrProfileInput - << EC.message(); + llvm::handleAllErrors(std::move(E), [&](const llvm::ErrorInfoBase &EI) { + getDiags().Report(DiagID) << CodeGenOpts.ProfileInstrumentUsePath + << EI.message(); + }); } else PGOReader = std::move(ReaderOrErr.get()); } @@ -165,16 +158,7 @@ CodeGenModule::CodeGenModule(ASTContext &C, const HeaderSearchOptions &HSO, CoverageMapping.reset(new CoverageMappingModuleGen(*this, *CoverageInfo)); } -CodeGenModule::~CodeGenModule() { - delete ObjCRuntime; - delete OpenCLRuntime; - delete OpenMPRuntime; - delete CUDARuntime; - delete TheTargetCodeGenInfo; - delete TBAA; - delete DebugInfo; - delete ObjCData; -} +CodeGenModule::~CodeGenModule() {} void CodeGenModule::createObjCRuntime() { // This is just isGNUFamily(), but we want to force implementors of @@ -183,29 +167,42 @@ void CodeGenModule::createObjCRuntime() { case ObjCRuntime::GNUstep: case ObjCRuntime::GCC: case ObjCRuntime::ObjFW: - ObjCRuntime = CreateGNUObjCRuntime(*this); + ObjCRuntime.reset(CreateGNUObjCRuntime(*this)); return; case ObjCRuntime::FragileMacOSX: case ObjCRuntime::MacOSX: case ObjCRuntime::iOS: case ObjCRuntime::WatchOS: - ObjCRuntime = CreateMacObjCRuntime(*this); + ObjCRuntime.reset(CreateMacObjCRuntime(*this)); return; } llvm_unreachable("bad runtime kind"); } void CodeGenModule::createOpenCLRuntime() { - OpenCLRuntime = new CGOpenCLRuntime(*this); + OpenCLRuntime.reset(new CGOpenCLRuntime(*this)); } void CodeGenModule::createOpenMPRuntime() { - OpenMPRuntime = new CGOpenMPRuntime(*this); + // Select a specialized code generation class based on the target, if any. + // If it does not exist use the default implementation. + switch (getTarget().getTriple().getArch()) { + + case llvm::Triple::nvptx: + case llvm::Triple::nvptx64: + assert(getLangOpts().OpenMPIsDevice && + "OpenMP NVPTX is only prepared to deal with device code."); + OpenMPRuntime.reset(new CGOpenMPRuntimeNVPTX(*this)); + break; + default: + OpenMPRuntime.reset(new CGOpenMPRuntime(*this)); + break; + } } void CodeGenModule::createCUDARuntime() { - CUDARuntime = CreateNVCUDARuntime(*this); + CUDARuntime.reset(CreateNVCUDARuntime(*this)); } void CodeGenModule::addReplacement(StringRef Name, llvm::Constant *C) { @@ -259,20 +256,21 @@ void CodeGenModule::applyGlobalValReplacements() { // This is only used in aliases that we created and we know they have a // linear structure. 
-static const llvm::GlobalObject *getAliasedGlobal(const llvm::GlobalAlias &GA) { - llvm::SmallPtrSet<const llvm::GlobalAlias*, 4> Visited; - const llvm::Constant *C = &GA; +static const llvm::GlobalObject *getAliasedGlobal( + const llvm::GlobalIndirectSymbol &GIS) { + llvm::SmallPtrSet<const llvm::GlobalIndirectSymbol*, 4> Visited; + const llvm::Constant *C = &GIS; for (;;) { C = C->stripPointerCasts(); if (auto *GO = dyn_cast<llvm::GlobalObject>(C)) return GO; // stripPointerCasts will not walk over weak aliases. - auto *GA2 = dyn_cast<llvm::GlobalAlias>(C); - if (!GA2) + auto *GIS2 = dyn_cast<llvm::GlobalIndirectSymbol>(C); + if (!GIS2) return nullptr; - if (!Visited.insert(GA2).second) + if (!Visited.insert(GIS2).second) return nullptr; - C = GA2->getAliasee(); + C = GIS2->getIndirectSymbol(); } } @@ -284,20 +282,35 @@ void CodeGenModule::checkAliases() { DiagnosticsEngine &Diags = getDiags(); for (const GlobalDecl &GD : Aliases) { const auto *D = cast<ValueDecl>(GD.getDecl()); - const AliasAttr *AA = D->getAttr<AliasAttr>(); + SourceLocation Location; + bool IsIFunc = D->hasAttr<IFuncAttr>(); + if (const Attr *A = D->getDefiningAttr()) + Location = A->getLocation(); + else + llvm_unreachable("Not an alias or ifunc?"); StringRef MangledName = getMangledName(GD); llvm::GlobalValue *Entry = GetGlobalValue(MangledName); - auto *Alias = cast<llvm::GlobalAlias>(Entry); + auto *Alias = cast<llvm::GlobalIndirectSymbol>(Entry); const llvm::GlobalValue *GV = getAliasedGlobal(*Alias); if (!GV) { Error = true; - Diags.Report(AA->getLocation(), diag::err_cyclic_alias); + Diags.Report(Location, diag::err_cyclic_alias) << IsIFunc; } else if (GV->isDeclaration()) { Error = true; - Diags.Report(AA->getLocation(), diag::err_alias_to_undefined); + Diags.Report(Location, diag::err_alias_to_undefined) + << IsIFunc << IsIFunc; + } else if (IsIFunc) { + // Check resolver function type. + llvm::FunctionType *FTy = dyn_cast<llvm::FunctionType>( + GV->getType()->getPointerElementType()); + assert(FTy); + if (!FTy->getReturnType()->isPointerTy()) + Diags.Report(Location, diag::err_ifunc_resolver_return); + if (FTy->getNumParams()) + Diags.Report(Location, diag::err_ifunc_resolver_params); } - llvm::Constant *Aliasee = Alias->getAliasee(); + llvm::Constant *Aliasee = Alias->getIndirectSymbol(); llvm::GlobalValue *AliaseeGV; if (auto CE = dyn_cast<llvm::ConstantExpr>(Aliasee)) AliaseeGV = cast<llvm::GlobalValue>(CE->getOperand(0)); @@ -308,7 +321,7 @@ void CodeGenModule::checkAliases() { StringRef AliasSection = SA->getName(); if (AliasSection != AliaseeGV->getSection()) Diags.Report(SA->getLocation(), diag::warn_alias_with_section) - << AliasSection; + << AliasSection << IsIFunc << IsIFunc; } // We have to handle alias to weak aliases in here. LLVM itself disallows @@ -316,13 +329,13 @@ void CodeGenModule::checkAliases() { // compatibility with gcc we implement it by just pointing the alias // to its aliasee's aliasee. We also warn, since the user is probably // expecting the link to be weak. 
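// Illustrative sketch, not part of the patch: an ifunc whose resolver passes
// the checks added to checkAliases above -- it returns a pointer and takes no
// parameters (ELF targets only; example names are hypothetical).
static int add_generic(int x, int y) { return x + y; }
static void *resolve_add(void) { return (void *)&add_generic; }
int add(int x, int y) __attribute__((ifunc("resolve_add")));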
- if (auto GA = dyn_cast<llvm::GlobalAlias>(AliaseeGV)) { - if (GA->mayBeOverridden()) { - Diags.Report(AA->getLocation(), diag::warn_alias_to_weak_alias) - << GV->getName() << GA->getName(); + if (auto GA = dyn_cast<llvm::GlobalIndirectSymbol>(AliaseeGV)) { + if (GA->isInterposable()) { + Diags.Report(Location, diag::warn_alias_to_weak_alias) + << GV->getName() << GA->getName() << IsIFunc; Aliasee = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast( - GA->getAliasee(), Alias->getType()); - Alias->setAliasee(Aliasee); + GA->getIndirectSymbol(), Alias->getType()); + Alias->setIndirectSymbol(Aliasee); } } } @@ -332,7 +345,7 @@ void CodeGenModule::checkAliases() { for (const GlobalDecl &GD : Aliases) { StringRef MangledName = getMangledName(GD); llvm::GlobalValue *Entry = GetGlobalValue(MangledName); - auto *Alias = cast<llvm::GlobalAlias>(Entry); + auto *Alias = dyn_cast<llvm::GlobalIndirectSymbol>(Entry); Alias->replaceAllUsesWith(llvm::UndefValue::get(Alias->getType())); Alias->eraseFromParent(); } @@ -380,7 +393,7 @@ void CodeGenModule::Release() { OpenMPRuntime->emitRegistrationFunction()) AddGlobalCtor(OpenMPRegistrationFunction, 0); if (PGOReader) { - getModule().setMaximumFunctionCount(PGOReader->getMaximumFunctionCount()); + getModule().setProfileSummary(PGOReader->getSummary().getMD(VMContext)); if (PGOStats.hasDiagnostics()) PGOStats.reportDiagnostics(getDiags(), getCodeGenOpts().MainFileName); } @@ -391,7 +404,11 @@ void CodeGenModule::Release() { EmitDeferredUnusedCoverageMappings(); if (CoverageMapping) CoverageMapping->emit(); + if (CodeGenOpts.SanitizeCfiCrossDso) + CodeGenFunction(*this).EmitCfiCheckFail(); emitLLVMUsed(); + if (SanStats) + SanStats->finish(); if (CodeGenOpts.Autolink && (Context.getLangOpts().Modules || !LinkerOptionsMetadata.empty())) { @@ -452,16 +469,19 @@ void CodeGenModule::Release() { getModule().addModuleFlag(llvm::Module::Override, "Cross-DSO CFI", 1); } - if (uint32_t PLevel = Context.getLangOpts().PICLevel) { - llvm::PICLevel::Level PL = llvm::PICLevel::Default; - switch (PLevel) { - case 0: break; - case 1: PL = llvm::PICLevel::Small; break; - case 2: PL = llvm::PICLevel::Large; break; - default: llvm_unreachable("Invalid PIC Level"); - } + if (LangOpts.CUDAIsDevice && getTarget().getTriple().isNVPTX()) { + // Indicate whether __nvvm_reflect should be configured to flush denormal + // floating point values to 0. (This corresponds to its "__CUDA_FTZ" + // property.) + getModule().addModuleFlag(llvm::Module::Override, "nvvm-reflect-ftz", + LangOpts.CUDADeviceFlushDenormalsToZero ? 1 : 0); + } - getModule().setPICLevel(PL); + if (uint32_t PLevel = Context.getLangOpts().PICLevel) { + assert(PLevel < 3 && "Invalid PIC Level"); + getModule().setPICLevel(static_cast<llvm::PICLevel::Level>(PLevel)); + if (Context.getLangOpts().PIE) + getModule().setPIELevel(static_cast<llvm::PIELevel::Level>(PLevel)); } SimplifyPersonality(); @@ -485,6 +505,11 @@ void CodeGenModule::UpdateCompletedType(const TagDecl *TD) { Types.UpdateCompletedType(TD); } +void CodeGenModule::RefreshTypeCacheForClass(const CXXRecordDecl *RD) { + // Make sure that this type is translated. 
+ Types.RefreshTypeCacheForClass(RD); +} + llvm::MDNode *CodeGenModule::getTBAAInfo(QualType QTy) { if (!TBAA) return nullptr; @@ -740,6 +765,15 @@ CodeGenModule::getFunctionLinkage(GlobalDecl GD) { : llvm::GlobalValue::LinkOnceODRLinkage; } + if (isa<CXXConstructorDecl>(D) && + cast<CXXConstructorDecl>(D)->isInheritingConstructor() && + Context.getTargetInfo().getCXXABI().isMicrosoft()) { + // Our approach to inheriting constructors is fundamentally different from + // that used by the MS ABI, so keep our inheriting constructor thunks + // internal rather than trying to pick an unambiguous mangling for them. + return llvm::GlobalValue::InternalLinkage; + } + return getLLVMLinkageForDeclarator(D, Linkage, /*isConstantVariable=*/false); } @@ -762,8 +796,7 @@ void CodeGenModule::setFunctionDLLStorageClass(GlobalDecl GD, llvm::Function *F) F->setDLLStorageClass(llvm::GlobalVariable::DefaultStorageClass); } -llvm::ConstantInt * -CodeGenModule::CreateCfiIdForTypeMetadata(llvm::Metadata *MD) { +llvm::ConstantInt *CodeGenModule::CreateCrossDsoCfiTypeId(llvm::Metadata *MD) { llvm::MDString *MDS = dyn_cast<llvm::MDString>(MD); if (!MDS) return nullptr; @@ -882,12 +915,6 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, F->removeFnAttr(llvm::Attribute::InlineHint); } - if (isa<CXXConstructorDecl>(D) || isa<CXXDestructorDecl>(D)) - F->setUnnamedAddr(true); - else if (const auto *MD = dyn_cast<CXXMethodDecl>(D)) - if (MD->isVirtual()) - F->setUnnamedAddr(true); - unsigned alignment = D->getMaxAlignment() / Context.getCharWidth(); if (alignment) F->setAlignment(alignment); @@ -970,8 +997,8 @@ static void setLinkageAndVisibilityForGV(llvm::GlobalValue *GV, } } -void CodeGenModule::CreateFunctionBitSetEntry(const FunctionDecl *FD, - llvm::Function *F) { +void CodeGenModule::CreateFunctionTypeMetadata(const FunctionDecl *FD, + llvm::Function *F) { // Only if we are checking indirect calls. if (!LangOpts.Sanitize.has(SanitizerKind::CFIICall)) return; @@ -992,25 +1019,13 @@ void CodeGenModule::CreateFunctionBitSetEntry(const FunctionDecl *FD, return; } - llvm::NamedMDNode *BitsetsMD = - getModule().getOrInsertNamedMetadata("llvm.bitsets"); - llvm::Metadata *MD = CreateMetadataIdentifierForType(FD->getType()); - llvm::Metadata *BitsetOps[] = { - MD, llvm::ConstantAsMetadata::get(F), - llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(Int64Ty, 0))}; - BitsetsMD->addOperand(llvm::MDTuple::get(getLLVMContext(), BitsetOps)); + F->addTypeMetadata(0, MD); // Emit a hash-based bit set entry for cross-DSO calls. - if (CodeGenOpts.SanitizeCfiCrossDso) { - if (auto TypeId = CreateCfiIdForTypeMetadata(MD)) { - llvm::Metadata *BitsetOps2[] = { - llvm::ConstantAsMetadata::get(TypeId), - llvm::ConstantAsMetadata::get(F), - llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(Int64Ty, 0))}; - BitsetsMD->addOperand(llvm::MDTuple::get(getLLVMContext(), BitsetOps2)); - } - } + if (CodeGenOpts.SanitizeCfiCrossDso) + if (auto CrossDsoTypeId = CreateCrossDsoCfiTypeId(MD)) + F->addTypeMetadata(0, llvm::ConstantAsMetadata::get(CrossDsoTypeId)); } void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F, @@ -1049,13 +1064,29 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F, if (const SectionAttr *SA = FD->getAttr<SectionAttr>()) F->setSection(SA->getName()); - // A replaceable global allocation function does not act like a builtin by - // default, only if it is invoked by a new-expression or delete-expression. 
- if (FD->isReplaceableGlobalAllocationFunction()) + if (FD->isReplaceableGlobalAllocationFunction()) { + // A replaceable global allocation function does not act like a builtin by + // default, only if it is invoked by a new-expression or delete-expression. F->addAttribute(llvm::AttributeSet::FunctionIndex, llvm::Attribute::NoBuiltin); - CreateFunctionBitSetEntry(FD, F); + // A sane operator new returns a non-aliasing pointer. + // FIXME: Also add NonNull attribute to the return value + // for the non-nothrow forms? + auto Kind = FD->getDeclName().getCXXOverloadedOperator(); + if (getCodeGenOpts().AssumeSaneOperatorNew && + (Kind == OO_New || Kind == OO_Array_New)) + F->addAttribute(llvm::AttributeSet::ReturnIndex, + llvm::Attribute::NoAlias); + } + + if (isa<CXXConstructorDecl>(FD) || isa<CXXDestructorDecl>(FD)) + F->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); + else if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) + if (MD->isVirtual()) + F->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); + + CreateFunctionTypeMetadata(FD, F); } void CodeGenModule::addUsedGlobal(llvm::GlobalValue *GV) { @@ -1227,13 +1258,13 @@ void CodeGenModule::EmitDeferred() { if (!DeferredVTables.empty()) { EmitDeferredVTables(); - // Emitting a v-table doesn't directly cause more v-tables to + // Emitting a vtable doesn't directly cause more vtables to // become deferred, although it can cause functions to be - // emitted that then need those v-tables. + // emitted that then need those vtables. assert(DeferredVTables.empty()); } - // Stop if we're out of both deferred v-tables and deferred declarations. + // Stop if we're out of both deferred vtables and deferred declarations. if (DeferredDeclsToEmit.empty()) return; @@ -1244,19 +1275,23 @@ void CodeGenModule::EmitDeferred() { for (DeferredGlobal &G : CurDeclsToEmit) { GlobalDecl D = G.GD; - llvm::GlobalValue *GV = G.GV; G.GV = nullptr; // We should call GetAddrOfGlobal with IsForDefinition set to true in order // to get GlobalValue with exactly the type we need, not something that // might had been created for another decl with the same mangled name but // different type. - // FIXME: Support for variables is not implemented yet. - if (isa<FunctionDecl>(D.getDecl())) - GV = cast<llvm::GlobalValue>(GetAddrOfGlobal(D, /*IsForDefinition=*/true)); - else - if (!GV) - GV = GetGlobalValue(getMangledName(D)); + llvm::GlobalValue *GV = dyn_cast<llvm::GlobalValue>( + GetAddrOfGlobal(D, /*IsForDefinition=*/true)); + + // In case of different address spaces, we may still get a cast, even with + // IsForDefinition equal to true. Query mangled names table to get + // GlobalValue. + if (!GV) + GV = GetGlobalValue(getMangledName(D)); + + // Make sure GetGlobalValue returned non-null. + assert(GV); // Check to see if we've already emitted this. This is necessary // for a couple of reasons: first, decls can end up in the @@ -1264,7 +1299,7 @@ void CodeGenModule::EmitDeferred() { // up with definitions in unusual ways (e.g. by an extern inline // function acquiring a strong function redefinition). Just // ignore these cases. - if (GV && !GV->isDeclaration()) + if (!GV->isDeclaration()) continue; // Otherwise, emit the definition and move on to the next one. 
@@ -1304,7 +1339,7 @@ llvm::Constant *CodeGenModule::EmitAnnotationString(StringRef Str) { new llvm::GlobalVariable(getModule(), s->getType(), true, llvm::GlobalValue::PrivateLinkage, s, ".str"); gv->setSection(AnnotationSection); - gv->setUnnamedAddr(true); + gv->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); AStr = gv; return gv; } @@ -1412,6 +1447,12 @@ bool CodeGenModule::MayBeEmittedEagerly(const ValueDecl *Global) { // Implicit template instantiations may change linkage if they are later // explicitly instantiated, so they should not be emitted eagerly. return false; + if (const auto *VD = dyn_cast<VarDecl>(Global)) + if (Context.getInlineVariableDefinitionKind(VD) == + ASTContext::InlineVariableDefinitionKind::WeakUnknown) + // A definition of an inline constexpr static data member may change + // linkage later if it's redeclared outside the class. + return false; // If OpenMP is enabled and threadprivates must be generated like TLS, delay // codegen for global variables, because they may be marked as threadprivate. if (LangOpts.OpenMP && LangOpts.OpenMPUseTLS && @@ -1425,12 +1466,12 @@ ConstantAddress CodeGenModule::GetAddrOfUuidDescriptor( const CXXUuidofExpr* E) { // Sema has verified that IIDSource has a __declspec(uuid()), and that its // well-formed. - StringRef Uuid = E->getUuidAsStringRef(Context); + StringRef Uuid = E->getUuidStr(); std::string Name = "_GUID_" + Uuid.lower(); std::replace(Name.begin(), Name.end(), '-', '_'); - // Contains a 32-bit field. - CharUnits Alignment = CharUnits::fromQuantity(4); + // The UUID descriptor should be pointer aligned. + CharUnits Alignment = CharUnits::fromQuantity(PointerAlignInBytes); // Look for an existing global. if (llvm::GlobalVariable *GV = getModule().getNamedGlobal(Name)) @@ -1491,6 +1532,10 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { if (Global->hasAttr<AliasAttr>()) return EmitAliasDefinition(GD); + // IFunc like an alias whose value is resolved at runtime by calling resolver. + if (Global->hasAttr<IFuncAttr>()) + return emitIFuncDefinition(GD); + // If this is CUDA, be selective about which declarations we emit. if (LangOpts.CUDA) { if (LangOpts.CUDAIsDevice) { @@ -1500,18 +1545,32 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { !Global->hasAttr<CUDASharedAttr>()) return; } else { - if (!Global->hasAttr<CUDAHostAttr>() && ( - Global->hasAttr<CUDADeviceAttr>() || - Global->hasAttr<CUDAConstantAttr>() || - Global->hasAttr<CUDASharedAttr>())) + // We need to emit host-side 'shadows' for all global + // device-side variables because the CUDA runtime needs their + // size and host-side address in order to provide access to + // their device-side incarnations. + + // So device-only functions are the only things we skip. + if (isa<FunctionDecl>(Global) && !Global->hasAttr<CUDAHostAttr>() && + Global->hasAttr<CUDADeviceAttr>()) return; + + assert((isa<FunctionDecl>(Global) || isa<VarDecl>(Global)) && + "Expected Variable or Function"); } } - // If this is OpenMP device, check if it is legal to emit this global - // normally. - if (OpenMPRuntime && OpenMPRuntime->emitTargetGlobal(GD)) - return; + if (LangOpts.OpenMP) { + // If this is OpenMP device, check if it is legal to emit this global + // normally. + if (OpenMPRuntime && OpenMPRuntime->emitTargetGlobal(GD)) + return; + if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(Global)) { + if (MustBeEmitted(Global)) + EmitOMPDeclareReduction(DRD); + return; + } + } // Ignore declarations, they will be emitted on their first use. 
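// Illustrative sketch, not part of the patch: an implicitly-inline constexpr
// static data member. Until a redeclaration outside the class is seen its
// definition kind is WeakUnknown, which is why MayBeEmittedEagerly now
// declines to emit it eagerly.
struct Limits {
  static constexpr int max = 42; // an inline variable in C++17
};
// A later out-of-line redeclaration would make the definition strong:
// constexpr int Limits::max;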
if (const auto *FD = dyn_cast<FunctionDecl>(Global)) { @@ -1533,10 +1592,23 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { } else { const auto *VD = cast<VarDecl>(Global); assert(VD->isFileVarDecl() && "Cannot emit local var decl as global."); - - if (VD->isThisDeclarationADefinition() != VarDecl::Definition && - !Context.isMSStaticDataMemberInlineDefinition(VD)) + // We need to emit device-side global CUDA variables even if a + // variable does not have a definition -- we still need to define + // host-side shadow for it. + bool MustEmitForCuda = LangOpts.CUDA && !LangOpts.CUDAIsDevice && + !VD->hasDefinition() && + (VD->hasAttr<CUDAConstantAttr>() || + VD->hasAttr<CUDADeviceAttr>()); + if (!MustEmitForCuda && + VD->isThisDeclarationADefinition() != VarDecl::Definition && + !Context.isMSStaticDataMemberInlineDefinition(VD)) { + // If this declaration may have caused an inline variable definition to + // change linkage, make sure that it's emitted. + if (Context.getInlineVariableDefinitionKind(VD) == + ASTContext::InlineVariableDefinitionKind::Strong) + GetAddrOfGlobalVar(VD); return; + } } // Defer code generation to first use when possible, e.g. if this is an inline @@ -1690,7 +1762,7 @@ void CodeGenModule::CompleteDIClassType(const CXXMethodDecl* D) { return; if (CGDebugInfo *DI = getModuleDebugInfo()) - if (getCodeGenOpts().getDebugInfo() >= CodeGenOptions::LimitedDebugInfo) { + if (getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo) { const auto *ThisPtr = cast<PointerType>(D->getThisType(getContext())); DI->getOrCreateRecordType(ThisPtr->getPointeeType(), D->getLocation()); } @@ -1730,7 +1802,7 @@ void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) { } if (const auto *VD = dyn_cast<VarDecl>(D)) - return EmitGlobalVarDefinition(VD); + return EmitGlobalVarDefinition(VD, !VD->hasDefinition()); llvm_unreachable("Invalid argument to EmitGlobalDefinition()"); } @@ -1771,8 +1843,8 @@ CodeGenModule::GetOrCreateLLVMFunction(StringRef MangledName, // error. if (IsForDefinition && !Entry->isDeclaration()) { GlobalDecl OtherGD; - // Check that GD is not yet in ExplicitDefinitions is required to make - // sure that we issue an error only once. + // Check that GD is not yet in DiagnosedConflictingDefinitions is required + // to make sure that we issue an error only once. if (lookupRepresentativeDecl(MangledName, OtherGD) && (GD.getCanonicalDecl().getDecl() != OtherGD.getCanonicalDecl().getDecl()) && @@ -1982,10 +2054,15 @@ bool CodeGenModule::isTypeConstant(QualType Ty, bool ExcludeCtor) { /// /// If D is non-null, it specifies a decl that correspond to this. This is used /// to set the attributes on the global when it is first created. +/// +/// If IsForDefinition is true, it is guranteed that an actual global with +/// type Ty will be returned, not conversion of a variable with the same +/// mangled name but some other type. llvm::Constant * CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, llvm::PointerType *Ty, - const VarDecl *D) { + const VarDecl *D, + bool IsForDefinition) { // Lookup the entry, lazily creating it if necessary. llvm::GlobalValue *Entry = GetGlobalValue(MangledName); if (Entry) { @@ -2001,11 +2078,34 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, if (Entry->getType() == Ty) return Entry; + // If there are two attempts to define the same mangled name, issue an + // error. 
+ if (IsForDefinition && !Entry->isDeclaration()) { + GlobalDecl OtherGD; + const VarDecl *OtherD; + + // Check that D is not yet in DiagnosedConflictingDefinitions is required + // to make sure that we issue an error only once. + if (D && lookupRepresentativeDecl(MangledName, OtherGD) && + (D->getCanonicalDecl() != OtherGD.getCanonicalDecl().getDecl()) && + (OtherD = dyn_cast<VarDecl>(OtherGD.getDecl())) && + OtherD->hasInit() && + DiagnosedConflictingDefinitions.insert(D).second) { + getDiags().Report(D->getLocation(), + diag::err_duplicate_mangled_name); + getDiags().Report(OtherGD.getDecl()->getLocation(), + diag::note_previous_definition); + } + } + // Make sure the result is of the correct type. if (Entry->getType()->getAddressSpace() != Ty->getAddressSpace()) return llvm::ConstantExpr::getAddrSpaceCast(Entry, Ty); - return llvm::ConstantExpr::getBitCast(Entry, Ty); + // (If global is requested for a definition, we always need to create a new + // global, not just return a bitcast.) + if (!IsForDefinition) + return llvm::ConstantExpr::getBitCast(Entry, Ty); } unsigned AddrSpace = GetGlobalVarAddressSpace(D, Ty->getAddressSpace()); @@ -2014,6 +2114,20 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, llvm::GlobalValue::ExternalLinkage, nullptr, MangledName, nullptr, llvm::GlobalVariable::NotThreadLocal, AddrSpace); + // If we already created a global with the same mangled name (but different + // type) before, take its name and remove it from its parent. + if (Entry) { + GV->takeName(Entry); + + if (!Entry->use_empty()) { + llvm::Constant *NewPtrForOldDecl = + llvm::ConstantExpr::getBitCast(GV, Entry->getType()); + Entry->replaceAllUsesWith(NewPtrForOldDecl); + } + + Entry->eraseFromParent(); + } + // This is the first use or definition of a mangled name. If there is a // deferred decl with this name, remember that we need to emit it at the end // of the file. @@ -2086,7 +2200,8 @@ CodeGenModule::GetAddrOfGlobal(GlobalDecl GD, return GetAddrOfFunction(GD, Ty, /*ForVTable=*/false, /*DontDefer=*/false, IsForDefinition); } else - return GetAddrOfGlobalVar(cast<VarDecl>(GD.getDecl())); + return GetAddrOfGlobalVar(cast<VarDecl>(GD.getDecl()), /*Ty=*/nullptr, + IsForDefinition); } llvm::GlobalVariable * @@ -2134,9 +2249,12 @@ CodeGenModule::CreateOrReplaceCXXRuntimeVariable(StringRef Name, /// GetAddrOfGlobalVar - Return the llvm::Constant for the address of the /// given global variable. If Ty is non-null and if the global doesn't exist, /// then it will be created with the specified type instead of whatever the -/// normal requested type would be. +/// normal requested type would be. If IsForDefinition is true, it is guranteed +/// that an actual global with type Ty will be returned, not conversion of a +/// variable with the same mangled name but some other type. 
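A hypothetical C-level situation in which the same mangled name is first created with one type and later needs a definition with another, which is what the takeName/replaceAllUsesWith path above handles:

  extern int Table[];           // first reference: created with an incomplete array type
  int Table[4] = {1, 2, 3, 4};  // definition: requires a fresh global of the complete type
  // The old global gives up its name, its uses are rewritten through a bitcast
  // to the new global, and the old one is erased from the module.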
llvm::Constant *CodeGenModule::GetAddrOfGlobalVar(const VarDecl *D, - llvm::Type *Ty) { + llvm::Type *Ty, + bool IsForDefinition) { assert(D->hasGlobalStorage() && "Not a global variable"); QualType ASTTy = D->getType(); if (!Ty) @@ -2146,7 +2264,7 @@ llvm::Constant *CodeGenModule::GetAddrOfGlobalVar(const VarDecl *D, llvm::PointerType::get(Ty, getContext().getTargetAddressSpace(ASTTy)); StringRef MangledName = getMangledName(D); - return GetOrCreateLLVMGlobal(MangledName, PTy, D); + return GetOrCreateLLVMGlobal(MangledName, PTy, D, IsForDefinition); } /// CreateRuntimeVariable - Create a new runtime global variable with the @@ -2160,15 +2278,20 @@ CodeGenModule::CreateRuntimeVariable(llvm::Type *Ty, void CodeGenModule::EmitTentativeDefinition(const VarDecl *D) { assert(!D->getInit() && "Cannot emit definite definitions here!"); - if (!MustBeEmitted(D)) { - // If we have not seen a reference to this variable yet, place it - // into the deferred declarations table to be emitted if needed - // later. - StringRef MangledName = getMangledName(D); - if (!GetGlobalValue(MangledName)) { + StringRef MangledName = getMangledName(D); + llvm::GlobalValue *GV = GetGlobalValue(MangledName); + + // We already have a definition, not declaration, with the same mangled name. + // Emitting of declaration is not required (and actually overwrites emitted + // definition). + if (GV && !GV->isDeclaration()) + return; + + // If we have not seen a reference to this variable yet, place it into the + // deferred declarations table to be emitted if needed later. + if (!MustBeEmitted(D) && !GV) { DeferredDecls[MangledName] = D; return; - } } // The tentative definition is the only definition. @@ -2182,7 +2305,7 @@ CharUnits CodeGenModule::GetTargetTypeStoreSize(llvm::Type *Ty) const { unsigned CodeGenModule::GetGlobalVarAddressSpace(const VarDecl *D, unsigned AddrSpace) { - if (LangOpts.CUDA && LangOpts.CUDAIsDevice) { + if (D && LangOpts.CUDA && LangOpts.CUDAIsDevice) { if (D->hasAttr<CUDAConstantAttr>()) AddrSpace = getContext().getTargetAddressSpace(LangAS::cuda_constant); else if (D->hasAttr<CUDASharedAttr>()) @@ -2259,7 +2382,9 @@ void CodeGenModule::maybeSetTrivialComdat(const Decl &D, GO.setComdat(TheModule.getOrInsertComdat(GO.getName())); } -void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D) { +/// Pass IsTentative as true if you want to create a tentative definition. +void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D, + bool IsTentative) { llvm::Constant *Init = nullptr; QualType ASTTy = D->getType(); CXXRecordDecl *RD = ASTTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl(); @@ -2269,18 +2394,13 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D) { const VarDecl *InitDecl; const Expr *InitExpr = D->getAnyInitializer(InitDecl); - // CUDA E.2.4.1 "__shared__ variables cannot have an initialization as part - // of their declaration." - if (getLangOpts().CPlusPlus && getLangOpts().CUDAIsDevice - && D->hasAttr<CUDASharedAttr>()) { - if (InitExpr) { - const auto *C = dyn_cast<CXXConstructExpr>(InitExpr); - if (C == nullptr || !C->getConstructor()->hasTrivialBody()) - Error(D->getLocation(), - "__shared__ variable cannot have an initialization."); - } + // CUDA E.2.4.1 "__shared__ variables cannot have an initialization + // as part of their declaration." Sema has already checked for + // error cases, so we just need to set Init to UndefValue. 
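A hypothetical C translation unit showing the tentative-definition case that EmitTentativeDefinition now guards against: once a real definition with the same mangled name exists, re-emitting the declaration must not clobber it.

  int counter;     // tentative definition
  int counter;     // still tentative; emitted once, zero-initialized
  int limit = 10;  // real definition
  int limit;       // later tentative definition: must not overwrite the emitted initializer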
+ if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice && + D->hasAttr<CUDASharedAttr>()) Init = llvm::UndefValue::get(getTypes().ConvertType(ASTTy)); - } else if (!InitExpr) { + else if (!InitExpr) { // This is a tentative definition; tentative definitions are // implicitly initialized with { 0 }. // @@ -2318,7 +2438,8 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D) { } llvm::Type* InitType = Init->getType(); - llvm::Constant *Entry = GetAddrOfGlobalVar(D, InitType); + llvm::Constant *Entry = + GetAddrOfGlobalVar(D, InitType, /*IsForDefinition=*/!IsTentative); // Strip off a bitcast if we got one back. if (auto *CE = dyn_cast<llvm::ConstantExpr>(Entry)) { @@ -2350,7 +2471,8 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D) { Entry->setName(StringRef()); // Make a new global with the correct type, this is now guaranteed to work. - GV = cast<llvm::GlobalVariable>(GetAddrOfGlobalVar(D, InitType)); + GV = cast<llvm::GlobalVariable>( + GetAddrOfGlobalVar(D, InitType, /*IsForDefinition=*/!IsTentative)); // Replace all uses of the old global with the new global llvm::Constant *NewPtrForOldDecl = @@ -2366,6 +2488,10 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D) { if (D->hasAttr<AnnotateAttr>()) AddGlobalAnnotations(D, GV); + // Set the llvm linkage type as appropriate. + llvm::GlobalValue::LinkageTypes Linkage = + getLLVMLinkageVarDefinition(D, GV->isConstant()); + // CUDA B.2.1 "The __device__ qualifier declares a variable that resides on // the device. [...]" // CUDA B.2.2 "The __constant__ qualifier, optionally used together with @@ -2373,9 +2499,34 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D) { // Is accessible from all the threads within the grid and from the host // through the runtime library (cudaGetSymbolAddress() / cudaGetSymbolSize() // / cudaMemcpyToSymbol() / cudaMemcpyFromSymbol())." - if (GV && LangOpts.CUDA && LangOpts.CUDAIsDevice && - (D->hasAttr<CUDAConstantAttr>() || D->hasAttr<CUDADeviceAttr>())) { - GV->setExternallyInitialized(true); + if (GV && LangOpts.CUDA) { + if (LangOpts.CUDAIsDevice) { + if (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>()) + GV->setExternallyInitialized(true); + } else { + // Host-side shadows of external declarations of device-side + // global variables become internal definitions. These have to + // be internal in order to prevent name conflicts with global + // host variables with the same name in a different TUs. + if (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>()) { + Linkage = llvm::GlobalValue::InternalLinkage; + + // Shadow variables and their properties must be registered + // with CUDA runtime. + unsigned Flags = 0; + if (!D->hasDefinition()) + Flags |= CGCUDARuntime::ExternDeviceVar; + if (D->hasAttr<CUDAConstantAttr>()) + Flags |= CGCUDARuntime::ConstantDeviceVar; + getCUDARuntime().registerDeviceVar(*GV, Flags); + } else if (D->hasAttr<CUDASharedAttr>()) + // __shared__ variables are odd. Shadows do get created, but + // they are not registered with the CUDA runtime, so they + // can't really be used to access their device-side + // counterparts. It's not clear yet whether it's nvcc's bug or + // a feature, but we've got to do the same for compatibility. + Linkage = llvm::GlobalValue::InternalLinkage; + } } GV->setInitializer(Init); @@ -2392,9 +2543,6 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D) { GV->setAlignment(getContext().getDeclAlign(D).getQuantity()); - // Set the llvm linkage type as appropriate. 
- llvm::GlobalValue::LinkageTypes Linkage = - getLLVMLinkageVarDefinition(D, GV->isConstant()); // On Darwin, if the normal linkage of a C++ thread_local variable is // LinkOnce or Weak, we keep the normal linkage to prevent multiple @@ -2439,7 +2587,7 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D) { // Emit global variable debug information. if (CGDebugInfo *DI = getModuleDebugInfo()) - if (getCodeGenOpts().getDebugInfo() >= CodeGenOptions::LimitedDebugInfo) + if (getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo) DI->EmitGlobalVariable(GV, D); } @@ -2474,7 +2622,7 @@ static bool isVarDeclStrongDefinition(const ASTContext &Context, if (shouldBeInCOMDAT(CGM, *D)) return true; - // Declarations with a required alignment do not have common linakge in MSVC + // Declarations with a required alignment do not have common linkage in MSVC // mode. if (Context.getTargetInfo().getCXXABI().isMicrosoft()) { if (D->hasAttr<AlignedAttr>()) @@ -2535,9 +2683,18 @@ llvm::GlobalValue::LinkageTypes CodeGenModule::getLLVMLinkageForDeclarator( // explicit instantiations can occur in multiple translation units // and must all be equivalent. However, we are not allowed to // throw away these explicit instantiations. - if (Linkage == GVA_StrongODR) - return !Context.getLangOpts().AppleKext ? llvm::Function::WeakODRLinkage - : llvm::Function::ExternalLinkage; + // + // We don't currently support CUDA device code spread out across multiple TUs, + // so say that CUDA templates are either external (for kernels) or internal. + // This lets llvm perform aggressive inter-procedural optimizations. + if (Linkage == GVA_StrongODR) { + if (Context.getLangOpts().AppleKext) + return llvm::Function::ExternalLinkage; + if (Context.getLangOpts().CUDA && Context.getLangOpts().CUDAIsDevice) + return D->hasAttr<CUDAGlobalAttr>() ? llvm::Function::ExternalLinkage + : llvm::Function::InternalLinkage; + return llvm::Function::WeakODRLinkage; + } // C++ doesn't have tentative definitions and thus cannot have common // linkage. @@ -2694,6 +2851,10 @@ static void ReplaceUsesOfNonProtoTypeWithRealFunction(llvm::GlobalValue *Old, } void CodeGenModule::HandleCXXStaticMemberVarInstantiation(VarDecl *VD) { + auto DK = VD->isThisDeclarationADefinition(); + if (DK == VarDecl::Definition && VD->hasAttr<DLLImportAttr>()) + return; + TemplateSpecializationKind TSK = VD->getTemplateSpecializationKind(); // If we have a definition, this might be a deferred decl. If the // instantiation is explicit, make sure we emit it at the end. 
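A hypothetical explicit instantiation of the kind classified as GVA_StrongODR above; under CUDA device compilation the patch lowers it with internal linkage (or external for __global__ kernels) rather than the usual weak_odr:

  template <typename T> T square(T x) { return x * x; }
  template int square<int>(int);  // explicit instantiation definition (GVA_StrongODR)
  // Ordinary C++: emitted weak_odr so duplicate instantiations across TUs merge.
  // CUDA device code (single-TU assumption): emitted internal to enable IPO.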
@@ -2757,7 +2918,7 @@ void CodeGenModule::EmitAliasDefinition(GlobalDecl GD) { StringRef MangledName = getMangledName(GD); if (AA->getAliasee() == MangledName) { - Diags.Report(AA->getLocation(), diag::err_cyclic_alias); + Diags.Report(AA->getLocation(), diag::err_cyclic_alias) << 0; return; } @@ -2788,7 +2949,7 @@ void CodeGenModule::EmitAliasDefinition(GlobalDecl GD) { if (Entry) { if (GA->getAliasee() == Entry) { - Diags.Report(AA->getLocation(), diag::err_cyclic_alias); + Diags.Report(AA->getLocation(), diag::err_cyclic_alias) << 0; return; } @@ -2825,6 +2986,65 @@ void CodeGenModule::EmitAliasDefinition(GlobalDecl GD) { setAliasAttributes(D, GA); } +void CodeGenModule::emitIFuncDefinition(GlobalDecl GD) { + const auto *D = cast<ValueDecl>(GD.getDecl()); + const IFuncAttr *IFA = D->getAttr<IFuncAttr>(); + assert(IFA && "Not an ifunc?"); + + StringRef MangledName = getMangledName(GD); + + if (IFA->getResolver() == MangledName) { + Diags.Report(IFA->getLocation(), diag::err_cyclic_alias) << 1; + return; + } + + // Report an error if some definition overrides ifunc. + llvm::GlobalValue *Entry = GetGlobalValue(MangledName); + if (Entry && !Entry->isDeclaration()) { + GlobalDecl OtherGD; + if (lookupRepresentativeDecl(MangledName, OtherGD) && + DiagnosedConflictingDefinitions.insert(GD).second) { + Diags.Report(D->getLocation(), diag::err_duplicate_mangled_name); + Diags.Report(OtherGD.getDecl()->getLocation(), + diag::note_previous_definition); + } + return; + } + + Aliases.push_back(GD); + + llvm::Type *DeclTy = getTypes().ConvertTypeForMem(D->getType()); + llvm::Constant *Resolver = + GetOrCreateLLVMFunction(IFA->getResolver(), DeclTy, GD, + /*ForVTable=*/false); + llvm::GlobalIFunc *GIF = + llvm::GlobalIFunc::create(DeclTy, 0, llvm::Function::ExternalLinkage, + "", Resolver, &getModule()); + if (Entry) { + if (GIF->getResolver() == Entry) { + Diags.Report(IFA->getLocation(), diag::err_cyclic_alias) << 1; + return; + } + assert(Entry->isDeclaration()); + + // If there is a declaration in the module, then we had an extern followed + // by the ifunc, as in: + // extern int test(); + // ... + // int test() __attribute__((ifunc("resolver"))); + // + // Remove it and replace uses of it with the ifunc. + GIF->takeName(Entry); + + Entry->replaceAllUsesWith(llvm::ConstantExpr::getBitCast(GIF, + Entry->getType())); + Entry->eraseFromParent(); + } else + GIF->setName(MangledName); + + SetCommonAttributes(D, GIF); +} + llvm::Function *CodeGenModule::getIntrinsic(unsigned IID, ArrayRef<llvm::Type*> Tys) { return llvm::Intrinsic::getDeclaration(&getModule(), (llvm::Intrinsic::ID)IID, @@ -2889,19 +3109,40 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) { llvm::Constant *Zero = llvm::Constant::getNullValue(Int32Ty); llvm::Constant *Zeros[] = { Zero, Zero }; llvm::Value *V; - + // If we don't already have it, get __CFConstantStringClassReference. 
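A hypothetical source pattern handled by the emitIFuncDefinition hunk above: an extern declaration followed by an ifunc redeclaration naming a resolver defined in the same TU (all identifiers here are made up).

  extern "C" int test_impl(void) { return 0; }
  extern "C" int (*resolve_test(void))(void) { return &test_impl; }  // the resolver
  extern "C" int test(void);                                         // plain declaration seen first
  extern "C" int test(void) __attribute__((ifunc("resolve_test")));  // replaced by a GlobalIFunc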
if (!CFConstantStringClassRef) { llvm::Type *Ty = getTypes().ConvertType(getContext().IntTy); Ty = llvm::ArrayType::get(Ty, 0); - llvm::Constant *GV = CreateRuntimeVariable(Ty, - "__CFConstantStringClassReference"); + llvm::Constant *GV = + CreateRuntimeVariable(Ty, "__CFConstantStringClassReference"); + + if (getTarget().getTriple().isOSBinFormatCOFF()) { + IdentifierInfo &II = getContext().Idents.get(GV->getName()); + TranslationUnitDecl *TUDecl = getContext().getTranslationUnitDecl(); + DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl); + llvm::GlobalValue *CGV = cast<llvm::GlobalValue>(GV); + + const VarDecl *VD = nullptr; + for (const auto &Result : DC->lookup(&II)) + if ((VD = dyn_cast<VarDecl>(Result))) + break; + + if (!VD || !VD->hasAttr<DLLExportAttr>()) { + CGV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); + CGV->setLinkage(llvm::GlobalValue::ExternalLinkage); + } else { + CGV->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); + CGV->setLinkage(llvm::GlobalValue::ExternalLinkage); + } + } + // Decay array -> ptr V = llvm::ConstantExpr::getGetElementPtr(Ty, GV, Zeros); CFConstantStringClassRef = V; - } - else + } else { V = CFConstantStringClassRef; + } QualType CFTy = getContext().getCFConstantStringType(); @@ -2914,8 +3155,8 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) { // Flags. llvm::Type *Ty = getTypes().ConvertType(getContext().UnsignedIntTy); - Fields[1] = isUTF16 ? llvm::ConstantInt::get(Ty, 0x07d0) : - llvm::ConstantInt::get(Ty, 0x07C8); + Fields[1] = isUTF16 ? llvm::ConstantInt::get(Ty, 0x07d0) + : llvm::ConstantInt::get(Ty, 0x07C8); // String pointer. llvm::Constant *C = nullptr; @@ -2933,21 +3174,20 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) { auto *GV = new llvm::GlobalVariable(getModule(), C->getType(), /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, C, ".str"); - GV->setUnnamedAddr(true); + GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); // Don't enforce the target's minimum global alignment, since the only use // of the string is via this class initializer. - // FIXME: We set the section explicitly to avoid a bug in ld64 224.1. Without - // it LLVM can merge the string with a non unnamed_addr one during LTO. Doing - // that changes the section it ends in, which surprises ld64. - if (isUTF16) { - CharUnits Align = getContext().getTypeAlignInChars(getContext().ShortTy); - GV->setAlignment(Align.getQuantity()); - GV->setSection("__TEXT,__ustring"); - } else { - CharUnits Align = getContext().getTypeAlignInChars(getContext().CharTy); - GV->setAlignment(Align.getQuantity()); - GV->setSection("__TEXT,__cstring,cstring_literals"); - } + CharUnits Align = isUTF16 + ? getContext().getTypeAlignInChars(getContext().ShortTy) + : getContext().getTypeAlignInChars(getContext().CharTy); + GV->setAlignment(Align.getQuantity()); + + // FIXME: We set the section explicitly to avoid a bug in ld64 224.1. + // Without it LLVM can merge the string with a non unnamed_addr one during + // LTO. Doing that changes the section it ends in, which surprises ld64. + if (getTarget().getTriple().isOSBinFormatMachO()) + GV->setSection(isUTF16 ? "__TEXT,__ustring" + : "__TEXT,__cstring,cstring_literals"); // String. 
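For orientation, a hypothetical C++ mirror of the four fields assembled above for a CFString literal such as CFSTR("hello"):

  struct CFConstantStringShape {
    const void *isa;      // __CFConstantStringClassReference (dllimport'ed on COFF)
    unsigned    flags;    // 0x7C8 for 8-bit storage, 0x7D0 for UTF-16
    const void *payload;  // the ".str" global: unnamed_addr, Mach-O section set explicitly
    long        length;   // number of characters
  };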
Fields[2] = @@ -2968,8 +3208,18 @@ CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) { GV = new llvm::GlobalVariable(getModule(), C->getType(), true, llvm::GlobalVariable::PrivateLinkage, C, "_unnamed_cfstring_"); - GV->setSection("__DATA,__cfstring"); GV->setAlignment(Alignment.getQuantity()); + switch (getTarget().getTriple().getObjectFormat()) { + case llvm::Triple::UnknownObjectFormat: + llvm_unreachable("unknown file format"); + case llvm::Triple::COFF: + case llvm::Triple::ELF: + GV->setSection("cfstring"); + break; + case llvm::Triple::MachO: + GV->setSection("__DATA,__cfstring"); + break; + } Entry.second = GV; return ConstantAddress(GV, Alignment); @@ -3062,7 +3312,7 @@ CodeGenModule::GetAddrOfConstantString(const StringLiteral *Literal) { auto *GV = new llvm::GlobalVariable(getModule(), C->getType(), isConstant, Linkage, C, ".str"); - GV->setUnnamedAddr(true); + GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); // Don't enforce the target's minimum global alignment, since the only use // of the string is via this class initializer. CharUnits Align = getContext().getTypeAlignInChars(getContext().CharTy); @@ -3181,7 +3431,7 @@ GenerateStringLiteral(llvm::Constant *C, llvm::GlobalValue::LinkageTypes LT, M, C->getType(), !CGM.getLangOpts().WritableStrings, LT, C, GlobalName, nullptr, llvm::GlobalVariable::NotThreadLocal, AddrSpace); GV->setAlignment(Alignment.getQuantity()); - GV->setUnnamedAddr(true); + GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); if (GV->isWeakForLinker()) { assert(CGM.supportsCOMDAT() && "Only COFF uses weak string literals"); GV->setComdat(M.getOrInsertComdat(GV->getName())); @@ -3528,6 +3778,12 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) { case Decl::Namespace: EmitNamespace(cast<NamespaceDecl>(D)); break; + case Decl::CXXRecord: + // Emit any static data members, they may be definitions. + for (auto *I : cast<CXXRecordDecl>(D)->decls()) + if (isa<VarDecl>(I) || isa<CXXRecordDecl>(I)) + EmitTopLevelDecl(I); + break; // No code generation needed. case Decl::UsingShadow: case Decl::ClassTemplate: @@ -3595,7 +3851,7 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) { ObjCRuntime->GenerateClass(OMD); // Emit global variable debug information. if (CGDebugInfo *DI = getModuleDebugInfo()) - if (getCodeGenOpts().getDebugInfo() >= CodeGenOptions::LimitedDebugInfo) + if (getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo) DI->getOrCreateInterfaceType(getContext().getObjCInterfaceType( OMD->getClassInterface()), OMD->getLocation()); break; @@ -3611,6 +3867,31 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) { ObjCRuntime->RegisterAlias(cast<ObjCCompatibleAliasDecl>(D)); break; + case Decl::PragmaComment: { + const auto *PCD = cast<PragmaCommentDecl>(D); + switch (PCD->getCommentKind()) { + case PCK_Unknown: + llvm_unreachable("unexpected pragma comment kind"); + case PCK_Linker: + AppendLinkerOptions(PCD->getArg()); + break; + case PCK_Lib: + AddDependentLib(PCD->getArg()); + break; + case PCK_Compiler: + case PCK_ExeStr: + case PCK_User: + break; // We ignore all of these. 
+ } + break; + } + + case Decl::PragmaDetectMismatch: { + const auto *PDMD = cast<PragmaDetectMismatchDecl>(D); + AddDetectMismatch(PDMD->getName(), PDMD->getValue()); + break; + } + case Decl::LinkageSpec: EmitLinkageSpec(cast<LinkageSpecDecl>(D)); break; @@ -3653,6 +3934,10 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) { break; } + case Decl::OMPDeclareReduction: + EmitOMPDeclareReduction(cast<OMPDeclareReductionDecl>(D)); + break; + default: // Make sure we handled everything we should, every other kind is a // non-top-level decl. FIXME: Would be nice to have an isTopLevelDeclKind @@ -3775,6 +4060,10 @@ static void EmitGlobalDeclMetadata(CodeGenModule &CGM, /// to such functions with an unmangled name from inline assembly within the /// same translation unit. void CodeGenModule::EmitStaticExternCAliases() { + // Don't do anything if we're generating CUDA device code -- the NVPTX + // assembly target doesn't support aliases. + if (Context.getTargetInfo().getTriple().isNVPTX()) + return; for (auto &I : StaticExternCValues) { IdentifierInfo *Name = I.first; llvm::GlobalValue *Val = I.second; @@ -3955,27 +4244,35 @@ llvm::Metadata *CodeGenModule::CreateMetadataIdentifierForType(QualType T) { return InternalId; } -void CodeGenModule::CreateVTableBitSetEntry(llvm::NamedMDNode *BitsetsMD, - llvm::GlobalVariable *VTable, - CharUnits Offset, - const CXXRecordDecl *RD) { +/// Returns whether this module needs the "all-vtables" type identifier. +bool CodeGenModule::NeedAllVtablesTypeId() const { + // Returns true if at least one of vtable-based CFI checkers is enabled and + // is not in the trapping mode. + return ((LangOpts.Sanitize.has(SanitizerKind::CFIVCall) && + !CodeGenOpts.SanitizeTrap.has(SanitizerKind::CFIVCall)) || + (LangOpts.Sanitize.has(SanitizerKind::CFINVCall) && + !CodeGenOpts.SanitizeTrap.has(SanitizerKind::CFINVCall)) || + (LangOpts.Sanitize.has(SanitizerKind::CFIDerivedCast) && + !CodeGenOpts.SanitizeTrap.has(SanitizerKind::CFIDerivedCast)) || + (LangOpts.Sanitize.has(SanitizerKind::CFIUnrelatedCast) && + !CodeGenOpts.SanitizeTrap.has(SanitizerKind::CFIUnrelatedCast))); +} + +void CodeGenModule::AddVTableTypeMetadata(llvm::GlobalVariable *VTable, + CharUnits Offset, + const CXXRecordDecl *RD) { llvm::Metadata *MD = CreateMetadataIdentifierForType(QualType(RD->getTypeForDecl(), 0)); - llvm::Metadata *BitsetOps[] = { - MD, llvm::ConstantAsMetadata::get(VTable), - llvm::ConstantAsMetadata::get( - llvm::ConstantInt::get(Int64Ty, Offset.getQuantity()))}; - BitsetsMD->addOperand(llvm::MDTuple::get(getLLVMContext(), BitsetOps)); + VTable->addTypeMetadata(Offset.getQuantity(), MD); - if (CodeGenOpts.SanitizeCfiCrossDso) { - if (auto TypeId = CreateCfiIdForTypeMetadata(MD)) { - llvm::Metadata *BitsetOps2[] = { - llvm::ConstantAsMetadata::get(TypeId), - llvm::ConstantAsMetadata::get(VTable), - llvm::ConstantAsMetadata::get( - llvm::ConstantInt::get(Int64Ty, Offset.getQuantity()))}; - BitsetsMD->addOperand(llvm::MDTuple::get(getLLVMContext(), BitsetOps2)); - } + if (CodeGenOpts.SanitizeCfiCrossDso) + if (auto CrossDsoTypeId = CreateCrossDsoCfiTypeId(MD)) + VTable->addTypeMetadata(Offset.getQuantity(), + llvm::ConstantAsMetadata::get(CrossDsoTypeId)); + + if (NeedAllVtablesTypeId()) { + llvm::Metadata *MD = llvm::MDString::get(getLLVMContext(), "all-vtables"); + VTable->addTypeMetadata(Offset.getQuantity(), MD); } } @@ -4007,3 +4304,10 @@ void CodeGenModule::getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap, Target.getTargetOpts().Features); } } + +llvm::SanitizerStatReport 
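Hypothetical pragmas of the kinds now routed through the PragmaComment and PragmaDetectMismatch cases above (the library and option strings are placeholders):

  #pragma comment(lib, "ws2_32")                    // becomes a dependent-library entry
  #pragma comment(linker, "/include:_some_symbol")  // appended to the "Linker Options" metadata
  #pragma detect_mismatch("my_abi_version", "2")    // recorded so mismatched objects fail to link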
&CodeGenModule::getSanStats() { + if (!SanStats) + SanStats = llvm::make_unique<llvm::SanitizerStatReport>(&getModule()); + + return *SanStats; +} diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.h b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.h index fdb4d78..9490499 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.h @@ -21,6 +21,7 @@ #include "clang/AST/Attr.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclObjC.h" +#include "clang/AST/DeclOpenMP.h" #include "clang/AST/GlobalDecl.h" #include "clang/AST/Mangle.h" #include "clang/Basic/ABI.h" @@ -33,6 +34,7 @@ #include "llvm/ADT/StringMap.h" #include "llvm/IR/Module.h" #include "llvm/IR/ValueHandle.h" +#include "llvm/Transforms/Utils/SanitizerStats.h" namespace llvm { class Module; @@ -47,7 +49,6 @@ class IndexedInstrProfReader; } namespace clang { -class TargetCodeGenInfo; class ASTContext; class AtomicType; class FunctionDecl; @@ -91,6 +92,7 @@ class CGCUDARuntime; class BlockFieldFlags; class FunctionArgList; class CoverageMappingModuleGen; +class TargetCodeGenInfo; struct OrderGlobalInits { unsigned int priority; @@ -165,6 +167,9 @@ struct ObjCEntrypoints { /// id objc_storeWeak(id*, id); llvm::Constant *objc_storeWeak; + /// id objc_unsafeClaimAutoreleasedReturnValue(id); + llvm::Constant *objc_unsafeClaimAutoreleasedReturnValue; + /// A void(void) inline asm to use to mark that the return value of /// a call will be immediately retain. llvm::InlineAsm *retainAutoreleasedReturnValueMarker; @@ -268,9 +273,9 @@ private: std::unique_ptr<CGCXXABI> ABI; llvm::LLVMContext &VMContext; - CodeGenTBAA *TBAA; + std::unique_ptr<CodeGenTBAA> TBAA; - mutable const TargetCodeGenInfo *TheTargetCodeGenInfo; + mutable std::unique_ptr<TargetCodeGenInfo> TheTargetCodeGenInfo; // This should not be moved earlier, since its initialization depends on some // of the previous reference members being already initialized and also checks @@ -280,15 +285,16 @@ private: /// Holds information about C++ vtables. CodeGenVTables VTables; - CGObjCRuntime* ObjCRuntime; - CGOpenCLRuntime* OpenCLRuntime; - CGOpenMPRuntime* OpenMPRuntime; - CGCUDARuntime* CUDARuntime; - CGDebugInfo* DebugInfo; - ObjCEntrypoints *ObjCData; - llvm::MDNode *NoObjCARCExceptionsMetadata; + std::unique_ptr<CGObjCRuntime> ObjCRuntime; + std::unique_ptr<CGOpenCLRuntime> OpenCLRuntime; + std::unique_ptr<CGOpenMPRuntime> OpenMPRuntime; + std::unique_ptr<CGCUDARuntime> CUDARuntime; + std::unique_ptr<CGDebugInfo> DebugInfo; + std::unique_ptr<ObjCEntrypoints> ObjCData; + llvm::MDNode *NoObjCARCExceptionsMetadata = nullptr; std::unique_ptr<llvm::IndexedInstrProfReader> PGOReader; InstrProfStats PGOStats; + std::unique_ptr<llvm::SanitizerStatReport> SanStats; // A set of references that have only been seen via a weakref so far. This is // used to remove the weak of the reference if we ever see a direct reference @@ -429,8 +435,8 @@ private: llvm::WeakVH ConstantStringClassRef; /// \brief The LLVM type corresponding to NSConstantString. - llvm::StructType *NSConstantStringType; - + llvm::StructType *NSConstantStringType = nullptr; + /// \brief The type used to describe the state of a fast enumeration in /// Objective-C's for..in loop. 
QualType ObjCFastEnumerationStateType; @@ -450,24 +456,24 @@ private: /// @name Cache for Blocks Runtime Globals /// @{ - llvm::Constant *NSConcreteGlobalBlock; - llvm::Constant *NSConcreteStackBlock; + llvm::Constant *NSConcreteGlobalBlock = nullptr; + llvm::Constant *NSConcreteStackBlock = nullptr; - llvm::Constant *BlockObjectAssign; - llvm::Constant *BlockObjectDispose; + llvm::Constant *BlockObjectAssign = nullptr; + llvm::Constant *BlockObjectDispose = nullptr; - llvm::Type *BlockDescriptorType; - llvm::Type *GenericBlockLiteralType; + llvm::Type *BlockDescriptorType = nullptr; + llvm::Type *GenericBlockLiteralType = nullptr; struct { int GlobalUniqueCount; } Block; /// void @llvm.lifetime.start(i64 %size, i8* nocapture <ptr>) - llvm::Constant *LifetimeStartFn; + llvm::Constant *LifetimeStartFn = nullptr; /// void @llvm.lifetime.end(i64 %size, i8* nocapture <ptr>) - llvm::Constant *LifetimeEndFn; + llvm::Constant *LifetimeEndFn = nullptr; GlobalDecl initializedGlobalDecl; @@ -583,7 +589,7 @@ public: TypeDescriptorMap[Ty] = C; } - CGDebugInfo *getModuleDebugInfo() { return DebugInfo; } + CGDebugInfo *getModuleDebugInfo() { return DebugInfo.get(); } llvm::MDNode *getNoObjCARCExceptionsMetadata() { if (!NoObjCARCExceptionsMetadata) @@ -696,11 +702,14 @@ public: unsigned GetGlobalVarAddressSpace(const VarDecl *D, unsigned AddrSpace); /// Return the llvm::Constant for the address of the given global variable. - /// If Ty is non-null and if the global doesn't exist, then it will be greated + /// If Ty is non-null and if the global doesn't exist, then it will be created /// with the specified type instead of whatever the normal requested type - /// would be. + /// would be. If IsForDefinition is true, it is guranteed that an actual + /// global with type Ty will be returned, not conversion of a variable with + /// the same mangled name but some other type. llvm::Constant *GetAddrOfGlobalVar(const VarDecl *D, - llvm::Type *Ty = nullptr); + llvm::Type *Ty = nullptr, + bool IsForDefinition = false); /// Return the address of the given function. If Ty is non-null, then this /// function will use the specified type if it has to create it. @@ -989,6 +998,8 @@ public: void EmitVTable(CXXRecordDecl *Class); + void RefreshTypeCacheForClass(const CXXRecordDecl *Class); + /// \brief Appends Opts to the "Linker Options" metadata value. void AppendLinkerOptions(StringRef Opts); @@ -1098,34 +1109,42 @@ public: /// \param D Threadprivate declaration. void EmitOMPThreadPrivateDecl(const OMPThreadPrivateDecl *D); - /// Returns whether the given record is blacklisted from control flow - /// integrity checks. - bool IsCFIBlacklistedRecord(const CXXRecordDecl *RD); + /// \brief Emit a code for declare reduction construct. + void EmitOMPDeclareReduction(const OMPDeclareReductionDecl *D, + CodeGenFunction *CGF = nullptr); + + /// Returns whether the given record has hidden LTO visibility and therefore + /// may participate in (single-module) CFI and whole-program vtable + /// optimization. + bool HasHiddenLTOVisibility(const CXXRecordDecl *RD); - /// Emit bit set entries for the given vtable using the given layout if - /// vptr CFI is enabled. - void EmitVTableBitSetEntries(llvm::GlobalVariable *VTable, - const VTableLayout &VTLayout); + /// Emit type metadata for the given vtable using the given layout. + void EmitVTableTypeMetadata(llvm::GlobalVariable *VTable, + const VTableLayout &VTLayout); - /// Generate a cross-DSO type identifier for type. 
- llvm::ConstantInt *CreateCfiIdForTypeMetadata(llvm::Metadata *MD); + /// Generate a cross-DSO type identifier for MD. + llvm::ConstantInt *CreateCrossDsoCfiTypeId(llvm::Metadata *MD); /// Create a metadata identifier for the given type. This may either be an /// MDString (for external identifiers) or a distinct unnamed MDNode (for /// internal identifiers). llvm::Metadata *CreateMetadataIdentifierForType(QualType T); - /// Create a bitset entry for the given function and add it to BitsetsMD. - void CreateFunctionBitSetEntry(const FunctionDecl *FD, llvm::Function *F); + /// Create and attach type metadata to the given function. + void CreateFunctionTypeMetadata(const FunctionDecl *FD, llvm::Function *F); - /// Create a bitset entry for the given vtable and add it to BitsetsMD. - void CreateVTableBitSetEntry(llvm::NamedMDNode *BitsetsMD, - llvm::GlobalVariable *VTable, CharUnits Offset, - const CXXRecordDecl *RD); + /// Returns whether this module needs the "all-vtables" type identifier. + bool NeedAllVtablesTypeId() const; + + /// Create and attach type metadata for the given vtable. + void AddVTableTypeMetadata(llvm::GlobalVariable *VTable, CharUnits Offset, + const CXXRecordDecl *RD); /// \breif Get the declaration of std::terminate for the platform. llvm::Constant *getTerminateFn(); + llvm::SanitizerStatReport &getSanStats(); + private: llvm::Constant * GetOrCreateLLVMFunction(StringRef MangledName, llvm::Type *Ty, GlobalDecl D, @@ -1136,7 +1155,8 @@ private: llvm::Constant *GetOrCreateLLVMGlobal(StringRef MangledName, llvm::PointerType *PTy, - const VarDecl *D); + const VarDecl *D, + bool IsForDefinition = false); void setNonAliasAttributes(const Decl *D, llvm::GlobalObject *GO); @@ -1147,8 +1167,9 @@ private: void EmitGlobalDefinition(GlobalDecl D, llvm::GlobalValue *GV = nullptr); void EmitGlobalFunctionDefinition(GlobalDecl GD, llvm::GlobalValue *GV); - void EmitGlobalVarDefinition(const VarDecl *D); + void EmitGlobalVarDefinition(const VarDecl *D, bool IsTentative = false); void EmitAliasDefinition(GlobalDecl GD); + void emitIFuncDefinition(GlobalDecl GD); void EmitObjCPropertyImplementations(const ObjCImplementationDecl *D); void EmitObjCIvarInitializations(ObjCImplementationDecl *D); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenPGO.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenPGO.cpp index 2c0d93b..4eefdd7 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenPGO.cpp @@ -18,11 +18,14 @@ #include "clang/AST/StmtVisitor.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/MDBuilder.h" -#include "llvm/ProfileData/InstrProfReader.h" #include "llvm/Support/Endian.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/MD5.h" +static llvm::cl::opt<bool> EnableValueProfiling( + "enable-value-profiling", llvm::cl::ZeroOrMore, + llvm::cl::desc("Enable value profiling"), llvm::cl::init(false)); + using namespace clang; using namespace CodeGen; @@ -34,12 +37,14 @@ void CodeGenPGO::setFuncName(StringRef Name, PGOReader ? PGOReader->getVersion() : llvm::IndexedInstrProf::Version); // If we're generating a profile, create a variable for the name. - if (CGM.getCodeGenOpts().ProfileInstrGenerate) + if (CGM.getCodeGenOpts().hasProfileClangInstr()) FuncNameVar = llvm::createPGOFuncNameVar(CGM.getModule(), Linkage, FuncName); } void CodeGenPGO::setFuncName(llvm::Function *Fn) { setFuncName(Fn->getName(), Fn->getLinkage()); + // Create PGOFuncName meta data. 
+ llvm::createPGOFuncNameMetadata(*Fn, FuncName); } namespace { @@ -406,7 +411,8 @@ struct ComputeRegionCounts : public ConstStmtVisitor<ComputeRegionCounts> { RecordStmtCount(S); Visit(S->getLoopVarStmt()); Visit(S->getRangeStmt()); - Visit(S->getBeginEndStmt()); + Visit(S->getBeginStmt()); + Visit(S->getEndStmt()); uint64_t ParentCount = CurrentCount; BreakContinueStack.push_back(BreakContinue()); @@ -607,7 +613,7 @@ uint64_t PGOHash::finalize() { void CodeGenPGO::assignRegionCounters(GlobalDecl GD, llvm::Function *Fn) { const Decl *D = GD.getDecl(); - bool InstrumentRegions = CGM.getCodeGenOpts().ProfileInstrGenerate; + bool InstrumentRegions = CGM.getCodeGenOpts().hasProfileClangInstr(); llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader(); if (!InstrumentRegions && !PGOReader) return; @@ -653,12 +659,18 @@ void CodeGenPGO::mapRegionCounters(const Decl *D) { FunctionHash = Walker.Hash.finalize(); } -void CodeGenPGO::emitCounterRegionMapping(const Decl *D) { +bool CodeGenPGO::skipRegionMappingForDecl(const Decl *D) { if (SkipCoverageMapping) - return; - // Don't map the functions inside the system headers + return true; + + // Don't map the functions in system headers. + const auto &SM = CGM.getContext().getSourceManager(); auto Loc = D->getBody()->getLocStart(); - if (CGM.getContext().getSourceManager().isInSystemHeader(Loc)) + return SM.isInSystemHeader(Loc); +} + +void CodeGenPGO::emitCounterRegionMapping(const Decl *D) { + if (skipRegionMappingForDecl(D)) return; std::string CoverageMapping; @@ -679,11 +691,7 @@ void CodeGenPGO::emitCounterRegionMapping(const Decl *D) { void CodeGenPGO::emitEmptyCounterMapping(const Decl *D, StringRef Name, llvm::GlobalValue::LinkageTypes Linkage) { - if (SkipCoverageMapping) - return; - // Don't map the functions inside the system headers - auto Loc = D->getBody()->getLocStart(); - if (CGM.getContext().getSourceManager().isInSystemHeader(Loc)) + if (skipRegionMappingForDecl(D)) return; std::string CoverageMapping; @@ -726,7 +734,7 @@ CodeGenPGO::applyFunctionAttributes(llvm::IndexedInstrProfReader *PGOReader, } void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S) { - if (!CGM.getCodeGenOpts().ProfileInstrGenerate || !RegionCounterMap) + if (!CGM.getCodeGenOpts().hasProfileClangInstr() || !RegionCounterMap) return; if (!Builder.GetInsertBlock()) return; @@ -740,21 +748,76 @@ void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S) { Builder.getInt32(Counter)}); } +// This method either inserts a call to the profile run-time during +// instrumentation or puts profile data into metadata for PGO use. 
+void CodeGenPGO::valueProfile(CGBuilderTy &Builder, uint32_t ValueKind, + llvm::Instruction *ValueSite, llvm::Value *ValuePtr) { + + if (!EnableValueProfiling) + return; + + if (!ValuePtr || !ValueSite || !Builder.GetInsertBlock()) + return; + + if (isa<llvm::Constant>(ValuePtr)) + return; + + bool InstrumentValueSites = CGM.getCodeGenOpts().hasProfileClangInstr(); + if (InstrumentValueSites && RegionCounterMap) { + auto BuilderInsertPoint = Builder.saveIP(); + Builder.SetInsertPoint(ValueSite); + llvm::Value *Args[5] = { + llvm::ConstantExpr::getBitCast(FuncNameVar, Builder.getInt8PtrTy()), + Builder.getInt64(FunctionHash), + Builder.CreatePtrToInt(ValuePtr, Builder.getInt64Ty()), + Builder.getInt32(ValueKind), + Builder.getInt32(NumValueSites[ValueKind]++) + }; + Builder.CreateCall( + CGM.getIntrinsic(llvm::Intrinsic::instrprof_value_profile), Args); + Builder.restoreIP(BuilderInsertPoint); + return; + } + + llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader(); + if (PGOReader && haveRegionCounts()) { + // We record the top most called three functions at each call site. + // Profile metadata contains "VP" string identifying this metadata + // as value profiling data, then a uint32_t value for the value profiling + // kind, a uint64_t value for the total number of times the call is + // executed, followed by the function hash and execution count (uint64_t) + // pairs for each function. + if (NumValueSites[ValueKind] >= ProfRecord->getNumValueSites(ValueKind)) + return; + + llvm::annotateValueSite(CGM.getModule(), *ValueSite, *ProfRecord, + (llvm::InstrProfValueKind)ValueKind, + NumValueSites[ValueKind]); + + NumValueSites[ValueKind]++; + } +} + void CodeGenPGO::loadRegionCounts(llvm::IndexedInstrProfReader *PGOReader, bool IsInMainFile) { CGM.getPGOStats().addVisited(IsInMainFile); RegionCounts.clear(); - if (std::error_code EC = - PGOReader->getFunctionCounts(FuncName, FunctionHash, RegionCounts)) { - if (EC == llvm::instrprof_error::unknown_function) + llvm::Expected<llvm::InstrProfRecord> RecordExpected = + PGOReader->getInstrProfRecord(FuncName, FunctionHash); + if (auto E = RecordExpected.takeError()) { + auto IPE = llvm::InstrProfError::take(std::move(E)); + if (IPE == llvm::instrprof_error::unknown_function) CGM.getPGOStats().addMissing(IsInMainFile); - else if (EC == llvm::instrprof_error::hash_mismatch) + else if (IPE == llvm::instrprof_error::hash_mismatch) CGM.getPGOStats().addMismatched(IsInMainFile); - else if (EC == llvm::instrprof_error::malformed) + else if (IPE == llvm::instrprof_error::malformed) // TODO: Consider a more specific warning for this case. CGM.getPGOStats().addMismatched(IsInMainFile); - RegionCounts.clear(); + return; } + ProfRecord = + llvm::make_unique<llvm::InstrProfRecord>(std::move(RecordExpected.get())); + RegionCounts = ProfRecord->Counts; } /// \brief Calculate what to divide by to scale weights. 
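A hypothetical indirect call site of the kind the new valueProfile hook instruments with instrprof_value_profile, and later annotates with "VP" metadata naming the hottest targets:

  using Handler = int (*)(int);
  int dispatch(Handler h, int v) {
    return h(v);  // the run-time target of 'h' is the profiled value at this site
  }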
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenPGO.h b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenPGO.h index 6bf29ec..d03f235 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenPGO.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenPGO.h @@ -19,7 +19,9 @@ #include "CodeGenTypes.h" #include "clang/Frontend/CodeGenOptions.h" #include "llvm/ADT/StringMap.h" +#include "llvm/ProfileData/InstrProfReader.h" #include "llvm/Support/MemoryBuffer.h" +#include <array> #include <memory> namespace clang { @@ -32,10 +34,12 @@ private: std::string FuncName; llvm::GlobalVariable *FuncNameVar; + std::array <unsigned, llvm::IPVK_Last + 1> NumValueSites; unsigned NumRegionCounters; uint64_t FunctionHash; std::unique_ptr<llvm::DenseMap<const Stmt *, unsigned>> RegionCounterMap; std::unique_ptr<llvm::DenseMap<const Stmt *, uint64_t>> StmtCountMap; + std::unique_ptr<llvm::InstrProfRecord> ProfRecord; std::vector<uint64_t> RegionCounts; uint64_t CurrentRegionCount; /// \brief A flag that is set to true when this function doesn't need @@ -44,8 +48,8 @@ private: public: CodeGenPGO(CodeGenModule &CGM) - : CGM(CGM), NumRegionCounters(0), FunctionHash(0), CurrentRegionCount(0), - SkipCoverageMapping(false) {} + : CGM(CGM), NumValueSites({{0}}), NumRegionCounters(0), + FunctionHash(0), CurrentRegionCount(0), SkipCoverageMapping(false) {} /// Whether or not we have PGO region data for the current function. This is /// false both when we have no data at all and when our data has been @@ -87,6 +91,9 @@ public: /// for an unused declaration. void emitEmptyCounterMapping(const Decl *D, StringRef FuncName, llvm::GlobalValue::LinkageTypes Linkage); + // Insert instrumentation or attach profile metadata at value sites + void valueProfile(CGBuilderTy &Builder, uint32_t ValueKind, + llvm::Instruction *ValueSite, llvm::Value *ValuePtr); private: void setFuncName(llvm::Function *Fn); void setFuncName(StringRef Name, llvm::GlobalValue::LinkageTypes Linkage); @@ -96,6 +103,7 @@ private: llvm::Function *Fn); void loadRegionCounts(llvm::IndexedInstrProfReader *PGOReader, bool IsInMainFile); + bool skipRegionMappingForDecl(const Decl *D); void emitCounterRegionMapping(const Decl *D); public: diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTBAA.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTBAA.cpp index c3c925c..04224e7 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTBAA.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTBAA.cpp @@ -44,8 +44,12 @@ llvm::MDNode *CodeGenTBAA::getRoot() { // if our LLVM IR is linked with LLVM IR from a different front-end // (or a different version of this front-end), their TBAA trees will // remain distinct, and the optimizer will treat them conservatively. 
- if (!Root) - Root = MDHelper.createTBAARoot("Simple C/C++ TBAA"); + if (!Root) { + if (Features.CPlusPlus) + Root = MDHelper.createTBAARoot("Simple C++ TBAA"); + else + Root = MDHelper.createTBAARoot("Simple C/C++ TBAA"); + } return Root; } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTBAA.h b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTBAA.h index 632cadd..ddb063d 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTBAA.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTBAA.h @@ -15,14 +15,11 @@ #ifndef LLVM_CLANG_LIB_CODEGEN_CODEGENTBAA_H #define LLVM_CLANG_LIB_CODEGEN_CODEGENTBAA_H +#include "clang/AST/Type.h" #include "clang/Basic/LLVM.h" #include "llvm/ADT/DenseMap.h" #include "llvm/IR/MDBuilder.h" - -namespace llvm { - class LLVMContext; - class MDNode; -} +#include "llvm/IR/Metadata.h" namespace clang { class ASTContext; diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTypes.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTypes.cpp index 09d9bf1..ebe55c7 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTypes.cpp @@ -272,6 +272,17 @@ void CodeGenTypes::UpdateCompletedType(const TagDecl *TD) { DI->completeType(RD); } +void CodeGenTypes::RefreshTypeCacheForClass(const CXXRecordDecl *RD) { + QualType T = Context.getRecordType(RD); + T = Context.getCanonicalType(T); + + const Type *Ty = T.getTypePtr(); + if (RecordsWithOpaqueMemberPointers.count(Ty)) { + TypeCache.clear(); + RecordsWithOpaqueMemberPointers.clear(); + } +} + static llvm::Type *getTypeForFormat(llvm::LLVMContext &VMContext, const llvm::fltSemantics &format, bool UseNativeHalf = false) { @@ -438,6 +449,7 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { case BuiltinType::Float: case BuiltinType::Double: case BuiltinType::LongDouble: + case BuiltinType::Float128: ResultType = getTypeForFormat(getLLVMContext(), Context.getFloatTypeSemantics(T), /* UseNativeHalf = */ false); @@ -453,18 +465,9 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { ResultType = llvm::IntegerType::get(getLLVMContext(), 128); break; - case BuiltinType::OCLImage1d: - case BuiltinType::OCLImage1dArray: - case BuiltinType::OCLImage1dBuffer: - case BuiltinType::OCLImage2d: - case BuiltinType::OCLImage2dArray: - case BuiltinType::OCLImage2dDepth: - case BuiltinType::OCLImage2dArrayDepth: - case BuiltinType::OCLImage2dMSAA: - case BuiltinType::OCLImage2dArrayMSAA: - case BuiltinType::OCLImage2dMSAADepth: - case BuiltinType::OCLImage2dArrayMSAADepth: - case BuiltinType::OCLImage3d: +#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \ + case BuiltinType::Id: +#include "clang/Basic/OpenCLImageTypes.def" case BuiltinType::OCLSampler: case BuiltinType::OCLEvent: case BuiltinType::OCLClkEvent: @@ -603,10 +606,13 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { } case Type::MemberPointer: { - if (!getCXXABI().isMemberPointerConvertible(cast<MemberPointerType>(Ty))) - return llvm::StructType::create(getLLVMContext()); - ResultType = - getCXXABI().ConvertMemberPointerType(cast<MemberPointerType>(Ty)); + auto *MPTy = cast<MemberPointerType>(Ty); + if (!getCXXABI().isMemberPointerConvertible(MPTy)) { + RecordsWithOpaqueMemberPointers.insert(MPTy->getClass()); + ResultType = llvm::StructType::create(getLLVMContext()); + } else { + ResultType = getCXXABI().ConvertMemberPointerType(MPTy); + } break; } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTypes.h b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTypes.h index a96f23c4..5796ab8 
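A hypothetical Microsoft-ABI sequence motivating RecordsWithOpaqueMemberPointers and RefreshTypeCacheForClass above: while the class is incomplete its member-pointer representation is unknown, so an opaque type is cached and must be flushed once the inheritance model is settled.

  struct S;             // incomplete: inheritance model not yet known under the MS ABI
  int S::*FieldPtr;     // lowered to an opaque LLVM struct type and cached
  struct S { int x; };  // completing S fixes the representation; the cache is refreshed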
100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTypes.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenTypes.h @@ -31,7 +31,6 @@ class StructType; } namespace clang { -class ABIInfo; class ASTContext; template <typename> class CanQual; class CXXConstructorDecl; @@ -51,6 +50,7 @@ class Type; typedef CanQual<Type> CanQualType; namespace CodeGen { +class ABIInfo; class CGCXXABI; class CGRecordLayout; class CodeGenModule; @@ -162,6 +162,10 @@ class CodeGenTypes { /// corresponding llvm::Type. llvm::DenseMap<const Type *, llvm::Type *> TypeCache; + llvm::SmallSet<const Type *, 8> RecordsWithOpaqueMemberPointers; + + unsigned ClangCallConvToLLVMCallConv(CallingConv CC); + public: CodeGenTypes(CodeGenModule &cgm); ~CodeGenTypes(); @@ -203,6 +207,11 @@ public: bool isFuncTypeConvertible(const FunctionType *FT); bool isFuncParamTypeConvertible(QualType Ty); + /// Determine if a C++ inheriting constructor should have parameters matching + /// those of its inherited constructor. + bool inheritingCtorHasParams(const InheritedConstructor &Inherited, + CXXCtorType Type); + /// GetFunctionTypeForVTable - Get the LLVM function type for use in a vtable, /// given a CXXMethodDecl. If the method to has an incomplete return type, /// and/or incomplete argument types, this will return the opaque type. @@ -214,9 +223,9 @@ public: /// replace the 'opaque' type we previously made for it if applicable. void UpdateCompletedType(const TagDecl *TD); - /// getNullaryFunctionInfo - Get the function info for a void() - /// function with standard CC. - const CGFunctionInfo &arrangeNullaryFunction(); + /// \brief Remove stale types from the type cache when an inheritance model + /// gets assigned to a class. + void RefreshTypeCacheForClass(const CXXRecordDecl *RD); // The arrangement methods are split into three families: // - those meant to drive the signature and prologue/epilogue @@ -239,16 +248,55 @@ public: // this for compatibility reasons. const CGFunctionInfo &arrangeGlobalDeclaration(GlobalDecl GD); + + /// Given a function info for a declaration, return the function info + /// for a call with the given arguments. + /// + /// Often this will be able to simply return the declaration info. + const CGFunctionInfo &arrangeCall(const CGFunctionInfo &declFI, + const CallArgList &args); + + /// Free functions are functions that are compatible with an ordinary + /// C function pointer type. const CGFunctionInfo &arrangeFunctionDeclaration(const FunctionDecl *FD); + const CGFunctionInfo &arrangeFreeFunctionCall(const CallArgList &Args, + const FunctionType *Ty, + bool ChainCall); + const CGFunctionInfo &arrangeFreeFunctionType(CanQual<FunctionProtoType> Ty, + const FunctionDecl *FD); + const CGFunctionInfo &arrangeFreeFunctionType(CanQual<FunctionNoProtoType> Ty); + + /// A nullary function is a freestanding function of type 'void ()'. + /// This method works for both calls and declarations. + const CGFunctionInfo &arrangeNullaryFunction(); + + /// A builtin function is a freestanding function using the default + /// C conventions. 
+ const CGFunctionInfo & + arrangeBuiltinFunctionDeclaration(QualType resultType, + const FunctionArgList &args); const CGFunctionInfo & - arrangeFreeFunctionDeclaration(QualType ResTy, const FunctionArgList &Args, - const FunctionType::ExtInfo &Info, - bool isVariadic); + arrangeBuiltinFunctionDeclaration(CanQualType resultType, + ArrayRef<CanQualType> argTypes); + const CGFunctionInfo &arrangeBuiltinFunctionCall(QualType resultType, + const CallArgList &args); + /// Objective-C methods are C functions with some implicit parameters. const CGFunctionInfo &arrangeObjCMethodDeclaration(const ObjCMethodDecl *MD); const CGFunctionInfo &arrangeObjCMessageSendSignature(const ObjCMethodDecl *MD, QualType receiverType); + const CGFunctionInfo &arrangeUnprototypedObjCMessageSend( + QualType returnType, + const CallArgList &args); + + /// Block invocation functions are C functions with an implicit parameter. + const CGFunctionInfo &arrangeBlockFunctionDeclaration( + const FunctionProtoType *type, + const FunctionArgList &args); + const CGFunctionInfo &arrangeBlockFunctionCall(const CallArgList &args, + const FunctionType *type); + /// C++ methods have some special rules and also have implicit parameters. const CGFunctionInfo &arrangeCXXMethodDeclaration(const CXXMethodDecl *MD); const CGFunctionInfo &arrangeCXXStructorDeclaration(const CXXMethodDecl *MD, StructorType Type); @@ -256,15 +304,6 @@ public: const CXXConstructorDecl *D, CXXCtorType CtorKind, unsigned ExtraArgs); - const CGFunctionInfo &arrangeFreeFunctionCall(const CallArgList &Args, - const FunctionType *Ty, - bool ChainCall); - const CGFunctionInfo &arrangeFreeFunctionCall(QualType ResTy, - const CallArgList &args, - FunctionType::ExtInfo info, - RequiredArgs required); - const CGFunctionInfo &arrangeBlockFunctionCall(const CallArgList &args, - const FunctionType *type); const CGFunctionInfo &arrangeCXXMethodCall(const CallArgList &args, const FunctionProtoType *type, @@ -272,9 +311,6 @@ public: const CGFunctionInfo &arrangeMSMemberPointerThunk(const CXXMethodDecl *MD); const CGFunctionInfo &arrangeMSCtorClosure(const CXXConstructorDecl *CD, CXXCtorType CT); - const CGFunctionInfo &arrangeFreeFunctionType(CanQual<FunctionProtoType> Ty, - const FunctionDecl *FD); - const CGFunctionInfo &arrangeFreeFunctionType(CanQual<FunctionNoProtoType> Ty); const CGFunctionInfo &arrangeCXXMethodType(const CXXRecordDecl *RD, const FunctionProtoType *FTP, const CXXMethodDecl *MD); @@ -290,6 +326,7 @@ public: bool chainCall, ArrayRef<CanQualType> argTypes, FunctionType::ExtInfo info, + ArrayRef<FunctionProtoType::ExtParameterInfo> paramInfos, RequiredArgs args); /// \brief Compute a new LLVM record layout object for the given record. 
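A hypothetical inheriting-constructor example relevant to the inheritingCtorHasParams query added in this CodeGenTypes.h hunk: the ABI decides whether Derived's inherited constructor gets its own parameter list or simply forwards to Base's.

  struct Base { Base(int, double); };
  struct Derived : Base {
    using Base::Base;  // Derived(int, double) is an inheriting constructor
  };
  Derived d(1, 2.0);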
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CoverageMappingGen.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CoverageMappingGen.cpp index 03e22cd..b011a0f 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CoverageMappingGen.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CoverageMappingGen.cpp @@ -15,10 +15,12 @@ #include "CodeGenFunction.h" #include "clang/AST/StmtVisitor.h" #include "clang/Lex/Lexer.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Optional.h" -#include "llvm/ProfileData/CoverageMapping.h" -#include "llvm/ProfileData/CoverageMappingReader.h" -#include "llvm/ProfileData/CoverageMappingWriter.h" +#include "llvm/ProfileData/Coverage/CoverageMapping.h" +#include "llvm/ProfileData/Coverage/CoverageMappingReader.h" +#include "llvm/ProfileData/Coverage/CoverageMappingWriter.h" #include "llvm/ProfileData/InstrProfReader.h" #include "llvm/Support/FileSystem.h" @@ -128,6 +130,16 @@ public: return strcmp(SM.getBufferName(SM.getSpellingLoc(Loc)), "<built-in>") == 0; } + /// \brief Check whether \c Loc is included or expanded from \c Parent. + bool isNestedIn(SourceLocation Loc, FileID Parent) { + do { + Loc = getIncludeOrExpansionLoc(Loc); + if (Loc.isInvalid()) + return false; + } while (!SM.isInFileID(Loc, Parent)); + return true; + } + /// \brief Get the start of \c S ignoring macro arguments and builtin macros. SourceLocation getStart(const Stmt *S) { SourceLocation Loc = S->getLocStart(); @@ -152,14 +164,17 @@ public: void gatherFileIDs(SmallVectorImpl<unsigned> &Mapping) { FileIDMapping.clear(); - SmallVector<FileID, 8> Visited; + llvm::SmallSet<FileID, 8> Visited; SmallVector<std::pair<SourceLocation, unsigned>, 8> FileLocs; for (const auto &Region : SourceRegions) { SourceLocation Loc = Region.getStartLoc(); FileID File = SM.getFileID(Loc); - if (std::find(Visited.begin(), Visited.end(), File) != Visited.end()) + if (!Visited.insert(File).second) + continue; + + // Do not map FileID's associated with system headers. + if (SM.isInSystemHeader(SM.getSpellingLoc(Loc))) continue; - Visited.push_back(File); unsigned Depth = 0; for (SourceLocation Parent = getIncludeOrExpansionLoc(Loc); @@ -191,12 +206,6 @@ public: return None; } - /// \brief Return true if the given clang's file id has a corresponding - /// coverage file id. - bool hasExistingCoverageFileID(FileID File) const { - return FileIDMapping.count(File); - } - /// \brief Gather all the regions that were skipped by the preprocessor /// using the constructs like #if. void gatherSkippedRegions() { @@ -246,6 +255,10 @@ public: SourceLocation LocStart = Region.getStartLoc(); assert(SM.getFileID(LocStart).isValid() && "region in invalid file"); + // Ignore regions from system headers. + if (SM.isInSystemHeader(SM.getSpellingLoc(LocStart))) + continue; + auto CovFileID = getCoverageFileID(LocStart); // Ignore regions that don't have a file, such as builtin macros. if (!CovFileID) @@ -309,7 +322,27 @@ struct EmptyCoverageMappingBuilder : public CoverageMappingBuilder { if (!D->hasBody()) return; auto Body = D->getBody(); - SourceRegions.emplace_back(Counter(), getStart(Body), getEnd(Body)); + SourceLocation Start = getStart(Body); + SourceLocation End = getEnd(Body); + if (!SM.isWrittenInSameFile(Start, End)) { + // Walk up to find the common ancestor. + // Correct the locations accordingly. 
+ FileID StartFileID = SM.getFileID(Start); + FileID EndFileID = SM.getFileID(End); + while (StartFileID != EndFileID && !isNestedIn(End, StartFileID)) { + Start = getIncludeOrExpansionLoc(Start); + assert(Start.isValid() && + "Declaration start location not nested within a known region"); + StartFileID = SM.getFileID(Start); + } + while (StartFileID != EndFileID) { + End = getPreciseTokenLocEnd(getIncludeOrExpansionLoc(End)); + assert(End.isValid() && + "Declaration end location not nested within a known region"); + EndFileID = SM.getFileID(End); + } + } + SourceRegions.emplace_back(Counter(), Start, End); } /// \brief Write the mapping data to the output stream @@ -318,6 +351,9 @@ struct EmptyCoverageMappingBuilder : public CoverageMappingBuilder { gatherFileIDs(FileIDMapping); emitSourceRegions(); + if (MappingRegions.empty()) + return; + CoverageMappingWriter Writer(FileIDMapping, None, MappingRegions); Writer.write(OS); } @@ -356,10 +392,6 @@ struct CounterCoverageMappingBuilder return addCounters(addCounters(C1, C2), C3); } - Counter addCounters(Counter C1, Counter C2, Counter C3, Counter C4) { - return addCounters(addCounters(C1, C2, C3), C4); - } - /// \brief Return the region counter for the given statement. /// /// This should only be called on statements that have a dedicated counter. @@ -433,31 +465,43 @@ struct CounterCoverageMappingBuilder Visit(S); Counter ExitCount = getRegion().getCounter(); popRegions(Index); + + // The statement may be spanned by an expansion. Make sure we handle a file + // exit out of this expansion before moving to the next statement. + if (SM.isBeforeInTranslationUnit(getStart(S), S->getLocStart())) + MostRecentLocation = getEnd(S); + return ExitCount; } + /// \brief Check whether a region with bounds \c StartLoc and \c EndLoc + /// is already added to \c SourceRegions. + bool isRegionAlreadyAdded(SourceLocation StartLoc, SourceLocation EndLoc) { + return SourceRegions.rend() != + std::find_if(SourceRegions.rbegin(), SourceRegions.rend(), + [&](const SourceMappingRegion &Region) { + return Region.getStartLoc() == StartLoc && + Region.getEndLoc() == EndLoc; + }); + } + /// \brief Adjust the most recently visited location to \c EndLoc. /// /// This should be used after visiting any statements in non-source order. void adjustForOutOfOrderTraversal(SourceLocation EndLoc) { MostRecentLocation = EndLoc; - // Avoid adding duplicate regions if we have a completed region on the top - // of the stack and are adjusting to the end of a virtual file. + // The code region for a whole macro is created in handleFileExit() when + // it detects exiting of the virtual file of that macro. If we visited + // statements in non-source order, we might already have such a region + // added, for example, if a body of a loop is divided among multiple + // macros. Avoid adding duplicate regions in such case. if (getRegion().hasEndLoc() && - MostRecentLocation == getEndOfFileOrMacro(MostRecentLocation)) + MostRecentLocation == getEndOfFileOrMacro(MostRecentLocation) && + isRegionAlreadyAdded(getStartOfFileOrMacro(MostRecentLocation), + MostRecentLocation)) MostRecentLocation = getIncludeOrExpansionLoc(MostRecentLocation); } - /// \brief Check whether \c Loc is included or expanded from \c Parent. - bool isNestedIn(SourceLocation Loc, FileID Parent) { - do { - Loc = getIncludeOrExpansionLoc(Loc); - if (Loc.isInvalid()) - return false; - } while (!SM.isInFileID(Loc, Parent)); - return true; - } - /// \brief Adjust regions and state when \c NewLoc exits a file. 
/// /// If moving from our most recently tracked location to \c NewLoc exits any @@ -563,6 +607,9 @@ struct CounterCoverageMappingBuilder emitExpansionRegions(); gatherSkippedRegions(); + if (MappingRegions.empty()) + return; + CoverageMappingWriter Writer(VirtualFileMapping, Builder.getExpressions(), MappingRegions); Writer.write(OS); @@ -579,6 +626,11 @@ struct CounterCoverageMappingBuilder void VisitDecl(const Decl *D) { Stmt *Body = D->getBody(); + + // Do not propagate region counts into system headers. + if (Body && SM.isInSystemHeader(SM.getSpellingLoc(getStart(Body)))) + return; + propagateCounts(getRegionCounter(Body), Body); } @@ -769,7 +821,9 @@ struct CounterCoverageMappingBuilder BreakContinueStack.back().ContinueCount, BC.ContinueCount); Counter ExitCount = getRegionCounter(S); - pushRegion(ExitCount); + SourceLocation ExitLoc = getEnd(S); + pushRegion(ExitCount, getStart(S), ExitLoc); + handleFileExit(ExitLoc); } void VisitSwitchCase(const SwitchCase *S) { @@ -822,7 +876,12 @@ struct CounterCoverageMappingBuilder void VisitCXXTryStmt(const CXXTryStmt *S) { extendRegion(S); - Visit(S->getTryBlock()); + // Handle macros that generate the "try" but not the rest. + extendRegion(S->getTryBlock()); + + Counter ParentCount = getRegion().getCounter(); + propagateCounts(ParentCount, S->getTryBlock()); + for (unsigned I = 0, E = S->getNumHandlers(); I < E; ++I) Visit(S->getHandler(I)); @@ -911,7 +970,7 @@ static void dump(llvm::raw_ostream &OS, StringRef FunctionName, void CoverageMappingModuleGen::addFunctionMappingRecord( llvm::GlobalVariable *NamePtr, StringRef NameValue, uint64_t FuncHash, - const std::string &CoverageMapping, bool isUsed) { + const std::string &CoverageMapping, bool IsUsed) { llvm::LLVMContext &Ctx = CGM.getLLVMContext(); if (!FunctionRecordTy) { #define COVMAP_FUNC_RECORD(Type, LLVMType, Name, Init) LLVMType, @@ -929,10 +988,10 @@ void CoverageMappingModuleGen::addFunctionMappingRecord( }; FunctionRecords.push_back(llvm::ConstantStruct::get( FunctionRecordTy, makeArrayRef(FunctionRecordVals))); - if (!isUsed) + if (!IsUsed) FunctionNames.push_back( llvm::ConstantExpr::getBitCast(NamePtr, llvm::Type::getInt8PtrTy(Ctx))); - CoverageMappings += CoverageMapping; + CoverageMappings.push_back(CoverageMapping); if (CGM.getCodeGenOpts().DumpCoverageMapping) { // Dump the coverage mapping data for this function by decoding the @@ -978,8 +1037,10 @@ void CoverageMappingModuleGen::emit() { std::string FilenamesAndCoverageMappings; llvm::raw_string_ostream OS(FilenamesAndCoverageMappings); CoverageFilenamesSectionWriter(FilenameRefs).write(OS); - OS << CoverageMappings; - size_t CoverageMappingSize = CoverageMappings.size(); + std::string RawCoverageMappings = + llvm::join(CoverageMappings.begin(), CoverageMappings.end(), ""); + OS << RawCoverageMappings; + size_t CoverageMappingSize = RawCoverageMappings.size(); size_t FilenamesSize = OS.str().size() - CoverageMappingSize; // Append extra zeroes if necessary to ensure that the size of the filenames // and coverage mappings is a multiple of 8. @@ -1035,7 +1096,7 @@ void CoverageMappingModuleGen::emit() { // to pass the list of names referenced to codegen. 
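The addFunctionMappingRecord and emit() hunks above stop concatenating coverage mappings eagerly: each function's mapping is stored as its own string and only joined (llvm::join with an empty separator) when the combined coverage blob is built, after which the filenames-plus-mappings data is padded with zero bytes to a multiple of 8. A small standalone sketch of that size arithmetic, with std::accumulate standing in for llvm::join and made-up payload strings:

    #include <iostream>
    #include <numeric>
    #include <string>
    #include <vector>

    int main() {
      // Per-function coverage mappings, kept as separate strings until emission.
      std::vector<std::string> coverageMappings = {"AB", "CDE"};
      std::string filenames = "main.cpp;a.h"; // stand-in for the encoded filenames

      // Concatenate only at emit time.
      std::string rawMappings = std::accumulate(
          coverageMappings.begin(), coverageMappings.end(), std::string());

      std::string blob = filenames + rawMappings;

      // Pad so the combined filenames + mappings size is a multiple of 8,
      // mirroring the "append extra zeroes" step in emit().
      if (size_t rem = blob.size() % 8)
        blob.append(8 - rem, '\0');

      std::cout << "mappings: " << rawMappings.size() << " bytes, padded blob: "
                << blob.size() << " bytes\n"; // the 17-byte blob rounds up to 24
    }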
new llvm::GlobalVariable(CGM.getModule(), NamesArrTy, true, llvm::GlobalValue::InternalLinkage, NamesArrVal, - llvm::getCoverageNamesVarName()); + llvm::getCoverageUnusedNamesVarName()); } } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CoverageMappingGen.h b/contrib/llvm/tools/clang/lib/CodeGen/CoverageMappingGen.h index 9ae2bcf..c202fe8 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CoverageMappingGen.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/CoverageMappingGen.h @@ -56,7 +56,7 @@ class CoverageMappingModuleGen { std::vector<llvm::Constant *> FunctionRecords; std::vector<llvm::Constant *> FunctionNames; llvm::StructType *FunctionRecordTy; - std::string CoverageMappings; + std::vector<std::string> CoverageMappings; public: CoverageMappingModuleGen(CodeGenModule &CGM, CoverageSourceInfo &SourceInfo) @@ -72,7 +72,7 @@ public: StringRef FunctionNameValue, uint64_t FunctionHash, const std::string &CoverageMapping, - bool isUsed = true); + bool IsUsed = true); /// \brief Emit the coverage mapping data for a translation unit. void emit(); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/EHScopeStack.h b/contrib/llvm/tools/clang/lib/CodeGen/EHScopeStack.h index 85cd154..4717a66 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/EHScopeStack.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/EHScopeStack.h @@ -89,7 +89,10 @@ enum CleanupKind : unsigned { InactiveCleanup = 0x4, InactiveEHCleanup = EHCleanup | InactiveCleanup, InactiveNormalCleanup = NormalCleanup | InactiveCleanup, - InactiveNormalAndEHCleanup = NormalAndEHCleanup | InactiveCleanup + InactiveNormalAndEHCleanup = NormalAndEHCleanup | InactiveCleanup, + + LifetimeMarker = 0x8, + NormalEHLifetimeMarker = LifetimeMarker | NormalAndEHCleanup, }; /// A stack of scopes which respond to exceptions, including cleanups @@ -341,9 +344,7 @@ public: /// Determines whether the exception-scopes stack is empty. bool empty() const { return StartOfData == EndOfBuffer; } - bool requiresLandingPad() const { - return InnermostEHScope != stable_end(); - } + bool requiresLandingPad() const; /// Determines whether there are any normal cleanups on the stack. bool hasNormalCleanups() const { diff --git a/contrib/llvm/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp b/contrib/llvm/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp index e02c8dc..6051594 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -154,17 +154,9 @@ public: Address Ptr, QualType ElementType, const CXXDestructorDecl *Dtor) override; - /// Itanium says that an _Unwind_Exception has to be "double-word" - /// aligned (and thus the end of it is also so-aligned), meaning 16 - /// bytes. Of course, that was written for the actual Itanium, - /// which is a 64-bit platform. Classically, the ABI doesn't really - /// specify the alignment on other platforms, but in practice - /// libUnwind declares the struct with __attribute__((aligned)), so - /// we assume that alignment here. (It's generally 16 bytes, but - /// some targets overwrite it.) 
CharUnits getAlignmentOfExnObject() { - auto align = CGM.getContext().getTargetDefaultAlignForAttributeAligned(); - return CGM.getContext().toCharUnitsFromBits(align); + unsigned Align = CGM.getContext().getTargetInfo().getExnObjectAlignment(); + return CGM.getContext().toCharUnitsFromBits(Align); } void emitRethrow(CodeGenFunction &CGF, bool isNoReturn) override; @@ -451,6 +443,7 @@ private: (isa<CXXDestructorDecl>(GD.getDecl()) && GD.getDtorType() != Dtor_Deleting); } + bool canCallMismatchedFunctionType() const override { return false; } }; } @@ -1496,7 +1489,8 @@ void ItaniumCXXABI::emitVTableDefinitions(CodeGenVTables &CGVT, DC->getParent()->isTranslationUnit()) EmitFundamentalRTTIDescriptors(); - CGM.EmitVTableBitSetEntries(VTable, VTLayout); + if (!VTable->isDeclarationForLinker()) + CGM.EmitVTableTypeMetadata(VTable, VTLayout); } bool ItaniumCXXABI::isVirtualOffsetNeededForVTableField( @@ -1528,8 +1522,8 @@ ItaniumCXXABI::getVTableAddressPoint(BaseSubobject Base, .getVTableLayout(VTableClass) .getAddressPoint(Base); llvm::Value *Indices[] = { - llvm::ConstantInt::get(CGM.Int64Ty, 0), - llvm::ConstantInt::get(CGM.Int64Ty, AddressPoint) + llvm::ConstantInt::get(CGM.Int32Ty, 0), + llvm::ConstantInt::get(CGM.Int32Ty, AddressPoint) }; return llvm::ConstantExpr::getInBoundsGetElementPtr(VTable->getValueType(), @@ -1568,7 +1562,7 @@ llvm::GlobalVariable *ItaniumCXXABI::getAddrOfVTable(const CXXRecordDecl *RD, if (VTable) return VTable; - // Queue up this v-table for possible deferred emission. + // Queue up this vtable for possible deferred emission. CGM.addDeferredVTable(RD); SmallString<256> Name; @@ -1581,7 +1575,7 @@ llvm::GlobalVariable *ItaniumCXXABI::getAddrOfVTable(const CXXRecordDecl *RD, VTable = CGM.CreateOrReplaceCXXRuntimeVariable( Name, ArrayType, llvm::GlobalValue::ExternalLinkage); - VTable->setUnnamedAddr(true); + VTable->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); if (RD->hasAttr<DLLImportAttr>()) VTable->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); @@ -1601,14 +1595,18 @@ llvm::Value *ItaniumCXXABI::getVirtualFunctionPointer(CodeGenFunction &CGF, auto *MethodDecl = cast<CXXMethodDecl>(GD.getDecl()); llvm::Value *VTable = CGF.GetVTablePtr(This, Ty, MethodDecl->getParent()); - if (CGF.SanOpts.has(SanitizerKind::CFIVCall)) - CGF.EmitVTablePtrCheckForCall(MethodDecl, VTable, - CodeGenFunction::CFITCK_VCall, Loc); - uint64_t VTableIndex = CGM.getItaniumVTableContext().getMethodVTableIndex(GD); - llvm::Value *VFuncPtr = - CGF.Builder.CreateConstInBoundsGEP1_64(VTable, VTableIndex, "vfn"); - return CGF.Builder.CreateAlignedLoad(VFuncPtr, CGF.getPointerAlign()); + if (CGF.ShouldEmitVTableTypeCheckedLoad(MethodDecl->getParent())) { + return CGF.EmitVTableTypeCheckedLoad( + MethodDecl->getParent(), VTable, + VTableIndex * CGM.getContext().getTargetInfo().getPointerWidth(0) / 8); + } else { + CGF.EmitTypeMetadataCodeForVCall(MethodDecl->getParent(), VTable, Loc); + + llvm::Value *VFuncPtr = + CGF.Builder.CreateConstInBoundsGEP1_64(VTable, VTableIndex, "vfn"); + return CGF.Builder.CreateAlignedLoad(VFuncPtr, CGF.getPointerAlign()); + } } llvm::Value *ItaniumCXXABI::EmitVirtualDestructorCall( @@ -1913,10 +1911,18 @@ void ItaniumCXXABI::EmitGuardedInit(CodeGenFunction &CGF, bool shouldPerformInit) { CGBuilderTy &Builder = CGF.Builder; - // We only need to use thread-safe statics for local non-TLS variables; - // global initialization is always single-threaded. 
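In the getVirtualFunctionPointer hunk above, the type-checked-load path addresses the virtual function by a byte offset computed as VTableIndex times the target pointer width divided by 8, rather than a typed GEP over slot indices. A small sketch of that arithmetic, with a plain array of function pointers standing in for the vtable (the names and layout are illustrative only, not the real Itanium vtable layout):

    #include <cstdint>
    #include <cstring>
    #include <iostream>

    // Toy "vtable": an array of function pointers, one per virtual slot.
    static void f0() { std::cout << "slot 0\n"; }
    static void f1() { std::cout << "slot 1\n"; }
    static void f2() { std::cout << "slot 2\n"; }
    using Fn = void (*)();
    static Fn vtable[] = {f0, f1, f2};

    int main() {
      const unsigned pointerWidthBits = sizeof(void *) * 8; // e.g. 64
      const unsigned vtableIndex = 2;

      // Same arithmetic as VTableIndex * getPointerWidth(0) / 8.
      const std::uint64_t byteOffset = vtableIndex * pointerWidthBits / 8;
      std::cout << "byte offset of slot " << vtableIndex << ": " << byteOffset
                << "\n"; // 16 on a 64-bit target

      // Loading through the byte offset selects the same slot as indexing.
      Fn fn;
      std::memcpy(&fn, reinterpret_cast<const char *>(vtable) + byteOffset,
                  sizeof(fn));
      fn(); // prints "slot 2"
    }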
+ // Inline variables that weren't instantiated from variable templates have + // partially-ordered initialization within their translation unit. + bool NonTemplateInline = + D.isInline() && + !isTemplateInstantiation(D.getTemplateSpecializationKind()); + + // We only need to use thread-safe statics for local non-TLS variables and + // inline variables; other global initialization is always single-threaded + // or (through lazy dynamic loading in multiple threads) unsequenced. bool threadsafe = getContext().getLangOpts().ThreadsafeStatics && - D.isLocalVarDecl() && !D.getTLSKind(); + (D.isLocalVarDecl() || NonTemplateInline) && + !D.getTLSKind(); // If we have a global variable with internal linkage and thread-safe statics // are disabled, we can just let the guard variable be of type i8. @@ -1970,7 +1976,11 @@ void ItaniumCXXABI::EmitGuardedInit(CodeGenFunction &CGF, if (!D.isLocalVarDecl() && C && CGM.getTarget().getTriple().isOSBinFormatELF()) { guard->setComdat(C); - CGF.CurFn->setComdat(C); + // An inline variable's guard function is run from the per-TU + // initialization function, not via a dedicated global ctor function, so + // we can't put it in a comdat. + if (!NonTemplateInline) + CGF.CurFn->setComdat(C); } else if (CGM.supportsCOMDAT() && guard->isWeakForLinker()) { guard->setComdat(CGM.getModule().getOrInsertComdat(guard->getName())); } @@ -2008,7 +2018,7 @@ void ItaniumCXXABI::EmitGuardedInit(CodeGenFunction &CGF, // // In LLVM, we do this by marking the load Acquire. if (threadsafe) - LI->setAtomic(llvm::Acquire); + LI->setAtomic(llvm::AtomicOrdering::Acquire); // For ARM, we should only check the first bit, rather than the entire byte: // @@ -2178,17 +2188,28 @@ ItaniumCXXABI::getOrCreateThreadLocalWrapper(const VarDecl *VD, getMangleContext().mangleItaniumThreadLocalWrapper(VD, Out); } + // FIXME: If VD is a definition, we should regenerate the function attributes + // before returning. if (llvm::Value *V = CGM.getModule().getNamedValue(WrapperName)) return cast<llvm::Function>(V); - llvm::Type *RetTy = Val->getType(); - if (VD->getType()->isReferenceType()) - RetTy = RetTy->getPointerElementType(); + QualType RetQT = VD->getType(); + if (RetQT->isReferenceType()) + RetQT = RetQT.getNonReferenceType(); + + const CGFunctionInfo &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( + getContext().getPointerType(RetQT), FunctionArgList()); - llvm::FunctionType *FnTy = llvm::FunctionType::get(RetTy, false); + llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FI); llvm::Function *Wrapper = llvm::Function::Create(FnTy, getThreadLocalWrapperLinkage(VD, CGM), WrapperName.str(), &CGM.getModule()); + + CGM.SetLLVMFunctionAttributes(nullptr, FI, Wrapper); + + if (VD->hasDefinition()) + CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Wrapper); + // Always resolve references to the wrapper at link time. if (!Wrapper->hasLocalLinkage() && !(isThreadWrapperReplaceable(VD, CGM) && !llvm::GlobalVariable::isLinkOnceLinkage(Wrapper->getLinkage()) && @@ -2227,6 +2248,11 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs( CodeGenFunction(CGM) .GenerateCXXGlobalInitFunc(InitFunc, CXXThreadLocalInits, Address(Guard, GuardAlign)); + // On Darwin platforms, use CXX_FAST_TLS calling convention. 
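The hunks above extend thread-safe guarded initialization to non-template inline variables and keep the first check of the guard as an acquire load (now spelled llvm::AtomicOrdering::Acquire). The overall protocol can be modeled, under the assumption that std::atomic and std::mutex are acceptable stand-ins for the guard byte and for __cxa_guard_acquire / __cxa_guard_release, roughly as follows:

    #include <atomic>
    #include <iostream>
    #include <mutex>

    // Stand-ins for the guard variable and the runtime's internal guard lock.
    static std::atomic<unsigned char> guard{0};
    static std::mutex guardLock;
    static int value; // the static being lazily initialized

    static int expensiveInit() {
      std::cout << "running dynamic initializer once\n";
      return 42;
    }

    int &getStatic() {
      // Fast path: acquire-load the "initialized" bit, matching the Acquire
      // ordering placed on the guard load in EmitGuardedInit.
      if (guard.load(std::memory_order_acquire) == 0) {
        std::lock_guard<std::mutex> lock(guardLock); // ~ __cxa_guard_acquire
        if (guard.load(std::memory_order_relaxed) == 0) {
          value = expensiveInit();
          guard.store(1, std::memory_order_release); // ~ __cxa_guard_release
        }
      }
      return value;
    }

    int main() {
      getStatic();
      std::cout << getStatic() << "\n"; // the initializer ran only once
    }

This is the scenario the NonTemplateInline change above addresses: an inline variable may be initialized from several translation units concurrently, so its guard needs the same protocol as a function-local static.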
+ if (CGM.getTarget().getTriple().isOSDarwin()) { + InitFunc->setCallingConv(llvm::CallingConv::CXX_FAST_TLS); + InitFunc->addFnAttr(llvm::Attribute::NoUnwind); + } } for (const VarDecl *VD : CXXThreadLocals) { llvm::GlobalVariable *Var = @@ -2264,6 +2290,8 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs( Init = llvm::Function::Create( FnTy, llvm::GlobalVariable::ExternalWeakLinkage, InitFnName.str(), &CGM.getModule()); + const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); + CGM.SetLLVMFunctionAttributes(nullptr, FI, cast<llvm::Function>(Init)); } if (Init) @@ -2274,8 +2302,11 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs( llvm::BasicBlock *Entry = llvm::BasicBlock::Create(Context, "", Wrapper); CGBuilderTy Builder(CGM, Entry); if (InitIsInitFunc) { - if (Init) - Builder.CreateCall(Init); + if (Init) { + llvm::CallInst *CallVal = Builder.CreateCall(Init); + if (isThreadWrapperReplaceable(VD, CGM)) + CallVal->setCallingConv(llvm::CallingConv::CXX_FAST_TLS); + } } else { // Don't know whether we have an init function. Call it if it exists. llvm::Value *Have = Builder.CreateIsNotNull(Init); @@ -2491,6 +2522,11 @@ static bool TypeInfoIsInStandardLibrary(const BuiltinType *Ty) { // long, unsigned long, long long, unsigned long long, float, double, // long double, char16_t, char32_t, and the IEEE 754r decimal and // half-precision floating point types. + // + // GCC also emits RTTI for __int128. + // FIXME: We do not emit RTTI information for decimal types here. + + // Types added here must also be added to EmitFundamentalRTTIDescriptors. switch (Ty->getKind()) { case BuiltinType::Void: case BuiltinType::NullPtr: @@ -2513,29 +2549,23 @@ static bool TypeInfoIsInStandardLibrary(const BuiltinType *Ty) { case BuiltinType::Float: case BuiltinType::Double: case BuiltinType::LongDouble: + case BuiltinType::Float128: case BuiltinType::Char16: case BuiltinType::Char32: case BuiltinType::Int128: case BuiltinType::UInt128: - case BuiltinType::OCLImage1d: - case BuiltinType::OCLImage1dArray: - case BuiltinType::OCLImage1dBuffer: - case BuiltinType::OCLImage2d: - case BuiltinType::OCLImage2dArray: - case BuiltinType::OCLImage2dDepth: - case BuiltinType::OCLImage2dArrayDepth: - case BuiltinType::OCLImage2dMSAA: - case BuiltinType::OCLImage2dArrayMSAA: - case BuiltinType::OCLImage2dMSAADepth: - case BuiltinType::OCLImage2dArrayMSAADepth: - case BuiltinType::OCLImage3d: + return true; + +#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \ + case BuiltinType::Id: +#include "clang/Basic/OpenCLImageTypes.def" case BuiltinType::OCLSampler: case BuiltinType::OCLEvent: case BuiltinType::OCLClkEvent: case BuiltinType::OCLQueue: case BuiltinType::OCLNDRange: case BuiltinType::OCLReserveID: - return true; + return false; case BuiltinType::Dependent: #define BUILTIN_TYPE(Id, SingletonId) @@ -2864,7 +2894,7 @@ static llvm::GlobalVariable::LinkageTypes getTypeInfoLinkage(CodeGenModule &CGM, llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo(QualType Ty, bool Force) { // We want to operate on the canonical type. - Ty = CGM.getContext().getCanonicalType(Ty); + Ty = Ty.getCanonicalType(); // Check if we've already emitted an RTTI descriptor for this type. SmallString<256> Name; @@ -3327,6 +3357,7 @@ void ItaniumCXXABI::EmitFundamentalRTTIDescriptor(QualType Type) { } void ItaniumCXXABI::EmitFundamentalRTTIDescriptors() { + // Types added here must also be added to TypeInfoIsInStandardLibrary. 
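The TypeInfoIsInStandardLibrary hunk above replaces the hand-written list of OpenCL image builtins with an IMAGE_TYPE X-macro expanded from clang/Basic/OpenCLImageTypes.def, so new image types added to the .def file flow into the switch automatically. The technique itself, shown with a self-contained sketch (the Shape list below is invented; in clang the list lives in a separate .def file pulled in with #include):

    #include <iostream>

    // Stands in for the contents of a .def file.
    #define SHAPE_TYPES  \
      SHAPE_TYPE(Circle) \
      SHAPE_TYPE(Square) \
      SHAPE_TYPE(Triangle)

    // First expansion: one enumerator per entry.
    enum class Shape {
    #define SHAPE_TYPE(Name) Name,
      SHAPE_TYPES
    #undef SHAPE_TYPE
    };

    // Second expansion: one switch case per entry, the same way the diff
    // generates "case BuiltinType::Id:" for every OpenCL image type.
    const char *shapeName(Shape s) {
      switch (s) {
    #define SHAPE_TYPE(Name) \
      case Shape::Name:      \
        return #Name;
        SHAPE_TYPES
    #undef SHAPE_TYPE
      }
      return "unknown";
    }

    int main() { std::cout << shapeName(Shape::Square) << "\n"; }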
QualType FundamentalTypes[] = { getContext().VoidTy, getContext().NullPtrTy, getContext().BoolTy, getContext().WCharTy, @@ -3335,10 +3366,11 @@ void ItaniumCXXABI::EmitFundamentalRTTIDescriptors() { getContext().UnsignedShortTy, getContext().IntTy, getContext().UnsignedIntTy, getContext().LongTy, getContext().UnsignedLongTy, getContext().LongLongTy, - getContext().UnsignedLongLongTy, getContext().HalfTy, + getContext().UnsignedLongLongTy, getContext().Int128Ty, + getContext().UnsignedInt128Ty, getContext().HalfTy, getContext().FloatTy, getContext().DoubleTy, - getContext().LongDoubleTy, getContext().Char16Ty, - getContext().Char32Ty, + getContext().LongDoubleTy, getContext().Float128Ty, + getContext().Char16Ty, getContext().Char32Ty }; for (const QualType &FundamentalType : FundamentalTypes) EmitFundamentalRTTIDescriptor(FundamentalType); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/MicrosoftCXXABI.cpp b/contrib/llvm/tools/clang/lib/CodeGen/MicrosoftCXXABI.cpp index 93210d5..41cd53c 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/MicrosoftCXXABI.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/MicrosoftCXXABI.cpp @@ -254,8 +254,8 @@ public: CXXDtorType Type, bool ForVirtualBase, bool Delegating, Address This) override; - void emitVTableBitSetEntries(VPtrInfo *Info, const CXXRecordDecl *RD, - llvm::GlobalVariable *VTable); + void emitVTableTypeMetadata(VPtrInfo *Info, const CXXRecordDecl *RD, + llvm::GlobalVariable *VTable); void emitVTableDefinitions(CodeGenVTables &CGVT, const CXXRecordDecl *RD) override; @@ -551,7 +551,7 @@ private: return llvm::Constant::getAllOnesValue(CGM.IntTy); } - CharUnits getVirtualFunctionPrologueThisAdjustment(GlobalDecl GD); + CharUnits getVirtualFunctionPrologueThisAdjustment(GlobalDecl GD) override; void GetNullMemberPointerFields(const MemberPointerType *MPT, @@ -942,7 +942,6 @@ MicrosoftCXXABI::performBaseAdjustment(CodeGenFunction &CGF, Address Value, llvm::Value *Offset = GetVirtualBaseClassOffset(CGF, Value, SrcDecl, PolymorphicBase); llvm::Value *Ptr = CGF.Builder.CreateInBoundsGEP(Value.getPointer(), Offset); - Offset = CGF.Builder.CreateTrunc(Offset, CGF.Int32Ty); CharUnits VBaseAlign = CGF.CGM.getVBaseAlignment(Value.getAlignment(), SrcDecl, PolymorphicBase); return std::make_pair(Address(Ptr, VBaseAlign), Offset); @@ -976,8 +975,8 @@ llvm::Value *MicrosoftCXXABI::EmitTypeid(CodeGenFunction &CGF, QualType SrcRecordTy, Address ThisPtr, llvm::Type *StdTypeInfoPtrTy) { - llvm::Value *Offset; - std::tie(ThisPtr, Offset) = performBaseAdjustment(CGF, ThisPtr, SrcRecordTy); + std::tie(ThisPtr, std::ignore) = + performBaseAdjustment(CGF, ThisPtr, SrcRecordTy); auto Typeid = emitRTtypeidCall(CGF, ThisPtr.getPointer()).getInstruction(); return CGF.Builder.CreateBitCast(Typeid, StdTypeInfoPtrTy); } @@ -1002,6 +1001,7 @@ llvm::Value *MicrosoftCXXABI::EmitDynamicCastCall( llvm::Value *Offset; std::tie(This, Offset) = performBaseAdjustment(CGF, This, SrcRecordTy); llvm::Value *ThisPtr = This.getPointer(); + Offset = CGF.Builder.CreateTrunc(Offset, CGF.Int32Ty); // PVOID __RTDynamicCast( // PVOID inptr, @@ -1025,8 +1025,7 @@ llvm::Value * MicrosoftCXXABI::EmitDynamicCastToVoid(CodeGenFunction &CGF, Address Value, QualType SrcRecordTy, QualType DestTy) { - llvm::Value *Offset; - std::tie(Value, Offset) = performBaseAdjustment(CGF, Value, SrcRecordTy); + std::tie(Value, std::ignore) = performBaseAdjustment(CGF, Value, SrcRecordTy); // PVOID __RTCastToVoid( // PVOID inptr) @@ -1152,16 +1151,14 @@ void MicrosoftCXXABI::initializeHiddenVirtualInheritanceMembers( 
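The EmitTypeid and EmitDynamicCastToVoid hunks above stop binding an Offset local they never use and instead discard that half of performBaseAdjustment's result with std::ignore. The idiom in isolation, with a made-up adjust() helper returning a {value, offset} pair:

    #include <iostream>
    #include <tuple>
    #include <utility>

    // Stand-in for performBaseAdjustment: returns an adjusted value plus the
    // offset that was applied to produce it.
    static std::pair<int, long> adjust(int value) { return {value + 16, 16}; }

    int main() {
      int adjusted;
      long offset;

      // When both results are needed, bind both (as EmitDynamicCastCall does).
      std::tie(adjusted, offset) = adjust(100);
      std::cout << adjusted << " (offset " << offset << ")\n";

      // When only the adjusted value matters, discard the offset explicitly
      // rather than keeping a dead local around.
      std::tie(adjusted, std::ignore) = adjust(200);
      std::cout << adjusted << "\n";
    }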
llvm::Value *VBaseOffset = GetVirtualBaseClassOffset(CGF, getThisAddress(CGF), RD, I->first); - // FIXME: it doesn't look right that we SExt in GetVirtualBaseClassOffset() - // just to Trunc back immediately. - VBaseOffset = Builder.CreateTruncOrBitCast(VBaseOffset, CGF.Int32Ty); uint64_t ConstantVBaseOffset = Layout.getVBaseClassOffset(I->first).getQuantity(); // vtorDisp_for_vbase = vbptr[vbase_idx] - offsetof(RD, vbase). llvm::Value *VtorDispValue = Builder.CreateSub( - VBaseOffset, llvm::ConstantInt::get(CGM.Int32Ty, ConstantVBaseOffset), + VBaseOffset, llvm::ConstantInt::get(CGM.PtrDiffTy, ConstantVBaseOffset), "vtordisp.value"); + VtorDispValue = Builder.CreateTruncOrBitCast(VtorDispValue, CGF.Int32Ty); if (!Int8This) Int8This = Builder.CreateBitCast(getThisValue(CGF), @@ -1467,16 +1464,18 @@ unsigned MicrosoftCXXABI::addImplicitConstructorArgs( // Add the 'most_derived' argument second if we are variadic or last if not. const FunctionProtoType *FPT = D->getType()->castAs<FunctionProtoType>(); - llvm::Value *MostDerivedArg = - llvm::ConstantInt::get(CGM.Int32Ty, Type == Ctor_Complete); - RValue RV = RValue::get(MostDerivedArg); - if (MostDerivedArg) { - if (FPT->isVariadic()) - Args.insert(Args.begin() + 1, - CallArg(RV, getContext().IntTy, /*needscopy=*/false)); - else - Args.add(RV, getContext().IntTy); + llvm::Value *MostDerivedArg; + if (Delegating) { + MostDerivedArg = getStructorImplicitParamValue(CGF); + } else { + MostDerivedArg = llvm::ConstantInt::get(CGM.Int32Ty, Type == Ctor_Complete); } + RValue RV = RValue::get(MostDerivedArg); + if (FPT->isVariadic()) + Args.insert(Args.begin() + 1, + CallArg(RV, getContext().IntTy, /*needscopy=*/false)); + else + Args.add(RV, getContext().IntTy); return 1; // Added one arg. } @@ -1494,24 +1493,18 @@ void MicrosoftCXXABI::EmitDestructorCall(CodeGenFunction &CGF, This, false); } - CGF.EmitCXXStructorCall(DD, Callee, ReturnValueSlot(), This.getPointer(), - /*ImplicitParam=*/nullptr, - /*ImplicitParamTy=*/QualType(), nullptr, - getFromDtorType(Type)); + CGF.EmitCXXDestructorCall(DD, Callee, This.getPointer(), + /*ImplicitParam=*/nullptr, + /*ImplicitParamTy=*/QualType(), nullptr, + getFromDtorType(Type)); } -void MicrosoftCXXABI::emitVTableBitSetEntries(VPtrInfo *Info, - const CXXRecordDecl *RD, - llvm::GlobalVariable *VTable) { - if (!getContext().getLangOpts().Sanitize.has(SanitizerKind::CFIVCall) && - !getContext().getLangOpts().Sanitize.has(SanitizerKind::CFINVCall) && - !getContext().getLangOpts().Sanitize.has(SanitizerKind::CFIDerivedCast) && - !getContext().getLangOpts().Sanitize.has(SanitizerKind::CFIUnrelatedCast)) +void MicrosoftCXXABI::emitVTableTypeMetadata(VPtrInfo *Info, + const CXXRecordDecl *RD, + llvm::GlobalVariable *VTable) { + if (!CGM.getCodeGenOpts().PrepareForLTO) return; - llvm::NamedMDNode *BitsetsMD = - CGM.getModule().getOrInsertNamedMetadata("llvm.bitsets"); - // The location of the first virtual function pointer in the virtual table, // aka the "address point" on Itanium. This is at offset 0 if RTTI is // disabled, or sizeof(void*) if RTTI is enabled. @@ -1522,15 +1515,13 @@ void MicrosoftCXXABI::emitVTableBitSetEntries(VPtrInfo *Info, : CharUnits::Zero(); if (Info->PathToBaseWithVPtr.empty()) { - if (!CGM.IsCFIBlacklistedRecord(RD)) - CGM.CreateVTableBitSetEntry(BitsetsMD, VTable, AddressPoint, RD); + CGM.AddVTableTypeMetadata(VTable, AddressPoint, RD); return; } // Add a bitset entry for the least derived base belonging to this vftable. 
- if (!CGM.IsCFIBlacklistedRecord(Info->PathToBaseWithVPtr.back())) - CGM.CreateVTableBitSetEntry(BitsetsMD, VTable, AddressPoint, - Info->PathToBaseWithVPtr.back()); + CGM.AddVTableTypeMetadata(VTable, AddressPoint, + Info->PathToBaseWithVPtr.back()); // Add a bitset entry for each derived class that is laid out at the same // offset as the least derived base. @@ -1548,13 +1539,12 @@ void MicrosoftCXXABI::emitVTableBitSetEntries(VPtrInfo *Info, Offset = VBI->second.VBaseOffset; if (!Offset.isZero()) return; - if (!CGM.IsCFIBlacklistedRecord(DerivedRD)) - CGM.CreateVTableBitSetEntry(BitsetsMD, VTable, AddressPoint, DerivedRD); + CGM.AddVTableTypeMetadata(VTable, AddressPoint, DerivedRD); } // Finally do the same for the most derived class. - if (Info->FullOffsetInMDC.isZero() && !CGM.IsCFIBlacklistedRecord(RD)) - CGM.CreateVTableBitSetEntry(BitsetsMD, VTable, AddressPoint, RD); + if (Info->FullOffsetInMDC.isZero()) + CGM.AddVTableTypeMetadata(VTable, AddressPoint, RD); } void MicrosoftCXXABI::emitVTableDefinitions(CodeGenVTables &CGVT, @@ -1567,12 +1557,14 @@ void MicrosoftCXXABI::emitVTableDefinitions(CodeGenVTables &CGVT, if (VTable->hasInitializer()) continue; - llvm::Constant *RTTI = getContext().getLangOpts().RTTIData - ? getMSCompleteObjectLocator(RD, Info) - : nullptr; - const VTableLayout &VTLayout = VFTContext.getVFTableLayout(RD, Info->FullOffsetInMDC); + + llvm::Constant *RTTI = nullptr; + if (any_of(VTLayout.vtable_components(), + [](const VTableComponent &VTC) { return VTC.isRTTIKind(); })) + RTTI = getMSCompleteObjectLocator(RD, Info); + llvm::Constant *Init = CGVT.CreateVTableInitializer( RD, VTLayout.vtable_component_begin(), VTLayout.getNumVTableComponents(), VTLayout.vtable_thunk_begin(), @@ -1580,7 +1572,7 @@ void MicrosoftCXXABI::emitVTableDefinitions(CodeGenVTables &CGVT, VTable->setInitializer(Init); - emitVTableBitSetEntries(Info, RD, VTable); + emitVTableTypeMetadata(Info, RD, VTable); } } @@ -1642,7 +1634,7 @@ llvm::GlobalVariable *MicrosoftCXXABI::getAddrOfVTable(const CXXRecordDecl *RD, if (DeferredVFTables.insert(RD).second) { // We haven't processed this record type before. - // Queue up this v-table for possible deferred emission. + // Queue up this vtable for possible deferred emission. CGM.addDeferredVTable(RD); #ifndef NDEBUG @@ -1671,7 +1663,16 @@ llvm::GlobalVariable *MicrosoftCXXABI::getAddrOfVTable(const CXXRecordDecl *RD, SmallString<256> VFTableName; mangleVFTableName(getMangleContext(), RD, VFPtr, VFTableName); - llvm::GlobalValue::LinkageTypes VFTableLinkage = CGM.getVTableLinkage(RD); + // Classes marked __declspec(dllimport) need vftables generated on the + // import-side in order to support features like constexpr. No other + // translation unit relies on the emission of the local vftable, translation + // units are expected to generate them as needed. + // + // Because of this unique behavior, we maintain this logic here instead of + // getVTableLinkage. + llvm::GlobalValue::LinkageTypes VFTableLinkage = + RD->hasAttr<DLLImportAttr>() ? 
llvm::GlobalValue::LinkOnceODRLinkage + : CGM.getVTableLinkage(RD); bool VFTableComesFromAnotherTU = llvm::GlobalValue::isAvailableExternallyLinkage(VFTableLinkage) || llvm::GlobalValue::isExternalLinkage(VFTableLinkage); @@ -1705,7 +1706,7 @@ llvm::GlobalVariable *MicrosoftCXXABI::getAddrOfVTable(const CXXRecordDecl *RD, VTable = new llvm::GlobalVariable(CGM.getModule(), VTableType, /*isConstant=*/true, VTableLinkage, /*Initializer=*/nullptr, VTableName); - VTable->setUnnamedAddr(true); + VTable->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); llvm::Comdat *C = nullptr; if (!VFTableComesFromAnotherTU && @@ -1733,7 +1734,7 @@ llvm::GlobalVariable *MicrosoftCXXABI::getAddrOfVTable(const CXXRecordDecl *RD, /*AddressSpace=*/0, VFTableLinkage, VFTableName.str(), VTableGEP, &CGM.getModule()); - VFTable->setUnnamedAddr(true); + VFTable->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); } else { // We don't need a GlobalAlias to be a symbol for the VTable if we won't // be referencing any RTTI data. @@ -1744,9 +1745,7 @@ llvm::GlobalVariable *MicrosoftCXXABI::getAddrOfVTable(const CXXRecordDecl *RD, if (C) VTable->setComdat(C); - if (RD->hasAttr<DLLImportAttr>()) - VFTable->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); - else if (RD->hasAttr<DLLExportAttr>()) + if (RD->hasAttr<DLLExportAttr>()) VFTable->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); VFTablesMap[ID] = VFTable; @@ -1813,13 +1812,20 @@ llvm::Value *MicrosoftCXXABI::getVirtualFunctionPointer(CodeGenFunction &CGF, MicrosoftVTableContext::MethodVFTableLocation ML = CGM.getMicrosoftVTableContext().getMethodVFTableLocation(GD); - if (CGF.SanOpts.has(SanitizerKind::CFIVCall)) - CGF.EmitVTablePtrCheck(getClassAtVTableLocation(getContext(), GD, ML), - VTable, CodeGenFunction::CFITCK_VCall, Loc); - llvm::Value *VFuncPtr = - Builder.CreateConstInBoundsGEP1_64(VTable, ML.Index, "vfn"); - return Builder.CreateAlignedLoad(VFuncPtr, CGF.getPointerAlign()); + if (CGF.ShouldEmitVTableTypeCheckedLoad(MethodDecl->getParent())) { + return CGF.EmitVTableTypeCheckedLoad( + getClassAtVTableLocation(getContext(), GD, ML), VTable, + ML.Index * CGM.getContext().getTargetInfo().getPointerWidth(0) / 8); + } else { + if (CGM.getCodeGenOpts().PrepareForLTO) + CGF.EmitTypeMetadataCodeForVCall( + getClassAtVTableLocation(getContext(), GD, ML), VTable, Loc); + + llvm::Value *VFuncPtr = + Builder.CreateConstInBoundsGEP1_64(VTable, ML.Index, "vfn"); + return Builder.CreateAlignedLoad(VFuncPtr, CGF.getPointerAlign()); + } } llvm::Value *MicrosoftCXXABI::EmitVirtualDestructorCall( @@ -1843,10 +1849,9 @@ llvm::Value *MicrosoftCXXABI::EmitVirtualDestructorCall( DtorType == Dtor_Deleting); This = adjustThisArgumentForVirtualFunctionCall(CGF, GD, This, true); - RValue RV = CGF.EmitCXXStructorCall(Dtor, Callee, ReturnValueSlot(), - This.getPointer(), - ImplicitParam, Context.IntTy, CE, - StructorType::Deleting); + RValue RV = + CGF.EmitCXXDestructorCall(Dtor, Callee, This.getPointer(), ImplicitParam, + Context.IntTy, CE, StructorType::Deleting); return RV.getScalarVal(); } @@ -1916,7 +1921,7 @@ llvm::Function *MicrosoftCXXABI::EmitVirtualMemPtrThunk( ThunkFn->addFnAttr("thunk"); // These thunks can be compared, so they are not unnamed. - ThunkFn->setUnnamedAddr(false); + ThunkFn->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::None); // Start codegen. 
CodeGenFunction CGF(CGM); @@ -1973,7 +1978,7 @@ MicrosoftCXXABI::getAddrOfVBTable(const VPtrInfo &VBT, const CXXRecordDecl *RD, "vbtable with this name already exists: mangling bug?"); llvm::GlobalVariable *GV = CGM.CreateOrReplaceCXXRuntimeVariable(Name, VBTableType, Linkage); - GV->setUnnamedAddr(true); + GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); if (RD->hasAttr<DLLImportAttr>()) GV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); @@ -2030,6 +2035,9 @@ void MicrosoftCXXABI::emitVBTableDefinition(const VPtrInfo &VBT, llvm::ArrayType::get(CGM.IntTy, Offsets.size()); llvm::Constant *Init = llvm::ConstantArray::get(VBTableType, Offsets); GV->setInitializer(Init); + + if (RD->hasAttr<DLLImportAttr>()) + GV->setLinkage(llvm::GlobalVariable::AvailableExternallyLinkage); } llvm::Value *MicrosoftCXXABI::performThisAdjustment(CodeGenFunction &CGF, @@ -2302,7 +2310,7 @@ struct ResetGuardBit final : EHScopeStack::Cleanup { CGBuilderTy &Builder = CGF.Builder; llvm::LoadInst *LI = Builder.CreateLoad(Guard); llvm::ConstantInt *Mask = - llvm::ConstantInt::get(CGF.IntTy, ~(1U << GuardNum)); + llvm::ConstantInt::get(CGF.IntTy, ~(1ULL << GuardNum)); Builder.CreateStore(Builder.CreateAnd(LI, Mask), Guard); } }; @@ -2415,7 +2423,7 @@ void MicrosoftCXXABI::EmitGuardedInit(CodeGenFunction &CGF, const VarDecl &D, // } // Test our bit from the guard variable. - llvm::ConstantInt *Bit = llvm::ConstantInt::get(GuardTy, 1U << GuardNum); + llvm::ConstantInt *Bit = llvm::ConstantInt::get(GuardTy, 1ULL << GuardNum); llvm::LoadInst *LI = Builder.CreateLoad(GuardAddr); llvm::Value *IsInitialized = Builder.CreateICmpNE(Builder.CreateAnd(LI, Bit), Zero); @@ -3631,7 +3639,8 @@ MSRTTIBuilder::getCompleteObjectLocator(const VPtrInfo *Info) { } static QualType decomposeTypeForEH(ASTContext &Context, QualType T, - bool &IsConst, bool &IsVolatile) { + bool &IsConst, bool &IsVolatile, + bool &IsUnaligned) { T = Context.getExceptionObjectType(T); // C++14 [except.handle]p3: @@ -3641,10 +3650,12 @@ static QualType decomposeTypeForEH(ASTContext &Context, QualType T, // - a qualification conversion IsConst = false; IsVolatile = false; + IsUnaligned = false; QualType PointeeType = T->getPointeeType(); if (!PointeeType.isNull()) { IsConst = PointeeType.isConstQualified(); IsVolatile = PointeeType.isVolatileQualified(); + IsUnaligned = PointeeType.getQualifiers().hasUnaligned(); } // Member pointer types like "const int A::*" are represented by having RTTI @@ -3667,8 +3678,9 @@ MicrosoftCXXABI::getAddrOfCXXCatchHandlerType(QualType Type, // TypeDescriptors for exceptions never have qualified pointer types, // qualifiers are stored seperately in order to support qualification // conversions. 
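The ResetGuardBit and guard-bit-test hunks above widen the shifted constant from 1U to 1ULL. With a 32-bit 1U, shift counts of 32 or more are undefined, and even for small counts the complement ~(1U << n) only has 32 significant bits, so it zero-extends instead of filling the upper half of a wider mask. A tiny sketch of the arithmetic difference (the 64-bit view is just for illustration; the actual guard width depends on the target):

    #include <cstdint>
    #include <iostream>

    int main() {
      const unsigned guardNum = 3;

      // 32-bit constant: ~(1U << n) is computed in 32 bits and then
      // zero-extended, leaving the upper 32 bits of the mask clear.
      std::uint64_t narrowMask = ~(1U << guardNum);

      // 64-bit constant: every other bit of the full 64-bit word stays set.
      std::uint64_t wideMask = ~(1ULL << guardNum);

      std::cout << std::hex << narrowMask << "\n"; // fffffff7
      std::cout << std::hex << wideMask << "\n";   // fffffffffffffff7
    }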
- bool IsConst, IsVolatile; - Type = decomposeTypeForEH(getContext(), Type, IsConst, IsVolatile); + bool IsConst, IsVolatile, IsUnaligned; + Type = + decomposeTypeForEH(getContext(), Type, IsConst, IsVolatile, IsUnaligned); bool IsReference = CatchHandlerType->isReferenceType(); @@ -3677,6 +3689,8 @@ MicrosoftCXXABI::getAddrOfCXXCatchHandlerType(QualType Type, Flags |= 1; if (IsVolatile) Flags |= 2; + if (IsUnaligned) + Flags |= 4; if (IsReference) Flags |= 8; @@ -3961,7 +3975,7 @@ llvm::Constant *MicrosoftCXXABI::getCatchableType(QualType T, auto *GV = new llvm::GlobalVariable( CGM.getModule(), CTType, /*Constant=*/true, getLinkageForRTTI(T), llvm::ConstantStruct::get(CTType, Fields), MangledName); - GV->setUnnamedAddr(true); + GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); GV->setSection(".xdata"); if (GV->isWeakForLinker()) GV->setComdat(CGM.getModule().getOrInsertComdat(GV->getName())); @@ -4079,7 +4093,7 @@ llvm::GlobalVariable *MicrosoftCXXABI::getCatchableTypeArray(QualType T) { CTA = new llvm::GlobalVariable( CGM.getModule(), CTAType, /*Constant=*/true, getLinkageForRTTI(T), llvm::ConstantStruct::get(CTAType, Fields), MangledName); - CTA->setUnnamedAddr(true); + CTA->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); CTA->setSection(".xdata"); if (CTA->isWeakForLinker()) CTA->setComdat(CGM.getModule().getOrInsertComdat(CTA->getName())); @@ -4087,8 +4101,8 @@ llvm::GlobalVariable *MicrosoftCXXABI::getCatchableTypeArray(QualType T) { } llvm::GlobalVariable *MicrosoftCXXABI::getThrowInfo(QualType T) { - bool IsConst, IsVolatile; - T = decomposeTypeForEH(getContext(), T, IsConst, IsVolatile); + bool IsConst, IsVolatile, IsUnaligned; + T = decomposeTypeForEH(getContext(), T, IsConst, IsVolatile, IsUnaligned); // The CatchableTypeArray enumerates the various (CV-unqualified) types that // the exception object may be caught as. @@ -4104,8 +4118,8 @@ llvm::GlobalVariable *MicrosoftCXXABI::getThrowInfo(QualType T) { SmallString<256> MangledName; { llvm::raw_svector_ostream Out(MangledName); - getMangleContext().mangleCXXThrowInfo(T, IsConst, IsVolatile, NumEntries, - Out); + getMangleContext().mangleCXXThrowInfo(T, IsConst, IsVolatile, IsUnaligned, + NumEntries, Out); } // Reuse a previously generated ThrowInfo if we have generated an appropriate @@ -4121,6 +4135,8 @@ llvm::GlobalVariable *MicrosoftCXXABI::getThrowInfo(QualType T) { Flags |= 1; if (IsVolatile) Flags |= 2; + if (IsUnaligned) + Flags |= 4; // The cleanup-function (a destructor) must be called when the exception // object's lifetime ends. 
@@ -4146,7 +4162,7 @@ llvm::GlobalVariable *MicrosoftCXXABI::getThrowInfo(QualType T) { auto *GV = new llvm::GlobalVariable( CGM.getModule(), TIType, /*Constant=*/true, getLinkageForRTTI(T), llvm::ConstantStruct::get(TIType, Fields), StringRef(MangledName)); - GV->setUnnamedAddr(true); + GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); GV->setSection(".xdata"); if (GV->isWeakForLinker()) GV->setComdat(CGM.getModule().getOrInsertComdat(GV->getName())); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/ModuleBuilder.cpp b/contrib/llvm/tools/clang/lib/CodeGen/ModuleBuilder.cpp index 0be5c55..952d162 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/ModuleBuilder.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/ModuleBuilder.cpp @@ -25,7 +25,9 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include <memory> + using namespace clang; +using namespace CodeGen; namespace { class CodeGeneratorImpl : public CodeGenerator { @@ -36,13 +38,21 @@ namespace { const CodeGenOptions CodeGenOpts; // Intentionally copied in. unsigned HandlingTopLevelDecls; + + /// Use this when emitting decls to block re-entrant decl emission. It will + /// emit all deferred decls on scope exit. Set EmitDeferred to false if decl + /// emission must be deferred longer, like at the end of a tag definition. struct HandlingTopLevelDeclRAII { CodeGeneratorImpl &Self; - HandlingTopLevelDeclRAII(CodeGeneratorImpl &Self) : Self(Self) { + bool EmitDeferred; + HandlingTopLevelDeclRAII(CodeGeneratorImpl &Self, + bool EmitDeferred = true) + : Self(Self), EmitDeferred(EmitDeferred) { ++Self.HandlingTopLevelDecls; } ~HandlingTopLevelDeclRAII() { - if (--Self.HandlingTopLevelDecls == 0) + unsigned Level = --Self.HandlingTopLevelDecls; + if (Level == 0 && EmitDeferred) Self.EmitDeferredDecls(); } }; @@ -57,15 +67,16 @@ namespace { SmallVector<CXXMethodDecl *, 8> DeferredInlineMethodDefinitions; public: - CodeGeneratorImpl(DiagnosticsEngine &diags, const std::string &ModuleName, + CodeGeneratorImpl(DiagnosticsEngine &diags, llvm::StringRef ModuleName, const HeaderSearchOptions &HSO, const PreprocessorOptions &PPO, const CodeGenOptions &CGO, llvm::LLVMContext &C, CoverageSourceInfo *CoverageInfo = nullptr) : Diags(diags), Ctx(nullptr), HeaderSearchOpts(HSO), PreprocessorOpts(PPO), CodeGenOpts(CGO), HandlingTopLevelDecls(0), - CoverageInfo(CoverageInfo), - M(new llvm::Module(ModuleName, C)) {} + CoverageInfo(CoverageInfo), M(new llvm::Module(ModuleName, C)) { + C.setDiscardValueNames(CGO.DiscardValueNames); + } ~CodeGeneratorImpl() override { // There should normally not be any leftover inline method definitions. 
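The HandlingTopLevelDeclRAII change above adds an EmitDeferred flag so that re-entrant decl handling (for example, decl emission triggered during PCH deserialization, as later hunks in this file note) can keep work deferred even when the nesting count drops back to zero. The counting-RAII shape, modeled with invented names (Emitter, HandlingRAII) and std::function work items standing in for deferred decls:

    #include <functional>
    #include <iostream>
    #include <vector>

    class Emitter {
      unsigned HandlingTopLevelDecls = 0;
      std::vector<std::function<void()>> Deferred;

    public:
      // Bump a nesting counter on entry; when the outermost scope exits and
      // flushing is allowed, run everything deferred in the meantime.
      struct HandlingRAII {
        Emitter &Self;
        bool EmitDeferred;
        HandlingRAII(Emitter &Self, bool EmitDeferred = true)
            : Self(Self), EmitDeferred(EmitDeferred) {
          ++Self.HandlingTopLevelDecls;
        }
        ~HandlingRAII() {
          if (--Self.HandlingTopLevelDecls == 0 && EmitDeferred)
            Self.flushDeferred();
        }
      };

      void defer(std::function<void()> work) {
        Deferred.push_back(std::move(work));
      }

      void flushDeferred() {
        for (auto &work : Deferred)
          work();
        Deferred.clear();
      }

      void handleDecl(const char *name) {
        HandlingRAII guard(*this); // nested calls flush only at the outermost exit
        std::cout << "handling " << name << "\n";
        defer([name] { std::cout << "deferred emission of " << name << "\n"; });
      }
    };

    int main() {
      Emitter emitter;
      emitter.handleDecl("foo"); // flushes when this outermost call returns
      {
        // A scope that must not trigger emission, like HandleTagDeclDefinition.
        Emitter::HandlingRAII block(emitter, /*EmitDeferred=*/false);
        emitter.handleDecl("bar"); // nested: nothing flushes here
      } // the count hits zero, but EmitDeferred is false, so "bar" waits
      emitter.flushDeferred();
    }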
@@ -73,11 +84,19 @@ namespace { Diags.hasErrorOccurred()); } - llvm::Module* GetModule() override { + CodeGenModule &CGM() { + return *Builder; + } + + llvm::Module *GetModule() { return M.get(); } - const Decl *GetDeclForMangledName(StringRef MangledName) override { + llvm::Module *ReleaseModule() { + return M.release(); + } + + const Decl *GetDeclForMangledName(StringRef MangledName) { GlobalDecl Result; if (!Builder->lookupRepresentativeDecl(MangledName, Result)) return nullptr; @@ -92,19 +111,23 @@ namespace { return D; } - llvm::Module *ReleaseModule() override { return M.release(); } + llvm::Constant *GetAddrOfGlobal(GlobalDecl global, bool isForDefinition) { + return Builder->GetAddrOfGlobal(global, isForDefinition); + } void Initialize(ASTContext &Context) override { Ctx = &Context; M->setTargetTriple(Ctx->getTargetInfo().getTriple().getTriple()); - M->setDataLayout(Ctx->getTargetInfo().getDataLayoutString()); + M->setDataLayout(Ctx->getTargetInfo().getDataLayout()); Builder.reset(new CodeGen::CodeGenModule(Context, HeaderSearchOpts, PreprocessorOpts, CodeGenOpts, *M, Diags, CoverageInfo)); - for (size_t i = 0, e = CodeGenOpts.DependentLibraries.size(); i < e; ++i) - HandleDependentLibrary(CodeGenOpts.DependentLibraries[i]); + for (auto &&Lib : CodeGenOpts.DependentLibraries) + Builder->AddDependentLib(Lib); + for (auto &&Opt : CodeGenOpts.LinkerOptions) + Builder->AppendLinkerOptions(Opt); } void HandleCXXStaticMemberVarInstantiation(VarDecl *VD) override { @@ -140,12 +163,23 @@ namespace { DeferredInlineMethodDefinitions.clear(); } - void HandleInlineMethodDefinition(CXXMethodDecl *D) override { + void HandleInlineFunctionDefinition(FunctionDecl *D) override { if (Diags.hasErrorOccurred()) return; assert(D->doesThisDeclarationHaveABody()); + // Handle friend functions. + if (D->isInIdentifierNamespace(Decl::IDNS_OrdinaryFriend)) { + if (Ctx->getTargetInfo().getCXXABI().isMicrosoft() + && !D->getLexicalDeclContext()->isDependentContext()) + Builder->EmitTopLevelDecl(D); + return; + } + + // Otherwise, must be a method. + auto MD = cast<CXXMethodDecl>(D); + // We may want to emit this definition. However, that decision might be // based on computing the linkage, and we have to defer that in case we // are inside of something that will change the method's final linkage, @@ -154,13 +188,13 @@ namespace { // void bar(); // void foo() { bar(); } // } A; - DeferredInlineMethodDefinitions.push_back(D); + DeferredInlineMethodDefinitions.push_back(MD); // Provide some coverage mapping even for methods that aren't emitted. // Don't do this for templated classes though, as they may not be // instantiable. - if (!D->getParent()->getDescribedClassTemplate()) - Builder->AddDeferredUnusedCoverageMapping(D); + if (!MD->getParent()->getDescribedClassTemplate()) + Builder->AddDeferredUnusedCoverageMapping(MD); } /// HandleTagDeclDefinition - This callback is invoked each time a TagDecl @@ -171,6 +205,10 @@ namespace { if (Diags.hasErrorOccurred()) return; + // Don't allow re-entrant calls to CodeGen triggered by PCH + // deserialization to emit deferred decls. + HandlingTopLevelDeclRAII HandlingDecl(*this, /*EmitDeferred=*/false); + Builder->UpdateCompletedType(D); // For MSVC compatibility, treat declarations of static data members with @@ -185,27 +223,50 @@ namespace { } } } + // For OpenMP emit declare reduction functions, if required. 
+ if (Ctx->getLangOpts().OpenMP) { + for (Decl *Member : D->decls()) { + if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(Member)) { + if (Ctx->DeclMustBeEmitted(DRD)) + Builder->EmitGlobal(DRD); + } + } + } } void HandleTagDeclRequiredDefinition(const TagDecl *D) override { if (Diags.hasErrorOccurred()) return; + // Don't allow re-entrant calls to CodeGen triggered by PCH + // deserialization to emit deferred decls. + HandlingTopLevelDeclRAII HandlingDecl(*this, /*EmitDeferred=*/false); + if (CodeGen::CGDebugInfo *DI = Builder->getModuleDebugInfo()) if (const RecordDecl *RD = dyn_cast<RecordDecl>(D)) DI->completeRequiredType(RD); } void HandleTranslationUnit(ASTContext &Ctx) override { + // Release the Builder when there is no error. + if (!Diags.hasErrorOccurred() && Builder) + Builder->Release(); + + // If there are errors before or when releasing the Builder, reset + // the module to stop here before invoking the backend. if (Diags.hasErrorOccurred()) { if (Builder) Builder->clear(); M.reset(); return; } + } - if (Builder) - Builder->Release(); + void AssignInheritanceModel(CXXRecordDecl *RD) override { + if (Diags.hasErrorOccurred()) + return; + + Builder->RefreshTypeCacheForClass(RD); } void CompleteTentativeDefinition(VarDecl *D) override { @@ -221,26 +282,35 @@ namespace { Builder->EmitVTable(RD); } + }; +} - void HandleLinkerOptionPragma(llvm::StringRef Opts) override { - Builder->AppendLinkerOptions(Opts); - } +void CodeGenerator::anchor() { } - void HandleDetectMismatch(llvm::StringRef Name, - llvm::StringRef Value) override { - Builder->AddDetectMismatch(Name, Value); - } +CodeGenModule &CodeGenerator::CGM() { + return static_cast<CodeGeneratorImpl*>(this)->CGM(); +} - void HandleDependentLibrary(llvm::StringRef Lib) override { - Builder->AddDependentLib(Lib); - } - }; +llvm::Module *CodeGenerator::GetModule() { + return static_cast<CodeGeneratorImpl*>(this)->GetModule(); } -void CodeGenerator::anchor() { } +llvm::Module *CodeGenerator::ReleaseModule() { + return static_cast<CodeGeneratorImpl*>(this)->ReleaseModule(); +} + +const Decl *CodeGenerator::GetDeclForMangledName(llvm::StringRef name) { + return static_cast<CodeGeneratorImpl*>(this)->GetDeclForMangledName(name); +} + +llvm::Constant *CodeGenerator::GetAddrOfGlobal(GlobalDecl global, + bool isForDefinition) { + return static_cast<CodeGeneratorImpl*>(this) + ->GetAddrOfGlobal(global, isForDefinition); +} CodeGenerator *clang::CreateLLVMCodeGen( - DiagnosticsEngine &Diags, const std::string &ModuleName, + DiagnosticsEngine &Diags, llvm::StringRef ModuleName, const HeaderSearchOptions &HeaderSearchOpts, const PreprocessorOptions &PreprocessorOpts, const CodeGenOptions &CGO, llvm::LLVMContext &C, CoverageSourceInfo *CoverageInfo) { diff --git a/contrib/llvm/tools/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp b/contrib/llvm/tools/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp index f385e53..de40e41 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp @@ -19,8 +19,8 @@ #include "clang/CodeGen/BackendUtil.h" #include "clang/Frontend/CodeGenOptions.h" #include "clang/Frontend/CompilerInstance.h" -#include "clang/Lex/Preprocessor.h" #include "clang/Lex/HeaderSearch.h" +#include "clang/Lex/Preprocessor.h" #include "clang/Serialization/ASTWriter.h" #include "llvm/ADT/StringRef.h" #include "llvm/Bitcode/BitstreamReader.h" @@ -31,8 +31,10 @@ #include "llvm/IR/Module.h" #include "llvm/Object/COFF.h" 
#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/Path.h" #include "llvm/Support/TargetRegistry.h" #include <memory> +#include <utility> using namespace clang; @@ -42,6 +44,7 @@ namespace { class PCHContainerGenerator : public ASTConsumer { DiagnosticsEngine &Diags; const std::string MainFileName; + const std::string OutputFileName; ASTContext *Ctx; ModuleMap &MMap; const HeaderSearchOptions &HeaderSearchOpts; @@ -52,17 +55,15 @@ class PCHContainerGenerator : public ASTConsumer { std::unique_ptr<llvm::LLVMContext> VMContext; std::unique_ptr<llvm::Module> M; std::unique_ptr<CodeGen::CodeGenModule> Builder; - raw_pwrite_stream *OS; + std::unique_ptr<raw_pwrite_stream> OS; std::shared_ptr<PCHBuffer> Buffer; /// Visit every type and emit debug info for it. struct DebugTypeVisitor : public RecursiveASTVisitor<DebugTypeVisitor> { clang::CodeGen::CGDebugInfo &DI; ASTContext &Ctx; - bool SkipTagDecls; - DebugTypeVisitor(clang::CodeGen::CGDebugInfo &DI, ASTContext &Ctx, - bool SkipTagDecls) - : DI(DI), Ctx(Ctx), SkipTagDecls(SkipTagDecls) {} + DebugTypeVisitor(clang::CodeGen::CGDebugInfo &DI, ASTContext &Ctx) + : DI(DI), Ctx(Ctx) {} /// Determine whether this type can be represented in DWARF. static bool CanRepresent(const Type *Ty) { @@ -80,7 +81,8 @@ class PCHContainerGenerator : public ASTConsumer { // TagDecls may be deferred until after all decls have been merged and we // know the complete type. Pure forward declarations will be skipped, but // they don't need to be emitted into the module anyway. - if (SkipTagDecls && isa<TagDecl>(D)) + if (auto *TD = dyn_cast<TagDecl>(D)) + if (!TD->isCompleteDefinition()) return true; QualType QualTy = Ctx.getTypeDeclType(D); @@ -103,7 +105,7 @@ class PCHContainerGenerator : public ASTConsumer { return true; SmallVector<QualType, 16> ArgTypes; - for (auto i : D->params()) + for (auto i : D->parameters()) ArgTypes.push_back(i->getType()); QualType RetTy = D->getReturnType(); QualType FnTy = Ctx.getFunctionType(RetTy, ArgTypes, @@ -122,7 +124,7 @@ class PCHContainerGenerator : public ASTConsumer { ArgTypes.push_back(D->getSelfType(Ctx, D->getClassInterface(), selfIsPseudoStrong, selfIsConsumed)); ArgTypes.push_back(Ctx.getObjCSelType()); - for (auto i : D->params()) + for (auto i : D->parameters()) ArgTypes.push_back(i->getType()); QualType RetTy = D->getReturnType(); QualType FnTy = Ctx.getFunctionType(RetTy, ArgTypes, @@ -136,20 +138,22 @@ class PCHContainerGenerator : public ASTConsumer { public: PCHContainerGenerator(CompilerInstance &CI, const std::string &MainFileName, const std::string &OutputFileName, - raw_pwrite_stream *OS, + std::unique_ptr<raw_pwrite_stream> OS, std::shared_ptr<PCHBuffer> Buffer) - : Diags(CI.getDiagnostics()), Ctx(nullptr), + : Diags(CI.getDiagnostics()), MainFileName(MainFileName), + OutputFileName(OutputFileName), Ctx(nullptr), MMap(CI.getPreprocessor().getHeaderSearchInfo().getModuleMap()), HeaderSearchOpts(CI.getHeaderSearchOpts()), PreprocessorOpts(CI.getPreprocessorOpts()), - TargetOpts(CI.getTargetOpts()), LangOpts(CI.getLangOpts()), OS(OS), - Buffer(Buffer) { + TargetOpts(CI.getTargetOpts()), LangOpts(CI.getLangOpts()), + OS(std::move(OS)), Buffer(std::move(Buffer)) { // The debug info output isn't affected by CodeModel and // ThreadModel, but the backend expects them to be nonempty. 
CodeGenOpts.CodeModel = "default"; CodeGenOpts.ThreadModel = "single"; CodeGenOpts.DebugTypeExtRefs = true; - CodeGenOpts.setDebugInfo(CodeGenOptions::FullDebugInfo); + CodeGenOpts.setDebugInfo(codegenoptions::FullDebugInfo); + CodeGenOpts.setDebuggerTuning(CI.getCodeGenOpts().getDebuggerTuning()); } ~PCHContainerGenerator() override = default; @@ -160,10 +164,15 @@ public: Ctx = &Context; VMContext.reset(new llvm::LLVMContext()); M.reset(new llvm::Module(MainFileName, *VMContext)); - M->setDataLayout(Ctx->getTargetInfo().getDataLayoutString()); + M->setDataLayout(Ctx->getTargetInfo().getDataLayout()); Builder.reset(new CodeGen::CodeGenModule( *Ctx, HeaderSearchOpts, PreprocessorOpts, CodeGenOpts, *M, Diags)); - Builder->getModuleDebugInfo()->setModuleMap(MMap); + + // Prepare CGDebugInfo to emit debug info for a clang module. + auto *DI = Builder->getModuleDebugInfo(); + StringRef ModuleName = llvm::sys::path::filename(MainFileName); + DI->setPCHDescriptor({ModuleName, "", OutputFileName, ~1ULL}); + DI->setModuleMap(MMap); } bool HandleTopLevelDecl(DeclGroupRef D) override { @@ -173,7 +182,7 @@ public: // Collect debug info for all decls in this group. for (auto *I : D) if (!I->isFromASTFile()) { - DebugTypeVisitor DTV(*Builder->getModuleDebugInfo(), *Ctx, true); + DebugTypeVisitor DTV(*Builder->getModuleDebugInfo(), *Ctx); DTV.TraverseDecl(I); } return true; @@ -190,7 +199,20 @@ public: if (D->isFromASTFile()) return; - DebugTypeVisitor DTV(*Builder->getModuleDebugInfo(), *Ctx, false); + // Anonymous tag decls are deferred until we are building their declcontext. + if (D->getName().empty()) + return; + + // Defer tag decls until their declcontext is complete. + auto *DeclCtx = D->getDeclContext(); + while (DeclCtx) { + if (auto *D = dyn_cast<TagDecl>(DeclCtx)) + if (!D->isCompleteDefinition()) + return; + DeclCtx = DeclCtx->getParent(); + } + + DebugTypeVisitor DTV(*Builder->getModuleDebugInfo(), *Ctx); DTV.TraverseDecl(D); Builder->UpdateCompletedType(D); } @@ -215,8 +237,12 @@ public: return; M->setTargetTriple(Ctx.getTargetInfo().getTriple().getTriple()); - M->setDataLayout(Ctx.getTargetInfo().getDataLayoutString()); - Builder->getModuleDebugInfo()->setDwoId(Buffer->Signature); + M->setDataLayout(Ctx.getTargetInfo().getDataLayout()); + + // PCH files don't have a signature field in the control block, + // but LLVM detects DWO CUs by looking for a non-zero DWO id. + uint64_t Signature = Buffer->Signature ? Buffer->Signature : ~1ULL; + Builder->getModuleDebugInfo()->setDwoId(Signature); // Finalize the Builder. if (Builder) @@ -255,20 +281,18 @@ public: DEBUG({ // Print the IR for the PCH container to the debug output. llvm::SmallString<0> Buffer; - llvm::raw_svector_ostream OS(Buffer); - clang::EmitBackendOutput(Diags, CodeGenOpts, TargetOpts, LangOpts, - Ctx.getTargetInfo().getDataLayoutString(), - M.get(), BackendAction::Backend_EmitLL, &OS); + clang::EmitBackendOutput( + Diags, CodeGenOpts, TargetOpts, LangOpts, + Ctx.getTargetInfo().getDataLayout(), M.get(), + BackendAction::Backend_EmitLL, + llvm::make_unique<llvm::raw_svector_ostream>(Buffer)); llvm::dbgs() << Buffer; }); // Use the LLVM backend to emit the pch container. clang::EmitBackendOutput(Diags, CodeGenOpts, TargetOpts, LangOpts, - Ctx.getTargetInfo().getDataLayoutString(), - M.get(), BackendAction::Backend_EmitObj, OS); - - // Make sure the pch container hits disk. - OS->flush(); + Ctx.getTargetInfo().getDataLayout(), M.get(), + BackendAction::Backend_EmitObj, std::move(OS)); // Free the memory for the temporary buffer. 
llvm::SmallVector<char, 0> Empty; @@ -281,10 +305,11 @@ public: std::unique_ptr<ASTConsumer> ObjectFilePCHContainerWriter::CreatePCHContainerGenerator( CompilerInstance &CI, const std::string &MainFileName, - const std::string &OutputFileName, llvm::raw_pwrite_stream *OS, + const std::string &OutputFileName, + std::unique_ptr<llvm::raw_pwrite_stream> OS, std::shared_ptr<PCHBuffer> Buffer) const { - return llvm::make_unique<PCHContainerGenerator>(CI, MainFileName, - OutputFileName, OS, Buffer); + return llvm::make_unique<PCHContainerGenerator>( + CI, MainFileName, OutputFileName, std::move(OS), Buffer); } void ObjectFilePCHContainerReader::ExtractPCH( diff --git a/contrib/llvm/tools/clang/lib/CodeGen/SwiftCallingConv.cpp b/contrib/llvm/tools/clang/lib/CodeGen/SwiftCallingConv.cpp new file mode 100644 index 0000000..6c20f8c --- /dev/null +++ b/contrib/llvm/tools/clang/lib/CodeGen/SwiftCallingConv.cpp @@ -0,0 +1,830 @@ +//===--- SwiftCallingConv.cpp - Lowering for the Swift calling convention -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Implementation of the abstract lowering for the Swift calling convention. +// +//===----------------------------------------------------------------------===// + +#include "clang/CodeGen/SwiftCallingConv.h" +#include "clang/Basic/TargetInfo.h" +#include "CodeGenModule.h" +#include "TargetInfo.h" + +using namespace clang; +using namespace CodeGen; +using namespace swiftcall; + +static const SwiftABIInfo &getSwiftABIInfo(CodeGenModule &CGM) { + return cast<SwiftABIInfo>(CGM.getTargetCodeGenInfo().getABIInfo()); +} + +static bool isPowerOf2(unsigned n) { + return n == (n & -n); +} + +/// Given two types with the same size, try to find a common type. +static llvm::Type *getCommonType(llvm::Type *first, llvm::Type *second) { + assert(first != second); + + // Allow pointers to merge with integers, but prefer the integer type. + if (first->isIntegerTy()) { + if (second->isPointerTy()) return first; + } else if (first->isPointerTy()) { + if (second->isIntegerTy()) return second; + if (second->isPointerTy()) return first; + + // Allow two vectors to be merged (given that they have the same size). + // This assumes that we never have two different vector register sets. + } else if (auto firstVecTy = dyn_cast<llvm::VectorType>(first)) { + if (auto secondVecTy = dyn_cast<llvm::VectorType>(second)) { + if (auto commonTy = getCommonType(firstVecTy->getElementType(), + secondVecTy->getElementType())) { + return (commonTy == firstVecTy->getElementType() ? first : second); + } + } + } + + return nullptr; +} + +static CharUnits getTypeStoreSize(CodeGenModule &CGM, llvm::Type *type) { + return CharUnits::fromQuantity(CGM.getDataLayout().getTypeStoreSize(type)); +} + +void SwiftAggLowering::addTypedData(QualType type, CharUnits begin) { + // Deal with various aggregate types as special cases: + + // Record types. + if (auto recType = type->getAs<RecordType>()) { + addTypedData(recType->getDecl(), begin); + + // Array types. + } else if (type->isArrayType()) { + // Incomplete array types (flexible array members?) don't provide + // data to lay out, and the other cases shouldn't be possible. 
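The new SwiftCallingConv.cpp above defines isPowerOf2 with the classic identity n == (n & -n): in two's-complement arithmetic, n & -n isolates the lowest set bit, so the comparison holds exactly when n has at most one bit set. A standalone check of the identity (note that, as written, n == 0 also passes):

    #include <iostream>

    static bool isPowerOf2(unsigned n) { return n == (n & -n); }

    int main() {
      // n & -n keeps only the lowest set bit: 12 & -12 == 4, 8 & -8 == 8.
      for (unsigned n : {0u, 1u, 2u, 3u, 4u, 6u, 8u, 12u, 64u})
        std::cout << n << (isPowerOf2(n) ? " is" : " is not")
                  << " a power of two (n & -n = " << (n & -n) << ")\n";
    }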
+ auto arrayType = CGM.getContext().getAsConstantArrayType(type); + if (!arrayType) return; + + QualType eltType = arrayType->getElementType(); + auto eltSize = CGM.getContext().getTypeSizeInChars(eltType); + for (uint64_t i = 0, e = arrayType->getSize().getZExtValue(); i != e; ++i) { + addTypedData(eltType, begin + i * eltSize); + } + + // Complex types. + } else if (auto complexType = type->getAs<ComplexType>()) { + auto eltType = complexType->getElementType(); + auto eltSize = CGM.getContext().getTypeSizeInChars(eltType); + auto eltLLVMType = CGM.getTypes().ConvertType(eltType); + addTypedData(eltLLVMType, begin, begin + eltSize); + addTypedData(eltLLVMType, begin + eltSize, begin + 2 * eltSize); + + // Member pointer types. + } else if (type->getAs<MemberPointerType>()) { + // Just add it all as opaque. + addOpaqueData(begin, begin + CGM.getContext().getTypeSizeInChars(type)); + + // Everything else is scalar and should not convert as an LLVM aggregate. + } else { + // We intentionally convert as !ForMem because we want to preserve + // that a type was an i1. + auto llvmType = CGM.getTypes().ConvertType(type); + addTypedData(llvmType, begin); + } +} + +void SwiftAggLowering::addTypedData(const RecordDecl *record, CharUnits begin) { + addTypedData(record, begin, CGM.getContext().getASTRecordLayout(record)); +} + +void SwiftAggLowering::addTypedData(const RecordDecl *record, CharUnits begin, + const ASTRecordLayout &layout) { + // Unions are a special case. + if (record->isUnion()) { + for (auto field : record->fields()) { + if (field->isBitField()) { + addBitFieldData(field, begin, 0); + } else { + addTypedData(field->getType(), begin); + } + } + return; + } + + // Note that correctness does not rely on us adding things in + // their actual order of layout; it's just somewhat more efficient + // for the builder. + + // With that in mind, add "early" C++ data. + auto cxxRecord = dyn_cast<CXXRecordDecl>(record); + if (cxxRecord) { + // - a v-table pointer, if the class adds its own + if (layout.hasOwnVFPtr()) { + addTypedData(CGM.Int8PtrTy, begin); + } + + // - non-virtual bases + for (auto &baseSpecifier : cxxRecord->bases()) { + if (baseSpecifier.isVirtual()) continue; + + auto baseRecord = baseSpecifier.getType()->getAsCXXRecordDecl(); + addTypedData(baseRecord, begin + layout.getBaseClassOffset(baseRecord)); + } + + // - a vbptr if the class adds its own + if (layout.hasOwnVBPtr()) { + addTypedData(CGM.Int8PtrTy, begin + layout.getVBPtrOffset()); + } + } + + // Add fields. + for (auto field : record->fields()) { + auto fieldOffsetInBits = layout.getFieldOffset(field->getFieldIndex()); + if (field->isBitField()) { + addBitFieldData(field, begin, fieldOffsetInBits); + } else { + addTypedData(field->getType(), + begin + CGM.getContext().toCharUnitsFromBits(fieldOffsetInBits)); + } + } + + // Add "late" C++ data: + if (cxxRecord) { + // - virtual bases + for (auto &vbaseSpecifier : cxxRecord->vbases()) { + auto baseRecord = vbaseSpecifier.getType()->getAsCXXRecordDecl(); + addTypedData(baseRecord, begin + layout.getVBaseClassOffset(baseRecord)); + } + } +} + +void SwiftAggLowering::addBitFieldData(const FieldDecl *bitfield, + CharUnits recordBegin, + uint64_t bitfieldBitBegin) { + assert(bitfield->isBitField()); + auto &ctx = CGM.getContext(); + auto width = bitfield->getBitWidthValue(ctx); + + // We can ignore zero-width bit-fields. + if (width == 0) return; + + // toCharUnitsFromBits rounds down. 
+ CharUnits bitfieldByteBegin = ctx.toCharUnitsFromBits(bitfieldBitBegin); + + // Find the offset of the last byte that is partially occupied by the + // bit-field; since we otherwise expect exclusive ends, the end is the + // next byte. + uint64_t bitfieldBitLast = bitfieldBitBegin + width - 1; + CharUnits bitfieldByteEnd = + ctx.toCharUnitsFromBits(bitfieldBitLast) + CharUnits::One(); + addOpaqueData(recordBegin + bitfieldByteBegin, + recordBegin + bitfieldByteEnd); +} + +void SwiftAggLowering::addTypedData(llvm::Type *type, CharUnits begin) { + assert(type && "didn't provide type for typed data"); + addTypedData(type, begin, begin + getTypeStoreSize(CGM, type)); +} + +void SwiftAggLowering::addTypedData(llvm::Type *type, + CharUnits begin, CharUnits end) { + assert(type && "didn't provide type for typed data"); + assert(getTypeStoreSize(CGM, type) == end - begin); + + // Legalize vector types. + if (auto vecTy = dyn_cast<llvm::VectorType>(type)) { + SmallVector<llvm::Type*, 4> componentTys; + legalizeVectorType(CGM, end - begin, vecTy, componentTys); + assert(componentTys.size() >= 1); + + // Walk the initial components. + for (size_t i = 0, e = componentTys.size(); i != e - 1; ++i) { + llvm::Type *componentTy = componentTys[i]; + auto componentSize = getTypeStoreSize(CGM, componentTy); + assert(componentSize < end - begin); + addLegalTypedData(componentTy, begin, begin + componentSize); + begin += componentSize; + } + + return addLegalTypedData(componentTys.back(), begin, end); + } + + // Legalize integer types. + if (auto intTy = dyn_cast<llvm::IntegerType>(type)) { + if (!isLegalIntegerType(CGM, intTy)) + return addOpaqueData(begin, end); + } + + // All other types should be legal. + return addLegalTypedData(type, begin, end); +} + +void SwiftAggLowering::addLegalTypedData(llvm::Type *type, + CharUnits begin, CharUnits end) { + // Require the type to be naturally aligned. + if (!begin.isZero() && !begin.isMultipleOf(getNaturalAlignment(CGM, type))) { + + // Try splitting vector types. + if (auto vecTy = dyn_cast<llvm::VectorType>(type)) { + auto split = splitLegalVectorType(CGM, end - begin, vecTy); + auto eltTy = split.first; + auto numElts = split.second; + + auto eltSize = (end - begin) / numElts; + assert(eltSize == getTypeStoreSize(CGM, eltTy)); + for (size_t i = 0, e = numElts; i != e; ++i) { + addLegalTypedData(eltTy, begin, begin + eltSize); + begin += eltSize; + } + assert(begin == end); + return; + } + + return addOpaqueData(begin, end); + } + + addEntry(type, begin, end); +} + +void SwiftAggLowering::addEntry(llvm::Type *type, + CharUnits begin, CharUnits end) { + assert((!type || + (!isa<llvm::StructType>(type) && !isa<llvm::ArrayType>(type))) && + "cannot add aggregate-typed data"); + assert(!type || begin.isMultipleOf(getNaturalAlignment(CGM, type))); + + // Fast path: we can just add entries to the end. + if (Entries.empty() || Entries.back().End <= begin) { + Entries.push_back({begin, end, type}); + return; + } + + // Find the first existing entry that ends after the start of the new data. + // TODO: do a binary search if Entries is big enough for it to matter. + size_t index = Entries.size() - 1; + while (index != 0) { + if (Entries[index - 1].End <= begin) break; + --index; + } + + // The entry ends after the start of the new data. + // If the entry starts after the end of the new data, there's no conflict. 
+ if (Entries[index].Begin >= end) { + // This insertion is potentially O(n), but the way we generally build + // these layouts makes that unlikely to matter: we'd need a union of + // several very large types. + Entries.insert(Entries.begin() + index, {begin, end, type}); + return; + } + + // Otherwise, the ranges overlap. The new range might also overlap + // with later ranges. +restartAfterSplit: + + // Simplest case: an exact overlap. + if (Entries[index].Begin == begin && Entries[index].End == end) { + // If the types match exactly, great. + if (Entries[index].Type == type) return; + + // If either type is opaque, make the entry opaque and return. + if (Entries[index].Type == nullptr) { + return; + } else if (type == nullptr) { + Entries[index].Type = nullptr; + return; + } + + // If they disagree in an ABI-agnostic way, just resolve the conflict + // arbitrarily. + if (auto entryType = getCommonType(Entries[index].Type, type)) { + Entries[index].Type = entryType; + return; + } + + // Otherwise, make the entry opaque. + Entries[index].Type = nullptr; + return; + } + + // Okay, we have an overlapping conflict of some sort. + + // If we have a vector type, split it. + if (auto vecTy = dyn_cast_or_null<llvm::VectorType>(type)) { + auto eltTy = vecTy->getElementType(); + CharUnits eltSize = (end - begin) / vecTy->getNumElements(); + assert(eltSize == getTypeStoreSize(CGM, eltTy)); + for (unsigned i = 0, e = vecTy->getNumElements(); i != e; ++i) { + addEntry(eltTy, begin, begin + eltSize); + begin += eltSize; + } + assert(begin == end); + return; + } + + // If the entry is a vector type, split it and try again. + if (Entries[index].Type && Entries[index].Type->isVectorTy()) { + splitVectorEntry(index); + goto restartAfterSplit; + } + + // Okay, we have no choice but to make the existing entry opaque. + + Entries[index].Type = nullptr; + + // Stretch the start of the entry to the beginning of the range. + if (begin < Entries[index].Begin) { + Entries[index].Begin = begin; + assert(index == 0 || begin >= Entries[index - 1].End); + } + + // Stretch the end of the entry to the end of the range; but if we run + // into the start of the next entry, just leave the range there and repeat. + while (end > Entries[index].End) { + assert(Entries[index].Type == nullptr); + + // If the range doesn't overlap the next entry, we're done. + if (index == Entries.size() - 1 || end <= Entries[index + 1].Begin) { + Entries[index].End = end; + break; + } + + // Otherwise, stretch to the start of the next entry. + Entries[index].End = Entries[index + 1].Begin; + + // Continue with the next entry. + index++; + + // This entry needs to be made opaque if it is not already. + if (Entries[index].Type == nullptr) + continue; + + // Split vector entries unless we completely subsume them. + if (Entries[index].Type->isVectorTy() && + end < Entries[index].End) { + splitVectorEntry(index); + } + + // Make the entry opaque. + Entries[index].Type = nullptr; + } +} + +/// Replace the entry of vector type at offset 'index' with a sequence +/// of its component vectors. 
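+/// For example, on a target where <2 x float> is a legal 8-byte vector, a
+/// <4 x float> entry becomes two <2 x float> entries; if no smaller vector
+/// is legal, it becomes four scalar float entries.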
+void SwiftAggLowering::splitVectorEntry(unsigned index) { + auto vecTy = cast<llvm::VectorType>(Entries[index].Type); + auto split = splitLegalVectorType(CGM, Entries[index].getWidth(), vecTy); + + auto eltTy = split.first; + CharUnits eltSize = getTypeStoreSize(CGM, eltTy); + auto numElts = split.second; + Entries.insert(&Entries[index + 1], numElts - 1, StorageEntry()); + + CharUnits begin = Entries[index].Begin; + for (unsigned i = 0; i != numElts; ++i) { + Entries[index].Type = eltTy; + Entries[index].Begin = begin; + Entries[index].End = begin + eltSize; + begin += eltSize; + } +} + +/// Given a power-of-two unit size, return the offset of the aligned unit +/// of that size which contains the given offset. +/// +/// In other words, round down to the nearest multiple of the unit size. +static CharUnits getOffsetAtStartOfUnit(CharUnits offset, CharUnits unitSize) { + assert(isPowerOf2(unitSize.getQuantity())); + auto unitMask = ~(unitSize.getQuantity() - 1); + return CharUnits::fromQuantity(offset.getQuantity() & unitMask); +} + +static bool areBytesInSameUnit(CharUnits first, CharUnits second, + CharUnits chunkSize) { + return getOffsetAtStartOfUnit(first, chunkSize) + == getOffsetAtStartOfUnit(second, chunkSize); +} + +void SwiftAggLowering::finish() { + if (Entries.empty()) { + Finished = true; + return; + } + + // We logically split the layout down into a series of chunks of this size, + // which is generally the size of a pointer. + const CharUnits chunkSize = getMaximumVoluntaryIntegerSize(CGM); + + // First pass: if two entries share a chunk, make them both opaque + // and stretch one to meet the next. + bool hasOpaqueEntries = (Entries[0].Type == nullptr); + for (size_t i = 1, e = Entries.size(); i != e; ++i) { + if (areBytesInSameUnit(Entries[i - 1].End - CharUnits::One(), + Entries[i].Begin, chunkSize)) { + Entries[i - 1].Type = nullptr; + Entries[i].Type = nullptr; + Entries[i - 1].End = Entries[i].Begin; + hasOpaqueEntries = true; + + } else if (Entries[i].Type == nullptr) { + hasOpaqueEntries = true; + } + } + + // The rest of the algorithm leaves non-opaque entries alone, so if we + // have no opaque entries, we're done. + if (!hasOpaqueEntries) { + Finished = true; + return; + } + + // Okay, move the entries to a temporary and rebuild Entries. + auto orig = std::move(Entries); + assert(Entries.empty()); + + for (size_t i = 0, e = orig.size(); i != e; ++i) { + // Just copy over non-opaque entries. + if (orig[i].Type != nullptr) { + Entries.push_back(orig[i]); + continue; + } + + // Scan forward to determine the full extent of the next opaque range. + // We know from the first pass that only contiguous ranges will overlap + // the same aligned chunk. + auto begin = orig[i].Begin; + auto end = orig[i].End; + while (i + 1 != e && + orig[i + 1].Type == nullptr && + end == orig[i + 1].Begin) { + end = orig[i + 1].End; + i++; + } + + // Add an entry per intersected chunk. + do { + // Find the smallest aligned storage unit in the maximal aligned + // storage unit containing 'begin' that contains all the bytes in + // the intersection between the range and this chunk. + CharUnits localBegin = begin; + CharUnits chunkBegin = getOffsetAtStartOfUnit(localBegin, chunkSize); + CharUnits chunkEnd = chunkBegin + chunkSize; + CharUnits localEnd = std::min(end, chunkEnd); + + // Just do a simple loop over ever-increasing unit sizes. 
+ CharUnits unitSize = CharUnits::One(); + CharUnits unitBegin, unitEnd; + for (; ; unitSize *= 2) { + assert(unitSize <= chunkSize); + unitBegin = getOffsetAtStartOfUnit(localBegin, unitSize); + unitEnd = unitBegin + unitSize; + if (unitEnd >= localEnd) break; + } + + // Add an entry for this unit. + auto entryTy = + llvm::IntegerType::get(CGM.getLLVMContext(), + CGM.getContext().toBits(unitSize)); + Entries.push_back({unitBegin, unitEnd, entryTy}); + + // The next chunk starts where this chunk left off. + begin = localEnd; + } while (begin != end); + } + + // Okay, finally finished. + Finished = true; +} + +void SwiftAggLowering::enumerateComponents(EnumerationCallback callback) const { + assert(Finished && "haven't yet finished lowering"); + + for (auto &entry : Entries) { + callback(entry.Begin, entry.Type); + } +} + +std::pair<llvm::StructType*, llvm::Type*> +SwiftAggLowering::getCoerceAndExpandTypes() const { + assert(Finished && "haven't yet finished lowering"); + + auto &ctx = CGM.getLLVMContext(); + + if (Entries.empty()) { + auto type = llvm::StructType::get(ctx); + return { type, type }; + } + + SmallVector<llvm::Type*, 8> elts; + CharUnits lastEnd = CharUnits::Zero(); + bool hasPadding = false; + bool packed = false; + for (auto &entry : Entries) { + if (entry.Begin != lastEnd) { + auto paddingSize = entry.Begin - lastEnd; + assert(!paddingSize.isNegative()); + + auto padding = llvm::ArrayType::get(llvm::Type::getInt8Ty(ctx), + paddingSize.getQuantity()); + elts.push_back(padding); + hasPadding = true; + } + + if (!packed && !entry.Begin.isMultipleOf( + CharUnits::fromQuantity( + CGM.getDataLayout().getABITypeAlignment(entry.Type)))) + packed = true; + + elts.push_back(entry.Type); + lastEnd = entry.End; + } + + // We don't need to adjust 'packed' to deal with possible tail padding + // because we never do that kind of access through the coercion type. + auto coercionType = llvm::StructType::get(ctx, elts, packed); + + llvm::Type *unpaddedType = coercionType; + if (hasPadding) { + elts.clear(); + for (auto &entry : Entries) { + elts.push_back(entry.Type); + } + if (elts.size() == 1) { + unpaddedType = elts[0]; + } else { + unpaddedType = llvm::StructType::get(ctx, elts, /*packed*/ false); + } + } else if (Entries.size() == 1) { + unpaddedType = Entries[0].Type; + } + + return { coercionType, unpaddedType }; +} + +bool SwiftAggLowering::shouldPassIndirectly(bool asReturnValue) const { + assert(Finished && "haven't yet finished lowering"); + + // Empty types don't need to be passed indirectly. + if (Entries.empty()) return false; + + CharUnits totalSize = Entries.back().End; + + // Avoid copying the array of types when there's just a single element. + if (Entries.size() == 1) { + return getSwiftABIInfo(CGM).shouldPassIndirectlyForSwift(totalSize, + Entries.back().Type, + asReturnValue); + } + + SmallVector<llvm::Type*, 8> componentTys; + componentTys.reserve(Entries.size()); + for (auto &entry : Entries) { + componentTys.push_back(entry.Type); + } + return getSwiftABIInfo(CGM).shouldPassIndirectlyForSwift(totalSize, + componentTys, + asReturnValue); +} + +CharUnits swiftcall::getMaximumVoluntaryIntegerSize(CodeGenModule &CGM) { + // Currently always the size of an ordinary pointer. + return CGM.getContext().toCharUnitsFromBits( + CGM.getContext().getTargetInfo().getPointerWidth(0)); +} + +CharUnits swiftcall::getNaturalAlignment(CodeGenModule &CGM, llvm::Type *type) { + // For Swift's purposes, this is always just the store size of the type + // rounded up to a power of 2. 
+ auto size = (unsigned long long) getTypeStoreSize(CGM, type).getQuantity(); + if (!isPowerOf2(size)) { + size = 1ULL << (llvm::findLastSet(size, llvm::ZB_Undefined) + 1); + } + assert(size >= CGM.getDataLayout().getABITypeAlignment(type)); + return CharUnits::fromQuantity(size); +} + +bool swiftcall::isLegalIntegerType(CodeGenModule &CGM, + llvm::IntegerType *intTy) { + auto size = intTy->getBitWidth(); + switch (size) { + case 1: + case 8: + case 16: + case 32: + case 64: + // Just assume that the above are always legal. + return true; + + case 128: + return CGM.getContext().getTargetInfo().hasInt128Type(); + + default: + return false; + } +} + +bool swiftcall::isLegalVectorType(CodeGenModule &CGM, CharUnits vectorSize, + llvm::VectorType *vectorTy) { + return isLegalVectorType(CGM, vectorSize, vectorTy->getElementType(), + vectorTy->getNumElements()); +} + +bool swiftcall::isLegalVectorType(CodeGenModule &CGM, CharUnits vectorSize, + llvm::Type *eltTy, unsigned numElts) { + assert(numElts > 1 && "illegal vector length"); + return getSwiftABIInfo(CGM) + .isLegalVectorTypeForSwift(vectorSize, eltTy, numElts); +} + +std::pair<llvm::Type*, unsigned> +swiftcall::splitLegalVectorType(CodeGenModule &CGM, CharUnits vectorSize, + llvm::VectorType *vectorTy) { + auto numElts = vectorTy->getNumElements(); + auto eltTy = vectorTy->getElementType(); + + // Try to split the vector type in half. + if (numElts >= 4 && isPowerOf2(numElts)) { + if (isLegalVectorType(CGM, vectorSize / 2, eltTy, numElts / 2)) + return {llvm::VectorType::get(eltTy, numElts / 2), 2}; + } + + return {eltTy, numElts}; +} + +void swiftcall::legalizeVectorType(CodeGenModule &CGM, CharUnits origVectorSize, + llvm::VectorType *origVectorTy, + llvm::SmallVectorImpl<llvm::Type*> &components) { + // If it's already a legal vector type, use it. + if (isLegalVectorType(CGM, origVectorSize, origVectorTy)) { + components.push_back(origVectorTy); + return; + } + + // Try to split the vector into legal subvectors. + auto numElts = origVectorTy->getNumElements(); + auto eltTy = origVectorTy->getElementType(); + assert(numElts != 1); + + // The largest size that we're still considering making subvectors of. + // Always a power of 2. + unsigned logCandidateNumElts = llvm::findLastSet(numElts, llvm::ZB_Undefined); + unsigned candidateNumElts = 1U << logCandidateNumElts; + assert(candidateNumElts <= numElts && candidateNumElts * 2 > numElts); + + // Minor optimization: don't check the legality of this exact size twice. + if (candidateNumElts == numElts) { + logCandidateNumElts--; + candidateNumElts >>= 1; + } + + CharUnits eltSize = (origVectorSize / numElts); + CharUnits candidateSize = eltSize * candidateNumElts; + + // The sensibility of this algorithm relies on the fact that we never + // have a legal non-power-of-2 vector size without having the power of 2 + // also be legal. + while (logCandidateNumElts > 0) { + assert(candidateNumElts == 1U << logCandidateNumElts); + assert(candidateNumElts <= numElts); + assert(candidateSize == eltSize * candidateNumElts); + + // Skip illegal vector sizes. + if (!isLegalVectorType(CGM, candidateSize, eltTy, candidateNumElts)) { + logCandidateNumElts--; + candidateNumElts /= 2; + candidateSize /= 2; + continue; + } + + // Add the right number of vectors of this size. 
+ auto numVecs = numElts >> logCandidateNumElts; + components.append(numVecs, llvm::VectorType::get(eltTy, candidateNumElts)); + numElts -= (numVecs << logCandidateNumElts); + + if (numElts == 0) return; + + // It's possible that the number of elements remaining will be legal. + // This can happen with e.g. <7 x float> when <3 x float> is legal. + // This only needs to be separately checked if it's not a power of 2. + if (numElts > 2 && !isPowerOf2(numElts) && + isLegalVectorType(CGM, eltSize * numElts, eltTy, numElts)) { + components.push_back(llvm::VectorType::get(eltTy, numElts)); + return; + } + + // Bring vecSize down to something no larger than numElts. + do { + logCandidateNumElts--; + candidateNumElts /= 2; + candidateSize /= 2; + } while (candidateNumElts > numElts); + } + + // Otherwise, just append a bunch of individual elements. + components.append(numElts, eltTy); +} + +bool swiftcall::shouldPassCXXRecordIndirectly(CodeGenModule &CGM, + const CXXRecordDecl *record) { + // Following a recommendation from Richard Smith, pass a C++ type + // indirectly only if the destructor is non-trivial or *all* of the + // copy/move constructors are deleted or non-trivial. + + if (record->hasNonTrivialDestructor()) + return true; + + // It would be nice if this were summarized on the CXXRecordDecl. + for (auto ctor : record->ctors()) { + if (ctor->isCopyOrMoveConstructor() && !ctor->isDeleted() && + ctor->isTrivial()) { + return false; + } + } + + return true; +} + +static ABIArgInfo classifyExpandedType(SwiftAggLowering &lowering, + bool forReturn, + CharUnits alignmentForIndirect) { + if (lowering.empty()) { + return ABIArgInfo::getIgnore(); + } else if (lowering.shouldPassIndirectly(forReturn)) { + return ABIArgInfo::getIndirect(alignmentForIndirect, /*byval*/ false); + } else { + auto types = lowering.getCoerceAndExpandTypes(); + return ABIArgInfo::getCoerceAndExpand(types.first, types.second); + } +} + +static ABIArgInfo classifyType(CodeGenModule &CGM, CanQualType type, + bool forReturn) { + if (auto recordType = dyn_cast<RecordType>(type)) { + auto record = recordType->getDecl(); + auto &layout = CGM.getContext().getASTRecordLayout(record); + + if (auto cxxRecord = dyn_cast<CXXRecordDecl>(record)) { + if (shouldPassCXXRecordIndirectly(CGM, cxxRecord)) + return ABIArgInfo::getIndirect(layout.getAlignment(), /*byval*/ false); + } + + SwiftAggLowering lowering(CGM); + lowering.addTypedData(recordType->getDecl(), CharUnits::Zero(), layout); + lowering.finish(); + + return classifyExpandedType(lowering, forReturn, layout.getAlignment()); + } + + // Just assume that all of our target ABIs can support returning at least + // two integer or floating-point values. + if (isa<ComplexType>(type)) { + return (forReturn ? ABIArgInfo::getDirect() : ABIArgInfo::getExpand()); + } + + // Vector types may need to be legalized. + if (isa<VectorType>(type)) { + SwiftAggLowering lowering(CGM); + lowering.addTypedData(type, CharUnits::Zero()); + lowering.finish(); + + CharUnits alignment = CGM.getContext().getTypeAlignInChars(type); + return classifyExpandedType(lowering, forReturn, alignment); + } + + // Member pointer types need to be expanded, but it's a simple form of + // expansion that 'Direct' can handle. Note that CanBeFlattened should be + // true for this to work. + + // 'void' needs to be ignored. + if (type->isVoidType()) { + return ABIArgInfo::getIgnore(); + } + + // Everything else can be passed directly. 
+ return ABIArgInfo::getDirect(); +} + +ABIArgInfo swiftcall::classifyReturnType(CodeGenModule &CGM, CanQualType type) { + return classifyType(CGM, type, /*forReturn*/ true); +} + +ABIArgInfo swiftcall::classifyArgumentType(CodeGenModule &CGM, + CanQualType type) { + return classifyType(CGM, type, /*forReturn*/ false); +} + +void swiftcall::computeABIInfo(CodeGenModule &CGM, CGFunctionInfo &FI) { + auto &retInfo = FI.getReturnInfo(); + retInfo = classifyReturnType(CGM, FI.getReturnType()); + + for (unsigned i = 0, e = FI.arg_size(); i != e; ++i) { + auto &argInfo = FI.arg_begin()[i]; + argInfo.info = classifyArgumentType(CGM, argInfo.type); + } +} diff --git a/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.cpp b/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.cpp index 3d1ddef..bc03616 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.cpp @@ -19,6 +19,7 @@ #include "CodeGenFunction.h" #include "clang/AST/RecordLayout.h" #include "clang/CodeGen/CGFunctionInfo.h" +#include "clang/CodeGen/SwiftCallingConv.h" #include "clang/Frontend/CodeGenOptions.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Triple.h" @@ -68,6 +69,46 @@ Address ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr, ABIInfo::~ABIInfo() {} +/// Does the given lowering require more than the given number of +/// registers when expanded? +/// +/// This is intended to be the basis of a reasonable basic implementation +/// of should{Pass,Return}IndirectlyForSwift. +/// +/// For most targets, a limit of four total registers is reasonable; this +/// limits the amount of code required in order to move around the value +/// in case it wasn't produced immediately prior to the call by the caller +/// (or wasn't produced in exactly the right registers) or isn't used +/// immediately within the callee. But some targets may need to further +/// limit the register count due to an inability to support that many +/// return registers. +static bool occupiesMoreThan(CodeGenTypes &cgt, + ArrayRef<llvm::Type*> scalarTypes, + unsigned maxAllRegisters) { + unsigned intCount = 0, fpCount = 0; + for (llvm::Type *type : scalarTypes) { + if (type->isPointerTy()) { + intCount++; + } else if (auto intTy = dyn_cast<llvm::IntegerType>(type)) { + auto ptrWidth = cgt.getTarget().getPointerWidth(0); + intCount += (intTy->getBitWidth() + ptrWidth - 1) / ptrWidth; + } else { + assert(type->isVectorTy() || type->isFloatingPointTy()); + fpCount++; + } + } + + return (intCount + fpCount > maxAllRegisters); +} + +bool SwiftABIInfo::isLegalVectorTypeForSwift(CharUnits vectorSize, + llvm::Type *eltTy, + unsigned numElts) const { + // The default implementation of this assumes that the target guarantees + // 128-bit SIMD support but nothing more. 
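+  // That is, accept vectors larger than 8 bytes and no larger than 16 bytes
+  // (e.g. <4 x float>, but not <2 x float> or <8 x float>).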
+ return (vectorSize.getQuantity() > 8 && vectorSize.getQuantity() <= 16); +} + static CGCXXABI::RecordArgABI getRecordArgABI(const RecordType *RT, CGCXXABI &CXXABI) { const CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(RT->getDecl()); @@ -117,6 +158,8 @@ const TargetInfo &ABIInfo::getTarget() const { return CGT.getTarget(); } +bool ABIInfo:: isAndroid() const { return getTarget().getTriple().isAndroid(); } + bool ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const { return false; } @@ -130,7 +173,7 @@ bool ABIInfo::shouldSignExtUnsignedType(QualType Ty) const { return false; } -void ABIArgInfo::dump() const { +LLVM_DUMP_METHOD void ABIArgInfo::dump() const { raw_ostream &OS = llvm::errs(); OS << "(ABIArgInfo Kind="; switch (TheKind) { @@ -158,6 +201,10 @@ void ABIArgInfo::dump() const { case Expand: OS << "Expand"; break; + case CoerceAndExpand: + OS << "CoerceAndExpand Type="; + getCoerceAndExpandType()->print(OS); + break; } OS << ")\n"; } @@ -217,7 +264,7 @@ static Address emitVoidPtrDirectVAArg(CodeGenFunction &CGF, } // Advance the pointer past the argument, then store that back. - CharUnits FullDirectSize = DirectSize.RoundUpToAlignment(SlotSize); + CharUnits FullDirectSize = DirectSize.alignTo(SlotSize); llvm::Value *NextPtr = CGF.Builder.CreateConstInBoundsByteGEP(Addr.getPointer(), FullDirectSize, "argp.next"); @@ -225,7 +272,8 @@ static Address emitVoidPtrDirectVAArg(CodeGenFunction &CGF, // If the argument is smaller than a slot, and this is a big-endian // target, the argument will be right-adjusted in its slot. - if (DirectSize < SlotSize && CGF.CGM.getDataLayout().isBigEndian()) { + if (DirectSize < SlotSize && CGF.CGM.getDataLayout().isBigEndian() && + !DirectTy->isStructTy()) { Addr = CGF.Builder.CreateConstInBoundsByteGEP(Addr, SlotSize - DirectSize); } @@ -324,6 +372,9 @@ TargetCodeGenInfo::getDependentLibraryOption(llvm::StringRef Lib, Opt += Lib; } +unsigned TargetCodeGenInfo::getOpenCLKernelCallingConv() const { + return llvm::CallingConv::C; +} static bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays); /// isEmptyField - Return true iff a the field is "empty", that is it @@ -364,7 +415,7 @@ static bool isEmptyField(ASTContext &Context, const FieldDecl *FD, static bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays) { const RecordType *RT = T->getAs<RecordType>(); if (!RT) - return 0; + return false; const RecordDecl *RD = RT->getDecl(); if (RD->hasFlexibleArrayMember()) return false; @@ -456,73 +507,55 @@ static const Type *isSingleElementStruct(QualType T, ASTContext &Context) { return Found; } -static bool is32Or64BitBasicType(QualType Ty, ASTContext &Context) { - // Treat complex types as the element type. - if (const ComplexType *CTy = Ty->getAs<ComplexType>()) - Ty = CTy->getElementType(); - - // Check for a type which we know has a simple scalar argument-passing - // convention without any padding. (We're specifically looking for 32 - // and 64-bit integer and integer-equivalents, float, and double.) - if (!Ty->getAs<BuiltinType>() && !Ty->hasPointerRepresentation() && - !Ty->isEnumeralType() && !Ty->isBlockPointerType()) - return false; - - uint64_t Size = Context.getTypeSize(Ty); - return Size == 32 || Size == 64; -} - -/// canExpandIndirectArgument - Test whether an argument type which is to be -/// passed indirectly (on the stack) would have the equivalent layout if it was -/// expanded into separate arguments. If so, we prefer to do the latter to avoid -/// inhibiting optimizations. 
-/// -// FIXME: This predicate is missing many cases, currently it just follows -// llvm-gcc (checks that all fields are 32-bit or 64-bit primitive types). We -// should probably make this smarter, or better yet make the LLVM backend -// capable of handling it. -static bool canExpandIndirectArgument(QualType Ty, ASTContext &Context) { - // We can only expand structure types. - const RecordType *RT = Ty->getAs<RecordType>(); - if (!RT) - return false; - - // We can only expand (C) structures. - // - // FIXME: This needs to be generalized to handle classes as well. - const RecordDecl *RD = RT->getDecl(); - if (!RD->isStruct()) - return false; - - // We try to expand CLike CXXRecordDecl. - if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) { - if (!CXXRD->isCLike()) - return false; - } - - uint64_t Size = 0; - - for (const auto *FD : RD->fields()) { - if (!is32Or64BitBasicType(FD->getType(), Context)) - return false; +namespace { +Address EmitVAArgInstr(CodeGenFunction &CGF, Address VAListAddr, QualType Ty, + const ABIArgInfo &AI) { + // This default implementation defers to the llvm backend's va_arg + // instruction. It can handle only passing arguments directly + // (typically only handled in the backend for primitive types), or + // aggregates passed indirectly by pointer (NOTE: if the "byval" + // flag has ABI impact in the callee, this implementation cannot + // work.) + + // Only a few cases are covered here at the moment -- those needed + // by the default abi. + llvm::Value *Val; + + if (AI.isIndirect()) { + assert(!AI.getPaddingType() && + "Unexpected PaddingType seen in arginfo in generic VAArg emitter!"); + assert( + !AI.getIndirectRealign() && + "Unexpected IndirectRealign seen in arginfo in generic VAArg emitter!"); + + auto TyInfo = CGF.getContext().getTypeInfoInChars(Ty); + CharUnits TyAlignForABI = TyInfo.second; + + llvm::Type *BaseTy = + llvm::PointerType::getUnqual(CGF.ConvertTypeForMem(Ty)); + llvm::Value *Addr = + CGF.Builder.CreateVAArg(VAListAddr.getPointer(), BaseTy); + return Address(Addr, TyAlignForABI); + } else { + assert((AI.isDirect() || AI.isExtend()) && + "Unexpected ArgInfo Kind in generic VAArg emitter!"); - // FIXME: Reject bit-fields wholesale; there are two problems, we don't know - // how to expand them yet, and the predicate for telling if a bitfield still - // counts as "basic" is more complicated than what we were doing previously. - if (FD->isBitField()) - return false; + assert(!AI.getInReg() && + "Unexpected InReg seen in arginfo in generic VAArg emitter!"); + assert(!AI.getPaddingType() && + "Unexpected PaddingType seen in arginfo in generic VAArg emitter!"); + assert(!AI.getDirectOffset() && + "Unexpected DirectOffset seen in arginfo in generic VAArg emitter!"); + assert(!AI.getCoerceToType() && + "Unexpected CoerceToType seen in arginfo in generic VAArg emitter!"); - Size += Context.getTypeSize(FD->getType()); + Address Temp = CGF.CreateMemTemp(Ty, "varet"); + Val = CGF.Builder.CreateVAArg(VAListAddr.getPointer(), CGF.ConvertType(Ty)); + CGF.Builder.CreateStore(Val, Temp); + return Temp; } - - // Make sure there are not any holes in the struct. - if (Size != Context.getTypeSize(Ty)) - return false; - - return true; } -namespace { /// DefaultABIInfo - The default implementation for ABI specific /// details. 
This implementation provides information which results in /// self-consistent and sensible LLVM IR generation, but does not @@ -542,7 +575,9 @@ public: } Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, - QualType Ty) const override; + QualType Ty) const override { + return EmitVAArgInstr(CGF, VAListAddr, Ty, classifyArgumentType(Ty)); + } }; class DefaultTargetCodeGenInfo : public TargetCodeGenInfo { @@ -551,11 +586,6 @@ public: : TargetCodeGenInfo(new DefaultABIInfo(CGT)) {} }; -Address DefaultABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, - QualType Ty) const { - return Address::invalid(); -} - ABIArgInfo DefaultABIInfo::classifyArgumentType(QualType Ty) const { Ty = useFirstFieldIfTransparentUnion(Ty); @@ -607,13 +637,17 @@ private: ABIArgInfo classifyArgumentType(QualType Ty) const; // DefaultABIInfo's classifyReturnType and classifyArgumentType are - // non-virtual, but computeInfo is virtual, so we overload that. + // non-virtual, but computeInfo and EmitVAArg are virtual, so we + // overload them. void computeInfo(CGFunctionInfo &FI) const override { if (!getCXXABI().classifyReturnType(FI)) FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); for (auto &Arg : FI.arguments()) Arg.info = classifyArgumentType(Arg.type); } + + Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const override; }; class WebAssemblyTargetCodeGenInfo final : public TargetCodeGenInfo { @@ -665,6 +699,14 @@ ABIArgInfo WebAssemblyABIInfo::classifyReturnType(QualType RetTy) const { return DefaultABIInfo::classifyReturnType(RetTy); } +Address WebAssemblyABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, + QualType Ty) const { + return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*Indirect=*/ false, + getContext().getTypeInfoInChars(Ty), + CharUnits::fromQuantity(4), + /*AllowHigherAlign=*/ true); +} + //===----------------------------------------------------------------------===// // le32/PNaCl bitcode ABI Implementation // @@ -700,7 +742,13 @@ void PNaClABIInfo::computeInfo(CGFunctionInfo &FI) const { Address PNaClABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { - return Address::invalid(); + // The PNaCL ABI is a bit odd, in that varargs don't use normal + // function classification. Structs get passed directly for varargs + // functions, through a rewriting transform in + // pnacl-llvm/lib/Transforms/NaCl/ExpandVarArgs.cpp, which allows + // this target to actually support a va_arg instructions with an + // aggregate type, unlike other targets. + return EmitVAArgInstr(CGF, VAListAddr, Ty, ABIArgInfo::getDirect()); } /// \brief Classify argument of given type \p Ty. @@ -797,7 +845,7 @@ struct CCState { }; /// X86_32ABIInfo - The X86-32 ABI information. -class X86_32ABIInfo : public ABIInfo { +class X86_32ABIInfo : public SwiftABIInfo { enum Class { Integer, Float @@ -849,6 +897,8 @@ class X86_32ABIInfo : public ABIInfo { bool &NeedsPadding) const; bool shouldPrimitiveUseInReg(QualType Ty, CCState &State) const; + bool canExpandIndirectArgument(QualType Ty) const; + /// \brief Rewrite the function info so that all memory arguments use /// inalloca. 
void rewriteWithInAlloca(CGFunctionInfo &FI) const; @@ -866,12 +916,22 @@ public: X86_32ABIInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI, bool RetSmallStructInRegABI, bool Win32StructABI, unsigned NumRegisterParameters, bool SoftFloatABI) - : ABIInfo(CGT), IsDarwinVectorABI(DarwinVectorABI), + : SwiftABIInfo(CGT), IsDarwinVectorABI(DarwinVectorABI), IsRetSmallStructInRegABI(RetSmallStructInRegABI), IsWin32StructABI(Win32StructABI), IsSoftFloatABI(SoftFloatABI), IsMCUABI(CGT.getTarget().getTriple().isOSIAMCU()), DefaultNumRegisterParameters(NumRegisterParameters) {} + + bool shouldPassIndirectlyForSwift(CharUnits totalSize, + ArrayRef<llvm::Type*> scalars, + bool asReturnValue) const override { + // LLVM's x86-32 lowering currently only assigns up to three + // integer registers and three fp registers. Oddly, it'll use up to + // four vector registers for vectors, but those can overlap with the + // scalar registers. + return occupiesMoreThan(CGT, scalars, /*total*/ 3); + } }; class X86_32TargetCodeGenInfo : public TargetCodeGenInfo { @@ -920,6 +980,11 @@ public: ('T' << 24); return llvm::ConstantInt::get(CGM.Int32Ty, Sig); } + + StringRef getARCRetainAutoreleasedReturnValueMarker() const override { + return "movl\t%ebp, %ebp" + "\t\t## marker for objc_retainAutoreleaseReturnValue"; + } }; } @@ -1054,6 +1119,72 @@ bool X86_32ABIInfo::shouldReturnTypeInRegister(QualType Ty, return true; } +static bool is32Or64BitBasicType(QualType Ty, ASTContext &Context) { + // Treat complex types as the element type. + if (const ComplexType *CTy = Ty->getAs<ComplexType>()) + Ty = CTy->getElementType(); + + // Check for a type which we know has a simple scalar argument-passing + // convention without any padding. (We're specifically looking for 32 + // and 64-bit integer and integer-equivalents, float, and double.) + if (!Ty->getAs<BuiltinType>() && !Ty->hasPointerRepresentation() && + !Ty->isEnumeralType() && !Ty->isBlockPointerType()) + return false; + + uint64_t Size = Context.getTypeSize(Ty); + return Size == 32 || Size == 64; +} + +/// Test whether an argument type which is to be passed indirectly (on the +/// stack) would have the equivalent layout if it was expanded into separate +/// arguments. If so, we prefer to do the latter to avoid inhibiting +/// optimizations. +bool X86_32ABIInfo::canExpandIndirectArgument(QualType Ty) const { + // We can only expand structure types. + const RecordType *RT = Ty->getAs<RecordType>(); + if (!RT) + return false; + const RecordDecl *RD = RT->getDecl(); + if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) { + if (!IsWin32StructABI ) { + // On non-Windows, we have to conservatively match our old bitcode + // prototypes in order to be ABI-compatible at the bitcode level. + if (!CXXRD->isCLike()) + return false; + } else { + // Don't do this for dynamic classes. + if (CXXRD->isDynamicClass()) + return false; + // Don't do this if there are any non-empty bases. + for (const CXXBaseSpecifier &Base : CXXRD->bases()) { + if (!isEmptyRecord(getContext(), Base.getType(), /*AllowArrays=*/true)) + return false; + } + } + } + + uint64_t Size = 0; + + for (const auto *FD : RD->fields()) { + // Scalar arguments on the stack get 4 byte alignment on x86. If the + // argument is smaller than 32-bits, expanding the struct will create + // alignment padding. 
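+    // e.g. a struct containing a 'char' or 'short' field cannot be expanded.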
+ if (!is32Or64BitBasicType(FD->getType(), getContext())) + return false; + + // FIXME: Reject bit-fields wholesale; there are two problems, we don't know + // how to expand them yet, and the predicate for telling if a bitfield still + // counts as "basic" is more complicated than what we were doing previously. + if (FD->isBitField()) + return false; + + Size += getContext().getTypeSize(FD->getType()); + } + + // We can do this if there was no alignment padding. + return Size == getContext().getTypeSize(Ty); +} + ABIArgInfo X86_32ABIInfo::getIndirectReturnResult(QualType RetTy, CCState &State) const { // If the return value is indirect, then the hidden argument is consuming one // integer register. @@ -1114,6 +1245,10 @@ ABIArgInfo X86_32ABIInfo::classifyReturnType(QualType RetTy, if (!IsRetSmallStructInRegABI && !RetTy->isAnyComplexType()) return getIndirectReturnResult(RetTy, State); + // Ignore empty structs/unions. + if (isEmptyRecord(getContext(), RetTy, true)) + return ABIArgInfo::getIgnore(); + // Small structures which are register sized are generally returned // in a register. if (shouldReturnTypeInRegister(RetTy, getContext())) { @@ -1266,6 +1401,12 @@ bool X86_32ABIInfo::updateFreeRegs(QualType Ty, CCState &State) const { bool X86_32ABIInfo::shouldAggregateUseDirect(QualType Ty, CCState &State, bool &InReg, bool &NeedsPadding) const { + // On Windows, aggregates other than HFAs are never passed in registers, and + // they do not consume register slots. Homogenous floating-point aggregates + // (HFAs) have already been dealt with at this point. + if (IsWin32StructABI && isAggregateTypeForABI(Ty)) + return false; + NeedsPadding = false; InReg = !IsMCUABI; @@ -1339,23 +1480,19 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, } if (isAggregateTypeForABI(Ty)) { - if (RT) { - // Structs are always byval on win32, regardless of what they contain. - if (IsWin32StructABI) - return getIndirectResult(Ty, true, State); + // Structures with flexible arrays are always indirect. + // FIXME: This should not be byval! + if (RT && RT->getDecl()->hasFlexibleArrayMember()) + return getIndirectResult(Ty, true, State); - // Structures with flexible arrays are always indirect. - if (RT->getDecl()->hasFlexibleArrayMember()) - return getIndirectResult(Ty, true, State); - } - - // Ignore empty structs/unions. - if (isEmptyRecord(getContext(), Ty, true)) + // Ignore empty structs/unions on non-Windows. + if (!IsWin32StructABI && isEmptyRecord(getContext(), Ty, true)) return ABIArgInfo::getIgnore(); llvm::LLVMContext &LLVMContext = getVMContext(); llvm::IntegerType *Int32 = llvm::Type::getInt32Ty(LLVMContext); - bool NeedsPadding, InReg; + bool NeedsPadding = false; + bool InReg; if (shouldAggregateUseDirect(Ty, State, InReg, NeedsPadding)) { unsigned SizeInRegs = (getContext().getTypeSize(Ty) + 31) / 32; SmallVector<llvm::Type*, 3> Elements(SizeInRegs, Int32); @@ -1373,9 +1510,8 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, // optimizations. // Don't do this for the MCU if there are still free integer registers // (see X86_64 ABI for full explanation). 
- if (getContext().getTypeSize(Ty) <= 4*32 && - canExpandIndirectArgument(Ty, getContext()) && - (!IsMCUABI || State.FreeRegs == 0)) + if (getContext().getTypeSize(Ty) <= 4 * 32 && + (!IsMCUABI || State.FreeRegs == 0) && canExpandIndirectArgument(Ty)) return ABIArgInfo::getExpandWithPadding( State.CC == llvm::CallingConv::X86_FastCall || State.CC == llvm::CallingConv::X86_VectorCall, @@ -1474,7 +1610,7 @@ X86_32ABIInfo::addFieldToArgStruct(SmallVector<llvm::Type *, 6> &FrameFields, // Insert padding bytes to respect alignment. CharUnits FieldEnd = StackOffset; - StackOffset = FieldEnd.RoundUpToAlignment(FieldAlign); + StackOffset = FieldEnd.alignTo(FieldAlign); if (StackOffset != FieldEnd) { CharUnits NumBytes = StackOffset - FieldEnd; llvm::Type *Ty = llvm::Type::getInt8Ty(getVMContext()); @@ -1495,10 +1631,14 @@ static bool isArgInAlloca(const ABIArgInfo &Info) { return false; case ABIArgInfo::Direct: case ABIArgInfo::Extend: - case ABIArgInfo::Expand: if (Info.getInReg()) return false; return true; + case ABIArgInfo::Expand: + case ABIArgInfo::CoerceAndExpand: + // These are aggregate types which are never passed in registers when + // inalloca is involved. + return true; } llvm_unreachable("invalid enum"); } @@ -1609,6 +1749,10 @@ void X86_32TargetCodeGenInfo::setTargetAttributes(const Decl *D, llvm::AttributeSet::FunctionIndex, B)); } + if (FD->hasAttr<AnyX86InterruptAttr>()) { + llvm::Function *Fn = cast<llvm::Function>(GV); + Fn->setCallingConv(llvm::CallingConv::X86_INTR); + } } } @@ -1675,7 +1819,7 @@ static unsigned getNativeVectorSizeForAVXABI(X86AVXABILevel AVXLevel) { } /// X86_64ABIInfo - The X86_64 ABI information. -class X86_64ABIInfo : public ABIInfo { +class X86_64ABIInfo : public SwiftABIInfo { enum Class { Integer = 0, SSE, @@ -1779,6 +1923,17 @@ class X86_64ABIInfo : public ABIInfo { return !getTarget().getTriple().isOSDarwin(); } + /// GCC classifies <1 x long long> as SSE but compatibility with older clang + // compilers require us to classify it as INTEGER. + bool classifyIntegerMMXAsSSE() const { + const llvm::Triple &Triple = getTarget().getTriple(); + if (Triple.isOSDarwin() || Triple.getOS() == llvm::Triple::PS4) + return false; + if (Triple.isOSFreeBSD() && Triple.getOSMajorVersion() >= 10) + return false; + return true; + } + X86AVXABILevel AVXLevel; // Some ABIs (e.g. X32 ABI and Native Client OS) use 32 bit pointers on // 64-bit hardware. @@ -1786,7 +1941,7 @@ class X86_64ABIInfo : public ABIInfo { public: X86_64ABIInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel) : - ABIInfo(CGT), AVXLevel(AVXLevel), + SwiftABIInfo(CGT), AVXLevel(AVXLevel), Has64BitPointers(CGT.getDataLayout().getPointerSize(0) == 8) { } @@ -1813,6 +1968,12 @@ public: bool has64BitPointers() const { return Has64BitPointers; } + + bool shouldPassIndirectlyForSwift(CharUnits totalSize, + ArrayRef<llvm::Type*> scalars, + bool asReturnValue) const override { + return occupiesMoreThan(CGT, scalars, /*total*/ 4); + } }; /// WinX86_64ABIInfo - The Windows X86_64 ABI information. 
@@ -1914,6 +2075,16 @@ public: ('T' << 24); return llvm::ConstantInt::get(CGM.Int32Ty, Sig); } + + void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, + CodeGen::CodeGenModule &CGM) const override { + if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) { + if (FD->hasAttr<AnyX86InterruptAttr>()) { + llvm::Function *Fn = cast<llvm::Function>(GV); + Fn->setCallingConv(llvm::CallingConv::X86_INTR); + } + } + } }; class PS4TargetCodeGenInfo : public X86_64TargetCodeGenInfo { @@ -2031,6 +2202,13 @@ void WinX86_64TargetCodeGenInfo::setTargetAttributes(const Decl *D, CodeGen::CodeGenModule &CGM) const { TargetCodeGenInfo::setTargetAttributes(D, GV, CGM); + if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) { + if (FD->hasAttr<AnyX86InterruptAttr>()) { + llvm::Function *Fn = cast<llvm::Function>(GV); + Fn->setCallingConv(llvm::CallingConv::X86_INTR); + } + } + addStackProbeSizeTargetAttribute(D, GV, CGM); } } @@ -2203,15 +2381,20 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, if (EB_Lo != EB_Hi) Hi = Lo; } else if (Size == 64) { + QualType ElementType = VT->getElementType(); + // gcc passes <1 x double> in memory. :( - if (VT->getElementType()->isSpecificBuiltinType(BuiltinType::Double)) + if (ElementType->isSpecificBuiltinType(BuiltinType::Double)) return; - // gcc passes <1 x long long> as INTEGER. - if (VT->getElementType()->isSpecificBuiltinType(BuiltinType::LongLong) || - VT->getElementType()->isSpecificBuiltinType(BuiltinType::ULongLong) || - VT->getElementType()->isSpecificBuiltinType(BuiltinType::Long) || - VT->getElementType()->isSpecificBuiltinType(BuiltinType::ULong)) + // gcc passes <1 x long long> as SSE but clang used to unconditionally + // pass them as integer. For platforms where clang is the de facto + // platform compiler, we must continue to use integer. + if (!classifyIntegerMMXAsSSE() && + (ElementType->isSpecificBuiltinType(BuiltinType::LongLong) || + ElementType->isSpecificBuiltinType(BuiltinType::ULongLong) || + ElementType->isSpecificBuiltinType(BuiltinType::Long) || + ElementType->isSpecificBuiltinType(BuiltinType::ULong))) Current = Integer; else Current = SSE; @@ -2775,7 +2958,7 @@ GetX86_64ByValArgumentPair(llvm::Type *Lo, llvm::Type *Hi, // the second element at offset 8. Check for this: unsigned LoSize = (unsigned)TD.getTypeAllocSize(Lo); unsigned HiAlign = TD.getABITypeAlignment(Hi); - unsigned HiStart = llvm::RoundUpToAlignment(LoSize, HiAlign); + unsigned HiStart = llvm::alignTo(LoSize, HiAlign); assert(HiStart != 0 && HiStart <= 8 && "Invalid x86-64 argument pair!"); // To handle this, we have to increase the size of the low part so that the @@ -3473,13 +3656,15 @@ public: } +// TODO: this implementation is now likely redundant with +// DefaultABIInfo::EmitVAArg. Address PPC32_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAList, QualType Ty) const { const unsigned OverflowLimit = 8; if (const ComplexType *CTy = Ty->getAs<ComplexType>()) { // TODO: Implement this. For now ignore. (void)CTy; - return Address::invalid(); + return Address::invalid(); // FIXME? 
} // struct __va_list_tag { @@ -3578,7 +3763,7 @@ Address PPC32_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAList, CharUnits Size; if (!isIndirect) { auto TypeInfo = CGF.getContext().getTypeInfoInChars(Ty); - Size = TypeInfo.first.RoundUpToAlignment(OverflowAreaAlign); + Size = TypeInfo.first.alignTo(OverflowAreaAlign); } else { Size = CGF.getPointerSize(); } @@ -3663,7 +3848,7 @@ PPC32TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, namespace { /// PPC64_SVR4_ABIInfo - The 64-bit PowerPC ELF (SVR4) ABI information. -class PPC64_SVR4_ABIInfo : public DefaultABIInfo { +class PPC64_SVR4_ABIInfo : public ABIInfo { public: enum ABIKind { ELFv1 = 0, @@ -3674,6 +3859,7 @@ private: static const unsigned GPRBits = 64; ABIKind Kind; bool HasQPX; + bool IsSoftFloatABI; // A vector of float or double will be promoted to <4 x f32> or <4 x f64> and // will be passed in a QPX register. @@ -3704,8 +3890,10 @@ private: } public: - PPC64_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT, ABIKind Kind, bool HasQPX) - : DefaultABIInfo(CGT), Kind(Kind), HasQPX(HasQPX) {} + PPC64_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT, ABIKind Kind, bool HasQPX, + bool SoftFloatABI) + : ABIInfo(CGT), Kind(Kind), HasQPX(HasQPX), + IsSoftFloatABI(SoftFloatABI) {} bool isPromotableTypeForABI(QualType Ty) const; CharUnits getParamTypeAlignment(QualType Ty) const; @@ -3753,8 +3941,10 @@ class PPC64_SVR4_TargetCodeGenInfo : public TargetCodeGenInfo { public: PPC64_SVR4_TargetCodeGenInfo(CodeGenTypes &CGT, - PPC64_SVR4_ABIInfo::ABIKind Kind, bool HasQPX) - : TargetCodeGenInfo(new PPC64_SVR4_ABIInfo(CGT, Kind, HasQPX)) {} + PPC64_SVR4_ABIInfo::ABIKind Kind, bool HasQPX, + bool SoftFloatABI) + : TargetCodeGenInfo(new PPC64_SVR4_ABIInfo(CGT, Kind, HasQPX, + SoftFloatABI)) {} int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { // This is recovered from gcc output. @@ -3945,8 +4135,19 @@ bool ABIInfo::isHomogeneousAggregate(QualType Ty, const Type *&Base, // agree in both total size and mode (float vs. vector) are // treated as being equivalent here. const Type *TyPtr = Ty.getTypePtr(); - if (!Base) + if (!Base) { Base = TyPtr; + // If it's a non-power-of-2 vector, its size is already a power-of-2, + // so make sure to widen it explicitly. + if (const VectorType *VT = Base->getAs<VectorType>()) { + QualType EltTy = VT->getElementType(); + unsigned NumElements = + getContext().getTypeSize(VT) / getContext().getTypeSize(EltTy); + Base = getContext() + .getVectorType(EltTy, NumElements, VT->getVectorKind()) + .getTypePtr(); + } + } if (Base->isVectorType() != TyPtr->isVectorType() || getContext().getTypeSize(Base) != getContext().getTypeSize(TyPtr)) @@ -3961,8 +4162,11 @@ bool PPC64_SVR4_ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const { if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) { if (BT->getKind() == BuiltinType::Float || BT->getKind() == BuiltinType::Double || - BT->getKind() == BuiltinType::LongDouble) + BT->getKind() == BuiltinType::LongDouble) { + if (IsSoftFloatABI) + return false; return true; + } } if (const VectorType *VT = Ty->getAs<VectorType>()) { if (getContext().getTypeSize(VT) == 128 || IsQPXVectorTy(Ty)) @@ -4029,13 +4233,13 @@ PPC64_SVR4_ABIInfo::classifyArgumentType(QualType Ty) const { // Types up to 8 bytes are passed as integer type (which will be // properly aligned in the argument save area doubleword). 
if (Bits <= GPRBits) - CoerceTy = llvm::IntegerType::get(getVMContext(), - llvm::RoundUpToAlignment(Bits, 8)); + CoerceTy = + llvm::IntegerType::get(getVMContext(), llvm::alignTo(Bits, 8)); // Larger types are passed as arrays, with the base type selected // according to the required alignment in the save area. else { uint64_t RegBits = ABIAlign * 8; - uint64_t NumRegs = llvm::RoundUpToAlignment(Bits, RegBits) / RegBits; + uint64_t NumRegs = llvm::alignTo(Bits, RegBits) / RegBits; llvm::Type *RegTy = llvm::IntegerType::get(getVMContext(), RegBits); CoerceTy = llvm::ArrayType::get(RegTy, NumRegs); } @@ -4095,8 +4299,8 @@ PPC64_SVR4_ABIInfo::classifyReturnType(QualType RetTy) const { CoerceTy = llvm::IntegerType::get(getVMContext(), GPRBits); CoerceTy = llvm::StructType::get(CoerceTy, CoerceTy, nullptr); } else - CoerceTy = llvm::IntegerType::get(getVMContext(), - llvm::RoundUpToAlignment(Bits, 8)); + CoerceTy = + llvm::IntegerType::get(getVMContext(), llvm::alignTo(Bits, 8)); return ABIArgInfo::getDirect(CoerceTy); } @@ -4220,7 +4424,7 @@ PPC64TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, namespace { -class AArch64ABIInfo : public ABIInfo { +class AArch64ABIInfo : public SwiftABIInfo { public: enum ABIKind { AAPCS = 0, @@ -4231,7 +4435,8 @@ private: ABIKind Kind; public: - AArch64ABIInfo(CodeGenTypes &CGT, ABIKind Kind) : ABIInfo(CGT), Kind(Kind) {} + AArch64ABIInfo(CodeGenTypes &CGT, ABIKind Kind) + : SwiftABIInfo(CGT), Kind(Kind) {} private: ABIKind getABIKind() const { return Kind; } @@ -4264,6 +4469,12 @@ private: return isDarwinPCS() ? EmitDarwinVAArg(VAListAddr, Ty, CGF) : EmitAAPCSVAArg(VAListAddr, Ty, CGF); } + + bool shouldPassIndirectlyForSwift(CharUnits totalSize, + ArrayRef<llvm::Type*> scalars, + bool asReturnValue) const override { + return occupiesMoreThan(CGT, scalars, /*total*/ 4); + } }; class AArch64TargetCodeGenInfo : public TargetCodeGenInfo { @@ -4289,6 +4500,11 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty) const { // Handle illegal vector types here. if (isIllegalVectorType(Ty)) { uint64_t Size = getContext().getTypeSize(Ty); + // Android promotes <2 x i8> to i16, not i32 + if (isAndroid() && (Size <= 16)) { + llvm::Type *ResType = llvm::Type::getInt16Ty(getVMContext()); + return ABIArgInfo::getDirect(ResType); + } if (Size <= 32) { llvm::Type *ResType = llvm::Type::getInt32Ty(getVMContext()); return ABIArgInfo::getDirect(ResType); @@ -4409,8 +4625,8 @@ bool AArch64ABIInfo::isIllegalVectorType(QualType Ty) const { // Check whether VT is legal. unsigned NumElements = VT->getNumElements(); uint64_t Size = getContext().getTypeSize(VT); - // NumElements should be power of 2 between 1 and 16. - if ((NumElements & (NumElements - 1)) != 0 || NumElements > 16) + // NumElements should be power of 2. + if (!llvm::isPowerOf2_32(NumElements)) return true; return Size != 64 && (Size != 128 || NumElements == 1); } @@ -4489,7 +4705,7 @@ Address AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr, reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "gr_offs"); reg_top_index = 1; // field number for __gr_top reg_top_offset = CharUnits::fromQuantity(8); - RegSize = llvm::RoundUpToAlignment(RegSize, 8); + RegSize = llvm::alignTo(RegSize, 8); } else { // 4 is the field number of __vr_offs. 
reg_offs_p = @@ -4659,7 +4875,7 @@ Address AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr, if (IsIndirect) StackSize = StackSlotSize; else - StackSize = TyInfo.first.RoundUpToAlignment(StackSlotSize); + StackSize = TyInfo.first.alignTo(StackSlotSize); llvm::Value *StackSizeC = CGF.Builder.getSize(StackSize); llvm::Value *NewStack = @@ -4699,7 +4915,7 @@ Address AArch64ABIInfo::EmitDarwinVAArg(Address VAListAddr, QualType Ty, // illegal vector types. Lower VAArg here for these cases and use // the LLVM va_arg instruction for everything else. if (!isAggregateTypeForABI(Ty) && !isIllegalVectorType(Ty)) - return Address::invalid(); + return EmitVAArgInstr(CGF, VAListAddr, Ty, ABIArgInfo::getDirect()); CharUnits SlotSize = CharUnits::fromQuantity(8); @@ -4733,7 +4949,7 @@ Address AArch64ABIInfo::EmitDarwinVAArg(Address VAListAddr, QualType Ty, namespace { -class ARMABIInfo : public ABIInfo { +class ARMABIInfo : public SwiftABIInfo { public: enum ABIKind { APCS = 0, @@ -4746,7 +4962,8 @@ private: ABIKind Kind; public: - ARMABIInfo(CodeGenTypes &CGT, ABIKind _Kind) : ABIInfo(CGT), Kind(_Kind) { + ARMABIInfo(CodeGenTypes &CGT, ABIKind _Kind) + : SwiftABIInfo(CGT), Kind(_Kind) { setCCs(); } @@ -4757,6 +4974,8 @@ public: case llvm::Triple::EABIHF: case llvm::Triple::GNUEABI: case llvm::Triple::GNUEABIHF: + case llvm::Triple::MuslEABI: + case llvm::Triple::MuslEABIHF: return true; default: return false; @@ -4767,17 +4986,13 @@ public: switch (getTarget().getTriple().getEnvironment()) { case llvm::Triple::EABIHF: case llvm::Triple::GNUEABIHF: + case llvm::Triple::MuslEABIHF: return true; default: return false; } } - bool isAndroid() const { - return (getTarget().getTriple().getEnvironment() == - llvm::Triple::Android); - } - ABIKind getABIKind() const { return Kind; } private: @@ -4797,6 +5012,12 @@ private: llvm::CallingConv::ID getLLVMDefaultCC() const; llvm::CallingConv::ID getABIDefaultCC() const; void setCCs(); + + bool shouldPassIndirectlyForSwift(CharUnits totalSize, + ArrayRef<llvm::Type*> scalars, + bool asReturnValue) const override { + return occupiesMoreThan(CGT, scalars, /*total*/ 4); + } }; class ARMTargetCodeGenInfo : public TargetCodeGenInfo { @@ -4877,6 +5098,16 @@ public: void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const override; + + void getDependentLibraryOption(llvm::StringRef Lib, + llvm::SmallString<24> &Opt) const override { + Opt = "/DEFAULTLIB:" + qualifyWindowsLibrary(Lib); + } + + void getDetectMismatchOption(llvm::StringRef Name, llvm::StringRef Value, + llvm::SmallString<32> &Opt) const override { + Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\""; + } }; void WindowsARMTargetCodeGenInfo::setTargetAttributes( @@ -4906,7 +5137,7 @@ void ARMABIInfo::computeInfo(CGFunctionInfo &FI) const { /// Return the default calling convention that LLVM will use. llvm::CallingConv::ID ARMABIInfo::getLLVMDefaultCC() const { // The default calling convention that LLVM will infer. - if (isEABIHF() || getTarget().getTriple().isWatchOS()) + if (isEABIHF() || getTarget().getTriple().isWatchABI()) return llvm::CallingConv::ARM_AAPCS_VFP; else if (isEABI()) return llvm::CallingConv::ARM_AAPCS; @@ -4988,7 +5219,7 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, // __fp16 gets passed as if it were an int or float, but with the top 16 bits // unspecified. This is not done for OpenCL as it handles the half type // natively, and does not need to interwork with AAPCS code. 
- if (Ty->isHalfType() && !getContext().getLangOpts().OpenCL) { + if (Ty->isHalfType() && !getContext().getLangOpts().NativeHalfArgsAndReturns) { llvm::Type *ResType = IsEffectivelyAAPCS_VFP ? llvm::Type::getFloatTy(getVMContext()) : llvm::Type::getInt32Ty(getVMContext()); @@ -5180,7 +5411,7 @@ ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy, // __fp16 gets returned as if it were an int or float, but with the top 16 // bits unspecified. This is not done for OpenCL as it handles the half type // natively, and does not need to interwork with AAPCS code. - if (RetTy->isHalfType() && !getContext().getLangOpts().OpenCL) { + if (RetTy->isHalfType() && !getContext().getLangOpts().NativeHalfArgsAndReturns) { llvm::Type *ResType = IsEffectivelyAAPCS_VFP ? llvm::Type::getFloatTy(getVMContext()) : llvm::Type::getInt32Ty(getVMContext()); @@ -5257,7 +5488,7 @@ ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy, } else if (Size <= 128 && getABIKind() == AAPCS16_VFP) { llvm::Type *Int32Ty = llvm::Type::getInt32Ty(getVMContext()); llvm::Type *CoerceTy = - llvm::ArrayType::get(Int32Ty, llvm::RoundUpToAlignment(Size, 32) / 32); + llvm::ArrayType::get(Int32Ty, llvm::alignTo(Size, 32) / 32); return ABIArgInfo::getDirect(CoerceTy); } @@ -5513,12 +5744,12 @@ void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::Function *F, StringRef Name, namespace { -class SystemZABIInfo : public ABIInfo { +class SystemZABIInfo : public SwiftABIInfo { bool HasVector; public: SystemZABIInfo(CodeGenTypes &CGT, bool HV) - : ABIInfo(CGT), HasVector(HV) {} + : SwiftABIInfo(CGT), HasVector(HV) {} bool isPromotableIntegerType(QualType Ty) const; bool isCompoundType(QualType Ty) const; @@ -5538,6 +5769,12 @@ public: Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; + + bool shouldPassIndirectlyForSwift(CharUnits totalSize, + ArrayRef<llvm::Type*> scalars, + bool asReturnValue) const override { + return occupiesMoreThan(CGT, scalars, /*total*/ 4); + } }; class SystemZTargetCodeGenInfo : public TargetCodeGenInfo { @@ -6067,8 +6304,8 @@ MipsABIInfo::classifyArgumentType(QualType Ty, uint64_t &Offset) const { Align = std::min(std::max(Align, (uint64_t)MinABIStackAlignInBytes), (uint64_t)StackAlignInBytes); - unsigned CurrOffset = llvm::RoundUpToAlignment(Offset, Align); - Offset = CurrOffset + llvm::RoundUpToAlignment(TySize, Align * 8) / 8; + unsigned CurrOffset = llvm::alignTo(Offset, Align); + Offset = CurrOffset + llvm::alignTo(TySize, Align * 8) / 8; if (isAggregateTypeForABI(Ty) || Ty->isVectorType()) { // Ignore empty aggregates. @@ -6465,6 +6702,132 @@ Address HexagonABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, } //===----------------------------------------------------------------------===// +// Lanai ABI Implementation +//===----------------------------------------------------------------------===// + +namespace { +class LanaiABIInfo : public DefaultABIInfo { +public: + LanaiABIInfo(CodeGen::CodeGenTypes &CGT) : DefaultABIInfo(CGT) {} + + bool shouldUseInReg(QualType Ty, CCState &State) const; + + void computeInfo(CGFunctionInfo &FI) const override { + CCState State(FI.getCallingConvention()); + // Lanai uses 4 registers to pass arguments unless the function has the + // regparm attribute set. 
+ if (FI.getHasRegParm()) { + State.FreeRegs = FI.getRegParm(); + } else { + State.FreeRegs = 4; + } + + if (!getCXXABI().classifyReturnType(FI)) + FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + for (auto &I : FI.arguments()) + I.info = classifyArgumentType(I.type, State); + } + + ABIArgInfo getIndirectResult(QualType Ty, bool ByVal, CCState &State) const; + ABIArgInfo classifyArgumentType(QualType RetTy, CCState &State) const; +}; +} // end anonymous namespace + +bool LanaiABIInfo::shouldUseInReg(QualType Ty, CCState &State) const { + unsigned Size = getContext().getTypeSize(Ty); + unsigned SizeInRegs = llvm::alignTo(Size, 32U) / 32U; + + if (SizeInRegs == 0) + return false; + + if (SizeInRegs > State.FreeRegs) { + State.FreeRegs = 0; + return false; + } + + State.FreeRegs -= SizeInRegs; + + return true; +} + +ABIArgInfo LanaiABIInfo::getIndirectResult(QualType Ty, bool ByVal, + CCState &State) const { + if (!ByVal) { + if (State.FreeRegs) { + --State.FreeRegs; // Non-byval indirects just use one pointer. + return getNaturalAlignIndirectInReg(Ty); + } + return getNaturalAlignIndirect(Ty, false); + } + + // Compute the byval alignment. + const unsigned MinABIStackAlignInBytes = 4; + unsigned TypeAlign = getContext().getTypeAlign(Ty) / 8; + return ABIArgInfo::getIndirect(CharUnits::fromQuantity(4), /*ByVal=*/true, + /*Realign=*/TypeAlign > + MinABIStackAlignInBytes); +} + +ABIArgInfo LanaiABIInfo::classifyArgumentType(QualType Ty, + CCState &State) const { + // Check with the C++ ABI first. + const RecordType *RT = Ty->getAs<RecordType>(); + if (RT) { + CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI()); + if (RAA == CGCXXABI::RAA_Indirect) { + return getIndirectResult(Ty, /*ByVal=*/false, State); + } else if (RAA == CGCXXABI::RAA_DirectInMemory) { + return getNaturalAlignIndirect(Ty, /*ByRef=*/true); + } + } + + if (isAggregateTypeForABI(Ty)) { + // Structures with flexible arrays are always indirect. + if (RT && RT->getDecl()->hasFlexibleArrayMember()) + return getIndirectResult(Ty, /*ByVal=*/true, State); + + // Ignore empty structs/unions. + if (isEmptyRecord(getContext(), Ty, true)) + return ABIArgInfo::getIgnore(); + + llvm::LLVMContext &LLVMContext = getVMContext(); + unsigned SizeInRegs = (getContext().getTypeSize(Ty) + 31) / 32; + if (SizeInRegs <= State.FreeRegs) { + llvm::IntegerType *Int32 = llvm::Type::getInt32Ty(LLVMContext); + SmallVector<llvm::Type *, 3> Elements(SizeInRegs, Int32); + llvm::Type *Result = llvm::StructType::get(LLVMContext, Elements); + State.FreeRegs -= SizeInRegs; + return ABIArgInfo::getDirectInReg(Result); + } else { + State.FreeRegs = 0; + } + return getIndirectResult(Ty, true, State); + } + + // Treat an enum type as its underlying type. 
+ if (const auto *EnumTy = Ty->getAs<EnumType>()) + Ty = EnumTy->getDecl()->getIntegerType(); + + bool InReg = shouldUseInReg(Ty, State); + if (Ty->isPromotableIntegerType()) { + if (InReg) + return ABIArgInfo::getDirectInReg(); + return ABIArgInfo::getExtend(); + } + if (InReg) + return ABIArgInfo::getDirectInReg(); + return ABIArgInfo::getDirect(); +} + +namespace { +class LanaiTargetCodeGenInfo : public TargetCodeGenInfo { +public: + LanaiTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT) + : TargetCodeGenInfo(new LanaiABIInfo(CGT)) {} +}; +} + +//===----------------------------------------------------------------------===// // AMDGPU ABI Implementation //===----------------------------------------------------------------------===// @@ -6476,6 +6839,7 @@ public: : TargetCodeGenInfo(new DefaultABIInfo(CGT)) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const override; + unsigned getOpenCLKernelCallingConv() const override; }; } @@ -6504,6 +6868,53 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes( } +unsigned AMDGPUTargetCodeGenInfo::getOpenCLKernelCallingConv() const { + return llvm::CallingConv::AMDGPU_KERNEL; +} + +//===----------------------------------------------------------------------===// +// SPARC v8 ABI Implementation. +// Based on the SPARC Compliance Definition version 2.4.1. +// +// Ensures that complex values are passed in registers. +// +namespace { +class SparcV8ABIInfo : public DefaultABIInfo { +public: + SparcV8ABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) {} + +private: + ABIArgInfo classifyReturnType(QualType RetTy) const; + void computeInfo(CGFunctionInfo &FI) const override; +}; +} // end anonymous namespace + + +ABIArgInfo +SparcV8ABIInfo::classifyReturnType(QualType Ty) const { + if (Ty->isAnyComplexType()) { + return ABIArgInfo::getDirect(); + } + else { + return DefaultABIInfo::classifyReturnType(Ty); + } +} + +void SparcV8ABIInfo::computeInfo(CGFunctionInfo &FI) const { + + FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + for (auto &Arg : FI.arguments()) + Arg.info = classifyArgumentType(Arg.type); +} + +namespace { +class SparcV8TargetCodeGenInfo : public TargetCodeGenInfo { +public: + SparcV8TargetCodeGenInfo(CodeGenTypes &CGT) + : TargetCodeGenInfo(new SparcV8ABIInfo(CGT)) {} +}; +} // end anonymous namespace + //===----------------------------------------------------------------------===// // SPARC v9 ABI Implementation. // Based on the SPARC Compliance Definition version 2.4.1. @@ -6569,7 +6980,7 @@ private: return; // Finish the current 64-bit word. - uint64_t Aligned = llvm::RoundUpToAlignment(Size, 64); + uint64_t Aligned = llvm::alignTo(Size, 64); if (Aligned > Size && Aligned <= ToSize) { Elems.push_back(llvm::IntegerType::get(Context, Aligned - Size)); Size = Aligned; @@ -6686,7 +7097,7 @@ SparcV9ABIInfo::classifyType(QualType Ty, unsigned SizeLimit) const { CoerceBuilder CB(getVMContext(), getDataLayout()); CB.addStruct(0, StrTy); - CB.pad(llvm::RoundUpToAlignment(CB.DL.getTypeSizeInBits(StrTy), 64)); + CB.pad(llvm::alignTo(CB.DL.getTypeSizeInBits(StrTy), 64)); // Try to use the original type for coercion. llvm::Type *CoerceTy = CB.isUsableType(StrTy) ? 
StrTy : CB.getType(); @@ -6716,6 +7127,7 @@ Address SparcV9ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, CharUnits Stride; switch (AI.getKind()) { case ABIArgInfo::Expand: + case ABIArgInfo::CoerceAndExpand: case ABIArgInfo::InAlloca: llvm_unreachable("Unsupported ABI kind for va_arg"); @@ -6728,7 +7140,7 @@ Address SparcV9ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, case ABIArgInfo::Direct: { auto AllocSize = getDataLayout().getTypeAllocSize(AI.getCoerceToType()); - Stride = CharUnits::fromQuantity(AllocSize).RoundUpToAlignment(SlotSize); + Stride = CharUnits::fromQuantity(AllocSize).alignTo(SlotSize); ArgAddr = Addr; break; } @@ -6924,6 +7336,8 @@ public: } // End anonymous namespace. +// TODO: this implementation is likely now redundant with the default +// EmitVAArg. Address XCoreABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { CGBuilderTy &Builder = CGF.Builder; @@ -6944,6 +7358,7 @@ Address XCoreABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, CharUnits ArgSize = CharUnits::Zero(); switch (AI.getKind()) { case ABIArgInfo::Expand: + case ABIArgInfo::CoerceAndExpand: case ABIArgInfo::InAlloca: llvm_unreachable("Unsupported ABI kind for va_arg"); case ABIArgInfo::Ignore: @@ -6955,7 +7370,7 @@ Address XCoreABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, Val = Builder.CreateBitCast(AP, ArgPtrTy); ArgSize = CharUnits::fromQuantity( getDataLayout().getTypeAllocSize(AI.getCoerceToType())); - ArgSize = ArgSize.RoundUpToAlignment(SlotSize); + ArgSize = ArgSize.alignTo(SlotSize); break; case ABIArgInfo::Indirect: Val = Builder.CreateElementBitCast(AP, ArgPtrTy); @@ -7086,15 +7501,59 @@ void XCoreTargetCodeGenInfo::emitTargetMD(const Decl *D, llvm::GlobalValue *GV, SmallStringEnc Enc; if (getTypeString(Enc, D, CGM, TSC)) { llvm::LLVMContext &Ctx = CGM.getModule().getContext(); - llvm::SmallVector<llvm::Metadata *, 2> MDVals; - MDVals.push_back(llvm::ConstantAsMetadata::get(GV)); - MDVals.push_back(llvm::MDString::get(Ctx, Enc.str())); + llvm::Metadata *MDVals[] = {llvm::ConstantAsMetadata::get(GV), + llvm::MDString::get(Ctx, Enc.str())}; llvm::NamedMDNode *MD = CGM.getModule().getOrInsertNamedMetadata("xcore.typestrings"); MD->addOperand(llvm::MDNode::get(Ctx, MDVals)); } } +//===----------------------------------------------------------------------===// +// SPIR ABI Implementation +//===----------------------------------------------------------------------===// + +namespace { +class SPIRTargetCodeGenInfo : public TargetCodeGenInfo { +public: + SPIRTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT) + : TargetCodeGenInfo(new DefaultABIInfo(CGT)) {} + void emitTargetMD(const Decl *D, llvm::GlobalValue *GV, + CodeGen::CodeGenModule &M) const override; + unsigned getOpenCLKernelCallingConv() const override; +}; +} // End anonymous namespace. + +/// Emit SPIR specific metadata: OpenCL and SPIR version. +void SPIRTargetCodeGenInfo::emitTargetMD(const Decl *D, llvm::GlobalValue *GV, + CodeGen::CodeGenModule &CGM) const { + llvm::LLVMContext &Ctx = CGM.getModule().getContext(); + llvm::Type *Int32Ty = llvm::Type::getInt32Ty(Ctx); + llvm::Module &M = CGM.getModule(); + // SPIR v2.0 s2.12 - The SPIR version used by the module is stored in the + // opencl.spir.version named metadata. 
+ llvm::Metadata *SPIRVerElts[] = { + llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(Int32Ty, 2)), + llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(Int32Ty, 0))}; + llvm::NamedMDNode *SPIRVerMD = + M.getOrInsertNamedMetadata("opencl.spir.version"); + SPIRVerMD->addOperand(llvm::MDNode::get(Ctx, SPIRVerElts)); + // SPIR v2.0 s2.13 - The OpenCL version used by the module is stored in the + // opencl.ocl.version named metadata node. + llvm::Metadata *OCLVerElts[] = { + llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( + Int32Ty, CGM.getLangOpts().OpenCLVersion / 100)), + llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( + Int32Ty, (CGM.getLangOpts().OpenCLVersion % 100) / 10))}; + llvm::NamedMDNode *OCLVerMD = + M.getOrInsertNamedMetadata("opencl.ocl.version"); + OCLVerMD->addOperand(llvm::MDNode::get(Ctx, OCLVerElts)); +} + +unsigned SPIRTargetCodeGenInfo::getOpenCLKernelCallingConv() const { + return llvm::CallingConv::SPIR_KERNEL; +} + static bool appendType(SmallStringEnc &Enc, QualType QType, const CodeGen::CodeGenModule &CGM, TypeStringCache &TSC); @@ -7436,29 +7895,35 @@ const llvm::Triple &CodeGenModule::getTriple() const { } bool CodeGenModule::supportsCOMDAT() const { - return !getTriple().isOSBinFormatMachO(); + return getTriple().supportsCOMDAT(); } const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { if (TheTargetCodeGenInfo) return *TheTargetCodeGenInfo; + // Helper to set the unique_ptr while still keeping the return value. + auto SetCGInfo = [&](TargetCodeGenInfo *P) -> const TargetCodeGenInfo & { + this->TheTargetCodeGenInfo.reset(P); + return *P; + }; + const llvm::Triple &Triple = getTarget().getTriple(); switch (Triple.getArch()) { default: - return *(TheTargetCodeGenInfo = new DefaultTargetCodeGenInfo(Types)); + return SetCGInfo(new DefaultTargetCodeGenInfo(Types)); case llvm::Triple::le32: - return *(TheTargetCodeGenInfo = new PNaClTargetCodeGenInfo(Types)); + return SetCGInfo(new PNaClTargetCodeGenInfo(Types)); case llvm::Triple::mips: case llvm::Triple::mipsel: if (Triple.getOS() == llvm::Triple::NaCl) - return *(TheTargetCodeGenInfo = new PNaClTargetCodeGenInfo(Types)); - return *(TheTargetCodeGenInfo = new MIPSTargetCodeGenInfo(Types, true)); + return SetCGInfo(new PNaClTargetCodeGenInfo(Types)); + return SetCGInfo(new MIPSTargetCodeGenInfo(Types, true)); case llvm::Triple::mips64: case llvm::Triple::mips64el: - return *(TheTargetCodeGenInfo = new MIPSTargetCodeGenInfo(Types, false)); + return SetCGInfo(new MIPSTargetCodeGenInfo(Types, false)); case llvm::Triple::aarch64: case llvm::Triple::aarch64_be: { @@ -7466,78 +7931,79 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { if (getTarget().getABI() == "darwinpcs") Kind = AArch64ABIInfo::DarwinPCS; - return *(TheTargetCodeGenInfo = new AArch64TargetCodeGenInfo(Types, Kind)); + return SetCGInfo(new AArch64TargetCodeGenInfo(Types, Kind)); } case llvm::Triple::wasm32: case llvm::Triple::wasm64: - return *(TheTargetCodeGenInfo = new WebAssemblyTargetCodeGenInfo(Types)); + return SetCGInfo(new WebAssemblyTargetCodeGenInfo(Types)); case llvm::Triple::arm: case llvm::Triple::armeb: case llvm::Triple::thumb: - case llvm::Triple::thumbeb: - { - if (Triple.getOS() == llvm::Triple::Win32) { - TheTargetCodeGenInfo = - new WindowsARMTargetCodeGenInfo(Types, ARMABIInfo::AAPCS_VFP); - return *TheTargetCodeGenInfo; - } - - ARMABIInfo::ABIKind Kind = ARMABIInfo::AAPCS; - StringRef ABIStr = getTarget().getABI(); - if (ABIStr == "apcs-gnu") - Kind = ARMABIInfo::APCS; - else if (ABIStr == 
"aapcs16") - Kind = ARMABIInfo::AAPCS16_VFP; - else if (CodeGenOpts.FloatABI == "hard" || - (CodeGenOpts.FloatABI != "soft" && - Triple.getEnvironment() == llvm::Triple::GNUEABIHF)) - Kind = ARMABIInfo::AAPCS_VFP; - - return *(TheTargetCodeGenInfo = new ARMTargetCodeGenInfo(Types, Kind)); + case llvm::Triple::thumbeb: { + if (Triple.getOS() == llvm::Triple::Win32) { + return SetCGInfo( + new WindowsARMTargetCodeGenInfo(Types, ARMABIInfo::AAPCS_VFP)); } + ARMABIInfo::ABIKind Kind = ARMABIInfo::AAPCS; + StringRef ABIStr = getTarget().getABI(); + if (ABIStr == "apcs-gnu") + Kind = ARMABIInfo::APCS; + else if (ABIStr == "aapcs16") + Kind = ARMABIInfo::AAPCS16_VFP; + else if (CodeGenOpts.FloatABI == "hard" || + (CodeGenOpts.FloatABI != "soft" && + (Triple.getEnvironment() == llvm::Triple::GNUEABIHF || + Triple.getEnvironment() == llvm::Triple::MuslEABIHF || + Triple.getEnvironment() == llvm::Triple::EABIHF))) + Kind = ARMABIInfo::AAPCS_VFP; + + return SetCGInfo(new ARMTargetCodeGenInfo(Types, Kind)); + } + case llvm::Triple::ppc: - return *(TheTargetCodeGenInfo = - new PPC32TargetCodeGenInfo(Types, CodeGenOpts.FloatABI == "soft")); + return SetCGInfo( + new PPC32TargetCodeGenInfo(Types, CodeGenOpts.FloatABI == "soft")); case llvm::Triple::ppc64: if (Triple.isOSBinFormatELF()) { PPC64_SVR4_ABIInfo::ABIKind Kind = PPC64_SVR4_ABIInfo::ELFv1; if (getTarget().getABI() == "elfv2") Kind = PPC64_SVR4_ABIInfo::ELFv2; bool HasQPX = getTarget().getABI() == "elfv1-qpx"; + bool IsSoftFloat = CodeGenOpts.FloatABI == "soft"; - return *(TheTargetCodeGenInfo = - new PPC64_SVR4_TargetCodeGenInfo(Types, Kind, HasQPX)); + return SetCGInfo(new PPC64_SVR4_TargetCodeGenInfo(Types, Kind, HasQPX, + IsSoftFloat)); } else - return *(TheTargetCodeGenInfo = new PPC64TargetCodeGenInfo(Types)); + return SetCGInfo(new PPC64TargetCodeGenInfo(Types)); case llvm::Triple::ppc64le: { assert(Triple.isOSBinFormatELF() && "PPC64 LE non-ELF not supported!"); PPC64_SVR4_ABIInfo::ABIKind Kind = PPC64_SVR4_ABIInfo::ELFv2; if (getTarget().getABI() == "elfv1" || getTarget().getABI() == "elfv1-qpx") Kind = PPC64_SVR4_ABIInfo::ELFv1; bool HasQPX = getTarget().getABI() == "elfv1-qpx"; + bool IsSoftFloat = CodeGenOpts.FloatABI == "soft"; - return *(TheTargetCodeGenInfo = - new PPC64_SVR4_TargetCodeGenInfo(Types, Kind, HasQPX)); + return SetCGInfo(new PPC64_SVR4_TargetCodeGenInfo(Types, Kind, HasQPX, + IsSoftFloat)); } case llvm::Triple::nvptx: case llvm::Triple::nvptx64: - return *(TheTargetCodeGenInfo = new NVPTXTargetCodeGenInfo(Types)); + return SetCGInfo(new NVPTXTargetCodeGenInfo(Types)); case llvm::Triple::msp430: - return *(TheTargetCodeGenInfo = new MSP430TargetCodeGenInfo(Types)); + return SetCGInfo(new MSP430TargetCodeGenInfo(Types)); case llvm::Triple::systemz: { bool HasVector = getTarget().getABI() == "vector"; - return *(TheTargetCodeGenInfo = new SystemZTargetCodeGenInfo(Types, - HasVector)); + return SetCGInfo(new SystemZTargetCodeGenInfo(Types, HasVector)); } case llvm::Triple::tce: - return *(TheTargetCodeGenInfo = new TCETargetCodeGenInfo(Types)); + return SetCGInfo(new TCETargetCodeGenInfo(Types)); case llvm::Triple::x86: { bool IsDarwinVectorABI = Triple.isOSDarwin(); @@ -7546,44 +8012,49 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { bool IsWin32FloatStructABI = Triple.isOSWindows() && !Triple.isOSCygMing(); if (Triple.getOS() == llvm::Triple::Win32) { - return *(TheTargetCodeGenInfo = new WinX86_32TargetCodeGenInfo( - Types, IsDarwinVectorABI, RetSmallStructInRegABI, - IsWin32FloatStructABI, 
CodeGenOpts.NumRegisterParameters)); + return SetCGInfo(new WinX86_32TargetCodeGenInfo( + Types, IsDarwinVectorABI, RetSmallStructInRegABI, + IsWin32FloatStructABI, CodeGenOpts.NumRegisterParameters)); } else { - return *(TheTargetCodeGenInfo = new X86_32TargetCodeGenInfo( - Types, IsDarwinVectorABI, RetSmallStructInRegABI, - IsWin32FloatStructABI, CodeGenOpts.NumRegisterParameters, - CodeGenOpts.FloatABI == "soft")); + return SetCGInfo(new X86_32TargetCodeGenInfo( + Types, IsDarwinVectorABI, RetSmallStructInRegABI, + IsWin32FloatStructABI, CodeGenOpts.NumRegisterParameters, + CodeGenOpts.FloatABI == "soft")); } } case llvm::Triple::x86_64: { StringRef ABI = getTarget().getABI(); - X86AVXABILevel AVXLevel = (ABI == "avx512" ? X86AVXABILevel::AVX512 : - ABI == "avx" ? X86AVXABILevel::AVX : - X86AVXABILevel::None); + X86AVXABILevel AVXLevel = + (ABI == "avx512" + ? X86AVXABILevel::AVX512 + : ABI == "avx" ? X86AVXABILevel::AVX : X86AVXABILevel::None); switch (Triple.getOS()) { case llvm::Triple::Win32: - return *(TheTargetCodeGenInfo = - new WinX86_64TargetCodeGenInfo(Types, AVXLevel)); + return SetCGInfo(new WinX86_64TargetCodeGenInfo(Types, AVXLevel)); case llvm::Triple::PS4: - return *(TheTargetCodeGenInfo = - new PS4TargetCodeGenInfo(Types, AVXLevel)); + return SetCGInfo(new PS4TargetCodeGenInfo(Types, AVXLevel)); default: - return *(TheTargetCodeGenInfo = - new X86_64TargetCodeGenInfo(Types, AVXLevel)); + return SetCGInfo(new X86_64TargetCodeGenInfo(Types, AVXLevel)); } } case llvm::Triple::hexagon: - return *(TheTargetCodeGenInfo = new HexagonTargetCodeGenInfo(Types)); + return SetCGInfo(new HexagonTargetCodeGenInfo(Types)); + case llvm::Triple::lanai: + return SetCGInfo(new LanaiTargetCodeGenInfo(Types)); case llvm::Triple::r600: - return *(TheTargetCodeGenInfo = new AMDGPUTargetCodeGenInfo(Types)); + return SetCGInfo(new AMDGPUTargetCodeGenInfo(Types)); case llvm::Triple::amdgcn: - return *(TheTargetCodeGenInfo = new AMDGPUTargetCodeGenInfo(Types)); + return SetCGInfo(new AMDGPUTargetCodeGenInfo(Types)); + case llvm::Triple::sparc: + return SetCGInfo(new SparcV8TargetCodeGenInfo(Types)); case llvm::Triple::sparcv9: - return *(TheTargetCodeGenInfo = new SparcV9TargetCodeGenInfo(Types)); + return SetCGInfo(new SparcV9TargetCodeGenInfo(Types)); case llvm::Triple::xcore: - return *(TheTargetCodeGenInfo = new XCoreTargetCodeGenInfo(Types)); + return SetCGInfo(new XCoreTargetCodeGenInfo(Types)); + case llvm::Triple::spir: + case llvm::Triple::spir64: + return SetCGInfo(new SPIRTargetCodeGenInfo(Types)); } } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.h b/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.h index 87b4704..e463825 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.h @@ -29,15 +29,14 @@ class Value; } namespace clang { -class ABIInfo; class Decl; namespace CodeGen { +class ABIInfo; class CallArgList; class CodeGenModule; class CodeGenFunction; class CGFunctionInfo; -} /// TargetCodeGenInfo - This class organizes various target-specific /// codegeneration issues, like target-specific attributes, builtins and so @@ -218,7 +217,12 @@ public: virtual void getDetectMismatchOption(llvm::StringRef Name, llvm::StringRef Value, llvm::SmallString<32> &Opt) const {} + + /// Get LLVM calling convention for OpenCL kernel. + virtual unsigned getOpenCLKernelCallingConv() const; }; + +} // namespace CodeGen } // namespace clang #endif // LLVM_CLANG_LIB_CODEGEN_TARGETINFO_H |
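
The hunks above introduce two extension points that new targets hook into: each architecture now registers its TargetCodeGenInfo through the SetCGInfo helper in CodeGenModule::getTargetCodeGenInfo(), and TargetCodeGenInfo grows a getOpenCLKernelCallingConv() hook, which AMDGPU and SPIR override to return AMDGPU_KERNEL and SPIR_KERNEL respectively while other targets keep the base implementation. The sketch below is illustrative only and is not part of the change: "Foo" is a hypothetical target, and the code assumes it sits in TargetInfo.cpp where DefaultABIInfo, TargetCodeGenInfo and the LLVM calling-convention enums are already visible, following the same pattern the patch uses for Lanai, SPIR and AMDGPU.

namespace {
// Hypothetical target, shown only to illustrate the registration pattern
// added by this change; it does not exist in the tree.
class FooTargetCodeGenInfo : public TargetCodeGenInfo {
public:
  FooTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
      : TargetCodeGenInfo(new DefaultABIInfo(CGT)) {}

  // New hook added by this change; SPIR returns SPIR_KERNEL and AMDGPU
  // returns AMDGPU_KERNEL above. A target without a dedicated kernel
  // calling convention would simply not override this.
  unsigned getOpenCLKernelCallingConv() const override {
    return llvm::CallingConv::SPIR_KERNEL; // placeholder choice
  }
};
} // end anonymous namespace

// Registration in CodeGenModule::getTargetCodeGenInfo() would follow the
// SetCGInfo pattern used for every case in the switch:
//
//   case llvm::Triple::foo:                        // hypothetical triple
//     return SetCGInfo(new FooTargetCodeGenInfo(Types));

As a usage note, the repeated RoundUpToAlignment -> alignTo renames in these hunks are mechanical: llvm::alignTo(Size, 32) rounds Size up to the next multiple of 32, so expressions such as llvm::alignTo(Size, 32) / 32 still compute the number of 32-bit registers an argument occupies.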