Diffstat (limited to 'lib/CodeGen/TargetInfo.cpp')
-rw-r--r-- | lib/CodeGen/TargetInfo.cpp | 696
1 file changed, 595 insertions, 101 deletions
diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp index ffff0d0..7cc63b7 100644 --- a/lib/CodeGen/TargetInfo.cpp +++ b/lib/CodeGen/TargetInfo.cpp @@ -17,9 +17,9 @@ #include "CodeGenFunction.h" #include "clang/AST/RecordLayout.h" #include "clang/Frontend/CodeGenOptions.h" -#include "llvm/Type.h" -#include "llvm/DataLayout.h" #include "llvm/ADT/Triple.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Type.h" #include "llvm/Support/raw_ostream.h" using namespace clang; using namespace CodeGen; @@ -37,7 +37,7 @@ static void AssignToArrayRange(CodeGen::CGBuilderTy &Builder, } static bool isAggregateTypeForABI(QualType T) { - return CodeGenFunction::hasAggregateLLVMType(T) || + return !CodeGenFunction::hasScalarEvaluationKind(T) || T->isMemberFunctionPointerType(); } @@ -95,6 +95,7 @@ unsigned TargetCodeGenInfo::getSizeOfUnwindException() const { // x86-32 FreeBSD, Linux, Darwin // PowerPC Linux, Darwin // ARM Darwin (*not* EABI) + // AArch64 Linux return 32; } @@ -173,7 +174,7 @@ static bool hasNonTrivialDestructorOrCopyConstructor(const RecordType *RT) { if (!RD) return false; - return !RD->hasTrivialDestructor() || !RD->hasTrivialCopyConstructor(); + return !RD->hasTrivialDestructor() || RD->hasNonTrivialCopyConstructor(); } /// isRecordWithNonTrivialDestructorOrCopyConstructor - Determine if a type is @@ -266,9 +267,15 @@ static const Type *isSingleElementStruct(QualType T, ASTContext &Context) { } static bool is32Or64BitBasicType(QualType Ty, ASTContext &Context) { + // Treat complex types as the element type. + if (const ComplexType *CTy = Ty->getAs<ComplexType>()) + Ty = CTy->getElementType(); + + // Check for a type which we know has a simple scalar argument-passing + // convention without any padding. (We're specifically looking for 32 + // and 64-bit integer and integer-equivalents, float, and double.) if (!Ty->getAs<BuiltinType>() && !Ty->hasPointerRepresentation() && - !Ty->isAnyComplexType() && !Ty->isEnumeralType() && - !Ty->isBlockPointerType()) + !Ty->isEnumeralType() && !Ty->isBlockPointerType()) return false; uint64_t Size = Context.getTypeSize(Ty); @@ -414,6 +421,8 @@ class PNaClTargetCodeGenInfo : public TargetCodeGenInfo { void PNaClABIInfo::computeInfo(CGFunctionInfo &FI) const { FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + // Obtain the initial number of registers available for passing integers + // from the function's regparm attribute. unsigned FreeRegs = FI.getHasRegParm() ? FI.getRegParm() : 0; for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end(); @@ -426,15 +435,18 @@ llvm::Value *PNaClABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty, return 0; } +// \brief Classify argument of given type \p Ty. \p FreeRegs is the number of +// registers available for passing arguments - it can be updated by this +// method. ABIArgInfo PNaClABIInfo::classifyArgumentType(QualType Ty, unsigned &FreeRegs) const { if (isAggregateTypeForABI(Ty)) { - // Records with non trivial destructors/constructors should not be passed - // by value. + // In the PNaCl ABI we always pass records/structures on the stack. The + // byval attribute can be used if the record doesn't have non-trivial + // constructors/destructors. FreeRegs = 0; if (isRecordWithNonTrivialDestructorOrCopyConstructor(Ty)) return ABIArgInfo::getIndirect(0, /*ByVal=*/false); - return ABIArgInfo::getIndirect(0); } @@ -445,14 +457,17 @@ ABIArgInfo PNaClABIInfo::classifyArgumentType(QualType Ty, ABIArgInfo BaseInfo = (Ty->isPromotableIntegerType() ? 
ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); - // Regparm regs hold 32 bits. - unsigned SizeInRegs = (getContext().getTypeSize(Ty) + 31) / 32; - if (SizeInRegs == 0) return BaseInfo; - if (SizeInRegs > FreeRegs) { + // Figure out how many of the free registers can be occupied by this type. + // regparm registers are 32-bit. + unsigned NumRegsRequired = (getContext().getTypeSize(Ty) + 31) / 32; + if (NumRegsRequired == 0) return BaseInfo; + if (NumRegsRequired > FreeRegs) { + // If this type needs more registers than we have available, no more + // passing in-registers can happen. FreeRegs = 0; return BaseInfo; } - FreeRegs -= SizeInRegs; + FreeRegs -= NumRegsRequired; return BaseInfo.isDirect() ? ABIArgInfo::getDirectInReg(BaseInfo.getCoerceToType()) : ABIArgInfo::getExtendInReg(BaseInfo.getCoerceToType()); @@ -462,6 +477,7 @@ ABIArgInfo PNaClABIInfo::classifyReturnType(QualType RetTy) const { if (RetTy->isVoidType()) return ABIArgInfo::getIgnore(); + // In the PNaCl ABI we always return records/structures on the stack. if (isAggregateTypeForABI(RetTy)) return ABIArgInfo::getIndirect(0); @@ -473,11 +489,9 @@ ABIArgInfo PNaClABIInfo::classifyReturnType(QualType RetTy) const { ABIArgInfo::getExtend() : ABIArgInfo::getDirect()); } -/// UseX86_MMXType - Return true if this is an MMX type that should use the -/// special x86_mmx type. -bool UseX86_MMXType(llvm::Type *IRType) { - // If the type is an MMX type <2 x i32>, <4 x i16>, or <8 x i8>, use the - // special x86_mmx type. +/// IsX86_MMXType - Return true if this is an MMX type. +bool IsX86_MMXType(llvm::Type *IRType) { + // Return true if the type is an MMX type <2 x i32>, <4 x i16>, or <8 x i8>. return IRType->isVectorTy() && IRType->getPrimitiveSizeInBits() == 64 && cast<llvm::VectorType>(IRType)->getElementType()->isIntegerTy() && IRType->getScalarSizeInBits() != 64; @@ -506,7 +520,6 @@ class X86_32ABIInfo : public ABIInfo { bool IsDarwinVectorABI; bool IsSmallStructInRegABI; - bool IsMMXDisabled; bool IsWin32FloatStructABI; unsigned DefaultNumRegisterParameters; @@ -539,18 +552,17 @@ public: virtual llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty, CodeGenFunction &CGF) const; - X86_32ABIInfo(CodeGen::CodeGenTypes &CGT, bool d, bool p, bool m, bool w, + X86_32ABIInfo(CodeGen::CodeGenTypes &CGT, bool d, bool p, bool w, unsigned r) : ABIInfo(CGT), IsDarwinVectorABI(d), IsSmallStructInRegABI(p), - IsMMXDisabled(m), IsWin32FloatStructABI(w), - DefaultNumRegisterParameters(r) {} + IsWin32FloatStructABI(w), DefaultNumRegisterParameters(r) {} }; class X86_32TargetCodeGenInfo : public TargetCodeGenInfo { public: X86_32TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, - bool d, bool p, bool m, bool w, unsigned r) - :TargetCodeGenInfo(new X86_32ABIInfo(CGT, d, p, m, w, r)) {} + bool d, bool p, bool w, unsigned r) + :TargetCodeGenInfo(new X86_32ABIInfo(CGT, d, p, w, r)) {} void SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const; @@ -903,15 +915,8 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, Size)); } - llvm::Type *IRType = CGT.ConvertType(Ty); - if (UseX86_MMXType(IRType)) { - if (IsMMXDisabled) - return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), - 64)); - ABIArgInfo AAI = ABIArgInfo::getDirect(IRType); - AAI.setCoerceToType(llvm::Type::getX86_MMXTy(getVMContext())); - return AAI; - } + if (IsX86_MMXType(CGT.ConvertType(Ty))) + return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), 64)); return ABIArgInfo::getDirect(); } @@ -1013,8 +1018,10 @@ 
void X86_32TargetCodeGenInfo::SetTargetAttributes(const Decl *D, // Now add the 'alignstack' attribute with a value of 16. llvm::AttrBuilder B; B.addStackAlignmentAttr(16); - Fn->addAttribute(llvm::AttrListPtr::FunctionIndex, - llvm::Attributes::get(CGM.getLLVMContext(), B)); + Fn->addAttributes(llvm::AttributeSet::FunctionIndex, + llvm::AttributeSet::get(CGM.getLLVMContext(), + llvm::AttributeSet::FunctionIndex, + B)); } } } @@ -1381,7 +1388,7 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, } else if ((k == BuiltinType::Float || k == BuiltinType::Double) || (k == BuiltinType::LongDouble && getContext().getTargetInfo().getTriple().getOS() == - llvm::Triple::NativeClient)) { + llvm::Triple::NaCl)) { Current = SSE; } else if (k == BuiltinType::LongDouble) { Lo = X87; @@ -1470,7 +1477,7 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, else if (ET == getContext().DoubleTy || (ET == getContext().LongDoubleTy && getContext().getTargetInfo().getTriple().getOS() == - llvm::Triple::NativeClient)) + llvm::Triple::NaCl)) Lo = Hi = SSE; else if (ET == getContext().LongDoubleTy) Current = ComplexX87; @@ -2777,6 +2784,9 @@ PPC64_SVR4_ABIInfo::isPromotableTypeForABI(QualType Ty) const { ABIArgInfo PPC64_SVR4_ABIInfo::classifyArgumentType(QualType Ty) const { + if (Ty->isAnyComplexType()) + return ABIArgInfo::getDirect(); + if (isAggregateTypeForABI(Ty)) { // Records with non trivial destructors/constructors should not be passed // by value. @@ -2795,6 +2805,9 @@ PPC64_SVR4_ABIInfo::classifyReturnType(QualType RetTy) const { if (RetTy->isVoidType()) return ABIArgInfo::getIgnore(); + if (RetTy->isAnyComplexType()) + return ABIArgInfo::getDirect(); + if (isAggregateTypeForABI(RetTy)) return ABIArgInfo::getIndirect(0); @@ -2813,14 +2826,52 @@ llvm::Value *PPC64_SVR4_ABIInfo::EmitVAArg(llvm::Value *VAListAddr, llvm::Value *VAListAddrAsBPP = Builder.CreateBitCast(VAListAddr, BPP, "ap"); llvm::Value *Addr = Builder.CreateLoad(VAListAddrAsBPP, "ap.cur"); - // Update the va_list pointer. + // Update the va_list pointer. The pointer should be bumped by the + // size of the object. We can trust getTypeSize() except for a complex + // type whose base type is smaller than a doubleword. For these, the + // size of the object is 16 bytes; see below for further explanation. unsigned SizeInBytes = CGF.getContext().getTypeSize(Ty) / 8; + QualType BaseTy; + unsigned CplxBaseSize = 0; + + if (const ComplexType *CTy = Ty->getAs<ComplexType>()) { + BaseTy = CTy->getElementType(); + CplxBaseSize = CGF.getContext().getTypeSize(BaseTy) / 8; + if (CplxBaseSize < 8) + SizeInBytes = 16; + } + unsigned Offset = llvm::RoundUpToAlignment(SizeInBytes, 8); llvm::Value *NextAddr = Builder.CreateGEP(Addr, llvm::ConstantInt::get(CGF.Int64Ty, Offset), "ap.next"); Builder.CreateStore(NextAddr, VAListAddrAsBPP); + // If we have a complex type and the base type is smaller than 8 bytes, + // the ABI calls for the real and imaginary parts to be right-adjusted + // in separate doublewords. However, Clang expects us to produce a + // pointer to a structure with the two parts packed tightly. So generate + // loads of the real and imaginary parts relative to the va_list pointer, + // and store them to a temporary structure. 
+ if (CplxBaseSize && CplxBaseSize < 8) { + llvm::Value *RealAddr = Builder.CreatePtrToInt(Addr, CGF.Int64Ty); + llvm::Value *ImagAddr = RealAddr; + RealAddr = Builder.CreateAdd(RealAddr, Builder.getInt64(8 - CplxBaseSize)); + ImagAddr = Builder.CreateAdd(ImagAddr, Builder.getInt64(16 - CplxBaseSize)); + llvm::Type *PBaseTy = llvm::PointerType::getUnqual(CGF.ConvertType(BaseTy)); + RealAddr = Builder.CreateIntToPtr(RealAddr, PBaseTy); + ImagAddr = Builder.CreateIntToPtr(ImagAddr, PBaseTy); + llvm::Value *Real = Builder.CreateLoad(RealAddr, false, ".vareal"); + llvm::Value *Imag = Builder.CreateLoad(ImagAddr, false, ".vaimag"); + llvm::Value *Ptr = CGF.CreateTempAlloca(CGT.ConvertTypeForMem(Ty), + "vacplx"); + llvm::Value *RealPtr = Builder.CreateStructGEP(Ptr, 0, ".real"); + llvm::Value *ImagPtr = Builder.CreateStructGEP(Ptr, 1, ".imag"); + Builder.CreateStore(Real, RealPtr, false); + Builder.CreateStore(Imag, ImagPtr, false); + return Ptr; + } + // If the argument is smaller than 8 bytes, it is right-adjusted in // its doubleword slot. Adjust the pointer to pick it up from the // correct offset. @@ -2908,7 +2959,9 @@ private: ABIKind Kind; public: - ARMABIInfo(CodeGenTypes &CGT, ABIKind _Kind) : ABIInfo(CGT), Kind(_Kind) {} + ARMABIInfo(CodeGenTypes &CGT, ABIKind _Kind) : ABIInfo(CGT), Kind(_Kind) { + setRuntimeCC(); + } bool isEABI() const { StringRef Env = @@ -2930,6 +2983,10 @@ private: virtual llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty, CodeGenFunction &CGF) const; + + llvm::CallingConv::ID getLLVMDefaultCC() const; + llvm::CallingConv::ID getABIDefaultCC() const; + void setRuntimeCC(); }; class ARMTargetCodeGenInfo : public TargetCodeGenInfo { @@ -2999,32 +3056,41 @@ void ARMABIInfo::computeInfo(CGFunctionInfo &FI) const { if (FI.getCallingConvention() != llvm::CallingConv::C) return; - // Calling convention as default by an ABI. - llvm::CallingConv::ID DefaultCC; + llvm::CallingConv::ID cc = getRuntimeCC(); + if (cc != llvm::CallingConv::C) + FI.setEffectiveCallingConvention(cc); +} + +/// Return the default calling convention that LLVM will use. +llvm::CallingConv::ID ARMABIInfo::getLLVMDefaultCC() const { + // The default calling convention that LLVM will infer. if (getContext().getTargetInfo().getTriple().getEnvironmentName()=="gnueabihf") - DefaultCC = llvm::CallingConv::ARM_AAPCS_VFP; + return llvm::CallingConv::ARM_AAPCS_VFP; else if (isEABI()) - DefaultCC = llvm::CallingConv::ARM_AAPCS; + return llvm::CallingConv::ARM_AAPCS; else - DefaultCC = llvm::CallingConv::ARM_APCS; + return llvm::CallingConv::ARM_APCS; +} - // If user did not ask for specific calling convention explicitly (e.g. via - // pcs attribute), set effective calling convention if it's different than ABI - // default. +/// Return the calling convention that our ABI would like us to use +/// as the C calling convention. 
+llvm::CallingConv::ID ARMABIInfo::getABIDefaultCC() const { switch (getABIKind()) { - case APCS: - if (DefaultCC != llvm::CallingConv::ARM_APCS) - FI.setEffectiveCallingConvention(llvm::CallingConv::ARM_APCS); - break; - case AAPCS: - if (DefaultCC != llvm::CallingConv::ARM_AAPCS) - FI.setEffectiveCallingConvention(llvm::CallingConv::ARM_AAPCS); - break; - case AAPCS_VFP: - if (DefaultCC != llvm::CallingConv::ARM_AAPCS_VFP) - FI.setEffectiveCallingConvention(llvm::CallingConv::ARM_AAPCS_VFP); - break; + case APCS: return llvm::CallingConv::ARM_APCS; + case AAPCS: return llvm::CallingConv::ARM_AAPCS; + case AAPCS_VFP: return llvm::CallingConv::ARM_AAPCS_VFP; } + llvm_unreachable("bad ABI kind"); +} + +void ARMABIInfo::setRuntimeCC() { + assert(getRuntimeCC() == llvm::CallingConv::C); + + // Don't muddy up the IR with a ton of explicit annotations if + // they'd just match what LLVM will infer from the triple. + llvm::CallingConv::ID abiCC = getABIDefaultCC(); + if (abiCC != getLLVMDefaultCC()) + RuntimeCC = abiCC; } /// isHomogeneousAggregate - Return true if a type is an AAPCS-VFP homogeneous @@ -3539,6 +3605,420 @@ llvm::Value *NaClARMABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty, } //===----------------------------------------------------------------------===// +// AArch64 ABI Implementation +//===----------------------------------------------------------------------===// + +namespace { + +class AArch64ABIInfo : public ABIInfo { +public: + AArch64ABIInfo(CodeGenTypes &CGT) : ABIInfo(CGT) {} + +private: + // The AArch64 PCS is explicit about return types and argument types being + // handled identically, so we don't need to draw a distinction between + // Argument and Return classification. + ABIArgInfo classifyGenericType(QualType Ty, int &FreeIntRegs, + int &FreeVFPRegs) const; + + ABIArgInfo tryUseRegs(QualType Ty, int &FreeRegs, int RegsNeeded, bool IsInt, + llvm::Type *DirectTy = 0) const; + + virtual void computeInfo(CGFunctionInfo &FI) const; + + virtual llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty, + CodeGenFunction &CGF) const; +}; + +class AArch64TargetCodeGenInfo : public TargetCodeGenInfo { +public: + AArch64TargetCodeGenInfo(CodeGenTypes &CGT) + :TargetCodeGenInfo(new AArch64ABIInfo(CGT)) {} + + const AArch64ABIInfo &getABIInfo() const { + return static_cast<const AArch64ABIInfo&>(TargetCodeGenInfo::getABIInfo()); + } + + int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const { + return 31; + } + + bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, + llvm::Value *Address) const { + // 0-31 are x0-x30 and sp: 8 bytes each + llvm::Value *Eight8 = llvm::ConstantInt::get(CGF.Int8Ty, 8); + AssignToArrayRange(CGF.Builder, Address, Eight8, 0, 31); + + // 64-95 are v0-v31: 16 bytes each + llvm::Value *Sixteen8 = llvm::ConstantInt::get(CGF.Int8Ty, 16); + AssignToArrayRange(CGF.Builder, Address, Sixteen8, 64, 95); + + return false; + } + +}; + +} + +void AArch64ABIInfo::computeInfo(CGFunctionInfo &FI) const { + int FreeIntRegs = 8, FreeVFPRegs = 8; + + FI.getReturnInfo() = classifyGenericType(FI.getReturnType(), + FreeIntRegs, FreeVFPRegs); + + FreeIntRegs = FreeVFPRegs = 8; + for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end(); + it != ie; ++it) { + it->info = classifyGenericType(it->type, FreeIntRegs, FreeVFPRegs); + + } +} + +ABIArgInfo +AArch64ABIInfo::tryUseRegs(QualType Ty, int &FreeRegs, int RegsNeeded, + bool IsInt, llvm::Type *DirectTy) const { + if (FreeRegs >= RegsNeeded) { + FreeRegs -= RegsNeeded; + return 
ABIArgInfo::getDirect(DirectTy); + } + + llvm::Type *Padding = 0; + + // We need padding so that later arguments don't get filled in anyway. That + // wouldn't happen if only ByVal arguments followed in the same category, but + // a large structure will simply seem to be a pointer as far as LLVM is + // concerned. + if (FreeRegs > 0) { + if (IsInt) + Padding = llvm::Type::getInt64Ty(getVMContext()); + else + Padding = llvm::Type::getFloatTy(getVMContext()); + + // Either [N x i64] or [N x float]. + Padding = llvm::ArrayType::get(Padding, FreeRegs); + FreeRegs = 0; + } + + return ABIArgInfo::getIndirect(getContext().getTypeAlign(Ty) / 8, + /*IsByVal=*/ true, /*Realign=*/ false, + Padding); +} + + +ABIArgInfo AArch64ABIInfo::classifyGenericType(QualType Ty, + int &FreeIntRegs, + int &FreeVFPRegs) const { + // Can only occurs for return, but harmless otherwise. + if (Ty->isVoidType()) + return ABIArgInfo::getIgnore(); + + // Large vector types should be returned via memory. There's no such concept + // in the ABI, but they'd be over 16 bytes anyway so no matter how they're + // classified they'd go into memory (see B.3). + if (Ty->isVectorType() && getContext().getTypeSize(Ty) > 128) { + if (FreeIntRegs > 0) + --FreeIntRegs; + return ABIArgInfo::getIndirect(0, /*ByVal=*/false); + } + + // All non-aggregate LLVM types have a concrete ABI representation so they can + // be passed directly. After this block we're guaranteed to be in a + // complicated case. + if (!isAggregateTypeForABI(Ty)) { + // Treat an enum type as its underlying type. + if (const EnumType *EnumTy = Ty->getAs<EnumType>()) + Ty = EnumTy->getDecl()->getIntegerType(); + + if (Ty->isFloatingType() || Ty->isVectorType()) + return tryUseRegs(Ty, FreeVFPRegs, /*RegsNeeded=*/ 1, /*IsInt=*/ false); + + assert(getContext().getTypeSize(Ty) <= 128 && + "unexpectedly large scalar type"); + + int RegsNeeded = getContext().getTypeSize(Ty) > 64 ? 2 : 1; + + // If the type may need padding registers to ensure "alignment", we must be + // careful when this is accounted for. Increasing the effective size covers + // all cases. + if (getContext().getTypeAlign(Ty) == 128) + RegsNeeded += FreeIntRegs % 2 != 0; + + return tryUseRegs(Ty, FreeIntRegs, RegsNeeded, /*IsInt=*/ true); + } + + // Structures with either a non-trivial destructor or a non-trivial + // copy constructor are always indirect. + if (isRecordWithNonTrivialDestructorOrCopyConstructor(Ty)) { + if (FreeIntRegs > 0) + --FreeIntRegs; + return ABIArgInfo::getIndirect(0, /*ByVal=*/false); + } + + if (isEmptyRecord(getContext(), Ty, true)) { + if (!getContext().getLangOpts().CPlusPlus) { + // Empty structs outside C++ mode are a GNU extension, so no ABI can + // possibly tell us what to do. It turns out (I believe) that GCC ignores + // the object for parameter-passsing purposes. + return ABIArgInfo::getIgnore(); + } + + // The combination of C++98 9p5 (sizeof(struct) != 0) and the pseudocode + // description of va_arg in the PCS require that an empty struct does + // actually occupy space for parameter-passing. I'm hoping for a + // clarification giving an explicit paragraph to point to in future. + return tryUseRegs(Ty, FreeIntRegs, /*RegsNeeded=*/ 1, /*IsInt=*/ true, + llvm::Type::getInt8Ty(getVMContext())); + } + + // Homogeneous vector aggregates get passed in registers or on the stack. 
+ const Type *Base = 0; + uint64_t NumMembers = 0; + if (isHomogeneousAggregate(Ty, Base, getContext(), &NumMembers)) { + assert(Base && "Base class should be set for homogeneous aggregate"); + // Homogeneous aggregates are passed and returned directly. + return tryUseRegs(Ty, FreeVFPRegs, /*RegsNeeded=*/ NumMembers, + /*IsInt=*/ false); + } + + uint64_t Size = getContext().getTypeSize(Ty); + if (Size <= 128) { + // Small structs can use the same direct type whether they're in registers + // or on the stack. + llvm::Type *BaseTy; + unsigned NumBases; + int SizeInRegs = (Size + 63) / 64; + + if (getContext().getTypeAlign(Ty) == 128) { + BaseTy = llvm::Type::getIntNTy(getVMContext(), 128); + NumBases = 1; + + // If the type may need padding registers to ensure "alignment", we must + // be careful when this is accounted for. Increasing the effective size + // covers all cases. + SizeInRegs += FreeIntRegs % 2 != 0; + } else { + BaseTy = llvm::Type::getInt64Ty(getVMContext()); + NumBases = SizeInRegs; + } + llvm::Type *DirectTy = llvm::ArrayType::get(BaseTy, NumBases); + + return tryUseRegs(Ty, FreeIntRegs, /*RegsNeeded=*/ SizeInRegs, + /*IsInt=*/ true, DirectTy); + } + + // If the aggregate is > 16 bytes, it's passed and returned indirectly. In + // LLVM terms the return uses an "sret" pointer, but that's handled elsewhere. + --FreeIntRegs; + return ABIArgInfo::getIndirect(0, /* byVal = */ false); +} + +llvm::Value *AArch64ABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty, + CodeGenFunction &CGF) const { + // The AArch64 va_list type and handling is specified in the Procedure Call + // Standard, section B.4: + // + // struct { + // void *__stack; + // void *__gr_top; + // void *__vr_top; + // int __gr_offs; + // int __vr_offs; + // }; + + assert(!CGF.CGM.getDataLayout().isBigEndian() + && "va_arg not implemented for big-endian AArch64"); + + int FreeIntRegs = 8, FreeVFPRegs = 8; + Ty = CGF.getContext().getCanonicalType(Ty); + ABIArgInfo AI = classifyGenericType(Ty, FreeIntRegs, FreeVFPRegs); + + llvm::BasicBlock *MaybeRegBlock = CGF.createBasicBlock("vaarg.maybe_reg"); + llvm::BasicBlock *InRegBlock = CGF.createBasicBlock("vaarg.in_reg"); + llvm::BasicBlock *OnStackBlock = CGF.createBasicBlock("vaarg.on_stack"); + llvm::BasicBlock *ContBlock = CGF.createBasicBlock("vaarg.end"); + + llvm::Value *reg_offs_p = 0, *reg_offs = 0; + int reg_top_index; + int RegSize; + if (FreeIntRegs < 8) { + assert(FreeVFPRegs == 8 && "Arguments never split between int & VFP regs"); + // 3 is the field number of __gr_offs + reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 3, "gr_offs_p"); + reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "gr_offs"); + reg_top_index = 1; // field number for __gr_top + RegSize = 8 * (8 - FreeIntRegs); + } else { + assert(FreeVFPRegs < 8 && "Argument must go in VFP or int regs"); + // 4 is the field number of __vr_offs. + reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 4, "vr_offs_p"); + reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "vr_offs"); + reg_top_index = 2; // field number for __vr_top + RegSize = 16 * (8 - FreeVFPRegs); + } + + //======================================= + // Find out where argument was passed + //======================================= + + // If reg_offs >= 0 we're already using the stack for this type of + // argument. We don't want to keep updating reg_offs (in case it overflows, + // though anyone passing 2GB of arguments, each at most 16 bytes, deserves + // whatever they get). 
+ llvm::Value *UsingStack = 0; + UsingStack = CGF.Builder.CreateICmpSGE(reg_offs, + llvm::ConstantInt::get(CGF.Int32Ty, 0)); + + CGF.Builder.CreateCondBr(UsingStack, OnStackBlock, MaybeRegBlock); + + // Otherwise, at least some kind of argument could go in these registers, the + // quesiton is whether this particular type is too big. + CGF.EmitBlock(MaybeRegBlock); + + // Integer arguments may need to correct register alignment (for example a + // "struct { __int128 a; };" gets passed in x_2N, x_{2N+1}). In this case we + // align __gr_offs to calculate the potential address. + if (FreeIntRegs < 8 && AI.isDirect() && getContext().getTypeAlign(Ty) > 64) { + int Align = getContext().getTypeAlign(Ty) / 8; + + reg_offs = CGF.Builder.CreateAdd(reg_offs, + llvm::ConstantInt::get(CGF.Int32Ty, Align - 1), + "align_regoffs"); + reg_offs = CGF.Builder.CreateAnd(reg_offs, + llvm::ConstantInt::get(CGF.Int32Ty, -Align), + "aligned_regoffs"); + } + + // Update the gr_offs/vr_offs pointer for next call to va_arg on this va_list. + llvm::Value *NewOffset = 0; + NewOffset = CGF.Builder.CreateAdd(reg_offs, + llvm::ConstantInt::get(CGF.Int32Ty, RegSize), + "new_reg_offs"); + CGF.Builder.CreateStore(NewOffset, reg_offs_p); + + // Now we're in a position to decide whether this argument really was in + // registers or not. + llvm::Value *InRegs = 0; + InRegs = CGF.Builder.CreateICmpSLE(NewOffset, + llvm::ConstantInt::get(CGF.Int32Ty, 0), + "inreg"); + + CGF.Builder.CreateCondBr(InRegs, InRegBlock, OnStackBlock); + + //======================================= + // Argument was in registers + //======================================= + + // Now we emit the code for if the argument was originally passed in + // registers. First start the appropriate block: + CGF.EmitBlock(InRegBlock); + + llvm::Value *reg_top_p = 0, *reg_top = 0; + reg_top_p = CGF.Builder.CreateStructGEP(VAListAddr, reg_top_index, "reg_top_p"); + reg_top = CGF.Builder.CreateLoad(reg_top_p, "reg_top"); + llvm::Value *BaseAddr = CGF.Builder.CreateGEP(reg_top, reg_offs); + llvm::Value *RegAddr = 0; + llvm::Type *MemTy = llvm::PointerType::getUnqual(CGF.ConvertTypeForMem(Ty)); + + if (!AI.isDirect()) { + // If it's been passed indirectly (actually a struct), whatever we find from + // stored registers or on the stack will actually be a struct **. + MemTy = llvm::PointerType::getUnqual(MemTy); + } + + const Type *Base = 0; + uint64_t NumMembers; + if (isHomogeneousAggregate(Ty, Base, getContext(), &NumMembers) + && NumMembers > 1) { + // Homogeneous aggregates passed in registers will have their elements split + // and stored 16-bytes apart regardless of size (they're notionally in qN, + // qN+1, ...). We reload and store into a temporary local variable + // contiguously. 
+ assert(AI.isDirect() && "Homogeneous aggregates should be passed directly"); + llvm::Type *BaseTy = CGF.ConvertType(QualType(Base, 0)); + llvm::Type *HFATy = llvm::ArrayType::get(BaseTy, NumMembers); + llvm::Value *Tmp = CGF.CreateTempAlloca(HFATy); + + for (unsigned i = 0; i < NumMembers; ++i) { + llvm::Value *BaseOffset = llvm::ConstantInt::get(CGF.Int32Ty, 16 * i); + llvm::Value *LoadAddr = CGF.Builder.CreateGEP(BaseAddr, BaseOffset); + LoadAddr = CGF.Builder.CreateBitCast(LoadAddr, + llvm::PointerType::getUnqual(BaseTy)); + llvm::Value *StoreAddr = CGF.Builder.CreateStructGEP(Tmp, i); + + llvm::Value *Elem = CGF.Builder.CreateLoad(LoadAddr); + CGF.Builder.CreateStore(Elem, StoreAddr); + } + + RegAddr = CGF.Builder.CreateBitCast(Tmp, MemTy); + } else { + // Otherwise the object is contiguous in memory + RegAddr = CGF.Builder.CreateBitCast(BaseAddr, MemTy); + } + + CGF.EmitBranch(ContBlock); + + //======================================= + // Argument was on the stack + //======================================= + CGF.EmitBlock(OnStackBlock); + + llvm::Value *stack_p = 0, *OnStackAddr = 0; + stack_p = CGF.Builder.CreateStructGEP(VAListAddr, 0, "stack_p"); + OnStackAddr = CGF.Builder.CreateLoad(stack_p, "stack"); + + // Again, stack arguments may need realigmnent. In this case both integer and + // floating-point ones might be affected. + if (AI.isDirect() && getContext().getTypeAlign(Ty) > 64) { + int Align = getContext().getTypeAlign(Ty) / 8; + + OnStackAddr = CGF.Builder.CreatePtrToInt(OnStackAddr, CGF.Int64Ty); + + OnStackAddr = CGF.Builder.CreateAdd(OnStackAddr, + llvm::ConstantInt::get(CGF.Int64Ty, Align - 1), + "align_stack"); + OnStackAddr = CGF.Builder.CreateAnd(OnStackAddr, + llvm::ConstantInt::get(CGF.Int64Ty, -Align), + "align_stack"); + + OnStackAddr = CGF.Builder.CreateIntToPtr(OnStackAddr, CGF.Int8PtrTy); + } + + uint64_t StackSize; + if (AI.isDirect()) + StackSize = getContext().getTypeSize(Ty) / 8; + else + StackSize = 8; + + // All stack slots are 8 bytes + StackSize = llvm::RoundUpToAlignment(StackSize, 8); + + llvm::Value *StackSizeC = llvm::ConstantInt::get(CGF.Int32Ty, StackSize); + llvm::Value *NewStack = CGF.Builder.CreateGEP(OnStackAddr, StackSizeC, + "new_stack"); + + // Write the new value of __stack for the next call to va_arg + CGF.Builder.CreateStore(NewStack, stack_p); + + OnStackAddr = CGF.Builder.CreateBitCast(OnStackAddr, MemTy); + + CGF.EmitBranch(ContBlock); + + //======================================= + // Tidy up + //======================================= + CGF.EmitBlock(ContBlock); + + llvm::PHINode *ResAddr = CGF.Builder.CreatePHI(MemTy, 2, "vaarg.addr"); + ResAddr->addIncoming(RegAddr, InRegBlock); + ResAddr->addIncoming(OnStackAddr, OnStackBlock); + + if (AI.isDirect()) + return ResAddr; + + return CGF.Builder.CreateLoad(ResAddr, "vaarg.addr"); +} + +//===----------------------------------------------------------------------===// // NVPTX ABI Implementation //===----------------------------------------------------------------------===// @@ -3563,6 +4043,8 @@ public: virtual void SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const; +private: + static void addKernelMetadata(llvm::Function *F); }; ABIArgInfo NVPTXABIInfo::classifyReturnType(QualType RetTy) const { @@ -3590,25 +4072,7 @@ void NVPTXABIInfo::computeInfo(CGFunctionInfo &FI) const { if (FI.getCallingConvention() != llvm::CallingConv::C) return; - // Calling convention as default by an ABI. 
- // We're still using the PTX_Kernel/PTX_Device calling conventions here, - // but we should switch to NVVM metadata later on. - llvm::CallingConv::ID DefaultCC; - const LangOptions &LangOpts = getContext().getLangOpts(); - if (LangOpts.OpenCL || LangOpts.CUDA) { - // If we are in OpenCL or CUDA mode, then default to device functions - DefaultCC = llvm::CallingConv::PTX_Device; - } else { - // If we are in standard C/C++ mode, use the triple to decide on the default - StringRef Env = - getContext().getTargetInfo().getTriple().getEnvironmentName(); - if (Env == "device") - DefaultCC = llvm::CallingConv::PTX_Device; - else - DefaultCC = llvm::CallingConv::PTX_Kernel; - } - FI.setEffectiveCallingConvention(DefaultCC); - + FI.setEffectiveCallingConvention(getRuntimeCC()); } llvm::Value *NVPTXABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty, @@ -3626,26 +4090,43 @@ SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV, // Perform special handling in OpenCL mode if (M.getLangOpts().OpenCL) { - // Use OpenCL function attributes to set proper calling conventions + // Use OpenCL function attributes to check for kernel functions // By default, all functions are device functions if (FD->hasAttr<OpenCLKernelAttr>()) { - // OpenCL __kernel functions get a kernel calling convention - F->setCallingConv(llvm::CallingConv::PTX_Kernel); + // OpenCL __kernel functions get kernel metadata + addKernelMetadata(F); // And kernel functions are not subject to inlining - F->addFnAttr(llvm::Attributes::NoInline); + F->addFnAttr(llvm::Attribute::NoInline); } } // Perform special handling in CUDA mode. if (M.getLangOpts().CUDA) { - // CUDA __global__ functions get a kernel calling convention. Since + // CUDA __global__ functions get a kernel metadata entry. Since // __global__ functions cannot be called from the device, we do not // need to set the noinline attribute. if (FD->getAttr<CUDAGlobalAttr>()) - F->setCallingConv(llvm::CallingConv::PTX_Kernel); + addKernelMetadata(F); } } +void NVPTXTargetCodeGenInfo::addKernelMetadata(llvm::Function *F) { + llvm::Module *M = F->getParent(); + llvm::LLVMContext &Ctx = M->getContext(); + + // Get "nvvm.annotations" metadata node + llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations"); + + // Create !{<func-ref>, metadata !"kernel", i32 1} node + llvm::SmallVector<llvm::Value *, 3> MDVals; + MDVals.push_back(F); + MDVals.push_back(llvm::MDString::get(Ctx, "kernel")); + MDVals.push_back(llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1)); + + // Append metadata to nvvm.annotations + MD->addOperand(llvm::MDNode::get(Ctx, MDVals)); +} + } //===----------------------------------------------------------------------===// @@ -3748,7 +4229,7 @@ void MBlazeTargetCodeGenInfo::SetTargetAttributes(const Decl *D, F->setCallingConv(CC); // Step 2: Add attributes goodness. - F->addFnAttr(llvm::Attributes::NoInline); + F->addFnAttr(llvm::Attribute::NoInline); } // Step 3: Emit _interrupt_handler alias. @@ -3786,12 +4267,12 @@ void MSP430TargetCodeGenInfo::SetTargetAttributes(const Decl *D, F->setCallingConv(llvm::CallingConv::MSP430_INTR); // Step 2: Add attributes goodness. - F->addFnAttr(llvm::Attributes::NoInline); + F->addFnAttr(llvm::Attribute::NoInline); // Step 3: Emit ISR vector alias. 
- unsigned Num = attr->getNumber() + 0xffe0; + unsigned Num = attr->getNumber() / 2; new llvm::GlobalAlias(GV->getType(), llvm::Function::ExternalLinkage, - "vector_" + Twine::utohexstr(Num), + "__isr_" + Twine(Num), GV, &M.getModule()); } } @@ -3834,6 +4315,19 @@ public: return 29; } + void SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV, + CodeGen::CodeGenModule &CGM) const { + const FunctionDecl *FD = dyn_cast<FunctionDecl>(D); + if (!FD) return; + llvm::Function *Fn = cast<llvm::Function>(GV); + if (FD->hasAttr<Mips16Attr>()) { + Fn->addFnAttr("mips16"); + } + else if (FD->hasAttr<NoMips16Attr>()) { + Fn->addFnAttr("nomips16"); + } + } + bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const; @@ -3963,7 +4457,8 @@ MipsABIInfo::classifyArgumentType(QualType Ty, uint64_t &Offset) const { if (Ty->isPromotableIntegerType()) return ABIArgInfo::getExtend(); - return ABIArgInfo::getDirect(0, 0, getPaddingType(Align, OrigOffset)); + return ABIArgInfo::getDirect(0, 0, + IsO32 ? 0 : getPaddingType(Align, OrigOffset)); } llvm::Type* @@ -4143,7 +4638,7 @@ void TCETargetCodeGenInfo::SetTargetAttributes(const Decl *D, if (M.getLangOpts().OpenCL) { if (FD->hasAttr<OpenCLKernelAttr>()) { // OpenCL C Kernel functions are not subject to inlining - F->addFnAttr(llvm::Attributes::NoInline); + F->addFnAttr(llvm::Attribute::NoInline); if (FD->hasAttr<ReqdWorkGroupSizeAttr>()) { @@ -4337,6 +4832,9 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { case llvm::Triple::mips64el: return *(TheTargetCodeGenInfo = new MIPSTargetCodeGenInfo(Types, false)); + case llvm::Triple::aarch64: + return *(TheTargetCodeGenInfo = new AArch64TargetCodeGenInfo(Types)); + case llvm::Triple::arm: case llvm::Triple::thumb: { @@ -4348,7 +4846,7 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { Kind = ARMABIInfo::AAPCS_VFP; switch (Triple.getOS()) { - case llvm::Triple::NativeClient: + case llvm::Triple::NaCl: return *(TheTargetCodeGenInfo = new NaClARMTargetCodeGenInfo(Types, Kind)); default: @@ -4379,11 +4877,9 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { return *(TheTargetCodeGenInfo = new TCETargetCodeGenInfo(Types)); case llvm::Triple::x86: { - bool DisableMMX = strcmp(getContext().getTargetInfo().getABI(), "no-mmx") == 0; - if (Triple.isOSDarwin()) return *(TheTargetCodeGenInfo = - new X86_32TargetCodeGenInfo(Types, true, true, DisableMMX, false, + new X86_32TargetCodeGenInfo(Types, true, true, false, CodeGenOpts.NumRegisterParameters)); switch (Triple.getOS()) { @@ -4395,19 +4891,17 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { case llvm::Triple::OpenBSD: case llvm::Triple::Bitrig: return *(TheTargetCodeGenInfo = - new X86_32TargetCodeGenInfo(Types, false, true, DisableMMX, - false, + new X86_32TargetCodeGenInfo(Types, false, true, false, CodeGenOpts.NumRegisterParameters)); case llvm::Triple::Win32: return *(TheTargetCodeGenInfo = - new X86_32TargetCodeGenInfo(Types, false, true, DisableMMX, true, + new X86_32TargetCodeGenInfo(Types, false, true, true, CodeGenOpts.NumRegisterParameters)); default: return *(TheTargetCodeGenInfo = - new X86_32TargetCodeGenInfo(Types, false, false, DisableMMX, - false, + new X86_32TargetCodeGenInfo(Types, false, false, false, CodeGenOpts.NumRegisterParameters)); } } @@ -4420,7 +4914,7 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { case llvm::Triple::MinGW32: case llvm::Triple::Cygwin: return *(TheTargetCodeGenInfo = new WinX86_64TargetCodeGenInfo(Types)); 
- case llvm::Triple::NativeClient: + case llvm::Triple::NaCl: return *(TheTargetCodeGenInfo = new NaClX86_64TargetCodeGenInfo(Types, HasAVX)); default: return *(TheTargetCodeGenInfo = new X86_64TargetCodeGenInfo(Types,