Diffstat (limited to 'contrib/llvm/patches')
-rw-r--r--  contrib/llvm/patches/README.TXT                                      16
-rw-r--r--  contrib/llvm/patches/patch-01-freebsd-kprintf.diff                  381
-rw-r--r--  contrib/llvm/patches/patch-02-clang-vendor-suffix.diff               22
-rw-r--r--  contrib/llvm/patches/patch-03-add-CC-aliases.diff                    23
-rw-r--r--  contrib/llvm/patches/patch-04-add-llvm-gvn-option.diff               31
-rw-r--r--  contrib/llvm/patches/patch-05-enable-armv6-clrex.diff                20
-rw-r--r--  contrib/llvm/patches/patch-06-clang-add-mips-triples.diff            33
-rw-r--r--  contrib/llvm/patches/patch-07-llvm-r227752-boot2-shrink.diff       1271
-rw-r--r--  contrib/llvm/patches/patch-08-llvm-r230348-arm-fix-bad-ha.diff      419
-rw-r--r--  contrib/llvm/patches/patch-09-clang-r227115-constantarraytype.diff   50
10 files changed, 2266 insertions, 0 deletions
diff --git a/contrib/llvm/patches/README.TXT b/contrib/llvm/patches/README.TXT
new file mode 100644
index 0000000..7bc26d2
--- /dev/null
+++ b/contrib/llvm/patches/README.TXT
@@ -0,0 +1,16 @@
+This is a set of individual patches containing all the customizations to
+llvm/clang currently in the FreeBSD base system. They can be applied in
+alphabetical order to a pristine llvm/clang 3.6.1 source tree, for example by
+doing:
+
+svn co https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_361/final llvm-3.6.1
+svn co https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_361/final llvm-3.6.1/tools/clang
+cd llvm-3.6.1
+for p in /usr/src/contrib/llvm/patches/patch-*.diff; do
+ patch -p0 -f -F0 -E -i $p -s || break
+done
+
+A number of these are hand-written modifications specific to FreeBSD, while
+most others are cherry-picks from the llvm and clang trunks. When a new
+version of llvm/clang is eventually imported, the latter will largely
+disappear.
diff --git a/contrib/llvm/patches/patch-01-freebsd-kprintf.diff b/contrib/llvm/patches/patch-01-freebsd-kprintf.diff
new file mode 100644
index 0000000..252b4cd
--- /dev/null
+++ b/contrib/llvm/patches/patch-01-freebsd-kprintf.diff
@@ -0,0 +1,381 @@
+This patch adds support for the FreeBSD kernel-specific printf format
+specifiers %b, %D, %r, and %y, via a new __freebsd_kprintf__ format
+string type.
+
+Sent upstream as http://reviews.llvm.org/D7154
+
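For context, a minimal sketch of how these conversions are used in kernel code
(values and names here are hypothetical): %b decodes a bit field, consuming an
int and a "bit format" string whose first byte is the output base, followed by
1-based bit-position/name pairs; %D dumps memory in hex, consuming a pointer
and a separator string.

    /* %b: int, then a bit-format string (base byte, then bit/name pairs) */
    printf("state=%b\n", 3, "\10\2BITTWO\1BITONE");
    /* prints: state=3<BITTWO,BITONE> */

    /* %D: pointer, then a separator; the field width is the byte count */
    unsigned char lladdr[6] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 };
    printf("mac=%6D\n", lladdr, ":");
    /* prints: mac=00:11:22:33:44:55 */
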
+Index: tools/clang/include/clang/Analysis/Analyses/FormatString.h
+===================================================================
+--- tools/clang/include/clang/Analysis/Analyses/FormatString.h
++++ tools/clang/include/clang/Analysis/Analyses/FormatString.h
+@@ -161,6 +161,12 @@ class ConversionSpecifier {
+ ObjCObjArg, // '@'
+ ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg,
+
++ // FreeBSD kernel specific specifiers.
++ FreeBSDbArg,
++ FreeBSDDArg,
++ FreeBSDrArg,
++ FreeBSDyArg,
++
+ // GlibC specific specifiers.
+ PrintErrno, // 'm'
+
+@@ -204,7 +210,8 @@ class ConversionSpecifier {
+ return EndScanList ? EndScanList - Position : 1;
+ }
+
+- bool isIntArg() const { return kind >= IntArgBeg && kind <= IntArgEnd; }
++ bool isIntArg() const { return (kind >= IntArgBeg && kind <= IntArgEnd) ||
++ kind == FreeBSDrArg || kind == FreeBSDyArg; }
+ bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; }
+ bool isAnyIntArg() const { return kind >= IntArgBeg && kind <= UIntArgEnd; }
+ const char *toString() const;
+@@ -646,7 +653,7 @@ class FormatStringHandler {
+
+ bool ParsePrintfString(FormatStringHandler &H,
+ const char *beg, const char *end, const LangOptions &LO,
+- const TargetInfo &Target);
++ const TargetInfo &Target, bool isFreeBSDKPrintf);
+
+ bool ParseFormatStringHasSArg(const char *beg, const char *end, const LangOptions &LO,
+ const TargetInfo &Target);
+Index: tools/clang/include/clang/Sema/Sema.h
+===================================================================
+--- tools/clang/include/clang/Sema/Sema.h
++++ tools/clang/include/clang/Sema/Sema.h
+@@ -8567,6 +8567,7 @@ class Sema {
+ FST_Strftime,
+ FST_Strfmon,
+ FST_Kprintf,
++ FST_FreeBSDKPrintf,
+ FST_Unknown
+ };
+ static FormatStringType GetFormatStringType(const FormatAttr *Format);
+Index: tools/clang/lib/Analysis/FormatString.cpp
+===================================================================
+--- tools/clang/lib/Analysis/FormatString.cpp
++++ tools/clang/lib/Analysis/FormatString.cpp
+@@ -552,6 +552,12 @@ const char *ConversionSpecifier::toString() const
+ // Objective-C specific specifiers.
+ case ObjCObjArg: return "@";
+
++ // FreeBSD kernel specific specifiers.
++ case FreeBSDbArg: return "b";
++ case FreeBSDDArg: return "D";
++ case FreeBSDrArg: return "r";
++ case FreeBSDyArg: return "y";
++
+ // GlibC specific specifiers.
+ case PrintErrno: return "m";
+
+@@ -647,6 +653,9 @@ bool FormatSpecifier::hasValidLengthModifier(const
+ case ConversionSpecifier::XArg:
+ case ConversionSpecifier::nArg:
+ return true;
++ case ConversionSpecifier::FreeBSDrArg:
++ case ConversionSpecifier::FreeBSDyArg:
++ return Target.getTriple().isOSFreeBSD();
+ default:
+ return false;
+ }
+@@ -677,6 +686,9 @@ bool FormatSpecifier::hasValidLengthModifier(const
+ case ConversionSpecifier::ScanListArg:
+ case ConversionSpecifier::ZArg:
+ return true;
++ case ConversionSpecifier::FreeBSDrArg:
++ case ConversionSpecifier::FreeBSDyArg:
++ return Target.getTriple().isOSFreeBSD();
+ default:
+ return false;
+ }
+@@ -807,6 +819,10 @@ bool FormatSpecifier::hasStandardConversionSpecifi
+ case ConversionSpecifier::SArg:
+ return LangOpt.ObjC1 || LangOpt.ObjC2;
+ case ConversionSpecifier::InvalidSpecifier:
++ case ConversionSpecifier::FreeBSDbArg:
++ case ConversionSpecifier::FreeBSDDArg:
++ case ConversionSpecifier::FreeBSDrArg:
++ case ConversionSpecifier::FreeBSDyArg:
+ case ConversionSpecifier::PrintErrno:
+ case ConversionSpecifier::DArg:
+ case ConversionSpecifier::OArg:
+Index: tools/clang/lib/Analysis/PrintfFormatString.cpp
+===================================================================
+--- tools/clang/lib/Analysis/PrintfFormatString.cpp
++++ tools/clang/lib/Analysis/PrintfFormatString.cpp
+@@ -55,7 +55,8 @@ static PrintfSpecifierResult ParsePrintfSpecifier(
+ unsigned &argIndex,
+ const LangOptions &LO,
+ const TargetInfo &Target,
+- bool Warn) {
++ bool Warn,
++ bool isFreeBSDKPrintf) {
+
+ using namespace clang::analyze_format_string;
+ using namespace clang::analyze_printf;
+@@ -206,9 +207,24 @@ static PrintfSpecifierResult ParsePrintfSpecifier(
+ case '@': k = ConversionSpecifier::ObjCObjArg; break;
+ // Glibc specific.
+ case 'm': k = ConversionSpecifier::PrintErrno; break;
++ // FreeBSD kernel specific.
++ case 'b':
++ if (isFreeBSDKPrintf)
++ k = ConversionSpecifier::FreeBSDbArg; // int followed by char *
++ break;
++ case 'r':
++ if (isFreeBSDKPrintf)
++ k = ConversionSpecifier::FreeBSDrArg; // int
++ break;
++ case 'y':
++ if (isFreeBSDKPrintf)
++ k = ConversionSpecifier::FreeBSDyArg; // int
++ break;
+ // Apple-specific.
+ case 'D':
+- if (Target.getTriple().isOSDarwin())
++ if (isFreeBSDKPrintf)
++ k = ConversionSpecifier::FreeBSDDArg; // void * followed by char *
++ else if (Target.getTriple().isOSDarwin())
+ k = ConversionSpecifier::DArg;
+ break;
+ case 'O':
+@@ -228,6 +244,10 @@ static PrintfSpecifierResult ParsePrintfSpecifier(
+ FS.setConversionSpecifier(CS);
+ if (CS.consumesDataArgument() && !FS.usesPositionalArg())
+ FS.setArgIndex(argIndex++);
++ // FreeBSD kernel specific.
++ if (k == ConversionSpecifier::FreeBSDbArg ||
++ k == ConversionSpecifier::FreeBSDDArg)
++ argIndex++;
+
+ if (k == ConversionSpecifier::InvalidSpecifier) {
+ // Assume the conversion takes one argument.
+@@ -240,7 +260,8 @@ bool clang::analyze_format_string::ParsePrintfStri
+ const char *I,
+ const char *E,
+ const LangOptions &LO,
+- const TargetInfo &Target) {
++ const TargetInfo &Target,
++ bool isFreeBSDKPrintf) {
+
+ unsigned argIndex = 0;
+
+@@ -247,7 +268,8 @@ bool clang::analyze_format_string::ParsePrintfStri
+ // Keep looking for a format specifier until we have exhausted the string.
+ while (I != E) {
+ const PrintfSpecifierResult &FSR = ParsePrintfSpecifier(H, I, E, argIndex,
+- LO, Target, true);
++ LO, Target, true,
++ isFreeBSDKPrintf);
+ // Did a fail-stop error of any kind occur when parsing the specifier?
+ // If so, don't do any more processing.
+ if (FSR.shouldStop())
+@@ -276,7 +298,8 @@ bool clang::analyze_format_string::ParseFormatStri
+ FormatStringHandler H;
+ while (I != E) {
+ const PrintfSpecifierResult &FSR = ParsePrintfSpecifier(H, I, E, argIndex,
+- LO, Target, false);
++ LO, Target, false,
++ false);
+ // Did a fail-stop error of any kind occur when parsing the specifier?
+ // If so, don't do any more processing.
+ if (FSR.shouldStop())
+@@ -674,6 +697,8 @@ bool PrintfSpecifier::hasValidPlusPrefix() const {
+ case ConversionSpecifier::GArg:
+ case ConversionSpecifier::aArg:
+ case ConversionSpecifier::AArg:
++ case ConversionSpecifier::FreeBSDrArg:
++ case ConversionSpecifier::FreeBSDyArg:
+ return true;
+
+ default:
+@@ -699,6 +724,8 @@ bool PrintfSpecifier::hasValidAlternativeForm() co
+ case ConversionSpecifier::FArg:
+ case ConversionSpecifier::gArg:
+ case ConversionSpecifier::GArg:
++ case ConversionSpecifier::FreeBSDrArg:
++ case ConversionSpecifier::FreeBSDyArg:
+ return true;
+
+ default:
+@@ -729,6 +756,8 @@ bool PrintfSpecifier::hasValidLeadingZeros() const
+ case ConversionSpecifier::FArg:
+ case ConversionSpecifier::gArg:
+ case ConversionSpecifier::GArg:
++ case ConversionSpecifier::FreeBSDrArg:
++ case ConversionSpecifier::FreeBSDyArg:
+ return true;
+
+ default:
+@@ -753,6 +782,8 @@ bool PrintfSpecifier::hasValidSpacePrefix() const
+ case ConversionSpecifier::GArg:
+ case ConversionSpecifier::aArg:
+ case ConversionSpecifier::AArg:
++ case ConversionSpecifier::FreeBSDrArg:
++ case ConversionSpecifier::FreeBSDyArg:
+ return true;
+
+ default:
+@@ -818,6 +849,8 @@ bool PrintfSpecifier::hasValidPrecision() const {
+ case ConversionSpecifier::gArg:
+ case ConversionSpecifier::GArg:
+ case ConversionSpecifier::sArg:
++ case ConversionSpecifier::FreeBSDrArg:
++ case ConversionSpecifier::FreeBSDyArg:
+ return true;
+
+ default:
+Index: tools/clang/lib/Sema/SemaChecking.cpp
+===================================================================
+--- tools/clang/lib/Sema/SemaChecking.cpp
++++ tools/clang/lib/Sema/SemaChecking.cpp
+@@ -2603,6 +2603,7 @@ Sema::FormatStringType Sema::GetFormatStringType(c
+ .Case("strftime", FST_Strftime)
+ .Case("strfmon", FST_Strfmon)
+ .Cases("kprintf", "cmn_err", "vcmn_err", "zcmn_err", FST_Kprintf)
++ .Case("freebsd_kprintf", FST_FreeBSDKPrintf)
+ .Default(FST_Unknown);
+ }
+
+@@ -3384,6 +3385,43 @@ CheckPrintfHandler::HandlePrintfSpecifier(const an
+ CoveredArgs.set(argIndex);
+ }
+
++ // FreeBSD kernel extensions.
++ if (CS.getKind() == ConversionSpecifier::FreeBSDbArg ||
++ CS.getKind() == ConversionSpecifier::FreeBSDDArg) {
++ // We need at least two arguments.
++ if (!CheckNumArgs(FS, CS, startSpecifier, specifierLen, argIndex + 1))
++ return false;
++
++ // Claim the second argument.
++ CoveredArgs.set(argIndex + 1);
++
++ // Type check the first argument (int for %b, pointer for %D)
++ const Expr *Ex = getDataArg(argIndex);
++ const analyze_printf::ArgType &AT =
++ (CS.getKind() == ConversionSpecifier::FreeBSDbArg) ?
++ ArgType(S.Context.IntTy) : ArgType::CPointerTy;
++ if (AT.isValid() && !AT.matchesType(S.Context, Ex->getType()))
++ EmitFormatDiagnostic(
++ S.PDiag(diag::warn_format_conversion_argument_type_mismatch)
++ << AT.getRepresentativeTypeName(S.Context) << Ex->getType()
++ << false << Ex->getSourceRange(),
++ Ex->getLocStart(), /*IsStringLocation*/false,
++ getSpecifierRange(startSpecifier, specifierLen));
++
++ // Type check the second argument (char * for both %b and %D)
++ Ex = getDataArg(argIndex + 1);
++ const analyze_printf::ArgType &AT2 = ArgType::CStrTy;
++ if (AT2.isValid() && !AT2.matchesType(S.Context, Ex->getType()))
++ EmitFormatDiagnostic(
++ S.PDiag(diag::warn_format_conversion_argument_type_mismatch)
++ << AT2.getRepresentativeTypeName(S.Context) << Ex->getType()
++ << false << Ex->getSourceRange(),
++ Ex->getLocStart(), /*IsStringLocation*/false,
++ getSpecifierRange(startSpecifier, specifierLen));
++
++ return true;
++ }
++
+ // Check for using an Objective-C specific conversion specifier
+ // in a non-ObjC literal.
+ if (!ObjCContext && CS.isObjCArg()) {
+@@ -4007,7 +4045,8 @@ void Sema::CheckFormatString(const StringLiteral *
+ return;
+ }
+
+- if (Type == FST_Printf || Type == FST_NSString) {
++ if (Type == FST_Printf || Type == FST_NSString ||
++ Type == FST_FreeBSDKPrintf) {
+ CheckPrintfHandler H(*this, FExpr, OrigFormatExpr, firstDataArg,
+ numDataArgs, (Type == FST_NSString),
+ Str, HasVAListArg, Args, format_idx,
+@@ -4015,7 +4054,8 @@ void Sema::CheckFormatString(const StringLiteral *
+
+ if (!analyze_format_string::ParsePrintfString(H, Str, Str + StrLen,
+ getLangOpts(),
+- Context.getTargetInfo()))
++ Context.getTargetInfo(),
++ Type == FST_FreeBSDKPrintf))
+ H.DoneProcessing();
+ } else if (Type == FST_Scanf) {
+ CheckScanfHandler H(*this, FExpr, OrigFormatExpr, firstDataArg, numDataArgs,
+Index: tools/clang/lib/Sema/SemaDeclAttr.cpp
+===================================================================
+--- tools/clang/lib/Sema/SemaDeclAttr.cpp
++++ tools/clang/lib/Sema/SemaDeclAttr.cpp
+@@ -2481,6 +2481,7 @@ static FormatAttrKind getFormatAttrKind(StringRef
+ .Cases("scanf", "printf", "printf0", "strfmon", SupportedFormat)
+ .Cases("cmn_err", "vcmn_err", "zcmn_err", SupportedFormat)
+ .Case("kprintf", SupportedFormat) // OpenBSD.
++ .Case("freebsd_kprintf", SupportedFormat) // FreeBSD.
+
+ .Cases("gcc_diag", "gcc_cdiag", "gcc_cxxdiag", "gcc_tdiag", IgnoredFormat)
+ .Default(InvalidFormat);
+Index: tools/clang/test/Sema/attr-format.c
+===================================================================
+--- tools/clang/test/Sema/attr-format.c
++++ tools/clang/test/Sema/attr-format.c
+@@ -57,8 +57,15 @@ void callnull(void){
+ null(0, (int*)0); // expected-warning {{incompatible pointer types}}
+ }
+
++// FreeBSD kernel extensions
++void a3(const char *a, ...) __attribute__((format(freebsd_kprintf, 1,2))); // no-error
++void b3(const char *a, ...) __attribute__((format(freebsd_kprintf, 1,1))); // expected-error {{'format' attribute parameter 3 is out of bounds}}
++void c3(const char *a, ...) __attribute__((format(freebsd_kprintf, 0,2))); // expected-error {{'format' attribute parameter 2 is out of bounds}}
++void d3(const char *a, int c) __attribute__((format(freebsd_kprintf, 1,2))); // expected-error {{format attribute requires variadic function}}
++void e3(char *str, int c, ...) __attribute__((format(freebsd_kprintf, 2,3))); // expected-error {{format argument not a string type}}
+
+
++
+ // PR4470
+ int xx_vprintf(const char *, va_list);
+
+Index: tools/clang/test/Sema/format-strings-freebsd.c
+===================================================================
+--- tools/clang/test/Sema/format-strings-freebsd.c
++++ tools/clang/test/Sema/format-strings-freebsd.c
+@@ -0,0 +1,40 @@
++// RUN: %clang_cc1 -fsyntax-only -verify -triple i386-unknown-freebsd %s
++// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64-unknown-freebsd %s
++
++// Test FreeBSD kernel printf extensions.
++int freebsd_kernel_printf(const char *, ...) __attribute__((__format__(__freebsd_kprintf__, 1, 2)));
++
++void check_freebsd_kernel_extensions(int i, long l, char *s)
++{
++ // %b expects an int and a char *
++ freebsd_kernel_printf("reg=%b\n", i, "\10\2BITTWO\1BITONE\n"); // no-warning
++ freebsd_kernel_printf("reg=%b\n", l, "\10\2BITTWO\1BITONE\n"); // expected-warning{{format specifies type 'int' but the argument has type 'long'}}
++ freebsd_kernel_printf("reg=%b\n", i, l); // expected-warning{{format specifies type 'char *' but the argument has type 'long'}}
++ freebsd_kernel_printf("reg=%b\n", i); // expected-warning{{more '%' conversions than data arguments}}
++ freebsd_kernel_printf("reg=%b\n", i, "\10\2BITTWO\1BITONE\n", l); // expected-warning{{data argument not used by format string}}
++
++ // %D expects an unsigned char * and a char *
++ freebsd_kernel_printf("%6D", s, ":"); // no-warning
++ freebsd_kernel_printf("%6D", i, ":"); // expected-warning{{format specifies type 'void *' but the argument has type 'int'}}
++ freebsd_kernel_printf("%6D", s, i); // expected-warning{{format specifies type 'char *' but the argument has type 'int'}}
++ freebsd_kernel_printf("%6D", s); // expected-warning{{more '%' conversions than data arguments}}
++ freebsd_kernel_printf("%6D", s, ":", i); // expected-warning{{data argument not used by format string}}
++
++ freebsd_kernel_printf("%*D", 42, s, ":"); // no-warning
++ freebsd_kernel_printf("%*D", 42, i, ":"); // expected-warning{{format specifies type 'void *' but the argument has type 'int'}}
++ freebsd_kernel_printf("%*D", 42, s, i); // expected-warning{{format specifies type 'char *' but the argument has type 'int'}}
++ freebsd_kernel_printf("%*D", 42, s); // expected-warning{{more '%' conversions than data arguments}}
++ freebsd_kernel_printf("%*D", 42, s, ":", i); // expected-warning{{data argument not used by format string}}
++
++ // %r expects an int
++ freebsd_kernel_printf("%r", i); // no-warning
++ freebsd_kernel_printf("%r", l); // expected-warning{{format specifies type 'int' but the argument has type 'long'}}
++ freebsd_kernel_printf("%lr", i); // expected-warning{{format specifies type 'long' but the argument has type 'int'}}
++ freebsd_kernel_printf("%lr", l); // no-warning
++
++ // %y expects an int
++ freebsd_kernel_printf("%y", i); // no-warning
++ freebsd_kernel_printf("%y", l); // expected-warning{{format specifies type 'int' but the argument has type 'long'}}
++ freebsd_kernel_printf("%ly", i); // expected-warning{{format specifies type 'long' but the argument has type 'int'}}
++ freebsd_kernel_printf("%ly", l); // no-warning
++}
diff --git a/contrib/llvm/patches/patch-02-clang-vendor-suffix.diff b/contrib/llvm/patches/patch-02-clang-vendor-suffix.diff
new file mode 100644
index 0000000..f94b9f3
--- /dev/null
+++ b/contrib/llvm/patches/patch-02-clang-vendor-suffix.diff
@@ -0,0 +1,22 @@
+This patch adds a FreeBSD-specific suffix to clang's version string. This is
+usually of the form "(yyyymmdd)", representing the date when the compiler was
+last updated.
+
+Introduced here: http://svnweb.freebsd.org/changeset/base/209107
+
+Index: tools/clang/lib/Basic/Version.cpp
+===================================================================
+--- tools/clang/lib/Basic/Version.cpp
++++ tools/clang/lib/Basic/Version.cpp
+@@ -128,8 +128,10 @@ std::string getClangToolFullVersion(StringRef Tool
+ OS << ToolName << " version " CLANG_VERSION_STRING " "
+ << getClangFullRepositoryVersion();
+
++#ifdef CLANG_VENDOR_SUFFIX
++ OS << CLANG_VENDOR_SUFFIX;
++#elif defined(CLANG_VENDOR)
+ // If vendor supplied, include the base LLVM version as well.
+-#ifdef CLANG_VENDOR
+ OS << " (based on " << BACKEND_PACKAGE_STRING << ")";
+ #endif
+
diff --git a/contrib/llvm/patches/patch-03-add-CC-aliases.diff b/contrib/llvm/patches/patch-03-add-CC-aliases.diff
new file mode 100644
index 0000000..884b14c
--- /dev/null
+++ b/contrib/llvm/patches/patch-03-add-CC-aliases.diff
@@ -0,0 +1,23 @@
+This patch adds "CC" and "clang-CC" to the list of program name aliases which
+invoke the C++ compiler.
+
+Introduced here: http://svnweb.freebsd.org/changeset/base/257109
+
+Index: tools/clang/tools/driver/driver.cpp
+===================================================================
+--- tools/clang/tools/driver/driver.cpp
++++ tools/clang/tools/driver/driver.cpp
+@@ -213,11 +213,13 @@ static const DriverSuffix *FindDriverSuffix(String
+ {"clang", nullptr},
+ {"clang++", "--driver-mode=g++"},
+ {"clang-c++", "--driver-mode=g++"},
++ {"clang-CC", "--driver-mode=g++"},
+ {"clang-cc", nullptr},
+ {"clang-cpp", "--driver-mode=cpp"},
+ {"clang-g++", "--driver-mode=g++"},
+ {"clang-gcc", nullptr},
+ {"clang-cl", "--driver-mode=cl"},
++ {"CC", "--driver-mode=g++"},
+ {"cc", nullptr},
+ {"cpp", "--driver-mode=cpp"},
+ {"cl", "--driver-mode=cl"},
diff --git a/contrib/llvm/patches/patch-04-add-llvm-gvn-option.diff b/contrib/llvm/patches/patch-04-add-llvm-gvn-option.diff
new file mode 100644
index 0000000..5f31623
--- /dev/null
+++ b/contrib/llvm/patches/patch-04-add-llvm-gvn-option.diff
@@ -0,0 +1,31 @@
+Add an llvm option to enable/disable running the global value numbering
+optimization pass. Disabling this pass helps to minimize the size of
+boot2.
+
+Introduced here: http://svnweb.freebsd.org/changeset/base/274968
+
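For a sense of what the option toggles: GVN removes fully redundant
computations, such as repeated loads of the same location. A minimal
illustrative sketch (hypothetical function):

    int f(int *p, int b)
    {
        int x = *p;        /* first load of *p */
        if (b)
            return x + 1;
        return *p;         /* GVN can prove this load redundant and reuse x */
    }

With the new flag, the pass can be skipped (e.g. via -mllvm -enable-gvn=false
on the clang command line), trading such redundancy elimination for smaller
generated code in cases like boot2.
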
+Index: lib/Transforms/IPO/PassManagerBuilder.cpp
+===================================================================
+--- lib/Transforms/IPO/PassManagerBuilder.cpp
++++ lib/Transforms/IPO/PassManagerBuilder.cpp
+@@ -78,6 +78,10 @@ static cl::opt<bool>
+ EnableMLSM("mlsm", cl::init(true), cl::Hidden,
+ cl::desc("Enable motion of merged load and store"));
+
++static cl::opt<bool> EnableGVN("enable-gvn",
++ cl::init(true), cl::Hidden,
++ cl::desc("Run the global value numbering pass"));
++
+ PassManagerBuilder::PassManagerBuilder() {
+ OptLevel = 2;
+ SizeLevel = 0;
+@@ -244,7 +248,8 @@ void PassManagerBuilder::populateModulePassManager
+ if (OptLevel > 1) {
+ if (EnableMLSM)
+ MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds
+- MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies
++ if (EnableGVN)
++ MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies
+ }
+ MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset
+ MPM.add(createSCCPPass()); // Constant prop with SCCP
diff --git a/contrib/llvm/patches/patch-05-enable-armv6-clrex.diff b/contrib/llvm/patches/patch-05-enable-armv6-clrex.diff
new file mode 100644
index 0000000..574e3bd
--- /dev/null
+++ b/contrib/llvm/patches/patch-05-enable-armv6-clrex.diff
@@ -0,0 +1,20 @@
+For now, enable the clrex instruction for armv6, until upstream
+implements this properly.
+
+Submitted by: rdivacky
+
+Introduced here: http://svnweb.freebsd.org/changeset/base/275362
+
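For context, clrex clears a pending exclusive-monitor reservation left by a
load-exclusive. A sketch of where it typically appears, assuming clang's ARM
exclusive-access builtins (__builtin_arm_ldrex/strex/clrex):

    /* Abandon the reservation when the update is not taken. */
    int try_store(volatile int *p, int expected, int desired)
    {
        if (__builtin_arm_ldrex(p) != expected) {
            __builtin_arm_clrex();                       /* emits clrex */
            return 0;
        }
        return __builtin_arm_strex(desired, p) == 0;     /* emits strex */
    }
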
+Index: lib/Target/ARM/ARMInstrInfo.td
+===================================================================
+--- lib/Target/ARM/ARMInstrInfo.td
++++ lib/Target/ARM/ARMInstrInfo.td
+@@ -4640,7 +4640,7 @@ def STLEXD : AIstlex<0b01, (outs GPR:$Rd),
+
+ def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex",
+ [(int_arm_clrex)]>,
+- Requires<[IsARM, HasV7]> {
++ Requires<[IsARM, HasV6]> {
+ let Inst{31-0} = 0b11110101011111111111000000011111;
+ }
+
diff --git a/contrib/llvm/patches/patch-06-clang-add-mips-triples.diff b/contrib/llvm/patches/patch-06-clang-add-mips-triples.diff
new file mode 100644
index 0000000..2a66949
--- /dev/null
+++ b/contrib/llvm/patches/patch-06-clang-add-mips-triples.diff
@@ -0,0 +1,33 @@
+Allow clang to be built for mips/mips64 backend types by adding our mips
+triple IDs.
+
+This only allows testing and does not change the defaults for mips/mips64.
+They still build/use gcc by default.
+
+Differential Revision: https://reviews.freebsd.org/D1190
+Reviewed by: dim
+
+Introduced here: http://svnweb.freebsd.org/changeset/base/277423
+
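For context, -G<size> on mips is the small-data threshold: globals no larger
than <size> bytes go into .sdata/.sbss for cheap $gp-relative addressing, and
the compiler and linker must agree on the value, hence forwarding it to the
linker here. A minimal illustration (hypothetical -G8 build):

    int counter;        /* 4 bytes <= 8: placed in .sbss under -G8 */
    char big_buf[64];   /* 64 bytes > 8: stays in the regular .bss */
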
+Index: tools/clang/lib/Driver/Tools.cpp
+===================================================================
+--- tools/clang/lib/Driver/Tools.cpp
++++ tools/clang/lib/Driver/Tools.cpp
+@@ -6652,6 +6652,17 @@ void freebsd::Link::ConstructJob(Compilation &C, c
+ CmdArgs.push_back("elf32ppc_fbsd");
+ }
+
++ if (Arg *A = Args.getLastArg(options::OPT_G)) {
++ if (ToolChain.getArch() == llvm::Triple::mips ||
++ ToolChain.getArch() == llvm::Triple::mipsel ||
++ ToolChain.getArch() == llvm::Triple::mips64 ||
++ ToolChain.getArch() == llvm::Triple::mips64el) {
++ StringRef v = A->getValue();
++ CmdArgs.push_back(Args.MakeArgString("-G" + v));
++ A->claim();
++ }
++ }
++
+ if (Output.isFilename()) {
+ CmdArgs.push_back("-o");
+ CmdArgs.push_back(Output.getFilename());
diff --git a/contrib/llvm/patches/patch-07-llvm-r227752-boot2-shrink.diff b/contrib/llvm/patches/patch-07-llvm-r227752-boot2-shrink.diff
new file mode 100644
index 0000000..57e16d7
--- /dev/null
+++ b/contrib/llvm/patches/patch-07-llvm-r227752-boot2-shrink.diff
@@ -0,0 +1,1271 @@
+Pull in r227752 from upstream llvm trunk (by Michael Kuperstein):
+
+ [X86] Convert esp-relative movs of function arguments to pushes, step 2
+
+ This moves the transformation introduced in r223757 into a separate MI pass.
+ This allows it to cover many more cases (not only cases where there must be a
+ reserved call frame), and perform rudimentary call folding. It still doesn't
+ have a heuristic, so it is enabled only for optsize/minsize, with stack
+ alignment <= 8, where it ought to be a fairly clear win.
+
+ (Re-commit of r227728)
+
+ Differential Revision: http://reviews.llvm.org/D6789
+
+This helps to get sys/boot/i386/boot2 below the required size again,
+when optimizing with -Oz.
+
+Introduced here: http://svnweb.freebsd.org/changeset/base/278112
+
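For context, a minimal before/after sketch of the transformation on 32-bit
x86 when optimizing for size (hypothetical call; the exact sequences are
illustrative, but push encodings are considerably shorter than esp-relative
movs):

    /* Before (esp-relative movs):        After (pushes):
     *     subl  $12, %esp                    pushl $3
     *     movl  $3, 8(%esp)                  pushl $2
     *     movl  $2, 4(%esp)                  pushl $1
     *     movl  $1, (%esp)                   calll f
     *     calll f                            addl  $12, %esp
     */
    void f(int, int, int);
    void g(void) { f(1, 2, 3); }
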
+Index: include/llvm/Target/TargetFrameLowering.h
+===================================================================
+--- include/llvm/Target/TargetFrameLowering.h
++++ include/llvm/Target/TargetFrameLowering.h
+@@ -193,6 +193,11 @@ class TargetFrameLowering {
+ return hasReservedCallFrame(MF) || hasFP(MF);
+ }
+
++ // needsFrameIndexResolution - Do we need to perform FI resolution for
++ // this function. Normally, this is required only when the function
++ // has any stack objects. However, targets may want to override this.
++ virtual bool needsFrameIndexResolution(const MachineFunction &MF) const;
++
+ /// getFrameIndexOffset - Returns the displacement from the frame register to
+ /// the stack frame of the specified index.
+ virtual int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
+Index: lib/CodeGen/PrologEpilogInserter.cpp
+===================================================================
+--- lib/CodeGen/PrologEpilogInserter.cpp
++++ lib/CodeGen/PrologEpilogInserter.cpp
+@@ -703,7 +703,8 @@ void PEI::insertPrologEpilogCode(MachineFunction &
+ /// register references and actual offsets.
+ ///
+ void PEI::replaceFrameIndices(MachineFunction &Fn) {
+- if (!Fn.getFrameInfo()->hasStackObjects()) return; // Nothing to do?
++ const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
++ if (!TFI.needsFrameIndexResolution(Fn)) return;
+
+ // Store SPAdj at exit of a basic block.
+ SmallVector<int, 8> SPState;
+@@ -769,13 +770,6 @@ void PEI::replaceFrameIndices(MachineBasicBlock *B
+ continue;
+ }
+
+- // If we are looking at a call sequence, we need to keep track of
+- // the SP adjustment made by each instruction in the sequence.
+- // This includes both the frame setup/destroy pseudos (handled above),
+- // as well as other instructions that have side effects w.r.t the SP.
+- if (InsideCallSequence)
+- SPAdj += TII.getSPAdjust(I);
+-
+ MachineInstr *MI = I;
+ bool DoIncr = true;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+@@ -854,6 +848,16 @@ void PEI::replaceFrameIndices(MachineBasicBlock *B
+ break;
+ }
+
++ // If we are looking at a call sequence, we need to keep track of
++ // the SP adjustment made by each instruction in the sequence.
++ // This includes both the frame setup/destroy pseudos (handled above),
++ // as well as other instructions that have side effects w.r.t the SP.
++ // Note that this must come after eliminateFrameIndex, because
++ // if I itself referred to a frame index, we shouldn't count its own
++ // adjustment.
++ if (MI && InsideCallSequence)
++ SPAdj += TII.getSPAdjust(MI);
++
+ if (DoIncr && I != BB->end()) ++I;
+
+ // Update register states.
+Index: lib/CodeGen/TargetFrameLoweringImpl.cpp
+===================================================================
+--- lib/CodeGen/TargetFrameLoweringImpl.cpp
++++ lib/CodeGen/TargetFrameLoweringImpl.cpp
+@@ -42,3 +42,8 @@ int TargetFrameLowering::getFrameIndexReference(co
+ FrameReg = RI->getFrameRegister(MF);
+ return getFrameIndexOffset(MF, FI);
+ }
++
++bool TargetFrameLowering::needsFrameIndexResolution(
++ const MachineFunction &MF) const {
++ return MF.getFrameInfo()->hasStackObjects();
++}
+Index: lib/Target/X86/CMakeLists.txt
+===================================================================
+--- lib/Target/X86/CMakeLists.txt
++++ lib/Target/X86/CMakeLists.txt
+@@ -14,6 +14,7 @@ add_public_tablegen_target(X86CommonTableGen)
+
+ set(sources
+ X86AsmPrinter.cpp
++ X86CallFrameOptimization.cpp
+ X86FastISel.cpp
+ X86FloatingPoint.cpp
+ X86FrameLowering.cpp
+Index: lib/Target/X86/X86.h
+===================================================================
+--- lib/Target/X86/X86.h
++++ lib/Target/X86/X86.h
+@@ -67,6 +67,11 @@ FunctionPass *createX86PadShortFunctions();
+ /// to eliminate execution delays in some Atom processors.
+ FunctionPass *createX86FixupLEAs();
+
++/// createX86CallFrameOptimization - Return a pass that optimizes
++/// the code-size of x86 call sequences. This is done by replacing
++/// esp-relative movs with pushes.
++FunctionPass *createX86CallFrameOptimization();
++
+ } // End llvm namespace
+
+ #endif
+Index: lib/Target/X86/X86CallFrameOptimization.cpp
+===================================================================
+--- lib/Target/X86/X86CallFrameOptimization.cpp
++++ lib/Target/X86/X86CallFrameOptimization.cpp
+@@ -0,0 +1,400 @@
++//===----- X86CallFrameOptimization.cpp - Optimize x86 call sequences -----===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//===----------------------------------------------------------------------===//
++//
++// This file defines a pass that optimizes call sequences on x86.
++// Currently, it converts movs of function parameters onto the stack into
++// pushes. This is beneficial for two main reasons:
++// 1) The push instruction encoding is much smaller than an esp-relative mov
++// 2) It is possible to push memory arguments directly. So, if the
++//    transformation is performed pre-reg-alloc, it can help relieve
++// register pressure.
++//
++//===----------------------------------------------------------------------===//
++
++#include <algorithm>
++
++#include "X86.h"
++#include "X86InstrInfo.h"
++#include "X86Subtarget.h"
++#include "X86MachineFunctionInfo.h"
++#include "llvm/ADT/Statistic.h"
++#include "llvm/CodeGen/MachineFunctionPass.h"
++#include "llvm/CodeGen/MachineInstrBuilder.h"
++#include "llvm/CodeGen/MachineRegisterInfo.h"
++#include "llvm/CodeGen/Passes.h"
++#include "llvm/IR/Function.h"
++#include "llvm/Support/Debug.h"
++#include "llvm/Support/raw_ostream.h"
++#include "llvm/Target/TargetInstrInfo.h"
++
++using namespace llvm;
++
++#define DEBUG_TYPE "x86-cf-opt"
++
++cl::opt<bool> NoX86CFOpt("no-x86-call-frame-opt",
++ cl::desc("Avoid optimizing x86 call frames for size"),
++ cl::init(false), cl::Hidden);
++
++namespace {
++class X86CallFrameOptimization : public MachineFunctionPass {
++public:
++ X86CallFrameOptimization() : MachineFunctionPass(ID) {}
++
++ bool runOnMachineFunction(MachineFunction &MF) override;
++
++private:
++ bool shouldPerformTransformation(MachineFunction &MF);
++
++ bool adjustCallSequence(MachineFunction &MF, MachineBasicBlock &MBB,
++ MachineBasicBlock::iterator I);
++
++ MachineInstr *canFoldIntoRegPush(MachineBasicBlock::iterator FrameSetup,
++ unsigned Reg);
++
++ const char *getPassName() const override {
++ return "X86 Optimize Call Frame";
++ }
++
++ const TargetInstrInfo *TII;
++ const TargetFrameLowering *TFL;
++ const MachineRegisterInfo *MRI;
++ static char ID;
++};
++
++char X86CallFrameOptimization::ID = 0;
++}
++
++FunctionPass *llvm::createX86CallFrameOptimization() {
++ return new X86CallFrameOptimization();
++}
++
++// This checks whether the transformation is legal and profitable
++bool X86CallFrameOptimization::shouldPerformTransformation(MachineFunction &MF) {
++ if (NoX86CFOpt.getValue())
++ return false;
++
++ // We currently only support call sequences where *all* parameters
++ // are passed on the stack.
++ // No point in running this in 64-bit mode, since some arguments are
++ // passed in-register in all common calling conventions, so the pattern
++ // we're looking for will never match.
++ const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
++ if (STI.is64Bit())
++ return false;
++
++ // You would expect straight-line code between call-frame setup and
++ // call-frame destroy. You would be wrong. There are circumstances (e.g.
++ // CMOV_GR8 expansion of a select that feeds a function call!) where we can
++ // end up with the setup and the destroy in different basic blocks.
++ // This is bad, and breaks SP adjustment.
++ // So, check that all of the frames in the function are closed inside
++ // the same block, and, for good measure, that there are no nested frames.
++ int FrameSetupOpcode = TII->getCallFrameSetupOpcode();
++ int FrameDestroyOpcode = TII->getCallFrameDestroyOpcode();
++ for (MachineBasicBlock &BB : MF) {
++ bool InsideFrameSequence = false;
++ for (MachineInstr &MI : BB) {
++ if (MI.getOpcode() == FrameSetupOpcode) {
++ if (InsideFrameSequence)
++ return false;
++ InsideFrameSequence = true;
++ }
++ else if (MI.getOpcode() == FrameDestroyOpcode) {
++ if (!InsideFrameSequence)
++ return false;
++ InsideFrameSequence = false;
++ }
++ }
++
++ if (InsideFrameSequence)
++ return false;
++ }
++
++ // Now that we know the transformation is legal, check if it is
++ // profitable.
++ // TODO: Add a heuristic that actually looks at the function,
++ // and enable this for more cases.
++
++ // This transformation is always a win when we expected to have
++ // a reserved call frame. Under other circumstances, it may be either
++ // a win or a loss, and requires a heuristic.
++ // For now, enable it only for the relatively clear win cases.
++ bool CannotReserveFrame = MF.getFrameInfo()->hasVarSizedObjects();
++ if (CannotReserveFrame)
++ return true;
++
++ // For now, don't even try to evaluate the profitability when
++ // not optimizing for size.
++ AttributeSet FnAttrs = MF.getFunction()->getAttributes();
++ bool OptForSize =
++ FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
++ Attribute::OptimizeForSize) ||
++ FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
++
++ if (!OptForSize)
++ return false;
++
++ // Stack re-alignment can make this unprofitable even in terms of size.
++ // As mentioned above, a better heuristic is needed. For now, don't do this
++ // when the required alignment is above 8. (4 would be the safe choice, but
++ // some experimentation showed 8 is generally good).
++ if (TFL->getStackAlignment() > 8)
++ return false;
++
++ return true;
++}
++
++bool X86CallFrameOptimization::runOnMachineFunction(MachineFunction &MF) {
++ TII = MF.getSubtarget().getInstrInfo();
++ TFL = MF.getSubtarget().getFrameLowering();
++ MRI = &MF.getRegInfo();
++
++ if (!shouldPerformTransformation(MF))
++ return false;
++
++ int FrameSetupOpcode = TII->getCallFrameSetupOpcode();
++
++ bool Changed = false;
++
++ for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB)
++ for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I)
++ if (I->getOpcode() == FrameSetupOpcode)
++ Changed |= adjustCallSequence(MF, *BB, I);
++
++ return Changed;
++}
++
++bool X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
++ MachineBasicBlock &MBB,
++ MachineBasicBlock::iterator I) {
++
++ // Check that this particular call sequence is amenable to the
++ // transformation.
++ const X86RegisterInfo &RegInfo = *static_cast<const X86RegisterInfo *>(
++ MF.getSubtarget().getRegisterInfo());
++ unsigned StackPtr = RegInfo.getStackRegister();
++ int FrameDestroyOpcode = TII->getCallFrameDestroyOpcode();
++
++ // We expect to enter this at the beginning of a call sequence
++ assert(I->getOpcode() == TII->getCallFrameSetupOpcode());
++ MachineBasicBlock::iterator FrameSetup = I++;
++
++
++ // For globals in PIC mode, we can have some LEAs here.
++ // Ignore them, they don't bother us.
++ // TODO: Extend this to something that covers more cases.
++ while (I->getOpcode() == X86::LEA32r)
++ ++I;
++
++ // We expect a copy instruction here.
++ // TODO: The copy instruction is a lowering artifact.
++ // We should also support a copy-less version, where the stack
++ // pointer is used directly.
++ if (!I->isCopy() || !I->getOperand(0).isReg())
++ return false;
++ MachineBasicBlock::iterator SPCopy = I++;
++ StackPtr = SPCopy->getOperand(0).getReg();
++
++ // Scan the call setup sequence for the pattern we're looking for.
++ // We only handle a simple case - a sequence of MOV32mi or MOV32mr
++ // instructions, that push a sequence of 32-bit values onto the stack, with
++ // no gaps between them.
++ SmallVector<MachineInstr*, 4> MovVector(4, nullptr);
++ unsigned int MaxAdjust = FrameSetup->getOperand(0).getImm() / 4;
++ if (MaxAdjust > 4)
++ MovVector.resize(MaxAdjust, nullptr);
++
++ do {
++ int Opcode = I->getOpcode();
++ if (Opcode != X86::MOV32mi && Opcode != X86::MOV32mr)
++ break;
++
++ // We only want movs of the form:
++ // movl imm/r32, k(%esp)
++ // If we run into something else, bail.
++ // Note that AddrBaseReg may, counter to its name, not be a register,
++ // but rather a frame index.
++ // TODO: Support the fi case. This should probably work now that we
++ // have the infrastructure to track the stack pointer within a call
++ // sequence.
++ if (!I->getOperand(X86::AddrBaseReg).isReg() ||
++ (I->getOperand(X86::AddrBaseReg).getReg() != StackPtr) ||
++ !I->getOperand(X86::AddrScaleAmt).isImm() ||
++ (I->getOperand(X86::AddrScaleAmt).getImm() != 1) ||
++ (I->getOperand(X86::AddrIndexReg).getReg() != X86::NoRegister) ||
++ (I->getOperand(X86::AddrSegmentReg).getReg() != X86::NoRegister) ||
++ !I->getOperand(X86::AddrDisp).isImm())
++ return false;
++
++ int64_t StackDisp = I->getOperand(X86::AddrDisp).getImm();
++ assert(StackDisp >= 0 && "Negative stack displacement when passing parameters");
++
++ // We really don't want to consider the unaligned case.
++ if (StackDisp % 4)
++ return false;
++ StackDisp /= 4;
++
++ assert((size_t)StackDisp < MovVector.size() &&
++ "Function call has more parameters than the stack is adjusted for.");
++
++ // If the same stack slot is being filled twice, something's fishy.
++ if (MovVector[StackDisp] != nullptr)
++ return false;
++ MovVector[StackDisp] = I;
++
++ ++I;
++ } while (I != MBB.end());
++
++ // We now expect the end of the sequence - a call and a stack adjust.
++ if (I == MBB.end())
++ return false;
++
++ // For PCrel calls, we expect an additional COPY of the basereg.
++ // If we find one, skip it.
++ if (I->isCopy()) {
++ if (I->getOperand(1).getReg() ==
++ MF.getInfo<X86MachineFunctionInfo>()->getGlobalBaseReg())
++ ++I;
++ else
++ return false;
++ }
++
++ if (!I->isCall())
++ return false;
++ MachineBasicBlock::iterator Call = I;
++ if ((++I)->getOpcode() != FrameDestroyOpcode)
++ return false;
++
++ // Now, go through the vector, and see that we don't have any gaps,
++ // but only a series of 32-bit MOVs.
++
++ int64_t ExpectedDist = 0;
++ auto MMI = MovVector.begin(), MME = MovVector.end();
++ for (; MMI != MME; ++MMI, ExpectedDist += 4)
++ if (*MMI == nullptr)
++ break;
++
++ // If the call had no parameters, do nothing
++ if (!ExpectedDist)
++ return false;
++
++ // We are either at the last parameter, or a gap.
++ // Make sure it's not a gap
++ for (; MMI != MME; ++MMI)
++ if (*MMI != nullptr)
++ return false;
++
++ // Ok, we can in fact do the transformation for this call.
++ // Do not remove the FrameSetup instruction, but adjust the parameters.
++ // PEI will end up finalizing the handling of this.
++ FrameSetup->getOperand(1).setImm(ExpectedDist);
++
++ DebugLoc DL = I->getDebugLoc();
++ // Now, iterate through the vector in reverse order, and replace the movs
++ // with pushes. MOVmi/MOVmr doesn't have any defs, so no need to
++ // replace uses.
++ for (int Idx = (ExpectedDist / 4) - 1; Idx >= 0; --Idx) {
++ MachineBasicBlock::iterator MOV = *MovVector[Idx];
++ MachineOperand PushOp = MOV->getOperand(X86::AddrNumOperands);
++ if (MOV->getOpcode() == X86::MOV32mi) {
++ unsigned PushOpcode = X86::PUSHi32;
++ // If the operand is a small (8-bit) immediate, we can use a
++ // PUSH instruction with a shorter encoding.
++ // Note that isImm() may fail even though this is a MOVmi, because
++ // the operand can also be a symbol.
++ if (PushOp.isImm()) {
++ int64_t Val = PushOp.getImm();
++ if (isInt<8>(Val))
++ PushOpcode = X86::PUSH32i8;
++ }
++ BuildMI(MBB, Call, DL, TII->get(PushOpcode)).addOperand(PushOp);
++ } else {
++ unsigned int Reg = PushOp.getReg();
++
++ // If PUSHrmm is not slow on this target, try to fold the source of the
++ // push into the instruction.
++ const X86Subtarget &ST = MF.getTarget().getSubtarget<X86Subtarget>();
++ bool SlowPUSHrmm = ST.isAtom() || ST.isSLM();
++
++ // Check that this is legal to fold. Right now, we're extremely
++ // conservative about that.
++ MachineInstr *DefMov = nullptr;
++ if (!SlowPUSHrmm && (DefMov = canFoldIntoRegPush(FrameSetup, Reg))) {
++ MachineInstr *Push = BuildMI(MBB, Call, DL, TII->get(X86::PUSH32rmm));
++
++ unsigned NumOps = DefMov->getDesc().getNumOperands();
++ for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i)
++ Push->addOperand(DefMov->getOperand(i));
++
++ DefMov->eraseFromParent();
++ } else {
++ BuildMI(MBB, Call, DL, TII->get(X86::PUSH32r)).addReg(Reg).getInstr();
++ }
++ }
++
++ MBB.erase(MOV);
++ }
++
++ // The stack-pointer copy is no longer used in the call sequences.
++ // There should not be any other users, but we can't commit to that, so:
++ if (MRI->use_empty(SPCopy->getOperand(0).getReg()))
++ SPCopy->eraseFromParent();
++
++ // Once we've done this, we need to make sure PEI doesn't assume a reserved
++ // frame.
++ X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
++ FuncInfo->setHasPushSequences(true);
++
++ return true;
++}
++
++MachineInstr *X86CallFrameOptimization::canFoldIntoRegPush(
++ MachineBasicBlock::iterator FrameSetup, unsigned Reg) {
++ // Do an extremely restricted form of load folding.
++ // ISel will often create patterns like:
++ // movl 4(%edi), %eax
++ // movl 8(%edi), %ecx
++ // movl 12(%edi), %edx
++ // movl %edx, 8(%esp)
++ // movl %ecx, 4(%esp)
++ // movl %eax, (%esp)
++ // call
++ // Get rid of those with prejudice.
++ if (!TargetRegisterInfo::isVirtualRegister(Reg))
++ return nullptr;
++
++ // Make sure this is the only use of Reg.
++ if (!MRI->hasOneNonDBGUse(Reg))
++ return nullptr;
++
++ MachineBasicBlock::iterator DefMI = MRI->getVRegDef(Reg);
++
++ // Make sure the def is a MOV from memory.
++ // If the def is an another block, give up.
++ if (DefMI->getOpcode() != X86::MOV32rm ||
++ DefMI->getParent() != FrameSetup->getParent())
++ return nullptr;
++
++ // Be careful with movs that load from a stack slot, since it may get
++ // resolved incorrectly.
++ // TODO: Again, we already have the infrastructure, so this should work.
++ if (!DefMI->getOperand(1).isReg())
++ return nullptr;
++
++ // Now, make sure everything else up until the ADJCALLSTACK is a sequence
++ // of MOVs. To be less conservative would require duplicating a lot of the
++ // logic from PeepholeOptimizer.
++ // FIXME: A possibly better approach would be to teach the PeepholeOptimizer
++ // to be smarter about folding into pushes.
++ for (auto I = DefMI; I != FrameSetup; ++I)
++ if (I->getOpcode() != X86::MOV32rm)
++ return nullptr;
++
++ return DefMI;
++}
+Index: lib/Target/X86/X86FastISel.cpp
+===================================================================
+--- lib/Target/X86/X86FastISel.cpp
++++ lib/Target/X86/X86FastISel.cpp
+@@ -2735,7 +2735,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &
+ // Issue CALLSEQ_START
+ unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
+- .addImm(NumBytes);
++ .addImm(NumBytes).addImm(0);
+
+ // Walk the register/memloc assignments, inserting copies/loads.
+ const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
+Index: lib/Target/X86/X86FrameLowering.cpp
+===================================================================
+--- lib/Target/X86/X86FrameLowering.cpp
++++ lib/Target/X86/X86FrameLowering.cpp
+@@ -38,9 +38,36 @@ using namespace llvm;
+ extern cl::opt<bool> ForceStackAlign;
+
+ bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
+- return !MF.getFrameInfo()->hasVarSizedObjects();
++ return !MF.getFrameInfo()->hasVarSizedObjects() &&
++ !MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
+ }
+
++/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
++/// call frame pseudos can be simplified. Having a FP, as in the default
++/// implementation, is not sufficient here since we can't always use it.
++/// Use a more nuanced condition.
++bool
++X86FrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
++ const X86RegisterInfo *TRI = static_cast<const X86RegisterInfo *>
++ (MF.getSubtarget().getRegisterInfo());
++ return hasReservedCallFrame(MF) ||
++ (hasFP(MF) && !TRI->needsStackRealignment(MF))
++ || TRI->hasBasePointer(MF);
++}
++
++// needsFrameIndexResolution - Do we need to perform FI resolution for
++// this function. Normally, this is required only when the function
++// has any stack objects. However, FI resolution actually has another job,
++// not apparent from the title - it resolves callframesetup/destroy
++// that were not simplified earlier.
++// So, this is required for x86 functions that have push sequences even
++// when there are no stack objects.
++bool
++X86FrameLowering::needsFrameIndexResolution(const MachineFunction &MF) const {
++ return MF.getFrameInfo()->hasStackObjects() ||
++ MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
++}
++
+ /// hasFP - Return true if the specified function should have a dedicated frame
+ /// pointer register. This is true if the function has variable sized allocas
+ /// or if frame pointer elimination is disabled.
+@@ -93,16 +120,6 @@ static unsigned getANDriOpcode(bool IsLP64, int64_
+ return X86::AND32ri;
+ }
+
+-static unsigned getPUSHiOpcode(bool IsLP64, MachineOperand MO) {
+- // We don't support LP64 for now.
+- assert(!IsLP64);
+-
+- if (MO.isImm() && isInt<8>(MO.getImm()))
+- return X86::PUSH32i8;
+-
+- return X86::PUSHi32;;
+-}
+-
+ static unsigned getLEArOpcode(unsigned IsLP64) {
+ return IsLP64 ? X86::LEA64r : X86::LEA32r;
+ }
+@@ -1882,100 +1899,6 @@ void X86FrameLowering::adjustForHiPEPrologue(Machi
+ #endif
+ }
+
+-bool X86FrameLowering::
+-convertArgMovsToPushes(MachineFunction &MF, MachineBasicBlock &MBB,
+- MachineBasicBlock::iterator I, uint64_t Amount) const {
+- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+- const X86RegisterInfo &RegInfo = *static_cast<const X86RegisterInfo *>(
+- MF.getSubtarget().getRegisterInfo());
+- unsigned StackPtr = RegInfo.getStackRegister();
+-
+- // Scan the call setup sequence for the pattern we're looking for.
+- // We only handle a simple case now - a sequence of MOV32mi or MOV32mr
+- // instructions, that push a sequence of 32-bit values onto the stack, with
+- // no gaps.
+- std::map<int64_t, MachineBasicBlock::iterator> MovMap;
+- do {
+- int Opcode = I->getOpcode();
+- if (Opcode != X86::MOV32mi && Opcode != X86::MOV32mr)
+- break;
+-
+- // We only want movs of the form:
+- // movl imm/r32, k(%ecx)
+- // If we run into something else, bail
+- // Note that AddrBaseReg may, counterintuitively, not be a register...
+- if (!I->getOperand(X86::AddrBaseReg).isReg() ||
+- (I->getOperand(X86::AddrBaseReg).getReg() != StackPtr) ||
+- !I->getOperand(X86::AddrScaleAmt).isImm() ||
+- (I->getOperand(X86::AddrScaleAmt).getImm() != 1) ||
+- (I->getOperand(X86::AddrIndexReg).getReg() != X86::NoRegister) ||
+- (I->getOperand(X86::AddrSegmentReg).getReg() != X86::NoRegister) ||
+- !I->getOperand(X86::AddrDisp).isImm())
+- return false;
+-
+- int64_t StackDisp = I->getOperand(X86::AddrDisp).getImm();
+-
+- // We don't want to consider the unaligned case.
+- if (StackDisp % 4)
+- return false;
+-
+- // If the same stack slot is being filled twice, something's fishy.
+- if (!MovMap.insert(std::pair<int64_t, MachineInstr*>(StackDisp, I)).second)
+- return false;
+-
+- ++I;
+- } while (I != MBB.end());
+-
+- // We now expect the end of the sequence - a call and a stack adjust.
+- if (I == MBB.end())
+- return false;
+- if (!I->isCall())
+- return false;
+- MachineBasicBlock::iterator Call = I;
+- if ((++I)->getOpcode() != TII.getCallFrameDestroyOpcode())
+- return false;
+-
+- // Now, go through the map, and see that we don't have any gaps,
+- // but only a series of 32-bit MOVs.
+- // Since std::map provides ordered iteration, the original order
+- // of the MOVs doesn't matter.
+- int64_t ExpectedDist = 0;
+- for (auto MMI = MovMap.begin(), MME = MovMap.end(); MMI != MME;
+- ++MMI, ExpectedDist += 4)
+- if (MMI->first != ExpectedDist)
+- return false;
+-
+- // Ok, everything looks fine. Do the transformation.
+- DebugLoc DL = I->getDebugLoc();
+-
+- // It's possible the original stack adjustment amount was larger than
+- // that done by the pushes. If so, we still need a SUB.
+- Amount -= ExpectedDist;
+- if (Amount) {
+- MachineInstr* Sub = BuildMI(MBB, Call, DL,
+- TII.get(getSUBriOpcode(false, Amount)), StackPtr)
+- .addReg(StackPtr).addImm(Amount);
+- Sub->getOperand(3).setIsDead();
+- }
+-
+- // Now, iterate through the map in reverse order, and replace the movs
+- // with pushes. MOVmi/MOVmr doesn't have any defs, so need to replace uses.
+- for (auto MMI = MovMap.rbegin(), MME = MovMap.rend(); MMI != MME; ++MMI) {
+- MachineBasicBlock::iterator MOV = MMI->second;
+- MachineOperand PushOp = MOV->getOperand(X86::AddrNumOperands);
+-
+- // Replace MOVmr with PUSH32r, and MOVmi with PUSHi of appropriate size
+- int PushOpcode = X86::PUSH32r;
+- if (MOV->getOpcode() == X86::MOV32mi)
+- PushOpcode = getPUSHiOpcode(false, PushOp);
+-
+- BuildMI(MBB, Call, DL, TII.get(PushOpcode)).addOperand(PushOp);
+- MBB.erase(MOV);
+- }
+-
+- return true;
+-}
+-
+ void X86FrameLowering::
+ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+@@ -1990,7 +1913,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF,
+ bool IsLP64 = STI.isTarget64BitLP64();
+ DebugLoc DL = I->getDebugLoc();
+ uint64_t Amount = !reserveCallFrame ? I->getOperand(0).getImm() : 0;
+- uint64_t CalleeAmt = isDestroy ? I->getOperand(1).getImm() : 0;
++ uint64_t InternalAmt = (isDestroy || Amount) ? I->getOperand(1).getImm() : 0;
+ I = MBB.erase(I);
+
+ if (!reserveCallFrame) {
+@@ -2010,24 +1933,18 @@ eliminateCallFramePseudoInstr(MachineFunction &MF,
+ Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign;
+
+ MachineInstr *New = nullptr;
+- if (Opcode == TII.getCallFrameSetupOpcode()) {
+- // Try to convert movs to the stack into pushes.
+- // We currently only look for a pattern that appears in 32-bit
+- // calling conventions.
+- if (!IsLP64 && convertArgMovsToPushes(MF, MBB, I, Amount))
+- return;
+
+- New = BuildMI(MF, DL, TII.get(getSUBriOpcode(IsLP64, Amount)),
+- StackPtr)
+- .addReg(StackPtr)
+- .addImm(Amount);
+- } else {
+- assert(Opcode == TII.getCallFrameDestroyOpcode());
++ // Factor out the amount that gets handled inside the sequence
++ // (Pushes of argument for frame setup, callee pops for frame destroy)
++ Amount -= InternalAmt;
+
+- // Factor out the amount the callee already popped.
+- Amount -= CalleeAmt;
++ if (Amount) {
++ if (Opcode == TII.getCallFrameSetupOpcode()) {
++ New = BuildMI(MF, DL, TII.get(getSUBriOpcode(IsLP64, Amount)), StackPtr)
++ .addReg(StackPtr).addImm(Amount);
++ } else {
++ assert(Opcode == TII.getCallFrameDestroyOpcode());
+
+- if (Amount) {
+ unsigned Opc = getADDriOpcode(IsLP64, Amount);
+ New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
+ .addReg(StackPtr).addImm(Amount);
+@@ -2045,13 +1962,13 @@ eliminateCallFramePseudoInstr(MachineFunction &MF,
+ return;
+ }
+
+- if (Opcode == TII.getCallFrameDestroyOpcode() && CalleeAmt) {
++ if (Opcode == TII.getCallFrameDestroyOpcode() && InternalAmt) {
+ // If we are performing frame pointer elimination and if the callee pops
+ // something off the stack pointer, add it back. We do this until we have
+ // more advanced stack pointer tracking ability.
+- unsigned Opc = getSUBriOpcode(IsLP64, CalleeAmt);
++ unsigned Opc = getSUBriOpcode(IsLP64, InternalAmt);
+ MachineInstr *New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
+- .addReg(StackPtr).addImm(CalleeAmt);
++ .addReg(StackPtr).addImm(InternalAmt);
+
+ // The EFLAGS implicit def is dead.
+ New->getOperand(3).setIsDead();
+Index: lib/Target/X86/X86FrameLowering.h
+===================================================================
+--- lib/Target/X86/X86FrameLowering.h
++++ lib/Target/X86/X86FrameLowering.h
+@@ -66,6 +66,8 @@ class X86FrameLowering : public TargetFrameLowerin
+
+ bool hasFP(const MachineFunction &MF) const override;
+ bool hasReservedCallFrame(const MachineFunction &MF) const override;
++ bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override;
++ bool needsFrameIndexResolution(const MachineFunction &MF) const override;
+
+ int getFrameIndexOffset(const MachineFunction &MF, int FI) const override;
+ int getFrameIndexReference(const MachineFunction &MF, int FI,
+Index: lib/Target/X86/X86InstrCompiler.td
+===================================================================
+--- lib/Target/X86/X86InstrCompiler.td
++++ lib/Target/X86/X86InstrCompiler.td
+@@ -43,9 +43,9 @@ let hasSideEffects = 0, isNotDuplicable = 1, Uses
+ // Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
+ // sub / add which can clobber EFLAGS.
+ let Defs = [ESP, EFLAGS], Uses = [ESP] in {
+-def ADJCALLSTACKDOWN32 : I<0, Pseudo, (outs), (ins i32imm:$amt),
++def ADJCALLSTACKDOWN32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
+ "#ADJCALLSTACKDOWN",
+- [(X86callseq_start timm:$amt)]>,
++ []>,
+ Requires<[NotLP64]>;
+ def ADJCALLSTACKUP32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
+ "#ADJCALLSTACKUP",
+@@ -52,7 +52,10 @@ def ADJCALLSTACKUP32 : I<0, Pseudo, (outs), (ins
+ [(X86callseq_end timm:$amt1, timm:$amt2)]>,
+ Requires<[NotLP64]>;
+ }
++def : Pat<(X86callseq_start timm:$amt1),
++ (ADJCALLSTACKDOWN32 i32imm:$amt1, 0)>, Requires<[NotLP64]>;
+
++
+ // ADJCALLSTACKDOWN/UP implicitly use/def RSP because they may be expanded into
+ // a stack adjustment and the codegen must know that they may modify the stack
+ // pointer before prolog-epilog rewriting occurs.
+@@ -59,9 +62,9 @@ def ADJCALLSTACKUP32 : I<0, Pseudo, (outs), (ins
+ // Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
+ // sub / add which can clobber EFLAGS.
+ let Defs = [RSP, EFLAGS], Uses = [RSP] in {
+-def ADJCALLSTACKDOWN64 : I<0, Pseudo, (outs), (ins i32imm:$amt),
++def ADJCALLSTACKDOWN64 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
+ "#ADJCALLSTACKDOWN",
+- [(X86callseq_start timm:$amt)]>,
++ []>,
+ Requires<[IsLP64]>;
+ def ADJCALLSTACKUP64 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
+ "#ADJCALLSTACKUP",
+@@ -68,9 +71,10 @@ def ADJCALLSTACKUP64 : I<0, Pseudo, (outs), (ins
+ [(X86callseq_end timm:$amt1, timm:$amt2)]>,
+ Requires<[IsLP64]>;
+ }
++def : Pat<(X86callseq_start timm:$amt1),
++ (ADJCALLSTACKDOWN64 i32imm:$amt1, 0)>, Requires<[IsLP64]>;
+
+
+-
+ // x86-64 va_start lowering magic.
+ let usesCustomInserter = 1, Defs = [EFLAGS] in {
+ def VASTART_SAVE_XMM_REGS : I<0, Pseudo,
+Index: lib/Target/X86/X86InstrInfo.cpp
+===================================================================
+--- lib/Target/X86/X86InstrInfo.cpp
++++ lib/Target/X86/X86InstrInfo.cpp
+@@ -1692,6 +1692,58 @@ X86InstrInfo::isCoalescableExtInstr(const MachineI
+ return false;
+ }
+
++int X86InstrInfo::getSPAdjust(const MachineInstr *MI) const {
++ const MachineFunction *MF = MI->getParent()->getParent();
++ const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
++
++ if (MI->getOpcode() == getCallFrameSetupOpcode() ||
++ MI->getOpcode() == getCallFrameDestroyOpcode()) {
++ unsigned StackAlign = TFI->getStackAlignment();
++ int SPAdj = (MI->getOperand(0).getImm() + StackAlign - 1) / StackAlign *
++ StackAlign;
++
++ SPAdj -= MI->getOperand(1).getImm();
++
++ if (MI->getOpcode() == getCallFrameSetupOpcode())
++ return SPAdj;
++ else
++ return -SPAdj;
++ }
++
++ // To know whether a call adjusts the stack, we need information
++ // that is bound to the following ADJCALLSTACKUP pseudo.
++ // Look for the next ADJCALLSTACKUP that follows the call.
++ if (MI->isCall()) {
++ const MachineBasicBlock* MBB = MI->getParent();
++ auto I = ++MachineBasicBlock::const_iterator(MI);
++ for (auto E = MBB->end(); I != E; ++I) {
++ if (I->getOpcode() == getCallFrameDestroyOpcode() ||
++ I->isCall())
++ break;
++ }
++
++ // If we could not find a frame destroy opcode, then it has already
++ // been simplified, so we don't care.
++ if (I->getOpcode() != getCallFrameDestroyOpcode())
++ return 0;
++
++ return -(I->getOperand(1).getImm());
++ }
++
++ // Currently handle only PUSHes we can reasonably expect to see
++ // in call sequences
++ switch (MI->getOpcode()) {
++ default:
++ return 0;
++ case X86::PUSH32i8:
++ case X86::PUSH32r:
++ case X86::PUSH32rmm:
++ case X86::PUSH32rmr:
++ case X86::PUSHi32:
++ return 4;
++ }
++}
++
+ /// isFrameOperand - Return true and the FrameIndex if the specified
+ /// operand and follow operands form a reference to the stack frame.
+ bool X86InstrInfo::isFrameOperand(const MachineInstr *MI, unsigned int Op,
+Index: lib/Target/X86/X86InstrInfo.h
+===================================================================
+--- lib/Target/X86/X86InstrInfo.h
++++ lib/Target/X86/X86InstrInfo.h
+@@ -175,6 +175,11 @@ class X86InstrInfo final : public X86GenInstrInfo
+ ///
+ const X86RegisterInfo &getRegisterInfo() const { return RI; }
+
++ /// getSPAdjust - This returns the stack pointer adjustment made by
++ /// this instruction. For x86, we need to handle more complex call
++ /// sequences involving PUSHes.
++ int getSPAdjust(const MachineInstr *MI) const override;
++
+ /// isCoalescableExtInstr - Return true if the instruction is a "coalescable"
+ /// extension instruction. That is, it's like a copy where it's legal for the
+ /// source to overlap the destination. e.g. X86::MOVSX64rr32. If this returns
+Index: lib/Target/X86/X86MachineFunctionInfo.h
+===================================================================
+--- lib/Target/X86/X86MachineFunctionInfo.h
++++ lib/Target/X86/X86MachineFunctionInfo.h
+@@ -77,6 +77,9 @@ class X86MachineFunctionInfo : public MachineFunct
+ unsigned ArgumentStackSize;
+ /// NumLocalDynamics - Number of local-dynamic TLS accesses.
+ unsigned NumLocalDynamics;
++ /// HasPushSequences - Keeps track of whether this function uses sequences
++ /// of pushes to pass function parameters.
++ bool HasPushSequences;
+
+ private:
+ /// ForwardedMustTailRegParms - A list of virtual and physical registers
+@@ -97,7 +100,8 @@ class X86MachineFunctionInfo : public MachineFunct
+ VarArgsGPOffset(0),
+ VarArgsFPOffset(0),
+ ArgumentStackSize(0),
+- NumLocalDynamics(0) {}
++ NumLocalDynamics(0),
++ HasPushSequences(false) {}
+
+ explicit X86MachineFunctionInfo(MachineFunction &MF)
+ : ForceFramePointer(false),
+@@ -113,11 +117,15 @@ class X86MachineFunctionInfo : public MachineFunct
+ VarArgsGPOffset(0),
+ VarArgsFPOffset(0),
+ ArgumentStackSize(0),
+- NumLocalDynamics(0) {}
++ NumLocalDynamics(0),
++ HasPushSequences(false) {}
+
+ bool getForceFramePointer() const { return ForceFramePointer;}
+ void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; }
+
++ bool getHasPushSequences() const { return HasPushSequences; }
++ void setHasPushSequences(bool HasPush) { HasPushSequences = HasPush; }
++
+ bool getRestoreBasePointer() const { return RestoreBasePointerOffset!=0; }
+ void setRestoreBasePointer(const MachineFunction *MF);
+ int getRestoreBasePointerOffset() const {return RestoreBasePointerOffset; }
+Index: lib/Target/X86/X86RegisterInfo.cpp
+===================================================================
+--- lib/Target/X86/X86RegisterInfo.cpp
++++ lib/Target/X86/X86RegisterInfo.cpp
+@@ -468,8 +468,6 @@ void
+ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS) const {
+- assert(SPAdj == 0 && "Unexpected");
+-
+ MachineInstr &MI = *II;
+ MachineFunction &MF = *MI.getParent()->getParent();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+@@ -506,6 +504,9 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicB
+ } else
+ FIOffset = TFI->getFrameIndexOffset(MF, FrameIndex);
+
++ if (BasePtr == StackPtr)
++ FIOffset += SPAdj;
++
+ // The frame index format for stackmaps and patchpoints is different from the
+ // X86 format. It only has a FI and an offset.
+ if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) {
+Index: lib/Target/X86/X86TargetMachine.cpp
+===================================================================
+--- lib/Target/X86/X86TargetMachine.cpp
++++ lib/Target/X86/X86TargetMachine.cpp
+@@ -154,6 +154,7 @@ class X86PassConfig : public TargetPassConfig {
+ void addIRPasses() override;
+ bool addInstSelector() override;
+ bool addILPOpts() override;
++ void addPreRegAlloc() override;
+ void addPostRegAlloc() override;
+ void addPreEmitPass() override;
+ };
+@@ -187,6 +188,10 @@ bool X86PassConfig::addILPOpts() {
+ return true;
+ }
+
++void X86PassConfig::addPreRegAlloc() {
++ addPass(createX86CallFrameOptimization());
++}
++
+ void X86PassConfig::addPostRegAlloc() {
+ addPass(createX86FloatingPointStackifierPass());
+ }
+Index: test/CodeGen/X86/inalloca-invoke.ll
+===================================================================
+--- test/CodeGen/X86/inalloca-invoke.ll
++++ test/CodeGen/X86/inalloca-invoke.ll
+@@ -31,7 +31,7 @@ blah:
+ to label %invoke.cont unwind label %lpad
+
+ ; Uses end as sret param.
+-; CHECK: movl %[[end]], (%esp)
++; CHECK: pushl %[[end]]
+ ; CHECK: calll _plus
+
+ invoke.cont:
+Index: test/CodeGen/X86/movtopush.ll
+===================================================================
+--- test/CodeGen/X86/movtopush.ll
++++ test/CodeGen/X86/movtopush.ll
+@@ -1,10 +1,12 @@
+ ; RUN: llc < %s -mtriple=i686-windows | FileCheck %s -check-prefix=NORMAL
++; RUN: llc < %s -mtriple=x86_64-windows | FileCheck %s -check-prefix=X64
+ ; RUN: llc < %s -mtriple=i686-windows -force-align-stack -stack-alignment=32 | FileCheck %s -check-prefix=ALIGNED
++
+ declare void @good(i32 %a, i32 %b, i32 %c, i32 %d)
+ declare void @inreg(i32 %a, i32 inreg %b, i32 %c, i32 %d)
+
+ ; Here, we should have a reserved frame, so we don't expect pushes
+-; NORMAL-LABEL: test1
++; NORMAL-LABEL: test1:
+ ; NORMAL: subl $16, %esp
+ ; NORMAL-NEXT: movl $4, 12(%esp)
+ ; NORMAL-NEXT: movl $3, 8(%esp)
+@@ -11,6 +13,7 @@ declare void @inreg(i32 %a, i32 inreg %b, i32 %c,
+ ; NORMAL-NEXT: movl $2, 4(%esp)
+ ; NORMAL-NEXT: movl $1, (%esp)
+ ; NORMAL-NEXT: call
++; NORMAL-NEXT: addl $16, %esp
+ define void @test1() {
+ entry:
+ call void @good(i32 1, i32 2, i32 3, i32 4)
+@@ -17,8 +20,10 @@ entry:
+ ret void
+ }
+
+-; Here, we expect a sequence of 4 immediate pushes
+-; NORMAL-LABEL: test2
++; We're optimizing for code size, so we should get pushes for x86,
++; even though there is a reserved call frame.
++; Make sure we don't touch x86-64
++; NORMAL-LABEL: test1b:
+ ; NORMAL-NOT: subl {{.*}} %esp
+ ; NORMAL: pushl $4
+ ; NORMAL-NEXT: pushl $3
+@@ -25,6 +30,42 @@ entry:
+ ; NORMAL-NEXT: pushl $2
+ ; NORMAL-NEXT: pushl $1
+ ; NORMAL-NEXT: call
++; NORMAL-NEXT: addl $16, %esp
++; X64-LABEL: test1b:
++; X64: movl $1, %ecx
++; X64-NEXT: movl $2, %edx
++; X64-NEXT: movl $3, %r8d
++; X64-NEXT: movl $4, %r9d
++; X64-NEXT: callq good
++define void @test1b() optsize {
++entry:
++ call void @good(i32 1, i32 2, i32 3, i32 4)
++ ret void
++}
++
++; Same as above, but for minsize
++; NORMAL-LABEL: test1c:
++; NORMAL-NOT: subl {{.*}} %esp
++; NORMAL: pushl $4
++; NORMAL-NEXT: pushl $3
++; NORMAL-NEXT: pushl $2
++; NORMAL-NEXT: pushl $1
++; NORMAL-NEXT: call
++; NORMAL-NEXT: addl $16, %esp
++define void @test1c() minsize {
++entry:
++ call void @good(i32 1, i32 2, i32 3, i32 4)
++ ret void
++}
++
++; If we don't have a reserved frame, we should have pushes
++; NORMAL-LABEL: test2:
++; NORMAL-NOT: subl {{.*}} %esp
++; NORMAL: pushl $4
++; NORMAL-NEXT: pushl $3
++; NORMAL-NEXT: pushl $2
++; NORMAL-NEXT: pushl $1
++; NORMAL-NEXT: call
+ define void @test2(i32 %k) {
+ entry:
+ %a = alloca i32, i32 %k
+@@ -34,7 +75,7 @@ entry:
+
+ ; Again, we expect a sequence of 4 immediate pushes
+ ; Checks that we generate the right pushes for >8bit immediates
+-; NORMAL-LABEL: test2b
++; NORMAL-LABEL: test2b:
+ ; NORMAL-NOT: subl {{.*}} %esp
+ ; NORMAL: pushl $4096
+ ; NORMAL-NEXT: pushl $3072
+@@ -41,15 +82,15 @@ entry:
+ ; NORMAL-NEXT: pushl $2048
+ ; NORMAL-NEXT: pushl $1024
+ ; NORMAL-NEXT: call
+-define void @test2b(i32 %k) {
++; NORMAL-NEXT: addl $16, %esp
++define void @test2b() optsize {
+ entry:
+- %a = alloca i32, i32 %k
+ call void @good(i32 1024, i32 2048, i32 3072, i32 4096)
+ ret void
+ }
+
+ ; The first push should push a register
+-; NORMAL-LABEL: test3
++; NORMAL-LABEL: test3:
+ ; NORMAL-NOT: subl {{.*}} %esp
+ ; NORMAL: pushl $4
+ ; NORMAL-NEXT: pushl $3
+@@ -56,15 +97,15 @@ entry:
+ ; NORMAL-NEXT: pushl $2
+ ; NORMAL-NEXT: pushl %e{{..}}
+ ; NORMAL-NEXT: call
+-define void @test3(i32 %k) {
++; NORMAL-NEXT: addl $16, %esp
++define void @test3(i32 %k) optsize {
+ entry:
+- %a = alloca i32, i32 %k
+ call void @good(i32 %k, i32 2, i32 3, i32 4)
+ ret void
+ }
+
+ ; We don't support weird calling conventions
+-; NORMAL-LABEL: test4
++; NORMAL-LABEL: test4:
+ ; NORMAL: subl $12, %esp
+ ; NORMAL-NEXT: movl $4, 8(%esp)
+ ; NORMAL-NEXT: movl $3, 4(%esp)
+@@ -71,16 +112,16 @@ entry:
+ ; NORMAL-NEXT: movl $1, (%esp)
+ ; NORMAL-NEXT: movl $2, %eax
+ ; NORMAL-NEXT: call
+-define void @test4(i32 %k) {
++; NORMAL-NEXT: addl $12, %esp
++define void @test4() optsize {
+ entry:
+- %a = alloca i32, i32 %k
+ call void @inreg(i32 1, i32 2, i32 3, i32 4)
+ ret void
+ }
+
+-; Check that additional alignment is added when the pushes
+-; don't add up to the required alignment.
+-; ALIGNED-LABEL: test5
++; When there is no reserved call frame, check that additional alignment
++; is added when the pushes don't add up to the required alignment.
++; ALIGNED-LABEL: test5:
+ ; ALIGNED: subl $16, %esp
+ ; ALIGNED-NEXT: pushl $4
+ ; ALIGNED-NEXT: pushl $3
+@@ -97,7 +138,7 @@ entry:
+ ; Check that pushing the addresses of globals (Or generally, things that
+ ; aren't exactly immediates) isn't broken.
+ ; Fixes PR21878.
+-; NORMAL-LABEL: test6
++; NORMAL-LABEL: test6:
+ ; NORMAL: pushl $_ext
+ ; NORMAL-NEXT: call
+ declare void @f(i8*)
+@@ -110,3 +151,108 @@ bb:
+ alloca i32
+ ret void
+ }
++
++; Check that we fold simple cases into the push
++; NORMAL-LABEL: test7:
++; NORMAL-NOT: subl {{.*}} %esp
++; NORMAL: movl 4(%esp), [[EAX:%e..]]
++; NORMAL-NEXT: pushl $4
++; NORMAL-NEXT: pushl ([[EAX]])
++; NORMAL-NEXT: pushl $2
++; NORMAL-NEXT: pushl $1
++; NORMAL-NEXT: call
++; NORMAL-NEXT: addl $16, %esp
++define void @test7(i32* %ptr) optsize {
++entry:
++ %val = load i32* %ptr
++ call void @good(i32 1, i32 2, i32 %val, i32 4)
++ ret void
++}
++
++; But we don't want to fold stack-relative loads into the push,
++; because the offset will be wrong
++; NORMAL-LABEL: test8:
++; NORMAL-NOT: subl {{.*}} %esp
++; NORMAL: movl 4(%esp), [[EAX:%e..]]
++; NORMAL-NEXT: pushl $4
++; NORMAL-NEXT: pushl [[EAX]]
++; NORMAL-NEXT: pushl $2
++; NORMAL-NEXT: pushl $1
++; NORMAL-NEXT: call
++; NORMAL-NEXT: addl $16, %esp
++define void @test8(i32* %ptr) optsize {
++entry:
++ %val = ptrtoint i32* %ptr to i32
++ call void @good(i32 1, i32 2, i32 %val, i32 4)
++ ret void
++}
++
++; If one function is using push instructions, and the other isn't
++; (because it has frame-index references), then we must resolve
++; these references correctly.
++; NORMAL-LABEL: test9:
++; NORMAL-NOT: leal (%esp),
++; NORMAL: pushl $4
++; NORMAL-NEXT: pushl $3
++; NORMAL-NEXT: pushl $2
++; NORMAL-NEXT: pushl $1
++; NORMAL-NEXT: call
++; NORMAL-NEXT: addl $16, %esp
++; NORMAL-NEXT: subl $16, %esp
++; NORMAL-NEXT: leal 16(%esp), [[EAX:%e..]]
++; NORMAL-NEXT: movl [[EAX]], 12(%esp)
++; NORMAL-NEXT: movl $7, 8(%esp)
++; NORMAL-NEXT: movl $6, 4(%esp)
++; NORMAL-NEXT: movl $5, (%esp)
++; NORMAL-NEXT: call
++; NORMAL-NEXT: addl $16, %esp
++define void @test9() optsize {
++entry:
++ %p = alloca i32, align 4
++ call void @good(i32 1, i32 2, i32 3, i32 4)
++ %0 = ptrtoint i32* %p to i32
++ call void @good(i32 5, i32 6, i32 7, i32 %0)
++ ret void
++}
++
++; We can end up with an indirect call which gets reloaded on the spot.
++; Make sure we reference the correct stack slot - we spill into (%esp)
++; and reload from 16(%esp) due to the pushes.
++; NORMAL-LABEL: test10:
++; NORMAL: movl $_good, [[ALLOC:.*]]
++; NORMAL-NEXT: movl [[ALLOC]], [[EAX:%e..]]
++; NORMAL-NEXT: movl [[EAX]], (%esp) # 4-byte Spill
++; NORMAL: nop
++; NORMAL: pushl $4
++; NORMAL-NEXT: pushl $3
++; NORMAL-NEXT: pushl $2
++; NORMAL-NEXT: pushl $1
++; NORMAL-NEXT: calll *16(%esp)
++; NORMAL-NEXT: addl $16, %esp
++define void @test10() optsize {
++ %stack_fptr = alloca void (i32, i32, i32, i32)*
++ store void (i32, i32, i32, i32)* @good, void (i32, i32, i32, i32)** %stack_fptr
++ %good_ptr = load volatile void (i32, i32, i32, i32)** %stack_fptr
++ call void asm sideeffect "nop", "~{ax},~{bx},~{cx},~{dx},~{bp},~{si},~{di}"()
++ call void (i32, i32, i32, i32)* %good_ptr(i32 1, i32 2, i32 3, i32 4)
++ ret void
++}
++
++; We can't fold the load from the global into the push because of
++; interference from the store
++; NORMAL-LABEL: test11:
++; NORMAL: movl _the_global, [[EAX:%e..]]
++; NORMAL-NEXT: movl $42, _the_global
++; NORMAL-NEXT: pushl $4
++; NORMAL-NEXT: pushl $3
++; NORMAL-NEXT: pushl $2
++; NORMAL-NEXT: pushl [[EAX]]
++; NORMAL-NEXT: call
++; NORMAL-NEXT: addl $16, %esp
++@the_global = external global i32
++define void @test11() optsize {
++ %myload = load i32* @the_global
++ store i32 42, i32* @the_global
++ call void @good(i32 %myload, i32 2, i32 3, i32 4)
++ ret void
++}
diff --git a/contrib/llvm/patches/patch-08-llvm-r230348-arm-fix-bad-ha.diff b/contrib/llvm/patches/patch-08-llvm-r230348-arm-fix-bad-ha.diff
new file mode 100644
index 0000000..2896899
--- /dev/null
+++ b/contrib/llvm/patches/patch-08-llvm-r230348-arm-fix-bad-ha.diff
@@ -0,0 +1,419 @@
+Pull in r230348 from upstream llvm trunk (by Tim Northover):
+
+ ARM: treat [N x i32] and [N x i64] as AAPCS composite types
+
+ The logic is almost there already, with our special homogeneous
+ aggregate handling. Tweaking it like this allows front-ends to emit
+ AAPCS compliant code without ever having to count registers or add
+ discarded padding arguments.
+
+ Only arrays of i32 and i64 are needed to model AAPCS rules, but I
+ decided to apply the logic to all integer arrays for more consistency.
+
+This fixes a possible "Unexpected member type for HA" error when
+compiling lib/msun/bsdsrc/b_tgamma.c for armv6.
+
+Reported by: Jakub Palider <jpa@semihalf.com>
+
+Introduced here: https://svnweb.freebsd.org/changeset/base/280400
+
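+As an illustrative aside (not part of the upstream patch), here is a small,
+hypothetical C example of the kind of code this change models. On ARM, clang
+lowers a struct of two 64-bit integers to the IR array type [2 x i64], which
+this patch now treats as an AAPCS composite, so it may be passed in r0-r3 or
+split between registers and the stack without the front-end having to count
+registers or emit padding arguments:
+
+  /* hypothetical sketch; clang coerces the struct to [2 x i64] on ARM */
+  struct pair { long long a, b; };
+
+  long long sum(struct pair p) {
+    return p.a + p.b;   /* p arrives in r0-r3, or partly on the stack */
+  }
+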
+Index: include/llvm/CodeGen/CallingConvLower.h
+===================================================================
+--- include/llvm/CodeGen/CallingConvLower.h
++++ include/llvm/CodeGen/CallingConvLower.h
+@@ -122,8 +122,8 @@ class CCValAssign {
+ // There is no need to differentiate between a pending CCValAssign and other
+ // kinds, as they are stored in a different list.
+ static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT,
+- LocInfo HTP) {
+- return getReg(ValNo, ValVT, 0, LocVT, HTP);
++ LocInfo HTP, unsigned ExtraInfo = 0) {
++ return getReg(ValNo, ValVT, ExtraInfo, LocVT, HTP);
+ }
+
+ void convertToReg(unsigned RegNo) {
+@@ -146,6 +146,7 @@ class CCValAssign {
+
+ unsigned getLocReg() const { assert(isRegLoc()); return Loc; }
+ unsigned getLocMemOffset() const { assert(isMemLoc()); return Loc; }
++ unsigned getExtraInfo() const { return Loc; }
+ MVT getLocVT() const { return LocVT; }
+
+ LocInfo getLocInfo() const { return HTP; }
+Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+===================================================================
+--- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
++++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+@@ -7429,11 +7429,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLo
+ }
+ if (Args[i].isNest)
+ Flags.setNest();
+- if (NeedsRegBlock) {
++ if (NeedsRegBlock)
+ Flags.setInConsecutiveRegs();
+- if (Value == NumValues - 1)
+- Flags.setInConsecutiveRegsLast();
+- }
+ Flags.setOrigAlign(OriginalAlignment);
+
+ MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT);
+@@ -7482,6 +7479,9 @@ TargetLowering::LowerCallTo(TargetLowering::CallLo
+ CLI.Outs.push_back(MyFlags);
+ CLI.OutVals.push_back(Parts[j]);
+ }
++
++ if (NeedsRegBlock && Value == NumValues - 1)
++ CLI.Outs[CLI.Outs.size() - 1].Flags.setInConsecutiveRegsLast();
+ }
+ }
+
+@@ -7697,11 +7697,8 @@ void SelectionDAGISel::LowerArguments(const Functi
+ }
+ if (F.getAttributes().hasAttribute(Idx, Attribute::Nest))
+ Flags.setNest();
+- if (NeedsRegBlock) {
++ if (NeedsRegBlock)
+ Flags.setInConsecutiveRegs();
+- if (Value == NumValues - 1)
+- Flags.setInConsecutiveRegsLast();
+- }
+ Flags.setOrigAlign(OriginalAlignment);
+
+ MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
+@@ -7716,6 +7713,8 @@ void SelectionDAGISel::LowerArguments(const Functi
+ MyFlags.Flags.setOrigAlign(1);
+ Ins.push_back(MyFlags);
+ }
++ if (NeedsRegBlock && Value == NumValues - 1)
++ Ins[Ins.size() - 1].Flags.setInConsecutiveRegsLast();
+ PartBase += VT.getStoreSize();
+ }
+ }
+Index: lib/Target/ARM/ARMCallingConv.h
+===================================================================
+--- lib/Target/ARM/ARMCallingConv.h
++++ lib/Target/ARM/ARMCallingConv.h
+@@ -160,6 +160,8 @@ static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &V
+ State);
+ }
+
++static const uint16_t RRegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
++
+ static const uint16_t SRegList[] = { ARM::S0, ARM::S1, ARM::S2, ARM::S3,
+ ARM::S4, ARM::S5, ARM::S6, ARM::S7,
+ ARM::S8, ARM::S9, ARM::S10, ARM::S11,
+@@ -168,81 +170,114 @@ static const uint16_t DRegList[] = { ARM::D0, ARM:
+ ARM::D4, ARM::D5, ARM::D6, ARM::D7 };
+ static const uint16_t QRegList[] = { ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3 };
+
++
+ // Allocate part of an AAPCS HFA or HVA. We assume that each member of the HA
+ // has InConsecutiveRegs set, and that the last member also has
+ // InConsecutiveRegsLast set. We must process all members of the HA before
+ // we can allocate it, as we need to know the total number of registers that
+ // will be needed in order to (attempt to) allocate a contiguous block.
+-static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+- CCValAssign::LocInfo &LocInfo,
+- ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+- SmallVectorImpl<CCValAssign> &PendingHAMembers = State.getPendingLocs();
++static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned &ValNo, MVT &ValVT,
++ MVT &LocVT,
++ CCValAssign::LocInfo &LocInfo,
++ ISD::ArgFlagsTy &ArgFlags,
++ CCState &State) {
++ SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
+
+ // AAPCS HFAs must have 1-4 elements, all of the same type
+- assert(PendingHAMembers.size() < 4);
+- if (PendingHAMembers.size() > 0)
+- assert(PendingHAMembers[0].getLocVT() == LocVT);
++ if (PendingMembers.size() > 0)
++ assert(PendingMembers[0].getLocVT() == LocVT);
+
+ // Add the argument to the list to be allocated once we know the size of the
+- // HA
+- PendingHAMembers.push_back(
+- CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
++  // aggregate. Store the type's required alignment as extra info for later: in
++ // the [N x i64] case all trace has been removed by the time we actually get
++ // to do allocation.
++ PendingMembers.push_back(CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo,
++ ArgFlags.getOrigAlign()));
+
+- if (ArgFlags.isInConsecutiveRegsLast()) {
+- assert(PendingHAMembers.size() > 0 && PendingHAMembers.size() <= 4 &&
+- "Homogeneous aggregates must have between 1 and 4 members");
++ if (!ArgFlags.isInConsecutiveRegsLast())
++ return true;
+
+- // Try to allocate a contiguous block of registers, each of the correct
+- // size to hold one member.
+- ArrayRef<uint16_t> RegList;
+- switch (LocVT.SimpleTy) {
+- case MVT::f32:
+- RegList = SRegList;
+- break;
+- case MVT::f64:
+- RegList = DRegList;
+- break;
+- case MVT::v2f64:
+- RegList = QRegList;
+- break;
+- default:
+- llvm_unreachable("Unexpected member type for HA");
+- break;
+- }
++ // Try to allocate a contiguous block of registers, each of the correct
++ // size to hold one member.
++ unsigned Align = std::min(PendingMembers[0].getExtraInfo(), 8U);
+
+- unsigned RegResult =
+- State.AllocateRegBlock(RegList, PendingHAMembers.size());
++ ArrayRef<uint16_t> RegList;
++ switch (LocVT.SimpleTy) {
++ case MVT::i32: {
++ RegList = RRegList;
++ unsigned RegIdx = State.getFirstUnallocated(RegList.data(), RegList.size());
+
+- if (RegResult) {
+- for (SmallVectorImpl<CCValAssign>::iterator It = PendingHAMembers.begin();
+- It != PendingHAMembers.end(); ++It) {
+- It->convertToReg(RegResult);
+- State.addLoc(*It);
+- ++RegResult;
+- }
+- PendingHAMembers.clear();
+- return true;
+- }
++ // First consume all registers that would give an unaligned object. Whether
++ // we go on stack or in regs, no-one will be using them in future.
++ unsigned RegAlign = RoundUpToAlignment(Align, 4) / 4;
++ while (RegIdx % RegAlign != 0 && RegIdx < RegList.size())
++ State.AllocateReg(RegList[RegIdx++]);
+
+- // Register allocation failed, fall back to the stack
++ break;
++ }
++ case MVT::f32:
++ RegList = SRegList;
++ break;
++ case MVT::f64:
++ RegList = DRegList;
++ break;
++ case MVT::v2f64:
++ RegList = QRegList;
++ break;
++ default:
++ llvm_unreachable("Unexpected member type for block aggregate");
++ break;
++ }
+
+- // Mark all VFP regs as unavailable (AAPCS rule C.2.vfp)
+- for (unsigned regNo = 0; regNo < 16; ++regNo)
+- State.AllocateReg(SRegList[regNo]);
++ unsigned RegResult = State.AllocateRegBlock(RegList, PendingMembers.size());
++ if (RegResult) {
++ for (SmallVectorImpl<CCValAssign>::iterator It = PendingMembers.begin();
++ It != PendingMembers.end(); ++It) {
++ It->convertToReg(RegResult);
++ State.addLoc(*It);
++ ++RegResult;
++ }
++ PendingMembers.clear();
++ return true;
++ }
+
+- unsigned Size = LocVT.getSizeInBits() / 8;
+- unsigned Align = std::min(Size, 8U);
++ // Register allocation failed, we'll be needing the stack
++ unsigned Size = LocVT.getSizeInBits() / 8;
++ if (LocVT == MVT::i32 && State.getNextStackOffset() == 0) {
++ // If nothing else has used the stack until this point, a non-HFA aggregate
++ // can be split between regs and stack.
++ unsigned RegIdx = State.getFirstUnallocated(RegList.data(), RegList.size());
++ for (auto &It : PendingMembers) {
++ if (RegIdx >= RegList.size())
++ It.convertToMem(State.AllocateStack(Size, Size));
++ else
++ It.convertToReg(State.AllocateReg(RegList[RegIdx++]));
+
+- for (auto It : PendingHAMembers) {
+- It.convertToMem(State.AllocateStack(Size, Align));
+ State.addLoc(It);
+ }
++ PendingMembers.clear();
++ return true;
++ } else if (LocVT != MVT::i32)
++ RegList = SRegList;
+
+- // All pending members have now been allocated
+- PendingHAMembers.clear();
++ // Mark all regs as unavailable (AAPCS rule C.2.vfp for VFP, C.6 for core)
++ for (auto Reg : RegList)
++ State.AllocateReg(Reg);
++
++ for (auto &It : PendingMembers) {
++ It.convertToMem(State.AllocateStack(Size, Align));
++ State.addLoc(It);
++
++ // After the first item has been allocated, the rest are packed as tightly
++ // as possible. (E.g. an incoming i64 would have starting Align of 8, but
++ // we'll be allocating a bunch of i32 slots).
++ Align = Size;
+ }
+
+- // This will be allocated by the last member of the HA
++ // All pending members have now been allocated
++ PendingMembers.clear();
++
++ // This will be allocated by the last member of the aggregate
+ return true;
+ }
+
+Index: lib/Target/ARM/ARMCallingConv.td
+===================================================================
+--- lib/Target/ARM/ARMCallingConv.td
++++ lib/Target/ARM/ARMCallingConv.td
+@@ -175,7 +175,7 @@ def CC_ARM_AAPCS_VFP : CallingConv<[
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+ // HFAs are passed in a contiguous block of registers, or on the stack
+- CCIfConsecutiveRegs<CCCustom<"CC_ARM_AAPCS_Custom_HA">>,
++ CCIfConsecutiveRegs<CCCustom<"CC_ARM_AAPCS_Custom_Aggregate">>,
+
+ CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
+ CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
+Index: lib/Target/ARM/ARMISelLowering.cpp
+===================================================================
+--- lib/Target/ARM/ARMISelLowering.cpp
++++ lib/Target/ARM/ARMISelLowering.cpp
+@@ -11285,7 +11285,9 @@ static bool isHomogeneousAggregate(Type *Ty, HABas
+ return (Members > 0 && Members <= 4);
+ }
+
+-/// \brief Return true if a type is an AAPCS-VFP homogeneous aggregate.
++/// \brief Return true if a type is an AAPCS-VFP homogeneous aggregate or one of
++/// [N x i32] or [N x i64]. This allows front-ends to skip emitting padding when
++/// passing according to AAPCS rules.
+ bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters(
+ Type *Ty, CallingConv::ID CallConv, bool isVarArg) const {
+ if (getEffectiveCallingConv(CallConv, isVarArg) !=
+@@ -11294,7 +11296,9 @@ bool ARMTargetLowering::functionArgumentNeedsConse
+
+ HABaseType Base = HA_UNKNOWN;
+ uint64_t Members = 0;
+- bool result = isHomogeneousAggregate(Ty, Base, Members);
+- DEBUG(dbgs() << "isHA: " << result << " "; Ty->dump());
+- return result;
++ bool IsHA = isHomogeneousAggregate(Ty, Base, Members);
++ DEBUG(dbgs() << "isHA: " << IsHA << " "; Ty->dump());
++
++ bool IsIntArray = Ty->isArrayTy() && Ty->getArrayElementType()->isIntegerTy();
++ return IsHA || IsIntArray;
+ }
+Index: test/CodeGen/ARM/aggregate-padding.ll
+===================================================================
+--- test/CodeGen/ARM/aggregate-padding.ll
++++ test/CodeGen/ARM/aggregate-padding.ll
+@@ -0,0 +1,101 @@
++; RUN: llc -mtriple=armv7-linux-gnueabihf %s -o - | FileCheck %s
++
++; [2 x i64] should be contiguous when split (e.g. we shouldn't try to align all
++; i32 components to 64 bits). Also makes sure i64 based types are properly
++; aligned on the stack.
++define i64 @test_i64_contiguous_on_stack([8 x double], float, i32 %in, [2 x i64] %arg) nounwind {
++; CHECK-LABEL: test_i64_contiguous_on_stack:
++; CHECK-DAG: ldr [[LO0:r[0-9]+]], [sp, #8]
++; CHECK-DAG: ldr [[HI0:r[0-9]+]], [sp, #12]
++; CHECK-DAG: ldr [[LO1:r[0-9]+]], [sp, #16]
++; CHECK-DAG: ldr [[HI1:r[0-9]+]], [sp, #20]
++; CHECK: adds r0, [[LO0]], [[LO1]]
++; CHECK: adc r1, [[HI0]], [[HI1]]
++
++ %val1 = extractvalue [2 x i64] %arg, 0
++ %val2 = extractvalue [2 x i64] %arg, 1
++ %sum = add i64 %val1, %val2
++ ret i64 %sum
++}
++
++; [2 x i64] should use 4 regs, not 8 (which might happen if the
++; i64 -> i32, i32 split wasn't handled correctly).
++define i64 @test_2xi64_uses_4_regs([8 x double], float, [2 x i64] %arg) nounwind {
++; CHECK-LABEL: test_2xi64_uses_4_regs:
++; CHECK-DAG: mov r0, r2
++; CHECK-DAG: mov r1, r3
++
++ %val = extractvalue [2 x i64] %arg, 1
++ ret i64 %val
++}
++
++; An aggregate should be able to split between registers and stack if there is
++; nothing else on the stack.
++define i32 @test_aggregates_split([8 x double], i32, [4 x i32] %arg) nounwind {
++; CHECK-LABEL: test_aggregates_split:
++; CHECK: ldr [[VAL3:r[0-9]+]], [sp]
++; CHECK: add r0, r1, [[VAL3]]
++
++ %val0 = extractvalue [4 x i32] %arg, 0
++ %val3 = extractvalue [4 x i32] %arg, 3
++ %sum = add i32 %val0, %val3
++ ret i32 %sum
++}
++
++; If an aggregate has to be moved entirely onto the stack, nothing should be
++; able to use r0-r3 any more. Also checks that [2 x i64] properly aligned when
++; it uses regs.
++define i32 @test_no_int_backfilling([8 x double], float, i32, [2 x i64], i32 %arg) nounwind {
++; CHECK-LABEL: test_no_int_backfilling:
++; CHECK: ldr r0, [sp, #24]
++ ret i32 %arg
++}
++
++; Even if the argument was successfully allocated as reg block, there should be
++; no backfilling to r1.
++define i32 @test_no_int_backfilling_regsonly(i32, [1 x i64], i32 %arg) {
++; CHECK-LABEL: test_no_int_backfilling_regsonly:
++; CHECK: ldr r0, [sp]
++ ret i32 %arg
++}
++
++; If an aggregate has to be moved entirely onto the stack, nothing should be
++; able to use r0-r3 any more.
++define float @test_no_float_backfilling([7 x double], [4 x i32], i32, [4 x double], float %arg) nounwind {
++; CHECK-LABEL: test_no_float_backfilling:
++; CHECK: vldr s0, [sp, #40]
++ ret float %arg
++}
++
++; They're a bit pointless, but types like [N x i8] should work as well.
++define i8 @test_i8_in_regs(i32, [3 x i8] %arg) {
++; CHECK-LABEL: test_i8_in_regs:
++; CHECK: add r0, r1, r3
++ %val0 = extractvalue [3 x i8] %arg, 0
++ %val2 = extractvalue [3 x i8] %arg, 2
++ %sum = add i8 %val0, %val2
++ ret i8 %sum
++}
++
++define i16 @test_i16_split(i32, i32, [3 x i16] %arg) {
++; CHECK-LABEL: test_i16_split:
++; CHECK: ldrh [[VAL2:r[0-9]+]], [sp]
++; CHECK: add r0, r2, [[VAL2]]
++ %val0 = extractvalue [3 x i16] %arg, 0
++ %val2 = extractvalue [3 x i16] %arg, 2
++ %sum = add i16 %val0, %val2
++ ret i16 %sum
++}
++
++; Beware: on the stack each i16 still gets a 32-bit slot, the array is not
++; packed.
++define i16 @test_i16_forced_stack([8 x double], double, i32, i32, [3 x i16] %arg) {
++; CHECK-LABEL: test_i16_forced_stack:
++; CHECK-DAG: ldrh [[VAL0:r[0-9]+]], [sp, #8]
++; CHECK-DAG: ldrh [[VAL2:r[0-9]+]], [sp, #16]
++; CHECK: add r0, [[VAL0]], [[VAL2]]
++ %val0 = extractvalue [3 x i16] %arg, 0
++ %val2 = extractvalue [3 x i16] %arg, 2
++ %sum = add i16 %val0, %val2
++ ret i16 %sum
++}
diff --git a/contrib/llvm/patches/patch-09-clang-r227115-constantarraytype.diff b/contrib/llvm/patches/patch-09-clang-r227115-constantarraytype.diff
new file mode 100644
index 0000000..33ca358
--- /dev/null
+++ b/contrib/llvm/patches/patch-09-clang-r227115-constantarraytype.diff
@@ -0,0 +1,50 @@
+Pull in r227115 from upstream clang trunk (by Ben Langmuir):
+
+ Fix assert instantiating string init of static variable
+
+ ... when the variable's type is a typedef of a ConstantArrayType. Just
+ look through the typedef (and any other sugar). We only use the
+ constant array type here to get the element count.
+
+This fixes an assertion failure when building the games/redeclipse port.
+
+Introduced here: http://svnweb.freebsd.org/changeset/base/281046
+
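+As an illustrative aside (not part of the upstream patch), a minimal standalone
+reproducer equivalent to the regression test added below; the names are made up
+for the example:
+
+  typedef char MyString[16];               // typedef of a ConstantArrayType
+  template <typename T> struct Holder {
+    static MyString str;
+  };
+  template <typename T>
+  MyString Holder<T>::str = "";            // string init of the typedef'd array
+
+  void force() { (void)Holder<int>::str; } // instantiation triggered the assert
+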
+Index: tools/clang/lib/Sema/SemaInit.cpp
+===================================================================
+--- tools/clang/lib/Sema/SemaInit.cpp
++++ tools/clang/lib/Sema/SemaInit.cpp
+@@ -149,10 +149,10 @@ static void updateStringLiteralType(Expr *E, QualT
+ static void CheckStringInit(Expr *Str, QualType &DeclT, const ArrayType *AT,
+ Sema &S) {
+ // Get the length of the string as parsed.
+- uint64_t StrLength =
+- cast<ConstantArrayType>(Str->getType())->getSize().getZExtValue();
++ auto *ConstantArrayTy =
++ cast<ConstantArrayType>(Str->getType()->getUnqualifiedDesugaredType());
++ uint64_t StrLength = ConstantArrayTy->getSize().getZExtValue();
+
+-
+ if (const IncompleteArrayType *IAT = dyn_cast<IncompleteArrayType>(AT)) {
+ // C99 6.7.8p14. We have an array of character type with unknown size
+ // being initialized to a string literal.
+Index: tools/clang/test/SemaTemplate/instantiate-static-var.cpp
+===================================================================
+--- tools/clang/test/SemaTemplate/instantiate-static-var.cpp
++++ tools/clang/test/SemaTemplate/instantiate-static-var.cpp
+@@ -114,3 +114,15 @@ namespace PR6449 {
+ template class X1<char>;
+
+ }
++
++typedef char MyString[100];
++template <typename T>
++struct StaticVarWithTypedefString {
++ static MyString str;
++};
++template <typename T>
++MyString StaticVarWithTypedefString<T>::str = "";
++
++void testStaticVarWithTypedefString() {
++ (void)StaticVarWithTypedefString<int>::str;
++}