summary refs log tree commit diff stats
path: root/contrib/llvm/patches
diff options
context:
space:
mode:
author dim <dim@FreeBSD.org> 2015-05-30 15:36:23 +0000
committer dim <dim@FreeBSD.org> 2015-05-30 15:36:23 +0000
commit 3cd22c5584a700a2036b948ef96ae41bbd233bb6 (patch)
tree e9f6288dce1b090572b6ade998862abc53e54b80 /contrib/llvm/patches
parent e3e0f940d5a51d987e16f07a7e27c1a99c8972c2 (diff)
download FreeBSD-src-3cd22c5584a700a2036b948ef96ae41bbd233bb6.zip
FreeBSD-src-3cd22c5584a700a2036b948ef96ae41bbd233bb6.tar.gz
Drop llvm/clang patches which are no longer necessary.
Diffstat (limited to 'contrib/llvm/patches')
-rw-r--r-- contrib/llvm/patches/README.TXT | 8
-rw-r--r-- contrib/llvm/patches/patch-01-clang-vendor-suffix.diff (renamed from contrib/llvm/patches/patch-02-clang-vendor-suffix.diff) | 0
-rw-r--r-- contrib/llvm/patches/patch-01-freebsd-kprintf.diff | 381
-rw-r--r-- contrib/llvm/patches/patch-02-add-CC-aliases.diff (renamed from contrib/llvm/patches/patch-03-add-CC-aliases.diff) | 0
-rw-r--r-- contrib/llvm/patches/patch-03-enable-armv6-clrex.diff (renamed from contrib/llvm/patches/patch-05-enable-armv6-clrex.diff) | 0
-rw-r--r-- contrib/llvm/patches/patch-04-add-llvm-gvn-option.diff | 31
-rw-r--r-- contrib/llvm/patches/patch-04-clang-add-mips-triples.diff (renamed from contrib/llvm/patches/patch-06-clang-add-mips-triples.diff) | 0
-rw-r--r-- contrib/llvm/patches/patch-07-llvm-r227752-boot2-shrink.diff | 1271
-rw-r--r-- contrib/llvm/patches/patch-08-llvm-r230348-arm-fix-bad-ha.diff | 419
-rw-r--r-- contrib/llvm/patches/patch-09-clang-r227115-constantarraytype.diff | 50
10 files changed, 4 insertions, 2156 deletions
diff --git a/contrib/llvm/patches/README.TXT b/contrib/llvm/patches/README.TXT
index 7bc26d2..220baf0 100644
--- a/contrib/llvm/patches/README.TXT
+++ b/contrib/llvm/patches/README.TXT
@@ -1,11 +1,11 @@
This is a set of individual patches, which contain all the customizations to
llvm/clang currently in the FreeBSD base system. These can be applied in
-alphabetical order to a pristine llvm/clang 3.6.1 source tree, for example by
+alphabetical order to a pristine llvm/clang 3.7.0 source tree, for example by
doing:
-svn co https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_361/final llvm-3.6.1
-svn co https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_361/final llvm-3.6.1/tools/clang
-cd llvm-3.6.1
+svn co https://llvm.org/svn/llvm-project/llvm/trunk llvm-3.7.0
+svn co https://llvm.org/svn/llvm-project/cfe/trunk llvm-3.7.0/tools/clang
+cd llvm-3.7.0
for p in /usr/src/contrib/llvm/patches/patch-*.diff; do
patch -p0 -f -F0 -E -i $p -s || break
done
diff --git a/contrib/llvm/patches/patch-02-clang-vendor-suffix.diff b/contrib/llvm/patches/patch-01-clang-vendor-suffix.diff
index f94b9f3..f94b9f3 100644
--- a/contrib/llvm/patches/patch-02-clang-vendor-suffix.diff
+++ b/contrib/llvm/patches/patch-01-clang-vendor-suffix.diff
diff --git a/contrib/llvm/patches/patch-01-freebsd-kprintf.diff b/contrib/llvm/patches/patch-01-freebsd-kprintf.diff
deleted file mode 100644
index 252b4cd..0000000
--- a/contrib/llvm/patches/patch-01-freebsd-kprintf.diff
+++ /dev/null
@@ -1,381 +0,0 @@
-This patch adds support for the FreeBSD kernel specific printf format
-specifiers: %b, %D, %r and %y, via a new __freebsd_kprintf__ format
-string type.
-
-Sent upstream as http://reviews.llvm.org/D7154
-
-Index: tools/clang/include/clang/Analysis/Analyses/FormatString.h
-===================================================================
---- tools/clang/include/clang/Analysis/Analyses/FormatString.h
-+++ tools/clang/include/clang/Analysis/Analyses/FormatString.h
-@@ -161,6 +161,12 @@ class ConversionSpecifier {
- ObjCObjArg, // '@'
- ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg,
-
-+ // FreeBSD kernel specific specifiers.
-+ FreeBSDbArg,
-+ FreeBSDDArg,
-+ FreeBSDrArg,
-+ FreeBSDyArg,
-+
- // GlibC specific specifiers.
- PrintErrno, // 'm'
-
-@@ -204,7 +210,8 @@ class ConversionSpecifier {
- return EndScanList ? EndScanList - Position : 1;
- }
-
-- bool isIntArg() const { return kind >= IntArgBeg && kind <= IntArgEnd; }
-+ bool isIntArg() const { return (kind >= IntArgBeg && kind <= IntArgEnd) ||
-+ kind == FreeBSDrArg || kind == FreeBSDyArg; }
- bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; }
- bool isAnyIntArg() const { return kind >= IntArgBeg && kind <= UIntArgEnd; }
- const char *toString() const;
-@@ -646,7 +653,7 @@ class FormatStringHandler {
-
- bool ParsePrintfString(FormatStringHandler &H,
- const char *beg, const char *end, const LangOptions &LO,
-- const TargetInfo &Target);
-+ const TargetInfo &Target, bool isFreeBSDKPrintf);
-
- bool ParseFormatStringHasSArg(const char *beg, const char *end, const LangOptions &LO,
- const TargetInfo &Target);
-Index: tools/clang/include/clang/Sema/Sema.h
-===================================================================
---- tools/clang/include/clang/Sema/Sema.h
-+++ tools/clang/include/clang/Sema/Sema.h
-@@ -8567,6 +8567,7 @@ class Sema {
- FST_Strftime,
- FST_Strfmon,
- FST_Kprintf,
-+ FST_FreeBSDKPrintf,
- FST_Unknown
- };
- static FormatStringType GetFormatStringType(const FormatAttr *Format);
-Index: tools/clang/lib/Analysis/FormatString.cpp
-===================================================================
---- tools/clang/lib/Analysis/FormatString.cpp
-+++ tools/clang/lib/Analysis/FormatString.cpp
-@@ -552,6 +552,12 @@ const char *ConversionSpecifier::toString() const
- // Objective-C specific specifiers.
- case ObjCObjArg: return "@";
-
-+ // FreeBSD kernel specific specifiers.
-+ case FreeBSDbArg: return "b";
-+ case FreeBSDDArg: return "D";
-+ case FreeBSDrArg: return "r";
-+ case FreeBSDyArg: return "y";
-+
- // GlibC specific specifiers.
- case PrintErrno: return "m";
-
-@@ -647,6 +653,9 @@ bool FormatSpecifier::hasValidLengthModifier(const
- case ConversionSpecifier::XArg:
- case ConversionSpecifier::nArg:
- return true;
-+ case ConversionSpecifier::FreeBSDrArg:
-+ case ConversionSpecifier::FreeBSDyArg:
-+ return Target.getTriple().isOSFreeBSD();
- default:
- return false;
- }
-@@ -677,6 +686,9 @@ bool FormatSpecifier::hasValidLengthModifier(const
- case ConversionSpecifier::ScanListArg:
- case ConversionSpecifier::ZArg:
- return true;
-+ case ConversionSpecifier::FreeBSDrArg:
-+ case ConversionSpecifier::FreeBSDyArg:
-+ return Target.getTriple().isOSFreeBSD();
- default:
- return false;
- }
-@@ -807,6 +819,10 @@ bool FormatSpecifier::hasStandardConversionSpecifi
- case ConversionSpecifier::SArg:
- return LangOpt.ObjC1 || LangOpt.ObjC2;
- case ConversionSpecifier::InvalidSpecifier:
-+ case ConversionSpecifier::FreeBSDbArg:
-+ case ConversionSpecifier::FreeBSDDArg:
-+ case ConversionSpecifier::FreeBSDrArg:
-+ case ConversionSpecifier::FreeBSDyArg:
- case ConversionSpecifier::PrintErrno:
- case ConversionSpecifier::DArg:
- case ConversionSpecifier::OArg:
-Index: tools/clang/lib/Analysis/PrintfFormatString.cpp
-===================================================================
---- tools/clang/lib/Analysis/PrintfFormatString.cpp
-+++ tools/clang/lib/Analysis/PrintfFormatString.cpp
-@@ -55,7 +55,8 @@ static PrintfSpecifierResult ParsePrintfSpecifier(
- unsigned &argIndex,
- const LangOptions &LO,
- const TargetInfo &Target,
-- bool Warn) {
-+ bool Warn,
-+ bool isFreeBSDKPrintf) {
-
- using namespace clang::analyze_format_string;
- using namespace clang::analyze_printf;
-@@ -206,9 +207,24 @@ static PrintfSpecifierResult ParsePrintfSpecifier(
- case '@': k = ConversionSpecifier::ObjCObjArg; break;
- // Glibc specific.
- case 'm': k = ConversionSpecifier::PrintErrno; break;
-+ // FreeBSD kernel specific.
-+ case 'b':
-+ if (isFreeBSDKPrintf)
-+ k = ConversionSpecifier::FreeBSDbArg; // int followed by char *
-+ break;
-+ case 'r':
-+ if (isFreeBSDKPrintf)
-+ k = ConversionSpecifier::FreeBSDrArg; // int
-+ break;
-+ case 'y':
-+ if (isFreeBSDKPrintf)
-+ k = ConversionSpecifier::FreeBSDyArg; // int
-+ break;
- // Apple-specific.
- case 'D':
-- if (Target.getTriple().isOSDarwin())
-+ if (isFreeBSDKPrintf)
-+ k = ConversionSpecifier::FreeBSDDArg; // void * followed by char *
-+ else if (Target.getTriple().isOSDarwin())
- k = ConversionSpecifier::DArg;
- break;
- case 'O':
-@@ -228,6 +244,10 @@ static PrintfSpecifierResult ParsePrintfSpecifier(
- FS.setConversionSpecifier(CS);
- if (CS.consumesDataArgument() && !FS.usesPositionalArg())
- FS.setArgIndex(argIndex++);
-+ // FreeBSD kernel specific.
-+ if (k == ConversionSpecifier::FreeBSDbArg ||
-+ k == ConversionSpecifier::FreeBSDDArg)
-+ argIndex++;
-
- if (k == ConversionSpecifier::InvalidSpecifier) {
- // Assume the conversion takes one argument.
-@@ -240,7 +260,8 @@ bool clang::analyze_format_string::ParsePrintfStri
- const char *I,
- const char *E,
- const LangOptions &LO,
-- const TargetInfo &Target) {
-+ const TargetInfo &Target,
-+ bool isFreeBSDKPrintf) {
-
- unsigned argIndex = 0;
-
-@@ -247,7 +268,8 @@ bool clang::analyze_format_string::ParsePrintfStri
- // Keep looking for a format specifier until we have exhausted the string.
- while (I != E) {
- const PrintfSpecifierResult &FSR = ParsePrintfSpecifier(H, I, E, argIndex,
-- LO, Target, true);
-+ LO, Target, true,
-+ isFreeBSDKPrintf);
- // Did a fail-stop error of any kind occur when parsing the specifier?
- // If so, don't do any more processing.
- if (FSR.shouldStop())
-@@ -276,7 +298,8 @@ bool clang::analyze_format_string::ParseFormatStri
- FormatStringHandler H;
- while (I != E) {
- const PrintfSpecifierResult &FSR = ParsePrintfSpecifier(H, I, E, argIndex,
-- LO, Target, false);
-+ LO, Target, false,
-+ false);
- // Did a fail-stop error of any kind occur when parsing the specifier?
- // If so, don't do any more processing.
- if (FSR.shouldStop())
-@@ -674,6 +697,8 @@ bool PrintfSpecifier::hasValidPlusPrefix() const {
- case ConversionSpecifier::GArg:
- case ConversionSpecifier::aArg:
- case ConversionSpecifier::AArg:
-+ case ConversionSpecifier::FreeBSDrArg:
-+ case ConversionSpecifier::FreeBSDyArg:
- return true;
-
- default:
-@@ -699,6 +724,8 @@ bool PrintfSpecifier::hasValidAlternativeForm() co
- case ConversionSpecifier::FArg:
- case ConversionSpecifier::gArg:
- case ConversionSpecifier::GArg:
-+ case ConversionSpecifier::FreeBSDrArg:
-+ case ConversionSpecifier::FreeBSDyArg:
- return true;
-
- default:
-@@ -729,6 +756,8 @@ bool PrintfSpecifier::hasValidLeadingZeros() const
- case ConversionSpecifier::FArg:
- case ConversionSpecifier::gArg:
- case ConversionSpecifier::GArg:
-+ case ConversionSpecifier::FreeBSDrArg:
-+ case ConversionSpecifier::FreeBSDyArg:
- return true;
-
- default:
-@@ -753,6 +782,8 @@ bool PrintfSpecifier::hasValidSpacePrefix() const
- case ConversionSpecifier::GArg:
- case ConversionSpecifier::aArg:
- case ConversionSpecifier::AArg:
-+ case ConversionSpecifier::FreeBSDrArg:
-+ case ConversionSpecifier::FreeBSDyArg:
- return true;
-
- default:
-@@ -818,6 +849,8 @@ bool PrintfSpecifier::hasValidPrecision() const {
- case ConversionSpecifier::gArg:
- case ConversionSpecifier::GArg:
- case ConversionSpecifier::sArg:
-+ case ConversionSpecifier::FreeBSDrArg:
-+ case ConversionSpecifier::FreeBSDyArg:
- return true;
-
- default:
-Index: tools/clang/lib/Sema/SemaChecking.cpp
-===================================================================
---- tools/clang/lib/Sema/SemaChecking.cpp
-+++ tools/clang/lib/Sema/SemaChecking.cpp
-@@ -2603,6 +2603,7 @@ Sema::FormatStringType Sema::GetFormatStringType(c
- .Case("strftime", FST_Strftime)
- .Case("strfmon", FST_Strfmon)
- .Cases("kprintf", "cmn_err", "vcmn_err", "zcmn_err", FST_Kprintf)
-+ .Case("freebsd_kprintf", FST_FreeBSDKPrintf)
- .Default(FST_Unknown);
- }
-
-@@ -3384,6 +3385,43 @@ CheckPrintfHandler::HandlePrintfSpecifier(const an
- CoveredArgs.set(argIndex);
- }
-
-+ // FreeBSD kernel extensions.
-+ if (CS.getKind() == ConversionSpecifier::FreeBSDbArg ||
-+ CS.getKind() == ConversionSpecifier::FreeBSDDArg) {
-+ // We need at least two arguments.
-+ if (!CheckNumArgs(FS, CS, startSpecifier, specifierLen, argIndex + 1))
-+ return false;
-+
-+ // Claim the second argument.
-+ CoveredArgs.set(argIndex + 1);
-+
-+ // Type check the first argument (int for %b, pointer for %D)
-+ const Expr *Ex = getDataArg(argIndex);
-+ const analyze_printf::ArgType &AT =
-+ (CS.getKind() == ConversionSpecifier::FreeBSDbArg) ?
-+ ArgType(S.Context.IntTy) : ArgType::CPointerTy;
-+ if (AT.isValid() && !AT.matchesType(S.Context, Ex->getType()))
-+ EmitFormatDiagnostic(
-+ S.PDiag(diag::warn_format_conversion_argument_type_mismatch)
-+ << AT.getRepresentativeTypeName(S.Context) << Ex->getType()
-+ << false << Ex->getSourceRange(),
-+ Ex->getLocStart(), /*IsStringLocation*/false,
-+ getSpecifierRange(startSpecifier, specifierLen));
-+
-+ // Type check the second argument (char * for both %b and %D)
-+ Ex = getDataArg(argIndex + 1);
-+ const analyze_printf::ArgType &AT2 = ArgType::CStrTy;
-+ if (AT2.isValid() && !AT2.matchesType(S.Context, Ex->getType()))
-+ EmitFormatDiagnostic(
-+ S.PDiag(diag::warn_format_conversion_argument_type_mismatch)
-+ << AT2.getRepresentativeTypeName(S.Context) << Ex->getType()
-+ << false << Ex->getSourceRange(),
-+ Ex->getLocStart(), /*IsStringLocation*/false,
-+ getSpecifierRange(startSpecifier, specifierLen));
-+
-+ return true;
-+ }
-+
- // Check for using an Objective-C specific conversion specifier
- // in a non-ObjC literal.
- if (!ObjCContext && CS.isObjCArg()) {
-@@ -4007,7 +4045,8 @@ void Sema::CheckFormatString(const StringLiteral *
- return;
- }
-
-- if (Type == FST_Printf || Type == FST_NSString) {
-+ if (Type == FST_Printf || Type == FST_NSString ||
-+ Type == FST_FreeBSDKPrintf) {
- CheckPrintfHandler H(*this, FExpr, OrigFormatExpr, firstDataArg,
- numDataArgs, (Type == FST_NSString),
- Str, HasVAListArg, Args, format_idx,
-@@ -4015,7 +4054,8 @@ void Sema::CheckFormatString(const StringLiteral *
-
- if (!analyze_format_string::ParsePrintfString(H, Str, Str + StrLen,
- getLangOpts(),
-- Context.getTargetInfo()))
-+ Context.getTargetInfo(),
-+ Type == FST_FreeBSDKPrintf))
- H.DoneProcessing();
- } else if (Type == FST_Scanf) {
- CheckScanfHandler H(*this, FExpr, OrigFormatExpr, firstDataArg, numDataArgs,
-Index: tools/clang/lib/Sema/SemaDeclAttr.cpp
-===================================================================
---- tools/clang/lib/Sema/SemaDeclAttr.cpp
-+++ tools/clang/lib/Sema/SemaDeclAttr.cpp
-@@ -2481,6 +2481,7 @@ static FormatAttrKind getFormatAttrKind(StringRef
- .Cases("scanf", "printf", "printf0", "strfmon", SupportedFormat)
- .Cases("cmn_err", "vcmn_err", "zcmn_err", SupportedFormat)
- .Case("kprintf", SupportedFormat) // OpenBSD.
-+ .Case("freebsd_kprintf", SupportedFormat) // FreeBSD.
-
- .Cases("gcc_diag", "gcc_cdiag", "gcc_cxxdiag", "gcc_tdiag", IgnoredFormat)
- .Default(InvalidFormat);
-Index: tools/clang/test/Sema/attr-format.c
-===================================================================
---- tools/clang/test/Sema/attr-format.c
-+++ tools/clang/test/Sema/attr-format.c
-@@ -57,8 +57,15 @@ void callnull(void){
- null(0, (int*)0); // expected-warning {{incompatible pointer types}}
- }
-
-+// FreeBSD kernel extensions
-+void a3(const char *a, ...) __attribute__((format(freebsd_kprintf, 1,2))); // no-error
-+void b3(const char *a, ...) __attribute__((format(freebsd_kprintf, 1,1))); // expected-error {{'format' attribute parameter 3 is out of bounds}}
-+void c3(const char *a, ...) __attribute__((format(freebsd_kprintf, 0,2))); // expected-error {{'format' attribute parameter 2 is out of bounds}}
-+void d3(const char *a, int c) __attribute__((format(freebsd_kprintf, 1,2))); // expected-error {{format attribute requires variadic function}}
-+void e3(char *str, int c, ...) __attribute__((format(freebsd_kprintf, 2,3))); // expected-error {{format argument not a string type}}
-
-
-+
- // PR4470
- int xx_vprintf(const char *, va_list);
-
-Index: tools/clang/test/Sema/format-strings-freebsd.c
-===================================================================
---- tools/clang/test/Sema/format-strings-freebsd.c
-+++ tools/clang/test/Sema/format-strings-freebsd.c
-@@ -0,0 +1,40 @@
-+// RUN: %clang_cc1 -fsyntax-only -verify -triple i386-unknown-freebsd %s
-+// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64-unknown-freebsd %s
-+
-+// Test FreeBSD kernel printf extensions.
-+int freebsd_kernel_printf(const char *, ...) __attribute__((__format__(__freebsd_kprintf__, 1, 2)));
-+
-+void check_freebsd_kernel_extensions(int i, long l, char *s)
-+{
-+ // %b expects an int and a char *
-+ freebsd_kernel_printf("reg=%b\n", i, "\10\2BITTWO\1BITONE\n"); // no-warning
-+ freebsd_kernel_printf("reg=%b\n", l, "\10\2BITTWO\1BITONE\n"); // expected-warning{{format specifies type 'int' but the argument has type 'long'}}
-+ freebsd_kernel_printf("reg=%b\n", i, l); // expected-warning{{format specifies type 'char *' but the argument has type 'long'}}
-+ freebsd_kernel_printf("reg=%b\n", i); // expected-warning{{more '%' conversions than data arguments}}
-+ freebsd_kernel_printf("reg=%b\n", i, "\10\2BITTWO\1BITONE\n", l); // expected-warning{{data argument not used by format string}}
-+
-+ // %D expects an unsigned char * and a char *
-+ freebsd_kernel_printf("%6D", s, ":"); // no-warning
-+ freebsd_kernel_printf("%6D", i, ":"); // expected-warning{{format specifies type 'void *' but the argument has type 'int'}}
-+ freebsd_kernel_printf("%6D", s, i); // expected-warning{{format specifies type 'char *' but the argument has type 'int'}}
-+ freebsd_kernel_printf("%6D", s); // expected-warning{{more '%' conversions than data arguments}}
-+ freebsd_kernel_printf("%6D", s, ":", i); // expected-warning{{data argument not used by format string}}
-+
-+ freebsd_kernel_printf("%*D", 42, s, ":"); // no-warning
-+ freebsd_kernel_printf("%*D", 42, i, ":"); // expected-warning{{format specifies type 'void *' but the argument has type 'int'}}
-+ freebsd_kernel_printf("%*D", 42, s, i); // expected-warning{{format specifies type 'char *' but the argument has type 'int'}}
-+ freebsd_kernel_printf("%*D", 42, s); // expected-warning{{more '%' conversions than data arguments}}
-+ freebsd_kernel_printf("%*D", 42, s, ":", i); // expected-warning{{data argument not used by format string}}
-+
-+ // %r expects an int
-+ freebsd_kernel_printf("%r", i); // no-warning
-+ freebsd_kernel_printf("%r", l); // expected-warning{{format specifies type 'int' but the argument has type 'long'}}
-+ freebsd_kernel_printf("%lr", i); // expected-warning{{format specifies type 'long' but the argument has type 'int'}}
-+ freebsd_kernel_printf("%lr", l); // no-warning
-+
-+ // %y expects an int
-+ freebsd_kernel_printf("%y", i); // no-warning
-+ freebsd_kernel_printf("%y", l); // expected-warning{{format specifies type 'int' but the argument has type 'long'}}
-+ freebsd_kernel_printf("%ly", i); // expected-warning{{format specifies type 'long' but the argument has type 'int'}}
-+ freebsd_kernel_printf("%ly", l); // no-warning
-+}
diff --git a/contrib/llvm/patches/patch-03-add-CC-aliases.diff b/contrib/llvm/patches/patch-02-add-CC-aliases.diff
index 884b14c..884b14c 100644
--- a/contrib/llvm/patches/patch-03-add-CC-aliases.diff
+++ b/contrib/llvm/patches/patch-02-add-CC-aliases.diff
diff --git a/contrib/llvm/patches/patch-05-enable-armv6-clrex.diff b/contrib/llvm/patches/patch-03-enable-armv6-clrex.diff
index 574e3bd..574e3bd 100644
--- a/contrib/llvm/patches/patch-05-enable-armv6-clrex.diff
+++ b/contrib/llvm/patches/patch-03-enable-armv6-clrex.diff
diff --git a/contrib/llvm/patches/patch-04-add-llvm-gvn-option.diff b/contrib/llvm/patches/patch-04-add-llvm-gvn-option.diff
deleted file mode 100644
index 5f31623..0000000
--- a/contrib/llvm/patches/patch-04-add-llvm-gvn-option.diff
+++ /dev/null
@@ -1,31 +0,0 @@
-Add an llvm option to enable/disable running the global value numbering
-optimization pass. Disabling this pass helps to minimize the size of
-boot2.
-
-Introduced here: http://svnweb.freebsd.org/changeset/base/274968
-
-Index: lib/Transforms/IPO/PassManagerBuilder.cpp
-===================================================================
---- lib/Transforms/IPO/PassManagerBuilder.cpp
-+++ lib/Transforms/IPO/PassManagerBuilder.cpp
-@@ -78,6 +78,10 @@ static cl::opt<bool>
- EnableMLSM("mlsm", cl::init(true), cl::Hidden,
- cl::desc("Enable motion of merged load and store"));
-
-+static cl::opt<bool> EnableGVN("enable-gvn",
-+ cl::init(true), cl::Hidden,
-+ cl::desc("Run the global value numbering pass"));
-+
- PassManagerBuilder::PassManagerBuilder() {
- OptLevel = 2;
- SizeLevel = 0;
-@@ -244,7 +248,8 @@ void PassManagerBuilder::populateModulePassManager
- if (OptLevel > 1) {
- if (EnableMLSM)
- MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds
-- MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies
-+ if (EnableGVN)
-+ MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies
- }
- MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset
- MPM.add(createSCCPPass()); // Constant prop with SCCP
diff --git a/contrib/llvm/patches/patch-06-clang-add-mips-triples.diff b/contrib/llvm/patches/patch-04-clang-add-mips-triples.diff
index 2a66949..2a66949 100644
--- a/contrib/llvm/patches/patch-06-clang-add-mips-triples.diff
+++ b/contrib/llvm/patches/patch-04-clang-add-mips-triples.diff
diff --git a/contrib/llvm/patches/patch-07-llvm-r227752-boot2-shrink.diff b/contrib/llvm/patches/patch-07-llvm-r227752-boot2-shrink.diff
deleted file mode 100644
index 57e16d7..0000000
--- a/contrib/llvm/patches/patch-07-llvm-r227752-boot2-shrink.diff
+++ /dev/null
@@ -1,1271 +0,0 @@
-Pull in r227752 from upstream llvm trunk (by Michael Kuperstein):
-
- [X86] Convert esp-relative movs of function arguments to pushes, step 2
-
- This moves the transformation introduced in r223757 into a separate MI pass.
- This allows it to cover many more cases (not only cases where there must be a
- reserved call frame), and perform rudimentary call folding. It still doesn't
- have a heuristic, so it is enabled only for optsize/minsize, with stack
- alignment <= 8, where it ought to be a fairly clear win.
-
- (Re-commit of r227728)
-
- Differential Revision: http://reviews.llvm.org/D6789
-
-This helps to get sys/boot/i386/boot2 below the required size again,
-when optimizing with -Oz.
-
-Introduced here: http://svnweb.freebsd.org/changeset/base/278112
-
-Index: include/llvm/Target/TargetFrameLowering.h
-===================================================================
---- include/llvm/Target/TargetFrameLowering.h
-+++ include/llvm/Target/TargetFrameLowering.h
-@@ -193,6 +193,11 @@ class TargetFrameLowering {
- return hasReservedCallFrame(MF) || hasFP(MF);
- }
-
-+ // needsFrameIndexResolution - Do we need to perform FI resolution for
-+ // this function. Normally, this is required only when the function
-+ // has any stack objects. However, targets may want to override this.
-+ virtual bool needsFrameIndexResolution(const MachineFunction &MF) const;
-+
- /// getFrameIndexOffset - Returns the displacement from the frame register to
- /// the stack frame of the specified index.
- virtual int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
-Index: lib/CodeGen/PrologEpilogInserter.cpp
-===================================================================
---- lib/CodeGen/PrologEpilogInserter.cpp
-+++ lib/CodeGen/PrologEpilogInserter.cpp
-@@ -703,7 +703,8 @@ void PEI::insertPrologEpilogCode(MachineFunction &
- /// register references and actual offsets.
- ///
- void PEI::replaceFrameIndices(MachineFunction &Fn) {
-- if (!Fn.getFrameInfo()->hasStackObjects()) return; // Nothing to do?
-+ const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
-+ if (!TFI.needsFrameIndexResolution(Fn)) return;
-
- // Store SPAdj at exit of a basic block.
- SmallVector<int, 8> SPState;
-@@ -769,13 +770,6 @@ void PEI::replaceFrameIndices(MachineBasicBlock *B
- continue;
- }
-
-- // If we are looking at a call sequence, we need to keep track of
-- // the SP adjustment made by each instruction in the sequence.
-- // This includes both the frame setup/destroy pseudos (handled above),
-- // as well as other instructions that have side effects w.r.t the SP.
-- if (InsideCallSequence)
-- SPAdj += TII.getSPAdjust(I);
--
- MachineInstr *MI = I;
- bool DoIncr = true;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
-@@ -854,6 +848,16 @@ void PEI::replaceFrameIndices(MachineBasicBlock *B
- break;
- }
-
-+ // If we are looking at a call sequence, we need to keep track of
-+ // the SP adjustment made by each instruction in the sequence.
-+ // This includes both the frame setup/destroy pseudos (handled above),
-+ // as well as other instructions that have side effects w.r.t the SP.
-+ // Note that this must come after eliminateFrameIndex, because
-+ // if I itself referred to a frame index, we shouldn't count its own
-+ // adjustment.
-+ if (MI && InsideCallSequence)
-+ SPAdj += TII.getSPAdjust(MI);
-+
- if (DoIncr && I != BB->end()) ++I;
-
- // Update register states.
-Index: lib/CodeGen/TargetFrameLoweringImpl.cpp
-===================================================================
---- lib/CodeGen/TargetFrameLoweringImpl.cpp
-+++ lib/CodeGen/TargetFrameLoweringImpl.cpp
-@@ -42,3 +42,8 @@ int TargetFrameLowering::getFrameIndexReference(co
- FrameReg = RI->getFrameRegister(MF);
- return getFrameIndexOffset(MF, FI);
- }
-+
-+bool TargetFrameLowering::needsFrameIndexResolution(
-+ const MachineFunction &MF) const {
-+ return MF.getFrameInfo()->hasStackObjects();
-+}
-Index: lib/Target/X86/CMakeLists.txt
-===================================================================
---- lib/Target/X86/CMakeLists.txt
-+++ lib/Target/X86/CMakeLists.txt
-@@ -14,6 +14,7 @@ add_public_tablegen_target(X86CommonTableGen)
-
- set(sources
- X86AsmPrinter.cpp
-+ X86CallFrameOptimization.cpp
- X86FastISel.cpp
- X86FloatingPoint.cpp
- X86FrameLowering.cpp
-Index: lib/Target/X86/X86.h
-===================================================================
---- lib/Target/X86/X86.h
-+++ lib/Target/X86/X86.h
-@@ -67,6 +67,11 @@ FunctionPass *createX86PadShortFunctions();
- /// to eliminate execution delays in some Atom processors.
- FunctionPass *createX86FixupLEAs();
-
-+/// createX86CallFrameOptimization - Return a pass that optimizes
-+/// the code-size of x86 call sequences. This is done by replacing
-+/// esp-relative movs with pushes.
-+FunctionPass *createX86CallFrameOptimization();
-+
- } // End llvm namespace
-
- #endif
-Index: lib/Target/X86/X86CallFrameOptimization.cpp
-===================================================================
---- lib/Target/X86/X86CallFrameOptimization.cpp
-+++ lib/Target/X86/X86CallFrameOptimization.cpp
-@@ -0,0 +1,400 @@
-+//===----- X86CallFrameOptimization.cpp - Optimize x86 call sequences -----===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+// This file defines a pass that optimizes call sequences on x86.
-+// Currently, it converts movs of function parameters onto the stack into
-+// pushes. This is beneficial for two main reasons:
-+// 1) The push instruction encoding is much smaller than an esp-relative mov
-+// 2) It is possible to push memory arguments directly. So, if the
-+// transformation is performed pre-reg-alloc, it can help relieve
-+// register pressure.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#include <algorithm>
-+
-+#include "X86.h"
-+#include "X86InstrInfo.h"
-+#include "X86Subtarget.h"
-+#include "X86MachineFunctionInfo.h"
-+#include "llvm/ADT/Statistic.h"
-+#include "llvm/CodeGen/MachineFunctionPass.h"
-+#include "llvm/CodeGen/MachineInstrBuilder.h"
-+#include "llvm/CodeGen/MachineRegisterInfo.h"
-+#include "llvm/CodeGen/Passes.h"
-+#include "llvm/IR/Function.h"
-+#include "llvm/Support/Debug.h"
-+#include "llvm/Support/raw_ostream.h"
-+#include "llvm/Target/TargetInstrInfo.h"
-+
-+using namespace llvm;
-+
-+#define DEBUG_TYPE "x86-cf-opt"
-+
-+cl::opt<bool> NoX86CFOpt("no-x86-call-frame-opt",
-+ cl::desc("Avoid optimizing x86 call frames for size"),
-+ cl::init(false), cl::Hidden);
-+
-+namespace {
-+class X86CallFrameOptimization : public MachineFunctionPass {
-+public:
-+ X86CallFrameOptimization() : MachineFunctionPass(ID) {}
-+
-+ bool runOnMachineFunction(MachineFunction &MF) override;
-+
-+private:
-+ bool shouldPerformTransformation(MachineFunction &MF);
-+
-+ bool adjustCallSequence(MachineFunction &MF, MachineBasicBlock &MBB,
-+ MachineBasicBlock::iterator I);
-+
-+ MachineInstr *canFoldIntoRegPush(MachineBasicBlock::iterator FrameSetup,
-+ unsigned Reg);
-+
-+ const char *getPassName() const override {
-+ return "X86 Optimize Call Frame";
-+ }
-+
-+ const TargetInstrInfo *TII;
-+ const TargetFrameLowering *TFL;
-+ const MachineRegisterInfo *MRI;
-+ static char ID;
-+};
-+
-+char X86CallFrameOptimization::ID = 0;
-+}
-+
-+FunctionPass *llvm::createX86CallFrameOptimization() {
-+ return new X86CallFrameOptimization();
-+}
-+
-+// This checks whether the transformation is legal and profitable
-+bool X86CallFrameOptimization::shouldPerformTransformation(MachineFunction &MF) {
-+ if (NoX86CFOpt.getValue())
-+ return false;
-+
-+ // We currently only support call sequences where *all* parameters
-+ // are passed on the stack.
-+ // No point in running this in 64-bit mode, since some arguments are
-+ // passed in-register in all common calling conventions, so the pattern
-+ // we're looking for will never match.
-+ const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
-+ if (STI.is64Bit())
-+ return false;
-+
-+ // You would expect straight-line code between call-frame setup and
-+ // call-frame destroy. You would be wrong. There are circumstances (e.g.
-+ // CMOV_GR8 expansion of a select that feeds a function call!) where we can
-+ // end up with the setup and the destroy in different basic blocks.
-+ // This is bad, and breaks SP adjustment.
-+ // So, check that all of the frames in the function are closed inside
-+ // the same block, and, for good measure, that there are no nested frames.
-+ int FrameSetupOpcode = TII->getCallFrameSetupOpcode();
-+ int FrameDestroyOpcode = TII->getCallFrameDestroyOpcode();
-+ for (MachineBasicBlock &BB : MF) {
-+ bool InsideFrameSequence = false;
-+ for (MachineInstr &MI : BB) {
-+ if (MI.getOpcode() == FrameSetupOpcode) {
-+ if (InsideFrameSequence)
-+ return false;
-+ InsideFrameSequence = true;
-+ }
-+ else if (MI.getOpcode() == FrameDestroyOpcode) {
-+ if (!InsideFrameSequence)
-+ return false;
-+ InsideFrameSequence = false;
-+ }
-+ }
-+
-+ if (InsideFrameSequence)
-+ return false;
-+ }
-+
-+ // Now that we know the transformation is legal, check if it is
-+ // profitable.
-+ // TODO: Add a heuristic that actually looks at the function,
-+ // and enable this for more cases.
-+
-+ // This transformation is always a win when we do not expect to have
-+ // a reserved call frame. Under other circumstances, it may be either
-+ // a win or a loss, and requires a heuristic.
-+ // For now, enable it only for the relatively clear win cases.
-+ bool CannotReserveFrame = MF.getFrameInfo()->hasVarSizedObjects();
-+ if (CannotReserveFrame)
-+ return true;
-+
-+ // For now, don't even try to evaluate the profitability when
-+ // not optimizing for size.
-+ AttributeSet FnAttrs = MF.getFunction()->getAttributes();
-+ bool OptForSize =
-+ FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
-+ Attribute::OptimizeForSize) ||
-+ FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
-+
-+ if (!OptForSize)
-+ return false;
-+
-+ // Stack re-alignment can make this unprofitable even in terms of size.
-+ // As mentioned above, a better heuristic is needed. For now, don't do this
-+ // when the required alignment is above 8. (4 would be the safe choice, but
-+ // some experimentation showed 8 is generally good).
-+ if (TFL->getStackAlignment() > 8)
-+ return false;
-+
-+ return true;
-+}
-+
-+bool X86CallFrameOptimization::runOnMachineFunction(MachineFunction &MF) {
-+ TII = MF.getSubtarget().getInstrInfo();
-+ TFL = MF.getSubtarget().getFrameLowering();
-+ MRI = &MF.getRegInfo();
-+
-+ if (!shouldPerformTransformation(MF))
-+ return false;
-+
-+ int FrameSetupOpcode = TII->getCallFrameSetupOpcode();
-+
-+ bool Changed = false;
-+
-+ for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB)
-+ for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I)
-+ if (I->getOpcode() == FrameSetupOpcode)
-+ Changed |= adjustCallSequence(MF, *BB, I);
-+
-+ return Changed;
-+}
-+
-+bool X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
-+ MachineBasicBlock &MBB,
-+ MachineBasicBlock::iterator I) {
-+
-+ // Check that this particular call sequence is amenable to the
-+ // transformation.
-+ const X86RegisterInfo &RegInfo = *static_cast<const X86RegisterInfo *>(
-+ MF.getSubtarget().getRegisterInfo());
-+ unsigned StackPtr = RegInfo.getStackRegister();
-+ int FrameDestroyOpcode = TII->getCallFrameDestroyOpcode();
-+
-+ // We expect to enter this at the beginning of a call sequence
-+ assert(I->getOpcode() == TII->getCallFrameSetupOpcode());
-+ MachineBasicBlock::iterator FrameSetup = I++;
-+
-+
-+ // For globals in PIC mode, we can have some LEAs here.
-+ // Ignore them, they don't bother us.
-+ // TODO: Extend this to something that covers more cases.
-+ while (I->getOpcode() == X86::LEA32r)
-+ ++I;
-+
-+ // We expect a copy instruction here.
-+ // TODO: The copy instruction is a lowering artifact.
-+ // We should also support a copy-less version, where the stack
-+ // pointer is used directly.
-+ if (!I->isCopy() || !I->getOperand(0).isReg())
-+ return false;
-+ MachineBasicBlock::iterator SPCopy = I++;
-+ StackPtr = SPCopy->getOperand(0).getReg();
-+
-+ // Scan the call setup sequence for the pattern we're looking for.
-+ // We only handle a simple case - a sequence of MOV32mi or MOV32mr
-+ // instructions, that push a sequence of 32-bit values onto the stack, with
-+ // no gaps between them.
-+ SmallVector<MachineInstr*, 4> MovVector(4, nullptr);
-+ unsigned int MaxAdjust = FrameSetup->getOperand(0).getImm() / 4;
-+ if (MaxAdjust > 4)
-+ MovVector.resize(MaxAdjust, nullptr);
-+
-+ do {
-+ int Opcode = I->getOpcode();
-+ if (Opcode != X86::MOV32mi && Opcode != X86::MOV32mr)
-+ break;
-+
-+ // We only want movs of the form:
-+ // movl imm/r32, k(%esp)
-+ // If we run into something else, bail.
-+ // Note that AddrBaseReg may, counter to its name, not be a register,
-+ // but rather a frame index.
-+ // TODO: Support the fi case. This should probably work now that we
-+ // have the infrastructure to track the stack pointer within a call
-+ // sequence.
-+ if (!I->getOperand(X86::AddrBaseReg).isReg() ||
-+ (I->getOperand(X86::AddrBaseReg).getReg() != StackPtr) ||
-+ !I->getOperand(X86::AddrScaleAmt).isImm() ||
-+ (I->getOperand(X86::AddrScaleAmt).getImm() != 1) ||
-+ (I->getOperand(X86::AddrIndexReg).getReg() != X86::NoRegister) ||
-+ (I->getOperand(X86::AddrSegmentReg).getReg() != X86::NoRegister) ||
-+ !I->getOperand(X86::AddrDisp).isImm())
-+ return false;
-+
-+ int64_t StackDisp = I->getOperand(X86::AddrDisp).getImm();
-+ assert(StackDisp >= 0 && "Negative stack displacement when passing parameters");
-+
-+ // We really don't want to consider the unaligned case.
-+ if (StackDisp % 4)
-+ return false;
-+ StackDisp /= 4;
-+
-+ assert((size_t)StackDisp < MovVector.size() &&
-+ "Function call has more parameters than the stack is adjusted for.");
-+
-+ // If the same stack slot is being filled twice, something's fishy.
-+ if (MovVector[StackDisp] != nullptr)
-+ return false;
-+ MovVector[StackDisp] = I;
-+
-+ ++I;
-+ } while (I != MBB.end());
-+
-+ // We now expect the end of the sequence - a call and a stack adjust.
-+ if (I == MBB.end())
-+ return false;
-+
-+ // For PCrel calls, we expect an additional COPY of the basereg.
-+ // If we find one, skip it.
-+ if (I->isCopy()) {
-+ if (I->getOperand(1).getReg() ==
-+ MF.getInfo<X86MachineFunctionInfo>()->getGlobalBaseReg())
-+ ++I;
-+ else
-+ return false;
-+ }
-+
-+ if (!I->isCall())
-+ return false;
-+ MachineBasicBlock::iterator Call = I;
-+ if ((++I)->getOpcode() != FrameDestroyOpcode)
-+ return false;
-+
-+ // Now, go through the vector, and see that we don't have any gaps,
-+ // but only a series of 32-bit MOVs.
-+
-+ int64_t ExpectedDist = 0;
-+ auto MMI = MovVector.begin(), MME = MovVector.end();
-+ for (; MMI != MME; ++MMI, ExpectedDist += 4)
-+ if (*MMI == nullptr)
-+ break;
-+
-+ // If the call had no parameters, do nothing
-+ if (!ExpectedDist)
-+ return false;
-+
-+ // We are either at the last parameter, or a gap.
-+ // Make sure it's not a gap
-+ for (; MMI != MME; ++MMI)
-+ if (*MMI != nullptr)
-+ return false;
-+
-+ // Ok, we can in fact do the transformation for this call.
-+ // Do not remove the FrameSetup instruction, but adjust the parameters.
-+ // PEI will end up finalizing the handling of this.
-+ FrameSetup->getOperand(1).setImm(ExpectedDist);
-+
-+ DebugLoc DL = I->getDebugLoc();
-+ // Now, iterate through the vector in reverse order, and replace the movs
-+ // with pushes. MOVmi/MOVmr doesn't have any defs, so no need to
-+ // replace uses.
-+ for (int Idx = (ExpectedDist / 4) - 1; Idx >= 0; --Idx) {
-+ MachineBasicBlock::iterator MOV = *MovVector[Idx];
-+ MachineOperand PushOp = MOV->getOperand(X86::AddrNumOperands);
-+ if (MOV->getOpcode() == X86::MOV32mi) {
-+ unsigned PushOpcode = X86::PUSHi32;
-+ // If the operand is a small (8-bit) immediate, we can use a
-+ // PUSH instruction with a shorter encoding.
-+ // Note that isImm() may fail even though this is a MOVmi, because
-+ // the operand can also be a symbol.
-+ if (PushOp.isImm()) {
-+ int64_t Val = PushOp.getImm();
-+ if (isInt<8>(Val))
-+ PushOpcode = X86::PUSH32i8;
-+ }
-+ BuildMI(MBB, Call, DL, TII->get(PushOpcode)).addOperand(PushOp);
-+ } else {
-+ unsigned int Reg = PushOp.getReg();
-+
-+ // If PUSHrmm is not slow on this target, try to fold the source of the
-+ // push into the instruction.
-+ const X86Subtarget &ST = MF.getTarget().getSubtarget<X86Subtarget>();
-+ bool SlowPUSHrmm = ST.isAtom() || ST.isSLM();
-+
-+ // Check that this is legal to fold. Right now, we're extremely
-+ // conservative about that.
-+ MachineInstr *DefMov = nullptr;
-+ if (!SlowPUSHrmm && (DefMov = canFoldIntoRegPush(FrameSetup, Reg))) {
-+ MachineInstr *Push = BuildMI(MBB, Call, DL, TII->get(X86::PUSH32rmm));
-+
-+ unsigned NumOps = DefMov->getDesc().getNumOperands();
-+ for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i)
-+ Push->addOperand(DefMov->getOperand(i));
-+
-+ DefMov->eraseFromParent();
-+ } else {
-+ BuildMI(MBB, Call, DL, TII->get(X86::PUSH32r)).addReg(Reg).getInstr();
-+ }
-+ }
-+
-+ MBB.erase(MOV);
-+ }
-+
-+ // The stack-pointer copy is no longer used in the call sequences.
-+ // There should not be any other users, but we can't commit to that, so:
-+ if (MRI->use_empty(SPCopy->getOperand(0).getReg()))
-+ SPCopy->eraseFromParent();
-+
-+ // Once we've done this, we need to make sure PEI doesn't assume a reserved
-+ // frame.
-+ X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
-+ FuncInfo->setHasPushSequences(true);
-+
-+ return true;
-+}
-+
-+MachineInstr *X86CallFrameOptimization::canFoldIntoRegPush(
-+ MachineBasicBlock::iterator FrameSetup, unsigned Reg) {
-+ // Do an extremely restricted form of load folding.
-+ // ISel will often create patterns like:
-+ // movl 4(%edi), %eax
-+ // movl 8(%edi), %ecx
-+ // movl 12(%edi), %edx
-+ // movl %edx, 8(%esp)
-+ // movl %ecx, 4(%esp)
-+ // movl %eax, (%esp)
-+ // call
-+ // Get rid of those with prejudice.
-+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
-+ return nullptr;
-+
-+ // Make sure this is the only use of Reg.
-+ if (!MRI->hasOneNonDBGUse(Reg))
-+ return nullptr;
-+
-+ MachineBasicBlock::iterator DefMI = MRI->getVRegDef(Reg);
-+
-+ // Make sure the def is a MOV from memory.
-+ // If the def is in another block, give up.
-+ if (DefMI->getOpcode() != X86::MOV32rm ||
-+ DefMI->getParent() != FrameSetup->getParent())
-+ return nullptr;
-+
-+ // Be careful with movs that load from a stack slot, since it may get
-+ // resolved incorrectly.
-+ // TODO: Again, we already have the infrastructure, so this should work.
-+ if (!DefMI->getOperand(1).isReg())
-+ return nullptr;
-+
-+ // Now, make sure everything else up until the ADJCALLSTACK is a sequence
-+ // of MOVs. To be less conservative would require duplicating a lot of the
-+ // logic from PeepholeOptimizer.
-+ // FIXME: A possibly better approach would be to teach the PeepholeOptimizer
-+ // to be smarter about folding into pushes.
-+ for (auto I = DefMI; I != FrameSetup; ++I)
-+ if (I->getOpcode() != X86::MOV32rm)
-+ return nullptr;
-+
-+ return DefMI;
-+}
-Index: lib/Target/X86/X86FastISel.cpp
-===================================================================
---- lib/Target/X86/X86FastISel.cpp
-+++ lib/Target/X86/X86FastISel.cpp
-@@ -2735,7 +2735,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &
- // Issue CALLSEQ_START
- unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
-- .addImm(NumBytes);
-+ .addImm(NumBytes).addImm(0);
-
- // Walk the register/memloc assignments, inserting copies/loads.
- const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
-Index: lib/Target/X86/X86FrameLowering.cpp
-===================================================================
---- lib/Target/X86/X86FrameLowering.cpp
-+++ lib/Target/X86/X86FrameLowering.cpp
-@@ -38,9 +38,36 @@ using namespace llvm;
- extern cl::opt<bool> ForceStackAlign;
-
- bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
-- return !MF.getFrameInfo()->hasVarSizedObjects();
-+ return !MF.getFrameInfo()->hasVarSizedObjects() &&
-+ !MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
- }
-
-+/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
-+/// call frame pseudos can be simplified. Having a FP, as in the default
-+/// implementation, is not sufficient here since we can't always use it.
-+/// Use a more nuanced condition.
-+bool
-+X86FrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
-+ const X86RegisterInfo *TRI = static_cast<const X86RegisterInfo *>
-+ (MF.getSubtarget().getRegisterInfo());
-+ return hasReservedCallFrame(MF) ||
-+ (hasFP(MF) && !TRI->needsStackRealignment(MF))
-+ || TRI->hasBasePointer(MF);
-+}
-+
-+// needsFrameIndexResolution - Do we need to perform FI resolution for
-+// this function. Normally, this is required only when the function
-+// has any stack objects. However, FI resolution actually has another job,
-+// not apparent from the title - it resolves callframesetup/destroy
-+// that were not simplified earlier.
-+// So, this is required for x86 functions that have push sequences even
-+// when there are no stack objects.
-+bool
-+X86FrameLowering::needsFrameIndexResolution(const MachineFunction &MF) const {
-+ return MF.getFrameInfo()->hasStackObjects() ||
-+ MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
-+}
-+
- /// hasFP - Return true if the specified function should have a dedicated frame
- /// pointer register. This is true if the function has variable sized allocas
- /// or if frame pointer elimination is disabled.
-@@ -93,16 +120,6 @@ static unsigned getANDriOpcode(bool IsLP64, int64_
- return X86::AND32ri;
- }
-
--static unsigned getPUSHiOpcode(bool IsLP64, MachineOperand MO) {
-- // We don't support LP64 for now.
-- assert(!IsLP64);
--
-- if (MO.isImm() && isInt<8>(MO.getImm()))
-- return X86::PUSH32i8;
--
-- return X86::PUSHi32;;
--}
--
- static unsigned getLEArOpcode(unsigned IsLP64) {
- return IsLP64 ? X86::LEA64r : X86::LEA32r;
- }
-@@ -1882,100 +1899,6 @@ void X86FrameLowering::adjustForHiPEPrologue(Machi
- #endif
- }
-
--bool X86FrameLowering::
--convertArgMovsToPushes(MachineFunction &MF, MachineBasicBlock &MBB,
-- MachineBasicBlock::iterator I, uint64_t Amount) const {
-- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
-- const X86RegisterInfo &RegInfo = *static_cast<const X86RegisterInfo *>(
-- MF.getSubtarget().getRegisterInfo());
-- unsigned StackPtr = RegInfo.getStackRegister();
--
-- // Scan the call setup sequence for the pattern we're looking for.
-- // We only handle a simple case now - a sequence of MOV32mi or MOV32mr
-- // instructions, that push a sequence of 32-bit values onto the stack, with
-- // no gaps.
-- std::map<int64_t, MachineBasicBlock::iterator> MovMap;
-- do {
-- int Opcode = I->getOpcode();
-- if (Opcode != X86::MOV32mi && Opcode != X86::MOV32mr)
-- break;
--
-- // We only want movs of the form:
-- // movl imm/r32, k(%ecx)
-- // If we run into something else, bail
-- // Note that AddrBaseReg may, counterintuitively, not be a register...
-- if (!I->getOperand(X86::AddrBaseReg).isReg() ||
-- (I->getOperand(X86::AddrBaseReg).getReg() != StackPtr) ||
-- !I->getOperand(X86::AddrScaleAmt).isImm() ||
-- (I->getOperand(X86::AddrScaleAmt).getImm() != 1) ||
-- (I->getOperand(X86::AddrIndexReg).getReg() != X86::NoRegister) ||
-- (I->getOperand(X86::AddrSegmentReg).getReg() != X86::NoRegister) ||
-- !I->getOperand(X86::AddrDisp).isImm())
-- return false;
--
-- int64_t StackDisp = I->getOperand(X86::AddrDisp).getImm();
--
-- // We don't want to consider the unaligned case.
-- if (StackDisp % 4)
-- return false;
--
-- // If the same stack slot is being filled twice, something's fishy.
-- if (!MovMap.insert(std::pair<int64_t, MachineInstr*>(StackDisp, I)).second)
-- return false;
--
-- ++I;
-- } while (I != MBB.end());
--
-- // We now expect the end of the sequence - a call and a stack adjust.
-- if (I == MBB.end())
-- return false;
-- if (!I->isCall())
-- return false;
-- MachineBasicBlock::iterator Call = I;
-- if ((++I)->getOpcode() != TII.getCallFrameDestroyOpcode())
-- return false;
--
-- // Now, go through the map, and see that we don't have any gaps,
-- // but only a series of 32-bit MOVs.
-- // Since std::map provides ordered iteration, the original order
-- // of the MOVs doesn't matter.
-- int64_t ExpectedDist = 0;
-- for (auto MMI = MovMap.begin(), MME = MovMap.end(); MMI != MME;
-- ++MMI, ExpectedDist += 4)
-- if (MMI->first != ExpectedDist)
-- return false;
--
-- // Ok, everything looks fine. Do the transformation.
-- DebugLoc DL = I->getDebugLoc();
--
-- // It's possible the original stack adjustment amount was larger than
-- // that done by the pushes. If so, we still need a SUB.
-- Amount -= ExpectedDist;
-- if (Amount) {
-- MachineInstr* Sub = BuildMI(MBB, Call, DL,
-- TII.get(getSUBriOpcode(false, Amount)), StackPtr)
-- .addReg(StackPtr).addImm(Amount);
-- Sub->getOperand(3).setIsDead();
-- }
--
-- // Now, iterate through the map in reverse order, and replace the movs
-- // with pushes. MOVmi/MOVmr doesn't have any defs, so need to replace uses.
-- for (auto MMI = MovMap.rbegin(), MME = MovMap.rend(); MMI != MME; ++MMI) {
-- MachineBasicBlock::iterator MOV = MMI->second;
-- MachineOperand PushOp = MOV->getOperand(X86::AddrNumOperands);
--
-- // Replace MOVmr with PUSH32r, and MOVmi with PUSHi of appropriate size
-- int PushOpcode = X86::PUSH32r;
-- if (MOV->getOpcode() == X86::MOV32mi)
-- PushOpcode = getPUSHiOpcode(false, PushOp);
--
-- BuildMI(MBB, Call, DL, TII.get(PushOpcode)).addOperand(PushOp);
-- MBB.erase(MOV);
-- }
--
-- return true;
--}
--
- void X86FrameLowering::
- eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
-@@ -1990,7 +1913,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF,
- bool IsLP64 = STI.isTarget64BitLP64();
- DebugLoc DL = I->getDebugLoc();
- uint64_t Amount = !reserveCallFrame ? I->getOperand(0).getImm() : 0;
-- uint64_t CalleeAmt = isDestroy ? I->getOperand(1).getImm() : 0;
-+ uint64_t InternalAmt = (isDestroy || Amount) ? I->getOperand(1).getImm() : 0;
- I = MBB.erase(I);
-
- if (!reserveCallFrame) {
-@@ -2010,24 +1933,18 @@ eliminateCallFramePseudoInstr(MachineFunction &MF,
- Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign;
-
- MachineInstr *New = nullptr;
-- if (Opcode == TII.getCallFrameSetupOpcode()) {
-- // Try to convert movs to the stack into pushes.
-- // We currently only look for a pattern that appears in 32-bit
-- // calling conventions.
-- if (!IsLP64 && convertArgMovsToPushes(MF, MBB, I, Amount))
-- return;
-
-- New = BuildMI(MF, DL, TII.get(getSUBriOpcode(IsLP64, Amount)),
-- StackPtr)
-- .addReg(StackPtr)
-- .addImm(Amount);
-- } else {
-- assert(Opcode == TII.getCallFrameDestroyOpcode());
-+ // Factor out the amount that gets handled inside the sequence
-+ // (Pushes of arguments for frame setup, callee pops for frame destroy)
-+ Amount -= InternalAmt;
-
-- // Factor out the amount the callee already popped.
-- Amount -= CalleeAmt;
-+ if (Amount) {
-+ if (Opcode == TII.getCallFrameSetupOpcode()) {
-+ New = BuildMI(MF, DL, TII.get(getSUBriOpcode(IsLP64, Amount)), StackPtr)
-+ .addReg(StackPtr).addImm(Amount);
-+ } else {
-+ assert(Opcode == TII.getCallFrameDestroyOpcode());
-
-- if (Amount) {
- unsigned Opc = getADDriOpcode(IsLP64, Amount);
- New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
- .addReg(StackPtr).addImm(Amount);
-@@ -2045,13 +1962,13 @@ eliminateCallFramePseudoInstr(MachineFunction &MF,
- return;
- }
-
-- if (Opcode == TII.getCallFrameDestroyOpcode() && CalleeAmt) {
-+ if (Opcode == TII.getCallFrameDestroyOpcode() && InternalAmt) {
- // If we are performing frame pointer elimination and if the callee pops
- // something off the stack pointer, add it back. We do this until we have
- // more advanced stack pointer tracking ability.
-- unsigned Opc = getSUBriOpcode(IsLP64, CalleeAmt);
-+ unsigned Opc = getSUBriOpcode(IsLP64, InternalAmt);
- MachineInstr *New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
-- .addReg(StackPtr).addImm(CalleeAmt);
-+ .addReg(StackPtr).addImm(InternalAmt);
-
- // The EFLAGS implicit def is dead.
- New->getOperand(3).setIsDead();
-Index: lib/Target/X86/X86FrameLowering.h
-===================================================================
---- lib/Target/X86/X86FrameLowering.h
-+++ lib/Target/X86/X86FrameLowering.h
-@@ -66,6 +66,8 @@ class X86FrameLowering : public TargetFrameLowerin
-
- bool hasFP(const MachineFunction &MF) const override;
- bool hasReservedCallFrame(const MachineFunction &MF) const override;
-+ bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override;
-+ bool needsFrameIndexResolution(const MachineFunction &MF) const override;
-
- int getFrameIndexOffset(const MachineFunction &MF, int FI) const override;
- int getFrameIndexReference(const MachineFunction &MF, int FI,
-Index: lib/Target/X86/X86InstrCompiler.td
-===================================================================
---- lib/Target/X86/X86InstrCompiler.td
-+++ lib/Target/X86/X86InstrCompiler.td
-@@ -43,9 +43,9 @@ let hasSideEffects = 0, isNotDuplicable = 1, Uses
- // Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
- // sub / add which can clobber EFLAGS.
- let Defs = [ESP, EFLAGS], Uses = [ESP] in {
--def ADJCALLSTACKDOWN32 : I<0, Pseudo, (outs), (ins i32imm:$amt),
-+def ADJCALLSTACKDOWN32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
- "#ADJCALLSTACKDOWN",
-- [(X86callseq_start timm:$amt)]>,
-+ []>,
- Requires<[NotLP64]>;
- def ADJCALLSTACKUP32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
- "#ADJCALLSTACKUP",
-@@ -52,7 +52,10 @@ def ADJCALLSTACKUP32 : I<0, Pseudo, (outs), (ins
- [(X86callseq_end timm:$amt1, timm:$amt2)]>,
- Requires<[NotLP64]>;
- }
-+def : Pat<(X86callseq_start timm:$amt1),
-+ (ADJCALLSTACKDOWN32 i32imm:$amt1, 0)>, Requires<[NotLP64]>;
-
-+
- // ADJCALLSTACKDOWN/UP implicitly use/def RSP because they may be expanded into
- // a stack adjustment and the codegen must know that they may modify the stack
- // pointer before prolog-epilog rewriting occurs.
-@@ -59,9 +62,9 @@ def ADJCALLSTACKUP32 : I<0, Pseudo, (outs), (ins
- // Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
- // sub / add which can clobber EFLAGS.
- let Defs = [RSP, EFLAGS], Uses = [RSP] in {
--def ADJCALLSTACKDOWN64 : I<0, Pseudo, (outs), (ins i32imm:$amt),
-+def ADJCALLSTACKDOWN64 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
- "#ADJCALLSTACKDOWN",
-- [(X86callseq_start timm:$amt)]>,
-+ []>,
- Requires<[IsLP64]>;
- def ADJCALLSTACKUP64 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
- "#ADJCALLSTACKUP",
-@@ -68,9 +71,10 @@ def ADJCALLSTACKUP64 : I<0, Pseudo, (outs), (ins
- [(X86callseq_end timm:$amt1, timm:$amt2)]>,
- Requires<[IsLP64]>;
- }
-+def : Pat<(X86callseq_start timm:$amt1),
-+ (ADJCALLSTACKDOWN64 i32imm:$amt1, 0)>, Requires<[IsLP64]>;
-
-
--
- // x86-64 va_start lowering magic.
- let usesCustomInserter = 1, Defs = [EFLAGS] in {
- def VASTART_SAVE_XMM_REGS : I<0, Pseudo,
-Index: lib/Target/X86/X86InstrInfo.cpp
-===================================================================
---- lib/Target/X86/X86InstrInfo.cpp
-+++ lib/Target/X86/X86InstrInfo.cpp
-@@ -1692,6 +1692,58 @@ X86InstrInfo::isCoalescableExtInstr(const MachineI
- return false;
- }
-
-+int X86InstrInfo::getSPAdjust(const MachineInstr *MI) const {
-+ const MachineFunction *MF = MI->getParent()->getParent();
-+ const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
-+
-+ if (MI->getOpcode() == getCallFrameSetupOpcode() ||
-+ MI->getOpcode() == getCallFrameDestroyOpcode()) {
-+ unsigned StackAlign = TFI->getStackAlignment();
-+ int SPAdj = (MI->getOperand(0).getImm() + StackAlign - 1) / StackAlign *
-+ StackAlign;
-+
-+ SPAdj -= MI->getOperand(1).getImm();
-+
-+ if (MI->getOpcode() == getCallFrameSetupOpcode())
-+ return SPAdj;
-+ else
-+ return -SPAdj;
-+ }
-+
-+ // To know whether a call adjusts the stack, we need information
-+ // that is bound to the following ADJCALLSTACKUP pseudo.
-+ // Look for the next ADJCALLSTACKUP that follows the call.
-+ if (MI->isCall()) {
-+ const MachineBasicBlock* MBB = MI->getParent();
-+ auto I = ++MachineBasicBlock::const_iterator(MI);
-+ for (auto E = MBB->end(); I != E; ++I) {
-+ if (I->getOpcode() == getCallFrameDestroyOpcode() ||
-+ I->isCall())
-+ break;
-+ }
-+
-+ // If we could not find a frame destroy opcode, then it has already
-+ // been simplified, so we don't care.
-+ if (I->getOpcode() != getCallFrameDestroyOpcode())
-+ return 0;
-+
-+ return -(I->getOperand(1).getImm());
-+ }
-+
-+ // Currently handle only PUSHes we can reasonably expect to see
-+ // in call sequences
-+ switch (MI->getOpcode()) {
-+ default:
-+ return 0;
-+ case X86::PUSH32i8:
-+ case X86::PUSH32r:
-+ case X86::PUSH32rmm:
-+ case X86::PUSH32rmr:
-+ case X86::PUSHi32:
-+ return 4;
-+ }
-+}
-+
- /// isFrameOperand - Return true and the FrameIndex if the specified
- /// operand and follow operands form a reference to the stack frame.
- bool X86InstrInfo::isFrameOperand(const MachineInstr *MI, unsigned int Op,
-Index: lib/Target/X86/X86InstrInfo.h
-===================================================================
---- lib/Target/X86/X86InstrInfo.h
-+++ lib/Target/X86/X86InstrInfo.h
-@@ -175,6 +175,11 @@ class X86InstrInfo final : public X86GenInstrInfo
- ///
- const X86RegisterInfo &getRegisterInfo() const { return RI; }
-
-+ /// getSPAdjust - This returns the stack pointer adjustment made by
-+ /// this instruction. For x86, we need to handle more complex call
-+ /// sequences involving PUSHes.
-+ int getSPAdjust(const MachineInstr *MI) const override;
-+
- /// isCoalescableExtInstr - Return true if the instruction is a "coalescable"
- /// extension instruction. That is, it's like a copy where it's legal for the
- /// source to overlap the destination. e.g. X86::MOVSX64rr32. If this returns
-Index: lib/Target/X86/X86MachineFunctionInfo.h
-===================================================================
---- lib/Target/X86/X86MachineFunctionInfo.h
-+++ lib/Target/X86/X86MachineFunctionInfo.h
-@@ -77,6 +77,9 @@ class X86MachineFunctionInfo : public MachineFunct
- unsigned ArgumentStackSize;
- /// NumLocalDynamics - Number of local-dynamic TLS accesses.
- unsigned NumLocalDynamics;
-+ /// HasPushSequences - Keeps track of whether this function uses sequences
-+ /// of pushes to pass function parameters.
-+ bool HasPushSequences;
-
- private:
- /// ForwardedMustTailRegParms - A list of virtual and physical registers
-@@ -97,7 +100,8 @@ class X86MachineFunctionInfo : public MachineFunct
- VarArgsGPOffset(0),
- VarArgsFPOffset(0),
- ArgumentStackSize(0),
-- NumLocalDynamics(0) {}
-+ NumLocalDynamics(0),
-+ HasPushSequences(false) {}
-
- explicit X86MachineFunctionInfo(MachineFunction &MF)
- : ForceFramePointer(false),
-@@ -113,11 +117,15 @@ class X86MachineFunctionInfo : public MachineFunct
- VarArgsGPOffset(0),
- VarArgsFPOffset(0),
- ArgumentStackSize(0),
-- NumLocalDynamics(0) {}
-+ NumLocalDynamics(0),
-+ HasPushSequences(false) {}
-
- bool getForceFramePointer() const { return ForceFramePointer;}
- void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; }
-
-+ bool getHasPushSequences() const { return HasPushSequences; }
-+ void setHasPushSequences(bool HasPush) { HasPushSequences = HasPush; }
-+
- bool getRestoreBasePointer() const { return RestoreBasePointerOffset!=0; }
- void setRestoreBasePointer(const MachineFunction *MF);
- int getRestoreBasePointerOffset() const {return RestoreBasePointerOffset; }
-Index: lib/Target/X86/X86RegisterInfo.cpp
-===================================================================
---- lib/Target/X86/X86RegisterInfo.cpp
-+++ lib/Target/X86/X86RegisterInfo.cpp
-@@ -468,8 +468,6 @@ void
- X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, unsigned FIOperandNum,
- RegScavenger *RS) const {
-- assert(SPAdj == 0 && "Unexpected");
--
- MachineInstr &MI = *II;
- MachineFunction &MF = *MI.getParent()->getParent();
- const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
-@@ -506,6 +504,9 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicB
- } else
- FIOffset = TFI->getFrameIndexOffset(MF, FrameIndex);
-
-+ if (BasePtr == StackPtr)
-+ FIOffset += SPAdj;
-+
- // The frame index format for stackmaps and patchpoints is different from the
- // X86 format. It only has a FI and an offset.
- if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) {
-Index: lib/Target/X86/X86TargetMachine.cpp
-===================================================================
---- lib/Target/X86/X86TargetMachine.cpp
-+++ lib/Target/X86/X86TargetMachine.cpp
-@@ -154,6 +154,7 @@ class X86PassConfig : public TargetPassConfig {
- void addIRPasses() override;
- bool addInstSelector() override;
- bool addILPOpts() override;
-+ void addPreRegAlloc() override;
- void addPostRegAlloc() override;
- void addPreEmitPass() override;
- };
-@@ -187,6 +188,10 @@ bool X86PassConfig::addILPOpts() {
- return true;
- }
-
-+void X86PassConfig::addPreRegAlloc() {
-+ addPass(createX86CallFrameOptimization());
-+}
-+
- void X86PassConfig::addPostRegAlloc() {
- addPass(createX86FloatingPointStackifierPass());
- }
-Index: test/CodeGen/X86/inalloca-invoke.ll
-===================================================================
---- test/CodeGen/X86/inalloca-invoke.ll
-+++ test/CodeGen/X86/inalloca-invoke.ll
-@@ -31,7 +31,7 @@ blah:
- to label %invoke.cont unwind label %lpad
-
- ; Uses end as sret param.
--; CHECK: movl %[[end]], (%esp)
-+; CHECK: pushl %[[end]]
- ; CHECK: calll _plus
-
- invoke.cont:
-Index: test/CodeGen/X86/movtopush.ll
-===================================================================
---- test/CodeGen/X86/movtopush.ll
-+++ test/CodeGen/X86/movtopush.ll
-@@ -1,10 +1,12 @@
- ; RUN: llc < %s -mtriple=i686-windows | FileCheck %s -check-prefix=NORMAL
-+; RUN: llc < %s -mtriple=x86_64-windows | FileCheck %s -check-prefix=X64
- ; RUN: llc < %s -mtriple=i686-windows -force-align-stack -stack-alignment=32 | FileCheck %s -check-prefix=ALIGNED
-+
- declare void @good(i32 %a, i32 %b, i32 %c, i32 %d)
- declare void @inreg(i32 %a, i32 inreg %b, i32 %c, i32 %d)
-
- ; Here, we should have a reserved frame, so we don't expect pushes
--; NORMAL-LABEL: test1
-+; NORMAL-LABEL: test1:
- ; NORMAL: subl $16, %esp
- ; NORMAL-NEXT: movl $4, 12(%esp)
- ; NORMAL-NEXT: movl $3, 8(%esp)
-@@ -11,6 +13,7 @@ declare void @inreg(i32 %a, i32 inreg %b, i32 %c,
- ; NORMAL-NEXT: movl $2, 4(%esp)
- ; NORMAL-NEXT: movl $1, (%esp)
- ; NORMAL-NEXT: call
-+; NORMAL-NEXT: addl $16, %esp
- define void @test1() {
- entry:
- call void @good(i32 1, i32 2, i32 3, i32 4)
-@@ -17,8 +20,10 @@ entry:
- ret void
- }
-
--; Here, we expect a sequence of 4 immediate pushes
--; NORMAL-LABEL: test2
-+; We're optimizing for code size, so we should get pushes for x86,
-+; even though there is a reserved call frame.
-+; Make sure we don't touch x86-64
-+; NORMAL-LABEL: test1b:
- ; NORMAL-NOT: subl {{.*}} %esp
- ; NORMAL: pushl $4
- ; NORMAL-NEXT: pushl $3
-@@ -25,6 +30,42 @@ entry:
- ; NORMAL-NEXT: pushl $2
- ; NORMAL-NEXT: pushl $1
- ; NORMAL-NEXT: call
-+; NORMAL-NEXT: addl $16, %esp
-+; X64-LABEL: test1b:
-+; X64: movl $1, %ecx
-+; X64-NEXT: movl $2, %edx
-+; X64-NEXT: movl $3, %r8d
-+; X64-NEXT: movl $4, %r9d
-+; X64-NEXT: callq good
-+define void @test1b() optsize {
-+entry:
-+ call void @good(i32 1, i32 2, i32 3, i32 4)
-+ ret void
-+}
-+
-+; Same as above, but for minsize
-+; NORMAL-LABEL: test1c:
-+; NORMAL-NOT: subl {{.*}} %esp
-+; NORMAL: pushl $4
-+; NORMAL-NEXT: pushl $3
-+; NORMAL-NEXT: pushl $2
-+; NORMAL-NEXT: pushl $1
-+; NORMAL-NEXT: call
-+; NORMAL-NEXT: addl $16, %esp
-+define void @test1c() minsize {
-+entry:
-+ call void @good(i32 1, i32 2, i32 3, i32 4)
-+ ret void
-+}
-+
-+; If we don't have a reserved frame, we should have pushes
-+; NORMAL-LABEL: test2:
-+; NORMAL-NOT: subl {{.*}} %esp
-+; NORMAL: pushl $4
-+; NORMAL-NEXT: pushl $3
-+; NORMAL-NEXT: pushl $2
-+; NORMAL-NEXT: pushl $1
-+; NORMAL-NEXT: call
- define void @test2(i32 %k) {
- entry:
- %a = alloca i32, i32 %k
-@@ -34,7 +75,7 @@ entry:
-
- ; Again, we expect a sequence of 4 immediate pushes
- ; Checks that we generate the right pushes for >8bit immediates
--; NORMAL-LABEL: test2b
-+; NORMAL-LABEL: test2b:
- ; NORMAL-NOT: subl {{.*}} %esp
- ; NORMAL: pushl $4096
- ; NORMAL-NEXT: pushl $3072
-@@ -41,15 +82,15 @@ entry:
- ; NORMAL-NEXT: pushl $2048
- ; NORMAL-NEXT: pushl $1024
- ; NORMAL-NEXT: call
--define void @test2b(i32 %k) {
-+; NORMAL-NEXT: addl $16, %esp
-+define void @test2b() optsize {
- entry:
-- %a = alloca i32, i32 %k
- call void @good(i32 1024, i32 2048, i32 3072, i32 4096)
- ret void
- }
-
- ; The first push should push a register
--; NORMAL-LABEL: test3
-+; NORMAL-LABEL: test3:
- ; NORMAL-NOT: subl {{.*}} %esp
- ; NORMAL: pushl $4
- ; NORMAL-NEXT: pushl $3
-@@ -56,15 +97,15 @@ entry:
- ; NORMAL-NEXT: pushl $2
- ; NORMAL-NEXT: pushl %e{{..}}
- ; NORMAL-NEXT: call
--define void @test3(i32 %k) {
-+; NORMAL-NEXT: addl $16, %esp
-+define void @test3(i32 %k) optsize {
- entry:
-- %a = alloca i32, i32 %k
- call void @good(i32 %k, i32 2, i32 3, i32 4)
- ret void
- }
-
- ; We don't support weird calling conventions
--; NORMAL-LABEL: test4
-+; NORMAL-LABEL: test4:
- ; NORMAL: subl $12, %esp
- ; NORMAL-NEXT: movl $4, 8(%esp)
- ; NORMAL-NEXT: movl $3, 4(%esp)
-@@ -71,16 +112,16 @@ entry:
- ; NORMAL-NEXT: movl $1, (%esp)
- ; NORMAL-NEXT: movl $2, %eax
- ; NORMAL-NEXT: call
--define void @test4(i32 %k) {
-+; NORMAL-NEXT: addl $12, %esp
-+define void @test4() optsize {
- entry:
-- %a = alloca i32, i32 %k
- call void @inreg(i32 1, i32 2, i32 3, i32 4)
- ret void
- }
-
--; Check that additional alignment is added when the pushes
--; don't add up to the required alignment.
--; ALIGNED-LABEL: test5
-+; When there is no reserved call frame, check that additional alignment
-+; is added when the pushes don't add up to the required alignment.
-+; ALIGNED-LABEL: test5:
- ; ALIGNED: subl $16, %esp
- ; ALIGNED-NEXT: pushl $4
- ; ALIGNED-NEXT: pushl $3
-@@ -97,7 +138,7 @@ entry:
- ; Check that pushing the addresses of globals (Or generally, things that
- ; aren't exactly immediates) isn't broken.
- ; Fixes PR21878.
--; NORMAL-LABEL: test6
-+; NORMAL-LABEL: test6:
- ; NORMAL: pushl $_ext
- ; NORMAL-NEXT: call
- declare void @f(i8*)
-@@ -110,3 +151,108 @@ bb:
- alloca i32
- ret void
- }
-+
-+; Check that we fold simple cases into the push
-+; NORMAL-LABEL: test7:
-+; NORMAL-NOT: subl {{.*}} %esp
-+; NORMAL: movl 4(%esp), [[EAX:%e..]]
-+; NORMAL-NEXT: pushl $4
-+; NORMAL-NEXT: pushl ([[EAX]])
-+; NORMAL-NEXT: pushl $2
-+; NORMAL-NEXT: pushl $1
-+; NORMAL-NEXT: call
-+; NORMAL-NEXT: addl $16, %esp
-+define void @test7(i32* %ptr) optsize {
-+entry:
-+ %val = load i32* %ptr
-+ call void @good(i32 1, i32 2, i32 %val, i32 4)
-+ ret void
-+}
-+
-+; But we don't want to fold stack-relative loads into the push,
-+; because the offset will be wrong
-+; NORMAL-LABEL: test8:
-+; NORMAL-NOT: subl {{.*}} %esp
-+; NORMAL: movl 4(%esp), [[EAX:%e..]]
-+; NORMAL-NEXT: pushl $4
-+; NORMAL-NEXT: pushl [[EAX]]
-+; NORMAL-NEXT: pushl $2
-+; NORMAL-NEXT: pushl $1
-+; NORMAL-NEXT: call
-+; NORMAL-NEXT: addl $16, %esp
-+define void @test8(i32* %ptr) optsize {
-+entry:
-+ %val = ptrtoint i32* %ptr to i32
-+ call void @good(i32 1, i32 2, i32 %val, i32 4)
-+ ret void
-+}
-+
-+; If one function is using push instructions, and the other isn't
-+; (because it has frame-index references), then we must resolve
-+; these references correctly.
-+; NORMAL-LABEL: test9:
-+; NORMAL-NOT: leal (%esp),
-+; NORMAL: pushl $4
-+; NORMAL-NEXT: pushl $3
-+; NORMAL-NEXT: pushl $2
-+; NORMAL-NEXT: pushl $1
-+; NORMAL-NEXT: call
-+; NORMAL-NEXT: addl $16, %esp
-+; NORMAL-NEXT: subl $16, %esp
-+; NORMAL-NEXT: leal 16(%esp), [[EAX:%e..]]
-+; NORMAL-NEXT: movl [[EAX]], 12(%esp)
-+; NORMAL-NEXT: movl $7, 8(%esp)
-+; NORMAL-NEXT: movl $6, 4(%esp)
-+; NORMAL-NEXT: movl $5, (%esp)
-+; NORMAL-NEXT: call
-+; NORMAL-NEXT: addl $16, %esp
-+define void @test9() optsize {
-+entry:
-+ %p = alloca i32, align 4
-+ call void @good(i32 1, i32 2, i32 3, i32 4)
-+ %0 = ptrtoint i32* %p to i32
-+ call void @good(i32 5, i32 6, i32 7, i32 %0)
-+ ret void
-+}
-+
-+; We can end up with an indirect call which gets reloaded on the spot.
-+; Make sure we reference the correct stack slot - we spill into (%esp)
-+; and reload from 16(%esp) due to the pushes.
-+; NORMAL-LABEL: test10:
-+; NORMAL: movl $_good, [[ALLOC:.*]]
-+; NORMAL-NEXT: movl [[ALLOC]], [[EAX:%e..]]
-+; NORMAL-NEXT: movl [[EAX]], (%esp) # 4-byte Spill
-+; NORMAL: nop
-+; NORMAL: pushl $4
-+; NORMAL-NEXT: pushl $3
-+; NORMAL-NEXT: pushl $2
-+; NORMAL-NEXT: pushl $1
-+; NORMAL-NEXT: calll *16(%esp)
-+; NORMAL-NEXT: addl $16, %esp
-+define void @test10() optsize {
-+ %stack_fptr = alloca void (i32, i32, i32, i32)*
-+ store void (i32, i32, i32, i32)* @good, void (i32, i32, i32, i32)** %stack_fptr
-+ %good_ptr = load volatile void (i32, i32, i32, i32)** %stack_fptr
-+ call void asm sideeffect "nop", "~{ax},~{bx},~{cx},~{dx},~{bp},~{si},~{di}"()
-+ call void (i32, i32, i32, i32)* %good_ptr(i32 1, i32 2, i32 3, i32 4)
-+ ret void
-+}
-+
-+; We can't fold the load from the global into the push because of
-+; interference from the store
-+; NORMAL-LABEL: test11:
-+; NORMAL: movl _the_global, [[EAX:%e..]]
-+; NORMAL-NEXT: movl $42, _the_global
-+; NORMAL-NEXT: pushl $4
-+; NORMAL-NEXT: pushl $3
-+; NORMAL-NEXT: pushl $2
-+; NORMAL-NEXT: pushl [[EAX]]
-+; NORMAL-NEXT: call
-+; NORMAL-NEXT: addl $16, %esp
-+@the_global = external global i32
-+define void @test11() optsize {
-+ %myload = load i32* @the_global
-+ store i32 42, i32* @the_global
-+ call void @good(i32 %myload, i32 2, i32 3, i32 4)
-+ ret void
-+}
diff --git a/contrib/llvm/patches/patch-08-llvm-r230348-arm-fix-bad-ha.diff b/contrib/llvm/patches/patch-08-llvm-r230348-arm-fix-bad-ha.diff
deleted file mode 100644
index 2896899..0000000
--- a/contrib/llvm/patches/patch-08-llvm-r230348-arm-fix-bad-ha.diff
+++ /dev/null
@@ -1,419 +0,0 @@
-Pull in r230348 from upstream llvm trunk (by Tim Northover):
-
- ARM: treat [N x i32] and [N x i64] as AAPCS composite types
-
- The logic is almost there already, with our special homogeneous
- aggregate handling. Tweaking it like this allows front-ends to emit
- AAPCS compliant code without ever having to count registers or add
- discarded padding arguments.
-
- Only arrays of i32 and i64 are needed to model AAPCS rules, but I
- decided to apply the logic to all integer arrays for more consistency.
-
-This fixes a possible "Unexpected member type for HA" error when
-compiling lib/msun/bsdsrc/b_tgamma.c for armv6.
-
-Reported by: Jakub Palider <jpa@semihalf.com>
-
-Introduced here: https://svnweb.freebsd.org/changeset/base/280400
-
-Index: include/llvm/CodeGen/CallingConvLower.h
-===================================================================
---- include/llvm/CodeGen/CallingConvLower.h
-+++ include/llvm/CodeGen/CallingConvLower.h
-@@ -122,8 +122,8 @@ class CCValAssign {
- // There is no need to differentiate between a pending CCValAssign and other
- // kinds, as they are stored in a different list.
- static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT,
-- LocInfo HTP) {
-- return getReg(ValNo, ValVT, 0, LocVT, HTP);
-+ LocInfo HTP, unsigned ExtraInfo = 0) {
-+ return getReg(ValNo, ValVT, ExtraInfo, LocVT, HTP);
- }
-
- void convertToReg(unsigned RegNo) {
-@@ -146,6 +146,7 @@ class CCValAssign {
-
- unsigned getLocReg() const { assert(isRegLoc()); return Loc; }
- unsigned getLocMemOffset() const { assert(isMemLoc()); return Loc; }
-+ unsigned getExtraInfo() const { return Loc; }
- MVT getLocVT() const { return LocVT; }
-
- LocInfo getLocInfo() const { return HTP; }
-Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
-===================================================================
---- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
-+++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
-@@ -7429,11 +7429,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLo
- }
- if (Args[i].isNest)
- Flags.setNest();
-- if (NeedsRegBlock) {
-+ if (NeedsRegBlock)
- Flags.setInConsecutiveRegs();
-- if (Value == NumValues - 1)
-- Flags.setInConsecutiveRegsLast();
-- }
- Flags.setOrigAlign(OriginalAlignment);
-
- MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT);
-@@ -7482,6 +7479,9 @@ TargetLowering::LowerCallTo(TargetLowering::CallLo
- CLI.Outs.push_back(MyFlags);
- CLI.OutVals.push_back(Parts[j]);
- }
-+
-+ if (NeedsRegBlock && Value == NumValues - 1)
-+ CLI.Outs[CLI.Outs.size() - 1].Flags.setInConsecutiveRegsLast();
- }
- }
-
-@@ -7697,11 +7697,8 @@ void SelectionDAGISel::LowerArguments(const Functi
- }
- if (F.getAttributes().hasAttribute(Idx, Attribute::Nest))
- Flags.setNest();
-- if (NeedsRegBlock) {
-+ if (NeedsRegBlock)
- Flags.setInConsecutiveRegs();
-- if (Value == NumValues - 1)
-- Flags.setInConsecutiveRegsLast();
-- }
- Flags.setOrigAlign(OriginalAlignment);
-
- MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
-@@ -7716,6 +7713,8 @@ void SelectionDAGISel::LowerArguments(const Functi
- MyFlags.Flags.setOrigAlign(1);
- Ins.push_back(MyFlags);
- }
-+ if (NeedsRegBlock && Value == NumValues - 1)
-+ Ins[Ins.size() - 1].Flags.setInConsecutiveRegsLast();
- PartBase += VT.getStoreSize();
- }
- }
-Index: lib/Target/ARM/ARMCallingConv.h
-===================================================================
---- lib/Target/ARM/ARMCallingConv.h
-+++ lib/Target/ARM/ARMCallingConv.h
-@@ -160,6 +160,8 @@ static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &V
- State);
- }
-
-+static const uint16_t RRegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
-+
- static const uint16_t SRegList[] = { ARM::S0, ARM::S1, ARM::S2, ARM::S3,
- ARM::S4, ARM::S5, ARM::S6, ARM::S7,
- ARM::S8, ARM::S9, ARM::S10, ARM::S11,
-@@ -168,81 +170,114 @@ static const uint16_t DRegList[] = { ARM::D0, ARM:
- ARM::D4, ARM::D5, ARM::D6, ARM::D7 };
- static const uint16_t QRegList[] = { ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3 };
-
-+
- // Allocate part of an AAPCS HFA or HVA. We assume that each member of the HA
- // has InConsecutiveRegs set, and that the last member also has
- // InConsecutiveRegsLast set. We must process all members of the HA before
- // we can allocate it, as we need to know the total number of registers that
- // will be needed in order to (attempt to) allocate a contiguous block.
--static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
-- CCValAssign::LocInfo &LocInfo,
-- ISD::ArgFlagsTy &ArgFlags, CCState &State) {
-- SmallVectorImpl<CCValAssign> &PendingHAMembers = State.getPendingLocs();
-+static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned &ValNo, MVT &ValVT,
-+ MVT &LocVT,
-+ CCValAssign::LocInfo &LocInfo,
-+ ISD::ArgFlagsTy &ArgFlags,
-+ CCState &State) {
-+ SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
-
- // AAPCS HFAs must have 1-4 elements, all of the same type
-- assert(PendingHAMembers.size() < 4);
-- if (PendingHAMembers.size() > 0)
-- assert(PendingHAMembers[0].getLocVT() == LocVT);
-+ if (PendingMembers.size() > 0)
-+ assert(PendingMembers[0].getLocVT() == LocVT);
-
- // Add the argument to the list to be allocated once we know the size of the
-- // HA
-- PendingHAMembers.push_back(
-- CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
-+ // aggregate. Store the type's required alignment as extra info for later: in
-+ // the [N x i64] case all trace has been removed by the time we actually get
-+ // to do allocation.
-+ PendingMembers.push_back(CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo,
-+ ArgFlags.getOrigAlign()));
-
-- if (ArgFlags.isInConsecutiveRegsLast()) {
-- assert(PendingHAMembers.size() > 0 && PendingHAMembers.size() <= 4 &&
-- "Homogeneous aggregates must have between 1 and 4 members");
-+ if (!ArgFlags.isInConsecutiveRegsLast())
-+ return true;
-
-- // Try to allocate a contiguous block of registers, each of the correct
-- // size to hold one member.
-- ArrayRef<uint16_t> RegList;
-- switch (LocVT.SimpleTy) {
-- case MVT::f32:
-- RegList = SRegList;
-- break;
-- case MVT::f64:
-- RegList = DRegList;
-- break;
-- case MVT::v2f64:
-- RegList = QRegList;
-- break;
-- default:
-- llvm_unreachable("Unexpected member type for HA");
-- break;
-- }
-+ // Try to allocate a contiguous block of registers, each of the correct
-+ // size to hold one member.
-+ unsigned Align = std::min(PendingMembers[0].getExtraInfo(), 8U);
-
-- unsigned RegResult =
-- State.AllocateRegBlock(RegList, PendingHAMembers.size());
-+ ArrayRef<uint16_t> RegList;
-+ switch (LocVT.SimpleTy) {
-+ case MVT::i32: {
-+ RegList = RRegList;
-+ unsigned RegIdx = State.getFirstUnallocated(RegList.data(), RegList.size());
-
-- if (RegResult) {
-- for (SmallVectorImpl<CCValAssign>::iterator It = PendingHAMembers.begin();
-- It != PendingHAMembers.end(); ++It) {
-- It->convertToReg(RegResult);
-- State.addLoc(*It);
-- ++RegResult;
-- }
-- PendingHAMembers.clear();
-- return true;
-- }
-+ // First consume all registers that would give an unaligned object. Whether
-+ // we go on stack or in regs, no-one will be using them in future.
-+ unsigned RegAlign = RoundUpToAlignment(Align, 4) / 4;
-+ while (RegIdx % RegAlign != 0 && RegIdx < RegList.size())
-+ State.AllocateReg(RegList[RegIdx++]);
-
-- // Register allocation failed, fall back to the stack
-+ break;
-+ }
-+ case MVT::f32:
-+ RegList = SRegList;
-+ break;
-+ case MVT::f64:
-+ RegList = DRegList;
-+ break;
-+ case MVT::v2f64:
-+ RegList = QRegList;
-+ break;
-+ default:
-+ llvm_unreachable("Unexpected member type for block aggregate");
-+ break;
-+ }
-
-- // Mark all VFP regs as unavailable (AAPCS rule C.2.vfp)
-- for (unsigned regNo = 0; regNo < 16; ++regNo)
-- State.AllocateReg(SRegList[regNo]);
-+ unsigned RegResult = State.AllocateRegBlock(RegList, PendingMembers.size());
-+ if (RegResult) {
-+ for (SmallVectorImpl<CCValAssign>::iterator It = PendingMembers.begin();
-+ It != PendingMembers.end(); ++It) {
-+ It->convertToReg(RegResult);
-+ State.addLoc(*It);
-+ ++RegResult;
-+ }
-+ PendingMembers.clear();
-+ return true;
-+ }
-
-- unsigned Size = LocVT.getSizeInBits() / 8;
-- unsigned Align = std::min(Size, 8U);
-+ // Register allocation failed, we'll be needing the stack
-+ unsigned Size = LocVT.getSizeInBits() / 8;
-+ if (LocVT == MVT::i32 && State.getNextStackOffset() == 0) {
-+ // If nothing else has used the stack until this point, a non-HFA aggregate
-+ // can be split between regs and stack.
-+ unsigned RegIdx = State.getFirstUnallocated(RegList.data(), RegList.size());
-+ for (auto &It : PendingMembers) {
-+ if (RegIdx >= RegList.size())
-+ It.convertToMem(State.AllocateStack(Size, Size));
-+ else
-+ It.convertToReg(State.AllocateReg(RegList[RegIdx++]));
-
-- for (auto It : PendingHAMembers) {
-- It.convertToMem(State.AllocateStack(Size, Align));
- State.addLoc(It);
- }
-+ PendingMembers.clear();
-+ return true;
-+ } else if (LocVT != MVT::i32)
-+ RegList = SRegList;
-
-- // All pending members have now been allocated
-- PendingHAMembers.clear();
-+ // Mark all regs as unavailable (AAPCS rule C.2.vfp for VFP, C.6 for core)
-+ for (auto Reg : RegList)
-+ State.AllocateReg(Reg);
-+
-+ for (auto &It : PendingMembers) {
-+ It.convertToMem(State.AllocateStack(Size, Align));
-+ State.addLoc(It);
-+
-+ // After the first item has been allocated, the rest are packed as tightly
-+ // as possible. (E.g. an incoming i64 would have starting Align of 8, but
-+ // we'll be allocating a bunch of i32 slots).
-+ Align = Size;
- }
-
-- // This will be allocated by the last member of the HA
-+ // All pending members have now been allocated
-+ PendingMembers.clear();
-+
-+ // This will be allocated by the last member of the aggregate
- return true;
- }
-
-Index: lib/Target/ARM/ARMCallingConv.td
-===================================================================
---- lib/Target/ARM/ARMCallingConv.td
-+++ lib/Target/ARM/ARMCallingConv.td
-@@ -175,7 +175,7 @@ def CC_ARM_AAPCS_VFP : CallingConv<[
- CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
-
- // HFAs are passed in a contiguous block of registers, or on the stack
-- CCIfConsecutiveRegs<CCCustom<"CC_ARM_AAPCS_Custom_HA">>,
-+ CCIfConsecutiveRegs<CCCustom<"CC_ARM_AAPCS_Custom_Aggregate">>,
-
- CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
- CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
-Index: lib/Target/ARM/ARMISelLowering.cpp
-===================================================================
---- lib/Target/ARM/ARMISelLowering.cpp
-+++ lib/Target/ARM/ARMISelLowering.cpp
-@@ -11285,7 +11285,9 @@ static bool isHomogeneousAggregate(Type *Ty, HABas
- return (Members > 0 && Members <= 4);
- }
-
--/// \brief Return true if a type is an AAPCS-VFP homogeneous aggregate.
-+/// \brief Return true if a type is an AAPCS-VFP homogeneous aggregate or one of
-+/// [N x i32] or [N x i64]. This allows front-ends to skip emitting padding when
-+/// passing according to AAPCS rules.
- bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters(
- Type *Ty, CallingConv::ID CallConv, bool isVarArg) const {
- if (getEffectiveCallingConv(CallConv, isVarArg) !=
-@@ -11294,7 +11296,9 @@ bool ARMTargetLowering::functionArgumentNeedsConse
-
- HABaseType Base = HA_UNKNOWN;
- uint64_t Members = 0;
-- bool result = isHomogeneousAggregate(Ty, Base, Members);
-- DEBUG(dbgs() << "isHA: " << result << " "; Ty->dump());
-- return result;
-+ bool IsHA = isHomogeneousAggregate(Ty, Base, Members);
-+ DEBUG(dbgs() << "isHA: " << IsHA << " "; Ty->dump());
-+
-+ bool IsIntArray = Ty->isArrayTy() && Ty->getArrayElementType()->isIntegerTy();
-+ return IsHA || IsIntArray;
- }
-Index: test/CodeGen/ARM/aggregate-padding.ll
-===================================================================
---- test/CodeGen/ARM/aggregate-padding.ll
-+++ test/CodeGen/ARM/aggregate-padding.ll
-@@ -0,0 +1,101 @@
-+; RUN: llc -mtriple=armv7-linux-gnueabihf %s -o - | FileCheck %s
-+
-+; [2 x i64] should be contiguous when split (e.g. we shouldn't try to align all
-+; i32 components to 64 bits). Also makes sure i64 based types are properly
-+; aligned on the stack.
-+define i64 @test_i64_contiguous_on_stack([8 x double], float, i32 %in, [2 x i64] %arg) nounwind {
-+; CHECK-LABEL: test_i64_contiguous_on_stack:
-+; CHECK-DAG: ldr [[LO0:r[0-9]+]], [sp, #8]
-+; CHECK-DAG: ldr [[HI0:r[0-9]+]], [sp, #12]
-+; CHECK-DAG: ldr [[LO1:r[0-9]+]], [sp, #16]
-+; CHECK-DAG: ldr [[HI1:r[0-9]+]], [sp, #20]
-+; CHECK: adds r0, [[LO0]], [[LO1]]
-+; CHECK: adc r1, [[HI0]], [[HI1]]
-+
-+ %val1 = extractvalue [2 x i64] %arg, 0
-+ %val2 = extractvalue [2 x i64] %arg, 1
-+ %sum = add i64 %val1, %val2
-+ ret i64 %sum
-+}
-+
-+; [2 x i64] should look for 4 regs, not 8 (which might happen if the
-+; i64 -> i32, i32 split wasn't handled correctly).
-+define i64 @test_2xi64_uses_4_regs([8 x double], float, [2 x i64] %arg) nounwind {
-+; CHECK-LABEL: test_2xi64_uses_4_regs:
-+; CHECK-DAG: mov r0, r2
-+; CHECK-DAG: mov r1, r3
-+
-+ %val = extractvalue [2 x i64] %arg, 1
-+ ret i64 %val
-+}
-+
-+; An aggregate should be able to split between registers and stack if there is
-+; nothing else on the stack.
-+define i32 @test_aggregates_split([8 x double], i32, [4 x i32] %arg) nounwind {
-+; CHECK-LABEL: test_aggregates_split:
-+; CHECK: ldr [[VAL3:r[0-9]+]], [sp]
-+; CHECK: add r0, r1, [[VAL3]]
-+
-+ %val0 = extractvalue [4 x i32] %arg, 0
-+ %val3 = extractvalue [4 x i32] %arg, 3
-+ %sum = add i32 %val0, %val3
-+ ret i32 %sum
-+}
-+
-+; If an aggregate has to be moved entirely onto the stack, nothing should be
-+; able to use r0-r3 any more. Also checks that [2 x i64] is properly aligned when
-+; it uses regs.
-+define i32 @test_no_int_backfilling([8 x double], float, i32, [2 x i64], i32 %arg) nounwind {
-+; CHECK-LABEL: test_no_int_backfilling:
-+; CHECK: ldr r0, [sp, #24]
-+ ret i32 %arg
-+}
-+
-+; Even if the argument was successfully allocated as a reg block, there should be
-+; no backfilling to r1.
-+define i32 @test_no_int_backfilling_regsonly(i32, [1 x i64], i32 %arg) {
-+; CHECK-LABEL: test_no_int_backfilling_regsonly:
-+; CHECK: ldr r0, [sp]
-+ ret i32 %arg
-+}
-+
-+; If an aggregate has to be moved entirely onto the stack, nothing should be
-+; able to use r0-r3 any more.
-+define float @test_no_float_backfilling([7 x double], [4 x i32], i32, [4 x double], float %arg) nounwind {
-+; CHECK-LABEL: test_no_float_backfilling:
-+; CHECK: vldr s0, [sp, #40]
-+ ret float %arg
-+}
-+
-+; They're a bit pointless, but types like [N x i8] should work as well.
-+define i8 @test_i8_in_regs(i32, [3 x i8] %arg) {
-+; CHECK-LABEL: test_i8_in_regs:
-+; CHECK: add r0, r1, r3
-+ %val0 = extractvalue [3 x i8] %arg, 0
-+ %val2 = extractvalue [3 x i8] %arg, 2
-+ %sum = add i8 %val0, %val2
-+ ret i8 %sum
-+}
-+
-+define i16 @test_i16_split(i32, i32, [3 x i16] %arg) {
-+; CHECK-LABEL: test_i16_split:
-+; CHECK: ldrh [[VAL2:r[0-9]+]], [sp]
-+; CHECK: add r0, r2, [[VAL2]]
-+ %val0 = extractvalue [3 x i16] %arg, 0
-+ %val2 = extractvalue [3 x i16] %arg, 2
-+ %sum = add i16 %val0, %val2
-+ ret i16 %sum
-+}
-+
-+; Beware: on the stack each i16 still gets a 32-bit slot, the array is not
-+; packed.
-+define i16 @test_i16_forced_stack([8 x double], double, i32, i32, [3 x i16] %arg) {
-+; CHECK-LABEL: test_i16_forced_stack:
-+; CHECK-DAG: ldrh [[VAL0:r[0-9]+]], [sp, #8]
-+; CHECK-DAG: ldrh [[VAL2:r[0-9]+]], [sp, #16]
-+; CHECK: add r0, [[VAL0]], [[VAL2]]
-+ %val0 = extractvalue [3 x i16] %arg, 0
-+ %val2 = extractvalue [3 x i16] %arg, 2
-+ %sum = add i16 %val0, %val2
-+ ret i16 %sum
-+}
diff --git a/contrib/llvm/patches/patch-09-clang-r227115-constantarraytype.diff b/contrib/llvm/patches/patch-09-clang-r227115-constantarraytype.diff
deleted file mode 100644
index 33ca358..0000000
--- a/contrib/llvm/patches/patch-09-clang-r227115-constantarraytype.diff
+++ /dev/null
@@ -1,50 +0,0 @@
-Pull in r227115 from upstream clang trunk (by Ben Langmuir):
-
- Fix assert instantiating string init of static variable
-
- ... when the variable's type is a typedef of a ConstantArrayType. Just
- look through the typedef (and any other sugar). We only use the
- constant array type here to get the element count.
-
-This fixes an assertion failure when building the games/redeclipse port.
-
-Introduced here: http://svnweb.freebsd.org/changeset/base/281046
-
-Index: tools/clang/lib/Sema/SemaInit.cpp
-===================================================================
---- tools/clang/lib/Sema/SemaInit.cpp
-+++ tools/clang/lib/Sema/SemaInit.cpp
-@@ -149,10 +149,10 @@ static void updateStringLiteralType(Expr *E, QualT
- static void CheckStringInit(Expr *Str, QualType &DeclT, const ArrayType *AT,
- Sema &S) {
- // Get the length of the string as parsed.
-- uint64_t StrLength =
-- cast<ConstantArrayType>(Str->getType())->getSize().getZExtValue();
-+ auto *ConstantArrayTy =
-+ cast<ConstantArrayType>(Str->getType()->getUnqualifiedDesugaredType());
-+ uint64_t StrLength = ConstantArrayTy->getSize().getZExtValue();
-
--
- if (const IncompleteArrayType *IAT = dyn_cast<IncompleteArrayType>(AT)) {
- // C99 6.7.8p14. We have an array of character type with unknown size
- // being initialized to a string literal.
-Index: tools/clang/test/SemaTemplate/instantiate-static-var.cpp
-===================================================================
---- tools/clang/test/SemaTemplate/instantiate-static-var.cpp
-+++ tools/clang/test/SemaTemplate/instantiate-static-var.cpp
-@@ -114,3 +114,15 @@ namespace PR6449 {
- template class X1<char>;
-
- }
-+
-+typedef char MyString[100];
-+template <typename T>
-+struct StaticVarWithTypedefString {
-+ static MyString str;
-+};
-+template <typename T>
-+MyString StaticVarWithTypedefString<T>::str = "";
-+
-+void testStaticVarWithTypedefString() {
-+ (void)StaticVarWithTypedefString<int>::str;
-+}
OpenPOWER on IntegriCloud