diff options
author | dim <dim@FreeBSD.org> | 2015-05-30 15:36:23 +0000 |
---|---|---|
committer | dim <dim@FreeBSD.org> | 2015-05-30 15:36:23 +0000 |
commit | 3cd22c5584a700a2036b948ef96ae41bbd233bb6 (patch) | |
tree | e9f6288dce1b090572b6ade998862abc53e54b80 /contrib/llvm/patches | |
parent | e3e0f940d5a51d987e16f07a7e27c1a99c8972c2 (diff) | |
download | FreeBSD-src-3cd22c5584a700a2036b948ef96ae41bbd233bb6.zip FreeBSD-src-3cd22c5584a700a2036b948ef96ae41bbd233bb6.tar.gz |
Drop llvm/clang patches which are no longer necessary.
Diffstat (limited to 'contrib/llvm/patches')
-rw-r--r-- | contrib/llvm/patches/README.TXT | 8 | ||||
-rw-r--r-- | contrib/llvm/patches/patch-01-clang-vendor-suffix.diff (renamed from contrib/llvm/patches/patch-02-clang-vendor-suffix.diff) | 0 | ||||
-rw-r--r-- | contrib/llvm/patches/patch-01-freebsd-kprintf.diff | 381 | ||||
-rw-r--r-- | contrib/llvm/patches/patch-02-add-CC-aliases.diff (renamed from contrib/llvm/patches/patch-03-add-CC-aliases.diff) | 0 | ||||
-rw-r--r-- | contrib/llvm/patches/patch-03-enable-armv6-clrex.diff (renamed from contrib/llvm/patches/patch-05-enable-armv6-clrex.diff) | 0 | ||||
-rw-r--r-- | contrib/llvm/patches/patch-04-add-llvm-gvn-option.diff | 31 | ||||
-rw-r--r-- | contrib/llvm/patches/patch-04-clang-add-mips-triples.diff (renamed from contrib/llvm/patches/patch-06-clang-add-mips-triples.diff) | 0 | ||||
-rw-r--r-- | contrib/llvm/patches/patch-07-llvm-r227752-boot2-shrink.diff | 1271 | ||||
-rw-r--r-- | contrib/llvm/patches/patch-08-llvm-r230348-arm-fix-bad-ha.diff | 419 | ||||
-rw-r--r-- | contrib/llvm/patches/patch-09-clang-r227115-constantarraytype.diff | 50 |
10 files changed, 4 insertions, 2156 deletions
diff --git a/contrib/llvm/patches/README.TXT b/contrib/llvm/patches/README.TXT index 7bc26d2..220baf0 100644 --- a/contrib/llvm/patches/README.TXT +++ b/contrib/llvm/patches/README.TXT @@ -1,11 +1,11 @@ This is a set of individual patches, which contain all the customizations to llvm/clang currently in the FreeBSD base system. These can be applied in -alphabetical order to a pristine llvm/clang 3.6.1 source tree, for example by +alphabetical order to a pristine llvm/clang 3.7.0 source tree, for example by doing: -svn co https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_361/final llvm-3.6.1 -svn co https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_361/final llvm-3.6.1/tools/clang -cd llvm-3.6.1 +svn co https://llvm.org/svn/llvm-project/llvm/trunk llvm-3.7.0 +svn co https://llvm.org/svn/llvm-project/cfe/trunk llvm-3.7.0/tools/clang +cd llvm-3.7.0 for p in /usr/src/contrib/llvm/patches/patch-*.diff; do patch -p0 -f -F0 -E -i $p -s || break done diff --git a/contrib/llvm/patches/patch-02-clang-vendor-suffix.diff b/contrib/llvm/patches/patch-01-clang-vendor-suffix.diff index f94b9f3..f94b9f3 100644 --- a/contrib/llvm/patches/patch-02-clang-vendor-suffix.diff +++ b/contrib/llvm/patches/patch-01-clang-vendor-suffix.diff diff --git a/contrib/llvm/patches/patch-01-freebsd-kprintf.diff b/contrib/llvm/patches/patch-01-freebsd-kprintf.diff deleted file mode 100644 index 252b4cd..0000000 --- a/contrib/llvm/patches/patch-01-freebsd-kprintf.diff +++ /dev/null @@ -1,381 +0,0 @@ -This patch adds support for the FreeBSD kernel specific printf format -specifiers: %b, %D, %r and %y, via a new __freebsd_kprintf__ format -string type. - -Sent upstream as http://reviews.llvm.org/D7154 - -Index: tools/clang/include/clang/Analysis/Analyses/FormatString.h -=================================================================== ---- tools/clang/include/clang/Analysis/Analyses/FormatString.h -+++ tools/clang/include/clang/Analysis/Analyses/FormatString.h -@@ -161,6 +161,12 @@ class ConversionSpecifier { - ObjCObjArg, // '@' - ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg, - -+ // FreeBSD kernel specific specifiers. -+ FreeBSDbArg, -+ FreeBSDDArg, -+ FreeBSDrArg, -+ FreeBSDyArg, -+ - // GlibC specific specifiers. - PrintErrno, // 'm' - -@@ -204,7 +210,8 @@ class ConversionSpecifier { - return EndScanList ? EndScanList - Position : 1; - } - -- bool isIntArg() const { return kind >= IntArgBeg && kind <= IntArgEnd; } -+ bool isIntArg() const { return (kind >= IntArgBeg && kind <= IntArgEnd) || -+ kind == FreeBSDrArg || kind == FreeBSDyArg; } - bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; } - bool isAnyIntArg() const { return kind >= IntArgBeg && kind <= UIntArgEnd; } - const char *toString() const; -@@ -646,7 +653,7 @@ class FormatStringHandler { - - bool ParsePrintfString(FormatStringHandler &H, - const char *beg, const char *end, const LangOptions &LO, -- const TargetInfo &Target); -+ const TargetInfo &Target, bool isFreeBSDKPrintf); - - bool ParseFormatStringHasSArg(const char *beg, const char *end, const LangOptions &LO, - const TargetInfo &Target); -Index: tools/clang/include/clang/Sema/Sema.h -=================================================================== ---- tools/clang/include/clang/Sema/Sema.h -+++ tools/clang/include/clang/Sema/Sema.h -@@ -8567,6 +8567,7 @@ class Sema { - FST_Strftime, - FST_Strfmon, - FST_Kprintf, -+ FST_FreeBSDKPrintf, - FST_Unknown - }; - static FormatStringType GetFormatStringType(const FormatAttr *Format); -Index: tools/clang/lib/Analysis/FormatString.cpp -=================================================================== ---- tools/clang/lib/Analysis/FormatString.cpp -+++ tools/clang/lib/Analysis/FormatString.cpp -@@ -552,6 +552,12 @@ const char *ConversionSpecifier::toString() const - // Objective-C specific specifiers. - case ObjCObjArg: return "@"; - -+ // FreeBSD kernel specific specifiers. -+ case FreeBSDbArg: return "b"; -+ case FreeBSDDArg: return "D"; -+ case FreeBSDrArg: return "r"; -+ case FreeBSDyArg: return "y"; -+ - // GlibC specific specifiers. - case PrintErrno: return "m"; - -@@ -647,6 +653,9 @@ bool FormatSpecifier::hasValidLengthModifier(const - case ConversionSpecifier::XArg: - case ConversionSpecifier::nArg: - return true; -+ case ConversionSpecifier::FreeBSDrArg: -+ case ConversionSpecifier::FreeBSDyArg: -+ return Target.getTriple().isOSFreeBSD(); - default: - return false; - } -@@ -677,6 +686,9 @@ bool FormatSpecifier::hasValidLengthModifier(const - case ConversionSpecifier::ScanListArg: - case ConversionSpecifier::ZArg: - return true; -+ case ConversionSpecifier::FreeBSDrArg: -+ case ConversionSpecifier::FreeBSDyArg: -+ return Target.getTriple().isOSFreeBSD(); - default: - return false; - } -@@ -807,6 +819,10 @@ bool FormatSpecifier::hasStandardConversionSpecifi - case ConversionSpecifier::SArg: - return LangOpt.ObjC1 || LangOpt.ObjC2; - case ConversionSpecifier::InvalidSpecifier: -+ case ConversionSpecifier::FreeBSDbArg: -+ case ConversionSpecifier::FreeBSDDArg: -+ case ConversionSpecifier::FreeBSDrArg: -+ case ConversionSpecifier::FreeBSDyArg: - case ConversionSpecifier::PrintErrno: - case ConversionSpecifier::DArg: - case ConversionSpecifier::OArg: -Index: tools/clang/lib/Analysis/PrintfFormatString.cpp -=================================================================== ---- tools/clang/lib/Analysis/PrintfFormatString.cpp -+++ tools/clang/lib/Analysis/PrintfFormatString.cpp -@@ -55,7 +55,8 @@ static PrintfSpecifierResult ParsePrintfSpecifier( - unsigned &argIndex, - const LangOptions &LO, - const TargetInfo &Target, -- bool Warn) { -+ bool Warn, -+ bool isFreeBSDKPrintf) { - - using namespace clang::analyze_format_string; - using namespace clang::analyze_printf; -@@ -206,9 +207,24 @@ static PrintfSpecifierResult ParsePrintfSpecifier( - case '@': k = ConversionSpecifier::ObjCObjArg; break; - // Glibc specific. - case 'm': k = ConversionSpecifier::PrintErrno; break; -+ // FreeBSD kernel specific. -+ case 'b': -+ if (isFreeBSDKPrintf) -+ k = ConversionSpecifier::FreeBSDbArg; // int followed by char * -+ break; -+ case 'r': -+ if (isFreeBSDKPrintf) -+ k = ConversionSpecifier::FreeBSDrArg; // int -+ break; -+ case 'y': -+ if (isFreeBSDKPrintf) -+ k = ConversionSpecifier::FreeBSDyArg; // int -+ break; - // Apple-specific. - case 'D': -- if (Target.getTriple().isOSDarwin()) -+ if (isFreeBSDKPrintf) -+ k = ConversionSpecifier::FreeBSDDArg; // void * followed by char * -+ else if (Target.getTriple().isOSDarwin()) - k = ConversionSpecifier::DArg; - break; - case 'O': -@@ -228,6 +244,10 @@ static PrintfSpecifierResult ParsePrintfSpecifier( - FS.setConversionSpecifier(CS); - if (CS.consumesDataArgument() && !FS.usesPositionalArg()) - FS.setArgIndex(argIndex++); -+ // FreeBSD kernel specific. -+ if (k == ConversionSpecifier::FreeBSDbArg || -+ k == ConversionSpecifier::FreeBSDDArg) -+ argIndex++; - - if (k == ConversionSpecifier::InvalidSpecifier) { - // Assume the conversion takes one argument. -@@ -240,7 +260,8 @@ bool clang::analyze_format_string::ParsePrintfStri - const char *I, - const char *E, - const LangOptions &LO, -- const TargetInfo &Target) { -+ const TargetInfo &Target, -+ bool isFreeBSDKPrintf) { - - unsigned argIndex = 0; - -@@ -247,7 +268,8 @@ bool clang::analyze_format_string::ParsePrintfStri - // Keep looking for a format specifier until we have exhausted the string. - while (I != E) { - const PrintfSpecifierResult &FSR = ParsePrintfSpecifier(H, I, E, argIndex, -- LO, Target, true); -+ LO, Target, true, -+ isFreeBSDKPrintf); - // Did a fail-stop error of any kind occur when parsing the specifier? - // If so, don't do any more processing. - if (FSR.shouldStop()) -@@ -276,7 +298,8 @@ bool clang::analyze_format_string::ParseFormatStri - FormatStringHandler H; - while (I != E) { - const PrintfSpecifierResult &FSR = ParsePrintfSpecifier(H, I, E, argIndex, -- LO, Target, false); -+ LO, Target, false, -+ false); - // Did a fail-stop error of any kind occur when parsing the specifier? - // If so, don't do any more processing. - if (FSR.shouldStop()) -@@ -674,6 +697,8 @@ bool PrintfSpecifier::hasValidPlusPrefix() const { - case ConversionSpecifier::GArg: - case ConversionSpecifier::aArg: - case ConversionSpecifier::AArg: -+ case ConversionSpecifier::FreeBSDrArg: -+ case ConversionSpecifier::FreeBSDyArg: - return true; - - default: -@@ -699,6 +724,8 @@ bool PrintfSpecifier::hasValidAlternativeForm() co - case ConversionSpecifier::FArg: - case ConversionSpecifier::gArg: - case ConversionSpecifier::GArg: -+ case ConversionSpecifier::FreeBSDrArg: -+ case ConversionSpecifier::FreeBSDyArg: - return true; - - default: -@@ -729,6 +756,8 @@ bool PrintfSpecifier::hasValidLeadingZeros() const - case ConversionSpecifier::FArg: - case ConversionSpecifier::gArg: - case ConversionSpecifier::GArg: -+ case ConversionSpecifier::FreeBSDrArg: -+ case ConversionSpecifier::FreeBSDyArg: - return true; - - default: -@@ -753,6 +782,8 @@ bool PrintfSpecifier::hasValidSpacePrefix() const - case ConversionSpecifier::GArg: - case ConversionSpecifier::aArg: - case ConversionSpecifier::AArg: -+ case ConversionSpecifier::FreeBSDrArg: -+ case ConversionSpecifier::FreeBSDyArg: - return true; - - default: -@@ -818,6 +849,8 @@ bool PrintfSpecifier::hasValidPrecision() const { - case ConversionSpecifier::gArg: - case ConversionSpecifier::GArg: - case ConversionSpecifier::sArg: -+ case ConversionSpecifier::FreeBSDrArg: -+ case ConversionSpecifier::FreeBSDyArg: - return true; - - default: -Index: tools/clang/lib/Sema/SemaChecking.cpp -=================================================================== ---- tools/clang/lib/Sema/SemaChecking.cpp -+++ tools/clang/lib/Sema/SemaChecking.cpp -@@ -2603,6 +2603,7 @@ Sema::FormatStringType Sema::GetFormatStringType(c - .Case("strftime", FST_Strftime) - .Case("strfmon", FST_Strfmon) - .Cases("kprintf", "cmn_err", "vcmn_err", "zcmn_err", FST_Kprintf) -+ .Case("freebsd_kprintf", FST_FreeBSDKPrintf) - .Default(FST_Unknown); - } - -@@ -3384,6 +3385,43 @@ CheckPrintfHandler::HandlePrintfSpecifier(const an - CoveredArgs.set(argIndex); - } - -+ // FreeBSD kernel extensions. -+ if (CS.getKind() == ConversionSpecifier::FreeBSDbArg || -+ CS.getKind() == ConversionSpecifier::FreeBSDDArg) { -+ // We need at least two arguments. -+ if (!CheckNumArgs(FS, CS, startSpecifier, specifierLen, argIndex + 1)) -+ return false; -+ -+ // Claim the second argument. -+ CoveredArgs.set(argIndex + 1); -+ -+ // Type check the first argument (int for %b, pointer for %D) -+ const Expr *Ex = getDataArg(argIndex); -+ const analyze_printf::ArgType &AT = -+ (CS.getKind() == ConversionSpecifier::FreeBSDbArg) ? -+ ArgType(S.Context.IntTy) : ArgType::CPointerTy; -+ if (AT.isValid() && !AT.matchesType(S.Context, Ex->getType())) -+ EmitFormatDiagnostic( -+ S.PDiag(diag::warn_format_conversion_argument_type_mismatch) -+ << AT.getRepresentativeTypeName(S.Context) << Ex->getType() -+ << false << Ex->getSourceRange(), -+ Ex->getLocStart(), /*IsStringLocation*/false, -+ getSpecifierRange(startSpecifier, specifierLen)); -+ -+ // Type check the second argument (char * for both %b and %D) -+ Ex = getDataArg(argIndex + 1); -+ const analyze_printf::ArgType &AT2 = ArgType::CStrTy; -+ if (AT2.isValid() && !AT2.matchesType(S.Context, Ex->getType())) -+ EmitFormatDiagnostic( -+ S.PDiag(diag::warn_format_conversion_argument_type_mismatch) -+ << AT2.getRepresentativeTypeName(S.Context) << Ex->getType() -+ << false << Ex->getSourceRange(), -+ Ex->getLocStart(), /*IsStringLocation*/false, -+ getSpecifierRange(startSpecifier, specifierLen)); -+ -+ return true; -+ } -+ - // Check for using an Objective-C specific conversion specifier - // in a non-ObjC literal. - if (!ObjCContext && CS.isObjCArg()) { -@@ -4007,7 +4045,8 @@ void Sema::CheckFormatString(const StringLiteral * - return; - } - -- if (Type == FST_Printf || Type == FST_NSString) { -+ if (Type == FST_Printf || Type == FST_NSString || -+ Type == FST_FreeBSDKPrintf) { - CheckPrintfHandler H(*this, FExpr, OrigFormatExpr, firstDataArg, - numDataArgs, (Type == FST_NSString), - Str, HasVAListArg, Args, format_idx, -@@ -4015,7 +4054,8 @@ void Sema::CheckFormatString(const StringLiteral * - - if (!analyze_format_string::ParsePrintfString(H, Str, Str + StrLen, - getLangOpts(), -- Context.getTargetInfo())) -+ Context.getTargetInfo(), -+ Type == FST_FreeBSDKPrintf)) - H.DoneProcessing(); - } else if (Type == FST_Scanf) { - CheckScanfHandler H(*this, FExpr, OrigFormatExpr, firstDataArg, numDataArgs, -Index: tools/clang/lib/Sema/SemaDeclAttr.cpp -=================================================================== ---- tools/clang/lib/Sema/SemaDeclAttr.cpp -+++ tools/clang/lib/Sema/SemaDeclAttr.cpp -@@ -2481,6 +2481,7 @@ static FormatAttrKind getFormatAttrKind(StringRef - .Cases("scanf", "printf", "printf0", "strfmon", SupportedFormat) - .Cases("cmn_err", "vcmn_err", "zcmn_err", SupportedFormat) - .Case("kprintf", SupportedFormat) // OpenBSD. -+ .Case("freebsd_kprintf", SupportedFormat) // FreeBSD. - - .Cases("gcc_diag", "gcc_cdiag", "gcc_cxxdiag", "gcc_tdiag", IgnoredFormat) - .Default(InvalidFormat); -Index: tools/clang/test/Sema/attr-format.c -=================================================================== ---- tools/clang/test/Sema/attr-format.c -+++ tools/clang/test/Sema/attr-format.c -@@ -57,8 +57,15 @@ void callnull(void){ - null(0, (int*)0); // expected-warning {{incompatible pointer types}} - } - -+// FreeBSD kernel extensions -+void a3(const char *a, ...) __attribute__((format(freebsd_kprintf, 1,2))); // no-error -+void b3(const char *a, ...) __attribute__((format(freebsd_kprintf, 1,1))); // expected-error {{'format' attribute parameter 3 is out of bounds}} -+void c3(const char *a, ...) __attribute__((format(freebsd_kprintf, 0,2))); // expected-error {{'format' attribute parameter 2 is out of bounds}} -+void d3(const char *a, int c) __attribute__((format(freebsd_kprintf, 1,2))); // expected-error {{format attribute requires variadic function}} -+void e3(char *str, int c, ...) __attribute__((format(freebsd_kprintf, 2,3))); // expected-error {{format argument not a string type}} - - -+ - // PR4470 - int xx_vprintf(const char *, va_list); - -Index: tools/clang/test/Sema/format-strings-freebsd.c -=================================================================== ---- tools/clang/test/Sema/format-strings-freebsd.c -+++ tools/clang/test/Sema/format-strings-freebsd.c -@@ -0,0 +1,40 @@ -+// RUN: %clang_cc1 -fsyntax-only -verify -triple i386-unknown-freebsd %s -+// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64-unknown-freebsd %s -+ -+// Test FreeBSD kernel printf extensions. -+int freebsd_kernel_printf(const char *, ...) __attribute__((__format__(__freebsd_kprintf__, 1, 2))); -+ -+void check_freebsd_kernel_extensions(int i, long l, char *s) -+{ -+ // %b expects an int and a char * -+ freebsd_kernel_printf("reg=%b\n", i, "\10\2BITTWO\1BITONE\n"); // no-warning -+ freebsd_kernel_printf("reg=%b\n", l, "\10\2BITTWO\1BITONE\n"); // expected-warning{{format specifies type 'int' but the argument has type 'long'}} -+ freebsd_kernel_printf("reg=%b\n", i, l); // expected-warning{{format specifies type 'char *' but the argument has type 'long'}} -+ freebsd_kernel_printf("reg=%b\n", i); // expected-warning{{more '%' conversions than data arguments}} -+ freebsd_kernel_printf("reg=%b\n", i, "\10\2BITTWO\1BITONE\n", l); // expected-warning{{data argument not used by format string}} -+ -+ // %D expects an unsigned char * and a char * -+ freebsd_kernel_printf("%6D", s, ":"); // no-warning -+ freebsd_kernel_printf("%6D", i, ":"); // expected-warning{{format specifies type 'void *' but the argument has type 'int'}} -+ freebsd_kernel_printf("%6D", s, i); // expected-warning{{format specifies type 'char *' but the argument has type 'int'}} -+ freebsd_kernel_printf("%6D", s); // expected-warning{{more '%' conversions than data arguments}} -+ freebsd_kernel_printf("%6D", s, ":", i); // expected-warning{{data argument not used by format string}} -+ -+ freebsd_kernel_printf("%*D", 42, s, ":"); // no-warning -+ freebsd_kernel_printf("%*D", 42, i, ":"); // expected-warning{{format specifies type 'void *' but the argument has type 'int'}} -+ freebsd_kernel_printf("%*D", 42, s, i); // expected-warning{{format specifies type 'char *' but the argument has type 'int'}} -+ freebsd_kernel_printf("%*D", 42, s); // expected-warning{{more '%' conversions than data arguments}} -+ freebsd_kernel_printf("%*D", 42, s, ":", i); // expected-warning{{data argument not used by format string}} -+ -+ // %r expects an int -+ freebsd_kernel_printf("%r", i); // no-warning -+ freebsd_kernel_printf("%r", l); // expected-warning{{format specifies type 'int' but the argument has type 'long'}} -+ freebsd_kernel_printf("%lr", i); // expected-warning{{format specifies type 'long' but the argument has type 'int'}} -+ freebsd_kernel_printf("%lr", l); // no-warning -+ -+ // %y expects an int -+ freebsd_kernel_printf("%y", i); // no-warning -+ freebsd_kernel_printf("%y", l); // expected-warning{{format specifies type 'int' but the argument has type 'long'}} -+ freebsd_kernel_printf("%ly", i); // expected-warning{{format specifies type 'long' but the argument has type 'int'}} -+ freebsd_kernel_printf("%ly", l); // no-warning -+} diff --git a/contrib/llvm/patches/patch-03-add-CC-aliases.diff b/contrib/llvm/patches/patch-02-add-CC-aliases.diff index 884b14c..884b14c 100644 --- a/contrib/llvm/patches/patch-03-add-CC-aliases.diff +++ b/contrib/llvm/patches/patch-02-add-CC-aliases.diff diff --git a/contrib/llvm/patches/patch-05-enable-armv6-clrex.diff b/contrib/llvm/patches/patch-03-enable-armv6-clrex.diff index 574e3bd..574e3bd 100644 --- a/contrib/llvm/patches/patch-05-enable-armv6-clrex.diff +++ b/contrib/llvm/patches/patch-03-enable-armv6-clrex.diff diff --git a/contrib/llvm/patches/patch-04-add-llvm-gvn-option.diff b/contrib/llvm/patches/patch-04-add-llvm-gvn-option.diff deleted file mode 100644 index 5f31623..0000000 --- a/contrib/llvm/patches/patch-04-add-llvm-gvn-option.diff +++ /dev/null @@ -1,31 +0,0 @@ -Add an llvm option to enable/disable running the global value numbering -optimization pass. Disabling this pass helps to minimize the size of -boot2. - -Introduced here: http://svnweb.freebsd.org/changeset/base/274968 - -Index: lib/Transforms/IPO/PassManagerBuilder.cpp -=================================================================== ---- lib/Transforms/IPO/PassManagerBuilder.cpp -+++ lib/Transforms/IPO/PassManagerBuilder.cpp -@@ -78,6 +78,10 @@ static cl::opt<bool> - EnableMLSM("mlsm", cl::init(true), cl::Hidden, - cl::desc("Enable motion of merged load and store")); - -+static cl::opt<bool> EnableGVN("enable-gvn", -+ cl::init(true), cl::Hidden, -+ cl::desc("Run the global value numbering pass")); -+ - PassManagerBuilder::PassManagerBuilder() { - OptLevel = 2; - SizeLevel = 0; -@@ -244,7 +248,8 @@ void PassManagerBuilder::populateModulePassManager - if (OptLevel > 1) { - if (EnableMLSM) - MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds -- MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies -+ if (EnableGVN) -+ MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies - } - MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset - MPM.add(createSCCPPass()); // Constant prop with SCCP diff --git a/contrib/llvm/patches/patch-06-clang-add-mips-triples.diff b/contrib/llvm/patches/patch-04-clang-add-mips-triples.diff index 2a66949..2a66949 100644 --- a/contrib/llvm/patches/patch-06-clang-add-mips-triples.diff +++ b/contrib/llvm/patches/patch-04-clang-add-mips-triples.diff diff --git a/contrib/llvm/patches/patch-07-llvm-r227752-boot2-shrink.diff b/contrib/llvm/patches/patch-07-llvm-r227752-boot2-shrink.diff deleted file mode 100644 index 57e16d7..0000000 --- a/contrib/llvm/patches/patch-07-llvm-r227752-boot2-shrink.diff +++ /dev/null @@ -1,1271 +0,0 @@ -Pull in r227752 from upstream llvm trunk (by Michael Kuperstein): - - [X86] Convert esp-relative movs of function arguments to pushes, step 2 - - This moves the transformation introduced in r223757 into a separate MI pass. - This allows it to cover many more cases (not only cases where there must be a - reserved call frame), and perform rudimentary call folding. It still doesn't - have a heuristic, so it is enabled only for optsize/minsize, with stack - alignment <= 8, where it ought to be a fairly clear win. - - (Re-commit of r227728) - - Differential Revision: http://reviews.llvm.org/D6789 - -This helps to get sys/boot/i386/boot2 below the required size again, -when optimizing with -Oz. - -Introduced here: http://svnweb.freebsd.org/changeset/base/278112 - -Index: include/llvm/Target/TargetFrameLowering.h -=================================================================== ---- include/llvm/Target/TargetFrameLowering.h -+++ include/llvm/Target/TargetFrameLowering.h -@@ -193,6 +193,11 @@ class TargetFrameLowering { - return hasReservedCallFrame(MF) || hasFP(MF); - } - -+ // needsFrameIndexResolution - Do we need to perform FI resolution for -+ // this function. Normally, this is required only when the function -+ // has any stack objects. However, targets may want to override this. -+ virtual bool needsFrameIndexResolution(const MachineFunction &MF) const; -+ - /// getFrameIndexOffset - Returns the displacement from the frame register to - /// the stack frame of the specified index. - virtual int getFrameIndexOffset(const MachineFunction &MF, int FI) const; -Index: lib/CodeGen/PrologEpilogInserter.cpp -=================================================================== ---- lib/CodeGen/PrologEpilogInserter.cpp -+++ lib/CodeGen/PrologEpilogInserter.cpp -@@ -703,7 +703,8 @@ void PEI::insertPrologEpilogCode(MachineFunction & - /// register references and actual offsets. - /// - void PEI::replaceFrameIndices(MachineFunction &Fn) { -- if (!Fn.getFrameInfo()->hasStackObjects()) return; // Nothing to do? -+ const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering(); -+ if (!TFI.needsFrameIndexResolution(Fn)) return; - - // Store SPAdj at exit of a basic block. - SmallVector<int, 8> SPState; -@@ -769,13 +770,6 @@ void PEI::replaceFrameIndices(MachineBasicBlock *B - continue; - } - -- // If we are looking at a call sequence, we need to keep track of -- // the SP adjustment made by each instruction in the sequence. -- // This includes both the frame setup/destroy pseudos (handled above), -- // as well as other instructions that have side effects w.r.t the SP. -- if (InsideCallSequence) -- SPAdj += TII.getSPAdjust(I); -- - MachineInstr *MI = I; - bool DoIncr = true; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { -@@ -854,6 +848,16 @@ void PEI::replaceFrameIndices(MachineBasicBlock *B - break; - } - -+ // If we are looking at a call sequence, we need to keep track of -+ // the SP adjustment made by each instruction in the sequence. -+ // This includes both the frame setup/destroy pseudos (handled above), -+ // as well as other instructions that have side effects w.r.t the SP. -+ // Note that this must come after eliminateFrameIndex, because -+ // if I itself referred to a frame index, we shouldn't count its own -+ // adjustment. -+ if (MI && InsideCallSequence) -+ SPAdj += TII.getSPAdjust(MI); -+ - if (DoIncr && I != BB->end()) ++I; - - // Update register states. -Index: lib/CodeGen/TargetFrameLoweringImpl.cpp -=================================================================== ---- lib/CodeGen/TargetFrameLoweringImpl.cpp -+++ lib/CodeGen/TargetFrameLoweringImpl.cpp -@@ -42,3 +42,8 @@ int TargetFrameLowering::getFrameIndexReference(co - FrameReg = RI->getFrameRegister(MF); - return getFrameIndexOffset(MF, FI); - } -+ -+bool TargetFrameLowering::needsFrameIndexResolution( -+ const MachineFunction &MF) const { -+ return MF.getFrameInfo()->hasStackObjects(); -+} -Index: lib/Target/X86/CMakeLists.txt -=================================================================== ---- lib/Target/X86/CMakeLists.txt -+++ lib/Target/X86/CMakeLists.txt -@@ -14,6 +14,7 @@ add_public_tablegen_target(X86CommonTableGen) - - set(sources - X86AsmPrinter.cpp -+ X86CallFrameOptimization.cpp - X86FastISel.cpp - X86FloatingPoint.cpp - X86FrameLowering.cpp -Index: lib/Target/X86/X86.h -=================================================================== ---- lib/Target/X86/X86.h -+++ lib/Target/X86/X86.h -@@ -67,6 +67,11 @@ FunctionPass *createX86PadShortFunctions(); - /// to eliminate execution delays in some Atom processors. - FunctionPass *createX86FixupLEAs(); - -+/// createX86CallFrameOptimization - Return a pass that optimizes -+/// the code-size of x86 call sequences. This is done by replacing -+/// esp-relative movs with pushes. -+FunctionPass *createX86CallFrameOptimization(); -+ - } // End llvm namespace - - #endif -Index: lib/Target/X86/X86CallFrameOptimization.cpp -=================================================================== ---- lib/Target/X86/X86CallFrameOptimization.cpp -+++ lib/Target/X86/X86CallFrameOptimization.cpp -@@ -0,0 +1,400 @@ -+//===----- X86CallFrameOptimization.cpp - Optimize x86 call sequences -----===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+//===----------------------------------------------------------------------===// -+// -+// This file defines a pass that optimizes call sequences on x86. -+// Currently, it converts movs of function parameters onto the stack into -+// pushes. This is beneficial for two main reasons: -+// 1) The push instruction encoding is much smaller than an esp-relative mov -+// 2) It is possible to push memory arguments directly. So, if the -+// the transformation is preformed pre-reg-alloc, it can help relieve -+// register pressure. -+// -+//===----------------------------------------------------------------------===// -+ -+#include <algorithm> -+ -+#include "X86.h" -+#include "X86InstrInfo.h" -+#include "X86Subtarget.h" -+#include "X86MachineFunctionInfo.h" -+#include "llvm/ADT/Statistic.h" -+#include "llvm/CodeGen/MachineFunctionPass.h" -+#include "llvm/CodeGen/MachineInstrBuilder.h" -+#include "llvm/CodeGen/MachineRegisterInfo.h" -+#include "llvm/CodeGen/Passes.h" -+#include "llvm/IR/Function.h" -+#include "llvm/Support/Debug.h" -+#include "llvm/Support/raw_ostream.h" -+#include "llvm/Target/TargetInstrInfo.h" -+ -+using namespace llvm; -+ -+#define DEBUG_TYPE "x86-cf-opt" -+ -+cl::opt<bool> NoX86CFOpt("no-x86-call-frame-opt", -+ cl::desc("Avoid optimizing x86 call frames for size"), -+ cl::init(false), cl::Hidden); -+ -+namespace { -+class X86CallFrameOptimization : public MachineFunctionPass { -+public: -+ X86CallFrameOptimization() : MachineFunctionPass(ID) {} -+ -+ bool runOnMachineFunction(MachineFunction &MF) override; -+ -+private: -+ bool shouldPerformTransformation(MachineFunction &MF); -+ -+ bool adjustCallSequence(MachineFunction &MF, MachineBasicBlock &MBB, -+ MachineBasicBlock::iterator I); -+ -+ MachineInstr *canFoldIntoRegPush(MachineBasicBlock::iterator FrameSetup, -+ unsigned Reg); -+ -+ const char *getPassName() const override { -+ return "X86 Optimize Call Frame"; -+ } -+ -+ const TargetInstrInfo *TII; -+ const TargetFrameLowering *TFL; -+ const MachineRegisterInfo *MRI; -+ static char ID; -+}; -+ -+char X86CallFrameOptimization::ID = 0; -+} -+ -+FunctionPass *llvm::createX86CallFrameOptimization() { -+ return new X86CallFrameOptimization(); -+} -+ -+// This checks whether the transformation is legal and profitable -+bool X86CallFrameOptimization::shouldPerformTransformation(MachineFunction &MF) { -+ if (NoX86CFOpt.getValue()) -+ return false; -+ -+ // We currently only support call sequences where *all* parameters. -+ // are passed on the stack. -+ // No point in running this in 64-bit mode, since some arguments are -+ // passed in-register in all common calling conventions, so the pattern -+ // we're looking for will never match. -+ const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>(); -+ if (STI.is64Bit()) -+ return false; -+ -+ // You would expect straight-line code between call-frame setup and -+ // call-frame destroy. You would be wrong. There are circumstances (e.g. -+ // CMOV_GR8 expansion of a select that feeds a function call!) where we can -+ // end up with the setup and the destroy in different basic blocks. -+ // This is bad, and breaks SP adjustment. -+ // So, check that all of the frames in the function are closed inside -+ // the same block, and, for good measure, that there are no nested frames. -+ int FrameSetupOpcode = TII->getCallFrameSetupOpcode(); -+ int FrameDestroyOpcode = TII->getCallFrameDestroyOpcode(); -+ for (MachineBasicBlock &BB : MF) { -+ bool InsideFrameSequence = false; -+ for (MachineInstr &MI : BB) { -+ if (MI.getOpcode() == FrameSetupOpcode) { -+ if (InsideFrameSequence) -+ return false; -+ InsideFrameSequence = true; -+ } -+ else if (MI.getOpcode() == FrameDestroyOpcode) { -+ if (!InsideFrameSequence) -+ return false; -+ InsideFrameSequence = false; -+ } -+ } -+ -+ if (InsideFrameSequence) -+ return false; -+ } -+ -+ // Now that we know the transformation is legal, check if it is -+ // profitable. -+ // TODO: Add a heuristic that actually looks at the function, -+ // and enable this for more cases. -+ -+ // This transformation is always a win when we expected to have -+ // a reserved call frame. Under other circumstances, it may be either -+ // a win or a loss, and requires a heuristic. -+ // For now, enable it only for the relatively clear win cases. -+ bool CannotReserveFrame = MF.getFrameInfo()->hasVarSizedObjects(); -+ if (CannotReserveFrame) -+ return true; -+ -+ // For now, don't even try to evaluate the profitability when -+ // not optimizing for size. -+ AttributeSet FnAttrs = MF.getFunction()->getAttributes(); -+ bool OptForSize = -+ FnAttrs.hasAttribute(AttributeSet::FunctionIndex, -+ Attribute::OptimizeForSize) || -+ FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize); -+ -+ if (!OptForSize) -+ return false; -+ -+ // Stack re-alignment can make this unprofitable even in terms of size. -+ // As mentioned above, a better heuristic is needed. For now, don't do this -+ // when the required alignment is above 8. (4 would be the safe choice, but -+ // some experimentation showed 8 is generally good). -+ if (TFL->getStackAlignment() > 8) -+ return false; -+ -+ return true; -+} -+ -+bool X86CallFrameOptimization::runOnMachineFunction(MachineFunction &MF) { -+ TII = MF.getSubtarget().getInstrInfo(); -+ TFL = MF.getSubtarget().getFrameLowering(); -+ MRI = &MF.getRegInfo(); -+ -+ if (!shouldPerformTransformation(MF)) -+ return false; -+ -+ int FrameSetupOpcode = TII->getCallFrameSetupOpcode(); -+ -+ bool Changed = false; -+ -+ for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB) -+ for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) -+ if (I->getOpcode() == FrameSetupOpcode) -+ Changed |= adjustCallSequence(MF, *BB, I); -+ -+ return Changed; -+} -+ -+bool X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF, -+ MachineBasicBlock &MBB, -+ MachineBasicBlock::iterator I) { -+ -+ // Check that this particular call sequence is amenable to the -+ // transformation. -+ const X86RegisterInfo &RegInfo = *static_cast<const X86RegisterInfo *>( -+ MF.getSubtarget().getRegisterInfo()); -+ unsigned StackPtr = RegInfo.getStackRegister(); -+ int FrameDestroyOpcode = TII->getCallFrameDestroyOpcode(); -+ -+ // We expect to enter this at the beginning of a call sequence -+ assert(I->getOpcode() == TII->getCallFrameSetupOpcode()); -+ MachineBasicBlock::iterator FrameSetup = I++; -+ -+ -+ // For globals in PIC mode, we can have some LEAs here. -+ // Ignore them, they don't bother us. -+ // TODO: Extend this to something that covers more cases. -+ while (I->getOpcode() == X86::LEA32r) -+ ++I; -+ -+ // We expect a copy instruction here. -+ // TODO: The copy instruction is a lowering artifact. -+ // We should also support a copy-less version, where the stack -+ // pointer is used directly. -+ if (!I->isCopy() || !I->getOperand(0).isReg()) -+ return false; -+ MachineBasicBlock::iterator SPCopy = I++; -+ StackPtr = SPCopy->getOperand(0).getReg(); -+ -+ // Scan the call setup sequence for the pattern we're looking for. -+ // We only handle a simple case - a sequence of MOV32mi or MOV32mr -+ // instructions, that push a sequence of 32-bit values onto the stack, with -+ // no gaps between them. -+ SmallVector<MachineInstr*, 4> MovVector(4, nullptr); -+ unsigned int MaxAdjust = FrameSetup->getOperand(0).getImm() / 4; -+ if (MaxAdjust > 4) -+ MovVector.resize(MaxAdjust, nullptr); -+ -+ do { -+ int Opcode = I->getOpcode(); -+ if (Opcode != X86::MOV32mi && Opcode != X86::MOV32mr) -+ break; -+ -+ // We only want movs of the form: -+ // movl imm/r32, k(%esp) -+ // If we run into something else, bail. -+ // Note that AddrBaseReg may, counter to its name, not be a register, -+ // but rather a frame index. -+ // TODO: Support the fi case. This should probably work now that we -+ // have the infrastructure to track the stack pointer within a call -+ // sequence. -+ if (!I->getOperand(X86::AddrBaseReg).isReg() || -+ (I->getOperand(X86::AddrBaseReg).getReg() != StackPtr) || -+ !I->getOperand(X86::AddrScaleAmt).isImm() || -+ (I->getOperand(X86::AddrScaleAmt).getImm() != 1) || -+ (I->getOperand(X86::AddrIndexReg).getReg() != X86::NoRegister) || -+ (I->getOperand(X86::AddrSegmentReg).getReg() != X86::NoRegister) || -+ !I->getOperand(X86::AddrDisp).isImm()) -+ return false; -+ -+ int64_t StackDisp = I->getOperand(X86::AddrDisp).getImm(); -+ assert(StackDisp >= 0 && "Negative stack displacement when passing parameters"); -+ -+ // We really don't want to consider the unaligned case. -+ if (StackDisp % 4) -+ return false; -+ StackDisp /= 4; -+ -+ assert((size_t)StackDisp < MovVector.size() && -+ "Function call has more parameters than the stack is adjusted for."); -+ -+ // If the same stack slot is being filled twice, something's fishy. -+ if (MovVector[StackDisp] != nullptr) -+ return false; -+ MovVector[StackDisp] = I; -+ -+ ++I; -+ } while (I != MBB.end()); -+ -+ // We now expect the end of the sequence - a call and a stack adjust. -+ if (I == MBB.end()) -+ return false; -+ -+ // For PCrel calls, we expect an additional COPY of the basereg. -+ // If we find one, skip it. -+ if (I->isCopy()) { -+ if (I->getOperand(1).getReg() == -+ MF.getInfo<X86MachineFunctionInfo>()->getGlobalBaseReg()) -+ ++I; -+ else -+ return false; -+ } -+ -+ if (!I->isCall()) -+ return false; -+ MachineBasicBlock::iterator Call = I; -+ if ((++I)->getOpcode() != FrameDestroyOpcode) -+ return false; -+ -+ // Now, go through the vector, and see that we don't have any gaps, -+ // but only a series of 32-bit MOVs. -+ -+ int64_t ExpectedDist = 0; -+ auto MMI = MovVector.begin(), MME = MovVector.end(); -+ for (; MMI != MME; ++MMI, ExpectedDist += 4) -+ if (*MMI == nullptr) -+ break; -+ -+ // If the call had no parameters, do nothing -+ if (!ExpectedDist) -+ return false; -+ -+ // We are either at the last parameter, or a gap. -+ // Make sure it's not a gap -+ for (; MMI != MME; ++MMI) -+ if (*MMI != nullptr) -+ return false; -+ -+ // Ok, we can in fact do the transformation for this call. -+ // Do not remove the FrameSetup instruction, but adjust the parameters. -+ // PEI will end up finalizing the handling of this. -+ FrameSetup->getOperand(1).setImm(ExpectedDist); -+ -+ DebugLoc DL = I->getDebugLoc(); -+ // Now, iterate through the vector in reverse order, and replace the movs -+ // with pushes. MOVmi/MOVmr doesn't have any defs, so no need to -+ // replace uses. -+ for (int Idx = (ExpectedDist / 4) - 1; Idx >= 0; --Idx) { -+ MachineBasicBlock::iterator MOV = *MovVector[Idx]; -+ MachineOperand PushOp = MOV->getOperand(X86::AddrNumOperands); -+ if (MOV->getOpcode() == X86::MOV32mi) { -+ unsigned PushOpcode = X86::PUSHi32; -+ // If the operand is a small (8-bit) immediate, we can use a -+ // PUSH instruction with a shorter encoding. -+ // Note that isImm() may fail even though this is a MOVmi, because -+ // the operand can also be a symbol. -+ if (PushOp.isImm()) { -+ int64_t Val = PushOp.getImm(); -+ if (isInt<8>(Val)) -+ PushOpcode = X86::PUSH32i8; -+ } -+ BuildMI(MBB, Call, DL, TII->get(PushOpcode)).addOperand(PushOp); -+ } else { -+ unsigned int Reg = PushOp.getReg(); -+ -+ // If PUSHrmm is not slow on this target, try to fold the source of the -+ // push into the instruction. -+ const X86Subtarget &ST = MF.getTarget().getSubtarget<X86Subtarget>(); -+ bool SlowPUSHrmm = ST.isAtom() || ST.isSLM(); -+ -+ // Check that this is legal to fold. Right now, we're extremely -+ // conservative about that. -+ MachineInstr *DefMov = nullptr; -+ if (!SlowPUSHrmm && (DefMov = canFoldIntoRegPush(FrameSetup, Reg))) { -+ MachineInstr *Push = BuildMI(MBB, Call, DL, TII->get(X86::PUSH32rmm)); -+ -+ unsigned NumOps = DefMov->getDesc().getNumOperands(); -+ for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i) -+ Push->addOperand(DefMov->getOperand(i)); -+ -+ DefMov->eraseFromParent(); -+ } else { -+ BuildMI(MBB, Call, DL, TII->get(X86::PUSH32r)).addReg(Reg).getInstr(); -+ } -+ } -+ -+ MBB.erase(MOV); -+ } -+ -+ // The stack-pointer copy is no longer used in the call sequences. -+ // There should not be any other users, but we can't commit to that, so: -+ if (MRI->use_empty(SPCopy->getOperand(0).getReg())) -+ SPCopy->eraseFromParent(); -+ -+ // Once we've done this, we need to make sure PEI doesn't assume a reserved -+ // frame. -+ X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); -+ FuncInfo->setHasPushSequences(true); -+ -+ return true; -+} -+ -+MachineInstr *X86CallFrameOptimization::canFoldIntoRegPush( -+ MachineBasicBlock::iterator FrameSetup, unsigned Reg) { -+ // Do an extremely restricted form of load folding. -+ // ISel will often create patterns like: -+ // movl 4(%edi), %eax -+ // movl 8(%edi), %ecx -+ // movl 12(%edi), %edx -+ // movl %edx, 8(%esp) -+ // movl %ecx, 4(%esp) -+ // movl %eax, (%esp) -+ // call -+ // Get rid of those with prejudice. -+ if (!TargetRegisterInfo::isVirtualRegister(Reg)) -+ return nullptr; -+ -+ // Make sure this is the only use of Reg. -+ if (!MRI->hasOneNonDBGUse(Reg)) -+ return nullptr; -+ -+ MachineBasicBlock::iterator DefMI = MRI->getVRegDef(Reg); -+ -+ // Make sure the def is a MOV from memory. -+ // If the def is an another block, give up. -+ if (DefMI->getOpcode() != X86::MOV32rm || -+ DefMI->getParent() != FrameSetup->getParent()) -+ return nullptr; -+ -+ // Be careful with movs that load from a stack slot, since it may get -+ // resolved incorrectly. -+ // TODO: Again, we already have the infrastructure, so this should work. -+ if (!DefMI->getOperand(1).isReg()) -+ return nullptr; -+ -+ // Now, make sure everything else up until the ADJCALLSTACK is a sequence -+ // of MOVs. To be less conservative would require duplicating a lot of the -+ // logic from PeepholeOptimizer. -+ // FIXME: A possibly better approach would be to teach the PeepholeOptimizer -+ // to be smarter about folding into pushes. -+ for (auto I = DefMI; I != FrameSetup; ++I) -+ if (I->getOpcode() != X86::MOV32rm) -+ return nullptr; -+ -+ return DefMI; -+} -Index: lib/Target/X86/X86FastISel.cpp -=================================================================== ---- lib/Target/X86/X86FastISel.cpp -+++ lib/Target/X86/X86FastISel.cpp -@@ -2735,7 +2735,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo & - // Issue CALLSEQ_START - unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown)) -- .addImm(NumBytes); -+ .addImm(NumBytes).addImm(0); - - // Walk the register/memloc assignments, inserting copies/loads. - const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>( -Index: lib/Target/X86/X86FrameLowering.cpp -=================================================================== ---- lib/Target/X86/X86FrameLowering.cpp -+++ lib/Target/X86/X86FrameLowering.cpp -@@ -38,9 +38,36 @@ using namespace llvm; - extern cl::opt<bool> ForceStackAlign; - - bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { -- return !MF.getFrameInfo()->hasVarSizedObjects(); -+ return !MF.getFrameInfo()->hasVarSizedObjects() && -+ !MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences(); - } - -+/// canSimplifyCallFramePseudos - If there is a reserved call frame, the -+/// call frame pseudos can be simplified. Having a FP, as in the default -+/// implementation, is not sufficient here since we can't always use it. -+/// Use a more nuanced condition. -+bool -+X86FrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const { -+ const X86RegisterInfo *TRI = static_cast<const X86RegisterInfo *> -+ (MF.getSubtarget().getRegisterInfo()); -+ return hasReservedCallFrame(MF) || -+ (hasFP(MF) && !TRI->needsStackRealignment(MF)) -+ || TRI->hasBasePointer(MF); -+} -+ -+// needsFrameIndexResolution - Do we need to perform FI resolution for -+// this function. Normally, this is required only when the function -+// has any stack objects. However, FI resolution actually has another job, -+// not apparent from the title - it resolves callframesetup/destroy -+// that were not simplified earlier. -+// So, this is required for x86 functions that have push sequences even -+// when there are no stack objects. -+bool -+X86FrameLowering::needsFrameIndexResolution(const MachineFunction &MF) const { -+ return MF.getFrameInfo()->hasStackObjects() || -+ MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences(); -+} -+ - /// hasFP - Return true if the specified function should have a dedicated frame - /// pointer register. This is true if the function has variable sized allocas - /// or if frame pointer elimination is disabled. -@@ -93,16 +120,6 @@ static unsigned getANDriOpcode(bool IsLP64, int64_ - return X86::AND32ri; - } - --static unsigned getPUSHiOpcode(bool IsLP64, MachineOperand MO) { -- // We don't support LP64 for now. -- assert(!IsLP64); -- -- if (MO.isImm() && isInt<8>(MO.getImm())) -- return X86::PUSH32i8; -- -- return X86::PUSHi32;; --} -- - static unsigned getLEArOpcode(unsigned IsLP64) { - return IsLP64 ? X86::LEA64r : X86::LEA32r; - } -@@ -1882,100 +1899,6 @@ void X86FrameLowering::adjustForHiPEPrologue(Machi - #endif - } - --bool X86FrameLowering:: --convertArgMovsToPushes(MachineFunction &MF, MachineBasicBlock &MBB, -- MachineBasicBlock::iterator I, uint64_t Amount) const { -- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); -- const X86RegisterInfo &RegInfo = *static_cast<const X86RegisterInfo *>( -- MF.getSubtarget().getRegisterInfo()); -- unsigned StackPtr = RegInfo.getStackRegister(); -- -- // Scan the call setup sequence for the pattern we're looking for. -- // We only handle a simple case now - a sequence of MOV32mi or MOV32mr -- // instructions, that push a sequence of 32-bit values onto the stack, with -- // no gaps. -- std::map<int64_t, MachineBasicBlock::iterator> MovMap; -- do { -- int Opcode = I->getOpcode(); -- if (Opcode != X86::MOV32mi && Opcode != X86::MOV32mr) -- break; -- -- // We only want movs of the form: -- // movl imm/r32, k(%ecx) -- // If we run into something else, bail -- // Note that AddrBaseReg may, counterintuitively, not be a register... -- if (!I->getOperand(X86::AddrBaseReg).isReg() || -- (I->getOperand(X86::AddrBaseReg).getReg() != StackPtr) || -- !I->getOperand(X86::AddrScaleAmt).isImm() || -- (I->getOperand(X86::AddrScaleAmt).getImm() != 1) || -- (I->getOperand(X86::AddrIndexReg).getReg() != X86::NoRegister) || -- (I->getOperand(X86::AddrSegmentReg).getReg() != X86::NoRegister) || -- !I->getOperand(X86::AddrDisp).isImm()) -- return false; -- -- int64_t StackDisp = I->getOperand(X86::AddrDisp).getImm(); -- -- // We don't want to consider the unaligned case. -- if (StackDisp % 4) -- return false; -- -- // If the same stack slot is being filled twice, something's fishy. -- if (!MovMap.insert(std::pair<int64_t, MachineInstr*>(StackDisp, I)).second) -- return false; -- -- ++I; -- } while (I != MBB.end()); -- -- // We now expect the end of the sequence - a call and a stack adjust. -- if (I == MBB.end()) -- return false; -- if (!I->isCall()) -- return false; -- MachineBasicBlock::iterator Call = I; -- if ((++I)->getOpcode() != TII.getCallFrameDestroyOpcode()) -- return false; -- -- // Now, go through the map, and see that we don't have any gaps, -- // but only a series of 32-bit MOVs. -- // Since std::map provides ordered iteration, the original order -- // of the MOVs doesn't matter. -- int64_t ExpectedDist = 0; -- for (auto MMI = MovMap.begin(), MME = MovMap.end(); MMI != MME; -- ++MMI, ExpectedDist += 4) -- if (MMI->first != ExpectedDist) -- return false; -- -- // Ok, everything looks fine. Do the transformation. -- DebugLoc DL = I->getDebugLoc(); -- -- // It's possible the original stack adjustment amount was larger than -- // that done by the pushes. If so, we still need a SUB. -- Amount -= ExpectedDist; -- if (Amount) { -- MachineInstr* Sub = BuildMI(MBB, Call, DL, -- TII.get(getSUBriOpcode(false, Amount)), StackPtr) -- .addReg(StackPtr).addImm(Amount); -- Sub->getOperand(3).setIsDead(); -- } -- -- // Now, iterate through the map in reverse order, and replace the movs -- // with pushes. MOVmi/MOVmr doesn't have any defs, so need to replace uses. -- for (auto MMI = MovMap.rbegin(), MME = MovMap.rend(); MMI != MME; ++MMI) { -- MachineBasicBlock::iterator MOV = MMI->second; -- MachineOperand PushOp = MOV->getOperand(X86::AddrNumOperands); -- -- // Replace MOVmr with PUSH32r, and MOVmi with PUSHi of appropriate size -- int PushOpcode = X86::PUSH32r; -- if (MOV->getOpcode() == X86::MOV32mi) -- PushOpcode = getPUSHiOpcode(false, PushOp); -- -- BuildMI(MBB, Call, DL, TII.get(PushOpcode)).addOperand(PushOp); -- MBB.erase(MOV); -- } -- -- return true; --} -- - void X86FrameLowering:: - eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { -@@ -1990,7 +1913,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, - bool IsLP64 = STI.isTarget64BitLP64(); - DebugLoc DL = I->getDebugLoc(); - uint64_t Amount = !reserveCallFrame ? I->getOperand(0).getImm() : 0; -- uint64_t CalleeAmt = isDestroy ? I->getOperand(1).getImm() : 0; -+ uint64_t InternalAmt = (isDestroy || Amount) ? I->getOperand(1).getImm() : 0; - I = MBB.erase(I); - - if (!reserveCallFrame) { -@@ -2010,24 +1933,18 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, - Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign; - - MachineInstr *New = nullptr; -- if (Opcode == TII.getCallFrameSetupOpcode()) { -- // Try to convert movs to the stack into pushes. -- // We currently only look for a pattern that appears in 32-bit -- // calling conventions. -- if (!IsLP64 && convertArgMovsToPushes(MF, MBB, I, Amount)) -- return; - -- New = BuildMI(MF, DL, TII.get(getSUBriOpcode(IsLP64, Amount)), -- StackPtr) -- .addReg(StackPtr) -- .addImm(Amount); -- } else { -- assert(Opcode == TII.getCallFrameDestroyOpcode()); -+ // Factor out the amount that gets handled inside the sequence -+ // (Pushes of argument for frame setup, callee pops for frame destroy) -+ Amount -= InternalAmt; - -- // Factor out the amount the callee already popped. -- Amount -= CalleeAmt; -+ if (Amount) { -+ if (Opcode == TII.getCallFrameSetupOpcode()) { -+ New = BuildMI(MF, DL, TII.get(getSUBriOpcode(IsLP64, Amount)), StackPtr) -+ .addReg(StackPtr).addImm(Amount); -+ } else { -+ assert(Opcode == TII.getCallFrameDestroyOpcode()); - -- if (Amount) { - unsigned Opc = getADDriOpcode(IsLP64, Amount); - New = BuildMI(MF, DL, TII.get(Opc), StackPtr) - .addReg(StackPtr).addImm(Amount); -@@ -2045,13 +1962,13 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, - return; - } - -- if (Opcode == TII.getCallFrameDestroyOpcode() && CalleeAmt) { -+ if (Opcode == TII.getCallFrameDestroyOpcode() && InternalAmt) { - // If we are performing frame pointer elimination and if the callee pops - // something off the stack pointer, add it back. We do this until we have - // more advanced stack pointer tracking ability. -- unsigned Opc = getSUBriOpcode(IsLP64, CalleeAmt); -+ unsigned Opc = getSUBriOpcode(IsLP64, InternalAmt); - MachineInstr *New = BuildMI(MF, DL, TII.get(Opc), StackPtr) -- .addReg(StackPtr).addImm(CalleeAmt); -+ .addReg(StackPtr).addImm(InternalAmt); - - // The EFLAGS implicit def is dead. - New->getOperand(3).setIsDead(); -Index: lib/Target/X86/X86FrameLowering.h -=================================================================== ---- lib/Target/X86/X86FrameLowering.h -+++ lib/Target/X86/X86FrameLowering.h -@@ -66,6 +66,8 @@ class X86FrameLowering : public TargetFrameLowerin - - bool hasFP(const MachineFunction &MF) const override; - bool hasReservedCallFrame(const MachineFunction &MF) const override; -+ bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override; -+ bool needsFrameIndexResolution(const MachineFunction &MF) const override; - - int getFrameIndexOffset(const MachineFunction &MF, int FI) const override; - int getFrameIndexReference(const MachineFunction &MF, int FI, -Index: lib/Target/X86/X86InstrCompiler.td -=================================================================== ---- lib/Target/X86/X86InstrCompiler.td -+++ lib/Target/X86/X86InstrCompiler.td -@@ -43,9 +43,9 @@ let hasSideEffects = 0, isNotDuplicable = 1, Uses - // Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become - // sub / add which can clobber EFLAGS. - let Defs = [ESP, EFLAGS], Uses = [ESP] in { --def ADJCALLSTACKDOWN32 : I<0, Pseudo, (outs), (ins i32imm:$amt), -+def ADJCALLSTACKDOWN32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2), - "#ADJCALLSTACKDOWN", -- [(X86callseq_start timm:$amt)]>, -+ []>, - Requires<[NotLP64]>; - def ADJCALLSTACKUP32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2), - "#ADJCALLSTACKUP", -@@ -52,7 +52,10 @@ def ADJCALLSTACKUP32 : I<0, Pseudo, (outs), (ins - [(X86callseq_end timm:$amt1, timm:$amt2)]>, - Requires<[NotLP64]>; - } -+def : Pat<(X86callseq_start timm:$amt1), -+ (ADJCALLSTACKDOWN32 i32imm:$amt1, 0)>, Requires<[NotLP64]>; - -+ - // ADJCALLSTACKDOWN/UP implicitly use/def RSP because they may be expanded into - // a stack adjustment and the codegen must know that they may modify the stack - // pointer before prolog-epilog rewriting occurs. -@@ -59,9 +62,9 @@ def ADJCALLSTACKUP32 : I<0, Pseudo, (outs), (ins - // Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become - // sub / add which can clobber EFLAGS. - let Defs = [RSP, EFLAGS], Uses = [RSP] in { --def ADJCALLSTACKDOWN64 : I<0, Pseudo, (outs), (ins i32imm:$amt), -+def ADJCALLSTACKDOWN64 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2), - "#ADJCALLSTACKDOWN", -- [(X86callseq_start timm:$amt)]>, -+ []>, - Requires<[IsLP64]>; - def ADJCALLSTACKUP64 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2), - "#ADJCALLSTACKUP", -@@ -68,9 +71,10 @@ def ADJCALLSTACKUP64 : I<0, Pseudo, (outs), (ins - [(X86callseq_end timm:$amt1, timm:$amt2)]>, - Requires<[IsLP64]>; - } -+def : Pat<(X86callseq_start timm:$amt1), -+ (ADJCALLSTACKDOWN64 i32imm:$amt1, 0)>, Requires<[IsLP64]>; - - -- - // x86-64 va_start lowering magic. - let usesCustomInserter = 1, Defs = [EFLAGS] in { - def VASTART_SAVE_XMM_REGS : I<0, Pseudo, -Index: lib/Target/X86/X86InstrInfo.cpp -=================================================================== ---- lib/Target/X86/X86InstrInfo.cpp -+++ lib/Target/X86/X86InstrInfo.cpp -@@ -1692,6 +1692,58 @@ X86InstrInfo::isCoalescableExtInstr(const MachineI - return false; - } - -+int X86InstrInfo::getSPAdjust(const MachineInstr *MI) const { -+ const MachineFunction *MF = MI->getParent()->getParent(); -+ const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering(); -+ -+ if (MI->getOpcode() == getCallFrameSetupOpcode() || -+ MI->getOpcode() == getCallFrameDestroyOpcode()) { -+ unsigned StackAlign = TFI->getStackAlignment(); -+ int SPAdj = (MI->getOperand(0).getImm() + StackAlign - 1) / StackAlign * -+ StackAlign; -+ -+ SPAdj -= MI->getOperand(1).getImm(); -+ -+ if (MI->getOpcode() == getCallFrameSetupOpcode()) -+ return SPAdj; -+ else -+ return -SPAdj; -+ } -+ -+ // To know whether a call adjusts the stack, we need information -+ // that is bound to the following ADJCALLSTACKUP pseudo. -+ // Look for the next ADJCALLSTACKUP that follows the call. -+ if (MI->isCall()) { -+ const MachineBasicBlock* MBB = MI->getParent(); -+ auto I = ++MachineBasicBlock::const_iterator(MI); -+ for (auto E = MBB->end(); I != E; ++I) { -+ if (I->getOpcode() == getCallFrameDestroyOpcode() || -+ I->isCall()) -+ break; -+ } -+ -+ // If we could not find a frame destroy opcode, then it has already -+ // been simplified, so we don't care. -+ if (I->getOpcode() != getCallFrameDestroyOpcode()) -+ return 0; -+ -+ return -(I->getOperand(1).getImm()); -+ } -+ -+ // Currently handle only PUSHes we can reasonably expect to see -+ // in call sequences -+ switch (MI->getOpcode()) { -+ default: -+ return 0; -+ case X86::PUSH32i8: -+ case X86::PUSH32r: -+ case X86::PUSH32rmm: -+ case X86::PUSH32rmr: -+ case X86::PUSHi32: -+ return 4; -+ } -+} -+ - /// isFrameOperand - Return true and the FrameIndex if the specified - /// operand and follow operands form a reference to the stack frame. - bool X86InstrInfo::isFrameOperand(const MachineInstr *MI, unsigned int Op, -Index: lib/Target/X86/X86InstrInfo.h -=================================================================== ---- lib/Target/X86/X86InstrInfo.h -+++ lib/Target/X86/X86InstrInfo.h -@@ -175,6 +175,11 @@ class X86InstrInfo final : public X86GenInstrInfo - /// - const X86RegisterInfo &getRegisterInfo() const { return RI; } - -+ /// getSPAdjust - This returns the stack pointer adjustment made by -+ /// this instruction. For x86, we need to handle more complex call -+ /// sequences involving PUSHes. -+ int getSPAdjust(const MachineInstr *MI) const override; -+ - /// isCoalescableExtInstr - Return true if the instruction is a "coalescable" - /// extension instruction. That is, it's like a copy where it's legal for the - /// source to overlap the destination. e.g. X86::MOVSX64rr32. If this returns -Index: lib/Target/X86/X86MachineFunctionInfo.h -=================================================================== ---- lib/Target/X86/X86MachineFunctionInfo.h -+++ lib/Target/X86/X86MachineFunctionInfo.h -@@ -77,6 +77,9 @@ class X86MachineFunctionInfo : public MachineFunct - unsigned ArgumentStackSize; - /// NumLocalDynamics - Number of local-dynamic TLS accesses. - unsigned NumLocalDynamics; -+ /// HasPushSequences - Keeps track of whether this function uses sequences -+ /// of pushes to pass function parameters. -+ bool HasPushSequences; - - private: - /// ForwardedMustTailRegParms - A list of virtual and physical registers -@@ -97,7 +100,8 @@ class X86MachineFunctionInfo : public MachineFunct - VarArgsGPOffset(0), - VarArgsFPOffset(0), - ArgumentStackSize(0), -- NumLocalDynamics(0) {} -+ NumLocalDynamics(0), -+ HasPushSequences(false) {} - - explicit X86MachineFunctionInfo(MachineFunction &MF) - : ForceFramePointer(false), -@@ -113,11 +117,15 @@ class X86MachineFunctionInfo : public MachineFunct - VarArgsGPOffset(0), - VarArgsFPOffset(0), - ArgumentStackSize(0), -- NumLocalDynamics(0) {} -+ NumLocalDynamics(0), -+ HasPushSequences(false) {} - - bool getForceFramePointer() const { return ForceFramePointer;} - void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; } - -+ bool getHasPushSequences() const { return HasPushSequences; } -+ void setHasPushSequences(bool HasPush) { HasPushSequences = HasPush; } -+ - bool getRestoreBasePointer() const { return RestoreBasePointerOffset!=0; } - void setRestoreBasePointer(const MachineFunction *MF); - int getRestoreBasePointerOffset() const {return RestoreBasePointerOffset; } -Index: lib/Target/X86/X86RegisterInfo.cpp -=================================================================== ---- lib/Target/X86/X86RegisterInfo.cpp -+++ lib/Target/X86/X86RegisterInfo.cpp -@@ -468,8 +468,6 @@ void - X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, unsigned FIOperandNum, - RegScavenger *RS) const { -- assert(SPAdj == 0 && "Unexpected"); -- - MachineInstr &MI = *II; - MachineFunction &MF = *MI.getParent()->getParent(); - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); -@@ -506,6 +504,9 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicB - } else - FIOffset = TFI->getFrameIndexOffset(MF, FrameIndex); - -+ if (BasePtr == StackPtr) -+ FIOffset += SPAdj; -+ - // The frame index format for stackmaps and patchpoints is different from the - // X86 format. It only has a FI and an offset. - if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) { -Index: lib/Target/X86/X86TargetMachine.cpp -=================================================================== ---- lib/Target/X86/X86TargetMachine.cpp -+++ lib/Target/X86/X86TargetMachine.cpp -@@ -154,6 +154,7 @@ class X86PassConfig : public TargetPassConfig { - void addIRPasses() override; - bool addInstSelector() override; - bool addILPOpts() override; -+ void addPreRegAlloc() override; - void addPostRegAlloc() override; - void addPreEmitPass() override; - }; -@@ -187,6 +188,10 @@ bool X86PassConfig::addILPOpts() { - return true; - } - -+void X86PassConfig::addPreRegAlloc() { -+ addPass(createX86CallFrameOptimization()); -+} -+ - void X86PassConfig::addPostRegAlloc() { - addPass(createX86FloatingPointStackifierPass()); - } -Index: test/CodeGen/X86/inalloca-invoke.ll -=================================================================== ---- test/CodeGen/X86/inalloca-invoke.ll -+++ test/CodeGen/X86/inalloca-invoke.ll -@@ -31,7 +31,7 @@ blah: - to label %invoke.cont unwind label %lpad - - ; Uses end as sret param. --; CHECK: movl %[[end]], (%esp) -+; CHECK: pushl %[[end]] - ; CHECK: calll _plus - - invoke.cont: -Index: test/CodeGen/X86/movtopush.ll -=================================================================== ---- test/CodeGen/X86/movtopush.ll -+++ test/CodeGen/X86/movtopush.ll -@@ -1,10 +1,12 @@ - ; RUN: llc < %s -mtriple=i686-windows | FileCheck %s -check-prefix=NORMAL -+; RUN: llc < %s -mtriple=x86_64-windows | FileCheck %s -check-prefix=X64 - ; RUN: llc < %s -mtriple=i686-windows -force-align-stack -stack-alignment=32 | FileCheck %s -check-prefix=ALIGNED -+ - declare void @good(i32 %a, i32 %b, i32 %c, i32 %d) - declare void @inreg(i32 %a, i32 inreg %b, i32 %c, i32 %d) - - ; Here, we should have a reserved frame, so we don't expect pushes --; NORMAL-LABEL: test1 -+; NORMAL-LABEL: test1: - ; NORMAL: subl $16, %esp - ; NORMAL-NEXT: movl $4, 12(%esp) - ; NORMAL-NEXT: movl $3, 8(%esp) -@@ -11,6 +13,7 @@ declare void @inreg(i32 %a, i32 inreg %b, i32 %c, - ; NORMAL-NEXT: movl $2, 4(%esp) - ; NORMAL-NEXT: movl $1, (%esp) - ; NORMAL-NEXT: call -+; NORMAL-NEXT: addl $16, %esp - define void @test1() { - entry: - call void @good(i32 1, i32 2, i32 3, i32 4) -@@ -17,8 +20,10 @@ entry: - ret void - } - --; Here, we expect a sequence of 4 immediate pushes --; NORMAL-LABEL: test2 -+; We're optimizing for code size, so we should get pushes for x86, -+; even though there is a reserved call frame. -+; Make sure we don't touch x86-64 -+; NORMAL-LABEL: test1b: - ; NORMAL-NOT: subl {{.*}} %esp - ; NORMAL: pushl $4 - ; NORMAL-NEXT: pushl $3 -@@ -25,6 +30,42 @@ entry: - ; NORMAL-NEXT: pushl $2 - ; NORMAL-NEXT: pushl $1 - ; NORMAL-NEXT: call -+; NORMAL-NEXT: addl $16, %esp -+; X64-LABEL: test1b: -+; X64: movl $1, %ecx -+; X64-NEXT: movl $2, %edx -+; X64-NEXT: movl $3, %r8d -+; X64-NEXT: movl $4, %r9d -+; X64-NEXT: callq good -+define void @test1b() optsize { -+entry: -+ call void @good(i32 1, i32 2, i32 3, i32 4) -+ ret void -+} -+ -+; Same as above, but for minsize -+; NORMAL-LABEL: test1c: -+; NORMAL-NOT: subl {{.*}} %esp -+; NORMAL: pushl $4 -+; NORMAL-NEXT: pushl $3 -+; NORMAL-NEXT: pushl $2 -+; NORMAL-NEXT: pushl $1 -+; NORMAL-NEXT: call -+; NORMAL-NEXT: addl $16, %esp -+define void @test1c() minsize { -+entry: -+ call void @good(i32 1, i32 2, i32 3, i32 4) -+ ret void -+} -+ -+; If we have a reserved frame, we should have pushes -+; NORMAL-LABEL: test2: -+; NORMAL-NOT: subl {{.*}} %esp -+; NORMAL: pushl $4 -+; NORMAL-NEXT: pushl $3 -+; NORMAL-NEXT: pushl $2 -+; NORMAL-NEXT: pushl $1 -+; NORMAL-NEXT: call - define void @test2(i32 %k) { - entry: - %a = alloca i32, i32 %k -@@ -34,7 +75,7 @@ entry: - - ; Again, we expect a sequence of 4 immediate pushes - ; Checks that we generate the right pushes for >8bit immediates --; NORMAL-LABEL: test2b -+; NORMAL-LABEL: test2b: - ; NORMAL-NOT: subl {{.*}} %esp - ; NORMAL: pushl $4096 - ; NORMAL-NEXT: pushl $3072 -@@ -41,15 +82,15 @@ entry: - ; NORMAL-NEXT: pushl $2048 - ; NORMAL-NEXT: pushl $1024 - ; NORMAL-NEXT: call --define void @test2b(i32 %k) { -+; NORMAL-NEXT: addl $16, %esp -+define void @test2b() optsize { - entry: -- %a = alloca i32, i32 %k - call void @good(i32 1024, i32 2048, i32 3072, i32 4096) - ret void - } - - ; The first push should push a register --; NORMAL-LABEL: test3 -+; NORMAL-LABEL: test3: - ; NORMAL-NOT: subl {{.*}} %esp - ; NORMAL: pushl $4 - ; NORMAL-NEXT: pushl $3 -@@ -56,15 +97,15 @@ entry: - ; NORMAL-NEXT: pushl $2 - ; NORMAL-NEXT: pushl %e{{..}} - ; NORMAL-NEXT: call --define void @test3(i32 %k) { -+; NORMAL-NEXT: addl $16, %esp -+define void @test3(i32 %k) optsize { - entry: -- %a = alloca i32, i32 %k - call void @good(i32 %k, i32 2, i32 3, i32 4) - ret void - } - - ; We don't support weird calling conventions --; NORMAL-LABEL: test4 -+; NORMAL-LABEL: test4: - ; NORMAL: subl $12, %esp - ; NORMAL-NEXT: movl $4, 8(%esp) - ; NORMAL-NEXT: movl $3, 4(%esp) -@@ -71,16 +112,16 @@ entry: - ; NORMAL-NEXT: movl $1, (%esp) - ; NORMAL-NEXT: movl $2, %eax - ; NORMAL-NEXT: call --define void @test4(i32 %k) { -+; NORMAL-NEXT: addl $12, %esp -+define void @test4() optsize { - entry: -- %a = alloca i32, i32 %k - call void @inreg(i32 1, i32 2, i32 3, i32 4) - ret void - } - --; Check that additional alignment is added when the pushes --; don't add up to the required alignment. --; ALIGNED-LABEL: test5 -+; When there is no reserved call frame, check that additional alignment -+; is added when the pushes don't add up to the required alignment. -+; ALIGNED-LABEL: test5: - ; ALIGNED: subl $16, %esp - ; ALIGNED-NEXT: pushl $4 - ; ALIGNED-NEXT: pushl $3 -@@ -97,7 +138,7 @@ entry: - ; Check that pushing the addresses of globals (Or generally, things that - ; aren't exactly immediates) isn't broken. - ; Fixes PR21878. --; NORMAL-LABEL: test6 -+; NORMAL-LABEL: test6: - ; NORMAL: pushl $_ext - ; NORMAL-NEXT: call - declare void @f(i8*) -@@ -110,3 +151,108 @@ bb: - alloca i32 - ret void - } -+ -+; Check that we fold simple cases into the push -+; NORMAL-LABEL: test7: -+; NORMAL-NOT: subl {{.*}} %esp -+; NORMAL: movl 4(%esp), [[EAX:%e..]] -+; NORMAL-NEXT: pushl $4 -+; NORMAL-NEXT: pushl ([[EAX]]) -+; NORMAL-NEXT: pushl $2 -+; NORMAL-NEXT: pushl $1 -+; NORMAL-NEXT: call -+; NORMAL-NEXT: addl $16, %esp -+define void @test7(i32* %ptr) optsize { -+entry: -+ %val = load i32* %ptr -+ call void @good(i32 1, i32 2, i32 %val, i32 4) -+ ret void -+} -+ -+; But we don't want to fold stack-relative loads into the push, -+; because the offset will be wrong -+; NORMAL-LABEL: test8: -+; NORMAL-NOT: subl {{.*}} %esp -+; NORMAL: movl 4(%esp), [[EAX:%e..]] -+; NORMAL-NEXT: pushl $4 -+; NORMAL-NEXT: pushl [[EAX]] -+; NORMAL-NEXT: pushl $2 -+; NORMAL-NEXT: pushl $1 -+; NORMAL-NEXT: call -+; NORMAL-NEXT: addl $16, %esp -+define void @test8(i32* %ptr) optsize { -+entry: -+ %val = ptrtoint i32* %ptr to i32 -+ call void @good(i32 1, i32 2, i32 %val, i32 4) -+ ret void -+} -+ -+; If one function is using push instructions, and the other isn't -+; (because it has frame-index references), then we must resolve -+; these references correctly. -+; NORMAL-LABEL: test9: -+; NORMAL-NOT: leal (%esp), -+; NORMAL: pushl $4 -+; NORMAL-NEXT: pushl $3 -+; NORMAL-NEXT: pushl $2 -+; NORMAL-NEXT: pushl $1 -+; NORMAL-NEXT: call -+; NORMAL-NEXT: addl $16, %esp -+; NORMAL-NEXT: subl $16, %esp -+; NORMAL-NEXT: leal 16(%esp), [[EAX:%e..]] -+; NORMAL-NEXT: movl [[EAX]], 12(%esp) -+; NORMAL-NEXT: movl $7, 8(%esp) -+; NORMAL-NEXT: movl $6, 4(%esp) -+; NORMAL-NEXT: movl $5, (%esp) -+; NORMAL-NEXT: call -+; NORMAL-NEXT: addl $16, %esp -+define void @test9() optsize { -+entry: -+ %p = alloca i32, align 4 -+ call void @good(i32 1, i32 2, i32 3, i32 4) -+ %0 = ptrtoint i32* %p to i32 -+ call void @good(i32 5, i32 6, i32 7, i32 %0) -+ ret void -+} -+ -+; We can end up with an indirect call which gets reloaded on the spot. -+; Make sure we reference the correct stack slot - we spill into (%esp) -+; and reload from 16(%esp) due to the pushes. -+; NORMAL-LABEL: test10: -+; NORMAL: movl $_good, [[ALLOC:.*]] -+; NORMAL-NEXT: movl [[ALLOC]], [[EAX:%e..]] -+; NORMAL-NEXT: movl [[EAX]], (%esp) # 4-byte Spill -+; NORMAL: nop -+; NORMAL: pushl $4 -+; NORMAL-NEXT: pushl $3 -+; NORMAL-NEXT: pushl $2 -+; NORMAL-NEXT: pushl $1 -+; NORMAL-NEXT: calll *16(%esp) -+; NORMAL-NEXT: addl $16, %esp -+define void @test10() optsize { -+ %stack_fptr = alloca void (i32, i32, i32, i32)* -+ store void (i32, i32, i32, i32)* @good, void (i32, i32, i32, i32)** %stack_fptr -+ %good_ptr = load volatile void (i32, i32, i32, i32)** %stack_fptr -+ call void asm sideeffect "nop", "~{ax},~{bx},~{cx},~{dx},~{bp},~{si},~{di}"() -+ call void (i32, i32, i32, i32)* %good_ptr(i32 1, i32 2, i32 3, i32 4) -+ ret void -+} -+ -+; We can't fold the load from the global into the push because of -+; interference from the store -+; NORMAL-LABEL: test11: -+; NORMAL: movl _the_global, [[EAX:%e..]] -+; NORMAL-NEXT: movl $42, _the_global -+; NORMAL-NEXT: pushl $4 -+; NORMAL-NEXT: pushl $3 -+; NORMAL-NEXT: pushl $2 -+; NORMAL-NEXT: pushl [[EAX]] -+; NORMAL-NEXT: call -+; NORMAL-NEXT: addl $16, %esp -+@the_global = external global i32 -+define void @test11() optsize { -+ %myload = load i32* @the_global -+ store i32 42, i32* @the_global -+ call void @good(i32 %myload, i32 2, i32 3, i32 4) -+ ret void -+} diff --git a/contrib/llvm/patches/patch-08-llvm-r230348-arm-fix-bad-ha.diff b/contrib/llvm/patches/patch-08-llvm-r230348-arm-fix-bad-ha.diff deleted file mode 100644 index 2896899..0000000 --- a/contrib/llvm/patches/patch-08-llvm-r230348-arm-fix-bad-ha.diff +++ /dev/null @@ -1,419 +0,0 @@ -Pull in r230348 from upstream llvm trunk (by Tim Northover): - - ARM: treat [N x i32] and [N x i64] as AAPCS composite types - - The logic is almost there already, with our special homogeneous - aggregate handling. Tweaking it like this allows front-ends to emit - AAPCS compliant code without ever having to count registers or add - discarded padding arguments. - - Only arrays of i32 and i64 are needed to model AAPCS rules, but I - decided to apply the logic to all integer arrays for more consistency. - -This fixes a possible "Unexpected member type for HA" error when -compiling lib/msun/bsdsrc/b_tgamma.c for armv6. - -Reported by: Jakub Palider <jpa@semihalf.com> - -Introduced here: https://svnweb.freebsd.org/changeset/base/280400 - -Index: include/llvm/CodeGen/CallingConvLower.h -=================================================================== ---- include/llvm/CodeGen/CallingConvLower.h -+++ include/llvm/CodeGen/CallingConvLower.h -@@ -122,8 +122,8 @@ class CCValAssign { - // There is no need to differentiate between a pending CCValAssign and other - // kinds, as they are stored in a different list. - static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT, -- LocInfo HTP) { -- return getReg(ValNo, ValVT, 0, LocVT, HTP); -+ LocInfo HTP, unsigned ExtraInfo = 0) { -+ return getReg(ValNo, ValVT, ExtraInfo, LocVT, HTP); - } - - void convertToReg(unsigned RegNo) { -@@ -146,6 +146,7 @@ class CCValAssign { - - unsigned getLocReg() const { assert(isRegLoc()); return Loc; } - unsigned getLocMemOffset() const { assert(isMemLoc()); return Loc; } -+ unsigned getExtraInfo() const { return Loc; } - MVT getLocVT() const { return LocVT; } - - LocInfo getLocInfo() const { return HTP; } -Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp -=================================================================== ---- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp -+++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp -@@ -7429,11 +7429,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLo - } - if (Args[i].isNest) - Flags.setNest(); -- if (NeedsRegBlock) { -+ if (NeedsRegBlock) - Flags.setInConsecutiveRegs(); -- if (Value == NumValues - 1) -- Flags.setInConsecutiveRegsLast(); -- } - Flags.setOrigAlign(OriginalAlignment); - - MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT); -@@ -7482,6 +7479,9 @@ TargetLowering::LowerCallTo(TargetLowering::CallLo - CLI.Outs.push_back(MyFlags); - CLI.OutVals.push_back(Parts[j]); - } -+ -+ if (NeedsRegBlock && Value == NumValues - 1) -+ CLI.Outs[CLI.Outs.size() - 1].Flags.setInConsecutiveRegsLast(); - } - } - -@@ -7697,11 +7697,8 @@ void SelectionDAGISel::LowerArguments(const Functi - } - if (F.getAttributes().hasAttribute(Idx, Attribute::Nest)) - Flags.setNest(); -- if (NeedsRegBlock) { -+ if (NeedsRegBlock) - Flags.setInConsecutiveRegs(); -- if (Value == NumValues - 1) -- Flags.setInConsecutiveRegsLast(); -- } - Flags.setOrigAlign(OriginalAlignment); - - MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT); -@@ -7716,6 +7713,8 @@ void SelectionDAGISel::LowerArguments(const Functi - MyFlags.Flags.setOrigAlign(1); - Ins.push_back(MyFlags); - } -+ if (NeedsRegBlock && Value == NumValues - 1) -+ Ins[Ins.size() - 1].Flags.setInConsecutiveRegsLast(); - PartBase += VT.getStoreSize(); - } - } -Index: lib/Target/ARM/ARMCallingConv.h -=================================================================== ---- lib/Target/ARM/ARMCallingConv.h -+++ lib/Target/ARM/ARMCallingConv.h -@@ -160,6 +160,8 @@ static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &V - State); - } - -+static const uint16_t RRegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; -+ - static const uint16_t SRegList[] = { ARM::S0, ARM::S1, ARM::S2, ARM::S3, - ARM::S4, ARM::S5, ARM::S6, ARM::S7, - ARM::S8, ARM::S9, ARM::S10, ARM::S11, -@@ -168,81 +170,114 @@ static const uint16_t DRegList[] = { ARM::D0, ARM: - ARM::D4, ARM::D5, ARM::D6, ARM::D7 }; - static const uint16_t QRegList[] = { ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3 }; - -+ - // Allocate part of an AAPCS HFA or HVA. We assume that each member of the HA - // has InConsecutiveRegs set, and that the last member also has - // InConsecutiveRegsLast set. We must process all members of the HA before - // we can allocate it, as we need to know the total number of registers that - // will be needed in order to (attempt to) allocate a contiguous block. --static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT, -- CCValAssign::LocInfo &LocInfo, -- ISD::ArgFlagsTy &ArgFlags, CCState &State) { -- SmallVectorImpl<CCValAssign> &PendingHAMembers = State.getPendingLocs(); -+static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned &ValNo, MVT &ValVT, -+ MVT &LocVT, -+ CCValAssign::LocInfo &LocInfo, -+ ISD::ArgFlagsTy &ArgFlags, -+ CCState &State) { -+ SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs(); - - // AAPCS HFAs must have 1-4 elements, all of the same type -- assert(PendingHAMembers.size() < 4); -- if (PendingHAMembers.size() > 0) -- assert(PendingHAMembers[0].getLocVT() == LocVT); -+ if (PendingMembers.size() > 0) -+ assert(PendingMembers[0].getLocVT() == LocVT); - - // Add the argument to the list to be allocated once we know the size of the -- // HA -- PendingHAMembers.push_back( -- CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo)); -+ // aggregate. Store the type's required alignmnent as extra info for later: in -+ // the [N x i64] case all trace has been removed by the time we actually get -+ // to do allocation. -+ PendingMembers.push_back(CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo, -+ ArgFlags.getOrigAlign())); - -- if (ArgFlags.isInConsecutiveRegsLast()) { -- assert(PendingHAMembers.size() > 0 && PendingHAMembers.size() <= 4 && -- "Homogeneous aggregates must have between 1 and 4 members"); -+ if (!ArgFlags.isInConsecutiveRegsLast()) -+ return true; - -- // Try to allocate a contiguous block of registers, each of the correct -- // size to hold one member. -- ArrayRef<uint16_t> RegList; -- switch (LocVT.SimpleTy) { -- case MVT::f32: -- RegList = SRegList; -- break; -- case MVT::f64: -- RegList = DRegList; -- break; -- case MVT::v2f64: -- RegList = QRegList; -- break; -- default: -- llvm_unreachable("Unexpected member type for HA"); -- break; -- } -+ // Try to allocate a contiguous block of registers, each of the correct -+ // size to hold one member. -+ unsigned Align = std::min(PendingMembers[0].getExtraInfo(), 8U); - -- unsigned RegResult = -- State.AllocateRegBlock(RegList, PendingHAMembers.size()); -+ ArrayRef<uint16_t> RegList; -+ switch (LocVT.SimpleTy) { -+ case MVT::i32: { -+ RegList = RRegList; -+ unsigned RegIdx = State.getFirstUnallocated(RegList.data(), RegList.size()); - -- if (RegResult) { -- for (SmallVectorImpl<CCValAssign>::iterator It = PendingHAMembers.begin(); -- It != PendingHAMembers.end(); ++It) { -- It->convertToReg(RegResult); -- State.addLoc(*It); -- ++RegResult; -- } -- PendingHAMembers.clear(); -- return true; -- } -+ // First consume all registers that would give an unaligned object. Whether -+ // we go on stack or in regs, no-one will be using them in future. -+ unsigned RegAlign = RoundUpToAlignment(Align, 4) / 4; -+ while (RegIdx % RegAlign != 0 && RegIdx < RegList.size()) -+ State.AllocateReg(RegList[RegIdx++]); - -- // Register allocation failed, fall back to the stack -+ break; -+ } -+ case MVT::f32: -+ RegList = SRegList; -+ break; -+ case MVT::f64: -+ RegList = DRegList; -+ break; -+ case MVT::v2f64: -+ RegList = QRegList; -+ break; -+ default: -+ llvm_unreachable("Unexpected member type for block aggregate"); -+ break; -+ } - -- // Mark all VFP regs as unavailable (AAPCS rule C.2.vfp) -- for (unsigned regNo = 0; regNo < 16; ++regNo) -- State.AllocateReg(SRegList[regNo]); -+ unsigned RegResult = State.AllocateRegBlock(RegList, PendingMembers.size()); -+ if (RegResult) { -+ for (SmallVectorImpl<CCValAssign>::iterator It = PendingMembers.begin(); -+ It != PendingMembers.end(); ++It) { -+ It->convertToReg(RegResult); -+ State.addLoc(*It); -+ ++RegResult; -+ } -+ PendingMembers.clear(); -+ return true; -+ } - -- unsigned Size = LocVT.getSizeInBits() / 8; -- unsigned Align = std::min(Size, 8U); -+ // Register allocation failed, we'll be needing the stack -+ unsigned Size = LocVT.getSizeInBits() / 8; -+ if (LocVT == MVT::i32 && State.getNextStackOffset() == 0) { -+ // If nothing else has used the stack until this point, a non-HFA aggregate -+ // can be split between regs and stack. -+ unsigned RegIdx = State.getFirstUnallocated(RegList.data(), RegList.size()); -+ for (auto &It : PendingMembers) { -+ if (RegIdx >= RegList.size()) -+ It.convertToMem(State.AllocateStack(Size, Size)); -+ else -+ It.convertToReg(State.AllocateReg(RegList[RegIdx++])); - -- for (auto It : PendingHAMembers) { -- It.convertToMem(State.AllocateStack(Size, Align)); - State.addLoc(It); - } -+ PendingMembers.clear(); -+ return true; -+ } else if (LocVT != MVT::i32) -+ RegList = SRegList; - -- // All pending members have now been allocated -- PendingHAMembers.clear(); -+ // Mark all regs as unavailable (AAPCS rule C.2.vfp for VFP, C.6 for core) -+ for (auto Reg : RegList) -+ State.AllocateReg(Reg); -+ -+ for (auto &It : PendingMembers) { -+ It.convertToMem(State.AllocateStack(Size, Align)); -+ State.addLoc(It); -+ -+ // After the first item has been allocated, the rest are packed as tightly -+ // as possible. (E.g. an incoming i64 would have starting Align of 8, but -+ // we'll be allocating a bunch of i32 slots). -+ Align = Size; - } - -- // This will be allocated by the last member of the HA -+ // All pending members have now been allocated -+ PendingMembers.clear(); -+ -+ // This will be allocated by the last member of the aggregate - return true; - } - -Index: lib/Target/ARM/ARMCallingConv.td -=================================================================== ---- lib/Target/ARM/ARMCallingConv.td -+++ lib/Target/ARM/ARMCallingConv.td -@@ -175,7 +175,7 @@ def CC_ARM_AAPCS_VFP : CallingConv<[ - CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>, - - // HFAs are passed in a contiguous block of registers, or on the stack -- CCIfConsecutiveRegs<CCCustom<"CC_ARM_AAPCS_Custom_HA">>, -+ CCIfConsecutiveRegs<CCCustom<"CC_ARM_AAPCS_Custom_Aggregate">>, - - CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>, - CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, -Index: lib/Target/ARM/ARMISelLowering.cpp -=================================================================== ---- lib/Target/ARM/ARMISelLowering.cpp -+++ lib/Target/ARM/ARMISelLowering.cpp -@@ -11285,7 +11285,9 @@ static bool isHomogeneousAggregate(Type *Ty, HABas - return (Members > 0 && Members <= 4); - } - --/// \brief Return true if a type is an AAPCS-VFP homogeneous aggregate. -+/// \brief Return true if a type is an AAPCS-VFP homogeneous aggregate or one of -+/// [N x i32] or [N x i64]. This allows front-ends to skip emitting padding when -+/// passing according to AAPCS rules. - bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters( - Type *Ty, CallingConv::ID CallConv, bool isVarArg) const { - if (getEffectiveCallingConv(CallConv, isVarArg) != -@@ -11294,7 +11296,9 @@ bool ARMTargetLowering::functionArgumentNeedsConse - - HABaseType Base = HA_UNKNOWN; - uint64_t Members = 0; -- bool result = isHomogeneousAggregate(Ty, Base, Members); -- DEBUG(dbgs() << "isHA: " << result << " "; Ty->dump()); -- return result; -+ bool IsHA = isHomogeneousAggregate(Ty, Base, Members); -+ DEBUG(dbgs() << "isHA: " << IsHA << " "; Ty->dump()); -+ -+ bool IsIntArray = Ty->isArrayTy() && Ty->getArrayElementType()->isIntegerTy(); -+ return IsHA || IsIntArray; - } -Index: test/CodeGen/ARM/aggregate-padding.ll -=================================================================== ---- test/CodeGen/ARM/aggregate-padding.ll -+++ test/CodeGen/ARM/aggregate-padding.ll -@@ -0,0 +1,101 @@ -+; RUN: llc -mtriple=armv7-linux-gnueabihf %s -o - | FileCheck %s -+ -+; [2 x i64] should be contiguous when split (e.g. we shouldn't try to align all -+; i32 components to 64 bits). Also makes sure i64 based types are properly -+; aligned on the stack. -+define i64 @test_i64_contiguous_on_stack([8 x double], float, i32 %in, [2 x i64] %arg) nounwind { -+; CHECK-LABEL: test_i64_contiguous_on_stack: -+; CHECK-DAG: ldr [[LO0:r[0-9]+]], [sp, #8] -+; CHECK-DAG: ldr [[HI0:r[0-9]+]], [sp, #12] -+; CHECK-DAG: ldr [[LO1:r[0-9]+]], [sp, #16] -+; CHECK-DAG: ldr [[HI1:r[0-9]+]], [sp, #20] -+; CHECK: adds r0, [[LO0]], [[LO1]] -+; CHECK: adc r1, [[HI0]], [[HI1]] -+ -+ %val1 = extractvalue [2 x i64] %arg, 0 -+ %val2 = extractvalue [2 x i64] %arg, 1 -+ %sum = add i64 %val1, %val2 -+ ret i64 %sum -+} -+ -+; [2 x i64] should try to use looks for 4 regs, not 8 (which might happen if the -+; i64 -> i32, i32 split wasn't handled correctly). -+define i64 @test_2xi64_uses_4_regs([8 x double], float, [2 x i64] %arg) nounwind { -+; CHECK-LABEL: test_2xi64_uses_4_regs: -+; CHECK-DAG: mov r0, r2 -+; CHECK-DAG: mov r1, r3 -+ -+ %val = extractvalue [2 x i64] %arg, 1 -+ ret i64 %val -+} -+ -+; An aggregate should be able to split between registers and stack if there is -+; nothing else on the stack. -+define i32 @test_aggregates_split([8 x double], i32, [4 x i32] %arg) nounwind { -+; CHECK-LABEL: test_aggregates_split: -+; CHECK: ldr [[VAL3:r[0-9]+]], [sp] -+; CHECK: add r0, r1, [[VAL3]] -+ -+ %val0 = extractvalue [4 x i32] %arg, 0 -+ %val3 = extractvalue [4 x i32] %arg, 3 -+ %sum = add i32 %val0, %val3 -+ ret i32 %sum -+} -+ -+; If an aggregate has to be moved entirely onto the stack, nothing should be -+; able to use r0-r3 any more. Also checks that [2 x i64] properly aligned when -+; it uses regs. -+define i32 @test_no_int_backfilling([8 x double], float, i32, [2 x i64], i32 %arg) nounwind { -+; CHECK-LABEL: test_no_int_backfilling: -+; CHECK: ldr r0, [sp, #24] -+ ret i32 %arg -+} -+ -+; Even if the argument was successfully allocated as reg block, there should be -+; no backfillig to r1. -+define i32 @test_no_int_backfilling_regsonly(i32, [1 x i64], i32 %arg) { -+; CHECK-LABEL: test_no_int_backfilling_regsonly: -+; CHECK: ldr r0, [sp] -+ ret i32 %arg -+} -+ -+; If an aggregate has to be moved entirely onto the stack, nothing should be -+; able to use r0-r3 any more. -+define float @test_no_float_backfilling([7 x double], [4 x i32], i32, [4 x double], float %arg) nounwind { -+; CHECK-LABEL: test_no_float_backfilling: -+; CHECK: vldr s0, [sp, #40] -+ ret float %arg -+} -+ -+; They're a bit pointless, but types like [N x i8] should work as well. -+define i8 @test_i8_in_regs(i32, [3 x i8] %arg) { -+; CHECK-LABEL: test_i8_in_regs: -+; CHECK: add r0, r1, r3 -+ %val0 = extractvalue [3 x i8] %arg, 0 -+ %val2 = extractvalue [3 x i8] %arg, 2 -+ %sum = add i8 %val0, %val2 -+ ret i8 %sum -+} -+ -+define i16 @test_i16_split(i32, i32, [3 x i16] %arg) { -+; CHECK-LABEL: test_i16_split: -+; CHECK: ldrh [[VAL2:r[0-9]+]], [sp] -+; CHECK: add r0, r2, [[VAL2]] -+ %val0 = extractvalue [3 x i16] %arg, 0 -+ %val2 = extractvalue [3 x i16] %arg, 2 -+ %sum = add i16 %val0, %val2 -+ ret i16 %sum -+} -+ -+; Beware: on the stack each i16 still gets a 32-bit slot, the array is not -+; packed. -+define i16 @test_i16_forced_stack([8 x double], double, i32, i32, [3 x i16] %arg) { -+; CHECK-LABEL: test_i16_forced_stack: -+; CHECK-DAG: ldrh [[VAL0:r[0-9]+]], [sp, #8] -+; CHECK-DAG: ldrh [[VAL2:r[0-9]+]], [sp, #16] -+; CHECK: add r0, [[VAL0]], [[VAL2]] -+ %val0 = extractvalue [3 x i16] %arg, 0 -+ %val2 = extractvalue [3 x i16] %arg, 2 -+ %sum = add i16 %val0, %val2 -+ ret i16 %sum -+} diff --git a/contrib/llvm/patches/patch-09-clang-r227115-constantarraytype.diff b/contrib/llvm/patches/patch-09-clang-r227115-constantarraytype.diff deleted file mode 100644 index 33ca358..0000000 --- a/contrib/llvm/patches/patch-09-clang-r227115-constantarraytype.diff +++ /dev/null @@ -1,50 +0,0 @@ -Pull in r227115 from upstream clang trunk (by Ben Langmuir): - - Fix assert instantiating string init of static variable - - ... when the variable's type is a typedef of a ConstantArrayType. Just - look through the typedef (and any other sugar). We only use the - constant array type here to get the element count. - -This fixes an assertion failure when building the games/redeclipse port. - -Introduced here: http://svnweb.freebsd.org/changeset/base/281046 - -Index: tools/clang/lib/Sema/SemaInit.cpp -=================================================================== ---- tools/clang/lib/Sema/SemaInit.cpp -+++ tools/clang/lib/Sema/SemaInit.cpp -@@ -149,10 +149,10 @@ static void updateStringLiteralType(Expr *E, QualT - static void CheckStringInit(Expr *Str, QualType &DeclT, const ArrayType *AT, - Sema &S) { - // Get the length of the string as parsed. -- uint64_t StrLength = -- cast<ConstantArrayType>(Str->getType())->getSize().getZExtValue(); -+ auto *ConstantArrayTy = -+ cast<ConstantArrayType>(Str->getType()->getUnqualifiedDesugaredType()); -+ uint64_t StrLength = ConstantArrayTy->getSize().getZExtValue(); - -- - if (const IncompleteArrayType *IAT = dyn_cast<IncompleteArrayType>(AT)) { - // C99 6.7.8p14. We have an array of character type with unknown size - // being initialized to a string literal. -Index: tools/clang/test/SemaTemplate/instantiate-static-var.cpp -=================================================================== ---- tools/clang/test/SemaTemplate/instantiate-static-var.cpp -+++ tools/clang/test/SemaTemplate/instantiate-static-var.cpp -@@ -114,3 +114,15 @@ namespace PR6449 { - template class X1<char>; - - } -+ -+typedef char MyString[100]; -+template <typename T> -+struct StaticVarWithTypedefString { -+ static MyString str; -+}; -+template <typename T> -+MyString StaticVarWithTypedefString<T>::str = ""; -+ -+void testStaticVarWithTypedefString() { -+ (void)StaticVarWithTypedefString<int>::str; -+} |