Diffstat (limited to 'contrib/llvm/patches')
10 files changed, 2266 insertions, 0 deletions
diff --git a/contrib/llvm/patches/README.TXT b/contrib/llvm/patches/README.TXT new file mode 100644 index 0000000..7bc26d2 --- /dev/null +++ b/contrib/llvm/patches/README.TXT @@ -0,0 +1,16 @@ +This is a set of individual patches, which contain all the customizations to +llvm/clang currently in the FreeBSD base system. These can be applied in +alphabetical order to a pristine llvm/clang 3.6.1 source tree, for example by +doing: + +svn co https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_361/final llvm-3.6.1 +svn co https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_361/final llvm-3.6.1/tools/clang +cd llvm-3.6.1 +for p in /usr/src/contrib/llvm/patches/patch-*.diff; do + patch -p0 -f -F0 -E -i $p -s || break +done + +A number of these consist of hand-written modifications, specifically for +FreeBSD, while most others are cherry pickings off the llvm and clang trunks. +When a new version of llvm/clang is eventually imported, those latter ones will +largely disappear. diff --git a/contrib/llvm/patches/patch-01-freebsd-kprintf.diff b/contrib/llvm/patches/patch-01-freebsd-kprintf.diff new file mode 100644 index 0000000..252b4cd --- /dev/null +++ b/contrib/llvm/patches/patch-01-freebsd-kprintf.diff @@ -0,0 +1,381 @@ +This patch adds support for the FreeBSD kernel specific printf format +specifiers: %b, %D, %r and %y, via a new __freebsd_kprintf__ format +string type. + +Sent upstream as http://reviews.llvm.org/D7154 + +Index: tools/clang/include/clang/Analysis/Analyses/FormatString.h +=================================================================== +--- tools/clang/include/clang/Analysis/Analyses/FormatString.h ++++ tools/clang/include/clang/Analysis/Analyses/FormatString.h +@@ -161,6 +161,12 @@ class ConversionSpecifier { + ObjCObjArg, // '@' + ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg, + ++ // FreeBSD kernel specific specifiers. ++ FreeBSDbArg, ++ FreeBSDDArg, ++ FreeBSDrArg, ++ FreeBSDyArg, ++ + // GlibC specific specifiers. + PrintErrno, // 'm' + +@@ -204,7 +210,8 @@ class ConversionSpecifier { + return EndScanList ? 
EndScanList - Position : 1; + } + +- bool isIntArg() const { return kind >= IntArgBeg && kind <= IntArgEnd; } ++ bool isIntArg() const { return (kind >= IntArgBeg && kind <= IntArgEnd) || ++ kind == FreeBSDrArg || kind == FreeBSDyArg; } + bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; } + bool isAnyIntArg() const { return kind >= IntArgBeg && kind <= UIntArgEnd; } + const char *toString() const; +@@ -646,7 +653,7 @@ class FormatStringHandler { + + bool ParsePrintfString(FormatStringHandler &H, + const char *beg, const char *end, const LangOptions &LO, +- const TargetInfo &Target); ++ const TargetInfo &Target, bool isFreeBSDKPrintf); + + bool ParseFormatStringHasSArg(const char *beg, const char *end, const LangOptions &LO, + const TargetInfo &Target); +Index: tools/clang/include/clang/Sema/Sema.h +=================================================================== +--- tools/clang/include/clang/Sema/Sema.h ++++ tools/clang/include/clang/Sema/Sema.h +@@ -8567,6 +8567,7 @@ class Sema { + FST_Strftime, + FST_Strfmon, + FST_Kprintf, ++ FST_FreeBSDKPrintf, + FST_Unknown + }; + static FormatStringType GetFormatStringType(const FormatAttr *Format); +Index: tools/clang/lib/Analysis/FormatString.cpp +=================================================================== +--- tools/clang/lib/Analysis/FormatString.cpp ++++ tools/clang/lib/Analysis/FormatString.cpp +@@ -552,6 +552,12 @@ const char *ConversionSpecifier::toString() const + // Objective-C specific specifiers. + case ObjCObjArg: return "@"; + ++ // FreeBSD kernel specific specifiers. ++ case FreeBSDbArg: return "b"; ++ case FreeBSDDArg: return "D"; ++ case FreeBSDrArg: return "r"; ++ case FreeBSDyArg: return "y"; ++ + // GlibC specific specifiers. + case PrintErrno: return "m"; + +@@ -647,6 +653,9 @@ bool FormatSpecifier::hasValidLengthModifier(const + case ConversionSpecifier::XArg: + case ConversionSpecifier::nArg: + return true; ++ case ConversionSpecifier::FreeBSDrArg: ++ case ConversionSpecifier::FreeBSDyArg: ++ return Target.getTriple().isOSFreeBSD(); + default: + return false; + } +@@ -677,6 +686,9 @@ bool FormatSpecifier::hasValidLengthModifier(const + case ConversionSpecifier::ScanListArg: + case ConversionSpecifier::ZArg: + return true; ++ case ConversionSpecifier::FreeBSDrArg: ++ case ConversionSpecifier::FreeBSDyArg: ++ return Target.getTriple().isOSFreeBSD(); + default: + return false; + } +@@ -807,6 +819,10 @@ bool FormatSpecifier::hasStandardConversionSpecifi + case ConversionSpecifier::SArg: + return LangOpt.ObjC1 || LangOpt.ObjC2; + case ConversionSpecifier::InvalidSpecifier: ++ case ConversionSpecifier::FreeBSDbArg: ++ case ConversionSpecifier::FreeBSDDArg: ++ case ConversionSpecifier::FreeBSDrArg: ++ case ConversionSpecifier::FreeBSDyArg: + case ConversionSpecifier::PrintErrno: + case ConversionSpecifier::DArg: + case ConversionSpecifier::OArg: +Index: tools/clang/lib/Analysis/PrintfFormatString.cpp +=================================================================== +--- tools/clang/lib/Analysis/PrintfFormatString.cpp ++++ tools/clang/lib/Analysis/PrintfFormatString.cpp +@@ -55,7 +55,8 @@ static PrintfSpecifierResult ParsePrintfSpecifier( + unsigned &argIndex, + const LangOptions &LO, + const TargetInfo &Target, +- bool Warn) { ++ bool Warn, ++ bool isFreeBSDKPrintf) { + + using namespace clang::analyze_format_string; + using namespace clang::analyze_printf; +@@ -206,9 +207,24 @@ static PrintfSpecifierResult ParsePrintfSpecifier( + case '@': k = ConversionSpecifier::ObjCObjArg; break; + // Glibc 
specific. + case 'm': k = ConversionSpecifier::PrintErrno; break; ++ // FreeBSD kernel specific. ++ case 'b': ++ if (isFreeBSDKPrintf) ++ k = ConversionSpecifier::FreeBSDbArg; // int followed by char * ++ break; ++ case 'r': ++ if (isFreeBSDKPrintf) ++ k = ConversionSpecifier::FreeBSDrArg; // int ++ break; ++ case 'y': ++ if (isFreeBSDKPrintf) ++ k = ConversionSpecifier::FreeBSDyArg; // int ++ break; + // Apple-specific. + case 'D': +- if (Target.getTriple().isOSDarwin()) ++ if (isFreeBSDKPrintf) ++ k = ConversionSpecifier::FreeBSDDArg; // void * followed by char * ++ else if (Target.getTriple().isOSDarwin()) + k = ConversionSpecifier::DArg; + break; + case 'O': +@@ -228,6 +244,10 @@ static PrintfSpecifierResult ParsePrintfSpecifier( + FS.setConversionSpecifier(CS); + if (CS.consumesDataArgument() && !FS.usesPositionalArg()) + FS.setArgIndex(argIndex++); ++ // FreeBSD kernel specific. ++ if (k == ConversionSpecifier::FreeBSDbArg || ++ k == ConversionSpecifier::FreeBSDDArg) ++ argIndex++; + + if (k == ConversionSpecifier::InvalidSpecifier) { + // Assume the conversion takes one argument. +@@ -240,7 +260,8 @@ bool clang::analyze_format_string::ParsePrintfStri + const char *I, + const char *E, + const LangOptions &LO, +- const TargetInfo &Target) { ++ const TargetInfo &Target, ++ bool isFreeBSDKPrintf) { + + unsigned argIndex = 0; + +@@ -247,7 +268,8 @@ bool clang::analyze_format_string::ParsePrintfStri + // Keep looking for a format specifier until we have exhausted the string. + while (I != E) { + const PrintfSpecifierResult &FSR = ParsePrintfSpecifier(H, I, E, argIndex, +- LO, Target, true); ++ LO, Target, true, ++ isFreeBSDKPrintf); + // Did a fail-stop error of any kind occur when parsing the specifier? + // If so, don't do any more processing. + if (FSR.shouldStop()) +@@ -276,7 +298,8 @@ bool clang::analyze_format_string::ParseFormatStri + FormatStringHandler H; + while (I != E) { + const PrintfSpecifierResult &FSR = ParsePrintfSpecifier(H, I, E, argIndex, +- LO, Target, false); ++ LO, Target, false, ++ false); + // Did a fail-stop error of any kind occur when parsing the specifier? + // If so, don't do any more processing. 
+ if (FSR.shouldStop()) +@@ -674,6 +697,8 @@ bool PrintfSpecifier::hasValidPlusPrefix() const { + case ConversionSpecifier::GArg: + case ConversionSpecifier::aArg: + case ConversionSpecifier::AArg: ++ case ConversionSpecifier::FreeBSDrArg: ++ case ConversionSpecifier::FreeBSDyArg: + return true; + + default: +@@ -699,6 +724,8 @@ bool PrintfSpecifier::hasValidAlternativeForm() co + case ConversionSpecifier::FArg: + case ConversionSpecifier::gArg: + case ConversionSpecifier::GArg: ++ case ConversionSpecifier::FreeBSDrArg: ++ case ConversionSpecifier::FreeBSDyArg: + return true; + + default: +@@ -729,6 +756,8 @@ bool PrintfSpecifier::hasValidLeadingZeros() const + case ConversionSpecifier::FArg: + case ConversionSpecifier::gArg: + case ConversionSpecifier::GArg: ++ case ConversionSpecifier::FreeBSDrArg: ++ case ConversionSpecifier::FreeBSDyArg: + return true; + + default: +@@ -753,6 +782,8 @@ bool PrintfSpecifier::hasValidSpacePrefix() const + case ConversionSpecifier::GArg: + case ConversionSpecifier::aArg: + case ConversionSpecifier::AArg: ++ case ConversionSpecifier::FreeBSDrArg: ++ case ConversionSpecifier::FreeBSDyArg: + return true; + + default: +@@ -818,6 +849,8 @@ bool PrintfSpecifier::hasValidPrecision() const { + case ConversionSpecifier::gArg: + case ConversionSpecifier::GArg: + case ConversionSpecifier::sArg: ++ case ConversionSpecifier::FreeBSDrArg: ++ case ConversionSpecifier::FreeBSDyArg: + return true; + + default: +Index: tools/clang/lib/Sema/SemaChecking.cpp +=================================================================== +--- tools/clang/lib/Sema/SemaChecking.cpp ++++ tools/clang/lib/Sema/SemaChecking.cpp +@@ -2603,6 +2603,7 @@ Sema::FormatStringType Sema::GetFormatStringType(c + .Case("strftime", FST_Strftime) + .Case("strfmon", FST_Strfmon) + .Cases("kprintf", "cmn_err", "vcmn_err", "zcmn_err", FST_Kprintf) ++ .Case("freebsd_kprintf", FST_FreeBSDKPrintf) + .Default(FST_Unknown); + } + +@@ -3384,6 +3385,43 @@ CheckPrintfHandler::HandlePrintfSpecifier(const an + CoveredArgs.set(argIndex); + } + ++ // FreeBSD kernel extensions. ++ if (CS.getKind() == ConversionSpecifier::FreeBSDbArg || ++ CS.getKind() == ConversionSpecifier::FreeBSDDArg) { ++ // We need at least two arguments. ++ if (!CheckNumArgs(FS, CS, startSpecifier, specifierLen, argIndex + 1)) ++ return false; ++ ++ // Claim the second argument. ++ CoveredArgs.set(argIndex + 1); ++ ++ // Type check the first argument (int for %b, pointer for %D) ++ const Expr *Ex = getDataArg(argIndex); ++ const analyze_printf::ArgType &AT = ++ (CS.getKind() == ConversionSpecifier::FreeBSDbArg) ? 
++ ArgType(S.Context.IntTy) : ArgType::CPointerTy; ++ if (AT.isValid() && !AT.matchesType(S.Context, Ex->getType())) ++ EmitFormatDiagnostic( ++ S.PDiag(diag::warn_format_conversion_argument_type_mismatch) ++ << AT.getRepresentativeTypeName(S.Context) << Ex->getType() ++ << false << Ex->getSourceRange(), ++ Ex->getLocStart(), /*IsStringLocation*/false, ++ getSpecifierRange(startSpecifier, specifierLen)); ++ ++ // Type check the second argument (char * for both %b and %D) ++ Ex = getDataArg(argIndex + 1); ++ const analyze_printf::ArgType &AT2 = ArgType::CStrTy; ++ if (AT2.isValid() && !AT2.matchesType(S.Context, Ex->getType())) ++ EmitFormatDiagnostic( ++ S.PDiag(diag::warn_format_conversion_argument_type_mismatch) ++ << AT2.getRepresentativeTypeName(S.Context) << Ex->getType() ++ << false << Ex->getSourceRange(), ++ Ex->getLocStart(), /*IsStringLocation*/false, ++ getSpecifierRange(startSpecifier, specifierLen)); ++ ++ return true; ++ } ++ + // Check for using an Objective-C specific conversion specifier + // in a non-ObjC literal. + if (!ObjCContext && CS.isObjCArg()) { +@@ -4007,7 +4045,8 @@ void Sema::CheckFormatString(const StringLiteral * + return; + } + +- if (Type == FST_Printf || Type == FST_NSString) { ++ if (Type == FST_Printf || Type == FST_NSString || ++ Type == FST_FreeBSDKPrintf) { + CheckPrintfHandler H(*this, FExpr, OrigFormatExpr, firstDataArg, + numDataArgs, (Type == FST_NSString), + Str, HasVAListArg, Args, format_idx, +@@ -4015,7 +4054,8 @@ void Sema::CheckFormatString(const StringLiteral * + + if (!analyze_format_string::ParsePrintfString(H, Str, Str + StrLen, + getLangOpts(), +- Context.getTargetInfo())) ++ Context.getTargetInfo(), ++ Type == FST_FreeBSDKPrintf)) + H.DoneProcessing(); + } else if (Type == FST_Scanf) { + CheckScanfHandler H(*this, FExpr, OrigFormatExpr, firstDataArg, numDataArgs, +Index: tools/clang/lib/Sema/SemaDeclAttr.cpp +=================================================================== +--- tools/clang/lib/Sema/SemaDeclAttr.cpp ++++ tools/clang/lib/Sema/SemaDeclAttr.cpp +@@ -2481,6 +2481,7 @@ static FormatAttrKind getFormatAttrKind(StringRef + .Cases("scanf", "printf", "printf0", "strfmon", SupportedFormat) + .Cases("cmn_err", "vcmn_err", "zcmn_err", SupportedFormat) + .Case("kprintf", SupportedFormat) // OpenBSD. ++ .Case("freebsd_kprintf", SupportedFormat) // FreeBSD. + + .Cases("gcc_diag", "gcc_cdiag", "gcc_cxxdiag", "gcc_tdiag", IgnoredFormat) + .Default(InvalidFormat); +Index: tools/clang/test/Sema/attr-format.c +=================================================================== +--- tools/clang/test/Sema/attr-format.c ++++ tools/clang/test/Sema/attr-format.c +@@ -57,8 +57,15 @@ void callnull(void){ + null(0, (int*)0); // expected-warning {{incompatible pointer types}} + } + ++// FreeBSD kernel extensions ++void a3(const char *a, ...) __attribute__((format(freebsd_kprintf, 1,2))); // no-error ++void b3(const char *a, ...) __attribute__((format(freebsd_kprintf, 1,1))); // expected-error {{'format' attribute parameter 3 is out of bounds}} ++void c3(const char *a, ...) __attribute__((format(freebsd_kprintf, 0,2))); // expected-error {{'format' attribute parameter 2 is out of bounds}} ++void d3(const char *a, int c) __attribute__((format(freebsd_kprintf, 1,2))); // expected-error {{format attribute requires variadic function}} ++void e3(char *str, int c, ...) 
__attribute__((format(freebsd_kprintf, 2,3))); // expected-error {{format argument not a string type}} + + ++ + // PR4470 + int xx_vprintf(const char *, va_list); + +Index: tools/clang/test/Sema/format-strings-freebsd.c +=================================================================== +--- tools/clang/test/Sema/format-strings-freebsd.c ++++ tools/clang/test/Sema/format-strings-freebsd.c +@@ -0,0 +1,40 @@ ++// RUN: %clang_cc1 -fsyntax-only -verify -triple i386-unknown-freebsd %s ++// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64-unknown-freebsd %s ++ ++// Test FreeBSD kernel printf extensions. ++int freebsd_kernel_printf(const char *, ...) __attribute__((__format__(__freebsd_kprintf__, 1, 2))); ++ ++void check_freebsd_kernel_extensions(int i, long l, char *s) ++{ ++ // %b expects an int and a char * ++ freebsd_kernel_printf("reg=%b\n", i, "\10\2BITTWO\1BITONE\n"); // no-warning ++ freebsd_kernel_printf("reg=%b\n", l, "\10\2BITTWO\1BITONE\n"); // expected-warning{{format specifies type 'int' but the argument has type 'long'}} ++ freebsd_kernel_printf("reg=%b\n", i, l); // expected-warning{{format specifies type 'char *' but the argument has type 'long'}} ++ freebsd_kernel_printf("reg=%b\n", i); // expected-warning{{more '%' conversions than data arguments}} ++ freebsd_kernel_printf("reg=%b\n", i, "\10\2BITTWO\1BITONE\n", l); // expected-warning{{data argument not used by format string}} ++ ++ // %D expects an unsigned char * and a char * ++ freebsd_kernel_printf("%6D", s, ":"); // no-warning ++ freebsd_kernel_printf("%6D", i, ":"); // expected-warning{{format specifies type 'void *' but the argument has type 'int'}} ++ freebsd_kernel_printf("%6D", s, i); // expected-warning{{format specifies type 'char *' but the argument has type 'int'}} ++ freebsd_kernel_printf("%6D", s); // expected-warning{{more '%' conversions than data arguments}} ++ freebsd_kernel_printf("%6D", s, ":", i); // expected-warning{{data argument not used by format string}} ++ ++ freebsd_kernel_printf("%*D", 42, s, ":"); // no-warning ++ freebsd_kernel_printf("%*D", 42, i, ":"); // expected-warning{{format specifies type 'void *' but the argument has type 'int'}} ++ freebsd_kernel_printf("%*D", 42, s, i); // expected-warning{{format specifies type 'char *' but the argument has type 'int'}} ++ freebsd_kernel_printf("%*D", 42, s); // expected-warning{{more '%' conversions than data arguments}} ++ freebsd_kernel_printf("%*D", 42, s, ":", i); // expected-warning{{data argument not used by format string}} ++ ++ // %r expects an int ++ freebsd_kernel_printf("%r", i); // no-warning ++ freebsd_kernel_printf("%r", l); // expected-warning{{format specifies type 'int' but the argument has type 'long'}} ++ freebsd_kernel_printf("%lr", i); // expected-warning{{format specifies type 'long' but the argument has type 'int'}} ++ freebsd_kernel_printf("%lr", l); // no-warning ++ ++ // %y expects an int ++ freebsd_kernel_printf("%y", i); // no-warning ++ freebsd_kernel_printf("%y", l); // expected-warning{{format specifies type 'int' but the argument has type 'long'}} ++ freebsd_kernel_printf("%ly", i); // expected-warning{{format specifies type 'long' but the argument has type 'int'}} ++ freebsd_kernel_printf("%ly", l); // no-warning ++} diff --git a/contrib/llvm/patches/patch-02-clang-vendor-suffix.diff b/contrib/llvm/patches/patch-02-clang-vendor-suffix.diff new file mode 100644 index 0000000..f94b9f3 --- /dev/null +++ b/contrib/llvm/patches/patch-02-clang-vendor-suffix.diff @@ -0,0 +1,22 @@ +This patch adds a FreeBSD-specific 
suffix to clang's version string. This is +usually of the form "(yyyyddmm)", representing the date when the compiler was +last updated. + +Introduced here: http://svnweb.freebsd.org/changeset/base/209107 + +Index: tools/clang/lib/Basic/Version.cpp +=================================================================== +--- tools/clang/lib/Basic/Version.cpp ++++ tools/clang/lib/Basic/Version.cpp +@@ -128,8 +128,10 @@ std::string getClangToolFullVersion(StringRef Tool + OS << ToolName << " version " CLANG_VERSION_STRING " " + << getClangFullRepositoryVersion(); + ++#ifdef CLANG_VENDOR_SUFFIX ++ OS << CLANG_VENDOR_SUFFIX; ++#elif defined(CLANG_VENDOR) + // If vendor supplied, include the base LLVM version as well. +-#ifdef CLANG_VENDOR + OS << " (based on " << BACKEND_PACKAGE_STRING << ")"; + #endif + diff --git a/contrib/llvm/patches/patch-03-add-CC-aliases.diff b/contrib/llvm/patches/patch-03-add-CC-aliases.diff new file mode 100644 index 0000000..884b14c --- /dev/null +++ b/contrib/llvm/patches/patch-03-add-CC-aliases.diff @@ -0,0 +1,23 @@ +This patch adds "CC" and "clang-CC" to the list of program name aliases which +invoke the C++ compiler. + +Introduced here: http://svnweb.freebsd.org/changeset/base/257109 + +Index: tools/clang/tools/driver/driver.cpp +=================================================================== +--- tools/clang/tools/driver/driver.cpp ++++ tools/clang/tools/driver/driver.cpp +@@ -213,11 +213,13 @@ static const DriverSuffix *FindDriverSuffix(String + {"clang", nullptr}, + {"clang++", "--driver-mode=g++"}, + {"clang-c++", "--driver-mode=g++"}, ++ {"clang-CC", "--driver-mode=g++"}, + {"clang-cc", nullptr}, + {"clang-cpp", "--driver-mode=cpp"}, + {"clang-g++", "--driver-mode=g++"}, + {"clang-gcc", nullptr}, + {"clang-cl", "--driver-mode=cl"}, ++ {"CC", "--driver-mode=g++"}, + {"cc", nullptr}, + {"cpp", "--driver-mode=cpp"}, + {"cl", "--driver-mode=cl"}, diff --git a/contrib/llvm/patches/patch-04-add-llvm-gvn-option.diff b/contrib/llvm/patches/patch-04-add-llvm-gvn-option.diff new file mode 100644 index 0000000..5f31623 --- /dev/null +++ b/contrib/llvm/patches/patch-04-add-llvm-gvn-option.diff @@ -0,0 +1,31 @@ +Add an llvm option to enable/disable running the global value numbering +optimization pass. Disabling this pass helps to minimize the size of +boot2. 
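A hedged usage sketch, not part of the patch: EnableGVN is registered as a hidden boolean cl::opt named "enable-gvn", so it can be toggled when invoking LLVM tools directly, or from the clang driver via -mllvm; the input file names below are assumptions.

clang -Oz -mllvm -enable-gvn=false -c boot2.c   # assumed source file; skips the GVN pass
llc -enable-gvn=false boot2.ll                  # same switch when running llc directly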
+ +Introduced here: http://svnweb.freebsd.org/changeset/base/274968 + +Index: lib/Transforms/IPO/PassManagerBuilder.cpp +=================================================================== +--- lib/Transforms/IPO/PassManagerBuilder.cpp ++++ lib/Transforms/IPO/PassManagerBuilder.cpp +@@ -78,6 +78,10 @@ static cl::opt<bool> + EnableMLSM("mlsm", cl::init(true), cl::Hidden, + cl::desc("Enable motion of merged load and store")); + ++static cl::opt<bool> EnableGVN("enable-gvn", ++ cl::init(true), cl::Hidden, ++ cl::desc("Run the global value numbering pass")); ++ + PassManagerBuilder::PassManagerBuilder() { + OptLevel = 2; + SizeLevel = 0; +@@ -244,7 +248,8 @@ void PassManagerBuilder::populateModulePassManager + if (OptLevel > 1) { + if (EnableMLSM) + MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds +- MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies ++ if (EnableGVN) ++ MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies + } + MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset + MPM.add(createSCCPPass()); // Constant prop with SCCP diff --git a/contrib/llvm/patches/patch-05-enable-armv6-clrex.diff b/contrib/llvm/patches/patch-05-enable-armv6-clrex.diff new file mode 100644 index 0000000..574e3bd --- /dev/null +++ b/contrib/llvm/patches/patch-05-enable-armv6-clrex.diff @@ -0,0 +1,20 @@ +For now, enable the clrex instruction for armv6, until upstream +implements this properly. + +Submitted by: rdivacky + +Introduced here: http://svnweb.freebsd.org/changeset/base/275362 + +Index: lib/Target/ARM/ARMInstrInfo.td +=================================================================== +--- lib/Target/ARM/ARMInstrInfo.td ++++ lib/Target/ARM/ARMInstrInfo.td +@@ -4640,7 +4640,7 @@ def STLEXD : AIstlex<0b01, (outs GPR:$Rd), + + def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex", + [(int_arm_clrex)]>, +- Requires<[IsARM, HasV7]> { ++ Requires<[IsARM, HasV6]> { + let Inst{31-0} = 0b11110101011111111111000000011111; + } + diff --git a/contrib/llvm/patches/patch-06-clang-add-mips-triples.diff b/contrib/llvm/patches/patch-06-clang-add-mips-triples.diff new file mode 100644 index 0000000..2a66949 --- /dev/null +++ b/contrib/llvm/patches/patch-06-clang-add-mips-triples.diff @@ -0,0 +1,33 @@ +Allow clang to be built for mips/mips64 backend types by adding our mips +triple ids + +This only allows testing and does not change the defaults for mips/mips64. +They still build/use gcc by default. 
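A hedged illustration of the -G forwarding added here (the triple reflects the patch's intent; the source file name and threshold value are assumptions):

clang -target mips64-unknown-freebsd -G 8 -o hello hello.c
# with this patch the FreeBSD link job receives -G8 (the MIPS small-data
# section size threshold) instead of the driver silently dropping the option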
+ +Differential Revision: https://reviews.freebsd.org/D1190 +Reviewed by: dim + +Introduced here: http://svnweb.freebsd.org/changeset/base/277423 + +Index: tools/clang/lib/Driver/Tools.cpp +=================================================================== +--- tools/clang/lib/Driver/Tools.cpp ++++ tools/clang/lib/Driver/Tools.cpp +@@ -6652,6 +6652,17 @@ void freebsd::Link::ConstructJob(Compilation &C, c + CmdArgs.push_back("elf32ppc_fbsd"); + } + ++ if (Arg *A = Args.getLastArg(options::OPT_G)) { ++ if (ToolChain.getArch() == llvm::Triple::mips || ++ ToolChain.getArch() == llvm::Triple::mipsel || ++ ToolChain.getArch() == llvm::Triple::mips64 || ++ ToolChain.getArch() == llvm::Triple::mips64el) { ++ StringRef v = A->getValue(); ++ CmdArgs.push_back(Args.MakeArgString("-G" + v)); ++ A->claim(); ++ } ++ } ++ + if (Output.isFilename()) { + CmdArgs.push_back("-o"); + CmdArgs.push_back(Output.getFilename()); diff --git a/contrib/llvm/patches/patch-07-llvm-r227752-boot2-shrink.diff b/contrib/llvm/patches/patch-07-llvm-r227752-boot2-shrink.diff new file mode 100644 index 0000000..57e16d7 --- /dev/null +++ b/contrib/llvm/patches/patch-07-llvm-r227752-boot2-shrink.diff @@ -0,0 +1,1271 @@ +Pull in r227752 from upstream llvm trunk (by Michael Kuperstein): + + [X86] Convert esp-relative movs of function arguments to pushes, step 2 + + This moves the transformation introduced in r223757 into a separate MI pass. + This allows it to cover many more cases (not only cases where there must be a + reserved call frame), and perform rudimentary call folding. It still doesn't + have a heuristic, so it is enabled only for optsize/minsize, with stack + alignment <= 8, where it ought to be a fairly clear win. + + (Re-commit of r227728) + + Differential Revision: http://reviews.llvm.org/D6789 + +This helps to get sys/boot/i386/boot2 below the required size again, +when optimizing with -Oz. + +Introduced here: http://svnweb.freebsd.org/changeset/base/278112 + +Index: include/llvm/Target/TargetFrameLowering.h +=================================================================== +--- include/llvm/Target/TargetFrameLowering.h ++++ include/llvm/Target/TargetFrameLowering.h +@@ -193,6 +193,11 @@ class TargetFrameLowering { + return hasReservedCallFrame(MF) || hasFP(MF); + } + ++ // needsFrameIndexResolution - Do we need to perform FI resolution for ++ // this function. Normally, this is required only when the function ++ // has any stack objects. However, targets may want to override this. ++ virtual bool needsFrameIndexResolution(const MachineFunction &MF) const; ++ + /// getFrameIndexOffset - Returns the displacement from the frame register to + /// the stack frame of the specified index. + virtual int getFrameIndexOffset(const MachineFunction &MF, int FI) const; +Index: lib/CodeGen/PrologEpilogInserter.cpp +=================================================================== +--- lib/CodeGen/PrologEpilogInserter.cpp ++++ lib/CodeGen/PrologEpilogInserter.cpp +@@ -703,7 +703,8 @@ void PEI::insertPrologEpilogCode(MachineFunction & + /// register references and actual offsets. + /// + void PEI::replaceFrameIndices(MachineFunction &Fn) { +- if (!Fn.getFrameInfo()->hasStackObjects()) return; // Nothing to do? ++ const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering(); ++ if (!TFI.needsFrameIndexResolution(Fn)) return; + + // Store SPAdj at exit of a basic block. 
+ SmallVector<int, 8> SPState; +@@ -769,13 +770,6 @@ void PEI::replaceFrameIndices(MachineBasicBlock *B + continue; + } + +- // If we are looking at a call sequence, we need to keep track of +- // the SP adjustment made by each instruction in the sequence. +- // This includes both the frame setup/destroy pseudos (handled above), +- // as well as other instructions that have side effects w.r.t the SP. +- if (InsideCallSequence) +- SPAdj += TII.getSPAdjust(I); +- + MachineInstr *MI = I; + bool DoIncr = true; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { +@@ -854,6 +848,16 @@ void PEI::replaceFrameIndices(MachineBasicBlock *B + break; + } + ++ // If we are looking at a call sequence, we need to keep track of ++ // the SP adjustment made by each instruction in the sequence. ++ // This includes both the frame setup/destroy pseudos (handled above), ++ // as well as other instructions that have side effects w.r.t the SP. ++ // Note that this must come after eliminateFrameIndex, because ++ // if I itself referred to a frame index, we shouldn't count its own ++ // adjustment. ++ if (MI && InsideCallSequence) ++ SPAdj += TII.getSPAdjust(MI); ++ + if (DoIncr && I != BB->end()) ++I; + + // Update register states. +Index: lib/CodeGen/TargetFrameLoweringImpl.cpp +=================================================================== +--- lib/CodeGen/TargetFrameLoweringImpl.cpp ++++ lib/CodeGen/TargetFrameLoweringImpl.cpp +@@ -42,3 +42,8 @@ int TargetFrameLowering::getFrameIndexReference(co + FrameReg = RI->getFrameRegister(MF); + return getFrameIndexOffset(MF, FI); + } ++ ++bool TargetFrameLowering::needsFrameIndexResolution( ++ const MachineFunction &MF) const { ++ return MF.getFrameInfo()->hasStackObjects(); ++} +Index: lib/Target/X86/CMakeLists.txt +=================================================================== +--- lib/Target/X86/CMakeLists.txt ++++ lib/Target/X86/CMakeLists.txt +@@ -14,6 +14,7 @@ add_public_tablegen_target(X86CommonTableGen) + + set(sources + X86AsmPrinter.cpp ++ X86CallFrameOptimization.cpp + X86FastISel.cpp + X86FloatingPoint.cpp + X86FrameLowering.cpp +Index: lib/Target/X86/X86.h +=================================================================== +--- lib/Target/X86/X86.h ++++ lib/Target/X86/X86.h +@@ -67,6 +67,11 @@ FunctionPass *createX86PadShortFunctions(); + /// to eliminate execution delays in some Atom processors. + FunctionPass *createX86FixupLEAs(); + ++/// createX86CallFrameOptimization - Return a pass that optimizes ++/// the code-size of x86 call sequences. This is done by replacing ++/// esp-relative movs with pushes. ++FunctionPass *createX86CallFrameOptimization(); ++ + } // End llvm namespace + + #endif +Index: lib/Target/X86/X86CallFrameOptimization.cpp +=================================================================== +--- lib/Target/X86/X86CallFrameOptimization.cpp ++++ lib/Target/X86/X86CallFrameOptimization.cpp +@@ -0,0 +1,400 @@ ++//===----- X86CallFrameOptimization.cpp - Optimize x86 call sequences -----===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This file defines a pass that optimizes call sequences on x86. ++// Currently, it converts movs of function parameters onto the stack into ++// pushes. 
This is beneficial for two main reasons: ++// 1) The push instruction encoding is much smaller than an esp-relative mov ++// 2) It is possible to push memory arguments directly. So, if the ++// the transformation is preformed pre-reg-alloc, it can help relieve ++// register pressure. ++// ++//===----------------------------------------------------------------------===// ++ ++#include <algorithm> ++ ++#include "X86.h" ++#include "X86InstrInfo.h" ++#include "X86Subtarget.h" ++#include "X86MachineFunctionInfo.h" ++#include "llvm/ADT/Statistic.h" ++#include "llvm/CodeGen/MachineFunctionPass.h" ++#include "llvm/CodeGen/MachineInstrBuilder.h" ++#include "llvm/CodeGen/MachineRegisterInfo.h" ++#include "llvm/CodeGen/Passes.h" ++#include "llvm/IR/Function.h" ++#include "llvm/Support/Debug.h" ++#include "llvm/Support/raw_ostream.h" ++#include "llvm/Target/TargetInstrInfo.h" ++ ++using namespace llvm; ++ ++#define DEBUG_TYPE "x86-cf-opt" ++ ++cl::opt<bool> NoX86CFOpt("no-x86-call-frame-opt", ++ cl::desc("Avoid optimizing x86 call frames for size"), ++ cl::init(false), cl::Hidden); ++ ++namespace { ++class X86CallFrameOptimization : public MachineFunctionPass { ++public: ++ X86CallFrameOptimization() : MachineFunctionPass(ID) {} ++ ++ bool runOnMachineFunction(MachineFunction &MF) override; ++ ++private: ++ bool shouldPerformTransformation(MachineFunction &MF); ++ ++ bool adjustCallSequence(MachineFunction &MF, MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator I); ++ ++ MachineInstr *canFoldIntoRegPush(MachineBasicBlock::iterator FrameSetup, ++ unsigned Reg); ++ ++ const char *getPassName() const override { ++ return "X86 Optimize Call Frame"; ++ } ++ ++ const TargetInstrInfo *TII; ++ const TargetFrameLowering *TFL; ++ const MachineRegisterInfo *MRI; ++ static char ID; ++}; ++ ++char X86CallFrameOptimization::ID = 0; ++} ++ ++FunctionPass *llvm::createX86CallFrameOptimization() { ++ return new X86CallFrameOptimization(); ++} ++ ++// This checks whether the transformation is legal and profitable ++bool X86CallFrameOptimization::shouldPerformTransformation(MachineFunction &MF) { ++ if (NoX86CFOpt.getValue()) ++ return false; ++ ++ // We currently only support call sequences where *all* parameters. ++ // are passed on the stack. ++ // No point in running this in 64-bit mode, since some arguments are ++ // passed in-register in all common calling conventions, so the pattern ++ // we're looking for will never match. ++ const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>(); ++ if (STI.is64Bit()) ++ return false; ++ ++ // You would expect straight-line code between call-frame setup and ++ // call-frame destroy. You would be wrong. There are circumstances (e.g. ++ // CMOV_GR8 expansion of a select that feeds a function call!) where we can ++ // end up with the setup and the destroy in different basic blocks. ++ // This is bad, and breaks SP adjustment. ++ // So, check that all of the frames in the function are closed inside ++ // the same block, and, for good measure, that there are no nested frames. 
++ int FrameSetupOpcode = TII->getCallFrameSetupOpcode(); ++ int FrameDestroyOpcode = TII->getCallFrameDestroyOpcode(); ++ for (MachineBasicBlock &BB : MF) { ++ bool InsideFrameSequence = false; ++ for (MachineInstr &MI : BB) { ++ if (MI.getOpcode() == FrameSetupOpcode) { ++ if (InsideFrameSequence) ++ return false; ++ InsideFrameSequence = true; ++ } ++ else if (MI.getOpcode() == FrameDestroyOpcode) { ++ if (!InsideFrameSequence) ++ return false; ++ InsideFrameSequence = false; ++ } ++ } ++ ++ if (InsideFrameSequence) ++ return false; ++ } ++ ++ // Now that we know the transformation is legal, check if it is ++ // profitable. ++ // TODO: Add a heuristic that actually looks at the function, ++ // and enable this for more cases. ++ ++ // This transformation is always a win when we expected to have ++ // a reserved call frame. Under other circumstances, it may be either ++ // a win or a loss, and requires a heuristic. ++ // For now, enable it only for the relatively clear win cases. ++ bool CannotReserveFrame = MF.getFrameInfo()->hasVarSizedObjects(); ++ if (CannotReserveFrame) ++ return true; ++ ++ // For now, don't even try to evaluate the profitability when ++ // not optimizing for size. ++ AttributeSet FnAttrs = MF.getFunction()->getAttributes(); ++ bool OptForSize = ++ FnAttrs.hasAttribute(AttributeSet::FunctionIndex, ++ Attribute::OptimizeForSize) || ++ FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize); ++ ++ if (!OptForSize) ++ return false; ++ ++ // Stack re-alignment can make this unprofitable even in terms of size. ++ // As mentioned above, a better heuristic is needed. For now, don't do this ++ // when the required alignment is above 8. (4 would be the safe choice, but ++ // some experimentation showed 8 is generally good). ++ if (TFL->getStackAlignment() > 8) ++ return false; ++ ++ return true; ++} ++ ++bool X86CallFrameOptimization::runOnMachineFunction(MachineFunction &MF) { ++ TII = MF.getSubtarget().getInstrInfo(); ++ TFL = MF.getSubtarget().getFrameLowering(); ++ MRI = &MF.getRegInfo(); ++ ++ if (!shouldPerformTransformation(MF)) ++ return false; ++ ++ int FrameSetupOpcode = TII->getCallFrameSetupOpcode(); ++ ++ bool Changed = false; ++ ++ for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB) ++ for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) ++ if (I->getOpcode() == FrameSetupOpcode) ++ Changed |= adjustCallSequence(MF, *BB, I); ++ ++ return Changed; ++} ++ ++bool X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF, ++ MachineBasicBlock &MBB, ++ MachineBasicBlock::iterator I) { ++ ++ // Check that this particular call sequence is amenable to the ++ // transformation. ++ const X86RegisterInfo &RegInfo = *static_cast<const X86RegisterInfo *>( ++ MF.getSubtarget().getRegisterInfo()); ++ unsigned StackPtr = RegInfo.getStackRegister(); ++ int FrameDestroyOpcode = TII->getCallFrameDestroyOpcode(); ++ ++ // We expect to enter this at the beginning of a call sequence ++ assert(I->getOpcode() == TII->getCallFrameSetupOpcode()); ++ MachineBasicBlock::iterator FrameSetup = I++; ++ ++ ++ // For globals in PIC mode, we can have some LEAs here. ++ // Ignore them, they don't bother us. ++ // TODO: Extend this to something that covers more cases. ++ while (I->getOpcode() == X86::LEA32r) ++ ++I; ++ ++ // We expect a copy instruction here. ++ // TODO: The copy instruction is a lowering artifact. ++ // We should also support a copy-less version, where the stack ++ // pointer is used directly. 
++ if (!I->isCopy() || !I->getOperand(0).isReg()) ++ return false; ++ MachineBasicBlock::iterator SPCopy = I++; ++ StackPtr = SPCopy->getOperand(0).getReg(); ++ ++ // Scan the call setup sequence for the pattern we're looking for. ++ // We only handle a simple case - a sequence of MOV32mi or MOV32mr ++ // instructions, that push a sequence of 32-bit values onto the stack, with ++ // no gaps between them. ++ SmallVector<MachineInstr*, 4> MovVector(4, nullptr); ++ unsigned int MaxAdjust = FrameSetup->getOperand(0).getImm() / 4; ++ if (MaxAdjust > 4) ++ MovVector.resize(MaxAdjust, nullptr); ++ ++ do { ++ int Opcode = I->getOpcode(); ++ if (Opcode != X86::MOV32mi && Opcode != X86::MOV32mr) ++ break; ++ ++ // We only want movs of the form: ++ // movl imm/r32, k(%esp) ++ // If we run into something else, bail. ++ // Note that AddrBaseReg may, counter to its name, not be a register, ++ // but rather a frame index. ++ // TODO: Support the fi case. This should probably work now that we ++ // have the infrastructure to track the stack pointer within a call ++ // sequence. ++ if (!I->getOperand(X86::AddrBaseReg).isReg() || ++ (I->getOperand(X86::AddrBaseReg).getReg() != StackPtr) || ++ !I->getOperand(X86::AddrScaleAmt).isImm() || ++ (I->getOperand(X86::AddrScaleAmt).getImm() != 1) || ++ (I->getOperand(X86::AddrIndexReg).getReg() != X86::NoRegister) || ++ (I->getOperand(X86::AddrSegmentReg).getReg() != X86::NoRegister) || ++ !I->getOperand(X86::AddrDisp).isImm()) ++ return false; ++ ++ int64_t StackDisp = I->getOperand(X86::AddrDisp).getImm(); ++ assert(StackDisp >= 0 && "Negative stack displacement when passing parameters"); ++ ++ // We really don't want to consider the unaligned case. ++ if (StackDisp % 4) ++ return false; ++ StackDisp /= 4; ++ ++ assert((size_t)StackDisp < MovVector.size() && ++ "Function call has more parameters than the stack is adjusted for."); ++ ++ // If the same stack slot is being filled twice, something's fishy. ++ if (MovVector[StackDisp] != nullptr) ++ return false; ++ MovVector[StackDisp] = I; ++ ++ ++I; ++ } while (I != MBB.end()); ++ ++ // We now expect the end of the sequence - a call and a stack adjust. ++ if (I == MBB.end()) ++ return false; ++ ++ // For PCrel calls, we expect an additional COPY of the basereg. ++ // If we find one, skip it. ++ if (I->isCopy()) { ++ if (I->getOperand(1).getReg() == ++ MF.getInfo<X86MachineFunctionInfo>()->getGlobalBaseReg()) ++ ++I; ++ else ++ return false; ++ } ++ ++ if (!I->isCall()) ++ return false; ++ MachineBasicBlock::iterator Call = I; ++ if ((++I)->getOpcode() != FrameDestroyOpcode) ++ return false; ++ ++ // Now, go through the vector, and see that we don't have any gaps, ++ // but only a series of 32-bit MOVs. ++ ++ int64_t ExpectedDist = 0; ++ auto MMI = MovVector.begin(), MME = MovVector.end(); ++ for (; MMI != MME; ++MMI, ExpectedDist += 4) ++ if (*MMI == nullptr) ++ break; ++ ++ // If the call had no parameters, do nothing ++ if (!ExpectedDist) ++ return false; ++ ++ // We are either at the last parameter, or a gap. ++ // Make sure it's not a gap ++ for (; MMI != MME; ++MMI) ++ if (*MMI != nullptr) ++ return false; ++ ++ // Ok, we can in fact do the transformation for this call. ++ // Do not remove the FrameSetup instruction, but adjust the parameters. ++ // PEI will end up finalizing the handling of this. ++ FrameSetup->getOperand(1).setImm(ExpectedDist); ++ ++ DebugLoc DL = I->getDebugLoc(); ++ // Now, iterate through the vector in reverse order, and replace the movs ++ // with pushes. 
MOVmi/MOVmr doesn't have any defs, so no need to ++ // replace uses. ++ for (int Idx = (ExpectedDist / 4) - 1; Idx >= 0; --Idx) { ++ MachineBasicBlock::iterator MOV = *MovVector[Idx]; ++ MachineOperand PushOp = MOV->getOperand(X86::AddrNumOperands); ++ if (MOV->getOpcode() == X86::MOV32mi) { ++ unsigned PushOpcode = X86::PUSHi32; ++ // If the operand is a small (8-bit) immediate, we can use a ++ // PUSH instruction with a shorter encoding. ++ // Note that isImm() may fail even though this is a MOVmi, because ++ // the operand can also be a symbol. ++ if (PushOp.isImm()) { ++ int64_t Val = PushOp.getImm(); ++ if (isInt<8>(Val)) ++ PushOpcode = X86::PUSH32i8; ++ } ++ BuildMI(MBB, Call, DL, TII->get(PushOpcode)).addOperand(PushOp); ++ } else { ++ unsigned int Reg = PushOp.getReg(); ++ ++ // If PUSHrmm is not slow on this target, try to fold the source of the ++ // push into the instruction. ++ const X86Subtarget &ST = MF.getTarget().getSubtarget<X86Subtarget>(); ++ bool SlowPUSHrmm = ST.isAtom() || ST.isSLM(); ++ ++ // Check that this is legal to fold. Right now, we're extremely ++ // conservative about that. ++ MachineInstr *DefMov = nullptr; ++ if (!SlowPUSHrmm && (DefMov = canFoldIntoRegPush(FrameSetup, Reg))) { ++ MachineInstr *Push = BuildMI(MBB, Call, DL, TII->get(X86::PUSH32rmm)); ++ ++ unsigned NumOps = DefMov->getDesc().getNumOperands(); ++ for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i) ++ Push->addOperand(DefMov->getOperand(i)); ++ ++ DefMov->eraseFromParent(); ++ } else { ++ BuildMI(MBB, Call, DL, TII->get(X86::PUSH32r)).addReg(Reg).getInstr(); ++ } ++ } ++ ++ MBB.erase(MOV); ++ } ++ ++ // The stack-pointer copy is no longer used in the call sequences. ++ // There should not be any other users, but we can't commit to that, so: ++ if (MRI->use_empty(SPCopy->getOperand(0).getReg())) ++ SPCopy->eraseFromParent(); ++ ++ // Once we've done this, we need to make sure PEI doesn't assume a reserved ++ // frame. ++ X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); ++ FuncInfo->setHasPushSequences(true); ++ ++ return true; ++} ++ ++MachineInstr *X86CallFrameOptimization::canFoldIntoRegPush( ++ MachineBasicBlock::iterator FrameSetup, unsigned Reg) { ++ // Do an extremely restricted form of load folding. ++ // ISel will often create patterns like: ++ // movl 4(%edi), %eax ++ // movl 8(%edi), %ecx ++ // movl 12(%edi), %edx ++ // movl %edx, 8(%esp) ++ // movl %ecx, 4(%esp) ++ // movl %eax, (%esp) ++ // call ++ // Get rid of those with prejudice. ++ if (!TargetRegisterInfo::isVirtualRegister(Reg)) ++ return nullptr; ++ ++ // Make sure this is the only use of Reg. ++ if (!MRI->hasOneNonDBGUse(Reg)) ++ return nullptr; ++ ++ MachineBasicBlock::iterator DefMI = MRI->getVRegDef(Reg); ++ ++ // Make sure the def is a MOV from memory. ++ // If the def is an another block, give up. ++ if (DefMI->getOpcode() != X86::MOV32rm || ++ DefMI->getParent() != FrameSetup->getParent()) ++ return nullptr; ++ ++ // Be careful with movs that load from a stack slot, since it may get ++ // resolved incorrectly. ++ // TODO: Again, we already have the infrastructure, so this should work. ++ if (!DefMI->getOperand(1).isReg()) ++ return nullptr; ++ ++ // Now, make sure everything else up until the ADJCALLSTACK is a sequence ++ // of MOVs. To be less conservative would require duplicating a lot of the ++ // logic from PeepholeOptimizer. ++ // FIXME: A possibly better approach would be to teach the PeepholeOptimizer ++ // to be smarter about folding into pushes. 
++ for (auto I = DefMI; I != FrameSetup; ++I) ++ if (I->getOpcode() != X86::MOV32rm) ++ return nullptr; ++ ++ return DefMI; ++} +Index: lib/Target/X86/X86FastISel.cpp +=================================================================== +--- lib/Target/X86/X86FastISel.cpp ++++ lib/Target/X86/X86FastISel.cpp +@@ -2735,7 +2735,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo & + // Issue CALLSEQ_START + unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown)) +- .addImm(NumBytes); ++ .addImm(NumBytes).addImm(0); + + // Walk the register/memloc assignments, inserting copies/loads. + const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>( +Index: lib/Target/X86/X86FrameLowering.cpp +=================================================================== +--- lib/Target/X86/X86FrameLowering.cpp ++++ lib/Target/X86/X86FrameLowering.cpp +@@ -38,9 +38,36 @@ using namespace llvm; + extern cl::opt<bool> ForceStackAlign; + + bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { +- return !MF.getFrameInfo()->hasVarSizedObjects(); ++ return !MF.getFrameInfo()->hasVarSizedObjects() && ++ !MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences(); + } + ++/// canSimplifyCallFramePseudos - If there is a reserved call frame, the ++/// call frame pseudos can be simplified. Having a FP, as in the default ++/// implementation, is not sufficient here since we can't always use it. ++/// Use a more nuanced condition. ++bool ++X86FrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const { ++ const X86RegisterInfo *TRI = static_cast<const X86RegisterInfo *> ++ (MF.getSubtarget().getRegisterInfo()); ++ return hasReservedCallFrame(MF) || ++ (hasFP(MF) && !TRI->needsStackRealignment(MF)) ++ || TRI->hasBasePointer(MF); ++} ++ ++// needsFrameIndexResolution - Do we need to perform FI resolution for ++// this function. Normally, this is required only when the function ++// has any stack objects. However, FI resolution actually has another job, ++// not apparent from the title - it resolves callframesetup/destroy ++// that were not simplified earlier. ++// So, this is required for x86 functions that have push sequences even ++// when there are no stack objects. ++bool ++X86FrameLowering::needsFrameIndexResolution(const MachineFunction &MF) const { ++ return MF.getFrameInfo()->hasStackObjects() || ++ MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences(); ++} ++ + /// hasFP - Return true if the specified function should have a dedicated frame + /// pointer register. This is true if the function has variable sized allocas + /// or if frame pointer elimination is disabled. +@@ -93,16 +120,6 @@ static unsigned getANDriOpcode(bool IsLP64, int64_ + return X86::AND32ri; + } + +-static unsigned getPUSHiOpcode(bool IsLP64, MachineOperand MO) { +- // We don't support LP64 for now. +- assert(!IsLP64); +- +- if (MO.isImm() && isInt<8>(MO.getImm())) +- return X86::PUSH32i8; +- +- return X86::PUSHi32;; +-} +- + static unsigned getLEArOpcode(unsigned IsLP64) { + return IsLP64 ? 
X86::LEA64r : X86::LEA32r; + } +@@ -1882,100 +1899,6 @@ void X86FrameLowering::adjustForHiPEPrologue(Machi + #endif + } + +-bool X86FrameLowering:: +-convertArgMovsToPushes(MachineFunction &MF, MachineBasicBlock &MBB, +- MachineBasicBlock::iterator I, uint64_t Amount) const { +- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); +- const X86RegisterInfo &RegInfo = *static_cast<const X86RegisterInfo *>( +- MF.getSubtarget().getRegisterInfo()); +- unsigned StackPtr = RegInfo.getStackRegister(); +- +- // Scan the call setup sequence for the pattern we're looking for. +- // We only handle a simple case now - a sequence of MOV32mi or MOV32mr +- // instructions, that push a sequence of 32-bit values onto the stack, with +- // no gaps. +- std::map<int64_t, MachineBasicBlock::iterator> MovMap; +- do { +- int Opcode = I->getOpcode(); +- if (Opcode != X86::MOV32mi && Opcode != X86::MOV32mr) +- break; +- +- // We only want movs of the form: +- // movl imm/r32, k(%ecx) +- // If we run into something else, bail +- // Note that AddrBaseReg may, counterintuitively, not be a register... +- if (!I->getOperand(X86::AddrBaseReg).isReg() || +- (I->getOperand(X86::AddrBaseReg).getReg() != StackPtr) || +- !I->getOperand(X86::AddrScaleAmt).isImm() || +- (I->getOperand(X86::AddrScaleAmt).getImm() != 1) || +- (I->getOperand(X86::AddrIndexReg).getReg() != X86::NoRegister) || +- (I->getOperand(X86::AddrSegmentReg).getReg() != X86::NoRegister) || +- !I->getOperand(X86::AddrDisp).isImm()) +- return false; +- +- int64_t StackDisp = I->getOperand(X86::AddrDisp).getImm(); +- +- // We don't want to consider the unaligned case. +- if (StackDisp % 4) +- return false; +- +- // If the same stack slot is being filled twice, something's fishy. +- if (!MovMap.insert(std::pair<int64_t, MachineInstr*>(StackDisp, I)).second) +- return false; +- +- ++I; +- } while (I != MBB.end()); +- +- // We now expect the end of the sequence - a call and a stack adjust. +- if (I == MBB.end()) +- return false; +- if (!I->isCall()) +- return false; +- MachineBasicBlock::iterator Call = I; +- if ((++I)->getOpcode() != TII.getCallFrameDestroyOpcode()) +- return false; +- +- // Now, go through the map, and see that we don't have any gaps, +- // but only a series of 32-bit MOVs. +- // Since std::map provides ordered iteration, the original order +- // of the MOVs doesn't matter. +- int64_t ExpectedDist = 0; +- for (auto MMI = MovMap.begin(), MME = MovMap.end(); MMI != MME; +- ++MMI, ExpectedDist += 4) +- if (MMI->first != ExpectedDist) +- return false; +- +- // Ok, everything looks fine. Do the transformation. +- DebugLoc DL = I->getDebugLoc(); +- +- // It's possible the original stack adjustment amount was larger than +- // that done by the pushes. If so, we still need a SUB. +- Amount -= ExpectedDist; +- if (Amount) { +- MachineInstr* Sub = BuildMI(MBB, Call, DL, +- TII.get(getSUBriOpcode(false, Amount)), StackPtr) +- .addReg(StackPtr).addImm(Amount); +- Sub->getOperand(3).setIsDead(); +- } +- +- // Now, iterate through the map in reverse order, and replace the movs +- // with pushes. MOVmi/MOVmr doesn't have any defs, so need to replace uses. 
+- for (auto MMI = MovMap.rbegin(), MME = MovMap.rend(); MMI != MME; ++MMI) { +- MachineBasicBlock::iterator MOV = MMI->second; +- MachineOperand PushOp = MOV->getOperand(X86::AddrNumOperands); +- +- // Replace MOVmr with PUSH32r, and MOVmi with PUSHi of appropriate size +- int PushOpcode = X86::PUSH32r; +- if (MOV->getOpcode() == X86::MOV32mi) +- PushOpcode = getPUSHiOpcode(false, PushOp); +- +- BuildMI(MBB, Call, DL, TII.get(PushOpcode)).addOperand(PushOp); +- MBB.erase(MOV); +- } +- +- return true; +-} +- + void X86FrameLowering:: + eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { +@@ -1990,7 +1913,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, + bool IsLP64 = STI.isTarget64BitLP64(); + DebugLoc DL = I->getDebugLoc(); + uint64_t Amount = !reserveCallFrame ? I->getOperand(0).getImm() : 0; +- uint64_t CalleeAmt = isDestroy ? I->getOperand(1).getImm() : 0; ++ uint64_t InternalAmt = (isDestroy || Amount) ? I->getOperand(1).getImm() : 0; + I = MBB.erase(I); + + if (!reserveCallFrame) { +@@ -2010,24 +1933,18 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, + Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign; + + MachineInstr *New = nullptr; +- if (Opcode == TII.getCallFrameSetupOpcode()) { +- // Try to convert movs to the stack into pushes. +- // We currently only look for a pattern that appears in 32-bit +- // calling conventions. +- if (!IsLP64 && convertArgMovsToPushes(MF, MBB, I, Amount)) +- return; + +- New = BuildMI(MF, DL, TII.get(getSUBriOpcode(IsLP64, Amount)), +- StackPtr) +- .addReg(StackPtr) +- .addImm(Amount); +- } else { +- assert(Opcode == TII.getCallFrameDestroyOpcode()); ++ // Factor out the amount that gets handled inside the sequence ++ // (Pushes of argument for frame setup, callee pops for frame destroy) ++ Amount -= InternalAmt; + +- // Factor out the amount the callee already popped. +- Amount -= CalleeAmt; ++ if (Amount) { ++ if (Opcode == TII.getCallFrameSetupOpcode()) { ++ New = BuildMI(MF, DL, TII.get(getSUBriOpcode(IsLP64, Amount)), StackPtr) ++ .addReg(StackPtr).addImm(Amount); ++ } else { ++ assert(Opcode == TII.getCallFrameDestroyOpcode()); + +- if (Amount) { + unsigned Opc = getADDriOpcode(IsLP64, Amount); + New = BuildMI(MF, DL, TII.get(Opc), StackPtr) + .addReg(StackPtr).addImm(Amount); +@@ -2045,13 +1962,13 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, + return; + } + +- if (Opcode == TII.getCallFrameDestroyOpcode() && CalleeAmt) { ++ if (Opcode == TII.getCallFrameDestroyOpcode() && InternalAmt) { + // If we are performing frame pointer elimination and if the callee pops + // something off the stack pointer, add it back. We do this until we have + // more advanced stack pointer tracking ability. +- unsigned Opc = getSUBriOpcode(IsLP64, CalleeAmt); ++ unsigned Opc = getSUBriOpcode(IsLP64, InternalAmt); + MachineInstr *New = BuildMI(MF, DL, TII.get(Opc), StackPtr) +- .addReg(StackPtr).addImm(CalleeAmt); ++ .addReg(StackPtr).addImm(InternalAmt); + + // The EFLAGS implicit def is dead. 
+ New->getOperand(3).setIsDead(); +Index: lib/Target/X86/X86FrameLowering.h +=================================================================== +--- lib/Target/X86/X86FrameLowering.h ++++ lib/Target/X86/X86FrameLowering.h +@@ -66,6 +66,8 @@ class X86FrameLowering : public TargetFrameLowerin + + bool hasFP(const MachineFunction &MF) const override; + bool hasReservedCallFrame(const MachineFunction &MF) const override; ++ bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override; ++ bool needsFrameIndexResolution(const MachineFunction &MF) const override; + + int getFrameIndexOffset(const MachineFunction &MF, int FI) const override; + int getFrameIndexReference(const MachineFunction &MF, int FI, +Index: lib/Target/X86/X86InstrCompiler.td +=================================================================== +--- lib/Target/X86/X86InstrCompiler.td ++++ lib/Target/X86/X86InstrCompiler.td +@@ -43,9 +43,9 @@ let hasSideEffects = 0, isNotDuplicable = 1, Uses + // Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become + // sub / add which can clobber EFLAGS. + let Defs = [ESP, EFLAGS], Uses = [ESP] in { +-def ADJCALLSTACKDOWN32 : I<0, Pseudo, (outs), (ins i32imm:$amt), ++def ADJCALLSTACKDOWN32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2), + "#ADJCALLSTACKDOWN", +- [(X86callseq_start timm:$amt)]>, ++ []>, + Requires<[NotLP64]>; + def ADJCALLSTACKUP32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2), + "#ADJCALLSTACKUP", +@@ -52,7 +52,10 @@ def ADJCALLSTACKUP32 : I<0, Pseudo, (outs), (ins + [(X86callseq_end timm:$amt1, timm:$amt2)]>, + Requires<[NotLP64]>; + } ++def : Pat<(X86callseq_start timm:$amt1), ++ (ADJCALLSTACKDOWN32 i32imm:$amt1, 0)>, Requires<[NotLP64]>; + ++ + // ADJCALLSTACKDOWN/UP implicitly use/def RSP because they may be expanded into + // a stack adjustment and the codegen must know that they may modify the stack + // pointer before prolog-epilog rewriting occurs. +@@ -59,9 +62,9 @@ def ADJCALLSTACKUP32 : I<0, Pseudo, (outs), (ins + // Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become + // sub / add which can clobber EFLAGS. + let Defs = [RSP, EFLAGS], Uses = [RSP] in { +-def ADJCALLSTACKDOWN64 : I<0, Pseudo, (outs), (ins i32imm:$amt), ++def ADJCALLSTACKDOWN64 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2), + "#ADJCALLSTACKDOWN", +- [(X86callseq_start timm:$amt)]>, ++ []>, + Requires<[IsLP64]>; + def ADJCALLSTACKUP64 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2), + "#ADJCALLSTACKUP", +@@ -68,9 +71,10 @@ def ADJCALLSTACKUP64 : I<0, Pseudo, (outs), (ins + [(X86callseq_end timm:$amt1, timm:$amt2)]>, + Requires<[IsLP64]>; + } ++def : Pat<(X86callseq_start timm:$amt1), ++ (ADJCALLSTACKDOWN64 i32imm:$amt1, 0)>, Requires<[IsLP64]>; + + +- + // x86-64 va_start lowering magic. 
+ let usesCustomInserter = 1, Defs = [EFLAGS] in { + def VASTART_SAVE_XMM_REGS : I<0, Pseudo, +Index: lib/Target/X86/X86InstrInfo.cpp +=================================================================== +--- lib/Target/X86/X86InstrInfo.cpp ++++ lib/Target/X86/X86InstrInfo.cpp +@@ -1692,6 +1692,58 @@ X86InstrInfo::isCoalescableExtInstr(const MachineI + return false; + } + ++int X86InstrInfo::getSPAdjust(const MachineInstr *MI) const { ++ const MachineFunction *MF = MI->getParent()->getParent(); ++ const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering(); ++ ++ if (MI->getOpcode() == getCallFrameSetupOpcode() || ++ MI->getOpcode() == getCallFrameDestroyOpcode()) { ++ unsigned StackAlign = TFI->getStackAlignment(); ++ int SPAdj = (MI->getOperand(0).getImm() + StackAlign - 1) / StackAlign * ++ StackAlign; ++ ++ SPAdj -= MI->getOperand(1).getImm(); ++ ++ if (MI->getOpcode() == getCallFrameSetupOpcode()) ++ return SPAdj; ++ else ++ return -SPAdj; ++ } ++ ++ // To know whether a call adjusts the stack, we need information ++ // that is bound to the following ADJCALLSTACKUP pseudo. ++ // Look for the next ADJCALLSTACKUP that follows the call. ++ if (MI->isCall()) { ++ const MachineBasicBlock* MBB = MI->getParent(); ++ auto I = ++MachineBasicBlock::const_iterator(MI); ++ for (auto E = MBB->end(); I != E; ++I) { ++ if (I->getOpcode() == getCallFrameDestroyOpcode() || ++ I->isCall()) ++ break; ++ } ++ ++ // If we could not find a frame destroy opcode, then it has already ++ // been simplified, so we don't care. ++ if (I->getOpcode() != getCallFrameDestroyOpcode()) ++ return 0; ++ ++ return -(I->getOperand(1).getImm()); ++ } ++ ++ // Currently handle only PUSHes we can reasonably expect to see ++ // in call sequences ++ switch (MI->getOpcode()) { ++ default: ++ return 0; ++ case X86::PUSH32i8: ++ case X86::PUSH32r: ++ case X86::PUSH32rmm: ++ case X86::PUSH32rmr: ++ case X86::PUSHi32: ++ return 4; ++ } ++} ++ + /// isFrameOperand - Return true and the FrameIndex if the specified + /// operand and follow operands form a reference to the stack frame. + bool X86InstrInfo::isFrameOperand(const MachineInstr *MI, unsigned int Op, +Index: lib/Target/X86/X86InstrInfo.h +=================================================================== +--- lib/Target/X86/X86InstrInfo.h ++++ lib/Target/X86/X86InstrInfo.h +@@ -175,6 +175,11 @@ class X86InstrInfo final : public X86GenInstrInfo + /// + const X86RegisterInfo &getRegisterInfo() const { return RI; } + ++ /// getSPAdjust - This returns the stack pointer adjustment made by ++ /// this instruction. For x86, we need to handle more complex call ++ /// sequences involving PUSHes. ++ int getSPAdjust(const MachineInstr *MI) const override; ++ + /// isCoalescableExtInstr - Return true if the instruction is a "coalescable" + /// extension instruction. That is, it's like a copy where it's legal for the + /// source to overlap the destination. e.g. X86::MOVSX64rr32. If this returns +Index: lib/Target/X86/X86MachineFunctionInfo.h +=================================================================== +--- lib/Target/X86/X86MachineFunctionInfo.h ++++ lib/Target/X86/X86MachineFunctionInfo.h +@@ -77,6 +77,9 @@ class X86MachineFunctionInfo : public MachineFunct + unsigned ArgumentStackSize; + /// NumLocalDynamics - Number of local-dynamic TLS accesses. + unsigned NumLocalDynamics; ++ /// HasPushSequences - Keeps track of whether this function uses sequences ++ /// of pushes to pass function parameters. 
++  bool HasPushSequences;
+
+ private:
+   /// ForwardedMustTailRegParms - A list of virtual and physical registers
+@@ -97,7 +100,8 @@ class X86MachineFunctionInfo : public MachineFunct
+       VarArgsGPOffset(0),
+       VarArgsFPOffset(0),
+       ArgumentStackSize(0),
+-      NumLocalDynamics(0) {}
++      NumLocalDynamics(0),
++      HasPushSequences(false) {}
+
+   explicit X86MachineFunctionInfo(MachineFunction &MF)
+     : ForceFramePointer(false),
+@@ -113,11 +117,15 @@ class X86MachineFunctionInfo : public MachineFunct
+       VarArgsGPOffset(0),
+       VarArgsFPOffset(0),
+       ArgumentStackSize(0),
+-      NumLocalDynamics(0) {}
++      NumLocalDynamics(0),
++      HasPushSequences(false) {}
+
+   bool getForceFramePointer() const { return ForceFramePointer;}
+   void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; }
+
++  bool getHasPushSequences() const { return HasPushSequences; }
++  void setHasPushSequences(bool HasPush) { HasPushSequences = HasPush; }
++
+   bool getRestoreBasePointer() const { return RestoreBasePointerOffset!=0; }
+   void setRestoreBasePointer(const MachineFunction *MF);
+   int getRestoreBasePointerOffset() const {return RestoreBasePointerOffset; }
+Index: lib/Target/X86/X86RegisterInfo.cpp
+===================================================================
+--- lib/Target/X86/X86RegisterInfo.cpp
++++ lib/Target/X86/X86RegisterInfo.cpp
+@@ -468,8 +468,6 @@ void
+ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+                                      int SPAdj, unsigned FIOperandNum,
+                                      RegScavenger *RS) const {
+-  assert(SPAdj == 0 && "Unexpected");
+-
+   MachineInstr &MI = *II;
+   MachineFunction &MF = *MI.getParent()->getParent();
+   const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+@@ -506,6 +504,9 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicB
+   } else
+     FIOffset = TFI->getFrameIndexOffset(MF, FrameIndex);
+
++  if (BasePtr == StackPtr)
++    FIOffset += SPAdj;
++
+   // The frame index format for stackmaps and patchpoints is different from the
+   // X86 format. It only has a FI and an offset.
+   if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) {
+Index: lib/Target/X86/X86TargetMachine.cpp
+===================================================================
+--- lib/Target/X86/X86TargetMachine.cpp
++++ lib/Target/X86/X86TargetMachine.cpp
+@@ -154,6 +154,7 @@ class X86PassConfig : public TargetPassConfig {
+   void addIRPasses() override;
+   bool addInstSelector() override;
+   bool addILPOpts() override;
++  void addPreRegAlloc() override;
+   void addPostRegAlloc() override;
+   void addPreEmitPass() override;
+ };
+@@ -187,6 +188,10 @@ bool X86PassConfig::addILPOpts() {
+   return true;
+ }
+
++void X86PassConfig::addPreRegAlloc() {
++  addPass(createX86CallFrameOptimization());
++}
++
+ void X86PassConfig::addPostRegAlloc() {
+   addPass(createX86FloatingPointStackifierPass());
+ }
+Index: test/CodeGen/X86/inalloca-invoke.ll
+===================================================================
+--- test/CodeGen/X86/inalloca-invoke.ll
++++ test/CodeGen/X86/inalloca-invoke.ll
+@@ -31,7 +31,7 @@ blah:
+           to label %invoke.cont unwind label %lpad
+
+ ; Uses end as sret param.
+-; CHECK: movl %[[end]], (%esp)
++; CHECK: pushl %[[end]]
+ ; CHECK: calll _plus
+
+ invoke.cont:
+Index: test/CodeGen/X86/movtopush.ll
+===================================================================
+--- test/CodeGen/X86/movtopush.ll
++++ test/CodeGen/X86/movtopush.ll
+@@ -1,10 +1,12 @@
+ ; RUN: llc < %s -mtriple=i686-windows | FileCheck %s -check-prefix=NORMAL
++; RUN: llc < %s -mtriple=x86_64-windows | FileCheck %s -check-prefix=X64
+ ; RUN: llc < %s -mtriple=i686-windows -force-align-stack -stack-alignment=32 | FileCheck %s -check-prefix=ALIGNED
++
+ declare void @good(i32 %a, i32 %b, i32 %c, i32 %d)
+ declare void @inreg(i32 %a, i32 inreg %b, i32 %c, i32 %d)
+
+ ; Here, we should have a reserved frame, so we don't expect pushes
+-; NORMAL-LABEL: test1
++; NORMAL-LABEL: test1:
+ ; NORMAL: subl $16, %esp
+ ; NORMAL-NEXT: movl $4, 12(%esp)
+ ; NORMAL-NEXT: movl $3, 8(%esp)
+@@ -11,6 +13,7 @@ declare void @inreg(i32 %a, i32 inreg %b, i32 %c,
+ ; NORMAL-NEXT: movl $2, 4(%esp)
+ ; NORMAL-NEXT: movl $1, (%esp)
+ ; NORMAL-NEXT: call
++; NORMAL-NEXT: addl $16, %esp
+ define void @test1() {
+ entry:
+   call void @good(i32 1, i32 2, i32 3, i32 4)
+@@ -17,8 +20,10 @@ entry:
+   ret void
+ }
+
+-; Here, we expect a sequence of 4 immediate pushes
+-; NORMAL-LABEL: test2
++; We're optimizing for code size, so we should get pushes for x86,
++; even though there is a reserved call frame.
++; Make sure we don't touch x86-64
++; NORMAL-LABEL: test1b:
+ ; NORMAL-NOT: subl {{.*}} %esp
+ ; NORMAL: pushl $4
+ ; NORMAL-NEXT: pushl $3
+@@ -25,6 +30,42 @@ entry:
+ ; NORMAL-NEXT: pushl $2
+ ; NORMAL-NEXT: pushl $1
+ ; NORMAL-NEXT: call
++; NORMAL-NEXT: addl $16, %esp
++; X64-LABEL: test1b:
++; X64: movl $1, %ecx
++; X64-NEXT: movl $2, %edx
++; X64-NEXT: movl $3, %r8d
++; X64-NEXT: movl $4, %r9d
++; X64-NEXT: callq good
++define void @test1b() optsize {
++entry:
++  call void @good(i32 1, i32 2, i32 3, i32 4)
++  ret void
++}
++
++; Same as above, but for minsize
++; NORMAL-LABEL: test1c:
++; NORMAL-NOT: subl {{.*}} %esp
++; NORMAL: pushl $4
++; NORMAL-NEXT: pushl $3
++; NORMAL-NEXT: pushl $2
++; NORMAL-NEXT: pushl $1
++; NORMAL-NEXT: call
++; NORMAL-NEXT: addl $16, %esp
++define void @test1c() minsize {
++entry:
++  call void @good(i32 1, i32 2, i32 3, i32 4)
++  ret void
++}
++
++; If we have a reserved frame, we should have pushes
++; NORMAL-LABEL: test2:
++; NORMAL-NOT: subl {{.*}} %esp
++; NORMAL: pushl $4
++; NORMAL-NEXT: pushl $3
++; NORMAL-NEXT: pushl $2
++; NORMAL-NEXT: pushl $1
++; NORMAL-NEXT: call
+ define void @test2(i32 %k) {
+ entry:
+   %a = alloca i32, i32 %k
+@@ -34,7 +75,7 @@ entry:
+
+ ; Again, we expect a sequence of 4 immediate pushes
+ ; Checks that we generate the right pushes for >8bit immediates
+-; NORMAL-LABEL: test2b
++; NORMAL-LABEL: test2b:
+ ; NORMAL-NOT: subl {{.*}} %esp
+ ; NORMAL: pushl $4096
+ ; NORMAL-NEXT: pushl $3072
+@@ -41,15 +82,15 @@ entry:
+ ; NORMAL-NEXT: pushl $2048
+ ; NORMAL-NEXT: pushl $1024
+ ; NORMAL-NEXT: call
+-define void @test2b(i32 %k) {
++; NORMAL-NEXT: addl $16, %esp
++define void @test2b() optsize {
+ entry:
+-  %a = alloca i32, i32 %k
+   call void @good(i32 1024, i32 2048, i32 3072, i32 4096)
+   ret void
+ }
+
+ ; The first push should push a register
+-; NORMAL-LABEL: test3
++; NORMAL-LABEL: test3:
+ ; NORMAL-NOT: subl {{.*}} %esp
+ ; NORMAL: pushl $4
+ ; NORMAL-NEXT: pushl $3
+@@ -56,15 +97,15 @@ entry:
+ ; NORMAL-NEXT: pushl $2
+ ; NORMAL-NEXT: pushl %e{{..}}
+ ; NORMAL-NEXT: call
+-define void @test3(i32 %k) {
++; NORMAL-NEXT: addl $16, %esp
++define void @test3(i32 %k) optsize {
+ entry:
+-  %a = alloca i32, i32 %k
+   call void @good(i32 %k, i32 2, i32 3, i32 4)
+   ret void
+ }
+
+ ; We don't support weird calling conventions
+-; NORMAL-LABEL: test4
++; NORMAL-LABEL: test4:
+ ; NORMAL: subl $12, %esp
+ ; NORMAL-NEXT: movl $4, 8(%esp)
+ ; NORMAL-NEXT: movl $3, 4(%esp)
+@@ -71,16 +112,16 @@ entry:
+ ; NORMAL-NEXT: movl $1, (%esp)
+ ; NORMAL-NEXT: movl $2, %eax
+ ; NORMAL-NEXT: call
+-define void @test4(i32 %k) {
++; NORMAL-NEXT: addl $12, %esp
++define void @test4() optsize {
+ entry:
+-  %a = alloca i32, i32 %k
+   call void @inreg(i32 1, i32 2, i32 3, i32 4)
+   ret void
+ }
+
+-; Check that additional alignment is added when the pushes
+-; don't add up to the required alignment.
+-; ALIGNED-LABEL: test5
++; When there is no reserved call frame, check that additional alignment
++; is added when the pushes don't add up to the required alignment.
++; ALIGNED-LABEL: test5:
+ ; ALIGNED: subl $16, %esp
+ ; ALIGNED-NEXT: pushl $4
+ ; ALIGNED-NEXT: pushl $3
+@@ -97,7 +138,7 @@ entry:
+ ; Check that pushing the addresses of globals (Or generally, things that
+ ; aren't exactly immediates) isn't broken.
+ ; Fixes PR21878.
+-; NORMAL-LABEL: test6
++; NORMAL-LABEL: test6:
+ ; NORMAL: pushl $_ext
+ ; NORMAL-NEXT: call
+ declare void @f(i8*)
+@@ -110,3 +151,108 @@ bb:
+   alloca i32
+   ret void
+ }
++
++; Check that we fold simple cases into the push
++; NORMAL-LABEL: test7:
++; NORMAL-NOT: subl {{.*}} %esp
++; NORMAL: movl 4(%esp), [[EAX:%e..]]
++; NORMAL-NEXT: pushl $4
++; NORMAL-NEXT: pushl ([[EAX]])
++; NORMAL-NEXT: pushl $2
++; NORMAL-NEXT: pushl $1
++; NORMAL-NEXT: call
++; NORMAL-NEXT: addl $16, %esp
++define void @test7(i32* %ptr) optsize {
++entry:
++  %val = load i32* %ptr
++  call void @good(i32 1, i32 2, i32 %val, i32 4)
++  ret void
++}
++
++; But we don't want to fold stack-relative loads into the push,
++; because the offset will be wrong
++; NORMAL-LABEL: test8:
++; NORMAL-NOT: subl {{.*}} %esp
++; NORMAL: movl 4(%esp), [[EAX:%e..]]
++; NORMAL-NEXT: pushl $4
++; NORMAL-NEXT: pushl [[EAX]]
++; NORMAL-NEXT: pushl $2
++; NORMAL-NEXT: pushl $1
++; NORMAL-NEXT: call
++; NORMAL-NEXT: addl $16, %esp
++define void @test8(i32* %ptr) optsize {
++entry:
++  %val = ptrtoint i32* %ptr to i32
++  call void @good(i32 1, i32 2, i32 %val, i32 4)
++  ret void
++}
++
++; If one function is using push instructions, and the other isn't
++; (because it has frame-index references), then we must resolve
++; these references correctly.
++; NORMAL-LABEL: test9:
++; NORMAL-NOT: leal (%esp),
++; NORMAL: pushl $4
++; NORMAL-NEXT: pushl $3
++; NORMAL-NEXT: pushl $2
++; NORMAL-NEXT: pushl $1
++; NORMAL-NEXT: call
++; NORMAL-NEXT: addl $16, %esp
++; NORMAL-NEXT: subl $16, %esp
++; NORMAL-NEXT: leal 16(%esp), [[EAX:%e..]]
++; NORMAL-NEXT: movl [[EAX]], 12(%esp)
++; NORMAL-NEXT: movl $7, 8(%esp)
++; NORMAL-NEXT: movl $6, 4(%esp)
++; NORMAL-NEXT: movl $5, (%esp)
++; NORMAL-NEXT: call
++; NORMAL-NEXT: addl $16, %esp
++define void @test9() optsize {
++entry:
++  %p = alloca i32, align 4
++  call void @good(i32 1, i32 2, i32 3, i32 4)
++  %0 = ptrtoint i32* %p to i32
++  call void @good(i32 5, i32 6, i32 7, i32 %0)
++  ret void
++}
++
++; We can end up with an indirect call which gets reloaded on the spot.
++; Make sure we reference the correct stack slot - we spill into (%esp)
++; and reload from 16(%esp) due to the pushes.
++; NORMAL-LABEL: test10:
++; NORMAL: movl $_good, [[ALLOC:.*]]
++; NORMAL-NEXT: movl [[ALLOC]], [[EAX:%e..]]
++; NORMAL-NEXT: movl [[EAX]], (%esp) # 4-byte Spill
++; NORMAL: nop
++; NORMAL: pushl $4
++; NORMAL-NEXT: pushl $3
++; NORMAL-NEXT: pushl $2
++; NORMAL-NEXT: pushl $1
++; NORMAL-NEXT: calll *16(%esp)
++; NORMAL-NEXT: addl $16, %esp
++define void @test10() optsize {
++  %stack_fptr = alloca void (i32, i32, i32, i32)*
++  store void (i32, i32, i32, i32)* @good, void (i32, i32, i32, i32)** %stack_fptr
++  %good_ptr = load volatile void (i32, i32, i32, i32)** %stack_fptr
++  call void asm sideeffect "nop", "~{ax},~{bx},~{cx},~{dx},~{bp},~{si},~{di}"()
++  call void (i32, i32, i32, i32)* %good_ptr(i32 1, i32 2, i32 3, i32 4)
++  ret void
++}
++
++; We can't fold the load from the global into the push because of
++; interference from the store
++; NORMAL-LABEL: test11:
++; NORMAL: movl _the_global, [[EAX:%e..]]
++; NORMAL-NEXT: movl $42, _the_global
++; NORMAL-NEXT: pushl $4
++; NORMAL-NEXT: pushl $3
++; NORMAL-NEXT: pushl $2
++; NORMAL-NEXT: pushl [[EAX]]
++; NORMAL-NEXT: call
++; NORMAL-NEXT: addl $16, %esp
++@the_global = external global i32
++define void @test11() optsize {
++  %myload = load i32* @the_global
++  store i32 42, i32* @the_global
++  call void @good(i32 %myload, i32 2, i32 3, i32 4)
++  ret void
++}
diff --git a/contrib/llvm/patches/patch-08-llvm-r230348-arm-fix-bad-ha.diff b/contrib/llvm/patches/patch-08-llvm-r230348-arm-fix-bad-ha.diff
new file mode 100644
index 0000000..2896899
--- /dev/null
+++ b/contrib/llvm/patches/patch-08-llvm-r230348-arm-fix-bad-ha.diff
@@ -0,0 +1,419 @@
+Pull in r230348 from upstream llvm trunk (by Tim Northover):
+
+  ARM: treat [N x i32] and [N x i64] as AAPCS composite types
+
+  The logic is almost there already, with our special homogeneous
+  aggregate handling. Tweaking it like this allows front-ends to emit
+  AAPCS compliant code without ever having to count registers or add
+  discarded padding arguments.
+
+  Only arrays of i32 and i64 are needed to model AAPCS rules, but I
+  decided to apply the logic to all integer arrays for more consistency.
+
+This fixes a possible "Unexpected member type for HA" error when
+compiling lib/msun/bsdsrc/b_tgamma.c for armv6.
+
+Reported by: Jakub Palider <jpa@semihalf.com>
+
+Introduced here: https://svnweb.freebsd.org/changeset/base/280400
+
+Index: include/llvm/CodeGen/CallingConvLower.h
+===================================================================
+--- include/llvm/CodeGen/CallingConvLower.h
++++ include/llvm/CodeGen/CallingConvLower.h
+@@ -122,8 +122,8 @@ class CCValAssign {
+   // There is no need to differentiate between a pending CCValAssign and other
+   // kinds, as they are stored in a different list.
+   static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT,
+-                                LocInfo HTP) {
+-    return getReg(ValNo, ValVT, 0, LocVT, HTP);
++                                LocInfo HTP, unsigned ExtraInfo = 0) {
++    return getReg(ValNo, ValVT, ExtraInfo, LocVT, HTP);
+   }
+
+   void convertToReg(unsigned RegNo) {
+@@ -146,6 +146,7 @@ class CCValAssign {
+
+   unsigned getLocReg() const { assert(isRegLoc()); return Loc; }
+   unsigned getLocMemOffset() const { assert(isMemLoc()); return Loc; }
++  unsigned getExtraInfo() const { return Loc; }
+   MVT getLocVT() const { return LocVT; }
+
+   LocInfo getLocInfo() const { return HTP; }
+Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+===================================================================
+--- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
++++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+@@ -7429,11 +7429,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLo
+     }
+     if (Args[i].isNest)
+       Flags.setNest();
+-    if (NeedsRegBlock) {
++    if (NeedsRegBlock)
+       Flags.setInConsecutiveRegs();
+-      if (Value == NumValues - 1)
+-        Flags.setInConsecutiveRegsLast();
+-    }
+     Flags.setOrigAlign(OriginalAlignment);
+
+     MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT);
+@@ -7482,6 +7479,9 @@ TargetLowering::LowerCallTo(TargetLowering::CallLo
+       CLI.Outs.push_back(MyFlags);
+       CLI.OutVals.push_back(Parts[j]);
+     }
++
++    if (NeedsRegBlock && Value == NumValues - 1)
++      CLI.Outs[CLI.Outs.size() - 1].Flags.setInConsecutiveRegsLast();
+   }
+ }
+
+@@ -7697,11 +7697,8 @@ void SelectionDAGISel::LowerArguments(const Functi
+     }
+     if (F.getAttributes().hasAttribute(Idx, Attribute::Nest))
+       Flags.setNest();
+-    if (NeedsRegBlock) {
++    if (NeedsRegBlock)
+       Flags.setInConsecutiveRegs();
+-      if (Value == NumValues - 1)
+-        Flags.setInConsecutiveRegsLast();
+-    }
+     Flags.setOrigAlign(OriginalAlignment);
+
+     MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
+@@ -7716,6 +7713,8 @@ void SelectionDAGISel::LowerArguments(const Functi
+       MyFlags.Flags.setOrigAlign(1);
+       Ins.push_back(MyFlags);
+     }
++    if (NeedsRegBlock && Value == NumValues - 1)
++      Ins[Ins.size() - 1].Flags.setInConsecutiveRegsLast();
+     PartBase += VT.getStoreSize();
+   }
+ }
+Index: lib/Target/ARM/ARMCallingConv.h
+===================================================================
+--- lib/Target/ARM/ARMCallingConv.h
++++ lib/Target/ARM/ARMCallingConv.h
+@@ -160,6 +160,8 @@ static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &V
+                                    State);
+ }
+
++static const uint16_t RRegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
++
+ static const uint16_t SRegList[] = { ARM::S0, ARM::S1, ARM::S2, ARM::S3,
+                                      ARM::S4, ARM::S5, ARM::S6, ARM::S7,
+                                      ARM::S8, ARM::S9, ARM::S10, ARM::S11,
+@@ -168,81 +170,114 @@ static const uint16_t DRegList[] = { ARM::D0, ARM:
+                                      ARM::D4, ARM::D5, ARM::D6, ARM::D7 };
+ static const uint16_t QRegList[] = { ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3 };
+
++
+ // Allocate part of an AAPCS HFA or HVA. We assume that each member of the HA
+ // has InConsecutiveRegs set, and that the last member also has
+ // InConsecutiveRegsLast set. We must process all members of the HA before
+ // we can allocate it, as we need to know the total number of registers that
+ // will be needed in order to (attempt to) allocate a contiguous block.
+-static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+-                                   CCValAssign::LocInfo &LocInfo,
+-                                   ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+-  SmallVectorImpl<CCValAssign> &PendingHAMembers = State.getPendingLocs();
++static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned &ValNo, MVT &ValVT,
++                                          MVT &LocVT,
++                                          CCValAssign::LocInfo &LocInfo,
++                                          ISD::ArgFlagsTy &ArgFlags,
++                                          CCState &State) {
++  SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
+
+   // AAPCS HFAs must have 1-4 elements, all of the same type
+-  assert(PendingHAMembers.size() < 4);
+-  if (PendingHAMembers.size() > 0)
+-    assert(PendingHAMembers[0].getLocVT() == LocVT);
++  if (PendingMembers.size() > 0)
++    assert(PendingMembers[0].getLocVT() == LocVT);
+
+   // Add the argument to the list to be allocated once we know the size of the
+-  // HA
+-  PendingHAMembers.push_back(
+-      CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
++  // aggregate. Store the type's required alignmnent as extra info for later: in
++  // the [N x i64] case all trace has been removed by the time we actually get
++  // to do allocation.
++  PendingMembers.push_back(CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo,
++                                                   ArgFlags.getOrigAlign()));
+
+-  if (ArgFlags.isInConsecutiveRegsLast()) {
+-    assert(PendingHAMembers.size() > 0 && PendingHAMembers.size() <= 4 &&
+-           "Homogeneous aggregates must have between 1 and 4 members");
++  if (!ArgFlags.isInConsecutiveRegsLast())
++    return true;
+
+-    // Try to allocate a contiguous block of registers, each of the correct
+-    // size to hold one member.
+-    ArrayRef<uint16_t> RegList;
+-    switch (LocVT.SimpleTy) {
+-    case MVT::f32:
+-      RegList = SRegList;
+-      break;
+-    case MVT::f64:
+-      RegList = DRegList;
+-      break;
+-    case MVT::v2f64:
+-      RegList = QRegList;
+-      break;
+-    default:
+-      llvm_unreachable("Unexpected member type for HA");
+-      break;
+-    }
++  // Try to allocate a contiguous block of registers, each of the correct
++  // size to hold one member.
++  unsigned Align = std::min(PendingMembers[0].getExtraInfo(), 8U);
+
+-    unsigned RegResult =
+-        State.AllocateRegBlock(RegList, PendingHAMembers.size());
++  ArrayRef<uint16_t> RegList;
++  switch (LocVT.SimpleTy) {
++  case MVT::i32: {
++    RegList = RRegList;
++    unsigned RegIdx = State.getFirstUnallocated(RegList.data(), RegList.size());
+
+-    if (RegResult) {
+-      for (SmallVectorImpl<CCValAssign>::iterator It = PendingHAMembers.begin();
+-           It != PendingHAMembers.end(); ++It) {
+-        It->convertToReg(RegResult);
+-        State.addLoc(*It);
+-        ++RegResult;
+-      }
+-      PendingHAMembers.clear();
+-      return true;
+-    }
++    // First consume all registers that would give an unaligned object. Whether
++    // we go on stack or in regs, no-one will be using them in future.
++    unsigned RegAlign = RoundUpToAlignment(Align, 4) / 4;
++    while (RegIdx % RegAlign != 0 && RegIdx < RegList.size())
++      State.AllocateReg(RegList[RegIdx++]);
+
+-    // Register allocation failed, fall back to the stack
++    break;
++  }
++  case MVT::f32:
++    RegList = SRegList;
++    break;
++  case MVT::f64:
++    RegList = DRegList;
++    break;
++  case MVT::v2f64:
++    RegList = QRegList;
++    break;
++  default:
++    llvm_unreachable("Unexpected member type for block aggregate");
++    break;
++  }
+
+-    // Mark all VFP regs as unavailable (AAPCS rule C.2.vfp)
+-    for (unsigned regNo = 0; regNo < 16; ++regNo)
+-      State.AllocateReg(SRegList[regNo]);
++  unsigned RegResult = State.AllocateRegBlock(RegList, PendingMembers.size());
++  if (RegResult) {
++    for (SmallVectorImpl<CCValAssign>::iterator It = PendingMembers.begin();
++         It != PendingMembers.end(); ++It) {
++      It->convertToReg(RegResult);
++      State.addLoc(*It);
++      ++RegResult;
++    }
++    PendingMembers.clear();
++    return true;
++  }
+
+-    unsigned Size = LocVT.getSizeInBits() / 8;
+-    unsigned Align = std::min(Size, 8U);
++  // Register allocation failed, we'll be needing the stack
++  unsigned Size = LocVT.getSizeInBits() / 8;
++  if (LocVT == MVT::i32 && State.getNextStackOffset() == 0) {
++    // If nothing else has used the stack until this point, a non-HFA aggregate
++    // can be split between regs and stack.
++    unsigned RegIdx = State.getFirstUnallocated(RegList.data(), RegList.size());
++    for (auto &It : PendingMembers) {
++      if (RegIdx >= RegList.size())
++        It.convertToMem(State.AllocateStack(Size, Size));
++      else
++        It.convertToReg(State.AllocateReg(RegList[RegIdx++]));
+
+-    for (auto It : PendingHAMembers) {
+-      It.convertToMem(State.AllocateStack(Size, Align));
+       State.addLoc(It);
+     }
++    PendingMembers.clear();
++    return true;
++  } else if (LocVT != MVT::i32)
++    RegList = SRegList;
+
+-    // All pending members have now been allocated
+-    PendingHAMembers.clear();
++  // Mark all regs as unavailable (AAPCS rule C.2.vfp for VFP, C.6 for core)
++  for (auto Reg : RegList)
++    State.AllocateReg(Reg);
++
++  for (auto &It : PendingMembers) {
++    It.convertToMem(State.AllocateStack(Size, Align));
++    State.addLoc(It);
++
++    // After the first item has been allocated, the rest are packed as tightly
++    // as possible. (E.g. an incoming i64 would have starting Align of 8, but
++    // we'll be allocating a bunch of i32 slots).
++    Align = Size;
+   }
+
+-  // This will be allocated by the last member of the HA
++  // All pending members have now been allocated
++  PendingMembers.clear();
++
++  // This will be allocated by the last member of the aggregate
+   return true;
+ }
+
+Index: lib/Target/ARM/ARMCallingConv.td
+===================================================================
+--- lib/Target/ARM/ARMCallingConv.td
++++ lib/Target/ARM/ARMCallingConv.td
+@@ -175,7 +175,7 @@ def CC_ARM_AAPCS_VFP : CallingConv<[
+   CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+   // HFAs are passed in a contiguous block of registers, or on the stack
+-  CCIfConsecutiveRegs<CCCustom<"CC_ARM_AAPCS_Custom_HA">>,
++  CCIfConsecutiveRegs<CCCustom<"CC_ARM_AAPCS_Custom_Aggregate">>,
+
+   CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
+   CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
+Index: lib/Target/ARM/ARMISelLowering.cpp
+===================================================================
+--- lib/Target/ARM/ARMISelLowering.cpp
++++ lib/Target/ARM/ARMISelLowering.cpp
+@@ -11285,7 +11285,9 @@ static bool isHomogeneousAggregate(Type *Ty, HABas
+   return (Members > 0 && Members <= 4);
+ }
+
+-/// \brief Return true if a type is an AAPCS-VFP homogeneous aggregate.
++/// \brief Return true if a type is an AAPCS-VFP homogeneous aggregate or one of
++/// [N x i32] or [N x i64]. This allows front-ends to skip emitting padding when
++/// passing according to AAPCS rules.
+ bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters(
+     Type *Ty, CallingConv::ID CallConv, bool isVarArg) const {
+   if (getEffectiveCallingConv(CallConv, isVarArg) !=
+@@ -11294,7 +11296,9 @@ bool ARMTargetLowering::functionArgumentNeedsConse
+
+   HABaseType Base = HA_UNKNOWN;
+   uint64_t Members = 0;
+-  bool result = isHomogeneousAggregate(Ty, Base, Members);
+-  DEBUG(dbgs() << "isHA: " << result << " "; Ty->dump());
+-  return result;
++  bool IsHA = isHomogeneousAggregate(Ty, Base, Members);
++  DEBUG(dbgs() << "isHA: " << IsHA << " "; Ty->dump());
++
++  bool IsIntArray = Ty->isArrayTy() && Ty->getArrayElementType()->isIntegerTy();
++  return IsHA || IsIntArray;
+ }
+Index: test/CodeGen/ARM/aggregate-padding.ll
+===================================================================
+--- test/CodeGen/ARM/aggregate-padding.ll
++++ test/CodeGen/ARM/aggregate-padding.ll
+@@ -0,0 +1,101 @@
++; RUN: llc -mtriple=armv7-linux-gnueabihf %s -o - | FileCheck %s
++
++; [2 x i64] should be contiguous when split (e.g. we shouldn't try to align all
++; i32 components to 64 bits). Also makes sure i64 based types are properly
++; aligned on the stack.
++define i64 @test_i64_contiguous_on_stack([8 x double], float, i32 %in, [2 x i64] %arg) nounwind {
++; CHECK-LABEL: test_i64_contiguous_on_stack:
++; CHECK-DAG: ldr [[LO0:r[0-9]+]], [sp, #8]
++; CHECK-DAG: ldr [[HI0:r[0-9]+]], [sp, #12]
++; CHECK-DAG: ldr [[LO1:r[0-9]+]], [sp, #16]
++; CHECK-DAG: ldr [[HI1:r[0-9]+]], [sp, #20]
++; CHECK: adds r0, [[LO0]], [[LO1]]
++; CHECK: adc r1, [[HI0]], [[HI1]]
++
++  %val1 = extractvalue [2 x i64] %arg, 0
++  %val2 = extractvalue [2 x i64] %arg, 1
++  %sum = add i64 %val1, %val2
++  ret i64 %sum
++}
++
++; [2 x i64] should try to use looks for 4 regs, not 8 (which might happen if the
++; i64 -> i32, i32 split wasn't handled correctly).
++define i64 @test_2xi64_uses_4_regs([8 x double], float, [2 x i64] %arg) nounwind {
++; CHECK-LABEL: test_2xi64_uses_4_regs:
++; CHECK-DAG: mov r0, r2
++; CHECK-DAG: mov r1, r3
++
++  %val = extractvalue [2 x i64] %arg, 1
++  ret i64 %val
++}
++
++; An aggregate should be able to split between registers and stack if there is
++; nothing else on the stack.
++define i32 @test_aggregates_split([8 x double], i32, [4 x i32] %arg) nounwind {
++; CHECK-LABEL: test_aggregates_split:
++; CHECK: ldr [[VAL3:r[0-9]+]], [sp]
++; CHECK: add r0, r1, [[VAL3]]
++
++  %val0 = extractvalue [4 x i32] %arg, 0
++  %val3 = extractvalue [4 x i32] %arg, 3
++  %sum = add i32 %val0, %val3
++  ret i32 %sum
++}
++
++; If an aggregate has to be moved entirely onto the stack, nothing should be
++; able to use r0-r3 any more. Also checks that [2 x i64] properly aligned when
++; it uses regs.
++define i32 @test_no_int_backfilling([8 x double], float, i32, [2 x i64], i32 %arg) nounwind {
++; CHECK-LABEL: test_no_int_backfilling:
++; CHECK: ldr r0, [sp, #24]
++  ret i32 %arg
++}
++
++; Even if the argument was successfully allocated as reg block, there should be
++; no backfillig to r1.
++define i32 @test_no_int_backfilling_regsonly(i32, [1 x i64], i32 %arg) {
++; CHECK-LABEL: test_no_int_backfilling_regsonly:
++; CHECK: ldr r0, [sp]
++  ret i32 %arg
++}
++
++; If an aggregate has to be moved entirely onto the stack, nothing should be
++; able to use r0-r3 any more.
++define float @test_no_float_backfilling([7 x double], [4 x i32], i32, [4 x double], float %arg) nounwind {
++; CHECK-LABEL: test_no_float_backfilling:
++; CHECK: vldr s0, [sp, #40]
++  ret float %arg
++}
++
++; They're a bit pointless, but types like [N x i8] should work as well.
++define i8 @test_i8_in_regs(i32, [3 x i8] %arg) {
++; CHECK-LABEL: test_i8_in_regs:
++; CHECK: add r0, r1, r3
++  %val0 = extractvalue [3 x i8] %arg, 0
++  %val2 = extractvalue [3 x i8] %arg, 2
++  %sum = add i8 %val0, %val2
++  ret i8 %sum
++}
++
++define i16 @test_i16_split(i32, i32, [3 x i16] %arg) {
++; CHECK-LABEL: test_i16_split:
++; CHECK: ldrh [[VAL2:r[0-9]+]], [sp]
++; CHECK: add r0, r2, [[VAL2]]
++  %val0 = extractvalue [3 x i16] %arg, 0
++  %val2 = extractvalue [3 x i16] %arg, 2
++  %sum = add i16 %val0, %val2
++  ret i16 %sum
++}
++
++; Beware: on the stack each i16 still gets a 32-bit slot, the array is not
++; packed.
++define i16 @test_i16_forced_stack([8 x double], double, i32, i32, [3 x i16] %arg) {
++; CHECK-LABEL: test_i16_forced_stack:
++; CHECK-DAG: ldrh [[VAL0:r[0-9]+]], [sp, #8]
++; CHECK-DAG: ldrh [[VAL2:r[0-9]+]], [sp, #16]
++; CHECK: add r0, [[VAL0]], [[VAL2]]
++  %val0 = extractvalue [3 x i16] %arg, 0
++  %val2 = extractvalue [3 x i16] %arg, 2
++  %sum = add i16 %val0, %val2
++  ret i16 %sum
++}
diff --git a/contrib/llvm/patches/patch-09-clang-r227115-constantarraytype.diff b/contrib/llvm/patches/patch-09-clang-r227115-constantarraytype.diff
new file mode 100644
index 0000000..33ca358
--- /dev/null
+++ b/contrib/llvm/patches/patch-09-clang-r227115-constantarraytype.diff
@@ -0,0 +1,50 @@
+Pull in r227115 from upstream clang trunk (by Ben Langmuir):
+
+  Fix assert instantiating string init of static variable
+
+  ... when the variable's type is a typedef of a ConstantArrayType. Just
+  look through the typedef (and any other sugar). We only use the
+  constant array type here to get the element count.
+
+This fixes an assertion failure when building the games/redeclipse port.
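+
+As a minimal illustration, here is a hypothetical reduction of the kind of
+code that hit the assert (the names below are invented for illustration and
+are not taken from the redeclipse sources; the actual regression test added
+by this patch appears at the end of this diff):
+
+  typedef char MyString[100];             // typedef of a ConstantArrayType
+  template <typename T>
+  struct Holder {
+    static MyString str;
+  };
+  template <typename T>
+  MyString Holder<T>::str = "";           // string init of a sugared array type
+  void use() { (void)Holder<int>::str; }  // instantiation triggered the assert
+
+With the fix, CheckStringInit() looks through the sugar before the
+cast<ConstantArrayType>, so the element count is obtained instead of
+asserting.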
+
+Introduced here: http://svnweb.freebsd.org/changeset/base/281046
+
+Index: tools/clang/lib/Sema/SemaInit.cpp
+===================================================================
+--- tools/clang/lib/Sema/SemaInit.cpp
++++ tools/clang/lib/Sema/SemaInit.cpp
+@@ -149,10 +149,10 @@ static void updateStringLiteralType(Expr *E, QualT
+ static void CheckStringInit(Expr *Str, QualType &DeclT, const ArrayType *AT,
+                             Sema &S) {
+   // Get the length of the string as parsed.
+-  uint64_t StrLength =
+-      cast<ConstantArrayType>(Str->getType())->getSize().getZExtValue();
++  auto *ConstantArrayTy =
++      cast<ConstantArrayType>(Str->getType()->getUnqualifiedDesugaredType());
++  uint64_t StrLength = ConstantArrayTy->getSize().getZExtValue();
+
+-
+   if (const IncompleteArrayType *IAT = dyn_cast<IncompleteArrayType>(AT)) {
+     // C99 6.7.8p14. We have an array of character type with unknown size
+     // being initialized to a string literal.
+Index: tools/clang/test/SemaTemplate/instantiate-static-var.cpp
+===================================================================
+--- tools/clang/test/SemaTemplate/instantiate-static-var.cpp
++++ tools/clang/test/SemaTemplate/instantiate-static-var.cpp
+@@ -114,3 +114,15 @@ namespace PR6449 {
+   template class X1<char>;
+
+ }
++
++typedef char MyString[100];
++template <typename T>
++struct StaticVarWithTypedefString {
++  static MyString str;
++};
++template <typename T>
++MyString StaticVarWithTypedefString<T>::str = "";
++
++void testStaticVarWithTypedefString() {
++  (void)StaticVarWithTypedefString<int>::str;
++}