diff options
author | dim <dim@FreeBSD.org> | 2015-01-18 14:14:47 +0000 |
---|---|---|
committer | dim <dim@FreeBSD.org> | 2015-01-18 14:14:47 +0000 |
commit | c074a2b0d05fdd11d61e6c5ffa970c806be2f31a (patch) | |
tree | 0e56d6fc0fbb158ec2ac946d1e22170e0107492d /contrib/llvm/patches | |
parent | 814696f72012ae32daa581d7a1d5253a9b439b09 (diff) | |
parent | 3c7e7a1538a873b0d3b012ef8811969ac4552c2a (diff) | |
download | FreeBSD-src-c074a2b0d05fdd11d61e6c5ffa970c806be2f31a.zip FreeBSD-src-c074a2b0d05fdd11d61e6c5ffa970c806be2f31a.tar.gz |
Upgrade our copy of clang and llvm to the 3.5.1 release. This is a
bugfix-only release; no new features have been added.
Please note that this version requires C++11 support to build; see
UPDATING for more information.
Release notes for llvm and clang can be found here:
<http://llvm.org/releases/3.5.1/docs/ReleaseNotes.html>
<http://llvm.org/releases/3.5.1/tools/clang/docs/ReleaseNotes.html>
MFC after: 1 month
X-MFC-With: 276479
Diffstat (limited to 'contrib/llvm/patches')
-rw-r--r-- | contrib/llvm/patches/README.TXT | 8 | ||||
-rw-r--r-- | contrib/llvm/patches/patch-07-llvm-r213960-ppc32-tls.diff | 547 | ||||
-rw-r--r-- | contrib/llvm/patches/patch-07-llvm-r216989-r216990-fix-movw-armv6.diff (renamed from contrib/llvm/patches/patch-08-llvm-r216989-r216990-fix-movw-armv6.diff) | 0 | ||||
-rw-r--r-- | contrib/llvm/patches/patch-08-clang-r217410-i386-garbage-float.diff (renamed from contrib/llvm/patches/patch-09-clang-r217410-i386-garbage-float.diff) | 0 | ||||
-rw-r--r-- | contrib/llvm/patches/patch-09-llvm-r221709-debug-oom.diff (renamed from contrib/llvm/patches/patch-10-llvm-r221709-debug-oom.diff) | 0 | ||||
-rw-r--r-- | contrib/llvm/patches/patch-10-llvm-r222562-loop-rotate.diff (renamed from contrib/llvm/patches/patch-11-llvm-r222562-loop-rotate.diff) | 0 | ||||
-rw-r--r-- | contrib/llvm/patches/patch-11-add-llvm-gvn-option.diff (renamed from contrib/llvm/patches/patch-12-add-llvm-gvn-option.diff) | 0 | ||||
-rw-r--r-- | contrib/llvm/patches/patch-12-llvm-r218241-dwarf2-warning.diff (renamed from contrib/llvm/patches/patch-13-llvm-r218241-dwarf2-warning.diff) | 0 | ||||
-rw-r--r-- | contrib/llvm/patches/patch-13-llvm-r215352-aarch64-dyn-loader.diff (renamed from contrib/llvm/patches/patch-14-llvm-r215352-aarch64-dyn-loader.diff) | 0 | ||||
-rw-r--r-- | contrib/llvm/patches/patch-14-llvm-r216571-dynamiclib-usability.diff (renamed from contrib/llvm/patches/patch-15-llvm-r216571-dynamiclib-usability.diff) | 0 | ||||
-rw-r--r-- | contrib/llvm/patches/patch-15-clang-r221900-freebsd-aarch64.diff (renamed from contrib/llvm/patches/patch-16-clang-r221900-freebsd-aarch64.diff) | 0 | ||||
-rw-r--r-- | contrib/llvm/patches/patch-16-llvm-r222856-libapr-miscompile.diff (renamed from contrib/llvm/patches/patch-17-llvm-r222856-libapr-miscompile.diff) | 0 | ||||
-rw-r--r-- | contrib/llvm/patches/patch-17-llvm-r214802-armv6-cp10-cp11.diff (renamed from contrib/llvm/patches/patch-18-llvm-r214802-armv6-cp10-cp11.diff) | 0 | ||||
-rw-r--r-- | contrib/llvm/patches/patch-18-llvm-r215811-arm-fpu-directive.diff (renamed from contrib/llvm/patches/patch-19-llvm-r215811-arm-fpu-directive.diff) | 0 | ||||
-rw-r--r-- | contrib/llvm/patches/patch-19-enable-armv6-clrex.diff (renamed from contrib/llvm/patches/patch-20-enable-armv6-clrex.diff) | 0 | ||||
-rw-r--r-- | contrib/llvm/patches/patch-20-llvm-r223147-arm-cpu-directive.diff (renamed from contrib/llvm/patches/patch-22-llvm-r223147-arm-cpu-directive.diff) | 0 | ||||
-rw-r--r-- | contrib/llvm/patches/patch-21-llvm-r221170-ppc-vaarg.diff (renamed from contrib/llvm/patches/patch-23-llvm-r221170-ppc-vaarg.diff) | 0 | ||||
-rw-r--r-- | contrib/llvm/patches/patch-21-llvm-r223171-fix-vectorizer.diff | 617 | ||||
-rw-r--r-- | contrib/llvm/patches/patch-22-llvm-r221791-ppc-small-pic.diff (renamed from contrib/llvm/patches/patch-24-llvm-r221791-ppc-small-pic.diff) | 40 | ||||
-rw-r--r-- | contrib/llvm/patches/patch-23-llvm-r224415-ppc-local.diff (renamed from contrib/llvm/patches/patch-25-llvm-r224415-ppc-local.diff) | 0 | ||||
-rw-r--r-- | contrib/llvm/patches/patch-24-llvm-r213890-ppc-eh_frame.diff (renamed from contrib/llvm/patches/patch-26-llvm-r213890-ppc-eh_frame.diff) | 0 | ||||
-rw-r--r-- | contrib/llvm/patches/patch-25-llvm-r224890-ppc-ctr-tls-loop.diff (renamed from contrib/llvm/patches/patch-28-llvm-r224890-ppc-ctr-tls-loop.diff) | 0 | ||||
-rw-r--r-- | contrib/llvm/patches/patch-26-clang-r213790-type_traits-crash.diff (renamed from contrib/llvm/patches/patch-29-clang-r213790-type_traits-crash.diff) | 0 | ||||
-rw-r--r-- | contrib/llvm/patches/patch-27-llvm-r221703-ppc-tls_get_addr.diff | 504 | ||||
-rw-r--r-- | contrib/llvm/patches/patch-27-llvm-r222587-arm-add-pc.diff (renamed from contrib/llvm/patches/patch-30-llvm-r222587-arm-add-pc.diff) | 0 | ||||
-rw-r--r-- | contrib/llvm/patches/patch-28-llvm-r222292-aarch64-no-neon.diff (renamed from contrib/llvm/patches/patch-31-llvm-r222292-aarch64-no-neon.diff) | 0 |
26 files changed, 24 insertions, 1692 deletions
diff --git a/contrib/llvm/patches/README.TXT b/contrib/llvm/patches/README.TXT index 86e3c2d..ad18595 100644 --- a/contrib/llvm/patches/README.TXT +++ b/contrib/llvm/patches/README.TXT @@ -1,11 +1,11 @@ This is a set of individual patches, which contain all the customizations to llvm/clang currently in the FreeBSD base system. These can be applied in -alphabetical order to a pristine llvm/clang 3.5.0 release source tree, for +alphabetical order to a pristine llvm/clang 3.5.1 release source tree, for example by doing: -svn co https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_350/final llvm-3.5.0-final -svn co https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_350/final llvm-3.5.0-final/tools/clang -cd llvm-3.5.0-final +svn co https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_351/final llvm-3.5.1-final +svn co https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_351/final llvm-3.5.1-final/tools/clang +cd llvm-3.5.1-final for p in /usr/src/contrib/llvm/patches/patch-*.diff; do patch -p0 -f -F0 -E -i $p -s || break done diff --git a/contrib/llvm/patches/patch-07-llvm-r213960-ppc32-tls.diff b/contrib/llvm/patches/patch-07-llvm-r213960-ppc32-tls.diff deleted file mode 100644 index 14d3517..0000000 --- a/contrib/llvm/patches/patch-07-llvm-r213960-ppc32-tls.diff +++ /dev/null @@ -1,547 +0,0 @@ -Pull in r213960 from upstream llvm trunk (by Hal Finkel): - - [PowerPC] Support TLS on PPC32/ELF - - Patch by Justin Hibbits! - -Introduced here: http://svnweb.freebsd.org/changeset/base/270147 - -Index: lib/Target/PowerPC/PPCISelLowering.h -=================================================================== ---- lib/Target/PowerPC/PPCISelLowering.h -+++ lib/Target/PowerPC/PPCISelLowering.h -@@ -181,6 +181,10 @@ namespace llvm { - /// on PPC32. - PPC32_GOT, - -+ /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by general dynamic and -+ /// local dynamic TLS on PPC32. 
-+ PPC32_PICGOT, -+ - /// G8RC = ADDIS_GOT_TPREL_HA %X2, Symbol - Used by the initial-exec - /// TLS model, produces an ADDIS8 instruction that adds the GOT - /// base to sym\@got\@tprel\@ha. -Index: lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp -=================================================================== ---- lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp -+++ lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp -@@ -17,6 +17,7 @@ - #include "llvm/MC/MCExpr.h" - #include "llvm/MC/MCInst.h" - #include "llvm/MC/MCInstrInfo.h" -+#include "llvm/MC/MCSymbol.h" - #include "llvm/Support/CommandLine.h" - #include "llvm/Support/raw_ostream.h" - #include "llvm/Target/TargetOpcodes.h" -@@ -308,10 +309,16 @@ void PPCInstPrinter::printMemRegReg(const MCInst * - - void PPCInstPrinter::printTLSCall(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { -- printBranchOperand(MI, OpNo, O); -+ // On PPC64, VariantKind is VK_None, but on PPC32, it's VK_PLT, and it must -+ // come at the _end_ of the expression. -+ const MCOperand &Op = MI->getOperand(OpNo); -+ const MCSymbolRefExpr &refExp = cast<MCSymbolRefExpr>(*Op.getExpr()); -+ O << refExp.getSymbol().getName(); - O << '('; - printOperand(MI, OpNo+1, O); - O << ')'; -+ if (refExp.getKind() != MCSymbolRefExpr::VK_None) -+ O << '@' << MCSymbolRefExpr::getVariantKindName(refExp.getKind()); - } - - -Index: lib/Target/PowerPC/PPCInstrInfo.td -=================================================================== ---- lib/Target/PowerPC/PPCInstrInfo.td -+++ lib/Target/PowerPC/PPCInstrInfo.td -@@ -588,6 +588,12 @@ def tlsreg32 : Operand<i32> { - let EncoderMethod = "getTLSRegEncoding"; - let ParserMatchClass = PPCTLSRegOperand; - } -+def tlsgd32 : Operand<i32> {} -+def tlscall32 : Operand<i32> { -+ let PrintMethod = "printTLSCall"; -+ let MIOperandInfo = (ops calltarget:$func, tlsgd32:$sym); -+ let EncoderMethod = "getTLSCallEncoding"; -+} - - // PowerPC Predicate operand. 
- def pred : Operand<OtherVT> { -@@ -1071,6 +1077,8 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in { - "bla $func", IIC_BrB, [(PPCcall (i32 imm:$func))]>; - - let isCodeGenOnly = 1 in { -+ def BL_TLS : IForm<18, 0, 1, (outs), (ins tlscall32:$func), -+ "bl $func", IIC_BrB, []>; - def BCCL : BForm<16, 0, 1, (outs), (ins pred:$cond, condbrtarget:$dst), - "b${cond:cc}l${cond:pm} ${cond:reg}, $dst">; - def BCCLA : BForm<16, 1, 1, (outs), (ins pred:$cond, abscondbrtarget:$dst), -@@ -2396,13 +2404,45 @@ def : Pat<(add i32:$in, (PPChi tblockaddress:$g, 0 - def PPC32GOT: Pseudo<(outs gprc:$rD), (ins), "#PPC32GOT", - [(set i32:$rD, (PPCppc32GOT))]>; - -+// Get the _GLOBAL_OFFSET_TABLE_ in PIC mode. -+// This uses two output registers, the first as the real output, the second as a -+// temporary register, used internally in code generation. -+def PPC32PICGOT: Pseudo<(outs gprc:$rD, gprc:$rT), (ins), "#PPC32PICGOT", -+ []>, NoEncode<"$rT">; -+ - def LDgotTprelL32: Pseudo<(outs gprc:$rD), (ins s16imm:$disp, gprc_nor0:$reg), -- "#LDgotTprelL32", -- [(set i32:$rD, -- (PPCldGotTprelL tglobaltlsaddr:$disp, i32:$reg))]>; -+ "#LDgotTprelL32", -+ [(set i32:$rD, -+ (PPCldGotTprelL tglobaltlsaddr:$disp, i32:$reg))]>; - def : Pat<(PPCaddTls i32:$in, tglobaltlsaddr:$g), - (ADD4TLS $in, tglobaltlsaddr:$g)>; - -+def ADDItlsgdL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp), -+ "#ADDItlsgdL32", -+ [(set i32:$rD, -+ (PPCaddiTlsgdL i32:$reg, tglobaltlsaddr:$disp))]>; -+def GETtlsADDR32 : Pseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym), -+ "#GETtlsADDR32", -+ [(set i32:$rD, -+ (PPCgetTlsAddr i32:$reg, tglobaltlsaddr:$sym))]>; -+def ADDItlsldL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp), -+ "#ADDItlsldL32", -+ [(set i32:$rD, -+ (PPCaddiTlsldL i32:$reg, tglobaltlsaddr:$disp))]>; -+def GETtlsldADDR32 : Pseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym), -+ "#GETtlsldADDR32", -+ [(set i32:$rD, -+ (PPCgetTlsldAddr i32:$reg, tglobaltlsaddr:$sym))]>; 
-+def ADDIdtprelL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp), -+ "#ADDIdtprelL32", -+ [(set i32:$rD, -+ (PPCaddiDtprelL i32:$reg, tglobaltlsaddr:$disp))]>; -+def ADDISdtprelHA32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp), -+ "#ADDISdtprelHA32", -+ [(set i32:$rD, -+ (PPCaddisDtprelHA i32:$reg, -+ tglobaltlsaddr:$disp))]>; -+ - // Support for Position-independent code - def LWZtoc: Pseudo<(outs gprc:$rD), (ins tocentry32:$disp, gprc:$reg), - "#LWZtoc", -Index: lib/Target/PowerPC/PPCISelLowering.cpp -=================================================================== ---- lib/Target/PowerPC/PPCISelLowering.cpp -+++ lib/Target/PowerPC/PPCISelLowering.cpp -@@ -1685,47 +1685,61 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(S - - if (Model == TLSModel::GeneralDynamic) { - SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); -- SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); -- SDValue GOTEntryHi = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT, -- GOTReg, TGA); -+ SDValue GOTPtr; -+ if (is64bit) { -+ SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); -+ GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT, -+ GOTReg, TGA); -+ } else { -+ GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT); -+ } - SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSGD_L, dl, PtrVT, -- GOTEntryHi, TGA); -+ GOTPtr, TGA); - - // We need a chain node, and don't have one handy. The underlying - // call has no side effects, so using the function entry node - // suffices. - SDValue Chain = DAG.getEntryNode(); -- Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, GOTEntry); -- SDValue ParmReg = DAG.getRegister(PPC::X3, MVT::i64); -+ Chain = DAG.getCopyToReg(Chain, dl, -+ is64bit ? PPC::X3 : PPC::R3, GOTEntry); -+ SDValue ParmReg = DAG.getRegister(is64bit ? PPC::X3 : PPC::R3, -+ is64bit ? 
MVT::i64 : MVT::i32); - SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLS_ADDR, dl, - PtrVT, ParmReg, TGA); - // The return value from GET_TLS_ADDR really is in X3 already, but - // some hacks are needed here to tie everything together. The extra - // copies dissolve during subsequent transforms. -- Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, TLSAddr); -- return DAG.getCopyFromReg(Chain, dl, PPC::X3, PtrVT); -+ Chain = DAG.getCopyToReg(Chain, dl, is64bit ? PPC::X3 : PPC::R3, TLSAddr); -+ return DAG.getCopyFromReg(Chain, dl, is64bit ? PPC::X3 : PPC::R3, PtrVT); - } - - if (Model == TLSModel::LocalDynamic) { - SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); -- SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); -- SDValue GOTEntryHi = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT, -- GOTReg, TGA); -+ SDValue GOTPtr; -+ if (is64bit) { -+ SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); -+ GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT, -+ GOTReg, TGA); -+ } else { -+ GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT); -+ } - SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSLD_L, dl, PtrVT, -- GOTEntryHi, TGA); -+ GOTPtr, TGA); - - // We need a chain node, and don't have one handy. The underlying - // call has no side effects, so using the function entry node - // suffices. - SDValue Chain = DAG.getEntryNode(); -- Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, GOTEntry); -- SDValue ParmReg = DAG.getRegister(PPC::X3, MVT::i64); -+ Chain = DAG.getCopyToReg(Chain, dl, -+ is64bit ? PPC::X3 : PPC::R3, GOTEntry); -+ SDValue ParmReg = DAG.getRegister(is64bit ? PPC::X3 : PPC::R3, -+ is64bit ? MVT::i64 : MVT::i32); - SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLSLD_ADDR, dl, - PtrVT, ParmReg, TGA); - // The return value from GET_TLSLD_ADDR really is in X3 already, but - // some hacks are needed here to tie everything together. The extra - // copies dissolve during subsequent transforms. 
-- Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, TLSAddr); -+ Chain = DAG.getCopyToReg(Chain, dl, is64bit ? PPC::X3 : PPC::R3, TLSAddr); - SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, PtrVT, - Chain, ParmReg, TGA); - return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA); -Index: lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp -=================================================================== ---- lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp -+++ lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp -@@ -236,7 +236,10 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(con - Type = ELF::R_PPC64_DTPREL16_HIGHESTA; - break; - case MCSymbolRefExpr::VK_PPC_GOT_TLSGD: -- Type = ELF::R_PPC64_GOT_TLSGD16; -+ if (is64Bit()) -+ Type = ELF::R_PPC64_GOT_TLSGD16; -+ else -+ Type = ELF::R_PPC_GOT_TLSGD16; - break; - case MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO: - Type = ELF::R_PPC64_GOT_TLSGD16_LO; -@@ -248,7 +251,10 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(con - Type = ELF::R_PPC64_GOT_TLSGD16_HA; - break; - case MCSymbolRefExpr::VK_PPC_GOT_TLSLD: -- Type = ELF::R_PPC64_GOT_TLSLD16; -+ if (is64Bit()) -+ Type = ELF::R_PPC64_GOT_TLSLD16; -+ else -+ Type = ELF::R_PPC_GOT_TLSLD16; - break; - case MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO: - Type = ELF::R_PPC64_GOT_TLSLD16_LO; -@@ -344,13 +350,22 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(con - switch (Modifier) { - default: llvm_unreachable("Unsupported Modifier"); - case MCSymbolRefExpr::VK_PPC_TLSGD: -- Type = ELF::R_PPC64_TLSGD; -+ if (is64Bit()) -+ Type = ELF::R_PPC64_TLSGD; -+ else -+ Type = ELF::R_PPC_TLSGD; - break; - case MCSymbolRefExpr::VK_PPC_TLSLD: -- Type = ELF::R_PPC64_TLSLD; -+ if (is64Bit()) -+ Type = ELF::R_PPC64_TLSLD; -+ else -+ Type = ELF::R_PPC_TLSLD; - break; - case MCSymbolRefExpr::VK_PPC_TLS: -- Type = ELF::R_PPC64_TLS; -+ if (is64Bit()) -+ Type = ELF::R_PPC64_TLS; -+ else -+ Type = ELF::R_PPC_TLS; - break; - } - break; -Index: 
lib/Target/PowerPC/PPCAsmPrinter.cpp -=================================================================== ---- lib/Target/PowerPC/PPCAsmPrinter.cpp -+++ lib/Target/PowerPC/PPCAsmPrinter.cpp -@@ -573,6 +573,34 @@ void PPCAsmPrinter::EmitInstruction(const MachineI - return; - } - -+ case PPC::PPC32PICGOT: { -+ MCSymbol *GOTSymbol = OutContext.GetOrCreateSymbol(StringRef("_GLOBAL_OFFSET_TABLE_")); -+ MCSymbol *GOTRef = OutContext.CreateTempSymbol(); -+ MCSymbol *NextInstr = OutContext.CreateTempSymbol(); -+ -+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BL) -+ // FIXME: We would like an efficient form for this, so we don't have to do -+ // a lot of extra uniquing. -+ .addExpr(MCSymbolRefExpr::Create(NextInstr, OutContext))); -+ const MCExpr *OffsExpr = -+ MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(GOTSymbol, OutContext), -+ MCSymbolRefExpr::Create(GOTRef, OutContext), -+ OutContext); -+ OutStreamer.EmitLabel(GOTRef); -+ OutStreamer.EmitValue(OffsExpr, 4); -+ OutStreamer.EmitLabel(NextInstr); -+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MFLR) -+ .addReg(MI->getOperand(0).getReg())); -+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LWZ) -+ .addReg(MI->getOperand(1).getReg()) -+ .addImm(0) -+ .addReg(MI->getOperand(0).getReg())); -+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADD4) -+ .addReg(MI->getOperand(0).getReg()) -+ .addReg(MI->getOperand(1).getReg()) -+ .addReg(MI->getOperand(0).getReg())); -+ return; -+ } - case PPC::PPC32GOT: { - MCSymbol *GOTSymbol = OutContext.GetOrCreateSymbol(StringRef("_GLOBAL_OFFSET_TABLE_")); - const MCExpr *SymGotTlsL = -@@ -606,31 +634,43 @@ void PPCAsmPrinter::EmitInstruction(const MachineI - .addExpr(SymGotTlsGD)); - return; - } -- case PPC::ADDItlsgdL: { -+ case PPC::ADDItlsgdL: - // Transform: %Xd = ADDItlsgdL %Xs, <ga:@sym> - // Into: %Xd = ADDI8 %Xs, sym@got@tlsgd@l -- assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); -+ case PPC::ADDItlsgdL32: { -+ // Transform: %Rd = ADDItlsgdL32 %Rs, <ga:@sym> 
-+ // Into: %Rd = ADDI %Rs, sym@got@tlsgd - const MachineOperand &MO = MI->getOperand(2); - const GlobalValue *GValue = MO.getGlobal(); - MCSymbol *MOSymbol = getSymbol(GValue); - const MCExpr *SymGotTlsGD = -- MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO, -+ MCSymbolRefExpr::Create(MOSymbol, Subtarget.isPPC64() ? -+ MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO : -+ MCSymbolRefExpr::VK_PPC_GOT_TLSGD, - OutContext); -- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDI8) -- .addReg(MI->getOperand(0).getReg()) -- .addReg(MI->getOperand(1).getReg()) -- .addExpr(SymGotTlsGD)); -+ EmitToStreamer(OutStreamer, -+ MCInstBuilder(Subtarget.isPPC64() ? PPC::ADDI8 : PPC::ADDI) -+ .addReg(MI->getOperand(0).getReg()) -+ .addReg(MI->getOperand(1).getReg()) -+ .addExpr(SymGotTlsGD)); - return; - } -- case PPC::GETtlsADDR: { -+ case PPC::GETtlsADDR: - // Transform: %X3 = GETtlsADDR %X3, <ga:@sym> - // Into: BL8_NOP_TLS __tls_get_addr(sym@tlsgd) -- assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); -+ case PPC::GETtlsADDR32: { -+ // Transform: %R3 = GETtlsADDR32 %R3, <ga:@sym> -+ // Into: BL_TLS __tls_get_addr(sym@tlsgd)@PLT - - StringRef Name = "__tls_get_addr"; - MCSymbol *TlsGetAddr = OutContext.GetOrCreateSymbol(Name); -+ MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None; -+ -+ if (!Subtarget.isPPC64() && !Subtarget.isDarwin() && -+ TM.getRelocationModel() == Reloc::PIC_) -+ Kind = MCSymbolRefExpr::VK_PLT; - const MCSymbolRefExpr *TlsRef = -- MCSymbolRefExpr::Create(TlsGetAddr, MCSymbolRefExpr::VK_None, OutContext); -+ MCSymbolRefExpr::Create(TlsGetAddr, Kind, OutContext); - const MachineOperand &MO = MI->getOperand(2); - const GlobalValue *GValue = MO.getGlobal(); - MCSymbol *MOSymbol = getSymbol(GValue); -@@ -637,9 +677,11 @@ void PPCAsmPrinter::EmitInstruction(const MachineI - const MCExpr *SymVar = - MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSGD, - OutContext); -- EmitToStreamer(OutStreamer, 
MCInstBuilder(PPC::BL8_NOP_TLS) -- .addExpr(TlsRef) -- .addExpr(SymVar)); -+ EmitToStreamer(OutStreamer, -+ MCInstBuilder(Subtarget.isPPC64() ? -+ PPC::BL8_NOP_TLS : PPC::BL_TLS) -+ .addExpr(TlsRef) -+ .addExpr(SymVar)); - return; - } - case PPC::ADDIStlsldHA: { -@@ -658,31 +700,44 @@ void PPCAsmPrinter::EmitInstruction(const MachineI - .addExpr(SymGotTlsLD)); - return; - } -- case PPC::ADDItlsldL: { -+ case PPC::ADDItlsldL: - // Transform: %Xd = ADDItlsldL %Xs, <ga:@sym> - // Into: %Xd = ADDI8 %Xs, sym@got@tlsld@l -- assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); -+ case PPC::ADDItlsldL32: { -+ // Transform: %Rd = ADDItlsldL32 %Rs, <ga:@sym> -+ // Into: %Rd = ADDI %Rs, sym@got@tlsld - const MachineOperand &MO = MI->getOperand(2); - const GlobalValue *GValue = MO.getGlobal(); - MCSymbol *MOSymbol = getSymbol(GValue); - const MCExpr *SymGotTlsLD = -- MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO, -+ MCSymbolRefExpr::Create(MOSymbol, Subtarget.isPPC64() ? -+ MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO : -+ MCSymbolRefExpr::VK_PPC_GOT_TLSLD, - OutContext); -- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDI8) -- .addReg(MI->getOperand(0).getReg()) -- .addReg(MI->getOperand(1).getReg()) -- .addExpr(SymGotTlsLD)); -+ EmitToStreamer(OutStreamer, -+ MCInstBuilder(Subtarget.isPPC64() ? 
PPC::ADDI8 : PPC::ADDI) -+ .addReg(MI->getOperand(0).getReg()) -+ .addReg(MI->getOperand(1).getReg()) -+ .addExpr(SymGotTlsLD)); - return; - } -- case PPC::GETtlsldADDR: { -+ case PPC::GETtlsldADDR: - // Transform: %X3 = GETtlsldADDR %X3, <ga:@sym> - // Into: BL8_NOP_TLS __tls_get_addr(sym@tlsld) -- assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); -+ case PPC::GETtlsldADDR32: { -+ // Transform: %R3 = GETtlsldADDR32 %R3, <ga:@sym> -+ // Into: BL_TLS __tls_get_addr(sym@tlsld)@PLT - - StringRef Name = "__tls_get_addr"; - MCSymbol *TlsGetAddr = OutContext.GetOrCreateSymbol(Name); -+ MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None; -+ -+ if (!Subtarget.isPPC64() && !Subtarget.isDarwin() && -+ TM.getRelocationModel() == Reloc::PIC_) -+ Kind = MCSymbolRefExpr::VK_PLT; -+ - const MCSymbolRefExpr *TlsRef = -- MCSymbolRefExpr::Create(TlsGetAddr, MCSymbolRefExpr::VK_None, OutContext); -+ MCSymbolRefExpr::Create(TlsGetAddr, Kind, OutContext); - const MachineOperand &MO = MI->getOperand(2); - const GlobalValue *GValue = MO.getGlobal(); - MCSymbol *MOSymbol = getSymbol(GValue); -@@ -689,15 +744,19 @@ void PPCAsmPrinter::EmitInstruction(const MachineI - const MCExpr *SymVar = - MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSLD, - OutContext); -- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BL8_NOP_TLS) -- .addExpr(TlsRef) -- .addExpr(SymVar)); -+ EmitToStreamer(OutStreamer, -+ MCInstBuilder(Subtarget.isPPC64() ? 
-+ PPC::BL8_NOP_TLS : PPC::BL_TLS) -+ .addExpr(TlsRef) -+ .addExpr(SymVar)); - return; - } -- case PPC::ADDISdtprelHA: { -+ case PPC::ADDISdtprelHA: - // Transform: %Xd = ADDISdtprelHA %X3, <ga:@sym> - // Into: %Xd = ADDIS8 %X3, sym@dtprel@ha -- assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); -+ case PPC::ADDISdtprelHA32: { -+ // Transform: %Rd = ADDISdtprelHA32 %R3, <ga:@sym> -+ // Into: %Rd = ADDIS %R3, sym@dtprel@ha - const MachineOperand &MO = MI->getOperand(2); - const GlobalValue *GValue = MO.getGlobal(); - MCSymbol *MOSymbol = getSymbol(GValue); -@@ -704,16 +763,19 @@ void PPCAsmPrinter::EmitInstruction(const MachineI - const MCExpr *SymDtprel = - MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_HA, - OutContext); -- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDIS8) -- .addReg(MI->getOperand(0).getReg()) -- .addReg(PPC::X3) -- .addExpr(SymDtprel)); -+ EmitToStreamer(OutStreamer, -+ MCInstBuilder(Subtarget.isPPC64() ? PPC::ADDIS8 : PPC::ADDIS) -+ .addReg(MI->getOperand(0).getReg()) -+ .addReg(Subtarget.isPPC64() ? 
PPC::X3 : PPC::R3) -+ .addExpr(SymDtprel)); - return; - } -- case PPC::ADDIdtprelL: { -+ case PPC::ADDIdtprelL: - // Transform: %Xd = ADDIdtprelL %Xs, <ga:@sym> - // Into: %Xd = ADDI8 %Xs, sym@dtprel@l -- assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); -+ case PPC::ADDIdtprelL32: { -+ // Transform: %Rd = ADDIdtprelL32 %Rs, <ga:@sym> -+ // Into: %Rd = ADDI %Rs, sym@dtprel@l - const MachineOperand &MO = MI->getOperand(2); - const GlobalValue *GValue = MO.getGlobal(); - MCSymbol *MOSymbol = getSymbol(GValue); -@@ -720,10 +782,11 @@ void PPCAsmPrinter::EmitInstruction(const MachineI - const MCExpr *SymDtprel = - MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_LO, - OutContext); -- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDI8) -- .addReg(MI->getOperand(0).getReg()) -- .addReg(MI->getOperand(1).getReg()) -- .addExpr(SymDtprel)); -+ EmitToStreamer(OutStreamer, -+ MCInstBuilder(Subtarget.isPPC64() ? PPC::ADDI8 : PPC::ADDI) -+ .addReg(MI->getOperand(0).getReg()) -+ .addReg(MI->getOperand(1).getReg()) -+ .addExpr(SymDtprel)); - return; - } - case PPC::MFOCRF: -Index: lib/Target/PowerPC/PPCISelDAGToDAG.cpp -=================================================================== ---- lib/Target/PowerPC/PPCISelDAGToDAG.cpp -+++ lib/Target/PowerPC/PPCISelDAGToDAG.cpp -@@ -1473,6 +1473,12 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { - return CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64, - SDValue(Tmp, 0), GA); - } -+ case PPCISD::PPC32_PICGOT: { -+ // Generate a PIC-safe GOT reference. -+ assert(!PPCSubTarget->isPPC64() && PPCSubTarget->isSVR4ABI() && -+ "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4"); -+ return CurDAG->SelectNodeTo(N, PPC::PPC32PICGOT, PPCLowering->getPointerTy(), MVT::i32); -+ } - case PPCISD::VADD_SPLAT: { - // This expands into one of three sequences, depending on whether - // the first operand is odd or even, positive or negative. 
-Index: test/CodeGen/PowerPC/tls-pic.ll -=================================================================== ---- test/CodeGen/PowerPC/tls-pic.ll -+++ test/CodeGen/PowerPC/tls-pic.ll -@@ -1,5 +1,7 @@ - ; RUN: llc -march=ppc64 -mcpu=pwr7 -O0 -relocation-model=pic < %s | FileCheck -check-prefix=OPT0 %s - ; RUN: llc -march=ppc64 -mcpu=pwr7 -O1 -relocation-model=pic < %s | FileCheck -check-prefix=OPT1 %s -+; RUN: llc -march=ppc32 -O0 -relocation-model=pic < %s | FileCheck -check-prefix=OPT0-32 %s -+; RUN: llc -march=ppc32 -O1 -relocation-model=pic < %s | FileCheck -check-prefix=OPT1-32 %s - - target triple = "powerpc64-unknown-linux-gnu" - ; Test correct assembly code generation for thread-local storage using -@@ -22,6 +24,16 @@ entry: - ; OPT0-NEXT: nop - ; OPT0: addis [[REG2:[0-9]+]], 3, a@dtprel@ha - ; OPT0-NEXT: addi {{[0-9]+}}, [[REG2]], a@dtprel@l -+; OPT0-32-LABEL: main -+; OPT0-32: addi {{[0-9]+}}, {{[0-9]+}}, a@got@tlsld -+; OPT0-32: bl __tls_get_addr(a@tlsld)@PLT -+; OPT0-32: addis [[REG:[0-9]+]], 3, a@dtprel@ha -+; OPT0-32-NEXT: addi {{[0-9]+}}, [[REG]], a@dtprel@l -+; OPT1-32-LABEL: main -+; OPT1-32: addi 3, {{[0-9]+}}, a@got@tlsld -+; OPT1-32: bl __tls_get_addr(a@tlsld)@PLT -+; OPT1-32: addis [[REG:[0-9]+]], 3, a@dtprel@ha -+; OPT1-32-NEXT: addi {{[0-9]+}}, [[REG]], a@dtprel@l - - ; Test peephole optimization for thread-local storage using the - ; local dynamic model. 
-@@ -52,4 +64,6 @@ entry: - ; OPT1-NEXT: addi 3, [[REG]], a2@got@tlsgd@l - ; OPT1: bl __tls_get_addr(a2@tlsgd) - ; OPT1-NEXT: nop -- -+; OPT1-32-LABEL: main2 -+; OPT1-32: addi 3, {{[0-9]+}}, a2@got@tlsgd -+; OPT1-32: bl __tls_get_addr(a2@tlsgd)@PLT diff --git a/contrib/llvm/patches/patch-08-llvm-r216989-r216990-fix-movw-armv6.diff b/contrib/llvm/patches/patch-07-llvm-r216989-r216990-fix-movw-armv6.diff index 9861d9f..9861d9f 100644 --- a/contrib/llvm/patches/patch-08-llvm-r216989-r216990-fix-movw-armv6.diff +++ b/contrib/llvm/patches/patch-07-llvm-r216989-r216990-fix-movw-armv6.diff diff --git a/contrib/llvm/patches/patch-09-clang-r217410-i386-garbage-float.diff b/contrib/llvm/patches/patch-08-clang-r217410-i386-garbage-float.diff index 26cd2bf..26cd2bf 100644 --- a/contrib/llvm/patches/patch-09-clang-r217410-i386-garbage-float.diff +++ b/contrib/llvm/patches/patch-08-clang-r217410-i386-garbage-float.diff diff --git a/contrib/llvm/patches/patch-10-llvm-r221709-debug-oom.diff b/contrib/llvm/patches/patch-09-llvm-r221709-debug-oom.diff index a00b9f1..a00b9f1 100644 --- a/contrib/llvm/patches/patch-10-llvm-r221709-debug-oom.diff +++ b/contrib/llvm/patches/patch-09-llvm-r221709-debug-oom.diff diff --git a/contrib/llvm/patches/patch-11-llvm-r222562-loop-rotate.diff b/contrib/llvm/patches/patch-10-llvm-r222562-loop-rotate.diff index bc09f27..bc09f27 100644 --- a/contrib/llvm/patches/patch-11-llvm-r222562-loop-rotate.diff +++ b/contrib/llvm/patches/patch-10-llvm-r222562-loop-rotate.diff diff --git a/contrib/llvm/patches/patch-12-add-llvm-gvn-option.diff b/contrib/llvm/patches/patch-11-add-llvm-gvn-option.diff index fcd051f..fcd051f 100644 --- a/contrib/llvm/patches/patch-12-add-llvm-gvn-option.diff +++ b/contrib/llvm/patches/patch-11-add-llvm-gvn-option.diff diff --git a/contrib/llvm/patches/patch-13-llvm-r218241-dwarf2-warning.diff b/contrib/llvm/patches/patch-12-llvm-r218241-dwarf2-warning.diff index 6127c78..6127c78 100644 --- 
a/contrib/llvm/patches/patch-13-llvm-r218241-dwarf2-warning.diff +++ b/contrib/llvm/patches/patch-12-llvm-r218241-dwarf2-warning.diff diff --git a/contrib/llvm/patches/patch-14-llvm-r215352-aarch64-dyn-loader.diff b/contrib/llvm/patches/patch-13-llvm-r215352-aarch64-dyn-loader.diff index d4c3880..d4c3880 100644 --- a/contrib/llvm/patches/patch-14-llvm-r215352-aarch64-dyn-loader.diff +++ b/contrib/llvm/patches/patch-13-llvm-r215352-aarch64-dyn-loader.diff diff --git a/contrib/llvm/patches/patch-15-llvm-r216571-dynamiclib-usability.diff b/contrib/llvm/patches/patch-14-llvm-r216571-dynamiclib-usability.diff index ff0a6d1..ff0a6d1 100644 --- a/contrib/llvm/patches/patch-15-llvm-r216571-dynamiclib-usability.diff +++ b/contrib/llvm/patches/patch-14-llvm-r216571-dynamiclib-usability.diff diff --git a/contrib/llvm/patches/patch-16-clang-r221900-freebsd-aarch64.diff b/contrib/llvm/patches/patch-15-clang-r221900-freebsd-aarch64.diff index 1c9eab1..1c9eab1 100644 --- a/contrib/llvm/patches/patch-16-clang-r221900-freebsd-aarch64.diff +++ b/contrib/llvm/patches/patch-15-clang-r221900-freebsd-aarch64.diff diff --git a/contrib/llvm/patches/patch-17-llvm-r222856-libapr-miscompile.diff b/contrib/llvm/patches/patch-16-llvm-r222856-libapr-miscompile.diff index c9e64d9..c9e64d9 100644 --- a/contrib/llvm/patches/patch-17-llvm-r222856-libapr-miscompile.diff +++ b/contrib/llvm/patches/patch-16-llvm-r222856-libapr-miscompile.diff diff --git a/contrib/llvm/patches/patch-18-llvm-r214802-armv6-cp10-cp11.diff b/contrib/llvm/patches/patch-17-llvm-r214802-armv6-cp10-cp11.diff index 17e1c75..17e1c75 100644 --- a/contrib/llvm/patches/patch-18-llvm-r214802-armv6-cp10-cp11.diff +++ b/contrib/llvm/patches/patch-17-llvm-r214802-armv6-cp10-cp11.diff diff --git a/contrib/llvm/patches/patch-19-llvm-r215811-arm-fpu-directive.diff b/contrib/llvm/patches/patch-18-llvm-r215811-arm-fpu-directive.diff index c11365c..c11365c 100644 --- a/contrib/llvm/patches/patch-19-llvm-r215811-arm-fpu-directive.diff +++ 
b/contrib/llvm/patches/patch-18-llvm-r215811-arm-fpu-directive.diff diff --git a/contrib/llvm/patches/patch-20-enable-armv6-clrex.diff b/contrib/llvm/patches/patch-19-enable-armv6-clrex.diff index fd77f45..fd77f45 100644 --- a/contrib/llvm/patches/patch-20-enable-armv6-clrex.diff +++ b/contrib/llvm/patches/patch-19-enable-armv6-clrex.diff diff --git a/contrib/llvm/patches/patch-22-llvm-r223147-arm-cpu-directive.diff b/contrib/llvm/patches/patch-20-llvm-r223147-arm-cpu-directive.diff index e97dc2e..e97dc2e 100644 --- a/contrib/llvm/patches/patch-22-llvm-r223147-arm-cpu-directive.diff +++ b/contrib/llvm/patches/patch-20-llvm-r223147-arm-cpu-directive.diff diff --git a/contrib/llvm/patches/patch-23-llvm-r221170-ppc-vaarg.diff b/contrib/llvm/patches/patch-21-llvm-r221170-ppc-vaarg.diff index 7820d6b..7820d6b 100644 --- a/contrib/llvm/patches/patch-23-llvm-r221170-ppc-vaarg.diff +++ b/contrib/llvm/patches/patch-21-llvm-r221170-ppc-vaarg.diff diff --git a/contrib/llvm/patches/patch-21-llvm-r223171-fix-vectorizer.diff b/contrib/llvm/patches/patch-21-llvm-r223171-fix-vectorizer.diff deleted file mode 100644 index 306f950..0000000 --- a/contrib/llvm/patches/patch-21-llvm-r223171-fix-vectorizer.diff +++ /dev/null @@ -1,617 +0,0 @@ -Pull in r223170 from upstream llvm trunk (by Michael Zolotukhin): - - Apply loop-rotate to several vectorizer tests. - - Such loops shouldn't be vectorized due to the loops form. - After applying loop-rotate (+simplifycfg) the tests again start to check - what they are intended to check. - -Pull in r223171 from upstream llvm trunk (by Michael Zolotukhin): - - PR21302. Vectorize only bottom-tested loops. - - rdar://problem/18886083 - -This fixes a bug in the llvm vectorizer, which could sometimes cause -vectorized loops to perform an additional iteration, leading to possible -buffer overruns. 
Symptoms of this, which are usually segfaults, were -first noticed when building gcc ports, here: - -https://lists.freebsd.org/pipermail/freebsd-ports/2014-September/095466.html -https://lists.freebsd.org/pipermail/freebsd-toolchain/2014-September/001211.html - -Note: because this is applied on top of llvm/clang 3.5.0, this fix is -slightly different from the one just checked into head in r275633. - -Introduced here: http://svnweb.freebsd.org/changeset/base/275635 - -Index: lib/Transforms/Vectorize/LoopVectorize.cpp -=================================================================== ---- lib/Transforms/Vectorize/LoopVectorize.cpp -+++ lib/Transforms/Vectorize/LoopVectorize.cpp -@@ -3466,6 +3466,15 @@ bool LoopVectorizationLegality::canVectorize() { - return false; - } - -+ // We only handle bottom-tested loops, i.e. loop in which the condition is -+ // checked at the end of each iteration. With that we can assume that all -+ // instructions in the loop are executed the same number of times. -+ if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) { -+ emitAnalysis( -+ Report() << "loop control flow is not understood by vectorizer"); -+ return false; -+ } -+ - // We need to have a loop header. 
- DEBUG(dbgs() << "LV: Found a loop: " << - TheLoop->getHeader()->getName() << '\n'); -Index: test/Transforms/LoopVectorize/vect.stats.ll -=================================================================== ---- test/Transforms/LoopVectorize/vect.stats.ll -+++ test/Transforms/LoopVectorize/vect.stats.ll -@@ -13,53 +13,47 @@ target triple = "x86_64-unknown-linux-gnu" - - define void @vectorized(float* nocapture %a, i64 %size) { - entry: -- %cmp1 = icmp sgt i64 %size, 0 -- br i1 %cmp1, label %for.header, label %for.end -+ %cmp1 = icmp sle i64 %size, 0 -+ %cmp21 = icmp sgt i64 0, %size -+ %or.cond = or i1 %cmp1, %cmp21 -+ br i1 %or.cond, label %for.end, label %for.body - --for.header: -- %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] -- %cmp2 = icmp sgt i64 %indvars.iv, %size -- br i1 %cmp2, label %for.end, label %for.body -- --for.body: -- -- %arrayidx = getelementptr inbounds float* %a, i64 %indvars.iv -+for.body: ; preds = %entry, %for.body -+ %indvars.iv2 = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] -+ %arrayidx = getelementptr inbounds float* %a, i64 %indvars.iv2 - %0 = load float* %arrayidx, align 4 - %mul = fmul float %0, %0 - store float %mul, float* %arrayidx, align 4 -+ %indvars.iv.next = add nuw nsw i64 %indvars.iv2, 1 -+ %cmp2 = icmp sgt i64 %indvars.iv.next, %size -+ br i1 %cmp2, label %for.end, label %for.body - -- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 -- br label %for.header -- --for.end: -+for.end: ; preds = %entry, %for.body - ret void - } - - define void @not_vectorized(float* nocapture %a, i64 %size) { - entry: -- %cmp1 = icmp sgt i64 %size, 0 -- br i1 %cmp1, label %for.header, label %for.end -+ %cmp1 = icmp sle i64 %size, 0 -+ %cmp21 = icmp sgt i64 0, %size -+ %or.cond = or i1 %cmp1, %cmp21 -+ br i1 %or.cond, label %for.end, label %for.body - --for.header: -- %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] -- %cmp2 = icmp sgt i64 %indvars.iv, %size -- br i1 %cmp2, label %for.end, 
label %for.body -- --for.body: -- -- %0 = add nsw i64 %indvars.iv, -5 -+for.body: ; preds = %entry, %for.body -+ %indvars.iv2 = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] -+ %0 = add nsw i64 %indvars.iv2, -5 - %arrayidx = getelementptr inbounds float* %a, i64 %0 - %1 = load float* %arrayidx, align 4 -- %2 = add nsw i64 %indvars.iv, 2 -+ %2 = add nsw i64 %indvars.iv2, 2 - %arrayidx2 = getelementptr inbounds float* %a, i64 %2 - %3 = load float* %arrayidx2, align 4 - %mul = fmul float %1, %3 -- %arrayidx4 = getelementptr inbounds float* %a, i64 %indvars.iv -+ %arrayidx4 = getelementptr inbounds float* %a, i64 %indvars.iv2 - store float %mul, float* %arrayidx4, align 4 -+ %indvars.iv.next = add nuw nsw i64 %indvars.iv2, 1 -+ %cmp2 = icmp sgt i64 %indvars.iv.next, %size -+ br i1 %cmp2, label %for.end, label %for.body - -- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 -- br label %for.header -- --for.end: -+for.end: ; preds = %entry, %for.body - ret void --} -\ No newline at end of file -+} -Index: test/Transforms/LoopVectorize/loop-form.ll -=================================================================== ---- test/Transforms/LoopVectorize/loop-form.ll -+++ test/Transforms/LoopVectorize/loop-form.ll -@@ -0,0 +1,31 @@ -+; RUN: opt -S -loop-vectorize < %s | FileCheck %s -+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" -+ -+; Check that we vectorize only bottom-tested loops. -+; This is a reduced testcase from PR21302. 
-+; -+; rdar://problem/18886083 -+ -+%struct.X = type { i32, i16 } -+; CHECK-LABEL: @foo( -+; CHECK-NOT: vector.body -+ -+define void @foo(i32 %n) { -+entry: -+ br label %for.cond -+ -+for.cond: -+ %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] -+ %cmp = icmp slt i32 %i, %n -+ br i1 %cmp, label %for.body, label %if.end -+ -+for.body: -+ %iprom = sext i32 %i to i64 -+ %b = getelementptr inbounds %struct.X* undef, i64 %iprom, i32 1 -+ store i16 0, i16* %b, align 4 -+ %inc = add nsw i32 %i, 1 -+ br label %for.cond -+ -+if.end: -+ ret void -+} -Index: test/Transforms/LoopVectorize/runtime-check-readonly-address-space.ll -=================================================================== ---- test/Transforms/LoopVectorize/runtime-check-readonly-address-space.ll -+++ test/Transforms/LoopVectorize/runtime-check-readonly-address-space.ll -@@ -8,26 +8,24 @@ define void @add_ints_1_1_1(i32 addrspace(1)* %a, - ; CHECK-LABEL: @add_ints_1_1_1( - ; CHECK: <4 x i32> - ; CHECK: ret -+ - entry: -- br label %for.cond -+ br label %for.body - --for.cond: ; preds = %for.body, %entry -- %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ] -- %cmp = icmp ult i64 %i.0, 200 -- br i1 %cmp, label %for.body, label %for.end -- --for.body: ; preds = %for.cond -- %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %i.0 -+for.body: ; preds = %entry, %for.body -+ %i.01 = phi i64 [ 0, %entry ], [ %inc, %for.body ] -+ %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %i.01 - %0 = load i32 addrspace(1)* %arrayidx, align 4 -- %arrayidx1 = getelementptr inbounds i32 addrspace(1)* %c, i64 %i.0 -+ %arrayidx1 = getelementptr inbounds i32 addrspace(1)* %c, i64 %i.01 - %1 = load i32 addrspace(1)* %arrayidx1, align 4 - %add = add nsw i32 %0, %1 -- %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %a, i64 %i.0 -+ %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %a, i64 %i.01 - store i32 %add, i32 addrspace(1)* %arrayidx2, align 4 -- %inc = add i64 %i.0, 1 -- br label %for.cond -+ 
%inc = add i64 %i.01, 1 -+ %cmp = icmp ult i64 %inc, 200 -+ br i1 %cmp, label %for.body, label %for.end - --for.end: ; preds = %for.cond -+for.end: ; preds = %for.body - ret void - } - -@@ -35,26 +33,24 @@ define void @add_ints_as_1_0_0(i32 addrspace(1)* % - ; CHECK-LABEL: @add_ints_as_1_0_0( - ; CHECK-NOT: <4 x i32> - ; CHECK: ret -+ - entry: -- br label %for.cond -+ br label %for.body - --for.cond: ; preds = %for.body, %entry -- %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ] -- %cmp = icmp ult i64 %i.0, 200 -- br i1 %cmp, label %for.body, label %for.end -- --for.body: ; preds = %for.cond -- %arrayidx = getelementptr inbounds i32* %b, i64 %i.0 -+for.body: ; preds = %entry, %for.body -+ %i.01 = phi i64 [ 0, %entry ], [ %inc, %for.body ] -+ %arrayidx = getelementptr inbounds i32* %b, i64 %i.01 - %0 = load i32* %arrayidx, align 4 -- %arrayidx1 = getelementptr inbounds i32* %c, i64 %i.0 -+ %arrayidx1 = getelementptr inbounds i32* %c, i64 %i.01 - %1 = load i32* %arrayidx1, align 4 - %add = add nsw i32 %0, %1 -- %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %a, i64 %i.0 -+ %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %a, i64 %i.01 - store i32 %add, i32 addrspace(1)* %arrayidx2, align 4 -- %inc = add i64 %i.0, 1 -- br label %for.cond -+ %inc = add i64 %i.01, 1 -+ %cmp = icmp ult i64 %inc, 200 -+ br i1 %cmp, label %for.body, label %for.end - --for.end: ; preds = %for.cond -+for.end: ; preds = %for.body - ret void - } - -@@ -62,26 +58,24 @@ define void @add_ints_as_0_1_0(i32* %a, i32 addrsp - ; CHECK-LABEL: @add_ints_as_0_1_0( - ; CHECK-NOT: <4 x i32> - ; CHECK: ret -+ - entry: -- br label %for.cond -+ br label %for.body - --for.cond: ; preds = %for.body, %entry -- %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ] -- %cmp = icmp ult i64 %i.0, 200 -- br i1 %cmp, label %for.body, label %for.end -- --for.body: ; preds = %for.cond -- %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %i.0 -+for.body: ; preds = %entry, %for.body -+ %i.01 = phi i64 
[ 0, %entry ], [ %inc, %for.body ] -+ %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %i.01 - %0 = load i32 addrspace(1)* %arrayidx, align 4 -- %arrayidx1 = getelementptr inbounds i32* %c, i64 %i.0 -+ %arrayidx1 = getelementptr inbounds i32* %c, i64 %i.01 - %1 = load i32* %arrayidx1, align 4 - %add = add nsw i32 %0, %1 -- %arrayidx2 = getelementptr inbounds i32* %a, i64 %i.0 -+ %arrayidx2 = getelementptr inbounds i32* %a, i64 %i.01 - store i32 %add, i32* %arrayidx2, align 4 -- %inc = add i64 %i.0, 1 -- br label %for.cond -+ %inc = add i64 %i.01, 1 -+ %cmp = icmp ult i64 %inc, 200 -+ br i1 %cmp, label %for.body, label %for.end - --for.end: ; preds = %for.cond -+for.end: ; preds = %for.body - ret void - } - -@@ -89,26 +83,24 @@ define void @add_ints_as_0_1_1(i32* %a, i32 addrsp - ; CHECK-LABEL: @add_ints_as_0_1_1( - ; CHECK-NOT: <4 x i32> - ; CHECK: ret -+ - entry: -- br label %for.cond -+ br label %for.body - --for.cond: ; preds = %for.body, %entry -- %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ] -- %cmp = icmp ult i64 %i.0, 200 -- br i1 %cmp, label %for.body, label %for.end -- --for.body: ; preds = %for.cond -- %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %i.0 -+for.body: ; preds = %entry, %for.body -+ %i.01 = phi i64 [ 0, %entry ], [ %inc, %for.body ] -+ %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %i.01 - %0 = load i32 addrspace(1)* %arrayidx, align 4 -- %arrayidx1 = getelementptr inbounds i32 addrspace(1)* %c, i64 %i.0 -+ %arrayidx1 = getelementptr inbounds i32 addrspace(1)* %c, i64 %i.01 - %1 = load i32 addrspace(1)* %arrayidx1, align 4 - %add = add nsw i32 %0, %1 -- %arrayidx2 = getelementptr inbounds i32* %a, i64 %i.0 -+ %arrayidx2 = getelementptr inbounds i32* %a, i64 %i.01 - store i32 %add, i32* %arrayidx2, align 4 -- %inc = add i64 %i.0, 1 -- br label %for.cond -+ %inc = add i64 %i.01, 1 -+ %cmp = icmp ult i64 %inc, 200 -+ br i1 %cmp, label %for.body, label %for.end - --for.end: ; preds = %for.cond 
-+for.end: ; preds = %for.body - ret void - } - -@@ -116,26 +108,24 @@ define void @add_ints_as_0_1_2(i32* %a, i32 addrsp - ; CHECK-LABEL: @add_ints_as_0_1_2( - ; CHECK-NOT: <4 x i32> - ; CHECK: ret -+ - entry: -- br label %for.cond -+ br label %for.body - --for.cond: ; preds = %for.body, %entry -- %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.body ] -- %cmp = icmp ult i64 %i.0, 200 -- br i1 %cmp, label %for.body, label %for.end -- --for.body: ; preds = %for.cond -- %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %i.0 -+for.body: ; preds = %entry, %for.body -+ %i.01 = phi i64 [ 0, %entry ], [ %inc, %for.body ] -+ %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %i.01 - %0 = load i32 addrspace(1)* %arrayidx, align 4 -- %arrayidx1 = getelementptr inbounds i32 addrspace(2)* %c, i64 %i.0 -+ %arrayidx1 = getelementptr inbounds i32 addrspace(2)* %c, i64 %i.01 - %1 = load i32 addrspace(2)* %arrayidx1, align 4 - %add = add nsw i32 %0, %1 -- %arrayidx2 = getelementptr inbounds i32* %a, i64 %i.0 -+ %arrayidx2 = getelementptr inbounds i32* %a, i64 %i.01 - store i32 %add, i32* %arrayidx2, align 4 -- %inc = add i64 %i.0, 1 -- br label %for.cond -+ %inc = add i64 %i.01, 1 -+ %cmp = icmp ult i64 %inc, 200 -+ br i1 %cmp, label %for.body, label %for.end - --for.end: ; preds = %for.cond -+for.end: ; preds = %for.body - ret void - } - -Index: test/Transforms/LoopVectorize/runtime-check-address-space.ll -=================================================================== ---- test/Transforms/LoopVectorize/runtime-check-address-space.ll -+++ test/Transforms/LoopVectorize/runtime-check-address-space.ll -@@ -31,25 +31,23 @@ define void @foo(i32 addrspace(1)* %a, i32 addrspa - ; CHECK: ret - - entry: -- br label %for.cond -+ %cmp1 = icmp slt i32 0, %n -+ br i1 %cmp1, label %for.body, label %for.end - --for.cond: ; preds = %for.body, %entry -- %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] -- %cmp = icmp slt i32 %i.0, %n -- br i1 %cmp, label %for.body, label 
%for.end -- --for.body: ; preds = %for.cond -- %idxprom = sext i32 %i.0 to i64 -+for.body: ; preds = %entry, %for.body -+ %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ] -+ %idxprom = sext i32 %i.02 to i64 - %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %idxprom - %0 = load i32 addrspace(1)* %arrayidx, align 4 - %mul = mul nsw i32 %0, 3 -- %idxprom1 = sext i32 %i.0 to i64 -+ %idxprom1 = sext i32 %i.02 to i64 - %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %a, i64 %idxprom1 - store i32 %mul, i32 addrspace(1)* %arrayidx2, align 4 -- %inc = add nsw i32 %i.0, 1 -- br label %for.cond -+ %inc = add nsw i32 %i.02, 1 -+ %cmp = icmp slt i32 %inc, %n -+ br i1 %cmp, label %for.body, label %for.end - --for.end: ; preds = %for.cond -+for.end: ; preds = %for.body, %entry - ret void - } - -@@ -60,25 +58,23 @@ define void @bar0(i32* %a, i32 addrspace(1)* %b, i - ; CHECK: ret - - entry: -- br label %for.cond -+ %cmp1 = icmp slt i32 0, %n -+ br i1 %cmp1, label %for.body, label %for.end - --for.cond: ; preds = %for.body, %entry -- %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] -- %cmp = icmp slt i32 %i.0, %n -- br i1 %cmp, label %for.body, label %for.end -- --for.body: ; preds = %for.cond -- %idxprom = sext i32 %i.0 to i64 -+for.body: ; preds = %entry, %for.body -+ %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ] -+ %idxprom = sext i32 %i.02 to i64 - %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %idxprom - %0 = load i32 addrspace(1)* %arrayidx, align 4 - %mul = mul nsw i32 %0, 3 -- %idxprom1 = sext i32 %i.0 to i64 -+ %idxprom1 = sext i32 %i.02 to i64 - %arrayidx2 = getelementptr inbounds i32* %a, i64 %idxprom1 - store i32 %mul, i32* %arrayidx2, align 4 -- %inc = add nsw i32 %i.0, 1 -- br label %for.cond -+ %inc = add nsw i32 %i.02, 1 -+ %cmp = icmp slt i32 %inc, %n -+ br i1 %cmp, label %for.body, label %for.end - --for.end: ; preds = %for.cond -+for.end: ; preds = %for.body, %entry - ret void - } - -@@ -89,25 +85,23 @@ define void 
@bar1(i32 addrspace(1)* %a, i32* %b, i - ; CHECK: ret - - entry: -- br label %for.cond -+ %cmp1 = icmp slt i32 0, %n -+ br i1 %cmp1, label %for.body, label %for.end - --for.cond: ; preds = %for.body, %entry -- %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] -- %cmp = icmp slt i32 %i.0, %n -- br i1 %cmp, label %for.body, label %for.end -- --for.body: ; preds = %for.cond -- %idxprom = sext i32 %i.0 to i64 -+for.body: ; preds = %entry, %for.body -+ %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ] -+ %idxprom = sext i32 %i.02 to i64 - %arrayidx = getelementptr inbounds i32* %b, i64 %idxprom - %0 = load i32* %arrayidx, align 4 - %mul = mul nsw i32 %0, 3 -- %idxprom1 = sext i32 %i.0 to i64 -+ %idxprom1 = sext i32 %i.02 to i64 - %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %a, i64 %idxprom1 - store i32 %mul, i32 addrspace(1)* %arrayidx2, align 4 -- %inc = add nsw i32 %i.0, 1 -- br label %for.cond -+ %inc = add nsw i32 %i.02, 1 -+ %cmp = icmp slt i32 %inc, %n -+ br i1 %cmp, label %for.body, label %for.end - --for.end: ; preds = %for.cond -+for.end: ; preds = %for.body, %entry - ret void - } - -@@ -119,25 +113,23 @@ define void @bar2(i32* noalias %a, i32 addrspace(1 - ; CHECK: ret - - entry: -- br label %for.cond -+ %cmp1 = icmp slt i32 0, %n -+ br i1 %cmp1, label %for.body, label %for.end - --for.cond: ; preds = %for.body, %entry -- %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] -- %cmp = icmp slt i32 %i.0, %n -- br i1 %cmp, label %for.body, label %for.end -- --for.body: ; preds = %for.cond -- %idxprom = sext i32 %i.0 to i64 -+for.body: ; preds = %entry, %for.body -+ %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ] -+ %idxprom = sext i32 %i.02 to i64 - %arrayidx = getelementptr inbounds i32 addrspace(1)* %b, i64 %idxprom - %0 = load i32 addrspace(1)* %arrayidx, align 4 - %mul = mul nsw i32 %0, 3 -- %idxprom1 = sext i32 %i.0 to i64 -+ %idxprom1 = sext i32 %i.02 to i64 - %arrayidx2 = getelementptr inbounds i32* %a, i64 %idxprom1 - store i32 %mul, i32* 
%arrayidx2, align 4 -- %inc = add nsw i32 %i.0, 1 -- br label %for.cond -+ %inc = add nsw i32 %i.02, 1 -+ %cmp = icmp slt i32 %inc, %n -+ br i1 %cmp, label %for.body, label %for.end - --for.end: ; preds = %for.cond -+for.end: ; preds = %for.body, %entry - ret void - } - -@@ -149,25 +141,23 @@ define void @arst0(i32* %b, i32 %n) #0 { - ; CHECK: ret - - entry: -- br label %for.cond -+ %cmp1 = icmp slt i32 0, %n -+ br i1 %cmp1, label %for.body, label %for.end - --for.cond: ; preds = %for.body, %entry -- %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] -- %cmp = icmp slt i32 %i.0, %n -- br i1 %cmp, label %for.body, label %for.end -- --for.body: ; preds = %for.cond -- %idxprom = sext i32 %i.0 to i64 -+for.body: ; preds = %entry, %for.body -+ %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ] -+ %idxprom = sext i32 %i.02 to i64 - %arrayidx = getelementptr inbounds i32* %b, i64 %idxprom - %0 = load i32* %arrayidx, align 4 - %mul = mul nsw i32 %0, 3 -- %idxprom1 = sext i32 %i.0 to i64 -+ %idxprom1 = sext i32 %i.02 to i64 - %arrayidx2 = getelementptr inbounds [1024 x i32] addrspace(1)* @g_as1, i64 0, i64 %idxprom1 - store i32 %mul, i32 addrspace(1)* %arrayidx2, align 4 -- %inc = add nsw i32 %i.0, 1 -- br label %for.cond -+ %inc = add nsw i32 %i.02, 1 -+ %cmp = icmp slt i32 %inc, %n -+ br i1 %cmp, label %for.body, label %for.end - --for.end: ; preds = %for.cond -+for.end: ; preds = %for.body, %entry - ret void - } - -@@ -180,25 +170,23 @@ define void @arst1(i32* %b, i32 %n) #0 { - ; CHECK: ret - - entry: -- br label %for.cond -+ %cmp1 = icmp slt i32 0, %n -+ br i1 %cmp1, label %for.body, label %for.end - --for.cond: ; preds = %for.body, %entry -- %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] -- %cmp = icmp slt i32 %i.0, %n -- br i1 %cmp, label %for.body, label %for.end -- --for.body: ; preds = %for.cond -- %idxprom = sext i32 %i.0 to i64 -+for.body: ; preds = %entry, %for.body -+ %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ] -+ %idxprom = sext i32 %i.02 to i64 - 
%arrayidx = getelementptr inbounds [1024 x i32] addrspace(1)* @g_as1, i64 0, i64 %idxprom - %0 = load i32 addrspace(1)* %arrayidx, align 4 - %mul = mul nsw i32 %0, 3 -- %idxprom1 = sext i32 %i.0 to i64 -+ %idxprom1 = sext i32 %i.02 to i64 - %arrayidx2 = getelementptr inbounds i32* %b, i64 %idxprom1 - store i32 %mul, i32* %arrayidx2, align 4 -- %inc = add nsw i32 %i.0, 1 -- br label %for.cond -+ %inc = add nsw i32 %i.02, 1 -+ %cmp = icmp slt i32 %inc, %n -+ br i1 %cmp, label %for.body, label %for.end - --for.end: ; preds = %for.cond -+for.end: ; preds = %for.body, %entry - ret void - } - -@@ -210,25 +198,23 @@ define void @aoeu(i32 %n) #0 { - ; CHECK: ret - - entry: -- br label %for.cond -+ %cmp1 = icmp slt i32 0, %n -+ br i1 %cmp1, label %for.body, label %for.end - --for.cond: ; preds = %for.body, %entry -- %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] -- %cmp = icmp slt i32 %i.0, %n -- br i1 %cmp, label %for.body, label %for.end -- --for.body: ; preds = %for.cond -- %idxprom = sext i32 %i.0 to i64 -+for.body: ; preds = %entry, %for.body -+ %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ] -+ %idxprom = sext i32 %i.02 to i64 - %arrayidx = getelementptr inbounds [1024 x i32] addrspace(2)* @q_as2, i64 0, i64 %idxprom - %0 = load i32 addrspace(2)* %arrayidx, align 4 - %mul = mul nsw i32 %0, 3 -- %idxprom1 = sext i32 %i.0 to i64 -+ %idxprom1 = sext i32 %i.02 to i64 - %arrayidx2 = getelementptr inbounds [1024 x i32] addrspace(1)* @g_as1, i64 0, i64 %idxprom1 - store i32 %mul, i32 addrspace(1)* %arrayidx2, align 4 -- %inc = add nsw i32 %i.0, 1 -- br label %for.cond -+ %inc = add nsw i32 %i.02, 1 -+ %cmp = icmp slt i32 %inc, %n -+ br i1 %cmp, label %for.body, label %for.end - --for.end: ; preds = %for.cond -+for.end: ; preds = %for.body, %entry - ret void - } - diff --git a/contrib/llvm/patches/patch-24-llvm-r221791-ppc-small-pic.diff b/contrib/llvm/patches/patch-22-llvm-r221791-ppc-small-pic.diff index e69954af..b066b13 100644 --- 
a/contrib/llvm/patches/patch-24-llvm-r221791-ppc-small-pic.diff +++ b/contrib/llvm/patches/patch-22-llvm-r221791-ppc-small-pic.diff @@ -318,9 +318,9 @@ Index: lib/Target/PowerPC/PPCAsmPrinter.cpp // Change the opcode to LWZ, and the global address operand to be a // reference to the GOT entry we will synthesize later. -@@ -382,16 +409,23 @@ void PPCAsmPrinter::EmitInstruction(const MachineI - else if (MO.isJTI()) - MOSymbol = GetJTISymbol(MO.getIndex()); +@@ -384,16 +411,23 @@ void PPCAsmPrinter::EmitInstruction(const MachineI + else if (MO.isBlockAddress()) + MOSymbol = GetBlockAddressSymbol(MO.getBlockAddress()); - MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol); + if (PL == PICLevel::Small) { @@ -351,8 +351,8 @@ Index: lib/Target/PowerPC/PPCAsmPrinter.cpp EmitToStreamer(OutStreamer, TmpInst); return; } -@@ -399,7 +433,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineI - case PPC::LDtocCPT: +@@ -402,7 +436,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineI + case PPC::LDtocBA: case PPC::LDtoc: { // Transform %X3 = LDtoc <ga:@min1>, %X2 - LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin()); @@ -360,7 +360,7 @@ Index: lib/Target/PowerPC/PPCAsmPrinter.cpp // Change the opcode to LD, and the global address operand to be a // reference to the TOC entry we will synthesize later. -@@ -428,7 +462,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineI +@@ -433,7 +467,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineI case PPC::ADDIStocHA: { // Transform %Xd = ADDIStocHA %X2, <ga:@sym> @@ -369,7 +369,7 @@ Index: lib/Target/PowerPC/PPCAsmPrinter.cpp // Change the opcode to ADDIS8. 
If the global address is external, has // common linkage, is a non-local function address, or is a jump table -@@ -470,7 +504,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineI +@@ -479,7 +513,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineI } case PPC::LDtocL: { // Transform %Xd = LDtocL <ga:@sym>, %Xs @@ -378,7 +378,7 @@ Index: lib/Target/PowerPC/PPCAsmPrinter.cpp // Change the opcode to LD. If the global address is external, has // common linkage, or is a jump table address, then reference the -@@ -507,7 +541,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineI +@@ -521,7 +555,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineI } case PPC::ADDItocL: { // Transform %Xd = ADDItocL %Xs, <ga:@sym> @@ -387,7 +387,7 @@ Index: lib/Target/PowerPC/PPCAsmPrinter.cpp // Change the opcode to ADDI8. If the global address is external, then // generate a TOC entry and reference that. Otherwise reference the -@@ -558,7 +592,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineI +@@ -572,7 +606,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineI case PPC::LDgotTprelL: case PPC::LDgotTprelL32: { // Transform %Xd = LDgotTprelL <ga:@sym>, %Xs @@ -396,7 +396,7 @@ Index: lib/Target/PowerPC/PPCAsmPrinter.cpp // Change the opcode to LD. TmpInst.setOpcode(isPPC64 ? 
PPC::LD : PPC::LWZ); -@@ -841,7 +875,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineI +@@ -796,7 +830,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineI } } @@ -405,7 +405,7 @@ Index: lib/Target/PowerPC/PPCAsmPrinter.cpp EmitToStreamer(OutStreamer, TmpInst); } -@@ -857,16 +891,14 @@ void PPCLinuxAsmPrinter::EmitStartOfAsmFile(Module +@@ -812,16 +846,14 @@ void PPCLinuxAsmPrinter::EmitStartOfAsmFile(Module if (Subtarget.isPPC64() || TM.getRelocationModel() != Reloc::PIC_) return AsmPrinter::EmitStartOfAsmFile(M); @@ -426,7 +426,7 @@ Index: lib/Target/PowerPC/PPCAsmPrinter.cpp MCSymbol *CurrentPos = OutContext.CreateTempSymbol(); OutStreamer.EmitLabel(CurrentPos); -@@ -885,7 +917,9 @@ void PPCLinuxAsmPrinter::EmitStartOfAsmFile(Module +@@ -840,7 +872,9 @@ void PPCLinuxAsmPrinter::EmitStartOfAsmFile(Module void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() { // linux/ppc32 - Normal entry label. @@ -437,7 +437,7 @@ Index: lib/Target/PowerPC/PPCAsmPrinter.cpp return AsmPrinter::EmitFunctionEntryLabel(); if (!Subtarget.isPPC64()) { -@@ -897,7 +931,7 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() +@@ -852,7 +886,7 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() const MCExpr *OffsExpr = MCBinaryExpr::CreateSub( @@ -458,7 +458,7 @@ Index: lib/Target/PowerPC/PPCISelDAGToDAG.cpp #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -@@ -273,23 +274,29 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() { +@@ -283,23 +284,29 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() { // Insert the set of GlobalBaseReg into the first MBB of the function MachineBasicBlock &FirstMBB = MF->front(); MachineBasicBlock::iterator MBBI = FirstMBB.begin(); @@ -499,7 +499,7 @@ Index: lib/Target/PowerPC/PPCISelDAGToDAG.cpp } } else { GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_NOX0RegClass); -@@ -1429,13 +1436,13 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { +@@ -1439,13 +1446,13 @@ SDNode 
*PPCDAGToDAGISel::Select(SDNode *N) { return CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain); } case PPCISD::TOC_ENTRY: { @@ -519,7 +519,7 @@ Index: lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- lib/Target/PowerPC/PPCISelLowering.cpp +++ lib/Target/PowerPC/PPCISelLowering.cpp -@@ -1653,6 +1653,8 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(S +@@ -1682,6 +1682,8 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(S const GlobalValue *GV = GA->getGlobal(); EVT PtrVT = getPointerTy(); bool is64bit = Subtarget.isPPC64(); @@ -528,7 +528,7 @@ Index: lib/Target/PowerPC/PPCISelLowering.cpp TLSModel::Model Model = getTargetMachine().getTLSModel(GV); -@@ -1691,7 +1693,10 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(S +@@ -1721,7 +1723,10 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(S GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT, GOTReg, TGA); } else { @@ -540,7 +540,7 @@ Index: lib/Target/PowerPC/PPCISelLowering.cpp } SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSGD_L, dl, PtrVT, GOTPtr, TGA); -@@ -1721,7 +1726,10 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(S +@@ -1738,7 +1743,10 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(S GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT, GOTReg, TGA); } else { @@ -556,7 +556,7 @@ Index: lib/Target/PowerPC/PPCInstrInfo.td =================================================================== --- lib/Target/PowerPC/PPCInstrInfo.td +++ lib/Target/PowerPC/PPCInstrInfo.td -@@ -976,6 +976,9 @@ let isTerminator = 1, isBarrier = 1, PPC970_Unit = +@@ -980,6 +980,9 @@ let isTerminator = 1, isBarrier = 1, PPC970_Unit = let Defs = [LR] in def MovePCtoLR : Pseudo<(outs), (ins), "#MovePCtoLR", []>, PPC970_Unit_BRU; @@ -566,7 +566,7 @@ Index: lib/Target/PowerPC/PPCInstrInfo.td let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { let isBarrier = 1 in { -@@ -2444,15 +2447,13 @@ def ADDISdtprelHA32 : Pseudo<(outs 
gprc:$rD), (ins +@@ -2442,15 +2445,13 @@ def ADDISdtprelHA32 : Pseudo<(outs gprc:$rD), (ins tglobaltlsaddr:$disp))]>; // Support for Position-independent code diff --git a/contrib/llvm/patches/patch-25-llvm-r224415-ppc-local.diff b/contrib/llvm/patches/patch-23-llvm-r224415-ppc-local.diff index 9099743..9099743 100644 --- a/contrib/llvm/patches/patch-25-llvm-r224415-ppc-local.diff +++ b/contrib/llvm/patches/patch-23-llvm-r224415-ppc-local.diff diff --git a/contrib/llvm/patches/patch-26-llvm-r213890-ppc-eh_frame.diff b/contrib/llvm/patches/patch-24-llvm-r213890-ppc-eh_frame.diff index 9a4881f..9a4881f 100644 --- a/contrib/llvm/patches/patch-26-llvm-r213890-ppc-eh_frame.diff +++ b/contrib/llvm/patches/patch-24-llvm-r213890-ppc-eh_frame.diff diff --git a/contrib/llvm/patches/patch-28-llvm-r224890-ppc-ctr-tls-loop.diff b/contrib/llvm/patches/patch-25-llvm-r224890-ppc-ctr-tls-loop.diff index aeb81f2..aeb81f2 100644 --- a/contrib/llvm/patches/patch-28-llvm-r224890-ppc-ctr-tls-loop.diff +++ b/contrib/llvm/patches/patch-25-llvm-r224890-ppc-ctr-tls-loop.diff diff --git a/contrib/llvm/patches/patch-29-clang-r213790-type_traits-crash.diff b/contrib/llvm/patches/patch-26-clang-r213790-type_traits-crash.diff index e721d24..e721d24 100644 --- a/contrib/llvm/patches/patch-29-clang-r213790-type_traits-crash.diff +++ b/contrib/llvm/patches/patch-26-clang-r213790-type_traits-crash.diff diff --git a/contrib/llvm/patches/patch-27-llvm-r221703-ppc-tls_get_addr.diff b/contrib/llvm/patches/patch-27-llvm-r221703-ppc-tls_get_addr.diff deleted file mode 100644 index 6cf2a6a..0000000 --- a/contrib/llvm/patches/patch-27-llvm-r221703-ppc-tls_get_addr.diff +++ /dev/null @@ -1,504 +0,0 @@ -Pull in r221703 from upstream llvm trunk (by Bill Schmidt): - - [PowerPC] Replace foul hackery with real calls to __tls_get_addr - - My original support for the general dynamic and local dynamic TLS - models contained some fairly obtuse hacks to generate calls to - __tls_get_addr when lowering a 
TargetGlobalAddress. Rather than - generating real calls, special GET_TLS_ADDR nodes were used to wrap - the calls and only reveal them at assembly time. I attempted to - provide correct parameter and return values by chaining CopyToReg and - CopyFromReg nodes onto the GET_TLS_ADDR nodes, but this was also not - fully correct. Problems were seen with two back-to-back stores to TLS - variables, where the call sequences ended up overlapping with unhappy - results. Additionally, since these weren't real calls, the proper - register side effects of a call were not recorded, so clobbered values - were kept live across the calls. - - The proper thing to do is to lower these into calls in the first - place. This is relatively straightforward; see the changes to - PPCTargetLowering::LowerGlobalTLSAddress() in PPCISelLowering.cpp. - The changes here are standard call lowering, except that we need to - track the fact that these calls will require a relocation. This is - done by adding a machine operand flag of MO_TLSLD or MO_TLSGD to the - TargetGlobalAddress operand that appears earlier in the sequence. - - The calls to LowerCallTo() eventually find their way to - LowerCall_64SVR4() or LowerCall_32SVR4(), which call FinishCall(), - which calls PrepareCall(). In PrepareCall(), we detect the calls to - __tls_get_addr and immediately snag the TargetGlobalTLSAddress with - the annotated relocation information. This becomes an extra operand - on the call following the callee, which is expected for nodes of type - tlscall. We change the call opcode to CALL_TLS for this case. Back - in FinishCall(), we change it again to CALL_NOP_TLS for 64-bit only, - since we require a TOC-restore nop following the call for the 64-bit - ABIs. - - During selection, patterns in PPCInstrInfo.td and PPCInstr64Bit.td - convert the CALL_TLS nodes into BL_TLS nodes, and convert the - CALL_NOP_TLS nodes into BL8_NOP_TLS nodes. 
This replaces the code - removed from PPCAsmPrinter.cpp, as the BL_TLS or BL8_NOP_TLS - nodes can now be emitted normally using their patterns and the - associated printTLSCall print method. - - Finally, as a result of these changes, all references to get-tls-addr - in its various guises are no longer used, so they have been removed. - - There are existing TLS tests to verify the changes haven't messed - anything up). I've added one new test that verifies that the problem - with the original code has been fixed. - -This fixes a fatal "Bad machine code" error when compiling parts of -libgomp for 32-bit PowerPC. - -Introduced here: http://svnweb.freebsd.org/changeset/base/276301 - -Index: lib/Target/PowerPC/PPC.h -=================================================================== ---- lib/Target/PowerPC/PPC.h -+++ lib/Target/PowerPC/PPC.h -@@ -96,7 +96,12 @@ namespace llvm { - MO_TOC_LO = 7 << 4, - - // Symbol for VK_PPC_TLS fixup attached to an ADD instruction -- MO_TLS = 8 << 4 -+ MO_TLS = 8 << 4, -+ -+ // Symbols for VK_PPC_TLSGD and VK_PPC_TLSLD in __tls_get_addr -+ // call sequences. 
-+ MO_TLSLD = 9 << 4, -+ MO_TLSGD = 10 << 4 - }; - } // end namespace PPCII - -Index: lib/Target/PowerPC/PPCAsmPrinter.cpp -=================================================================== ---- lib/Target/PowerPC/PPCAsmPrinter.cpp -+++ lib/Target/PowerPC/PPCAsmPrinter.cpp -@@ -689,35 +689,6 @@ void PPCAsmPrinter::EmitInstruction(const MachineI - .addExpr(SymGotTlsGD)); - return; - } -- case PPC::GETtlsADDR: -- // Transform: %X3 = GETtlsADDR %X3, <ga:@sym> -- // Into: BL8_NOP_TLS __tls_get_addr(sym@tlsgd) -- case PPC::GETtlsADDR32: { -- // Transform: %R3 = GETtlsADDR32 %R3, <ga:@sym> -- // Into: BL_TLS __tls_get_addr(sym@tlsgd)@PLT -- -- StringRef Name = "__tls_get_addr"; -- MCSymbol *TlsGetAddr = OutContext.GetOrCreateSymbol(Name); -- MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None; -- -- if (!Subtarget.isPPC64() && !Subtarget.isDarwin() && -- TM.getRelocationModel() == Reloc::PIC_) -- Kind = MCSymbolRefExpr::VK_PLT; -- const MCSymbolRefExpr *TlsRef = -- MCSymbolRefExpr::Create(TlsGetAddr, Kind, OutContext); -- const MachineOperand &MO = MI->getOperand(2); -- const GlobalValue *GValue = MO.getGlobal(); -- MCSymbol *MOSymbol = getSymbol(GValue); -- const MCExpr *SymVar = -- MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSGD, -- OutContext); -- EmitToStreamer(OutStreamer, -- MCInstBuilder(Subtarget.isPPC64() ? 
-- PPC::BL8_NOP_TLS : PPC::BL_TLS) -- .addExpr(TlsRef) -- .addExpr(SymVar)); -- return; -- } - case PPC::ADDIStlsldHA: { - // Transform: %Xd = ADDIStlsldHA %X2, <ga:@sym> - // Into: %Xd = ADDIS8 %X2, sym@got@tlsld@ha -@@ -755,36 +726,6 @@ void PPCAsmPrinter::EmitInstruction(const MachineI - .addExpr(SymGotTlsLD)); - return; - } -- case PPC::GETtlsldADDR: -- // Transform: %X3 = GETtlsldADDR %X3, <ga:@sym> -- // Into: BL8_NOP_TLS __tls_get_addr(sym@tlsld) -- case PPC::GETtlsldADDR32: { -- // Transform: %R3 = GETtlsldADDR32 %R3, <ga:@sym> -- // Into: BL_TLS __tls_get_addr(sym@tlsld)@PLT -- -- StringRef Name = "__tls_get_addr"; -- MCSymbol *TlsGetAddr = OutContext.GetOrCreateSymbol(Name); -- MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None; -- -- if (!Subtarget.isPPC64() && !Subtarget.isDarwin() && -- TM.getRelocationModel() == Reloc::PIC_) -- Kind = MCSymbolRefExpr::VK_PLT; -- -- const MCSymbolRefExpr *TlsRef = -- MCSymbolRefExpr::Create(TlsGetAddr, Kind, OutContext); -- const MachineOperand &MO = MI->getOperand(2); -- const GlobalValue *GValue = MO.getGlobal(); -- MCSymbol *MOSymbol = getSymbol(GValue); -- const MCExpr *SymVar = -- MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSLD, -- OutContext); -- EmitToStreamer(OutStreamer, -- MCInstBuilder(Subtarget.isPPC64() ? 
-- PPC::BL8_NOP_TLS : PPC::BL_TLS) -- .addExpr(TlsRef) -- .addExpr(SymVar)); -- return; -- } - case PPC::ADDISdtprelHA: - // Transform: %Xd = ADDISdtprelHA %X3, <ga:@sym> - // Into: %Xd = ADDIS8 %X3, sym@dtprel@ha -Index: lib/Target/PowerPC/PPCISelLowering.cpp -=================================================================== ---- lib/Target/PowerPC/PPCISelLowering.cpp -+++ lib/Target/PowerPC/PPCISelLowering.cpp -@@ -781,6 +781,8 @@ const char *PPCTargetLowering::getTargetNodeName(u - case PPCISD::SHL: return "PPCISD::SHL"; - case PPCISD::CALL: return "PPCISD::CALL"; - case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP"; -+ case PPCISD::CALL_TLS: return "PPCISD::CALL_TLS"; -+ case PPCISD::CALL_NOP_TLS: return "PPCISD::CALL_NOP_TLS"; - case PPCISD::MTCTR: return "PPCISD::MTCTR"; - case PPCISD::BCTRL: return "PPCISD::BCTRL"; - case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG"; -@@ -810,10 +812,8 @@ const char *PPCTargetLowering::getTargetNodeName(u - case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS"; - case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA"; - case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L"; -- case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR"; - case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA"; - case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L"; -- case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR"; - case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA"; - case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L"; - case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT"; -@@ -1641,6 +1641,27 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDVal - return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG); - } - -+// Generate a call to __tls_get_addr for the given GOT entry Op. 
-+std::pair<SDValue,SDValue> -+PPCTargetLowering::lowerTLSCall(SDValue Op, SDLoc dl, -+ SelectionDAG &DAG) const { -+ -+ Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext()); -+ TargetLowering::ArgListTy Args; -+ TargetLowering::ArgListEntry Entry; -+ Entry.Node = Op; -+ Entry.Ty = IntPtrTy; -+ Args.push_back(Entry); -+ -+ TargetLowering::CallLoweringInfo CLI(DAG); -+ CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()) -+ .setCallee(CallingConv::C, IntPtrTy, -+ DAG.getTargetExternalSymbol("__tls_get_addr", getPointerTy()), -+ std::move(Args), 0); -+ -+ return LowerCallTo(CLI); -+} -+ - SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, - SelectionDAG &DAG) const { - -@@ -1686,7 +1707,8 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(S - } - - if (Model == TLSModel::GeneralDynamic) { -- SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); -+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, -+ PPCII::MO_TLSGD); - SDValue GOTPtr; - if (is64bit) { - SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); -@@ -1700,26 +1722,13 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(S - } - SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSGD_L, dl, PtrVT, - GOTPtr, TGA); -- -- // We need a chain node, and don't have one handy. The underlying -- // call has no side effects, so using the function entry node -- // suffices. -- SDValue Chain = DAG.getEntryNode(); -- Chain = DAG.getCopyToReg(Chain, dl, -- is64bit ? PPC::X3 : PPC::R3, GOTEntry); -- SDValue ParmReg = DAG.getRegister(is64bit ? PPC::X3 : PPC::R3, -- is64bit ? MVT::i64 : MVT::i32); -- SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLS_ADDR, dl, -- PtrVT, ParmReg, TGA); -- // The return value from GET_TLS_ADDR really is in X3 already, but -- // some hacks are needed here to tie everything together. The extra -- // copies dissolve during subsequent transforms. -- Chain = DAG.getCopyToReg(Chain, dl, is64bit ? PPC::X3 : PPC::R3, TLSAddr); -- return DAG.getCopyFromReg(Chain, dl, is64bit ? 
PPC::X3 : PPC::R3, PtrVT); -+ std::pair<SDValue, SDValue> CallResult = lowerTLSCall(GOTEntry, dl, DAG); -+ return CallResult.first; - } - - if (Model == TLSModel::LocalDynamic) { -- SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); -+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, -+ PPCII::MO_TLSLD); - SDValue GOTPtr; - if (is64bit) { - SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); -@@ -1733,23 +1742,11 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(S - } - SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSLD_L, dl, PtrVT, - GOTPtr, TGA); -- -- // We need a chain node, and don't have one handy. The underlying -- // call has no side effects, so using the function entry node -- // suffices. -- SDValue Chain = DAG.getEntryNode(); -- Chain = DAG.getCopyToReg(Chain, dl, -- is64bit ? PPC::X3 : PPC::R3, GOTEntry); -- SDValue ParmReg = DAG.getRegister(is64bit ? PPC::X3 : PPC::R3, -- is64bit ? MVT::i64 : MVT::i32); -- SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLSLD_ADDR, dl, -- PtrVT, ParmReg, TGA); -- // The return value from GET_TLSLD_ADDR really is in X3 already, but -- // some hacks are needed here to tie everything together. The extra -- // copies dissolve during subsequent transforms. -- Chain = DAG.getCopyToReg(Chain, dl, is64bit ? PPC::X3 : PPC::R3, TLSAddr); -+ std::pair<SDValue, SDValue> CallResult = lowerTLSCall(GOTEntry, dl, DAG); -+ SDValue TLSAddr = CallResult.first; -+ SDValue Chain = CallResult.second; - SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, PtrVT, -- Chain, ParmReg, TGA); -+ Chain, TLSAddr, TGA); - return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA); - } - -@@ -3712,6 +3709,23 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &C - if (Callee.getNode()) { - Ops.push_back(Chain); - Ops.push_back(Callee); -+ -+ // If this is a call to __tls_get_addr, find the symbol whose address -+ // is to be taken and add it to the list. 
This will be used to -+ // generate __tls_get_addr(<sym>@tlsgd) or __tls_get_addr(<sym>@tlsld). -+ // We find the symbol by walking the chain to the CopyFromReg, walking -+ // back from the CopyFromReg to the ADDI_TLSGD_L or ADDI_TLSLD_L, and -+ // pulling the symbol from that node. -+ if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) -+ if (!strcmp(S->getSymbol(), "__tls_get_addr")) { -+ assert(!needIndirectCall && "Indirect call to __tls_get_addr???"); -+ SDNode *AddI = Chain.getNode()->getOperand(2).getNode(); -+ SDValue TGTAddr = AddI->getOperand(1); -+ assert(TGTAddr.getNode()->getOpcode() == ISD::TargetGlobalTLSAddress && -+ "Didn't find target global TLS address where we expected one"); -+ Ops.push_back(TGTAddr); -+ CallOpc = PPCISD::CALL_TLS; -+ } - } - // If this is a tail call add stack pointer delta. - if (isTailCall) -@@ -3863,7 +3877,9 @@ PPCTargetLowering::FinishCall(CallingConv::ID Call - DAG.getTarget().getRelocationModel() == Reloc::PIC_)) { - // Otherwise insert NOP for non-local calls. - CallOpc = PPCISD::CALL_NOP; -- } -+ } else if (CallOpc == PPCISD::CALL_TLS) -+ // For 64-bit SVR4, TLS calls are always non-local. -+ CallOpc = PPCISD::CALL_NOP_TLS; - } - - Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops); -Index: lib/Target/PowerPC/PPCISelLowering.h -=================================================================== ---- lib/Target/PowerPC/PPCISelLowering.h -+++ lib/Target/PowerPC/PPCISelLowering.h -@@ -99,6 +99,10 @@ namespace llvm { - /// SVR4 calls. - CALL, CALL_NOP, - -+ /// CALL_TLS and CALL_NOP_TLS - Versions of CALL and CALL_NOP used -+ /// to access TLS variables. -+ CALL_TLS, CALL_NOP_TLS, -+ - /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a - /// MTCTR instruction. - MTCTR, -@@ -214,10 +218,6 @@ namespace llvm { - /// sym\@got\@tlsgd\@l. - ADDI_TLSGD_L, - -- /// G8RC = GET_TLS_ADDR %X3, Symbol - For the general-dynamic TLS -- /// model, produces a call to __tls_get_addr(sym\@tlsgd). 
-- GET_TLS_ADDR, -- - /// G8RC = ADDIS_TLSLD_HA %X2, Symbol - For the local-dynamic TLS - /// model, produces an ADDIS8 instruction that adds the GOT base - /// register to sym\@got\@tlsld\@ha. -@@ -228,10 +228,6 @@ namespace llvm { - /// sym\@got\@tlsld\@l. - ADDI_TLSLD_L, - -- /// G8RC = GET_TLSLD_ADDR %X3, Symbol - For the local-dynamic TLS -- /// model, produces a call to __tls_get_addr(sym\@tlsld). -- GET_TLSLD_ADDR, -- - /// G8RC = ADDIS_DTPREL_HA %X3, Symbol, Chain - For the - /// local-dynamic TLS model, produces an ADDIS8 instruction - /// that adds X3 to sym\@dtprel\@ha. The Chain operand is needed -@@ -552,6 +548,8 @@ namespace llvm { - SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; -+ std::pair<SDValue,SDValue> lowerTLSCall(SDValue Op, SDLoc dl, -+ SelectionDAG &DAG) const; - SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; -Index: lib/Target/PowerPC/PPCInstr64Bit.td -=================================================================== ---- lib/Target/PowerPC/PPCInstr64Bit.td -+++ lib/Target/PowerPC/PPCInstr64Bit.td -@@ -188,6 +188,9 @@ def : Pat<(PPCcall (i64 texternalsym:$dst)), - def : Pat<(PPCcall_nop (i64 texternalsym:$dst)), - (BL8_NOP texternalsym:$dst)>; - -+def : Pat<(PPCcall_nop_tls texternalsym:$func, tglobaltlsaddr:$sym), -+ (BL8_NOP_TLS texternalsym:$func, tglobaltlsaddr:$sym)>; -+ - // Atomic operations - let usesCustomInserter = 1 in { - let Defs = [CR0] in { -@@ -872,11 +875,6 @@ def ADDItlsgdL : Pseudo<(outs g8rc:$rD), (ins g8rc - [(set i64:$rD, - (PPCaddiTlsgdL i64:$reg, tglobaltlsaddr:$disp))]>, - isPPC64; --def GETtlsADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym), -- "#GETtlsADDR", -- [(set i64:$rD, -- (PPCgetTlsAddr i64:$reg, 
tglobaltlsaddr:$sym))]>, -- isPPC64; - def ADDIStlsldHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp), - "#ADDIStlsldHA", - [(set i64:$rD, -@@ -887,11 +885,6 @@ def ADDItlsldL : Pseudo<(outs g8rc:$rD), (ins g8rc - [(set i64:$rD, - (PPCaddiTlsldL i64:$reg, tglobaltlsaddr:$disp))]>, - isPPC64; --def GETtlsldADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym), -- "#GETtlsldADDR", -- [(set i64:$rD, -- (PPCgetTlsldAddr i64:$reg, tglobaltlsaddr:$sym))]>, -- isPPC64; - def ADDISdtprelHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp), - "#ADDISdtprelHA", - [(set i64:$rD, -Index: lib/Target/PowerPC/PPCInstrInfo.td -=================================================================== ---- lib/Target/PowerPC/PPCInstrInfo.td -+++ lib/Target/PowerPC/PPCInstrInfo.td -@@ -110,10 +110,8 @@ def PPCldGotTprelL : SDNode<"PPCISD::LD_GOT_TPREL_ - def PPCaddTls : SDNode<"PPCISD::ADD_TLS", SDTIntBinOp, []>; - def PPCaddisTlsgdHA : SDNode<"PPCISD::ADDIS_TLSGD_HA", SDTIntBinOp>; - def PPCaddiTlsgdL : SDNode<"PPCISD::ADDI_TLSGD_L", SDTIntBinOp>; --def PPCgetTlsAddr : SDNode<"PPCISD::GET_TLS_ADDR", SDTIntBinOp>; - def PPCaddisTlsldHA : SDNode<"PPCISD::ADDIS_TLSLD_HA", SDTIntBinOp>; - def PPCaddiTlsldL : SDNode<"PPCISD::ADDI_TLSLD_L", SDTIntBinOp>; --def PPCgetTlsldAddr : SDNode<"PPCISD::GET_TLSLD_ADDR", SDTIntBinOp>; - def PPCaddisDtprelHA : SDNode<"PPCISD::ADDIS_DTPREL_HA", SDTIntBinOp, - [SDNPHasChain]>; - def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>; -@@ -136,9 +134,15 @@ def SDT_PPCCall : SDTypeProfile<0, -1, [SDTCisIn - def PPCcall : SDNode<"PPCISD::CALL", SDT_PPCCall, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, - SDNPVariadic]>; -+def PPCcall_tls : SDNode<"PPCISD::CALL_TLS", SDT_PPCCall, -+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, -+ SDNPVariadic]>; - def PPCcall_nop : SDNode<"PPCISD::CALL_NOP", SDT_PPCCall, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, - SDNPVariadic]>; -+def PPCcall_nop_tls : SDNode<"PPCISD::CALL_NOP_TLS", 
SDT_PPCCall, -+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, -+ SDNPVariadic]>; - def PPCload : SDNode<"PPCISD::LOAD", SDTypeProfile<1, 1, []>, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; - def PPCload_toc : SDNode<"PPCISD::LOAD_TOC", SDTypeProfile<0, 1, []>, -@@ -2369,6 +2373,8 @@ def : Pat<(PPCcall (i32 tglobaladdr:$dst)), - def : Pat<(PPCcall (i32 texternalsym:$dst)), - (BL texternalsym:$dst)>; - -+def : Pat<(PPCcall_tls texternalsym:$func, tglobaltlsaddr:$sym), -+ (BL_TLS texternalsym:$func, tglobaltlsaddr:$sym)>; - - def : Pat<(PPCtc_return (i32 tglobaladdr:$dst), imm:$imm), - (TCRETURNdi tglobaladdr:$dst, imm:$imm)>; -@@ -2424,18 +2430,10 @@ def ADDItlsgdL32 : Pseudo<(outs gprc:$rD), (ins gp - "#ADDItlsgdL32", - [(set i32:$rD, - (PPCaddiTlsgdL i32:$reg, tglobaltlsaddr:$disp))]>; --def GETtlsADDR32 : Pseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym), -- "#GETtlsADDR32", -- [(set i32:$rD, -- (PPCgetTlsAddr i32:$reg, tglobaltlsaddr:$sym))]>; - def ADDItlsldL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp), - "#ADDItlsldL32", - [(set i32:$rD, - (PPCaddiTlsldL i32:$reg, tglobaltlsaddr:$disp))]>; --def GETtlsldADDR32 : Pseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym), -- "#GETtlsldADDR32", -- [(set i32:$rD, -- (PPCgetTlsldAddr i32:$reg, tglobaltlsaddr:$sym))]>; - def ADDIdtprelL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp), - "#ADDIdtprelL32", - [(set i32:$rD, -Index: lib/Target/PowerPC/PPCMCInstLower.cpp -=================================================================== ---- lib/Target/PowerPC/PPCMCInstLower.cpp -+++ lib/Target/PowerPC/PPCMCInstLower.cpp -@@ -137,6 +137,12 @@ static MCOperand GetSymbolRef(const MachineOperand - case PPCII::MO_TLS: - RefKind = MCSymbolRefExpr::VK_PPC_TLS; - break; -+ case PPCII::MO_TLSGD: -+ RefKind = MCSymbolRefExpr::VK_PPC_TLSGD; -+ break; -+ case PPCII::MO_TLSLD: -+ RefKind = MCSymbolRefExpr::VK_PPC_TLSLD; -+ break; - } - - if (MO.getTargetFlags() == PPCII::MO_PLT_OR_STUB && 
!isDarwin) -Index: test/CodeGen/PowerPC/tls-store2.ll -=================================================================== ---- test/CodeGen/PowerPC/tls-store2.ll -+++ test/CodeGen/PowerPC/tls-store2.ll -@@ -0,0 +1,33 @@ -+; RUN: llc -march=ppc64 -mcpu=pwr7 -O2 -relocation-model=pic < %s | FileCheck %s -+ -+target datalayout = "e-m:e-i64:64-n32:64" -+target triple = "powerpc64le-unknown-linux-gnu" -+ -+; Test back-to-back stores of TLS variables to ensure call sequences no -+; longer overlap. -+ -+@__once_callable = external thread_local global i8** -+@__once_call = external thread_local global void ()* -+ -+define i64 @call_once(i64 %flag, i8* %ptr) { -+entry: -+ %var = alloca i8*, align 8 -+ store i8* %ptr, i8** %var, align 8 -+ store i8** %var, i8*** @__once_callable, align 8 -+ store void ()* @__once_call_impl, void ()** @__once_call, align 8 -+ ret i64 %flag -+} -+ -+; CHECK-LABEL: call_once: -+; CHECK: addis 3, 2, __once_callable@got@tlsgd@ha -+; CHECK: addi 3, 3, __once_callable@got@tlsgd@l -+; CHECK: bl __tls_get_addr(__once_callable@tlsgd) -+; CHECK-NEXT: nop -+; CHECK: std {{[0-9]+}}, 0(3) -+; CHECK: addis 3, 2, __once_call@got@tlsgd@ha -+; CHECK: addi 3, 3, __once_call@got@tlsgd@l -+; CHECK: bl __tls_get_addr(__once_call@tlsgd) -+; CHECK-NEXT: nop -+; CHECK: std {{[0-9]+}}, 0(3) -+ -+declare void @__once_call_impl() diff --git a/contrib/llvm/patches/patch-30-llvm-r222587-arm-add-pc.diff b/contrib/llvm/patches/patch-27-llvm-r222587-arm-add-pc.diff index 46b2a04..46b2a04 100644 --- a/contrib/llvm/patches/patch-30-llvm-r222587-arm-add-pc.diff +++ b/contrib/llvm/patches/patch-27-llvm-r222587-arm-add-pc.diff diff --git a/contrib/llvm/patches/patch-31-llvm-r222292-aarch64-no-neon.diff b/contrib/llvm/patches/patch-28-llvm-r222292-aarch64-no-neon.diff index c1120bb..c1120bb 100644 --- a/contrib/llvm/patches/patch-31-llvm-r222292-aarch64-no-neon.diff +++ b/contrib/llvm/patches/patch-28-llvm-r222292-aarch64-no-neon.diff |