diff options
author | ed <ed@FreeBSD.org> | 2009-06-14 09:23:33 +0000 |
---|---|---|
committer | ed <ed@FreeBSD.org> | 2009-06-14 09:23:33 +0000 |
commit | db89e312d968c258aba3c79c1c398f5fb19267a3 (patch) | |
tree | 49817b316c4fdaa56d9d16ebf2555303d1a990e0 /lib/Target/X86 | |
parent | de000e339094f8c6e06a635dac9a803861416ec6 (diff) | |
download | FreeBSD-src-db89e312d968c258aba3c79c1c398f5fb19267a3.zip FreeBSD-src-db89e312d968c258aba3c79c1c398f5fb19267a3.tar.gz |
Import LLVM r73340.
Diffstat (limited to 'lib/Target/X86')
-rw-r--r-- | lib/Target/X86/README.txt | 114 | ||||
-rw-r--r-- | lib/Target/X86/X86CallingConv.td | 44 | ||||
-rw-r--r-- | lib/Target/X86/X86ELFWriterInfo.cpp | 23 | ||||
-rw-r--r-- | lib/Target/X86/X86ELFWriterInfo.h | 4 | ||||
-rw-r--r-- | lib/Target/X86/X86FastISel.cpp | 2 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 8 | ||||
-rw-r--r-- | lib/Target/X86/X86RegisterInfo.cpp | 105 | ||||
-rw-r--r-- | lib/Target/X86/X86Subtarget.cpp | 4 | ||||
-rw-r--r-- | lib/Target/X86/X86TargetMachine.cpp | 3 |
9 files changed, 142 insertions, 165 deletions
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt index 710bd03..3796aac 100644 --- a/lib/Target/X86/README.txt +++ b/lib/Target/X86/README.txt @@ -482,35 +482,6 @@ _usesbb: //===---------------------------------------------------------------------===// -Currently we don't have elimination of redundant stack manipulations. Consider -the code: - -int %main() { -entry: - call fastcc void %test1( ) - call fastcc void %test2( sbyte* cast (void ()* %test1 to sbyte*) ) - ret int 0 -} - -declare fastcc void %test1() - -declare fastcc void %test2(sbyte*) - - -This currently compiles to: - - subl $16, %esp - call _test5 - addl $12, %esp - subl $16, %esp - movl $_test5, (%esp) - call _test6 - addl $12, %esp - -The add\sub pair is really unneeded here. - -//===---------------------------------------------------------------------===// - Consider the expansion of: define i32 @test3(i32 %X) { @@ -902,34 +873,6 @@ condition register is dead. xor reg reg is shorter than mov reg, #0. //===---------------------------------------------------------------------===// -We aren't matching RMW instructions aggressively -enough. Here's a reduced testcase (more in PR1160): - -define void @test(i32* %huge_ptr, i32* %target_ptr) { - %A = load i32* %huge_ptr ; <i32> [#uses=1] - %B = load i32* %target_ptr ; <i32> [#uses=1] - %C = or i32 %A, %B ; <i32> [#uses=1] - store i32 %C, i32* %target_ptr - ret void -} - -$ llvm-as < t.ll | llc -march=x86-64 - -_test: - movl (%rdi), %eax - orl (%rsi), %eax - movl %eax, (%rsi) - ret - -That should be something like: - -_test: - movl (%rdi), %eax - orl %eax, (%rsi) - ret - -//===---------------------------------------------------------------------===// - The following code: bb114.preheader: ; preds = %cond_next94 @@ -1897,3 +1840,60 @@ The second one is done for: Atom, Pentium Pro, all AMDs, Pentium 4, Nocona, Core 2, and "Generic" //===---------------------------------------------------------------------===// + +Testcase: +int a(int x) { return (x & 127) > 31; } + +Current output: + movl 4(%esp), %eax + andl $127, %eax + cmpl $31, %eax + seta %al + movzbl %al, %eax + ret + +Ideal output: + xorl %eax, %eax + testl $96, 4(%esp) + setne %al + ret + +We could do this transformation in instcombine, but it's only clearly +beneficial on platforms with a test instruction. + +//===---------------------------------------------------------------------===// +Testcase: +int x(int a) { return (a&0xf0)>>4; } + +Current output: + movl 4(%esp), %eax + shrl $4, %eax + andl $15, %eax + ret + +Ideal output: + movzbl 4(%esp), %eax + shrl $4, %eax + ret + +//===---------------------------------------------------------------------===// + +Testcase: +int x(int a) { return (a & 0x80) ? 0x100 : 0; } + +Current output: + testl $128, 4(%esp) + setne %al + movzbl %al, %eax + shll $8, %eax + ret + +Ideal output: + movl 4(%esp), %eax + addl %eax, %eax + andl $256, %eax + ret + +We generally want to fold shifted tests of a single bit into a shift+and on x86. + +//===---------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td index 7f99203..e9fcbd5 100644 --- a/lib/Target/X86/X86CallingConv.td +++ b/lib/Target/X86/X86CallingConv.td @@ -215,50 +215,6 @@ def CC_X86_Win64_C : CallingConv<[ CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 16>> ]>; -// Tail call convention (fast): One register is reserved for target address, -// namely R9 -def CC_X86_64_TailCall : CallingConv<[ - // Handles byval parameters. - CCIfByVal<CCPassByVal<8, 8>>, - - // Promote i8/i16 arguments to i32. - CCIfType<[i8, i16], CCPromoteToType<i32>>, - - // The 'nest' parameter, if any, is passed in R10. - CCIfNest<CCAssignToReg<[R10]>>, - - // The first 6 integer arguments are passed in integer registers. - CCIfType<[i32], CCAssignToReg<[EDI, ESI, EDX, ECX, R8D]>>, - CCIfType<[i64], CCAssignToReg<[RDI, RSI, RDX, RCX, R8]>>, - - // The first 8 FP/Vector arguments are passed in XMM registers. - CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], - CCIfSubtarget<"hasSSE1()", - CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>, - - // The first 8 MMX (except for v1i64) vector arguments are passed in XMM - // registers on Darwin. - CCIfType<[v8i8, v4i16, v2i32, v2f32], - CCIfSubtarget<"isTargetDarwin()", - CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>, - - // The first 8 v1i64 vector arguments are passed in GPRs on Darwin. - CCIfType<[v1i64], - CCIfSubtarget<"isTargetDarwin()", - CCAssignToReg<[RDI, RSI, RDX, RCX, R8]>>>, - - // Integer/FP values get stored in stack slots that are 8 bytes in size and - // 8-byte aligned if there are no more registers to hold them. - CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>, - - // Vectors get 16-byte stack slots that are 16-byte aligned. - CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>, - - // __m64 vectors get 8-byte stack slots that are 8-byte aligned. - CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 8>> -]>; - - //===----------------------------------------------------------------------===// // X86 C Calling Convention //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86ELFWriterInfo.cpp b/lib/Target/X86/X86ELFWriterInfo.cpp index 2604741..d84034b 100644 --- a/lib/Target/X86/X86ELFWriterInfo.cpp +++ b/lib/Target/X86/X86ELFWriterInfo.cpp @@ -12,8 +12,27 @@ //===----------------------------------------------------------------------===// #include "X86ELFWriterInfo.h" +#include "llvm/Function.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" using namespace llvm; -X86ELFWriterInfo::X86ELFWriterInfo(bool is64Bit) : - TargetELFWriterInfo(is64Bit ? EM_X86_64 : EM_386) {} +X86ELFWriterInfo::X86ELFWriterInfo(TargetMachine &TM) + : TargetELFWriterInfo(TM) { + bool is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64; + EMachine = is64Bit ? EM_X86_64 : EM_386; + } + X86ELFWriterInfo::~X86ELFWriterInfo() {} + +unsigned X86ELFWriterInfo::getFunctionAlignment(const Function *F) const { + unsigned FnAlign = 4; + + if (F->hasFnAttr(Attribute::OptimizeForSize)) + FnAlign = 1; + + if (F->getAlignment()) + FnAlign = Log2_32(F->getAlignment()); + + return (1 << FnAlign); +} diff --git a/lib/Target/X86/X86ELFWriterInfo.h b/lib/Target/X86/X86ELFWriterInfo.h index acfa501..e9c5bc4 100644 --- a/lib/Target/X86/X86ELFWriterInfo.h +++ b/lib/Target/X86/X86ELFWriterInfo.h @@ -20,8 +20,10 @@ namespace llvm { class X86ELFWriterInfo : public TargetELFWriterInfo { public: - X86ELFWriterInfo(bool is64Bit); + X86ELFWriterInfo(TargetMachine &TM); virtual ~X86ELFWriterInfo(); + + virtual unsigned getFunctionAlignment(const Function *F) const; }; } // end llvm namespace diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 33332e4..2bcfd76 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -171,8 +171,6 @@ CCAssignFn *X86FastISel::CCAssignFnForCall(unsigned CC, bool isTaillCall) { if (Subtarget->is64Bit()) { if (Subtarget->isTargetWin64()) return CC_X86_Win64_C; - else if (CC == CallingConv::Fast && isTaillCall) - return CC_X86_64_TailCall; else return CC_X86_64_C; } diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 9e15a54..36e3ab2 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -944,7 +944,7 @@ SDValue X86TargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) { SDValue StackAdjustment = TailCall.getOperand(2); assert(((TargetAddress.getOpcode() == ISD::Register && (cast<RegisterSDNode>(TargetAddress)->getReg() == X86::EAX || - cast<RegisterSDNode>(TargetAddress)->getReg() == X86::R9)) || + cast<RegisterSDNode>(TargetAddress)->getReg() == X86::R11)) || TargetAddress.getOpcode() == ISD::TargetExternalSymbol || TargetAddress.getOpcode() == ISD::TargetGlobalAddress) && "Expecting an global address, external symbol, or register"); @@ -1171,8 +1171,6 @@ CCAssignFn *X86TargetLowering::CCAssignFnForNode(unsigned CC) const { if (Subtarget->is64Bit()) { if (Subtarget->isTargetWin64()) return CC_X86_Win64_C; - else if (CC == CallingConv::Fast && PerformTailCallOpt) - return CC_X86_64_TailCall; else return CC_X86_64_C; } @@ -1799,7 +1797,7 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) { } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy()); } else if (IsTailCall) { - unsigned Opc = Is64Bit ? X86::R9 : X86::EAX; + unsigned Opc = Is64Bit ? X86::R11 : X86::EAX; Chain = DAG.getCopyToReg(Chain, dl, DAG.getRegister(Opc, getPointerTy()), @@ -7696,7 +7694,7 @@ static bool EltsFromConsecutiveLoads(ShuffleVectorSDNode *N, unsigned NumElems, SelectionDAG &DAG, MachineFrameInfo *MFI, const TargetLowering &TLI) { LDBase = NULL; - LastLoadedElt = -1; + LastLoadedElt = -1U; for (unsigned i = 0; i < NumElems; ++i) { if (N->getMaskElt(i) < 0) { if (!LDBase) diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index c733f26..6c0074e 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -822,6 +822,13 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { NumBytes = StackSize - X86FI->getCalleeSavedFrameSize(); } + unsigned ReadyLabelId = 0; + if (needsFrameMoves) { + // Mark effective beginning of when frame pointer is ready. + ReadyLabelId = MMI->NextLabelID(); + BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(ReadyLabelId); + } + // Skip the callee-saved push instructions. while (MBBI != MBB.end() && (MBBI->getOpcode() == X86::PUSH32r || @@ -831,67 +838,61 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { if (MBBI != MBB.end()) DL = MBBI->getDebugLoc(); - if (NumBytes) { // Adjust stack pointer: ESP -= numbytes. - if (NumBytes >= 4096 && Subtarget->isTargetCygMing()) { - // Check, whether EAX is livein for this function. - bool isEAXAlive = false; - for (MachineRegisterInfo::livein_iterator + // Adjust stack pointer: ESP -= numbytes. + if (NumBytes >= 4096 && Subtarget->isTargetCygMing()) { + // Check, whether EAX is livein for this function. + bool isEAXAlive = false; + for (MachineRegisterInfo::livein_iterator II = MF.getRegInfo().livein_begin(), EE = MF.getRegInfo().livein_end(); (II != EE) && !isEAXAlive; ++II) { - unsigned Reg = II->first; - isEAXAlive = (Reg == X86::EAX || Reg == X86::AX || - Reg == X86::AH || Reg == X86::AL); - } + unsigned Reg = II->first; + isEAXAlive = (Reg == X86::EAX || Reg == X86::AX || + Reg == X86::AH || Reg == X86::AL); + } - // Function prologue calls _alloca to probe the stack when allocating more - // than 4k bytes in one go. Touching the stack at 4K increments is - // necessary to ensure that the guard pages used by the OS virtual memory - // manager are allocated in correct sequence. - if (!isEAXAlive) { - BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) - .addImm(NumBytes); - BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32)) - .addExternalSymbol("_alloca"); - } else { - // Save EAX - BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r)) - .addReg(X86::EAX, RegState::Kill); - - // Allocate NumBytes-4 bytes on stack. We'll also use 4 already - // allocated bytes for EAX. - BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) - .addImm(NumBytes-4); - BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32)) - .addExternalSymbol("_alloca"); - - // Restore EAX - MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), - X86::EAX), - StackPtr, false, NumBytes-4); - MBB.insert(MBBI, MI); - } + // Function prologue calls _alloca to probe the stack when allocating more + // than 4k bytes in one go. Touching the stack at 4K increments is necessary + // to ensure that the guard pages used by the OS virtual memory manager are + // allocated in correct sequence. + if (!isEAXAlive) { + BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) + .addImm(NumBytes); + BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32)) + .addExternalSymbol("_alloca"); } else { - // If there is an SUB32ri of ESP immediately before this instruction, - // merge the two. This can be the case when tail call elimination is - // enabled and the callee has more arguments then the caller. - NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true); + // Save EAX + BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r)) + .addReg(X86::EAX, RegState::Kill); + + // Allocate NumBytes-4 bytes on stack. We'll also use 4 already + // allocated bytes for EAX. + BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) + .addImm(NumBytes - 4); + BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32)) + .addExternalSymbol("_alloca"); + + // Restore EAX + MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), + X86::EAX), + StackPtr, false, NumBytes - 4); + MBB.insert(MBBI, MI); + } + } else if (NumBytes) { + // If there is an SUB32ri of ESP immediately before this instruction, merge + // the two. This can be the case when tail call elimination is enabled and + // the callee has more arguments then the caller. + NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true); - // If there is an ADD32ri or SUB32ri of ESP immediately after this - // instruction, merge the two instructions. - mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes); + // If there is an ADD32ri or SUB32ri of ESP immediately after this + // instruction, merge the two instructions. + mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes); - if (NumBytes) - emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, TII); - } + if (NumBytes) + emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, TII); } - if (needsFrameMoves) { - // Mark effective beginning of when frame pointer is ready. - unsigned ReadyLabelId = 0; - ReadyLabelId = MMI->NextLabelID(); - BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(ReadyLabelId); + if (needsFrameMoves) emitFrameMoves(MF, FrameLabelId, ReadyLabelId); - } } void X86RegisterInfo::emitEpilogue(MachineFunction &MF, diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 03ce1ae..56983ce 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -350,6 +350,10 @@ X86Subtarget::X86Subtarget(const Module &M, const std::string &FS, bool is64Bit) , MaxInlineSizeThreshold(128) , Is64Bit(is64Bit) , TargetType(isELF) { // Default to ELF unless otherwise specified. + + // default to hard float ABI + if (FloatABIType == FloatABI::Default) + FloatABIType = FloatABI::Hard; // Determine default and user specified characteristics if (!FS.empty()) { diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 88ab247..dfb055f 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -133,8 +133,7 @@ X86TargetMachine::X86TargetMachine(const Module &M, const std::string &FS, DataLayout(Subtarget.getDataLayout()), FrameInfo(TargetFrameInfo::StackGrowsDown, Subtarget.getStackAlignment(), Subtarget.is64Bit() ? -8 : -4), - InstrInfo(*this), JITInfo(*this), TLInfo(*this), - ELFWriterInfo(Subtarget.is64Bit()) { + InstrInfo(*this), JITInfo(*this), TLInfo(*this), ELFWriterInfo(*this) { DefRelocModel = getRelocationModel(); // FIXME: Correctly select PIC model for Win64 stuff if (getRelocationModel() == Reloc::Default) { |