author     ed <ed@FreeBSD.org>  2009-06-14 09:23:33 +0000
committer  ed <ed@FreeBSD.org>  2009-06-14 09:23:33 +0000
commit   db89e312d968c258aba3c79c1c398f5fb19267a3 (patch)
tree     49817b316c4fdaa56d9d16ebf2555303d1a990e0 /lib/Target/X86
parent   de000e339094f8c6e06a635dac9a803861416ec6 (diff)
Import LLVM r73340.
Diffstat (limited to 'lib/Target/X86')
-rw-r--r--  lib/Target/X86/README.txt            | 114
-rw-r--r--  lib/Target/X86/X86CallingConv.td     |  44
-rw-r--r--  lib/Target/X86/X86ELFWriterInfo.cpp  |  23
-rw-r--r--  lib/Target/X86/X86ELFWriterInfo.h    |   4
-rw-r--r--  lib/Target/X86/X86FastISel.cpp       |   2
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp   |   8
-rw-r--r--  lib/Target/X86/X86RegisterInfo.cpp   | 105
-rw-r--r--  lib/Target/X86/X86Subtarget.cpp      |   4
-rw-r--r--  lib/Target/X86/X86TargetMachine.cpp  |   3
9 files changed, 142 insertions(+), 165 deletions(-)
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index 710bd03..3796aac 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -482,35 +482,6 @@ _usesbb:
//===---------------------------------------------------------------------===//
-Currently we don't have elimination of redundant stack manipulations. Consider
-the code:
-
-int %main() {
-entry:
- call fastcc void %test1( )
- call fastcc void %test2( sbyte* cast (void ()* %test1 to sbyte*) )
- ret int 0
-}
-
-declare fastcc void %test1()
-
-declare fastcc void %test2(sbyte*)
-
-
-This currently compiles to:
-
- subl $16, %esp
- call _test5
- addl $12, %esp
- subl $16, %esp
- movl $_test5, (%esp)
- call _test6
- addl $12, %esp
-
-The add\sub pair is really unneeded here.
-
-//===---------------------------------------------------------------------===//
-
Consider the expansion of:
define i32 @test3(i32 %X) {
@@ -902,34 +873,6 @@ condition register is dead. xor reg reg is shorter than mov reg, #0.
//===---------------------------------------------------------------------===//
-We aren't matching RMW instructions aggressively
-enough. Here's a reduced testcase (more in PR1160):
-
-define void @test(i32* %huge_ptr, i32* %target_ptr) {
- %A = load i32* %huge_ptr ; <i32> [#uses=1]
- %B = load i32* %target_ptr ; <i32> [#uses=1]
- %C = or i32 %A, %B ; <i32> [#uses=1]
- store i32 %C, i32* %target_ptr
- ret void
-}
-
-$ llvm-as < t.ll | llc -march=x86-64
-
-_test:
- movl (%rdi), %eax
- orl (%rsi), %eax
- movl %eax, (%rsi)
- ret
-
-That should be something like:
-
-_test:
- movl (%rdi), %eax
- orl %eax, (%rsi)
- ret
-
-//===---------------------------------------------------------------------===//
-
The following code:
bb114.preheader: ; preds = %cond_next94
@@ -1897,3 +1840,60 @@ The second one is done for: Atom, Pentium Pro, all AMDs, Pentium 4, Nocona,
Core 2, and "Generic"
//===---------------------------------------------------------------------===//
+
+Testcase:
+int a(int x) { return (x & 127) > 31; }
+
+Current output:
+ movl 4(%esp), %eax
+ andl $127, %eax
+ cmpl $31, %eax
+ seta %al
+ movzbl %al, %eax
+ ret
+
+Ideal output:
+ xorl %eax, %eax
+ testl $96, 4(%esp)
+ setne %al
+ ret
+
+We could do this transformation in instcombine, but it's only clearly
+beneficial on platforms with a test instruction.
+
+//===---------------------------------------------------------------------===//
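
One way to sanity-check the rewrite above: 127 masks in bits 0-6, and the
masked value can exceed 31 only when bit 5 or bit 6 is set, i.e. when
(x & 96) != 0, since 96 == 32 | 64. A small C sketch (not part of the
patch; the loop range is arbitrary) that asserts the equivalence:

#include <assert.h>

int main(void) {
    /* (x & 127) > 31 holds exactly when bit 5 or bit 6 of x is set. */
    for (int x = -4096; x < 4096; ++x)
        assert(((x & 127) > 31) == ((x & 96) != 0));
    return 0;
}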
+Testcase:
+int x(int a) { return (a&0xf0)>>4; }
+
+Current output:
+ movl 4(%esp), %eax
+ shrl $4, %eax
+ andl $15, %eax
+ ret
+
+Ideal output:
+ movzbl 4(%esp), %eax
+ shrl $4, %eax
+ ret
+
+//===---------------------------------------------------------------------===//
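
The rewrite above works because (a & 0xf0) >> 4 depends only on bits 4-7
of a, all of which survive a zero-extending byte load. A small C sketch
(not part of the patch) of the same equivalence:

#include <assert.h>

int main(void) {
    /* movzbl keeps bits 0-7; shrl $4 then yields bits 4-7, exactly
       what (a & 0xf0) >> 4 computes. */
    for (int a = -4096; a < 4096; ++a)
        assert(((a & 0xf0) >> 4) == ((a & 0xff) >> 4));
    return 0;
}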
+
+Testcase:
+int x(int a) { return (a & 0x80) ? 0x100 : 0; }
+
+Current output:
+ testl $128, 4(%esp)
+ setne %al
+ movzbl %al, %eax
+ shll $8, %eax
+ ret
+
+Ideal output:
+ movl 4(%esp), %eax
+ addl %eax, %eax
+ andl $256, %eax
+ ret
+
+We generally want to fold shifted tests of a single bit into a shift+and on x86.
+
+//===---------------------------------------------------------------------===//
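
The folding rests on a single-bit identity: shifting bit 7 (0x80) left
once lands it on bit 8 (0x100). A small C sketch checking it (not part
of the patch; the unsigned cast sidesteps undefined behavior when
left-shifting negative ints):

#include <assert.h>

int main(void) {
    /* addl %eax, %eax doubles the value, i.e. shifts left by one. */
    for (int a = -4096; a < 4096; ++a)
        assert(((a & 0x80) ? 0x100 : 0) ==
               (int)(((unsigned)a << 1) & 0x100u));
    return 0;
}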
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index 7f99203..e9fcbd5 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -215,50 +215,6 @@ def CC_X86_Win64_C : CallingConv<[
CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 16>>
]>;
-// Tail call convention (fast): One register is reserved for target address,
-// namely R9
-def CC_X86_64_TailCall : CallingConv<[
- // Handles byval parameters.
- CCIfByVal<CCPassByVal<8, 8>>,
-
- // Promote i8/i16 arguments to i32.
- CCIfType<[i8, i16], CCPromoteToType<i32>>,
-
- // The 'nest' parameter, if any, is passed in R10.
- CCIfNest<CCAssignToReg<[R10]>>,
-
- // The first 6 integer arguments are passed in integer registers.
- CCIfType<[i32], CCAssignToReg<[EDI, ESI, EDX, ECX, R8D]>>,
- CCIfType<[i64], CCAssignToReg<[RDI, RSI, RDX, RCX, R8]>>,
-
- // The first 8 FP/Vector arguments are passed in XMM registers.
- CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
- CCIfSubtarget<"hasSSE1()",
- CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>,
-
- // The first 8 MMX (except for v1i64) vector arguments are passed in XMM
- // registers on Darwin.
- CCIfType<[v8i8, v4i16, v2i32, v2f32],
- CCIfSubtarget<"isTargetDarwin()",
- CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>,
-
- // The first 8 v1i64 vector arguments are passed in GPRs on Darwin.
- CCIfType<[v1i64],
- CCIfSubtarget<"isTargetDarwin()",
- CCAssignToReg<[RDI, RSI, RDX, RCX, R8]>>>,
-
- // Integer/FP values get stored in stack slots that are 8 bytes in size and
- // 8-byte aligned if there are no more registers to hold them.
- CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
-
- // Vectors get 16-byte stack slots that are 16-byte aligned.
- CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>,
-
- // __m64 vectors get 8-byte stack slots that are 8-byte aligned.
- CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 8>>
-]>;
-
-
//===----------------------------------------------------------------------===//
// X86 C Calling Convention
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86ELFWriterInfo.cpp b/lib/Target/X86/X86ELFWriterInfo.cpp
index 2604741..d84034b 100644
--- a/lib/Target/X86/X86ELFWriterInfo.cpp
+++ b/lib/Target/X86/X86ELFWriterInfo.cpp
@@ -12,8 +12,27 @@
//===----------------------------------------------------------------------===//
#include "X86ELFWriterInfo.h"
+#include "llvm/Function.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
-X86ELFWriterInfo::X86ELFWriterInfo(bool is64Bit) :
- TargetELFWriterInfo(is64Bit ? EM_X86_64 : EM_386) {}
+X86ELFWriterInfo::X86ELFWriterInfo(TargetMachine &TM)
+ : TargetELFWriterInfo(TM) {
+ bool is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64;
+ EMachine = is64Bit ? EM_X86_64 : EM_386;
+}
+
X86ELFWriterInfo::~X86ELFWriterInfo() {}
+
+unsigned X86ELFWriterInfo::getFunctionAlignment(const Function *F) const {
+ unsigned FnAlign = 4;
+
+ if (F->hasFnAttr(Attribute::OptimizeForSize))
+ FnAlign = 1;
+
+ if (F->getAlignment())
+ FnAlign = Log2_32(F->getAlignment());
+
+ return (1 << FnAlign);
+}
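
FnAlign above holds a log2 value, so the function returns 16 bytes by
default, 2 bytes under OptimizeForSize, and the function's explicit
alignment when one is set. A dependency-free C sketch of the same
decision logic (function_alignment, opt_for_size, and explicit_align are
hypothetical stand-ins, not LLVM API):

#include <assert.h>

/* floor(log2(v)) for v > 0, mirroring LLVM's Log2_32. */
static unsigned log2_32(unsigned v) {
    unsigned r = 0;
    while (v >>= 1)
        ++r;
    return r;
}

static unsigned function_alignment(int opt_for_size, unsigned explicit_align) {
    unsigned fn_align = 4;                  /* default: 1 << 4 = 16 bytes */
    if (opt_for_size)
        fn_align = 1;                       /* shrink to 1 << 1 = 2 bytes */
    if (explicit_align)
        fn_align = log2_32(explicit_align); /* explicit alignment wins */
    return 1u << fn_align;
}

int main(void) {
    assert(function_alignment(0, 0) == 16);
    assert(function_alignment(1, 0) == 2);
    assert(function_alignment(0, 32) == 32);
    return 0;
}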
diff --git a/lib/Target/X86/X86ELFWriterInfo.h b/lib/Target/X86/X86ELFWriterInfo.h
index acfa501..e9c5bc4 100644
--- a/lib/Target/X86/X86ELFWriterInfo.h
+++ b/lib/Target/X86/X86ELFWriterInfo.h
@@ -20,8 +20,10 @@ namespace llvm {
class X86ELFWriterInfo : public TargetELFWriterInfo {
public:
- X86ELFWriterInfo(bool is64Bit);
+ X86ELFWriterInfo(TargetMachine &TM);
virtual ~X86ELFWriterInfo();
+
+ virtual unsigned getFunctionAlignment(const Function *F) const;
};
} // end llvm namespace
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 33332e4..2bcfd76 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -171,8 +171,6 @@ CCAssignFn *X86FastISel::CCAssignFnForCall(unsigned CC, bool isTaillCall) {
if (Subtarget->is64Bit()) {
if (Subtarget->isTargetWin64())
return CC_X86_Win64_C;
- else if (CC == CallingConv::Fast && isTaillCall)
- return CC_X86_64_TailCall;
else
return CC_X86_64_C;
}
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 9e15a54..36e3ab2 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -944,7 +944,7 @@ SDValue X86TargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) {
SDValue StackAdjustment = TailCall.getOperand(2);
assert(((TargetAddress.getOpcode() == ISD::Register &&
(cast<RegisterSDNode>(TargetAddress)->getReg() == X86::EAX ||
- cast<RegisterSDNode>(TargetAddress)->getReg() == X86::R9)) ||
+ cast<RegisterSDNode>(TargetAddress)->getReg() == X86::R11)) ||
TargetAddress.getOpcode() == ISD::TargetExternalSymbol ||
TargetAddress.getOpcode() == ISD::TargetGlobalAddress) &&
"Expecting an global address, external symbol, or register");
@@ -1171,8 +1171,6 @@ CCAssignFn *X86TargetLowering::CCAssignFnForNode(unsigned CC) const {
if (Subtarget->is64Bit()) {
if (Subtarget->isTargetWin64())
return CC_X86_Win64_C;
- else if (CC == CallingConv::Fast && PerformTailCallOpt)
- return CC_X86_64_TailCall;
else
return CC_X86_64_C;
}
@@ -1799,7 +1797,7 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
} else if (IsTailCall) {
- unsigned Opc = Is64Bit ? X86::R9 : X86::EAX;
+ unsigned Opc = Is64Bit ? X86::R11 : X86::EAX;
Chain = DAG.getCopyToReg(Chain, dl,
DAG.getRegister(Opc, getPointerTy()),
@@ -7696,7 +7694,7 @@ static bool EltsFromConsecutiveLoads(ShuffleVectorSDNode *N, unsigned NumElems,
SelectionDAG &DAG, MachineFrameInfo *MFI,
const TargetLowering &TLI) {
LDBase = NULL;
- LastLoadedElt = -1;
+ LastLoadedElt = -1U;
for (unsigned i = 0; i < NumElems; ++i) {
if (N->getMaskElt(i) < 0) {
if (!LDBase)
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index c733f26..6c0074e 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -822,6 +822,13 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
}
+ unsigned ReadyLabelId = 0;
+ if (needsFrameMoves) {
+ // Mark effective beginning of when frame pointer is ready.
+ ReadyLabelId = MMI->NextLabelID();
+ BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(ReadyLabelId);
+ }
+
// Skip the callee-saved push instructions.
while (MBBI != MBB.end() &&
(MBBI->getOpcode() == X86::PUSH32r ||
@@ -831,67 +838,61 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
if (MBBI != MBB.end())
DL = MBBI->getDebugLoc();
- if (NumBytes) { // Adjust stack pointer: ESP -= numbytes.
- if (NumBytes >= 4096 && Subtarget->isTargetCygMing()) {
- // Check, whether EAX is livein for this function.
- bool isEAXAlive = false;
- for (MachineRegisterInfo::livein_iterator
+ // Adjust stack pointer: ESP -= numbytes.
+ if (NumBytes >= 4096 && Subtarget->isTargetCygMing()) {
+    // Check whether EAX is live-in for this function.
+ bool isEAXAlive = false;
+ for (MachineRegisterInfo::livein_iterator
II = MF.getRegInfo().livein_begin(),
EE = MF.getRegInfo().livein_end(); (II != EE) && !isEAXAlive; ++II) {
- unsigned Reg = II->first;
- isEAXAlive = (Reg == X86::EAX || Reg == X86::AX ||
- Reg == X86::AH || Reg == X86::AL);
- }
+ unsigned Reg = II->first;
+ isEAXAlive = (Reg == X86::EAX || Reg == X86::AX ||
+ Reg == X86::AH || Reg == X86::AL);
+ }
- // Function prologue calls _alloca to probe the stack when allocating more
- // than 4k bytes in one go. Touching the stack at 4K increments is
- // necessary to ensure that the guard pages used by the OS virtual memory
- // manager are allocated in correct sequence.
- if (!isEAXAlive) {
- BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
- .addImm(NumBytes);
- BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
- .addExternalSymbol("_alloca");
- } else {
- // Save EAX
- BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
- .addReg(X86::EAX, RegState::Kill);
-
- // Allocate NumBytes-4 bytes on stack. We'll also use 4 already
- // allocated bytes for EAX.
- BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
- .addImm(NumBytes-4);
- BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
- .addExternalSymbol("_alloca");
-
- // Restore EAX
- MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
- X86::EAX),
- StackPtr, false, NumBytes-4);
- MBB.insert(MBBI, MI);
- }
+ // Function prologue calls _alloca to probe the stack when allocating more
+ // than 4k bytes in one go. Touching the stack at 4K increments is necessary
+ // to ensure that the guard pages used by the OS virtual memory manager are
+ // allocated in correct sequence.
+ if (!isEAXAlive) {
+ BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
+ .addImm(NumBytes);
+ BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
+ .addExternalSymbol("_alloca");
} else {
- // If there is an SUB32ri of ESP immediately before this instruction,
- // merge the two. This can be the case when tail call elimination is
- // enabled and the callee has more arguments then the caller.
- NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true);
+ // Save EAX
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
+ .addReg(X86::EAX, RegState::Kill);
+
+ // Allocate NumBytes-4 bytes on stack. We'll also use 4 already
+ // allocated bytes for EAX.
+ BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
+ .addImm(NumBytes - 4);
+ BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
+ .addExternalSymbol("_alloca");
+
+ // Restore EAX
+ MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
+ X86::EAX),
+ StackPtr, false, NumBytes - 4);
+ MBB.insert(MBBI, MI);
+ }
+ } else if (NumBytes) {
+    // If there is a SUB32ri of ESP immediately before this instruction, merge
+    // the two. This can be the case when tail call elimination is enabled and
+    // the callee has more arguments than the caller.
+ NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true);
- // If there is an ADD32ri or SUB32ri of ESP immediately after this
- // instruction, merge the two instructions.
- mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes);
+ // If there is an ADD32ri or SUB32ri of ESP immediately after this
+ // instruction, merge the two instructions.
+ mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes);
- if (NumBytes)
- emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, TII);
- }
+ if (NumBytes)
+ emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, TII);
}
- if (needsFrameMoves) {
- // Mark effective beginning of when frame pointer is ready.
- unsigned ReadyLabelId = 0;
- ReadyLabelId = MMI->NextLabelID();
- BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(ReadyLabelId);
+ if (needsFrameMoves)
emitFrameMoves(MF, FrameLabelId, ReadyLabelId);
- }
}
void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
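
The _alloca comments in this hunk encode the underlying constraint:
Windows-style targets commit stack pages lazily behind a guard page, so a
prologue that grows the stack by more than 4 KB must touch each intervening
page in order instead of moving ESP past them in one step. A hypothetical C
sketch of the touch pattern (illustrative only, not the actual probe
routine):

#include <stddef.h>

#define PAGE_SIZE 4096u

/* Touch one byte per page so each access faults in at most one new
   page; skipping a page would land beyond the guard region and crash
   instead of growing the stack. */
static void probe_pages(volatile char *frame, size_t frame_size) {
    for (size_t off = 0; off < frame_size; off += PAGE_SIZE)
        frame[off] = 0;
}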
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 03ce1ae..56983ce 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -350,6 +350,10 @@ X86Subtarget::X86Subtarget(const Module &M, const std::string &FS, bool is64Bit)
, MaxInlineSizeThreshold(128)
, Is64Bit(is64Bit)
, TargetType(isELF) { // Default to ELF unless otherwise specified.
+
+  // Default to the hard float ABI.
+ if (FloatABIType == FloatABI::Default)
+ FloatABIType = FloatABI::Hard;
// Determine default and user specified characteristics
if (!FS.empty()) {
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 88ab247..dfb055f 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -133,8 +133,7 @@ X86TargetMachine::X86TargetMachine(const Module &M, const std::string &FS,
DataLayout(Subtarget.getDataLayout()),
FrameInfo(TargetFrameInfo::StackGrowsDown,
Subtarget.getStackAlignment(), Subtarget.is64Bit() ? -8 : -4),
- InstrInfo(*this), JITInfo(*this), TLInfo(*this),
- ELFWriterInfo(Subtarget.is64Bit()) {
+ InstrInfo(*this), JITInfo(*this), TLInfo(*this), ELFWriterInfo(*this) {
DefRelocModel = getRelocationModel();
// FIXME: Correctly select PIC model for Win64 stuff
if (getRelocationModel() == Reloc::Default) {