author    rdivacky <rdivacky@FreeBSD.org>    2009-11-18 14:58:34 +0000
committer rdivacky <rdivacky@FreeBSD.org>    2009-11-18 14:58:34 +0000
commit    d2e985fd323c167e20f77b045a1d99ad166e65db (patch)
tree      6a111e552c75afc66228e3d8f19b6731e4013f10 /lib/Target/X86
parent    ded64d5d348ce8d8c5aa42cf63f6de9dd84b7e89 (diff)
Update LLVM to r89205.
Diffstat (limited to 'lib/Target/X86')
-rw-r--r--  lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp   |   2
-rw-r--r--  lib/Target/X86/AsmPrinter/X86MCInstLower.cpp  |   2
-rw-r--r--  lib/Target/X86/X86CodeEmitter.cpp             |  27
-rw-r--r--  lib/Target/X86/X86FastISel.cpp                |   2
-rw-r--r--  lib/Target/X86/X86ISelDAGToDAG.cpp            |  18
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp            | 102
-rw-r--r--  lib/Target/X86/X86ISelLowering.h              |   8
-rw-r--r--  lib/Target/X86/X86Instr64bit.td               |   2
-rw-r--r--  lib/Target/X86/X86InstrInfo.cpp               | 169
-rw-r--r--  lib/Target/X86/X86InstrInfo.h                 |  35
-rw-r--r--  lib/Target/X86/X86InstrInfo.td                |   2
-rw-r--r--  lib/Target/X86/X86InstrSSE.td                 |  48
-rw-r--r--  lib/Target/X86/X86JITInfo.cpp                 |   5
-rw-r--r--  lib/Target/X86/X86RegisterInfo.cpp            |  30
-rw-r--r--  lib/Target/X86/X86RegisterInfo.h              |   2
-rw-r--r--  lib/Target/X86/X86Subtarget.cpp               | 125
-rw-r--r--  lib/Target/X86/X86Subtarget.h                 |  12
-rw-r--r--  lib/Target/X86/X86TargetMachine.cpp           |  19
18 files changed, 310 insertions(+), 300 deletions(-)
diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
index ae8e6d3..b88063f 100644
--- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
@@ -651,7 +651,7 @@ void X86AsmPrinter::printMachineInstruction(const MachineInstr *MI) {
printInstructionThroughMCStreamer(MI);
- if (VerboseAsm && !MI->getDebugLoc().isUnknown())
+ if (VerboseAsm)
EmitComments(*MI);
O << '\n';
diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
index 821cca4..be9f4b2 100644
--- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
+++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
@@ -21,6 +21,7 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/Mangler.h"
#include "llvm/ADT/SmallString.h"
@@ -405,7 +406,6 @@ void X86AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) {
printLabel(MI);
return;
case TargetInstrInfo::INLINEASM:
- O << '\t';
printInlineAsm(MI);
return;
case TargetInstrInfo::IMPLICIT_DEF:
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index a0bded3..4497931 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -82,7 +82,7 @@ namespace {
void emitPCRelativeBlockAddress(MachineBasicBlock *MBB);
void emitGlobalAddress(GlobalValue *GV, unsigned Reloc,
intptr_t Disp = 0, intptr_t PCAdj = 0,
- bool NeedStub = false, bool Indirect = false);
+ bool Indirect = false);
void emitExternalSymbolAddress(const char *ES, unsigned Reloc);
void emitConstPoolAddress(unsigned CPI, unsigned Reloc, intptr_t Disp = 0,
intptr_t PCAdj = 0);
@@ -176,7 +176,6 @@ template<class CodeEmitter>
void Emitter<CodeEmitter>::emitGlobalAddress(GlobalValue *GV, unsigned Reloc,
intptr_t Disp /* = 0 */,
intptr_t PCAdj /* = 0 */,
- bool NeedStub /* = false */,
bool Indirect /* = false */) {
intptr_t RelocCST = Disp;
if (Reloc == X86::reloc_picrel_word)
@@ -185,9 +184,9 @@ void Emitter<CodeEmitter>::emitGlobalAddress(GlobalValue *GV, unsigned Reloc,
RelocCST = PCAdj;
MachineRelocation MR = Indirect
? MachineRelocation::getIndirectSymbol(MCE.getCurrentPCOffset(), Reloc,
- GV, RelocCST, NeedStub)
+ GV, RelocCST, false)
: MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc,
- GV, RelocCST, NeedStub);
+ GV, RelocCST, false);
MCE.addRelocation(MR);
// The relocated value will be added to the displacement
if (Reloc == X86::reloc_absolute_dword)
@@ -333,10 +332,9 @@ void Emitter<CodeEmitter>::emitDisplacementField(const MachineOperand *RelocOp,
// do it, otherwise fallback to absolute (this is determined by IsPCRel).
// 89 05 00 00 00 00 mov %eax,0(%rip) # PC-relative
// 89 04 25 00 00 00 00 mov %eax,0x0 # Absolute
- bool NeedStub = isa<Function>(RelocOp->getGlobal());
bool Indirect = gvNeedsNonLazyPtr(*RelocOp, TM);
emitGlobalAddress(RelocOp->getGlobal(), RelocType, RelocOp->getOffset(),
- Adj, NeedStub, Indirect);
+ Adj, Indirect);
} else if (RelocOp->isSymbol()) {
emitExternalSymbolAddress(RelocOp->getSymbolName(), RelocType);
} else if (RelocOp->isCPI()) {
@@ -633,14 +631,8 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI,
}
if (MO.isGlobal()) {
- // Assume undefined functions may be outside the Small codespace.
- bool NeedStub =
- (Is64BitMode &&
- (TM.getCodeModel() == CodeModel::Large ||
- TM.getSubtarget<X86Subtarget>().isTargetDarwin())) ||
- Opcode == X86::TAILJMPd;
emitGlobalAddress(MO.getGlobal(), X86::reloc_pcrel_word,
- MO.getOffset(), 0, NeedStub);
+ MO.getOffset(), 0);
break;
}
@@ -681,10 +673,9 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI,
if (Opcode == X86::MOV64ri)
rt = X86::reloc_absolute_dword; // FIXME: add X86II flag?
if (MO1.isGlobal()) {
- bool NeedStub = isa<Function>(MO1.getGlobal());
bool Indirect = gvNeedsNonLazyPtr(MO1, TM);
emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0,
- NeedStub, Indirect);
+ Indirect);
} else if (MO1.isSymbol())
emitExternalSymbolAddress(MO1.getSymbolName(), rt);
else if (MO1.isCPI())
@@ -790,10 +781,9 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI,
if (Opcode == X86::MOV64ri32)
rt = X86::reloc_absolute_word_sext; // FIXME: add X86II flag?
if (MO1.isGlobal()) {
- bool NeedStub = isa<Function>(MO1.getGlobal());
bool Indirect = gvNeedsNonLazyPtr(MO1, TM);
emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0,
- NeedStub, Indirect);
+ Indirect);
} else if (MO1.isSymbol())
emitExternalSymbolAddress(MO1.getSymbolName(), rt);
else if (MO1.isCPI())
@@ -831,10 +821,9 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI,
if (Opcode == X86::MOV64mi32)
rt = X86::reloc_absolute_word_sext; // FIXME: add X86II flag?
if (MO.isGlobal()) {
- bool NeedStub = isa<Function>(MO.getGlobal());
bool Indirect = gvNeedsNonLazyPtr(MO, TM);
emitGlobalAddress(MO.getGlobal(), rt, MO.getOffset(), 0,
- NeedStub, Indirect);
+ Indirect);
} else if (MO.isSymbol())
emitExternalSymbolAddress(MO.getSymbolName(), rt);
else if (MO.isCPI())
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 3401df0..431c120 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -1493,7 +1493,7 @@ bool X86FastISel::X86SelectCall(Instruction *I) {
EVT ResVT = RVLocs[0].getValVT();
unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
unsigned MemSize = ResVT.getSizeInBits()/8;
- int FI = MFI.CreateStackObject(MemSize, MemSize);
+ int FI = MFI.CreateStackObject(MemSize, MemSize, false);
addFrameReference(BuildMI(MBB, DL, TII.get(Opc)), FI).addReg(ResultReg);
DstRC = ResVT == MVT::f32
? X86::FR32RegisterClass : X86::FR64RegisterClass;
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 122f515..6a3577a 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -12,6 +12,15 @@
//
//===----------------------------------------------------------------------===//
+// Force NDEBUG on in any optimized build on Darwin.
+//
+// FIXME: This is a huge hack, to work around ridiculously awful compile times
+// on this file with gcc-4.2 on Darwin, in Release mode.
+#if (!defined(__llvm__) && defined(__APPLE__) && \
+ defined(__OPTIMIZE__) && !defined(NDEBUG))
+#define NDEBUG
+#endif
+
#define DEBUG_TYPE "x86-isel"
#include "X86.h"
#include "X86InstrBuilder.h"
@@ -661,7 +670,6 @@ void X86DAGToDAGISel::InstructionSelect() {
const Function *F = MF->getFunction();
OptForSize = F->hasFnAttr(Attribute::OptimizeForSize);
- DEBUG(BB->dump());
if (OptLevel != CodeGenOpt::None)
PreprocessForRMW();
@@ -1950,14 +1958,12 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) {
0);
// We just did a 32-bit clear, insert it into a 64-bit register to
// clear the whole 64-bit reg.
- SDValue Undef =
- SDValue(CurDAG->getMachineNode(TargetInstrInfo::IMPLICIT_DEF,
- dl, MVT::i64), 0);
+ SDValue Zero = CurDAG->getTargetConstant(0, MVT::i64);
SDValue SubRegNo =
CurDAG->getTargetConstant(X86::SUBREG_32BIT, MVT::i32);
ClrNode =
- SDValue(CurDAG->getMachineNode(TargetInstrInfo::INSERT_SUBREG, dl,
- MVT::i64, Undef, ClrNode, SubRegNo),
+ SDValue(CurDAG->getMachineNode(TargetInstrInfo::SUBREG_TO_REG, dl,
+ MVT::i64, Zero, ClrNode, SubRegNo),
0);
} else {
ClrNode = SDValue(CurDAG->getMachineNode(ClrOpcode, dl, NVT), 0);
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 86ec9f2..6018cf5 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1087,6 +1087,17 @@ unsigned X86TargetLowering::getFunctionAlignment(const Function *F) const {
#include "X86GenCallingConv.inc"
+bool
+X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<EVT> &OutTys,
+ const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags,
+ SelectionDAG &DAG) {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+ RVLocs, *DAG.getContext());
+ return CCInfo.CheckReturn(OutTys, ArgsFlags, RetCC_X86);
+}
+
SDValue
X86TargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
@@ -1370,7 +1381,7 @@ X86TargetLowering::LowerMemArgument(SDValue Chain,
// In case of tail call optimization mark all arguments mutable. Since they
// could be overwritten by lowering of arguments in case of a tail call.
int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8,
- VA.getLocMemOffset(), isImmutable);
+ VA.getLocMemOffset(), isImmutable, false);
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
if (Flags.isByVal())
return FIN;
@@ -1499,7 +1510,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
// the start of the first vararg value... for expansion of llvm.va_start.
if (isVarArg) {
if (Is64Bit || CallConv != CallingConv::X86_FastCall) {
- VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);
+ VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize, true, false);
}
if (Is64Bit) {
unsigned TotalNumIntRegs = 0, TotalNumXMMRegs = 0;
@@ -1550,7 +1561,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
VarArgsGPOffset = NumIntRegs * 8;
VarArgsFPOffset = TotalNumIntRegs * 8 + NumXMMRegs * 16;
RegSaveFrameIndex = MFI->CreateStackObject(TotalNumIntRegs * 8 +
- TotalNumXMMRegs * 16, 16);
+ TotalNumXMMRegs * 16, 16,
+ false);
// Store the integer parameter registers.
SmallVector<SDValue, 8> MemOps;
@@ -1671,7 +1683,8 @@ EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF,
// Calculate the new stack slot for the return address.
int SlotSize = Is64Bit ? 8 : 4;
int NewReturnAddrFI =
- MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize);
+ MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize,
+ true, false);
EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT);
Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
@@ -1884,7 +1897,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Create frame index.
int32_t Offset = VA.getLocMemOffset()+FPDiff;
uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
- FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset);
+ FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true, false);
FIN = DAG.getFrameIndex(FI, getPointerTy());
if (Flags.isByVal()) {
@@ -1924,9 +1937,19 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
FPDiff, dl);
}
- // If the callee is a GlobalAddress node (quite common, every direct call is)
- // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ bool WasGlobalOrExternal = false;
+ if (getTargetMachine().getCodeModel() == CodeModel::Large) {
+ assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
+ // In the 64-bit large code model, we have to make all calls
+ // through a register, since the call instruction's 32-bit
+ // pc-relative offset may not be large enough to hold the whole
+ // address.
+ } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ WasGlobalOrExternal = true;
+ // If the callee is a GlobalAddress node (quite common, every direct call
+ // is) turn it into a TargetGlobalAddress node so that legalize doesn't hack
+ // it.
+
// We should use extra load for direct calls to dllimported functions in
// non-JIT mode.
GlobalValue *GV = G->getGlobal();
@@ -1954,6 +1977,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
G->getOffset(), OpFlags);
}
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ WasGlobalOrExternal = true;
unsigned char OpFlags = 0;
// On ELF targets, in either X86-64 or X86-32 mode, direct calls to external
@@ -1971,7 +1995,9 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(),
OpFlags);
- } else if (isTailCall) {
+ }
+
+ if (isTailCall && !WasGlobalOrExternal) {
unsigned Opc = Is64Bit ? X86::R11 : X86::EAX;
Chain = DAG.getCopyToReg(Chain, dl,
@@ -2169,7 +2195,8 @@ SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
if (ReturnAddrIndex == 0) {
// Set up a frame object for the return address.
uint64_t SlotSize = TD->getPointerSize();
- ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize);
+ ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize,
+ true, false);
FuncInfo->setRAIndex(ReturnAddrIndex);
}
@@ -2517,6 +2544,21 @@ bool X86::isMOVHLPSMask(ShuffleVectorSDNode *N) {
isUndefOrEqual(N->getMaskElt(3), 3);
}
+/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form
+/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
+/// <2, 3, 2, 3>
+bool X86::isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N) {
+ unsigned NumElems = N->getValueType(0).getVectorNumElements();
+
+ if (NumElems != 4)
+ return false;
+
+ return isUndefOrEqual(N->getMaskElt(0), 2) &&
+ isUndefOrEqual(N->getMaskElt(1), 3) &&
+ isUndefOrEqual(N->getMaskElt(2), 2) &&
+ isUndefOrEqual(N->getMaskElt(3), 3);
+}
+
/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}.
bool X86::isMOVLPMask(ShuffleVectorSDNode *N) {
@@ -2536,10 +2578,9 @@ bool X86::isMOVLPMask(ShuffleVectorSDNode *N) {
return true;
}
-/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand
-/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D}
-/// and MOVLHPS.
-bool X86::isMOVHPMask(ShuffleVectorSDNode *N) {
+/// isMOVLHPSMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to MOVLHPS.
+bool X86::isMOVLHPSMask(ShuffleVectorSDNode *N) {
unsigned NumElems = N->getValueType(0).getVectorNumElements();
if (NumElems != 2 && NumElems != 4)
@@ -2556,21 +2597,6 @@ bool X86::isMOVHPMask(ShuffleVectorSDNode *N) {
return true;
}
-/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form
-/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
-/// <2, 3, 2, 3>
-bool X86::isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N) {
- unsigned NumElems = N->getValueType(0).getVectorNumElements();
-
- if (NumElems != 4)
- return false;
-
- return isUndefOrEqual(N->getMaskElt(0), 2) &&
- isUndefOrEqual(N->getMaskElt(1), 3) &&
- isUndefOrEqual(N->getMaskElt(2), 2) &&
- isUndefOrEqual(N->getMaskElt(3), 3);
-}
-
/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKL.
static bool isUNPCKLMask(const SmallVectorImpl<int> &Mask, EVT VT,
@@ -4264,7 +4290,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
if (!isMMX && (X86::isMOVSHDUPMask(SVOp) ||
X86::isMOVSLDUPMask(SVOp) ||
X86::isMOVHLPSMask(SVOp) ||
- X86::isMOVHPMask(SVOp) ||
+ X86::isMOVLHPSMask(SVOp) ||
X86::isMOVLPMask(SVOp)))
return Op;
@@ -4961,7 +4987,7 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
DebugLoc dl = Op.getDebugLoc();
unsigned Size = SrcVT.getSizeInBits()/8;
MachineFunction &MF = DAG.getMachineFunction();
- int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
+ int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
StackSlot,
@@ -4995,7 +5021,7 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
// shouldn't be necessary except that RFP cannot be live across
// multiple blocks. When stackifier is fixed, they can be uncoupled.
MachineFunction &MF = DAG.getMachineFunction();
- int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
+ int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
Tys = DAG.getVTList(MVT::Other);
SmallVector<SDValue, 8> Ops;
@@ -5205,7 +5231,7 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) {
// stack slot.
MachineFunction &MF = DAG.getMachineFunction();
unsigned MemSize = DstTy.getSizeInBits()/8;
- int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
+ int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
unsigned Opc;
@@ -5228,7 +5254,7 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) {
};
Value = DAG.getNode(X86ISD::FLD, dl, Tys, Ops, 3);
Chain = Value.getValue(1);
- SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
+ SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false);
StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
}
@@ -6752,7 +6778,7 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) {
DebugLoc dl = Op.getDebugLoc();
// Save FP Control Word to stack slot
- int SSFI = MF.getFrameInfo()->CreateStackObject(2, StackAlignment);
+ int SSFI = MF.getFrameInfo()->CreateStackObject(2, StackAlignment, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
SDValue Chain = DAG.getNode(X86ISD::FNSTCW16m, dl, MVT::Other,
@@ -7977,7 +8003,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// Change the floating point control register to use "round towards zero"
// mode when truncating to an integer value.
MachineFunction *F = BB->getParent();
- int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2);
+ int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2, false);
addFrameReference(BuildMI(BB, DL, TII->get(X86::FNSTCW16m)), CWFrameIdx);
// Load the old value of the high byte of the control word...
@@ -9585,14 +9611,14 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
}
// GCC allows "st(0)" to be called just plain "st".
- if (StringsEqualNoCase("{st}", Constraint)) {
+ if (StringRef("{st}").equals_lower(Constraint)) {
Res.first = X86::ST0;
Res.second = X86::RFP80RegisterClass;
return Res;
}
// flags -> EFLAGS
- if (StringsEqualNoCase("{flags}", Constraint)) {
+ if (StringRef("{flags}").equals_lower(Constraint)) {
Res.first = X86::EFLAGS;
Res.second = X86::CCRRegisterClass;
return Res;
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 7b59b81..7b4ab62 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -286,7 +286,7 @@ namespace llvm {
/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for MOVHP{S|D}.
/// as well as MOVLHPS.
- bool isMOVHPMask(ShuffleVectorSDNode *N);
+ bool isMOVLHPSMask(ShuffleVectorSDNode *N);
/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKL.
@@ -699,6 +699,12 @@ namespace llvm {
const SmallVectorImpl<ISD::OutputArg> &Outs,
DebugLoc dl, SelectionDAG &DAG);
+ virtual bool
+ CanLowerReturn(CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<EVT> &OutTys,
+ const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags,
+ SelectionDAG &DAG);
+
void ReplaceATOMIC_BINARY_64(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG, unsigned NewOp);
diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td
index 3edced7..a01534b 100644
--- a/lib/Target/X86/X86Instr64bit.td
+++ b/lib/Target/X86/X86Instr64bit.td
@@ -309,7 +309,7 @@ def MOV64ri32 : RIi32<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src),
[(set GR64:$dst, i64immSExt32:$src)]>;
}
-let canFoldAsLoad = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
def MOV64rm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
"mov{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (load addr:$src))]>;
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 87bc10d..1ddceb1 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -26,11 +26,15 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/MC/MCAsmInfo.h"
+
+#include <limits>
+
using namespace llvm;
static cl::opt<bool>
@@ -707,9 +711,23 @@ bool X86InstrInfo::isMoveInstr(const MachineInstr& MI,
}
}
-unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
- int &FrameIndex) const {
- switch (MI->getOpcode()) {
+/// isFrameOperand - Return true and the FrameIndex if the specified
+/// operand and follow operands form a reference to the stack frame.
+bool X86InstrInfo::isFrameOperand(const MachineInstr *MI, unsigned int Op,
+ int &FrameIndex) const {
+ if (MI->getOperand(Op).isFI() && MI->getOperand(Op+1).isImm() &&
+ MI->getOperand(Op+2).isReg() && MI->getOperand(Op+3).isImm() &&
+ MI->getOperand(Op+1).getImm() == 1 &&
+ MI->getOperand(Op+2).getReg() == 0 &&
+ MI->getOperand(Op+3).getImm() == 0) {
+ FrameIndex = MI->getOperand(Op).getIndex();
+ return true;
+ }
+ return false;
+}
+
+static bool isFrameLoadOpcode(int Opcode) {
+ switch (Opcode) {
default: break;
case X86::MOV8rm:
case X86::MOV16rm:
@@ -723,22 +741,14 @@ unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
case X86::MOVDQArm:
case X86::MMX_MOVD64rm:
case X86::MMX_MOVQ64rm:
- if (MI->getOperand(1).isFI() && MI->getOperand(2).isImm() &&
- MI->getOperand(3).isReg() && MI->getOperand(4).isImm() &&
- MI->getOperand(2).getImm() == 1 &&
- MI->getOperand(3).getReg() == 0 &&
- MI->getOperand(4).getImm() == 0) {
- FrameIndex = MI->getOperand(1).getIndex();
- return MI->getOperand(0).getReg();
- }
+ return true;
break;
}
- return 0;
+ return false;
}
-unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
- int &FrameIndex) const {
- switch (MI->getOpcode()) {
+static bool isFrameStoreOpcode(int Opcode) {
+ switch (Opcode) {
default: break;
case X86::MOV8mr:
case X86::MOV16mr:
@@ -753,19 +763,83 @@ unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
case X86::MMX_MOVD64mr:
case X86::MMX_MOVQ64mr:
case X86::MMX_MOVNTQmr:
- if (MI->getOperand(0).isFI() && MI->getOperand(1).isImm() &&
- MI->getOperand(2).isReg() && MI->getOperand(3).isImm() &&
- MI->getOperand(1).getImm() == 1 &&
- MI->getOperand(2).getReg() == 0 &&
- MI->getOperand(3).getImm() == 0) {
- FrameIndex = MI->getOperand(0).getIndex();
+ return true;
+ }
+ return false;
+}
+
+unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ if (isFrameLoadOpcode(MI->getOpcode()))
+ if (isFrameOperand(MI, 1, FrameIndex))
+ return MI->getOperand(0).getReg();
+ return 0;
+}
+
+unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const {
+ if (isFrameLoadOpcode(MI->getOpcode())) {
+ unsigned Reg;
+ if ((Reg = isLoadFromStackSlot(MI, FrameIndex)))
+ return Reg;
+ // Check for post-frame index elimination operations
+ return hasLoadFromStackSlot(MI, FrameIndex);
+ }
+ return 0;
+}
+
+bool X86InstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
+ oe = MI->memoperands_end();
+ o != oe;
+ ++o) {
+ if ((*o)->isLoad() && (*o)->getValue())
+ if (const FixedStackPseudoSourceValue *Value =
+ dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
+ FrameIndex = Value->getFrameIndex();
+ return true;
+ }
+ }
+ return false;
+}
+
+unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ if (isFrameStoreOpcode(MI->getOpcode()))
+ if (isFrameOperand(MI, 0, FrameIndex))
return MI->getOperand(X86AddrNumOperands).getReg();
- }
- break;
+ return 0;
+}
+
+unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const {
+ if (isFrameStoreOpcode(MI->getOpcode())) {
+ unsigned Reg;
+ if ((Reg = isStoreToStackSlot(MI, FrameIndex)))
+ return Reg;
+ // Check for post-frame index elimination operations
+ return hasStoreToStackSlot(MI, FrameIndex);
}
return 0;
}
+bool X86InstrInfo::hasStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
+ oe = MI->memoperands_end();
+ o != oe;
+ ++o) {
+ if ((*o)->isStore() && (*o)->getValue())
+ if (const FixedStackPseudoSourceValue *Value =
+ dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
+ FrameIndex = Value->getFrameIndex();
+ return true;
+ }
+ }
+ return false;
+}
+
/// regIsPICBase - Return true if register is PIC base (i.e.g defined by
/// X86::MOVPC32r.
static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) {
@@ -794,10 +868,14 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
case X86::MOVSSrm:
case X86::MOVSDrm:
case X86::MOVAPSrm:
+ case X86::MOVUPSrm:
+ case X86::MOVUPSrm_Int:
case X86::MOVAPDrm:
case X86::MOVDQArm:
case X86::MMX_MOVD64rm:
- case X86::MMX_MOVQ64rm: {
+ case X86::MMX_MOVQ64rm:
+ case X86::FsMOVAPSrm:
+ case X86::FsMOVAPDrm: {
// Loads from constant pools are trivially rematerializable.
if (MI->getOperand(1).isReg() &&
MI->getOperand(2).isImm() &&
@@ -917,12 +995,13 @@ static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SubIdx,
- const MachineInstr *Orig) const {
+ const MachineInstr *Orig,
+ const TargetRegisterInfo *TRI) const {
DebugLoc DL = DebugLoc::getUnknownLoc();
if (I != MBB.end()) DL = I->getDebugLoc();
if (SubIdx && TargetRegisterInfo::isPhysicalRegister(DestReg)) {
- DestReg = RI.getSubReg(DestReg, SubIdx);
+ DestReg = TRI->getSubReg(DestReg, SubIdx);
SubIdx = 0;
}
@@ -1891,8 +1970,7 @@ void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
MachineInstr::mmo_iterator MMOBegin,
MachineInstr::mmo_iterator MMOEnd,
SmallVectorImpl<MachineInstr*> &NewMIs) const {
- bool isAligned = (RI.getStackAlignment() >= 16) ||
- RI.needsStackRealignment(MF);
+ bool isAligned = (*MMOBegin)->getAlignment() >= 16;
unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
DebugLoc DL = DebugLoc::getUnknownLoc();
MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc));
@@ -1985,8 +2063,7 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
MachineInstr::mmo_iterator MMOBegin,
MachineInstr::mmo_iterator MMOEnd,
SmallVectorImpl<MachineInstr*> &NewMIs) const {
- bool isAligned = (RI.getStackAlignment() >= 16) ||
- RI.needsStackRealignment(MF);
+ bool isAligned = (*MMOBegin)->getAlignment() >= 16;
unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
DebugLoc DL = DebugLoc::getUnknownLoc();
MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
@@ -2170,7 +2247,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
// If table selected...
if (OpcodeTablePtr) {
// Find the Opcode to fuse
- DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I =
+ DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
OpcodeTablePtr->find((unsigned*)MI->getOpcode());
if (I != OpcodeTablePtr->end()) {
unsigned Opcode = I->second.first;
@@ -2402,7 +2479,7 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
if (OpcodeTablePtr) {
// Find the Opcode to fuse
- DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I =
+ DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
OpcodeTablePtr->find((unsigned*)Opc);
if (I != OpcodeTablePtr->end())
return true;
@@ -2413,7 +2490,7 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
SmallVectorImpl<MachineInstr*> &NewMIs) const {
- DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I =
+ DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
MemOp2RegOpTable.find((unsigned*)MI->getOpcode());
if (I == MemOp2RegOpTable.end())
return false;
@@ -2530,7 +2607,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
if (!N->isMachineOpcode())
return false;
- DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I =
+ DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
MemOp2RegOpTable.find((unsigned*)N->getMachineOpcode());
if (I == MemOp2RegOpTable.end())
return false;
@@ -2563,17 +2640,16 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
MachineFunction &MF = DAG.getMachineFunction();
if (FoldedLoad) {
EVT VT = *RC->vt_begin();
- bool isAligned = (RI.getStackAlignment() >= 16) ||
- RI.needsStackRealignment(MF);
+ std::pair<MachineInstr::mmo_iterator,
+ MachineInstr::mmo_iterator> MMOs =
+ MF.extractLoadMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
+ cast<MachineSDNode>(N)->memoperands_end());
+ bool isAligned = (*MMOs.first)->getAlignment() >= 16;
Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, TM), dl,
VT, MVT::Other, &AddrOps[0], AddrOps.size());
NewNodes.push_back(Load);
// Preserve memory reference information.
- std::pair<MachineInstr::mmo_iterator,
- MachineInstr::mmo_iterator> MMOs =
- MF.extractLoadMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
- cast<MachineSDNode>(N)->memoperands_end());
cast<MachineSDNode>(Load)->setMemRefs(MMOs.first, MMOs.second);
}
@@ -2601,8 +2677,11 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
AddrOps.pop_back();
AddrOps.push_back(SDValue(NewNode, 0));
AddrOps.push_back(Chain);
- bool isAligned = (RI.getStackAlignment() >= 16) ||
- RI.needsStackRealignment(MF);
+ std::pair<MachineInstr::mmo_iterator,
+ MachineInstr::mmo_iterator> MMOs =
+ MF.extractStoreMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
+ cast<MachineSDNode>(N)->memoperands_end());
+ bool isAligned = (*MMOs.first)->getAlignment() >= 16;
SDNode *Store = DAG.getMachineNode(getStoreRegOpcode(0, DstRC,
isAligned, TM),
dl, MVT::Other,
@@ -2610,10 +2689,6 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
NewNodes.push_back(Store);
// Preserve memory reference information.
- std::pair<MachineInstr::mmo_iterator,
- MachineInstr::mmo_iterator> MMOs =
- MF.extractStoreMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
- cast<MachineSDNode>(N)->memoperands_end());
cast<MachineSDNode>(Load)->setMemRefs(MMOs.first, MMOs.second);
}
@@ -2623,7 +2698,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
unsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
bool UnfoldLoad, bool UnfoldStore,
unsigned *LoadRegIndex) const {
- DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I =
+ DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
MemOp2RegOpTable.find((unsigned*)Opc);
if (I == MemOp2RegOpTable.end())
return 0;
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index 6eb07d5..c6daa25 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -449,13 +449,41 @@ public:
unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
+ /// isLoadFromStackSlotPostFE - Check for post-frame ptr elimination
+ /// stack locations as well. This uses a heuristic so it isn't
+ /// reliable for correctness.
+ unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ /// hasLoadFromStackSlot - If the specified machine instruction has
+ /// a load from a stack slot, return true along with the FrameIndex
+ /// of the loaded stack slot. If not, return false. Unlike
+ /// isLoadFromStackSlot, this returns true for any instructions that
+ /// loads from the stack. This is a hint only and may not catch all
+ /// cases.
+ bool hasLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
+
unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const;
+ /// isStoreToStackSlotPostFE - Check for post-frame ptr elimination
+ /// stack locations as well. This uses a heuristic so it isn't
+ /// reliable for correctness.
+ unsigned isStoreToStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ /// hasStoreToStackSlot - If the specified machine instruction has a
+ /// store to a stack slot, return true along with the FrameIndex of
+ /// the loaded stack slot. If not, return false. Unlike
+ /// isStoreToStackSlot, this returns true for any instructions that
+ /// loads from the stack. This is a hint only and may not catch all
+ /// cases.
+ bool hasStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const;
bool isReallyTriviallyReMaterializable(const MachineInstr *MI,
AliasAnalysis *AA) const;
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
unsigned DestReg, unsigned SubIdx,
- const MachineInstr *Orig) const;
+ const MachineInstr *Orig,
+ const TargetRegisterInfo *TRI) const;
/// convertToThreeAddress - This method must be implemented by targets that
/// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target
@@ -610,6 +638,11 @@ private:
unsigned OpNum,
const SmallVectorImpl<MachineOperand> &MOs,
unsigned Size, unsigned Alignment) const;
+
+ /// isFrameOperand - Return true and the FrameIndex if the specified
+ /// operand and follow operands form a reference to the stack frame.
+ bool isFrameOperand(const MachineInstr *MI, unsigned int Op,
+ int &FrameIndex) const;
};
} // End llvm namespace
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 9b82e1e..a79f262 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -543,7 +543,7 @@ let neverHasSideEffects = 1 in {
}
// Trap
-def INT3 : I<0xcc, RawFrm, (outs), (ins), "int 3", []>;
+def INT3 : I<0xcc, RawFrm, (outs), (ins), "int\t3", []>;
def INT : I<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap", []>;
// PIC base construction. This expands to code that looks like this:
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index be242a0..ee63d56 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -225,9 +225,9 @@ def movhlps_undef : PatFrag<(ops node:$lhs, node:$rhs),
return X86::isMOVHLPS_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
}]>;
-def movhp : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isMOVHPMask(cast<ShuffleVectorSDNode>(N));
+def movlhps : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isMOVLHPSMask(cast<ShuffleVectorSDNode>(N));
}]>;
def movlp : PatFrag<(ops node:$lhs, node:$rhs),
@@ -497,7 +497,7 @@ def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
// Alias instruction to load FR32 from f128mem using movaps. Upper bits are
// disregarded.
-let canFoldAsLoad = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
"movaps\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (alignedloadfsf32 addr:$src))]>;
@@ -706,7 +706,7 @@ def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
let neverHasSideEffects = 1 in
def MOVUPSrr : PSI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movups\t{$src, $dst|$dst, $src}", []>;
-let canFoldAsLoad = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
def MOVUPSrm : PSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"movups\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (loadv4f32 addr:$src))]>;
@@ -715,7 +715,7 @@ def MOVUPSmr : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
[(store (v4f32 VR128:$src), addr:$dst)]>;
// Intrinsic forms of MOVUPS load and store
-let canFoldAsLoad = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
def MOVUPSrm_Int : PSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"movups\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse_loadu_ps addr:$src))]>;
@@ -735,7 +735,7 @@ let Constraints = "$src1 = $dst" in {
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
"movhps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (movhp VR128:$src1,
+ (movlhps VR128:$src1,
(bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))]>;
} // AddedComplexity
} // Constraints = "$src1 = $dst"
@@ -760,7 +760,7 @@ def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
"movlhps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (v4f32 (movhp VR128:$src1, VR128:$src2)))]>;
+ (v4f32 (movlhps VR128:$src1, VR128:$src2)))]>;
def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
@@ -1256,7 +1256,7 @@ def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
// Alias instruction to load FR64 from f128mem using movapd. Upper bits are
// disregarded.
-let canFoldAsLoad = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
"movapd\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (alignedloadfsf64 addr:$src))]>;
@@ -1494,7 +1494,7 @@ let Constraints = "$src1 = $dst" in {
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
"movhpd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (v2f64 (movhp VR128:$src1,
+ (v2f64 (movlhps VR128:$src1,
(scalar_to_vector (loadf64 addr:$src2)))))]>;
} // AddedComplexity
} // Constraints = "$src1 = $dst"
@@ -2085,7 +2085,7 @@ def PSHUFDmi : PDIi8<0x70, MRMSrcMem,
[(set VR128:$dst, (v4i32 (pshufd:$src2
(bc_v4i32(memopv2i64 addr:$src1)),
(undef))))]>;
-}
+}
// SSE2 with ImmT == Imm8 and XS prefix.
def PSHUFHWri : Ii8<0x70, MRMSrcReg,
@@ -2874,7 +2874,7 @@ def : Pat<(v16i8 (palign:$src3 VR128:$src1, VR128:$src2)),
(PALIGNR128rr VR128:$src2, VR128:$src1,
(SHUFFLE_get_palign_imm VR128:$src3))>,
Requires<[HasSSSE3]>;
-}
+}
def : Pat<(X86pshufb VR128:$src, VR128:$mask),
(PSHUFBrr128 VR128:$src, VR128:$mask)>, Requires<[HasSSSE3]>;
@@ -3035,7 +3035,7 @@ def : Pat<(v4i32 (unpckh_undef VR128:$src, (undef))),
let AddedComplexity = 20 in {
// vector_shuffle v1, v2 <0, 1, 4, 5> using MOVLHPS
-def : Pat<(v4i32 (movhp VR128:$src1, VR128:$src2)),
+def : Pat<(v4i32 (movlhps VR128:$src1, VR128:$src2)),
(MOVLHPSrr VR128:$src1, VR128:$src2)>;
// vector_shuffle v1, v2 <6, 7, 2, 3> using MOVHLPS
@@ -3051,48 +3051,26 @@ def : Pat<(v4i32 (movhlps_undef VR128:$src1, (undef))),
let AddedComplexity = 20 in {
// vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS
-// vector_shuffle v1, (load v2) <0, 1, 4, 5> using MOVHPS
def : Pat<(v4f32 (movlp VR128:$src1, (load addr:$src2))),
(MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>;
def : Pat<(v2f64 (movlp VR128:$src1, (load addr:$src2))),
(MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
-def : Pat<(v4f32 (movhp VR128:$src1, (load addr:$src2))),
- (MOVHPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>;
-def : Pat<(v2f64 (movhp VR128:$src1, (load addr:$src2))),
- (MOVHPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
-
def : Pat<(v4i32 (movlp VR128:$src1, (load addr:$src2))),
(MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
def : Pat<(v2i64 (movlp VR128:$src1, (load addr:$src2))),
(MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
-def : Pat<(v4i32 (movhp VR128:$src1, (load addr:$src2))),
- (MOVHPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>;
-def : Pat<(v2i64 (movhp VR128:$src1, (load addr:$src2))),
- (MOVHPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
}
// (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS
-// (store (vector_shuffle (load addr), v2, <0, 1, 4, 5>), addr) using MOVHPS
def : Pat<(store (v4f32 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
(MOVLPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>;
def : Pat<(store (v2f64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
(MOVLPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
-def : Pat<(store (v4f32 (movhp (load addr:$src1), VR128:$src2)), addr:$src1),
- (MOVHPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>;
-def : Pat<(store (v2f64 (movhp (load addr:$src1), VR128:$src2)), addr:$src1),
- (MOVHPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
-
def : Pat<(store (v4i32 (movlp (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)),
addr:$src1),
(MOVLPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>;
def : Pat<(store (v2i64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
(MOVLPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
-def : Pat<(store (v4i32 (movhp (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)),
- addr:$src1),
- (MOVHPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>;
-def : Pat<(store (v2i64 (movhp (load addr:$src1), VR128:$src2)), addr:$src1),
- (MOVHPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
-
let AddedComplexity = 15 in {
// Setting the lowest element in the vector.
diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp
index 62ca47f..0792bdd 100644
--- a/lib/Target/X86/X86JITInfo.cpp
+++ b/lib/Target/X86/X86JITInfo.cpp
@@ -367,8 +367,9 @@ X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) {
// Rewrite the call target... so that we don't end up here every time we
// execute the call.
#if defined (X86_64_JIT)
- if (!isStub)
- *(intptr_t *)(RetAddr - 0xa) = NewVal;
+ assert(isStub &&
+ "X86-64 doesn't support rewriting non-stub lazy compilation calls:"
+ " the call instruction varies too much.");
#else
*(intptr_t *)RetAddr = (intptr_t)(NewVal-RetAddr-4);
#endif
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index c5ff525..f577fcf 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -392,6 +392,11 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(X86::SP);
Reserved.set(X86::SPL);
+ // Set the instruction pointer register and its aliases as reserved.
+ Reserved.set(X86::RIP);
+ Reserved.set(X86::EIP);
+ Reserved.set(X86::IP);
+
// Set the frame-pointer register and its aliases as reserved if needed.
if (hasFP(MF)) {
Reserved.set(X86::RBP);
@@ -450,12 +455,17 @@ bool X86RegisterInfo::hasFP(const MachineFunction &MF) const {
bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
+ bool requiresRealignment =
+ RealignStack && (MFI->getMaxAlignment() > StackAlign);
// FIXME: Currently we don't support stack realignment for functions with
- // variable-sized allocas
- return (RealignStack &&
- (MFI->getMaxAlignment() > StackAlign &&
- !MFI->hasVarSizedObjects()));
+ // variable-sized allocas.
+ // FIXME: Temporary disable the error - it seems to be too conservative.
+ if (0 && requiresRealignment && MFI->hasVarSizedObjects())
+ llvm_report_error(
+ "Stack realignment in presense of dynamic allocas is not supported");
+
+ return (requiresRealignment && !MFI->hasVarSizedObjects());
}
bool X86RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const {
@@ -610,8 +620,8 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// Offset is a 32-bit integer.
int Offset = getFrameIndexOffset(MF, FrameIndex) +
(int)(MI.getOperand(i + 3).getImm());
-
- MI.getOperand(i + 3).ChangeToImmediate(Offset);
+
+ MI.getOperand(i + 3).ChangeToImmediate(Offset);
} else {
// Offset is symbolic. This is extremely rare.
uint64_t Offset = getFrameIndexOffset(MF, FrameIndex) +
@@ -647,7 +657,8 @@ X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// }
// [EBP]
MFI->CreateFixedObject(-TailCallReturnAddrDelta,
- (-1U*SlotSize)+TailCallReturnAddrDelta);
+ (-1U*SlotSize)+TailCallReturnAddrDelta,
+ true, false);
}
if (hasFP(MF)) {
@@ -659,7 +670,8 @@ X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
int FrameIdx = MFI->CreateFixedObject(SlotSize,
-(int)SlotSize +
TFI.getOffsetOfLocalArea() +
- TailCallReturnAddrDelta);
+ TailCallReturnAddrDelta,
+ true, false);
assert(FrameIdx == MFI->getObjectIndexBegin() &&
"Slot for EBP register must be last in order to be found!");
FrameIdx = 0;
@@ -1271,7 +1283,7 @@ unsigned X86RegisterInfo::getRARegister() const {
: X86::EIP; // Should have dwarf #8.
}
-unsigned X86RegisterInfo::getFrameRegister(MachineFunction &MF) const {
+unsigned X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
return hasFP(MF) ? FramePtr : StackPtr;
}
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index f635707..f281a3c 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -153,7 +153,7 @@ public:
// Debug information queries.
unsigned getRARegister() const;
- unsigned getFrameRegister(MachineFunction &MF) const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
int getFrameIndexOffset(MachineFunction &MF, int FI) const;
void getInitialFrameState(std::vector<MachineMove> &Moves) const;
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 9525f04..b901c14 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -18,8 +18,10 @@
#include "llvm/GlobalValue.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/System/Host.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/SmallVector.h"
using namespace llvm;
#if defined(_MSC_VER)
@@ -257,118 +259,6 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
}
}
-static const char *GetCurrentX86CPU() {
- unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
- if (GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX))
- return "generic";
- unsigned Family = 0;
- unsigned Model = 0;
- DetectFamilyModel(EAX, Family, Model);
-
- GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
- bool Em64T = (EDX >> 29) & 0x1;
- bool HasSSE3 = (ECX & 0x1);
-
- union {
- unsigned u[3];
- char c[12];
- } text;
-
- GetCpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1);
- if (memcmp(text.c, "GenuineIntel", 12) == 0) {
- switch (Family) {
- case 3:
- return "i386";
- case 4:
- return "i486";
- case 5:
- switch (Model) {
- case 4: return "pentium-mmx";
- default: return "pentium";
- }
- case 6:
- switch (Model) {
- case 1: return "pentiumpro";
- case 3:
- case 5:
- case 6: return "pentium2";
- case 7:
- case 8:
- case 10:
- case 11: return "pentium3";
- case 9:
- case 13: return "pentium-m";
- case 14: return "yonah";
- case 15:
- case 22: // Celeron M 540
- return "core2";
- case 23: // 45nm: Penryn , Wolfdale, Yorkfield (XE)
- return "penryn";
- default: return "i686";
- }
- case 15: {
- switch (Model) {
- case 3:
- case 4:
- case 6: // same as 4, but 65nm
- return (Em64T) ? "nocona" : "prescott";
- case 26:
- return "corei7";
- case 28:
- return "atom";
- default:
- return (Em64T) ? "x86-64" : "pentium4";
- }
- }
-
- default:
- return "generic";
- }
- } else if (memcmp(text.c, "AuthenticAMD", 12) == 0) {
- // FIXME: this poorly matches the generated SubtargetFeatureKV table. There
- // appears to be no way to generate the wide variety of AMD-specific targets
- // from the information returned from CPUID.
- switch (Family) {
- case 4:
- return "i486";
- case 5:
- switch (Model) {
- case 6:
- case 7: return "k6";
- case 8: return "k6-2";
- case 9:
- case 13: return "k6-3";
- default: return "pentium";
- }
- case 6:
- switch (Model) {
- case 4: return "athlon-tbird";
- case 6:
- case 7:
- case 8: return "athlon-mp";
- case 10: return "athlon-xp";
- default: return "athlon";
- }
- case 15:
- if (HasSSE3) {
- return "k8-sse3";
- } else {
- switch (Model) {
- case 1: return "opteron";
- case 5: return "athlon-fx"; // also opteron
- default: return "athlon64";
- }
- }
- case 16:
- return "amdfam10";
- default:
- return "generic";
- }
- } else {
- return "generic";
- }
-}
-
X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,
bool is64Bit)
: PICStyle(PICStyles::None)
@@ -395,7 +285,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,
// Determine default and user specified characteristics
if (!FS.empty()) {
// If feature string is not empty, parse features string.
- std::string CPU = GetCurrentX86CPU();
+ std::string CPU = sys::getHostCPUName();
ParseSubtargetFeatures(FS, CPU);
// All X86-64 CPUs also have SSE2, however user might request no SSE via
// -mattr, so don't force SSELevel here.
@@ -455,3 +345,12 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,
if (StackAlignment)
stackAlignment = StackAlignment;
}
+
+bool X86Subtarget::enablePostRAScheduler(
+ CodeGenOpt::Level OptLevel,
+ TargetSubtarget::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const {
+ Mode = TargetSubtarget::ANTIDEP_CRITICAL;
+ CriticalPathRCs.clear();
+ return OptLevel >= CodeGenOpt::Default;
+}
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index e64b854..23f2841 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -166,11 +166,11 @@ public:
std::string getDataLayout() const {
const char *p;
if (is64Bit())
- p = "e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128";
+ p = "e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-n8:16:32:64";
else if (isTargetDarwin())
- p = "e-p:32:32-f64:32:64-i64:32:64-f80:128:128";
+ p = "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-n8:16:32";
else
- p = "e-p:32:32-f64:32:64-i64:32:64-f80:32:32";
+ p = "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-n8:16:32";
return std::string(p);
}
@@ -219,10 +219,8 @@ public:
/// enablePostRAScheduler - X86 target is enabling post-alloc scheduling
/// at 'More' optimization level.
bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
- TargetSubtarget::AntiDepBreakMode& mode) const {
- mode = TargetSubtarget::ANTIDEP_CRITICAL;
- return OptLevel >= CodeGenOpt::Default;
- }
+ TargetSubtarget::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const;
};
} // End llvm namespace
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index a61de1c..0cda8bc 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -22,8 +22,7 @@
#include "llvm/Target/TargetRegistry.h"
using namespace llvm;
-static const MCAsmInfo *createMCAsmInfo(const Target &T,
- const StringRef &TT) {
+static const MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
Triple TheTriple(TT);
switch (TheTriple.getOS()) {
case Triple::Darwin:
@@ -186,14 +185,8 @@ bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM,
}
// 64-bit JIT places everything in the same buffer except external functions.
- // On Darwin, use small code model but hack the call instruction for
- // externals. Elsewhere, do not assume globals are in the lower 4G.
- if (Subtarget.is64Bit()) {
- if (Subtarget.isTargetDarwin())
- setCodeModel(CodeModel::Small);
- else
+ if (Subtarget.is64Bit())
setCodeModel(CodeModel::Large);
- }
PM.add(createX86CodeEmitterPass(*this, MCE));
@@ -212,14 +205,8 @@ bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM,
}
// 64-bit JIT places everything in the same buffer except external functions.
- // On Darwin, use small code model but hack the call instruction for
- // externals. Elsewhere, do not assume globals are in the lower 4G.
- if (Subtarget.is64Bit()) {
- if (Subtarget.isTargetDarwin())
- setCodeModel(CodeModel::Small);
- else
+ if (Subtarget.is64Bit())
setCodeModel(CodeModel::Large);
- }
PM.add(createX86JITCodeEmitterPass(*this, JCE));