author     ed <ed@FreeBSD.org>   2009-06-06 08:20:29 +0000
committer  ed <ed@FreeBSD.org>   2009-06-06 08:20:29 +0000
commit     2361a5c2bfbaef476824e51fa72712e334219c7b (patch)
tree       8a1bbd1a5b838080d31e5c93a1817006b8c62318 /lib/Target/X86
parent     1941b8772a36a33c7b86cb67163cd735b3d57221 (diff)
Import LLVM, at r72995.
We should now have support for #pragma weak.
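For context, #pragma weak is the GCC/Clang source-level extension that this LLVM revision can now lower: it marks a symbol as weak, or defines a weak alias of an existing function, so a strong definition elsewhere overrides it at link time. A minimal C sketch follows; the function names are illustrative, not taken from this tree.

  /* weak_alias_example.c -- illustrative names, builds with gcc or clang */
  #include <stdio.h>

  void default_impl(void) {
      puts("default_impl");
  }

  /* Declare my_hook as a weak alias of default_impl.  If no strong
     definition of my_hook is linked into the program, calls to it
     resolve to default_impl; a strong definition elsewhere wins. */
  #pragma weak my_hook = default_impl
  void my_hook(void);

  int main(void) {
      my_hook();  /* prints "default_impl" unless a strong my_hook exists */
      return 0;
  }

Linking this object once on its own and once together with a file that provides a strong my_hook shows the override behavior.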
Diffstat (limited to 'lib/Target/X86')
-rw-r--r--  lib/Target/X86/X86ELFWriterInfo.cpp      |   3
-rw-r--r--  lib/Target/X86/X86ELFWriterInfo.h        |   2
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp       | 192
-rw-r--r--  lib/Target/X86/X86ISelLowering.h         |   3
-rw-r--r--  lib/Target/X86/X86InstrInfo.cpp          |  21
-rw-r--r--  lib/Target/X86/X86InstrMMX.td            |  24
-rw-r--r--  lib/Target/X86/X86InstrSSE.td            |   2
-rw-r--r--  lib/Target/X86/X86MachineFunctionInfo.h  |  17
-rw-r--r--  lib/Target/X86/X86RegisterInfo.cpp       |  35
-rw-r--r--  lib/Target/X86/X86TargetMachine.cpp      |  10
10 files changed, 197 insertions(+), 112 deletions(-)
diff --git a/lib/Target/X86/X86ELFWriterInfo.cpp b/lib/Target/X86/X86ELFWriterInfo.cpp
index 4c3cc82..2604741 100644
--- a/lib/Target/X86/X86ELFWriterInfo.cpp
+++ b/lib/Target/X86/X86ELFWriterInfo.cpp
@@ -14,5 +14,6 @@
#include "X86ELFWriterInfo.h"
using namespace llvm;
-X86ELFWriterInfo::X86ELFWriterInfo() : TargetELFWriterInfo(EM_386) {}
+X86ELFWriterInfo::X86ELFWriterInfo(bool is64Bit) :
+ TargetELFWriterInfo(is64Bit ? EM_X86_64 : EM_386) {}
X86ELFWriterInfo::~X86ELFWriterInfo() {}
diff --git a/lib/Target/X86/X86ELFWriterInfo.h b/lib/Target/X86/X86ELFWriterInfo.h
index 06e051a..acfa501 100644
--- a/lib/Target/X86/X86ELFWriterInfo.h
+++ b/lib/Target/X86/X86ELFWriterInfo.h
@@ -20,7 +20,7 @@ namespace llvm {
class X86ELFWriterInfo : public TargetELFWriterInfo {
public:
- X86ELFWriterInfo();
+ X86ELFWriterInfo(bool is64Bit);
virtual ~X86ELFWriterInfo();
};
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 1f507c3..ef60ff5 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -126,7 +126,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote);
- if (!UseSoftFloat && !NoImplicitFloat) {
+ if (!UseSoftFloat) {
// SSE has no i16 to fp conversion, only i32
if (X86ScalarSSEf32) {
setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
@@ -550,6 +550,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FLOG10, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::FEXP, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::FEXP2, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FP_TO_UINT, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::FP_TO_SINT, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::UINT_TO_FP, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SINT_TO_FP, (MVT::SimpleValueType)VT, Expand);
}
// FIXME: In order to prevent SSE instructions being expanded to MMX ones
@@ -734,6 +738,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
+ if (!DisableMMX && Subtarget->hasMMX()) {
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
+ }
}
if (Subtarget->hasSSE41()) {
@@ -868,11 +878,14 @@ unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const {
/// determining it.
MVT
X86TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned Align,
- bool isSrcConst, bool isSrcStr) const {
+ bool isSrcConst, bool isSrcStr,
+ SelectionDAG &DAG) const {
// FIXME: This turns off use of xmm stores for memset/memcpy on targets like
// linux. This is because the stack realignment code can't handle certain
// cases like PR2962. This should be removed when PR2962 is fixed.
- if (!NoImplicitFloat && Subtarget->getStackAlignment() >= 16) {
+ const Function *F = DAG.getMachineFunction().getFunction();
+ bool NoImplicitFloatOps = F->hasFnAttr(Attribute::NoImplicitFloat);
+ if (!NoImplicitFloatOps && Subtarget->getStackAlignment() >= 16) {
if ((isSrcConst || isSrcStr) && Subtarget->hasSSE2() && Size >= 16)
return MVT::v4i32;
if ((isSrcConst || isSrcStr) && Subtarget->hasSSE1() && Size >= 16)
@@ -1404,11 +1417,12 @@ X86TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs,
TotalNumXMMRegs);
+ bool NoImplicitFloatOps = Fn->hasFnAttr(Attribute::NoImplicitFloat);
assert(!(NumXMMRegs && !Subtarget->hasSSE1()) &&
"SSE register cannot be used when SSE is disabled!");
- assert(!(NumXMMRegs && UseSoftFloat && NoImplicitFloat) &&
+ assert(!(NumXMMRegs && UseSoftFloat && NoImplicitFloatOps) &&
"SSE register cannot be used when SSE is disabled!");
- if (UseSoftFloat || NoImplicitFloat || !Subtarget->hasSSE1())
+ if (UseSoftFloat || NoImplicitFloatOps || !Subtarget->hasSSE1())
// Kernel mode asks for SSE to be disabled, so don't push them
// on the stack.
TotalNumXMMRegs = 0;
@@ -2414,9 +2428,10 @@ bool X86::isUNPCKH_v_undef_Mask(ShuffleVectorSDNode *N) {
/// specifies a shuffle of elements that is suitable for input to MOVSS,
/// MOVSD, and MOVD, i.e. setting the lowest element.
static bool isMOVLMask(const SmallVectorImpl<int> &Mask, MVT VT) {
- int NumElts = VT.getVectorNumElements();
- if (NumElts != 2 && NumElts != 4)
+ if (VT.getVectorElementType().getSizeInBits() < 32)
return false;
+
+ int NumElts = VT.getVectorNumElements();
if (!isUndefOrEqual(Mask[0], NumElts))
return false;
@@ -3068,7 +3083,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
}
// Special case for single non-zero, non-undef, element.
- if (NumNonZero == 1 && NumElems <= 4) {
+ if (NumNonZero == 1) {
unsigned Idx = CountTrailingZeros_32(NonZeros);
SDValue Item = Op.getOperand(Idx);
@@ -3109,15 +3124,24 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
// If we have a constant or non-constant insertion into the low element of
// a vector, we can do this with SCALAR_TO_VECTOR + shuffle of zero into
// the rest of the elements. This will be matched as movd/movq/movss/movsd
- // depending on what the source datatype is. Because we can only get here
- // when NumElems <= 4, this only needs to handle i32/f32/i64/f64.
- if (Idx == 0 &&
- // Don't do this for i64 values on x86-32.
- (EVT != MVT::i64 || Subtarget->is64Bit())) {
- Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
- // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
- return getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0,
- Subtarget->hasSSE2(), DAG);
+ // depending on what the source datatype is.
+ if (Idx == 0) {
+ if (NumZero == 0) {
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
+ } else if (EVT == MVT::i32 || EVT == MVT::f32 || EVT == MVT::f64 ||
+ (EVT == MVT::i64 && Subtarget->is64Bit())) {
+ Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
+ // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
+ return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget->hasSSE2(),
+ DAG);
+ } else if (EVT == MVT::i16 || EVT == MVT::i8) {
+ Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item);
+ MVT MiddleVT = VT.getSizeInBits() == 64 ? MVT::v2i32 : MVT::v4i32;
+ Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MiddleVT, Item);
+ Item = getShuffleVectorZeroOrUndef(Item, 0, true,
+ Subtarget->hasSSE2(), DAG);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Item);
+ }
}
// Is it a vector logical left shift?
@@ -4248,7 +4272,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
SDValue N1 = Op.getOperand(1);
SDValue N2 = Op.getOperand(2);
- if (EVT.getSizeInBits() == 16) {
+ if (EVT.getSizeInBits() == 16 && isa<ConstantSDNode>(N2)) {
// Transform it so it match pinsrw which expects a 16-bit value in a GR32
// as its second argument.
if (N1.getValueType() != MVT::i32)
@@ -4554,6 +4578,14 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) {
SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
MVT SrcVT = Op.getOperand(0).getValueType();
+
+ if (SrcVT.isVector()) {
+ if (SrcVT == MVT::v2i32 && Op.getValueType() == MVT::v2f64) {
+ return Op;
+ }
+ return SDValue();
+ }
+
assert(SrcVT.getSimpleVT() <= MVT::i64 && SrcVT.getSimpleVT() >= MVT::i16 &&
"Unknown SINT_TO_FP to lower!");
@@ -4845,6 +4877,14 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) {
}
SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) {
+ if (Op.getValueType().isVector()) {
+ if (Op.getValueType() == MVT::v2i32 &&
+ Op.getOperand(0).getValueType() == MVT::v2f64) {
+ return Op;
+ }
+ return SDValue();
+ }
+
std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG, true);
SDValue FIST = Vals.first, StackSlot = Vals.second;
// If FP_TO_INTHelper failed, the node is actually supposed to be Legal.
@@ -7675,8 +7715,9 @@ static bool EltsFromConsecutiveLoads(ShuffleVectorSDNode *N, unsigned NumElems,
if (Elt.getOpcode() == ISD::UNDEF)
continue;
- if (!TLI.isConsecutiveLoad(Elt.getNode(), Base,
- EVT.getSizeInBits()/8, i, MFI))
+ LoadSDNode *LD = cast<LoadSDNode>(Elt);
+ LoadSDNode *LDBase = cast<LoadSDNode>(Base);
+ if (!TLI.isConsecutiveLoad(LD, LDBase, EVT.getSizeInBits()/8, i, MFI))
return false;
}
return true;
@@ -7751,44 +7792,82 @@ static SDValue PerformBuildVectorCombine(SDNode *N, SelectionDAG &DAG,
MVT VT = N->getValueType(0);
MVT EVT = VT.getVectorElementType();
- if ((EVT != MVT::i64 && EVT != MVT::f64) || Subtarget->is64Bit())
- // We are looking for load i64 and zero extend. We want to transform
- // it before legalizer has a chance to expand it. Also look for i64
- // BUILD_PAIR bit casted to f64.
- return SDValue();
- // This must be an insertion into a zero vector.
- SDValue HighElt = N->getOperand(1);
- if (!isZeroNode(HighElt))
- return SDValue();
+
+ // Before or during type legalization, we want to try and convert a
+ // build_vector of an i64 load and a zero value into vzext_movl before the
+ // legalizer can break it up.
+ // FIXME: does the case below remove the need to do this?
+ if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) {
+ if ((EVT != MVT::i64 && EVT != MVT::f64) || Subtarget->is64Bit())
+ return SDValue();
+
+ // This must be an insertion into a zero vector.
+ SDValue HighElt = N->getOperand(1);
+ if (!isZeroNode(HighElt))
+ return SDValue();
+
+ // Value must be a load.
+ SDNode *Base = N->getOperand(0).getNode();
+ if (!isa<LoadSDNode>(Base)) {
+ if (Base->getOpcode() != ISD::BIT_CONVERT)
+ return SDValue();
+ Base = Base->getOperand(0).getNode();
+ if (!isa<LoadSDNode>(Base))
+ return SDValue();
+ }
+
+ // Transform it into VZEXT_LOAD addr.
+ LoadSDNode *LD = cast<LoadSDNode>(Base);
+
+ // Load must not be an extload.
+ if (LD->getExtensionType() != ISD::NON_EXTLOAD)
+ return SDValue();
+
+ // Load type should legal type so we don't have to legalize it.
+ if (!TLI.isTypeLegal(VT))
+ return SDValue();
+
+ SDVTList Tys = DAG.getVTList(VT, MVT::Other);
+ SDValue Ops[] = { LD->getChain(), LD->getBasePtr() };
+ SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2);
+ TargetLowering::TargetLoweringOpt TLO(DAG);
+ TLO.CombineTo(SDValue(Base, 1), ResNode.getValue(1));
+ DCI.CommitTargetLoweringOpt(TLO);
+ return ResNode;
+ }
+
+ // The type legalizer will have broken apart v2i64 build_vector created during
+ // widening before the code which handles that case is run. Look for build
+ // vector (load, load + 4, 0/undef, 0/undef)
+ if (VT == MVT::v4i32 || VT == MVT::v4f32) {
+ LoadSDNode *LD0 = dyn_cast<LoadSDNode>(N->getOperand(0));
+ LoadSDNode *LD1 = dyn_cast<LoadSDNode>(N->getOperand(1));
+ if (!LD0 || !LD1)
+ return SDValue();
+ if (LD0->getExtensionType() != ISD::NON_EXTLOAD ||
+ LD1->getExtensionType() != ISD::NON_EXTLOAD)
+ return SDValue();
+ // Make sure the second elt is a consecutive load.
+ if (!TLI.isConsecutiveLoad(LD1, LD0, EVT.getSizeInBits()/8, 1,
+ DAG.getMachineFunction().getFrameInfo()))
+ return SDValue();
- // Value must be a load.
- SDNode *Base = N->getOperand(0).getNode();
- if (!isa<LoadSDNode>(Base)) {
- if (Base->getOpcode() != ISD::BIT_CONVERT)
+ SDValue N2 = N->getOperand(2);
+ SDValue N3 = N->getOperand(3);
+ if (!isZeroNode(N2) && N2.getOpcode() != ISD::UNDEF)
return SDValue();
- Base = Base->getOperand(0).getNode();
- if (!isa<LoadSDNode>(Base))
+ if (!isZeroNode(N3) && N3.getOpcode() != ISD::UNDEF)
return SDValue();
+
+ SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
+ SDValue Ops[] = { LD0->getChain(), LD0->getBasePtr() };
+ SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2);
+ TargetLowering::TargetLoweringOpt TLO(DAG);
+ TLO.CombineTo(SDValue(LD0, 1), ResNode.getValue(1));
+ DCI.CommitTargetLoweringOpt(TLO);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT, ResNode);
}
-
- // Transform it into VZEXT_LOAD addr.
- LoadSDNode *LD = cast<LoadSDNode>(Base);
-
- // Load must not be an extload.
- if (LD->getExtensionType() != ISD::NON_EXTLOAD)
- return SDValue();
-
- // Load type should legal type so we don't have to legalize it.
- if (!TLI.isTypeLegal(VT))
- return SDValue();
-
- SDVTList Tys = DAG.getVTList(VT, MVT::Other);
- SDValue Ops[] = { LD->getChain(), LD->getBasePtr() };
- SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2);
- TargetLowering::TargetLoweringOpt TLO(DAG);
- TLO.CombineTo(SDValue(Base, 1), ResNode.getValue(1));
- DCI.CommitTargetLoweringOpt(TLO);
- return ResNode;
+ return SDValue();
}
/// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes.
@@ -8242,7 +8321,10 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
if (VT.getSizeInBits() != 64)
return SDValue();
- bool F64IsLegal = !UseSoftFloat && !NoImplicitFloat && Subtarget->hasSSE2();
+ const Function *F = DAG.getMachineFunction().getFunction();
+ bool NoImplicitFloatOps = F->hasFnAttr(Attribute::NoImplicitFloat);
+ bool F64IsLegal = !UseSoftFloat && !NoImplicitFloatOps
+ && Subtarget->hasSSE2();
if ((VT.isVector() ||
(VT == MVT::i64 && F64IsLegal && !Subtarget->is64Bit())) &&
isa<LoadSDNode>(St->getValue()) &&
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 550f8bd..fb4eb68 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -378,7 +378,8 @@ namespace llvm {
/// determining it.
virtual
MVT getOptimalMemOpType(uint64_t Size, unsigned Align,
- bool isSrcConst, bool isSrcStr) const;
+ bool isSrcConst, bool isSrcStr,
+ SelectionDAG &DAG) const;
/// LowerOperation - Provide custom lowering hooks for some operations.
///
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 2cd3733..8a9b7c9 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -2009,16 +2009,24 @@ bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineFunction &MF = *MBB.getParent();
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
- X86FI->setCalleeSavedFrameSize(CSI.size() * SlotSize);
+ unsigned CalleeFrameSize = 0;
unsigned Opc = is64Bit ? X86::PUSH64r : X86::PUSH32r;
for (unsigned i = CSI.size(); i != 0; --i) {
unsigned Reg = CSI[i-1].getReg();
+ const TargetRegisterClass *RegClass = CSI[i-1].getRegClass();
// Add the callee-saved register as live-in. It's killed at the spill.
MBB.addLiveIn(Reg);
- BuildMI(MBB, MI, DL, get(Opc))
- .addReg(Reg, RegState::Kill);
+ if (RegClass != &X86::VR128RegClass) {
+ CalleeFrameSize += SlotSize;
+ BuildMI(MBB, MI, DL, get(Opc))
+ .addReg(Reg, RegState::Kill);
+ } else {
+ storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), RegClass);
+ }
}
+
+ X86FI->setCalleeSavedFrameSize(CalleeFrameSize);
return true;
}
@@ -2036,7 +2044,12 @@ bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
unsigned Opc = is64Bit ? X86::POP64r : X86::POP32r;
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
unsigned Reg = CSI[i].getReg();
- BuildMI(MBB, MI, DL, get(Opc), Reg);
+ const TargetRegisterClass *RegClass = CSI[i].getRegClass();
+ if (RegClass != &X86::VR128RegClass) {
+ BuildMI(MBB, MI, DL, get(Opc), Reg);
+ } else {
+ loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RegClass);
+ }
}
return true;
}
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index 8f287e1..43fadc2 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -577,41 +577,17 @@ def : Pat<(f64 (bitconvert (v4i16 VR64:$src))),
def : Pat<(f64 (bitconvert (v8i8 VR64:$src))),
(MMX_MOVQ2FR64rr VR64:$src)>;
-// Move scalar to MMX zero-extended
-// movd to MMX register zero-extends
-let AddedComplexity = 15 in {
- def : Pat<(v8i8 (X86vzmovl (bc_v8i8 (v2i32 (scalar_to_vector GR32:$src))))),
- (MMX_MOVZDI2PDIrr GR32:$src)>;
- def : Pat<(v4i16 (X86vzmovl (bc_v4i16 (v2i32 (scalar_to_vector GR32:$src))))),
- (MMX_MOVZDI2PDIrr GR32:$src)>;
-}
-
let AddedComplexity = 20 in {
- def : Pat<(v8i8 (X86vzmovl (bc_v8i8 (load_mmx addr:$src)))),
- (MMX_MOVZDI2PDIrm addr:$src)>;
- def : Pat<(v4i16 (X86vzmovl (bc_v4i16 (load_mmx addr:$src)))),
- (MMX_MOVZDI2PDIrm addr:$src)>;
def : Pat<(v2i32 (X86vzmovl (bc_v2i32 (load_mmx addr:$src)))),
(MMX_MOVZDI2PDIrm addr:$src)>;
}
// Clear top half.
let AddedComplexity = 15 in {
- def : Pat<(v8i8 (X86vzmovl VR64:$src)),
- (MMX_PUNPCKLDQrr VR64:$src, (MMX_V_SET0))>;
- def : Pat<(v4i16 (X86vzmovl VR64:$src)),
- (MMX_PUNPCKLDQrr VR64:$src, (MMX_V_SET0))>;
def : Pat<(v2i32 (X86vzmovl VR64:$src)),
(MMX_PUNPCKLDQrr VR64:$src, (MMX_V_SET0))>;
}
-// Scalar to v4i16 / v8i8. The source may be a GR32, but only the lower
-// 8 or 16-bits matter.
-def : Pat<(bc_v8i8 (v2i32 (scalar_to_vector GR32:$src))),
- (MMX_MOVD64rr GR32:$src)>;
-def : Pat<(bc_v4i16 (v2i32 (scalar_to_vector GR32:$src))),
- (MMX_MOVD64rr GR32:$src)>;
-
// Patterns to perform canonical versions of vector shuffling.
let AddedComplexity = 10 in {
def : Pat<(v8i8 (mmx_unpckl_undef VR64:$src, (undef))),
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 1fafa46..b44c7a6 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -3447,7 +3447,7 @@ multiclass SS41I_binop_rm_int2<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
}
defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq>;
-defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovsxbq", int_x86_sse41_pmovzxbq>;
+defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq>;
// Common patterns involving scalar load
def : Pat<(int_x86_sse41_pmovsxbq
diff --git a/lib/Target/X86/X86MachineFunctionInfo.h b/lib/Target/X86/X86MachineFunctionInfo.h
index 8a5ac2c..fafcf7e 100644
--- a/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/lib/Target/X86/X86MachineFunctionInfo.h
@@ -73,14 +73,15 @@ public:
SRetReturnReg(0),
GlobalBaseReg(0) {}
- X86MachineFunctionInfo(MachineFunction &MF) : ForceFramePointer(false),
- CalleeSavedFrameSize(0),
- BytesToPopOnReturn(0),
- DecorationStyle(None),
- ReturnAddrIndex(0),
- TailCallReturnAddrDelta(0),
- SRetReturnReg(0),
- GlobalBaseReg(0) {}
+ explicit X86MachineFunctionInfo(MachineFunction &MF)
+ : ForceFramePointer(false),
+ CalleeSavedFrameSize(0),
+ BytesToPopOnReturn(0),
+ DecorationStyle(None),
+ ReturnAddrIndex(0),
+ TailCallReturnAddrDelta(0),
+ SRetReturnReg(0),
+ GlobalBaseReg(0) {}
bool getForceFramePointer() const { return ForceFramePointer;}
void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; }
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 5af1fb1..c733f26 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -751,10 +751,12 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
// function, and use up to 128 bytes of stack space, don't have a frame
// pointer, calls, or dynamic alloca then we do not need to adjust the
// stack pointer (we fit in the Red Zone).
+ bool DisableRedZone = Fn->hasFnAttr(Attribute::NoRedZone);
if (Is64Bit && !DisableRedZone &&
!needsStackRealignment(MF) &&
!MFI->hasVarSizedObjects() && // No dynamic alloca.
- !MFI->hasCalls()) { // No calls.
+ !MFI->hasCalls() && // No calls.
+ !Subtarget->isTargetWin64()) { // Win64 has no Red Zone
uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
if (hasFP(MF)) MinSize += SlotSize;
StackSize = std::max(MinSize,
@@ -820,13 +822,6 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
}
- unsigned ReadyLabelId = 0;
- if (needsFrameMoves) {
- // Mark effective beginning of when frame pointer is ready.
- ReadyLabelId = MMI->NextLabelID();
- BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(ReadyLabelId);
- }
-
// Skip the callee-saved push instructions.
while (MBBI != MBB.end() &&
(MBBI->getOpcode() == X86::PUSH32r ||
@@ -836,20 +831,20 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
if (MBBI != MBB.end())
DL = MBBI->getDebugLoc();
- if (NumBytes) { // adjust stack pointer: ESP -= numbytes
+ if (NumBytes) { // Adjust stack pointer: ESP -= numbytes.
if (NumBytes >= 4096 && Subtarget->isTargetCygMing()) {
- // Check, whether EAX is livein for this function
+ // Check, whether EAX is livein for this function.
bool isEAXAlive = false;
for (MachineRegisterInfo::livein_iterator
II = MF.getRegInfo().livein_begin(),
EE = MF.getRegInfo().livein_end(); (II != EE) && !isEAXAlive; ++II) {
unsigned Reg = II->first;
isEAXAlive = (Reg == X86::EAX || Reg == X86::AX ||
- Reg == X86::AH || Reg == X86::AL);
+ Reg == X86::AH || Reg == X86::AL);
}
- // Function prologue calls _alloca to probe the stack when allocating
- // more than 4k bytes in one go. Touching the stack at 4K increments is
+ // Function prologue calls _alloca to probe the stack when allocating more
+ // than 4k bytes in one go. Touching the stack at 4K increments is
// necessary to ensure that the guard pages used by the OS virtual memory
// manager are allocated in correct sequence.
if (!isEAXAlive) {
@@ -861,12 +856,14 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
// Save EAX
BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
.addReg(X86::EAX, RegState::Kill);
+
// Allocate NumBytes-4 bytes on stack. We'll also use 4 already
// allocated bytes for EAX.
- BuildMI(MBB, MBBI, DL,
- TII.get(X86::MOV32ri), X86::EAX).addImm(NumBytes-4);
+ BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
+ .addImm(NumBytes-4);
BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
.addExternalSymbol("_alloca");
+
// Restore EAX
MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
X86::EAX),
@@ -878,6 +875,7 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
// merge the two. This can be the case when tail call elimination is
// enabled and the callee has more arguments then the caller.
NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true);
+
// If there is an ADD32ri or SUB32ri of ESP immediately after this
// instruction, merge the two instructions.
mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes);
@@ -887,8 +885,13 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
}
}
- if (needsFrameMoves)
+ if (needsFrameMoves) {
+ // Mark effective beginning of when frame pointer is ready.
+ unsigned ReadyLabelId = 0;
+ ReadyLabelId = MMI->NextLabelID();
+ BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(ReadyLabelId);
emitFrameMoves(MF, FrameLabelId, ReadyLabelId);
+ }
}
void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 8264462..88ab247 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -133,7 +133,8 @@ X86TargetMachine::X86TargetMachine(const Module &M, const std::string &FS,
DataLayout(Subtarget.getDataLayout()),
FrameInfo(TargetFrameInfo::StackGrowsDown,
Subtarget.getStackAlignment(), Subtarget.is64Bit() ? -8 : -4),
- InstrInfo(*this), JITInfo(*this), TLInfo(*this) {
+ InstrInfo(*this), JITInfo(*this), TLInfo(*this),
+ ELFWriterInfo(Subtarget.is64Bit()) {
DefRelocModel = getRelocationModel();
// FIXME: Correctly select PIC model for Win64 stuff
if (getRelocationModel() == Reloc::Default) {
@@ -213,6 +214,13 @@ bool X86TargetMachine::addAssemblyEmitter(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,
bool Verbose,
raw_ostream &Out) {
+ // FIXME: Move this somewhere else!
+ // On Darwin, override 64-bit static relocation to pic_ since the
+ // assembler doesn't support it.
+ if (DefRelocModel == Reloc::Static &&
+ Subtarget.isTargetDarwin() && Subtarget.is64Bit())
+ setRelocationModel(Reloc::PIC_);
+
assert(AsmPrinterCtor && "AsmPrinter was not linked in");
if (AsmPrinterCtor)
PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose));