author     ed <ed@FreeBSD.org>  2009-06-06 08:20:29 +0000
committer  ed <ed@FreeBSD.org>  2009-06-06 08:20:29 +0000
commit     2361a5c2bfbaef476824e51fa72712e334219c7b (patch)
tree       8a1bbd1a5b838080d31e5c93a1817006b8c62318 /lib/Target/X86
parent     1941b8772a36a33c7b86cb67163cd735b3d57221 (diff)
Import LLVM at r72995.
We should now have support for #pragma weak.
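
For reference, #pragma weak marks a symbol as weak: a strong definition elsewhere overrides it, and a plain weak declaration may legitimately remain unresolved at link time. Below is a minimal sketch of both forms, assuming a GCC/Clang-style toolchain on an ELF target; the function names are illustrative and not taken from this import:

```c
/* weak_demo.c - sketch of the two #pragma weak forms; names are made up. */
#include <stdio.h>

void default_handler(void) { printf("default handler\n"); }

/* Alias form: backup_handler becomes a weak alias for default_handler,
   so a strong definition of backup_handler elsewhere wins at link time. */
#pragma weak backup_handler = default_handler
void backup_handler(void);

/* Declaration form: optional_hook may be left undefined; if so, its
   address compares equal to a null pointer instead of causing a link error. */
#pragma weak optional_hook
void optional_hook(void);

int main(void) {
    backup_handler();      /* resolves to default_handler here */
    if (optional_hook)     /* guard: the weak symbol may be absent */
        optional_hook();
    return 0;
}
```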
Diffstat (limited to 'lib/Target/X86')
 lib/Target/X86/X86ELFWriterInfo.cpp     |   3
 lib/Target/X86/X86ELFWriterInfo.h       |   2
 lib/Target/X86/X86ISelLowering.cpp      | 192
 lib/Target/X86/X86ISelLowering.h        |   3
 lib/Target/X86/X86InstrInfo.cpp         |  21
 lib/Target/X86/X86InstrMMX.td           |  24
 lib/Target/X86/X86InstrSSE.td           |   2
 lib/Target/X86/X86MachineFunctionInfo.h |  17
 lib/Target/X86/X86RegisterInfo.cpp      |  35
 lib/Target/X86/X86TargetMachine.cpp     |  10
10 files changed, 197 insertions, 112 deletions
diff --git a/lib/Target/X86/X86ELFWriterInfo.cpp b/lib/Target/X86/X86ELFWriterInfo.cpp
index 4c3cc82..2604741 100644
--- a/lib/Target/X86/X86ELFWriterInfo.cpp
+++ b/lib/Target/X86/X86ELFWriterInfo.cpp
@@ -14,5 +14,6 @@
 #include "X86ELFWriterInfo.h"
 using namespace llvm;
 
-X86ELFWriterInfo::X86ELFWriterInfo() : TargetELFWriterInfo(EM_386) {}
+X86ELFWriterInfo::X86ELFWriterInfo(bool is64Bit) :
+  TargetELFWriterInfo(is64Bit ? EM_X86_64 : EM_386) {}
 X86ELFWriterInfo::~X86ELFWriterInfo() {}
diff --git a/lib/Target/X86/X86ELFWriterInfo.h b/lib/Target/X86/X86ELFWriterInfo.h
index 06e051a..acfa501 100644
--- a/lib/Target/X86/X86ELFWriterInfo.h
+++ b/lib/Target/X86/X86ELFWriterInfo.h
@@ -20,7 +20,7 @@ namespace llvm {
 
   class X86ELFWriterInfo : public TargetELFWriterInfo {
   public:
-    X86ELFWriterInfo();
+    X86ELFWriterInfo(bool is64Bit);
     virtual ~X86ELFWriterInfo();
   };
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 1f507c3..ef60ff5 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -126,7 +126,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
   setOperationAction(ISD::SINT_TO_FP       , MVT::i1   , Promote);
   setOperationAction(ISD::SINT_TO_FP       , MVT::i8   , Promote);
 
-  if (!UseSoftFloat && !NoImplicitFloat) {
+  if (!UseSoftFloat) {
     // SSE has no i16 to fp conversion, only i32
     if (X86ScalarSSEf32) {
       setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Promote);
@@ -550,6 +550,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
     setOperationAction(ISD::FLOG10, (MVT::SimpleValueType)VT, Expand);
     setOperationAction(ISD::FEXP, (MVT::SimpleValueType)VT, Expand);
     setOperationAction(ISD::FEXP2, (MVT::SimpleValueType)VT, Expand);
+    setOperationAction(ISD::FP_TO_UINT, (MVT::SimpleValueType)VT, Expand);
+    setOperationAction(ISD::FP_TO_SINT, (MVT::SimpleValueType)VT, Expand);
+    setOperationAction(ISD::UINT_TO_FP, (MVT::SimpleValueType)VT, Expand);
+    setOperationAction(ISD::SINT_TO_FP, (MVT::SimpleValueType)VT, Expand);
   }
 
   // FIXME: In order to prevent SSE instructions being expanded to MMX ones
@@ -734,6 +738,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
     setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
     setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
 
+    setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
+    setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
+    if (!DisableMMX && Subtarget->hasMMX()) {
+      setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
+      setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
+    }
   }
 
   if (Subtarget->hasSSE41()) {
@@ -868,11 +878,14 @@ unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const {
 /// determining it.
 MVT
 X86TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned Align,
-                                       bool isSrcConst, bool isSrcStr) const {
+                                       bool isSrcConst, bool isSrcStr,
+                                       SelectionDAG &DAG) const {
   // FIXME: This turns off use of xmm stores for memset/memcpy on targets like
   // linux.  This is because the stack realignment code can't handle certain
   // cases like PR2962. This should be removed when PR2962 is fixed.
-  if (!NoImplicitFloat && Subtarget->getStackAlignment() >= 16) {
+  const Function *F = DAG.getMachineFunction().getFunction();
+  bool NoImplicitFloatOps = F->hasFnAttr(Attribute::NoImplicitFloat);
+  if (!NoImplicitFloatOps && Subtarget->getStackAlignment() >= 16) {
     if ((isSrcConst || isSrcStr) && Subtarget->hasSSE2() && Size >= 16)
       return MVT::v4i32;
     if ((isSrcConst || isSrcStr) && Subtarget->hasSSE1() && Size >= 16)
@@ -1404,11 +1417,12 @@ X86TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
     unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs,
                                                      TotalNumXMMRegs);
 
+    bool NoImplicitFloatOps = Fn->hasFnAttr(Attribute::NoImplicitFloat);
     assert(!(NumXMMRegs && !Subtarget->hasSSE1()) &&
            "SSE register cannot be used when SSE is disabled!");
-    assert(!(NumXMMRegs && UseSoftFloat && NoImplicitFloat) &&
+    assert(!(NumXMMRegs && UseSoftFloat && NoImplicitFloatOps) &&
            "SSE register cannot be used when SSE is disabled!");
-    if (UseSoftFloat || NoImplicitFloat || !Subtarget->hasSSE1())
+    if (UseSoftFloat || NoImplicitFloatOps || !Subtarget->hasSSE1())
       // Kernel mode asks for SSE to be disabled, so don't push them
       // on the stack.
       TotalNumXMMRegs = 0;
@@ -2414,9 +2428,10 @@ bool X86::isUNPCKH_v_undef_Mask(ShuffleVectorSDNode *N) {
 /// specifies a shuffle of elements that is suitable for input to MOVSS,
 /// MOVSD, and MOVD, i.e. setting the lowest element.
 static bool isMOVLMask(const SmallVectorImpl<int> &Mask, MVT VT) {
-  int NumElts = VT.getVectorNumElements();
-  if (NumElts != 2 && NumElts != 4)
+  if (VT.getVectorElementType().getSizeInBits() < 32)
     return false;
+
+  int NumElts = VT.getVectorNumElements();
 
   if (!isUndefOrEqual(Mask[0], NumElts))
     return false;
@@ -3068,7 +3083,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
   }
 
   // Special case for single non-zero, non-undef, element.
-  if (NumNonZero == 1 && NumElems <= 4) {
+  if (NumNonZero == 1) {
     unsigned Idx = CountTrailingZeros_32(NonZeros);
     SDValue Item = Op.getOperand(Idx);
@@ -3109,15 +3124,24 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
     // If we have a constant or non-constant insertion into the low element of
     // a vector, we can do this with SCALAR_TO_VECTOR + shuffle of zero into
     // the rest of the elements.  This will be matched as movd/movq/movss/movsd
-    // depending on what the source datatype is.  Because we can only get here
-    // when NumElems <= 4, this only needs to handle i32/f32/i64/f64.
-    if (Idx == 0 &&
-        // Don't do this for i64 values on x86-32.
-        (EVT != MVT::i64 || Subtarget->is64Bit())) {
-      Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
-      // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
-      return getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0,
-                                         Subtarget->hasSSE2(), DAG);
+    // depending on what the source datatype is.
+    if (Idx == 0) {
+      if (NumZero == 0) {
+        return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
+      } else if (EVT == MVT::i32 || EVT == MVT::f32 || EVT == MVT::f64 ||
+                 (EVT == MVT::i64 && Subtarget->is64Bit())) {
+        Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
+        // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
+        return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget->hasSSE2(),
+                                           DAG);
+      } else if (EVT == MVT::i16 || EVT == MVT::i8) {
+        Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item);
+        MVT MiddleVT = VT.getSizeInBits() == 64 ? MVT::v2i32 : MVT::v4i32;
+        Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MiddleVT, Item);
+        Item = getShuffleVectorZeroOrUndef(Item, 0, true,
+                                           Subtarget->hasSSE2(), DAG);
+        return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Item);
+      }
     }
 
     // Is it a vector logical left shift?
@@ -4248,7 +4272,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
   SDValue N1 = Op.getOperand(1);
   SDValue N2 = Op.getOperand(2);
 
-  if (EVT.getSizeInBits() == 16) {
+  if (EVT.getSizeInBits() == 16 && isa<ConstantSDNode>(N2)) {
     // Transform it so it match pinsrw which expects a 16-bit value in a GR32
     // as its second argument.
     if (N1.getValueType() != MVT::i32)
@@ -4554,6 +4578,14 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) {
 SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
   MVT SrcVT = Op.getOperand(0).getValueType();
+
+  if (SrcVT.isVector()) {
+    if (SrcVT == MVT::v2i32 && Op.getValueType() == MVT::v2f64) {
+      return Op;
+    }
+    return SDValue();
+  }
+
   assert(SrcVT.getSimpleVT() <= MVT::i64 && SrcVT.getSimpleVT() >= MVT::i16 &&
          "Unknown SINT_TO_FP to lower!");
@@ -4845,6 +4877,14 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) {
 }
 
 SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) {
+  if (Op.getValueType().isVector()) {
+    if (Op.getValueType() == MVT::v2i32 &&
+        Op.getOperand(0).getValueType() == MVT::v2f64) {
+      return Op;
+    }
+    return SDValue();
+  }
+
   std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG, true);
   SDValue FIST = Vals.first, StackSlot = Vals.second;
   // If FP_TO_INTHelper failed, the node is actually supposed to be Legal.
@@ -7675,8 +7715,9 @@ static bool EltsFromConsecutiveLoads(ShuffleVectorSDNode *N, unsigned NumElems,
     if (Elt.getOpcode() == ISD::UNDEF)
       continue;
 
-    if (!TLI.isConsecutiveLoad(Elt.getNode(), Base,
-                               EVT.getSizeInBits()/8, i, MFI))
+    LoadSDNode *LD = cast<LoadSDNode>(Elt);
+    LoadSDNode *LDBase = cast<LoadSDNode>(Base);
+    if (!TLI.isConsecutiveLoad(LD, LDBase, EVT.getSizeInBits()/8, i, MFI))
       return false;
   }
   return true;
@@ -7751,44 +7792,82 @@ static SDValue PerformBuildVectorCombine(SDNode *N, SelectionDAG &DAG,
   MVT VT = N->getValueType(0);
   MVT EVT = VT.getVectorElementType();
-  if ((EVT != MVT::i64 && EVT != MVT::f64) || Subtarget->is64Bit())
-    // We are looking for load i64 and zero extend. We want to transform
-    // it before legalizer has a chance to expand it. Also look for i64
-    // BUILD_PAIR bit casted to f64.
-    return SDValue();
-  // This must be an insertion into a zero vector.
-  SDValue HighElt = N->getOperand(1);
-  if (!isZeroNode(HighElt))
-    return SDValue();
+
+  // Before or during type legalization, we want to try and convert a
+  // build_vector of an i64 load and a zero value into vzext_movl before the
+  // legalizer can break it up.
+  // FIXME: does the case below remove the need to do this?
+  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) {
+    if ((EVT != MVT::i64 && EVT != MVT::f64) || Subtarget->is64Bit())
+      return SDValue();
+
+    // This must be an insertion into a zero vector.
+    SDValue HighElt = N->getOperand(1);
+    if (!isZeroNode(HighElt))
+      return SDValue();
+
+    // Value must be a load.
+    SDNode *Base = N->getOperand(0).getNode();
+    if (!isa<LoadSDNode>(Base)) {
+      if (Base->getOpcode() != ISD::BIT_CONVERT)
+        return SDValue();
+      Base = Base->getOperand(0).getNode();
+      if (!isa<LoadSDNode>(Base))
+        return SDValue();
+    }
+
+    // Transform it into VZEXT_LOAD addr.
+    LoadSDNode *LD = cast<LoadSDNode>(Base);
+
+    // Load must not be an extload.
+    if (LD->getExtensionType() != ISD::NON_EXTLOAD)
+      return SDValue();
+
+    // Load type should legal type so we don't have to legalize it.
+    if (!TLI.isTypeLegal(VT))
+      return SDValue();
+
+    SDVTList Tys = DAG.getVTList(VT, MVT::Other);
+    SDValue Ops[] = { LD->getChain(), LD->getBasePtr() };
+    SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2);
+    TargetLowering::TargetLoweringOpt TLO(DAG);
+    TLO.CombineTo(SDValue(Base, 1), ResNode.getValue(1));
+    DCI.CommitTargetLoweringOpt(TLO);
+    return ResNode;
+  }
+
+  // The type legalizer will have broken apart v2i64 build_vector created during
+  // widening before the code which handles that case is run. Look for build
+  // vector (load, load + 4, 0/undef, 0/undef)
+  if (VT == MVT::v4i32 || VT == MVT::v4f32) {
+    LoadSDNode *LD0 = dyn_cast<LoadSDNode>(N->getOperand(0));
+    LoadSDNode *LD1 = dyn_cast<LoadSDNode>(N->getOperand(1));
+    if (!LD0 || !LD1)
+      return SDValue();
+    if (LD0->getExtensionType() != ISD::NON_EXTLOAD ||
+        LD1->getExtensionType() != ISD::NON_EXTLOAD)
+      return SDValue();
+    // Make sure the second elt is a consecutive load.
+    if (!TLI.isConsecutiveLoad(LD1, LD0, EVT.getSizeInBits()/8, 1,
+                               DAG.getMachineFunction().getFrameInfo()))
+      return SDValue();
 
-  // Value must be a load.
-  SDNode *Base = N->getOperand(0).getNode();
-  if (!isa<LoadSDNode>(Base)) {
-    if (Base->getOpcode() != ISD::BIT_CONVERT)
+    SDValue N2 = N->getOperand(2);
+    SDValue N3 = N->getOperand(3);
+    if (!isZeroNode(N2) && N2.getOpcode() != ISD::UNDEF)
       return SDValue();
-    Base = Base->getOperand(0).getNode();
-    if (!isa<LoadSDNode>(Base))
+    if (!isZeroNode(N3) && N3.getOpcode() != ISD::UNDEF)
       return SDValue();
+
+    SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
+    SDValue Ops[] = { LD0->getChain(), LD0->getBasePtr() };
+    SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2);
+    TargetLowering::TargetLoweringOpt TLO(DAG);
+    TLO.CombineTo(SDValue(LD0, 1), ResNode.getValue(1));
+    DCI.CommitTargetLoweringOpt(TLO);
+    return DAG.getNode(ISD::BIT_CONVERT, dl, VT, ResNode);
   }
-
-  // Transform it into VZEXT_LOAD addr.
-  LoadSDNode *LD = cast<LoadSDNode>(Base);
-
-  // Load must not be an extload.
-  if (LD->getExtensionType() != ISD::NON_EXTLOAD)
-    return SDValue();
-
-  // Load type should legal type so we don't have to legalize it.
-  if (!TLI.isTypeLegal(VT))
-    return SDValue();
-
-  SDVTList Tys = DAG.getVTList(VT, MVT::Other);
-  SDValue Ops[] = { LD->getChain(), LD->getBasePtr() };
-  SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2);
-  TargetLowering::TargetLoweringOpt TLO(DAG);
-  TLO.CombineTo(SDValue(Base, 1), ResNode.getValue(1));
-  DCI.CommitTargetLoweringOpt(TLO);
-  return ResNode;
+  return SDValue();
 }
 
 /// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes.
@@ -8242,7 +8321,10 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
   if (VT.getSizeInBits() != 64)
     return SDValue();
 
-  bool F64IsLegal = !UseSoftFloat && !NoImplicitFloat && Subtarget->hasSSE2();
+  const Function *F = DAG.getMachineFunction().getFunction();
+  bool NoImplicitFloatOps = F->hasFnAttr(Attribute::NoImplicitFloat);
+  bool F64IsLegal = !UseSoftFloat && !NoImplicitFloatOps
+    && Subtarget->hasSSE2();
   if ((VT.isVector() ||
       (VT == MVT::i64 && F64IsLegal && !Subtarget->is64Bit())) &&
       isa<LoadSDNode>(St->getValue()) &&
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 550f8bd..fb4eb68 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -378,7 +378,8 @@ namespace llvm {
     /// determining it.
     virtual
    MVT getOptimalMemOpType(uint64_t Size, unsigned Align,
-                            bool isSrcConst, bool isSrcStr) const;
+                            bool isSrcConst, bool isSrcStr,
+                            SelectionDAG &DAG) const;
 
     /// LowerOperation - Provide custom lowering hooks for some operations.
     ///
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 2cd3733..8a9b7c9 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -2009,16 +2009,24 @@ bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
   MachineFunction &MF = *MBB.getParent();
   X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
-  X86FI->setCalleeSavedFrameSize(CSI.size() * SlotSize);
+  unsigned CalleeFrameSize = 0;
 
   unsigned Opc = is64Bit ? X86::PUSH64r : X86::PUSH32r;
   for (unsigned i = CSI.size(); i != 0; --i) {
     unsigned Reg = CSI[i-1].getReg();
+    const TargetRegisterClass *RegClass = CSI[i-1].getRegClass();
     // Add the callee-saved register as live-in. It's killed at the spill.
     MBB.addLiveIn(Reg);
-    BuildMI(MBB, MI, DL, get(Opc))
-      .addReg(Reg, RegState::Kill);
+    if (RegClass != &X86::VR128RegClass) {
+      CalleeFrameSize += SlotSize;
+      BuildMI(MBB, MI, DL, get(Opc))
+        .addReg(Reg, RegState::Kill);
+    } else {
+      storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), RegClass);
+    }
   }
+
+  X86FI->setCalleeSavedFrameSize(CalleeFrameSize);
   return true;
 }
@@ -2036,7 +2044,12 @@ bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
   unsigned Opc = is64Bit ? X86::POP64r : X86::POP32r;
   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
     unsigned Reg = CSI[i].getReg();
-    BuildMI(MBB, MI, DL, get(Opc), Reg);
+    const TargetRegisterClass *RegClass = CSI[i].getRegClass();
+    if (RegClass != &X86::VR128RegClass) {
+      BuildMI(MBB, MI, DL, get(Opc), Reg);
+    } else {
+      loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RegClass);
+    }
   }
   return true;
 }
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index 8f287e1..43fadc2 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -577,41 +577,17 @@ def : Pat<(f64 (bitconvert (v4i16 VR64:$src))),
 def : Pat<(f64 (bitconvert (v8i8 VR64:$src))),
           (MMX_MOVQ2FR64rr VR64:$src)>;
 
-// Move scalar to MMX zero-extended
-// movd to MMX register zero-extends
-let AddedComplexity = 15 in {
-  def : Pat<(v8i8 (X86vzmovl (bc_v8i8 (v2i32 (scalar_to_vector GR32:$src))))),
-            (MMX_MOVZDI2PDIrr GR32:$src)>;
-  def : Pat<(v4i16 (X86vzmovl (bc_v4i16 (v2i32 (scalar_to_vector GR32:$src))))),
-            (MMX_MOVZDI2PDIrr GR32:$src)>;
-}
-
 let AddedComplexity = 20 in {
-  def : Pat<(v8i8 (X86vzmovl (bc_v8i8 (load_mmx addr:$src)))),
-            (MMX_MOVZDI2PDIrm addr:$src)>;
-  def : Pat<(v4i16 (X86vzmovl (bc_v4i16 (load_mmx addr:$src)))),
-            (MMX_MOVZDI2PDIrm addr:$src)>;
   def : Pat<(v2i32 (X86vzmovl (bc_v2i32 (load_mmx addr:$src)))),
             (MMX_MOVZDI2PDIrm addr:$src)>;
 }
 
 // Clear top half.
 let AddedComplexity = 15 in {
-  def : Pat<(v8i8 (X86vzmovl VR64:$src)),
-            (MMX_PUNPCKLDQrr VR64:$src, (MMX_V_SET0))>;
-  def : Pat<(v4i16 (X86vzmovl VR64:$src)),
-            (MMX_PUNPCKLDQrr VR64:$src, (MMX_V_SET0))>;
   def : Pat<(v2i32 (X86vzmovl VR64:$src)),
             (MMX_PUNPCKLDQrr VR64:$src, (MMX_V_SET0))>;
 }
 
-// Scalar to v4i16 / v8i8. The source may be a GR32, but only the lower
-// 8 or 16-bits matter.
-def : Pat<(bc_v8i8 (v2i32 (scalar_to_vector GR32:$src))),
-          (MMX_MOVD64rr GR32:$src)>;
-def : Pat<(bc_v4i16 (v2i32 (scalar_to_vector GR32:$src))),
-          (MMX_MOVD64rr GR32:$src)>;
-
 // Patterns to perform canonical versions of vector shuffling.
 let AddedComplexity = 10 in {
   def : Pat<(v8i8 (mmx_unpckl_undef VR64:$src, (undef))),
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 1fafa46..b44c7a6 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -3447,7 +3447,7 @@ multiclass SS41I_binop_rm_int2<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
 }
 
 defm PMOVSXBQ   : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq>;
-defm PMOVZXBQ   : SS41I_binop_rm_int2<0x32, "pmovsxbq", int_x86_sse41_pmovzxbq>;
+defm PMOVZXBQ   : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq>;
 
 // Common patterns involving scalar load
 def : Pat<(int_x86_sse41_pmovsxbq
diff --git a/lib/Target/X86/X86MachineFunctionInfo.h b/lib/Target/X86/X86MachineFunctionInfo.h
index 8a5ac2c..fafcf7e 100644
--- a/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/lib/Target/X86/X86MachineFunctionInfo.h
@@ -73,14 +73,15 @@ public:
                              SRetReturnReg(0),
                              GlobalBaseReg(0) {}
 
-  X86MachineFunctionInfo(MachineFunction &MF) : ForceFramePointer(false),
-                                                CalleeSavedFrameSize(0),
-                                                BytesToPopOnReturn(0),
-                                                DecorationStyle(None),
-                                                ReturnAddrIndex(0),
-                                                TailCallReturnAddrDelta(0),
-                                                SRetReturnReg(0),
-                                                GlobalBaseReg(0) {}
+  explicit X86MachineFunctionInfo(MachineFunction &MF)
+    : ForceFramePointer(false),
+      CalleeSavedFrameSize(0),
+      BytesToPopOnReturn(0),
+      DecorationStyle(None),
+      ReturnAddrIndex(0),
+      TailCallReturnAddrDelta(0),
+      SRetReturnReg(0),
+      GlobalBaseReg(0) {}
 
   bool getForceFramePointer() const { return ForceFramePointer;}
   void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; }
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 5af1fb1..c733f26 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -751,10 +751,12 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
   // function, and use up to 128 bytes of stack space, don't have a frame
   // pointer, calls, or dynamic alloca then we do not need to adjust the
   // stack pointer (we fit in the Red Zone).
+  bool DisableRedZone = Fn->hasFnAttr(Attribute::NoRedZone);
   if (Is64Bit && !DisableRedZone &&
       !needsStackRealignment(MF) &&
       !MFI->hasVarSizedObjects() &&                // No dynamic alloca.
-      !MFI->hasCalls()) {                          // No calls.
+      !MFI->hasCalls() &&                          // No calls.
+      !Subtarget->isTargetWin64()) {               // Win64 has no Red Zone
     uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
     if (hasFP(MF)) MinSize += SlotSize;
     StackSize = std::max(MinSize,
@@ -820,13 +822,6 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
     NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
   }
 
-  unsigned ReadyLabelId = 0;
-  if (needsFrameMoves) {
-    // Mark effective beginning of when frame pointer is ready.
-    ReadyLabelId = MMI->NextLabelID();
-    BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(ReadyLabelId);
-  }
-
   // Skip the callee-saved push instructions.
   while (MBBI != MBB.end() &&
          (MBBI->getOpcode() == X86::PUSH32r ||
@@ -836,20 +831,20 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
   if (MBBI != MBB.end())
     DL = MBBI->getDebugLoc();
 
-  if (NumBytes) {   // adjust stack pointer: ESP -= numbytes
+  if (NumBytes) {   // Adjust stack pointer: ESP -= numbytes.
     if (NumBytes >= 4096 && Subtarget->isTargetCygMing()) {
-      // Check, whether EAX is livein for this function
+      // Check, whether EAX is livein for this function.
       bool isEAXAlive = false;
       for (MachineRegisterInfo::livein_iterator
            II = MF.getRegInfo().livein_begin(),
           EE = MF.getRegInfo().livein_end(); (II != EE) && !isEAXAlive; ++II) {
         unsigned Reg = II->first;
         isEAXAlive = (Reg == X86::EAX || Reg == X86::AX ||
-                     Reg == X86::AH || Reg == X86::AL);
+                      Reg == X86::AH || Reg == X86::AL);
       }
 
-      // Function prologue calls _alloca to probe the stack when allocating
-      // more than 4k bytes in one go. Touching the stack at 4K increments is
+      // Function prologue calls _alloca to probe the stack when allocating more
+      // than 4k bytes in one go. Touching the stack at 4K increments is
       // necessary to ensure that the guard pages used by the OS virtual memory
       // manager are allocated in correct sequence.
       if (!isEAXAlive) {
@@ -861,12 +856,14 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
         // Save EAX
         BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
           .addReg(X86::EAX, RegState::Kill);
+
         // Allocate NumBytes-4 bytes on stack. We'll also use 4 already
         // allocated bytes for EAX.
-        BuildMI(MBB, MBBI, DL,
-                TII.get(X86::MOV32ri), X86::EAX).addImm(NumBytes-4);
+        BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
+          .addImm(NumBytes-4);
         BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
           .addExternalSymbol("_alloca");
+
         // Restore EAX
         MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
                                                 X86::EAX),
@@ -878,6 +875,7 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
       // merge the two. This can be the case when tail call elimination is
      // enabled and the callee has more arguments then the caller.
       NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true);
+
       // If there is an ADD32ri or SUB32ri of ESP immediately after this
       // instruction, merge the two instructions.
       mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes);
@@ -887,8 +885,13 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
     }
   }
 
-  if (needsFrameMoves)
+  if (needsFrameMoves) {
+    // Mark effective beginning of when frame pointer is ready.
+    unsigned ReadyLabelId = 0;
+    ReadyLabelId = MMI->NextLabelID();
+    BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(ReadyLabelId);
     emitFrameMoves(MF, FrameLabelId, ReadyLabelId);
+  }
 }
 
 void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 8264462..88ab247 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -133,7 +133,8 @@ X86TargetMachine::X86TargetMachine(const Module &M, const std::string &FS,
     DataLayout(Subtarget.getDataLayout()),
     FrameInfo(TargetFrameInfo::StackGrowsDown,
               Subtarget.getStackAlignment(), Subtarget.is64Bit() ? -8 : -4),
-    InstrInfo(*this), JITInfo(*this), TLInfo(*this) {
+    InstrInfo(*this), JITInfo(*this), TLInfo(*this),
+    ELFWriterInfo(Subtarget.is64Bit()) {
   DefRelocModel = getRelocationModel();
   // FIXME: Correctly select PIC model for Win64 stuff
   if (getRelocationModel() == Reloc::Default) {
@@ -213,6 +214,13 @@ bool X86TargetMachine::addAssemblyEmitter(PassManagerBase &PM,
                                           CodeGenOpt::Level OptLevel,
                                           bool Verbose,
                                           raw_ostream &Out) {
+  // FIXME: Move this somewhere else!
+  // On Darwin, override 64-bit static relocation to pic_ since the
+  // assembler doesn't support it.
+  if (DefRelocModel == Reloc::Static &&
+      Subtarget.isTargetDarwin() && Subtarget.is64Bit())
+    setRelocationModel(Reloc::PIC_);
+
   assert(AsmPrinterCtor && "AsmPrinter was not linked in");
   if (AsmPrinterCtor)
     PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose));