From d2e985fd323c167e20f77b045a1d99ad166e65db Mon Sep 17 00:00:00 2001 From: rdivacky Date: Wed, 18 Nov 2009 14:58:34 +0000 Subject: Update LLVM to r89205. --- lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp | 2 +- lib/Target/X86/AsmPrinter/X86MCInstLower.cpp | 2 +- lib/Target/X86/X86CodeEmitter.cpp | 27 ++--- lib/Target/X86/X86FastISel.cpp | 2 +- lib/Target/X86/X86ISelDAGToDAG.cpp | 18 ++- lib/Target/X86/X86ISelLowering.cpp | 102 ++++++++++------ lib/Target/X86/X86ISelLowering.h | 8 +- lib/Target/X86/X86Instr64bit.td | 2 +- lib/Target/X86/X86InstrInfo.cpp | 169 +++++++++++++++++++-------- lib/Target/X86/X86InstrInfo.h | 35 +++++- lib/Target/X86/X86InstrInfo.td | 2 +- lib/Target/X86/X86InstrSSE.td | 48 +++----- lib/Target/X86/X86JITInfo.cpp | 5 +- lib/Target/X86/X86RegisterInfo.cpp | 30 +++-- lib/Target/X86/X86RegisterInfo.h | 2 +- lib/Target/X86/X86Subtarget.cpp | 125 ++------------------ lib/Target/X86/X86Subtarget.h | 12 +- lib/Target/X86/X86TargetMachine.cpp | 19 +-- 18 files changed, 310 insertions(+), 300 deletions(-) (limited to 'lib/Target/X86') diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp index ae8e6d3..b88063f 100644 --- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp @@ -651,7 +651,7 @@ void X86AsmPrinter::printMachineInstruction(const MachineInstr *MI) { printInstructionThroughMCStreamer(MI); - if (VerboseAsm && !MI->getDebugLoc().isUnknown()) + if (VerboseAsm) EmitComments(*MI); O << '\n'; diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp index 821cca4..be9f4b2 100644 --- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp +++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp @@ -21,6 +21,7 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCStreamer.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/Mangler.h" #include "llvm/ADT/SmallString.h" @@ -405,7 +406,6 @@ void X86AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { printLabel(MI); return; case TargetInstrInfo::INLINEASM: - O << '\t'; printInlineAsm(MI); return; case TargetInstrInfo::IMPLICIT_DEF: diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index a0bded3..4497931 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -82,7 +82,7 @@ namespace { void emitPCRelativeBlockAddress(MachineBasicBlock *MBB); void emitGlobalAddress(GlobalValue *GV, unsigned Reloc, intptr_t Disp = 0, intptr_t PCAdj = 0, - bool NeedStub = false, bool Indirect = false); + bool Indirect = false); void emitExternalSymbolAddress(const char *ES, unsigned Reloc); void emitConstPoolAddress(unsigned CPI, unsigned Reloc, intptr_t Disp = 0, intptr_t PCAdj = 0); @@ -176,7 +176,6 @@ template void Emitter::emitGlobalAddress(GlobalValue *GV, unsigned Reloc, intptr_t Disp /* = 0 */, intptr_t PCAdj /* = 0 */, - bool NeedStub /* = false */, bool Indirect /* = false */) { intptr_t RelocCST = Disp; if (Reloc == X86::reloc_picrel_word) @@ -185,9 +184,9 @@ void Emitter::emitGlobalAddress(GlobalValue *GV, unsigned Reloc, RelocCST = PCAdj; MachineRelocation MR = Indirect ? 
MachineRelocation::getIndirectSymbol(MCE.getCurrentPCOffset(), Reloc, - GV, RelocCST, NeedStub) + GV, RelocCST, false) : MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc, - GV, RelocCST, NeedStub); + GV, RelocCST, false); MCE.addRelocation(MR); // The relocated value will be added to the displacement if (Reloc == X86::reloc_absolute_dword) @@ -333,10 +332,9 @@ void Emitter::emitDisplacementField(const MachineOperand *RelocOp, // do it, otherwise fallback to absolute (this is determined by IsPCRel). // 89 05 00 00 00 00 mov %eax,0(%rip) # PC-relative // 89 04 25 00 00 00 00 mov %eax,0x0 # Absolute - bool NeedStub = isa(RelocOp->getGlobal()); bool Indirect = gvNeedsNonLazyPtr(*RelocOp, TM); emitGlobalAddress(RelocOp->getGlobal(), RelocType, RelocOp->getOffset(), - Adj, NeedStub, Indirect); + Adj, Indirect); } else if (RelocOp->isSymbol()) { emitExternalSymbolAddress(RelocOp->getSymbolName(), RelocType); } else if (RelocOp->isCPI()) { @@ -633,14 +631,8 @@ void Emitter::emitInstruction(const MachineInstr &MI, } if (MO.isGlobal()) { - // Assume undefined functions may be outside the Small codespace. - bool NeedStub = - (Is64BitMode && - (TM.getCodeModel() == CodeModel::Large || - TM.getSubtarget().isTargetDarwin())) || - Opcode == X86::TAILJMPd; emitGlobalAddress(MO.getGlobal(), X86::reloc_pcrel_word, - MO.getOffset(), 0, NeedStub); + MO.getOffset(), 0); break; } @@ -681,10 +673,9 @@ void Emitter::emitInstruction(const MachineInstr &MI, if (Opcode == X86::MOV64ri) rt = X86::reloc_absolute_dword; // FIXME: add X86II flag? if (MO1.isGlobal()) { - bool NeedStub = isa(MO1.getGlobal()); bool Indirect = gvNeedsNonLazyPtr(MO1, TM); emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0, - NeedStub, Indirect); + Indirect); } else if (MO1.isSymbol()) emitExternalSymbolAddress(MO1.getSymbolName(), rt); else if (MO1.isCPI()) @@ -790,10 +781,9 @@ void Emitter::emitInstruction(const MachineInstr &MI, if (Opcode == X86::MOV64ri32) rt = X86::reloc_absolute_word_sext; // FIXME: add X86II flag? if (MO1.isGlobal()) { - bool NeedStub = isa(MO1.getGlobal()); bool Indirect = gvNeedsNonLazyPtr(MO1, TM); emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0, - NeedStub, Indirect); + Indirect); } else if (MO1.isSymbol()) emitExternalSymbolAddress(MO1.getSymbolName(), rt); else if (MO1.isCPI()) @@ -831,10 +821,9 @@ void Emitter::emitInstruction(const MachineInstr &MI, if (Opcode == X86::MOV64mi32) rt = X86::reloc_absolute_word_sext; // FIXME: add X86II flag? if (MO.isGlobal()) { - bool NeedStub = isa(MO.getGlobal()); bool Indirect = gvNeedsNonLazyPtr(MO, TM); emitGlobalAddress(MO.getGlobal(), rt, MO.getOffset(), 0, - NeedStub, Indirect); + Indirect); } else if (MO.isSymbol()) emitExternalSymbolAddress(MO.getSymbolName(), rt); else if (MO.isCPI()) diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 3401df0..431c120 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -1493,7 +1493,7 @@ bool X86FastISel::X86SelectCall(Instruction *I) { EVT ResVT = RVLocs[0].getValVT(); unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64; unsigned MemSize = ResVT.getSizeInBits()/8; - int FI = MFI.CreateStackObject(MemSize, MemSize); + int FI = MFI.CreateStackObject(MemSize, MemSize, false); addFrameReference(BuildMI(MBB, DL, TII.get(Opc)), FI).addReg(ResultReg); DstRC = ResVT == MVT::f32 ? 
X86::FR32RegisterClass : X86::FR64RegisterClass; diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 122f515..6a3577a 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -12,6 +12,15 @@ // //===----------------------------------------------------------------------===// +// Force NDEBUG on in any optimized build on Darwin. +// +// FIXME: This is a huge hack, to work around ridiculously awful compile times +// on this file with gcc-4.2 on Darwin, in Release mode. +#if (!defined(__llvm__) && defined(__APPLE__) && \ + defined(__OPTIMIZE__) && !defined(NDEBUG)) +#define NDEBUG +#endif + #define DEBUG_TYPE "x86-isel" #include "X86.h" #include "X86InstrBuilder.h" @@ -661,7 +670,6 @@ void X86DAGToDAGISel::InstructionSelect() { const Function *F = MF->getFunction(); OptForSize = F->hasFnAttr(Attribute::OptimizeForSize); - DEBUG(BB->dump()); if (OptLevel != CodeGenOpt::None) PreprocessForRMW(); @@ -1950,14 +1958,12 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) { 0); // We just did a 32-bit clear, insert it into a 64-bit register to // clear the whole 64-bit reg. - SDValue Undef = - SDValue(CurDAG->getMachineNode(TargetInstrInfo::IMPLICIT_DEF, - dl, MVT::i64), 0); + SDValue Zero = CurDAG->getTargetConstant(0, MVT::i64); SDValue SubRegNo = CurDAG->getTargetConstant(X86::SUBREG_32BIT, MVT::i32); ClrNode = - SDValue(CurDAG->getMachineNode(TargetInstrInfo::INSERT_SUBREG, dl, - MVT::i64, Undef, ClrNode, SubRegNo), + SDValue(CurDAG->getMachineNode(TargetInstrInfo::SUBREG_TO_REG, dl, + MVT::i64, Zero, ClrNode, SubRegNo), 0); } else { ClrNode = SDValue(CurDAG->getMachineNode(ClrOpcode, dl, NVT), 0); diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 86ec9f2..6018cf5 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1087,6 +1087,17 @@ unsigned X86TargetLowering::getFunctionAlignment(const Function *F) const { #include "X86GenCallingConv.inc" +bool +X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &OutTys, + const SmallVectorImpl &ArgsFlags, + SelectionDAG &DAG) { + SmallVector RVLocs; + CCState CCInfo(CallConv, isVarArg, getTargetMachine(), + RVLocs, *DAG.getContext()); + return CCInfo.CheckReturn(OutTys, ArgsFlags, RetCC_X86); +} + SDValue X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, @@ -1370,7 +1381,7 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, // In case of tail call optimization mark all arguments mutable. Since they // could be overwritten by lowering of arguments in case of a tail call. int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8, - VA.getLocMemOffset(), isImmutable); + VA.getLocMemOffset(), isImmutable, false); SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); if (Flags.isByVal()) return FIN; @@ -1499,7 +1510,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, // the start of the first vararg value... for expansion of llvm.va_start. 
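
// A sketch (not part of the patch) of the 64-bit clear idiom introduced in the
// X86ISelDAGToDAG.cpp hunk above: a 32-bit XOR already zeroes the upper half of
// the 64-bit register on x86-64, so the cleared value can be wrapped in a
// SUBREG_TO_REG node with a literal 0 instead of an INSERT_SUBREG into an
// IMPLICIT_DEF, telling later passes that the high bits are known zero.
SDValue Zero     = CurDAG->getTargetConstant(0, MVT::i64);
SDValue SubRegNo = CurDAG->getTargetConstant(X86::SUBREG_32BIT, MVT::i32);
ClrNode = SDValue(CurDAG->getMachineNode(TargetInstrInfo::SUBREG_TO_REG, dl,
                                         MVT::i64, Zero, ClrNode, SubRegNo), 0);
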
if (isVarArg) { if (Is64Bit || CallConv != CallingConv::X86_FastCall) { - VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize); + VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize, true, false); } if (Is64Bit) { unsigned TotalNumIntRegs = 0, TotalNumXMMRegs = 0; @@ -1550,7 +1561,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, VarArgsGPOffset = NumIntRegs * 8; VarArgsFPOffset = TotalNumIntRegs * 8 + NumXMMRegs * 16; RegSaveFrameIndex = MFI->CreateStackObject(TotalNumIntRegs * 8 + - TotalNumXMMRegs * 16, 16); + TotalNumXMMRegs * 16, 16, + false); // Store the integer parameter registers. SmallVector MemOps; @@ -1671,7 +1683,8 @@ EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF, // Calculate the new stack slot for the return address. int SlotSize = Is64Bit ? 8 : 4; int NewReturnAddrFI = - MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize); + MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize, + true, false); EVT VT = Is64Bit ? MVT::i64 : MVT::i32; SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT); Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx, @@ -1884,7 +1897,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Create frame index. int32_t Offset = VA.getLocMemOffset()+FPDiff; uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8; - FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset); + FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true, false); FIN = DAG.getFrameIndex(FI, getPointerTy()); if (Flags.isByVal()) { @@ -1924,9 +1937,19 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, FPDiff, dl); } - // If the callee is a GlobalAddress node (quite common, every direct call is) - // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. - if (GlobalAddressSDNode *G = dyn_cast(Callee)) { + bool WasGlobalOrExternal = false; + if (getTargetMachine().getCodeModel() == CodeModel::Large) { + assert(Is64Bit && "Large code model is only legal in 64-bit mode."); + // In the 64-bit large code model, we have to make all calls + // through a register, since the call instruction's 32-bit + // pc-relative offset may not be large enough to hold the whole + // address. + } else if (GlobalAddressSDNode *G = dyn_cast(Callee)) { + WasGlobalOrExternal = true; + // If the callee is a GlobalAddress node (quite common, every direct call + // is) turn it into a TargetGlobalAddress node so that legalize doesn't hack + // it. + // We should use extra load for direct calls to dllimported functions in // non-JIT mode. GlobalValue *GV = G->getGlobal(); @@ -1954,6 +1977,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, G->getOffset(), OpFlags); } } else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { + WasGlobalOrExternal = true; unsigned char OpFlags = 0; // On ELF targets, in either X86-64 or X86-32 mode, direct calls to external @@ -1971,7 +1995,9 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(), OpFlags); - } else if (isTailCall) { + } + + if (isTailCall && !WasGlobalOrExternal) { unsigned Opc = Is64Bit ? X86::R11 : X86::EAX; Chain = DAG.getCopyToReg(Chain, dl, @@ -2169,7 +2195,8 @@ SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) { if (ReturnAddrIndex == 0) { // Set up a frame object for the return address. 
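
// Note on the recurring frame-index changes in this file (and in
// X86FastISel.cpp and X86RegisterInfo.cpp): the MachineFrameInfo allocators now
// take an extra trailing flag. A hedged sketch of the new call shapes -- the
// parameter meanings (immutable / spill slot) are inferred from the call sites,
// not stated in this patch, and MFI/Size/Offset below are placeholders:
int ScratchFI = MFI->CreateStackObject(Size, /*Alignment=*/Size,
                                       /*isSpillSlot=*/false);
int ArgFI     = MFI->CreateFixedObject(Size, Offset,
                                       /*Immutable=*/true,
                                       /*isSpillSlot=*/false);
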
uint64_t SlotSize = TD->getPointerSize(); - ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize); + ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize, + true, false); FuncInfo->setRAIndex(ReturnAddrIndex); } @@ -2517,6 +2544,21 @@ bool X86::isMOVHLPSMask(ShuffleVectorSDNode *N) { isUndefOrEqual(N->getMaskElt(3), 3); } +/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form +/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef, +/// <2, 3, 2, 3> +bool X86::isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N) { + unsigned NumElems = N->getValueType(0).getVectorNumElements(); + + if (NumElems != 4) + return false; + + return isUndefOrEqual(N->getMaskElt(0), 2) && + isUndefOrEqual(N->getMaskElt(1), 3) && + isUndefOrEqual(N->getMaskElt(2), 2) && + isUndefOrEqual(N->getMaskElt(3), 3); +} + /// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}. bool X86::isMOVLPMask(ShuffleVectorSDNode *N) { @@ -2536,10 +2578,9 @@ bool X86::isMOVLPMask(ShuffleVectorSDNode *N) { return true; } -/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand -/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D} -/// and MOVLHPS. -bool X86::isMOVHPMask(ShuffleVectorSDNode *N) { +/// isMOVLHPSMask - Return true if the specified VECTOR_SHUFFLE operand +/// specifies a shuffle of elements that is suitable for input to MOVLHPS. +bool X86::isMOVLHPSMask(ShuffleVectorSDNode *N) { unsigned NumElems = N->getValueType(0).getVectorNumElements(); if (NumElems != 2 && NumElems != 4) @@ -2556,21 +2597,6 @@ bool X86::isMOVHPMask(ShuffleVectorSDNode *N) { return true; } -/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form -/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef, -/// <2, 3, 2, 3> -bool X86::isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N) { - unsigned NumElems = N->getValueType(0).getVectorNumElements(); - - if (NumElems != 4) - return false; - - return isUndefOrEqual(N->getMaskElt(0), 2) && - isUndefOrEqual(N->getMaskElt(1), 3) && - isUndefOrEqual(N->getMaskElt(2), 2) && - isUndefOrEqual(N->getMaskElt(3), 3); -} - /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to UNPCKL. static bool isUNPCKLMask(const SmallVectorImpl &Mask, EVT VT, @@ -4264,7 +4290,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { if (!isMMX && (X86::isMOVSHDUPMask(SVOp) || X86::isMOVSLDUPMask(SVOp) || X86::isMOVHLPSMask(SVOp) || - X86::isMOVHPMask(SVOp) || + X86::isMOVLHPSMask(SVOp) || X86::isMOVLPMask(SVOp))) return Op; @@ -4961,7 +4987,7 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) { DebugLoc dl = Op.getDebugLoc(); unsigned Size = SrcVT.getSizeInBits()/8; MachineFunction &MF = DAG.getMachineFunction(); - int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size); + int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), StackSlot, @@ -4995,7 +5021,7 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, // shouldn't be necessary except that RFP cannot be live across // multiple blocks. When stackifier is fixed, they can be uncoupled. 
MachineFunction &MF = DAG.getMachineFunction(); - int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); + int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); Tys = DAG.getVTList(MVT::Other); SmallVector Ops; @@ -5205,7 +5231,7 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) { // stack slot. MachineFunction &MF = DAG.getMachineFunction(); unsigned MemSize = DstTy.getSizeInBits()/8; - int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); + int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); unsigned Opc; @@ -5228,7 +5254,7 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) { }; Value = DAG.getNode(X86ISD::FLD, dl, Tys, Ops, 3); Chain = Value.getValue(1); - SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); + SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false); StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); } @@ -6752,7 +6778,7 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) { DebugLoc dl = Op.getDebugLoc(); // Save FP Control Word to stack slot - int SSFI = MF.getFrameInfo()->CreateStackObject(2, StackAlignment); + int SSFI = MF.getFrameInfo()->CreateStackObject(2, StackAlignment, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); SDValue Chain = DAG.getNode(X86ISD::FNSTCW16m, dl, MVT::Other, @@ -7977,7 +8003,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // Change the floating point control register to use "round towards zero" // mode when truncating to an integer value. MachineFunction *F = BB->getParent(); - int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2); + int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2, false); addFrameReference(BuildMI(BB, DL, TII->get(X86::FNSTCW16m)), CWFrameIdx); // Load the old value of the high byte of the control word... @@ -9585,14 +9611,14 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, } // GCC allows "st(0)" to be called just plain "st". - if (StringsEqualNoCase("{st}", Constraint)) { + if (StringRef("{st}").equals_lower(Constraint)) { Res.first = X86::ST0; Res.second = X86::RFP80RegisterClass; return Res; } // flags -> EFLAGS - if (StringsEqualNoCase("{flags}", Constraint)) { + if (StringRef("{flags}").equals_lower(Constraint)) { Res.first = X86::EFLAGS; Res.second = X86::CCRRegisterClass; return Res; diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 7b59b81..7b4ab62 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -286,7 +286,7 @@ namespace llvm { /// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for MOVHP{S|D}. /// as well as MOVLHPS. - bool isMOVHPMask(ShuffleVectorSDNode *N); + bool isMOVLHPSMask(ShuffleVectorSDNode *N); /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to UNPCKL. 
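
// A small self-contained illustration (not from the patch) of the string
// comparison swap in getRegForInlineAsmConstraint above: the old
// StringsEqualNoCase helper is replaced by StringRef's case-insensitive compare.
#include "llvm/ADT/StringRef.h"
#include <string>
static bool isSTConstraint(const std::string &Constraint) {
  // "{st}" should match "{ST}", "{St}", etc., exactly as before.
  return llvm::StringRef("{st}").equals_lower(Constraint);
}
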
@@ -699,6 +699,12 @@ namespace llvm { const SmallVectorImpl &Outs, DebugLoc dl, SelectionDAG &DAG); + virtual bool + CanLowerReturn(CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &OutTys, + const SmallVectorImpl &ArgsFlags, + SelectionDAG &DAG); + void ReplaceATOMIC_BINARY_64(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG, unsigned NewOp); diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td index 3edced7..a01534b 100644 --- a/lib/Target/X86/X86Instr64bit.td +++ b/lib/Target/X86/X86Instr64bit.td @@ -309,7 +309,7 @@ def MOV64ri32 : RIi32<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src), [(set GR64:$dst, i64immSExt32:$src)]>; } -let canFoldAsLoad = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in def MOV64rm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "mov{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (load addr:$src))]>; diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 87bc10d..1ddceb1 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -26,11 +26,15 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetOptions.h" #include "llvm/MC/MCAsmInfo.h" + +#include + using namespace llvm; static cl::opt @@ -707,9 +711,23 @@ bool X86InstrInfo::isMoveInstr(const MachineInstr& MI, } } -unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI, - int &FrameIndex) const { - switch (MI->getOpcode()) { +/// isFrameOperand - Return true and the FrameIndex if the specified +/// operand and follow operands form a reference to the stack frame. 
+bool X86InstrInfo::isFrameOperand(const MachineInstr *MI, unsigned int Op, + int &FrameIndex) const { + if (MI->getOperand(Op).isFI() && MI->getOperand(Op+1).isImm() && + MI->getOperand(Op+2).isReg() && MI->getOperand(Op+3).isImm() && + MI->getOperand(Op+1).getImm() == 1 && + MI->getOperand(Op+2).getReg() == 0 && + MI->getOperand(Op+3).getImm() == 0) { + FrameIndex = MI->getOperand(Op).getIndex(); + return true; + } + return false; +} + +static bool isFrameLoadOpcode(int Opcode) { + switch (Opcode) { default: break; case X86::MOV8rm: case X86::MOV16rm: @@ -723,22 +741,14 @@ unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI, case X86::MOVDQArm: case X86::MMX_MOVD64rm: case X86::MMX_MOVQ64rm: - if (MI->getOperand(1).isFI() && MI->getOperand(2).isImm() && - MI->getOperand(3).isReg() && MI->getOperand(4).isImm() && - MI->getOperand(2).getImm() == 1 && - MI->getOperand(3).getReg() == 0 && - MI->getOperand(4).getImm() == 0) { - FrameIndex = MI->getOperand(1).getIndex(); - return MI->getOperand(0).getReg(); - } + return true; break; } - return 0; + return false; } -unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI, - int &FrameIndex) const { - switch (MI->getOpcode()) { +static bool isFrameStoreOpcode(int Opcode) { + switch (Opcode) { default: break; case X86::MOV8mr: case X86::MOV16mr: @@ -753,19 +763,83 @@ unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI, case X86::MMX_MOVD64mr: case X86::MMX_MOVQ64mr: case X86::MMX_MOVNTQmr: - if (MI->getOperand(0).isFI() && MI->getOperand(1).isImm() && - MI->getOperand(2).isReg() && MI->getOperand(3).isImm() && - MI->getOperand(1).getImm() == 1 && - MI->getOperand(2).getReg() == 0 && - MI->getOperand(3).getImm() == 0) { - FrameIndex = MI->getOperand(0).getIndex(); + return true; + } + return false; +} + +unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const { + if (isFrameLoadOpcode(MI->getOpcode())) + if (isFrameOperand(MI, 1, FrameIndex)) + return MI->getOperand(0).getReg(); + return 0; +} + +unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI, + int &FrameIndex) const { + if (isFrameLoadOpcode(MI->getOpcode())) { + unsigned Reg; + if ((Reg = isLoadFromStackSlot(MI, FrameIndex))) + return Reg; + // Check for post-frame index elimination operations + return hasLoadFromStackSlot(MI, FrameIndex); + } + return 0; +} + +bool X86InstrInfo::hasLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const { + for (MachineInstr::mmo_iterator o = MI->memoperands_begin(), + oe = MI->memoperands_end(); + o != oe; + ++o) { + if ((*o)->isLoad() && (*o)->getValue()) + if (const FixedStackPseudoSourceValue *Value = + dyn_cast((*o)->getValue())) { + FrameIndex = Value->getFrameIndex(); + return true; + } + } + return false; +} + +unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const { + if (isFrameStoreOpcode(MI->getOpcode())) + if (isFrameOperand(MI, 0, FrameIndex)) return MI->getOperand(X86AddrNumOperands).getReg(); - } - break; + return 0; +} + +unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI, + int &FrameIndex) const { + if (isFrameStoreOpcode(MI->getOpcode())) { + unsigned Reg; + if ((Reg = isStoreToStackSlot(MI, FrameIndex))) + return Reg; + // Check for post-frame index elimination operations + return hasStoreToStackSlot(MI, FrameIndex); } return 0; } +bool X86InstrInfo::hasStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const { + for (MachineInstr::mmo_iterator o = 
MI->memoperands_begin(), + oe = MI->memoperands_end(); + o != oe; + ++o) { + if ((*o)->isStore() && (*o)->getValue()) + if (const FixedStackPseudoSourceValue *Value = + dyn_cast((*o)->getValue())) { + FrameIndex = Value->getFrameIndex(); + return true; + } + } + return false; +} + /// regIsPICBase - Return true if register is PIC base (i.e.g defined by /// X86::MOVPC32r. static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) { @@ -794,10 +868,14 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI, case X86::MOVSSrm: case X86::MOVSDrm: case X86::MOVAPSrm: + case X86::MOVUPSrm: + case X86::MOVUPSrm_Int: case X86::MOVAPDrm: case X86::MOVDQArm: case X86::MMX_MOVD64rm: - case X86::MMX_MOVQ64rm: { + case X86::MMX_MOVQ64rm: + case X86::FsMOVAPSrm: + case X86::FsMOVAPDrm: { // Loads from constant pools are trivially rematerializable. if (MI->getOperand(1).isReg() && MI->getOperand(2).isImm() && @@ -917,12 +995,13 @@ static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB, void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, unsigned SubIdx, - const MachineInstr *Orig) const { + const MachineInstr *Orig, + const TargetRegisterInfo *TRI) const { DebugLoc DL = DebugLoc::getUnknownLoc(); if (I != MBB.end()) DL = I->getDebugLoc(); if (SubIdx && TargetRegisterInfo::isPhysicalRegister(DestReg)) { - DestReg = RI.getSubReg(DestReg, SubIdx); + DestReg = TRI->getSubReg(DestReg, SubIdx); SubIdx = 0; } @@ -1891,8 +1970,7 @@ void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, MachineInstr::mmo_iterator MMOBegin, MachineInstr::mmo_iterator MMOEnd, SmallVectorImpl &NewMIs) const { - bool isAligned = (RI.getStackAlignment() >= 16) || - RI.needsStackRealignment(MF); + bool isAligned = (*MMOBegin)->getAlignment() >= 16; unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM); DebugLoc DL = DebugLoc::getUnknownLoc(); MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc)); @@ -1985,8 +2063,7 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg, MachineInstr::mmo_iterator MMOBegin, MachineInstr::mmo_iterator MMOEnd, SmallVectorImpl &NewMIs) const { - bool isAligned = (RI.getStackAlignment() >= 16) || - RI.needsStackRealignment(MF); + bool isAligned = (*MMOBegin)->getAlignment() >= 16; unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM); DebugLoc DL = DebugLoc::getUnknownLoc(); MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg); @@ -2170,7 +2247,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, // If table selected... 
if (OpcodeTablePtr) { // Find the Opcode to fuse - DenseMap >::iterator I = + DenseMap >::const_iterator I = OpcodeTablePtr->find((unsigned*)MI->getOpcode()); if (I != OpcodeTablePtr->end()) { unsigned Opcode = I->second.first; @@ -2402,7 +2479,7 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI, if (OpcodeTablePtr) { // Find the Opcode to fuse - DenseMap >::iterator I = + DenseMap >::const_iterator I = OpcodeTablePtr->find((unsigned*)Opc); if (I != OpcodeTablePtr->end()) return true; @@ -2413,7 +2490,7 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI, bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, unsigned Reg, bool UnfoldLoad, bool UnfoldStore, SmallVectorImpl &NewMIs) const { - DenseMap >::iterator I = + DenseMap >::const_iterator I = MemOp2RegOpTable.find((unsigned*)MI->getOpcode()); if (I == MemOp2RegOpTable.end()) return false; @@ -2530,7 +2607,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, if (!N->isMachineOpcode()) return false; - DenseMap >::iterator I = + DenseMap >::const_iterator I = MemOp2RegOpTable.find((unsigned*)N->getMachineOpcode()); if (I == MemOp2RegOpTable.end()) return false; @@ -2563,17 +2640,16 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, MachineFunction &MF = DAG.getMachineFunction(); if (FoldedLoad) { EVT VT = *RC->vt_begin(); - bool isAligned = (RI.getStackAlignment() >= 16) || - RI.needsStackRealignment(MF); + std::pair MMOs = + MF.extractLoadMemRefs(cast(N)->memoperands_begin(), + cast(N)->memoperands_end()); + bool isAligned = (*MMOs.first)->getAlignment() >= 16; Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, TM), dl, VT, MVT::Other, &AddrOps[0], AddrOps.size()); NewNodes.push_back(Load); // Preserve memory reference information. - std::pair MMOs = - MF.extractLoadMemRefs(cast(N)->memoperands_begin(), - cast(N)->memoperands_end()); cast(Load)->setMemRefs(MMOs.first, MMOs.second); } @@ -2601,8 +2677,11 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, AddrOps.pop_back(); AddrOps.push_back(SDValue(NewNode, 0)); AddrOps.push_back(Chain); - bool isAligned = (RI.getStackAlignment() >= 16) || - RI.needsStackRealignment(MF); + std::pair MMOs = + MF.extractStoreMemRefs(cast(N)->memoperands_begin(), + cast(N)->memoperands_end()); + bool isAligned = (*MMOs.first)->getAlignment() >= 16; SDNode *Store = DAG.getMachineNode(getStoreRegOpcode(0, DstRC, isAligned, TM), dl, MVT::Other, @@ -2610,10 +2689,6 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, NewNodes.push_back(Store); // Preserve memory reference information. 
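
// Sketch (not part of the patch) of the new alignment check used when
// unfolding a memory operand above. The template arguments shown here were
// lost in this copy of the diff and are reconstructed from the surrounding
// code, so treat them as an educated guess:
std::pair<MachineInstr::mmo_iterator, MachineInstr::mmo_iterator> MMOs =
    MF.extractLoadMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
                          cast<MachineSDNode>(N)->memoperands_end());
// Alignment now comes from the actual MachineMemOperand rather than from the
// function's stack alignment / realignment state.
bool isAligned = (*MMOs.first)->getAlignment() >= 16;
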
- std::pair MMOs = - MF.extractStoreMemRefs(cast(N)->memoperands_begin(), - cast(N)->memoperands_end()); cast(Load)->setMemRefs(MMOs.first, MMOs.second); } @@ -2623,7 +2698,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, unsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc, bool UnfoldLoad, bool UnfoldStore, unsigned *LoadRegIndex) const { - DenseMap >::iterator I = + DenseMap >::const_iterator I = MemOp2RegOpTable.find((unsigned*)Opc); if (I == MemOp2RegOpTable.end()) return 0; diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index 6eb07d5..c6daa25 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -449,13 +449,41 @@ public: unsigned &SrcSubIdx, unsigned &DstSubIdx) const; unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const; + /// isLoadFromStackSlotPostFE - Check for post-frame ptr elimination + /// stack locations as well. This uses a heuristic so it isn't + /// reliable for correctness. + unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI, + int &FrameIndex) const; + + /// hasLoadFromStackSlot - If the specified machine instruction has + /// a load from a stack slot, return true along with the FrameIndex + /// of the loaded stack slot. If not, return false. Unlike + /// isLoadFromStackSlot, this returns true for any instructions that + /// loads from the stack. This is a hint only and may not catch all + /// cases. + bool hasLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const; + unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const; + /// isStoreToStackSlotPostFE - Check for post-frame ptr elimination + /// stack locations as well. This uses a heuristic so it isn't + /// reliable for correctness. + unsigned isStoreToStackSlotPostFE(const MachineInstr *MI, + int &FrameIndex) const; + + /// hasStoreToStackSlot - If the specified machine instruction has a + /// store to a stack slot, return true along with the FrameIndex of + /// the loaded stack slot. If not, return false. Unlike + /// isStoreToStackSlot, this returns true for any instructions that + /// loads from the stack. This is a hint only and may not catch all + /// cases. + bool hasStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const; bool isReallyTriviallyReMaterializable(const MachineInstr *MI, AliasAnalysis *AA) const; void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, unsigned SubIdx, - const MachineInstr *Orig) const; + const MachineInstr *Orig, + const TargetRegisterInfo *TRI) const; /// convertToThreeAddress - This method must be implemented by targets that /// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target @@ -610,6 +638,11 @@ private: unsigned OpNum, const SmallVectorImpl &MOs, unsigned Size, unsigned Alignment) const; + + /// isFrameOperand - Return true and the FrameIndex if the specified + /// operand and follow operands form a reference to the stack frame. 
+ bool isFrameOperand(const MachineInstr *MI, unsigned int Op, + int &FrameIndex) const; }; } // End llvm namespace diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 9b82e1e..a79f262 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -543,7 +543,7 @@ let neverHasSideEffects = 1 in { } // Trap -def INT3 : I<0xcc, RawFrm, (outs), (ins), "int 3", []>; +def INT3 : I<0xcc, RawFrm, (outs), (ins), "int\t3", []>; def INT : I<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap", []>; // PIC base construction. This expands to code that looks like this: diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index be242a0..ee63d56 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -225,9 +225,9 @@ def movhlps_undef : PatFrag<(ops node:$lhs, node:$rhs), return X86::isMOVHLPS_v_undef_Mask(cast(N)); }]>; -def movhp : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isMOVHPMask(cast(N)); +def movlhps : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isMOVLHPSMask(cast(N)); }]>; def movlp : PatFrag<(ops node:$lhs, node:$rhs), @@ -497,7 +497,7 @@ def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), // Alias instruction to load FR32 from f128mem using movaps. Upper bits are // disregarded. -let canFoldAsLoad = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src), "movaps\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>; @@ -706,7 +706,7 @@ def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), let neverHasSideEffects = 1 in def MOVUPSrr : PSI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movups\t{$src, $dst|$dst, $src}", []>; -let canFoldAsLoad = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in def MOVUPSrm : PSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "movups\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (loadv4f32 addr:$src))]>; @@ -715,7 +715,7 @@ def MOVUPSmr : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), [(store (v4f32 VR128:$src), addr:$dst)]>; // Intrinsic forms of MOVUPS load and store -let canFoldAsLoad = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in def MOVUPSrm_Int : PSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "movups\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse_loadu_ps addr:$src))]>; @@ -735,7 +735,7 @@ let Constraints = "$src1 = $dst" in { (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), "movhps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (movhp VR128:$src1, + (movlhps VR128:$src1, (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))]>; } // AddedComplexity } // Constraints = "$src1 = $dst" @@ -760,7 +760,7 @@ def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "movlhps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v4f32 (movhp VR128:$src1, VR128:$src2)))]>; + (v4f32 (movlhps VR128:$src1, VR128:$src2)))]>; def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), @@ -1256,7 +1256,7 @@ def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src), // Alias instruction to load FR64 from f128mem using movapd. Upper bits are // disregarded. 
-let canFoldAsLoad = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src), "movapd\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>; @@ -1494,7 +1494,7 @@ let Constraints = "$src1 = $dst" in { (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), "movhpd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v2f64 (movhp VR128:$src1, + (v2f64 (movlhps VR128:$src1, (scalar_to_vector (loadf64 addr:$src2)))))]>; } // AddedComplexity } // Constraints = "$src1 = $dst" @@ -2085,7 +2085,7 @@ def PSHUFDmi : PDIi8<0x70, MRMSrcMem, [(set VR128:$dst, (v4i32 (pshufd:$src2 (bc_v4i32(memopv2i64 addr:$src1)), (undef))))]>; -} +} // SSE2 with ImmT == Imm8 and XS prefix. def PSHUFHWri : Ii8<0x70, MRMSrcReg, @@ -2874,7 +2874,7 @@ def : Pat<(v16i8 (palign:$src3 VR128:$src1, VR128:$src2)), (PALIGNR128rr VR128:$src2, VR128:$src1, (SHUFFLE_get_palign_imm VR128:$src3))>, Requires<[HasSSSE3]>; -} +} def : Pat<(X86pshufb VR128:$src, VR128:$mask), (PSHUFBrr128 VR128:$src, VR128:$mask)>, Requires<[HasSSSE3]>; @@ -3035,7 +3035,7 @@ def : Pat<(v4i32 (unpckh_undef VR128:$src, (undef))), let AddedComplexity = 20 in { // vector_shuffle v1, v2 <0, 1, 4, 5> using MOVLHPS -def : Pat<(v4i32 (movhp VR128:$src1, VR128:$src2)), +def : Pat<(v4i32 (movlhps VR128:$src1, VR128:$src2)), (MOVLHPSrr VR128:$src1, VR128:$src2)>; // vector_shuffle v1, v2 <6, 7, 2, 3> using MOVHLPS @@ -3051,48 +3051,26 @@ def : Pat<(v4i32 (movhlps_undef VR128:$src1, (undef))), let AddedComplexity = 20 in { // vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS -// vector_shuffle v1, (load v2) <0, 1, 4, 5> using MOVHPS def : Pat<(v4f32 (movlp VR128:$src1, (load addr:$src2))), (MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>; def : Pat<(v2f64 (movlp VR128:$src1, (load addr:$src2))), (MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; -def : Pat<(v4f32 (movhp VR128:$src1, (load addr:$src2))), - (MOVHPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>; -def : Pat<(v2f64 (movhp VR128:$src1, (load addr:$src2))), - (MOVHPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; - def : Pat<(v4i32 (movlp VR128:$src1, (load addr:$src2))), (MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; def : Pat<(v2i64 (movlp VR128:$src1, (load addr:$src2))), (MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; -def : Pat<(v4i32 (movhp VR128:$src1, (load addr:$src2))), - (MOVHPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>; -def : Pat<(v2i64 (movhp VR128:$src1, (load addr:$src2))), - (MOVHPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; } // (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS -// (store (vector_shuffle (load addr), v2, <0, 1, 4, 5>), addr) using MOVHPS def : Pat<(store (v4f32 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), (MOVLPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>; def : Pat<(store (v2f64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), (MOVLPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>; -def : Pat<(store (v4f32 (movhp (load addr:$src1), VR128:$src2)), addr:$src1), - (MOVHPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>; -def : Pat<(store (v2f64 (movhp (load addr:$src1), VR128:$src2)), addr:$src1), - (MOVHPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>; - def : Pat<(store (v4i32 (movlp (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)), addr:$src1), (MOVLPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>; def : Pat<(store (v2i64 (movlp (load 
addr:$src1), VR128:$src2)), addr:$src1), (MOVLPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>; -def : Pat<(store (v4i32 (movhp (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)), - addr:$src1), - (MOVHPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>; -def : Pat<(store (v2i64 (movhp (load addr:$src1), VR128:$src2)), addr:$src1), - (MOVHPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>; - let AddedComplexity = 15 in { // Setting the lowest element in the vector. diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp index 62ca47f..0792bdd 100644 --- a/lib/Target/X86/X86JITInfo.cpp +++ b/lib/Target/X86/X86JITInfo.cpp @@ -367,8 +367,9 @@ X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) { // Rewrite the call target... so that we don't end up here every time we // execute the call. #if defined (X86_64_JIT) - if (!isStub) - *(intptr_t *)(RetAddr - 0xa) = NewVal; + assert(isStub && + "X86-64 doesn't support rewriting non-stub lazy compilation calls:" + " the call instruction varies too much."); #else *(intptr_t *)RetAddr = (intptr_t)(NewVal-RetAddr-4); #endif diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index c5ff525..f577fcf 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -392,6 +392,11 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(X86::SP); Reserved.set(X86::SPL); + // Set the instruction pointer register and its aliases as reserved. + Reserved.set(X86::RIP); + Reserved.set(X86::EIP); + Reserved.set(X86::IP); + // Set the frame-pointer register and its aliases as reserved if needed. if (hasFP(MF)) { Reserved.set(X86::RBP); @@ -450,12 +455,17 @@ bool X86RegisterInfo::hasFP(const MachineFunction &MF) const { bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); + bool requiresRealignment = + RealignStack && (MFI->getMaxAlignment() > StackAlign); // FIXME: Currently we don't support stack realignment for functions with - // variable-sized allocas - return (RealignStack && - (MFI->getMaxAlignment() > StackAlign && - !MFI->hasVarSizedObjects())); + // variable-sized allocas. + // FIXME: Temporary disable the error - it seems to be too conservative. + if (0 && requiresRealignment && MFI->hasVarSizedObjects()) + llvm_report_error( + "Stack realignment in presense of dynamic allocas is not supported"); + + return (requiresRealignment && !MFI->hasVarSizedObjects()); } bool X86RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const { @@ -610,8 +620,8 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // Offset is a 32-bit integer. int Offset = getFrameIndexOffset(MF, FrameIndex) + (int)(MI.getOperand(i + 3).getImm()); - - MI.getOperand(i + 3).ChangeToImmediate(Offset); + + MI.getOperand(i + 3).ChangeToImmediate(Offset); } else { // Offset is symbolic. This is extremely rare. 
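
// Condensed sketch of the needsStackRealignment logic after the
// X86RegisterInfo.cpp hunk above: the predicate is computed up front, the
// llvm_report_error path is deliberately left disabled ("if (0 && ...)"), and
// frames with variable-sized objects still refuse realignment.
bool requiresRealignment = RealignStack && (MFI->getMaxAlignment() > StackAlign);
return requiresRealignment && !MFI->hasVarSizedObjects();
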
uint64_t Offset = getFrameIndexOffset(MF, FrameIndex) + @@ -647,7 +657,8 @@ X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // } // [EBP] MFI->CreateFixedObject(-TailCallReturnAddrDelta, - (-1U*SlotSize)+TailCallReturnAddrDelta); + (-1U*SlotSize)+TailCallReturnAddrDelta, + true, false); } if (hasFP(MF)) { @@ -659,7 +670,8 @@ X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, int FrameIdx = MFI->CreateFixedObject(SlotSize, -(int)SlotSize + TFI.getOffsetOfLocalArea() + - TailCallReturnAddrDelta); + TailCallReturnAddrDelta, + true, false); assert(FrameIdx == MFI->getObjectIndexBegin() && "Slot for EBP register must be last in order to be found!"); FrameIdx = 0; @@ -1271,7 +1283,7 @@ unsigned X86RegisterInfo::getRARegister() const { : X86::EIP; // Should have dwarf #8. } -unsigned X86RegisterInfo::getFrameRegister(MachineFunction &MF) const { +unsigned X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const { return hasFP(MF) ? FramePtr : StackPtr; } diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h index f635707..f281a3c 100644 --- a/lib/Target/X86/X86RegisterInfo.h +++ b/lib/Target/X86/X86RegisterInfo.h @@ -153,7 +153,7 @@ public: // Debug information queries. unsigned getRARegister() const; - unsigned getFrameRegister(MachineFunction &MF) const; + unsigned getFrameRegister(const MachineFunction &MF) const; int getFrameIndexOffset(MachineFunction &MF, int FI) const; void getInitialFrameState(std::vector &Moves) const; diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 9525f04..b901c14 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -18,8 +18,10 @@ #include "llvm/GlobalValue.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/System/Host.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/ADT/SmallVector.h" using namespace llvm; #if defined(_MSC_VER) @@ -257,118 +259,6 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { } } -static const char *GetCurrentX86CPU() { - unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; - if (GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX)) - return "generic"; - unsigned Family = 0; - unsigned Model = 0; - DetectFamilyModel(EAX, Family, Model); - - GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); - bool Em64T = (EDX >> 29) & 0x1; - bool HasSSE3 = (ECX & 0x1); - - union { - unsigned u[3]; - char c[12]; - } text; - - GetCpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1); - if (memcmp(text.c, "GenuineIntel", 12) == 0) { - switch (Family) { - case 3: - return "i386"; - case 4: - return "i486"; - case 5: - switch (Model) { - case 4: return "pentium-mmx"; - default: return "pentium"; - } - case 6: - switch (Model) { - case 1: return "pentiumpro"; - case 3: - case 5: - case 6: return "pentium2"; - case 7: - case 8: - case 10: - case 11: return "pentium3"; - case 9: - case 13: return "pentium-m"; - case 14: return "yonah"; - case 15: - case 22: // Celeron M 540 - return "core2"; - case 23: // 45nm: Penryn , Wolfdale, Yorkfield (XE) - return "penryn"; - default: return "i686"; - } - case 15: { - switch (Model) { - case 3: - case 4: - case 6: // same as 4, but 65nm - return (Em64T) ? "nocona" : "prescott"; - case 26: - return "corei7"; - case 28: - return "atom"; - default: - return (Em64T) ? 
"x86-64" : "pentium4"; - } - } - - default: - return "generic"; - } - } else if (memcmp(text.c, "AuthenticAMD", 12) == 0) { - // FIXME: this poorly matches the generated SubtargetFeatureKV table. There - // appears to be no way to generate the wide variety of AMD-specific targets - // from the information returned from CPUID. - switch (Family) { - case 4: - return "i486"; - case 5: - switch (Model) { - case 6: - case 7: return "k6"; - case 8: return "k6-2"; - case 9: - case 13: return "k6-3"; - default: return "pentium"; - } - case 6: - switch (Model) { - case 4: return "athlon-tbird"; - case 6: - case 7: - case 8: return "athlon-mp"; - case 10: return "athlon-xp"; - default: return "athlon"; - } - case 15: - if (HasSSE3) { - return "k8-sse3"; - } else { - switch (Model) { - case 1: return "opteron"; - case 5: return "athlon-fx"; // also opteron - default: return "athlon64"; - } - } - case 16: - return "amdfam10"; - default: - return "generic"; - } - } else { - return "generic"; - } -} - X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS, bool is64Bit) : PICStyle(PICStyles::None) @@ -395,7 +285,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS, // Determine default and user specified characteristics if (!FS.empty()) { // If feature string is not empty, parse features string. - std::string CPU = GetCurrentX86CPU(); + std::string CPU = sys::getHostCPUName(); ParseSubtargetFeatures(FS, CPU); // All X86-64 CPUs also have SSE2, however user might request no SSE via // -mattr, so don't force SSELevel here. @@ -455,3 +345,12 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS, if (StackAlignment) stackAlignment = StackAlignment; } + +bool X86Subtarget::enablePostRAScheduler( + CodeGenOpt::Level OptLevel, + TargetSubtarget::AntiDepBreakMode& Mode, + RegClassVector& CriticalPathRCs) const { + Mode = TargetSubtarget::ANTIDEP_CRITICAL; + CriticalPathRCs.clear(); + return OptLevel >= CodeGenOpt::Default; +} diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index e64b854..23f2841 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -166,11 +166,11 @@ public: std::string getDataLayout() const { const char *p; if (is64Bit()) - p = "e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128"; + p = "e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-n8:16:32:64"; else if (isTargetDarwin()) - p = "e-p:32:32-f64:32:64-i64:32:64-f80:128:128"; + p = "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-n8:16:32"; else - p = "e-p:32:32-f64:32:64-i64:32:64-f80:32:32"; + p = "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-n8:16:32"; return std::string(p); } @@ -219,10 +219,8 @@ public: /// enablePostRAScheduler - X86 target is enabling post-alloc scheduling /// at 'More' optimization level. 
   bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
-                             TargetSubtarget::AntiDepBreakMode& mode) const {
-    mode = TargetSubtarget::ANTIDEP_CRITICAL;
-    return OptLevel >= CodeGenOpt::Default;
-  }
+                             TargetSubtarget::AntiDepBreakMode& Mode,
+                             RegClassVector& CriticalPathRCs) const;
 };
 
 } // End llvm namespace
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index a61de1c..0cda8bc 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -22,8 +22,7 @@
 #include "llvm/Target/TargetRegistry.h"
 using namespace llvm;
 
-static const MCAsmInfo *createMCAsmInfo(const Target &T,
-                                        const StringRef &TT) {
+static const MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
   Triple TheTriple(TT);
   switch (TheTriple.getOS()) {
   case Triple::Darwin:
@@ -186,14 +185,8 @@ bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM,
   }
 
   // 64-bit JIT places everything in the same buffer except external functions.
-  // On Darwin, use small code model but hack the call instruction for
-  // externals. Elsewhere, do not assume globals are in the lower 4G.
-  if (Subtarget.is64Bit()) {
-    if (Subtarget.isTargetDarwin())
-      setCodeModel(CodeModel::Small);
-    else
+  if (Subtarget.is64Bit())
       setCodeModel(CodeModel::Large);
-  }
 
   PM.add(createX86CodeEmitterPass(*this, MCE));
 
@@ -212,14 +205,8 @@ bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM,
   }
 
   // 64-bit JIT places everything in the same buffer except external functions.
-  // On Darwin, use small code model but hack the call instruction for
-  // externals. Elsewhere, do not assume globals are in the lower 4G.
-  if (Subtarget.is64Bit()) {
-    if (Subtarget.isTargetDarwin())
-      setCodeModel(CodeModel::Small);
-    else
+  if (Subtarget.is64Bit())
       setCodeModel(CodeModel::Large);
-  }
 
   PM.add(createX86JITCodeEmitterPass(*this, JCE));
--
cgit v1.1
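
// Closing note: a sketch (assembled from the X86Subtarget.cpp and
// X86Subtarget.h hunks above, not new code) of the post-RA scheduler hook
// after this change. The override gains a critical-path register-class list
// alongside the anti-dependency-breaking mode, and the body moves out of the
// header into X86Subtarget.cpp.
bool X86Subtarget::enablePostRAScheduler(
       CodeGenOpt::Level OptLevel,
       TargetSubtarget::AntiDepBreakMode &Mode,
       RegClassVector &CriticalPathRCs) const {
  Mode = TargetSubtarget::ANTIDEP_CRITICAL;  // break anti-deps on the critical path only
  CriticalPathRCs.clear();                   // no register class singled out on X86
  return OptLevel >= CodeGenOpt::Default;    // enabled at 'Default' optimization and above
}
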