author      rdivacky <rdivacky@FreeBSD.org>        2010-03-03 17:27:15 +0000
committer   rdivacky <rdivacky@FreeBSD.org>        2010-03-03 17:27:15 +0000
commit      8230c40430a1325b5cc5bc0221931487b4bd573c (patch)
tree        836a05cff50ca46176117b86029f061fa4db54f0 /lib/Target/X86
parent      f25ddd991a5601d0101602c4c263a58c7af4b8a2 (diff)
download    FreeBSD-src-8230c40430a1325b5cc5bc0221931487b4bd573c.zip
            FreeBSD-src-8230c40430a1325b5cc5bc0221931487b4bd573c.tar.gz
Update LLVM to 97654.
Diffstat (limited to 'lib/Target/X86')
24 files changed, 702 insertions(+), 900 deletions(-)
diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp index 1a35a49..734a545 100644 --- a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp @@ -73,7 +73,7 @@ void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo) { O << '$' << Op.getImm(); if (CommentStream && (Op.getImm() > 255 || Op.getImm() < -256)) - *CommentStream << format("imm = 0x%X\n", Op.getImm()); + *CommentStream << format("imm = 0x%llX\n", (long long)Op.getImm()); } else { assert(Op.isExpr() && "unknown operand kind in printOperand"); diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index 61f26a7..eed3b45 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -15,6 +15,7 @@ tablegen(X86GenCallingConv.inc -gen-callingconv) tablegen(X86GenSubtarget.inc -gen-subtarget) set(sources + X86AsmBackend.cpp X86CodeEmitter.cpp X86COFFMachineModuleInfo.cpp X86ELFWriterInfo.cpp diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt index 19eb05e..e5f84e8 100644 --- a/lib/Target/X86/README-SSE.txt +++ b/lib/Target/X86/README-SSE.txt @@ -67,8 +67,8 @@ no_exit.i7: ; preds = %no_exit.i7, %build_tree.exit [ %tmp.34.i18, %no_exit.i7 ] %tmp.0.0.0.i10 = phi double [ 0.000000e+00, %build_tree.exit ], [ %tmp.28.i16, %no_exit.i7 ] - %tmp.28.i16 = add double %tmp.0.0.0.i10, 0.000000e+00 - %tmp.34.i18 = add double %tmp.0.1.0.i9, 0.000000e+00 + %tmp.28.i16 = fadd double %tmp.0.0.0.i10, 0.000000e+00 + %tmp.34.i18 = fadd double %tmp.0.1.0.i9, 0.000000e+00 br i1 false, label %Compute_Tree.exit23, label %no_exit.i7 Compute_Tree.exit23: ; preds = %no_exit.i7 @@ -97,7 +97,7 @@ pcmp/pand/pandn/por to do a selection instead of a conditional branch: double %X(double %Y, double %Z, double %A, double %B) { %C = setlt double %A, %B - %z = add double %Z, 0.0 ;; select operand is not a load + %z = fadd double %Z, 0.0 ;; select operand is not a load %D = select bool %C, double %Y, double %z ret double %D } @@ -545,7 +545,7 @@ eliminates a constant pool load. For example, consider: define i64 @ccosf(float %z.0, float %z.1) nounwind readonly { entry: - %tmp6 = sub float -0.000000e+00, %z.1 ; <float> [#uses=1] + %tmp6 = fsub float -0.000000e+00, %z.1 ; <float> [#uses=1] %tmp20 = tail call i64 @ccoshf( float %tmp6, float %z.0 ) nounwind readonly ret i64 %tmp20 } diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt index 3c6138b..d4545a6 100644 --- a/lib/Target/X86/README.txt +++ b/lib/Target/X86/README.txt @@ -227,11 +227,6 @@ lambda, siod, optimizer-eval, ackermann, hash2, nestedloop, strcat, and Treesor. //===---------------------------------------------------------------------===// -Teach the coalescer to coalesce vregs of different register classes. e.g. FR32 / -FR64 to VR128. 
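The X86ATTInstPrinter hunk above fixes a classic varargs truncation: MCOperand::getImm() returns a 64-bit value, but a "%X" conversion consumes only an unsigned int. A minimal standalone sketch of the bug and the fix (plain C++, not LLVM code):

#include <cstdint>
#include <cstdio>

int main() {
  int64_t Imm = 0x1122334455667788LL;
  // Before: "imm = 0x%X" read 32 bits of a 64-bit argument -- undefined
  // behavior that in practice printed a truncated value.
  // After: cast to long long and use the matching %llX length modifier.
  std::printf("imm = 0x%llX\n", (long long)Imm);
  return 0;
}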
- -//===---------------------------------------------------------------------===// - Adding to the list of cmp / test poor codegen issues: int test(__m128 *A, __m128 *B) { diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index 1a1e447..ba0ee6c 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -19,13 +19,15 @@ namespace llvm { -class X86TargetMachine; class FunctionPass; -class MachineCodeEmitter; +class JITCodeEmitter; +class MCAssembler; class MCCodeEmitter; class MCContext; -class JITCodeEmitter; +class MachineCodeEmitter; class Target; +class TargetAsmBackend; +class X86TargetMachine; class formatted_raw_ostream; /// createX86ISelDag - This pass converts a legalized DAG into a @@ -55,6 +57,9 @@ MCCodeEmitter *createX86_32MCCodeEmitter(const Target &, TargetMachine &TM, MCCodeEmitter *createX86_64MCCodeEmitter(const Target &, TargetMachine &TM, MCContext &Ctx); +TargetAsmBackend *createX86_32AsmBackend(const Target &, MCAssembler &); +TargetAsmBackend *createX86_64AsmBackend(const Target &, MCAssembler &); + /// createX86EmitCodeToMemory - Returns a pass that converts a register /// allocated function into raw machine code in a dynamically /// allocated chunk of memory. diff --git a/lib/Target/X86/X86AsmBackend.cpp b/lib/Target/X86/X86AsmBackend.cpp new file mode 100644 index 0000000..e6654ef --- /dev/null +++ b/lib/Target/X86/X86AsmBackend.cpp @@ -0,0 +1,34 @@ +//===-- X86AsmBackend.cpp - X86 Assembler Backend -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Target/TargetAsmBackend.h" +#include "X86.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/Target/TargetAsmBackend.h" +using namespace llvm; + +namespace { + +class X86AsmBackend : public TargetAsmBackend { +public: + X86AsmBackend(const Target &T, MCAssembler &A) + : TargetAsmBackend(T) {} +}; + +} + +TargetAsmBackend *llvm::createX86_32AsmBackend(const Target &T, + MCAssembler &A) { + return new X86AsmBackend(T, A); +} + +TargetAsmBackend *llvm::createX86_64AsmBackend(const Target &T, + MCAssembler &A) { + return new X86AsmBackend(T, A); +} diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 69a9d60..17366ee 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -1161,6 +1161,8 @@ bool X86FastISel::X86VisitIntrinsicCall(IntrinsicInst &I) { if (!X86SelectAddress(DI->getAddress(), AM)) return false; const TargetInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); + // FIXME may need to add RegState::Debug to any registers produced, + // although ESP/EBP should be the only ones at the moment. addFullAddress(BuildMI(MBB, DL, II), AM).addImm(0). addMetadata(DI->getVariable()); return true; diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 7b349f6..08030e0 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -12,15 +12,6 @@ // //===----------------------------------------------------------------------===// -// Force NDEBUG on in any optimized build on Darwin. -// -// FIXME: This is a huge hack, to work around ridiculously awful compile times -// on this file with gcc-4.2 on Darwin, in Release mode. 
-#if (!defined(__llvm__) && defined(__APPLE__) && \ - defined(__OPTIMIZE__) && !defined(NDEBUG)) -#define NDEBUG -#endif - #define DEBUG_TYPE "x86-isel" #include "X86.h" #include "X86InstrBuilder.h" @@ -177,15 +168,11 @@ namespace { return "X86 DAG->DAG Instruction Selection"; } - /// InstructionSelect - This callback is invoked by - /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. - virtual void InstructionSelect(); - virtual void EmitFunctionEntryCode(Function &Fn, MachineFunction &MF); virtual bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const; - virtual bool IsLegalToFold(SDValue N, SDNode *U, SDNode *Root) const; + virtual void PreprocessISelDAG(); // Include the pieces autogenerated from the target description. #include "X86GenDAGISel.inc" @@ -209,18 +196,17 @@ namespace { SDValue &Scale, SDValue &Index, SDValue &Disp); bool SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp); - bool SelectScalarSSELoad(SDNode *Op, SDValue Pred, - SDValue N, SDValue &Base, SDValue &Scale, + bool SelectScalarSSELoad(SDNode *Root, SDValue N, + SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment, - SDValue &InChain, SDValue &OutChain); + SDValue &NodeWithChain); + bool TryFoldLoad(SDNode *P, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment); - void PreprocessForRMW(); - void PreprocessForFPConvert(); - + /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, @@ -296,10 +282,6 @@ namespace { const X86InstrInfo *getInstrInfo() { return getTargetMachine().getInstrInfo(); } - -#ifndef NDEBUG - unsigned Indent; -#endif }; } @@ -367,65 +349,6 @@ X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const { return true; } - -bool X86DAGToDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root) const { - if (OptLevel == CodeGenOpt::None) return false; - - // Proceed to 'generic' cycle finder code - return SelectionDAGISel::IsLegalToFold(N, U, Root); -} - -/// MoveBelowTokenFactor - Replace TokenFactor operand with load's chain operand -/// and move load below the TokenFactor. Replace store's chain operand with -/// load's chain result. -static void MoveBelowTokenFactor(SelectionDAG *CurDAG, SDValue Load, - SDValue Store, SDValue TF) { - SmallVector<SDValue, 4> Ops; - for (unsigned i = 0, e = TF.getNode()->getNumOperands(); i != e; ++i) - if (Load.getNode() == TF.getOperand(i).getNode()) - Ops.push_back(Load.getOperand(0)); - else - Ops.push_back(TF.getOperand(i)); - SDValue NewTF = CurDAG->UpdateNodeOperands(TF, &Ops[0], Ops.size()); - SDValue NewLoad = CurDAG->UpdateNodeOperands(Load, NewTF, - Load.getOperand(1), - Load.getOperand(2)); - CurDAG->UpdateNodeOperands(Store, NewLoad.getValue(1), Store.getOperand(1), - Store.getOperand(2), Store.getOperand(3)); -} - -/// isRMWLoad - Return true if N is a load that's part of RMW sub-DAG. The -/// chain produced by the load must only be used by the store's chain operand, -/// otherwise this may produce a cycle in the DAG. 
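For context on the PreprocessForRMW machinery being deleted here: it rewrote chains so that (store (op (load p)), p) could be selected as a single x86 read-modify-write instruction. A source-level view of the pattern it served (illustrative only):

#include <cstdint>

void rmw_add(uint32_t *p, uint32_t x) {
  // On x86 this typically selects to one memory-operand instruction,
  // e.g. "addl %esi, (%rdi)", rather than separate load/add/store.
  *p += x;
}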
-/// -static bool isRMWLoad(SDValue N, SDValue Chain, SDValue Address, - SDValue &Load) { - if (N.getOpcode() == ISD::BIT_CONVERT) { - if (!N.hasOneUse()) - return false; - N = N.getOperand(0); - } - - LoadSDNode *LD = dyn_cast<LoadSDNode>(N); - if (!LD || LD->isVolatile()) - return false; - if (LD->getAddressingMode() != ISD::UNINDEXED) - return false; - - ISD::LoadExtType ExtType = LD->getExtensionType(); - if (ExtType != ISD::NON_EXTLOAD && ExtType != ISD::EXTLOAD) - return false; - - if (N.hasOneUse() && - LD->hasNUsesOfValue(1, 1) && - N.getOperand(1) == Address && - LD->isOperandOf(Chain.getNode())) { - Load = N; - return true; - } - return false; -} - /// MoveBelowCallSeqStart - Replace CALLSEQ_START operand with load's chain /// operand and move load below the call's chain operand. static void MoveBelowCallSeqStart(SelectionDAG *CurDAG, SDValue Load, @@ -489,51 +412,14 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain) { return false; } - -/// PreprocessForRMW - Preprocess the DAG to make instruction selection better. -/// This is only run if not in -O0 mode. -/// This allows the instruction selector to pick more read-modify-write -/// instructions. This is a common case: -/// -/// [Load chain] -/// ^ -/// | -/// [Load] -/// ^ ^ -/// | | -/// / \- -/// / | -/// [TokenFactor] [Op] -/// ^ ^ -/// | | -/// \ / -/// \ / -/// [Store] -/// -/// The fact the store's chain operand != load's chain will prevent the -/// (store (op (load))) instruction from being selected. We can transform it to: -/// -/// [Load chain] -/// ^ -/// | -/// [TokenFactor] -/// ^ -/// | -/// [Load] -/// ^ ^ -/// | | -/// | \- -/// | | -/// | [Op] -/// | ^ -/// | | -/// \ / -/// \ / -/// [Store] -void X86DAGToDAGISel::PreprocessForRMW() { +void X86DAGToDAGISel::PreprocessISelDAG() { + OptForSize = MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize); + for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), - E = CurDAG->allnodes_end(); I != E; ++I) { - if (I->getOpcode() == X86ISD::CALL) { + E = CurDAG->allnodes_end(); I != E; ) { + SDNode *N = I++; // Preincrement iterator to avoid invalidation issues. + + if (OptLevel != CodeGenOpt::None && N->getOpcode() == X86ISD::CALL) { /// Also try moving call address load from outside callseq_start to just /// before the call to allow it to be folded. 
/// @@ -553,85 +439,23 @@ void X86DAGToDAGISel::PreprocessForRMW() { /// \ / /// \ / /// [CALL] - SDValue Chain = I->getOperand(0); - SDValue Load = I->getOperand(1); + SDValue Chain = N->getOperand(0); + SDValue Load = N->getOperand(1); if (!isCalleeLoad(Load, Chain)) continue; - MoveBelowCallSeqStart(CurDAG, Load, SDValue(I, 0), Chain); + MoveBelowCallSeqStart(CurDAG, Load, SDValue(N, 0), Chain); ++NumLoadMoved; continue; } - - if (!ISD::isNON_TRUNCStore(I)) - continue; - SDValue Chain = I->getOperand(0); - - if (Chain.getNode()->getOpcode() != ISD::TokenFactor) - continue; - - SDValue N1 = I->getOperand(1); - SDValue N2 = I->getOperand(2); - if ((N1.getValueType().isFloatingPoint() && - !N1.getValueType().isVector()) || - !N1.hasOneUse()) - continue; - - bool RModW = false; - SDValue Load; - unsigned Opcode = N1.getNode()->getOpcode(); - switch (Opcode) { - case ISD::ADD: - case ISD::MUL: - case ISD::AND: - case ISD::OR: - case ISD::XOR: - case ISD::ADDC: - case ISD::ADDE: - case ISD::VECTOR_SHUFFLE: { - SDValue N10 = N1.getOperand(0); - SDValue N11 = N1.getOperand(1); - RModW = isRMWLoad(N10, Chain, N2, Load); - if (!RModW) - RModW = isRMWLoad(N11, Chain, N2, Load); - break; - } - case ISD::SUB: - case ISD::SHL: - case ISD::SRA: - case ISD::SRL: - case ISD::ROTL: - case ISD::ROTR: - case ISD::SUBC: - case ISD::SUBE: - case X86ISD::SHLD: - case X86ISD::SHRD: { - SDValue N10 = N1.getOperand(0); - RModW = isRMWLoad(N10, Chain, N2, Load); - break; - } - } - - if (RModW) { - MoveBelowTokenFactor(CurDAG, Load, SDValue(I, 0), Chain); - ++NumLoadMoved; - checkForCycles(I); - } - } -} - - -/// PreprocessForFPConvert - Walk over the dag lowering fpround and fpextend -/// nodes that target the FP stack to be store and load to the stack. This is a -/// gross hack. We would like to simply mark these as being illegal, but when -/// we do that, legalize produces these when it expands calls, then expands -/// these in the same legalize pass. We would like dag combine to be able to -/// hack on these between the call expansion and the node legalization. As such -/// this pass basically does "really late" legalization of these inline with the -/// X86 isel pass. -void X86DAGToDAGISel::PreprocessForFPConvert() { - for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), - E = CurDAG->allnodes_end(); I != E; ) { - SDNode *N = I++; // Preincrement iterator to avoid invalidation issues. + + // Lower fpround and fpextend nodes that target the FP stack to be store and + // load to the stack. This is a gross hack. We would like to simply mark + // these as being illegal, but when we do that, legalize produces these when + // it expands calls, then expands these in the same legalize pass. We would + // like dag combine to be able to hack on these between the call expansion + // and the node legalization. As such this pass basically does "really + // late" legalization of these inline with the X86 isel pass. + // FIXME: This should only happen when not compiled with -O0. if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND) continue; @@ -687,30 +511,6 @@ void X86DAGToDAGISel::PreprocessForFPConvert() { } } -/// InstructionSelectBasicBlock - This callback is invoked by SelectionDAGISel -/// when it has created a SelectionDAG for us to codegen. 
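The rewritten loop in PreprocessISelDAG advances the iterator before transforming the current node, since a transformation may delete or replace it. The same idiom on a standard container (a stand-in for the SelectionDAG all-nodes list):

#include <list>

void eraseEvens(std::list<int> &Nodes) {
  for (std::list<int>::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ) {
    std::list<int>::iterator Cur = I++; // advance first: erasing *Cur leaves I valid
    if (*Cur % 2 == 0)
      Nodes.erase(Cur);
  }
}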
-void X86DAGToDAGISel::InstructionSelect() { - const Function *F = MF->getFunction(); - OptForSize = F->hasFnAttr(Attribute::OptimizeForSize); - - if (OptLevel != CodeGenOpt::None) - PreprocessForRMW(); - - // FIXME: This should only happen when not compiled with -O0. - PreprocessForFPConvert(); - - // Codegen the basic block. -#ifndef NDEBUG - DEBUG(dbgs() << "===== Instruction selection begins:\n"); - Indent = 0; -#endif - SelectRoot(*CurDAG); -#ifndef NDEBUG - DEBUG(dbgs() << "===== Instruction selection ends:\n"); -#endif - - CurDAG->RemoveDeadNodes(); -} /// EmitSpecialCodeForMain - Emit any code that needs to be executed only in /// the main function. @@ -1317,22 +1117,24 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Op, SDValue N, SDValue &Base, /// SelectScalarSSELoad - Match a scalar SSE load. In particular, we want to /// match a load whose top elements are either undef or zeros. The load flavor /// is derived from the type of N, which is either v4f32 or v2f64. -bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Op, SDValue Pred, +/// +/// We also return: +/// PatternChainNode: this is the matched node that has a chain input and +/// output. +bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment, - SDValue &InChain, - SDValue &OutChain) { + SDValue &PatternNodeWithChain) { if (N.getOpcode() == ISD::SCALAR_TO_VECTOR) { - InChain = N.getOperand(0).getValue(1); - if (ISD::isNON_EXTLoad(InChain.getNode()) && - InChain.getValue(0).hasOneUse() && - IsProfitableToFold(N, Pred.getNode(), Op) && - IsLegalToFold(N, Pred.getNode(), Op)) { - LoadSDNode *LD = cast<LoadSDNode>(InChain); - if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp, Segment)) + PatternNodeWithChain = N.getOperand(0); + if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) && + PatternNodeWithChain.hasOneUse() && + IsProfitableToFold(N.getOperand(0), N.getNode(), Root) && + IsLegalToFold(N.getOperand(0), N.getNode(), Root)) { + LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain); + if (!SelectAddr(Root, LD->getBasePtr(), Base, Scale, Index, Disp,Segment)) return false; - OutChain = LD->getChain(); return true; } } @@ -1344,13 +1146,14 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Op, SDValue Pred, N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR && N.getOperand(0).getNode()->hasOneUse() && ISD::isNON_EXTLoad(N.getOperand(0).getOperand(0).getNode()) && - N.getOperand(0).getOperand(0).hasOneUse()) { + N.getOperand(0).getOperand(0).hasOneUse() && + IsProfitableToFold(N.getOperand(0), N.getNode(), Root) && + IsLegalToFold(N.getOperand(0), N.getNode(), Root)) { // Okay, this is a zero extending load. Fold it. 
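SelectScalarSSELoad, reworked above, matches a load whose upper vector lanes are undef or zero so it can be folded into a scalar SSE op's memory operand. A source-level shape that produces such a node, using SSE intrinsics:

#include <xmmintrin.h>

__m128 add_scalar(__m128 a, const float *p) {
  // _mm_load_ss zeroes the upper three lanes; the load is a candidate
  // for folding into the memory operand of addss.
  return _mm_add_ss(a, _mm_load_ss(p));
}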
LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(0).getOperand(0)); - if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp, Segment)) + if (!SelectAddr(Root, LD->getBasePtr(), Base, Scale, Index, Disp, Segment)) return false; - OutChain = LD->getChain(); - InChain = SDValue(LD, 1); + PatternNodeWithChain = SDValue(LD, 0); return true; } return false; @@ -1424,7 +1227,6 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDNode *Op, SDValue N, bool X86DAGToDAGISel::SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp) { - assert(Op->getOpcode() == X86ISD::TLSADDR); assert(N.getOpcode() == ISD::TargetGlobalTLSAddress); const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N); @@ -1451,11 +1253,12 @@ bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment) { - if (ISD::isNON_EXTLoad(N.getNode()) && - IsProfitableToFold(N, P, P) && - IsLegalToFold(N, P, P)) - return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp, Segment); - return false; + if (!ISD::isNON_EXTLoad(N.getNode()) || + !IsProfitableToFold(N, P, P) || + !IsLegalToFold(N, P, P)) + return false; + + return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp, Segment); } /// getGlobalBaseReg - Return an SDNode that returns the value of @@ -1558,7 +1361,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { Opc = X86::LOCK_DEC16m; else if (isSub) { if (isCN) { - if (Predicate_i16immSExt8(Val.getNode())) + if (Predicate_immSext8(Val.getNode())) Opc = X86::LOCK_SUB16mi8; else Opc = X86::LOCK_SUB16mi; @@ -1566,7 +1369,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { Opc = X86::LOCK_SUB16mr; } else { if (isCN) { - if (Predicate_i16immSExt8(Val.getNode())) + if (Predicate_immSext8(Val.getNode())) Opc = X86::LOCK_ADD16mi8; else Opc = X86::LOCK_ADD16mi; @@ -1581,7 +1384,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { Opc = X86::LOCK_DEC32m; else if (isSub) { if (isCN) { - if (Predicate_i32immSExt8(Val.getNode())) + if (Predicate_immSext8(Val.getNode())) Opc = X86::LOCK_SUB32mi8; else Opc = X86::LOCK_SUB32mi; @@ -1589,7 +1392,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { Opc = X86::LOCK_SUB32mr; } else { if (isCN) { - if (Predicate_i32immSExt8(Val.getNode())) + if (Predicate_immSext8(Val.getNode())) Opc = X86::LOCK_ADD32mi8; else Opc = X86::LOCK_ADD32mi; @@ -1605,7 +1408,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { else if (isSub) { Opc = X86::LOCK_SUB64mr; if (isCN) { - if (Predicate_i64immSExt8(Val.getNode())) + if (Predicate_immSext8(Val.getNode())) Opc = X86::LOCK_SUB64mi8; else if (Predicate_i64immSExt32(Val.getNode())) Opc = X86::LOCK_SUB64mi32; @@ -1613,7 +1416,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { } else { Opc = X86::LOCK_ADD64mr; if (isCN) { - if (Predicate_i64immSExt8(Val.getNode())) + if (Predicate_immSext8(Val.getNode())) Opc = X86::LOCK_ADD64mi8; else if (Predicate_i64immSExt32(Val.getNode())) Opc = X86::LOCK_ADD64mi32; @@ -1710,24 +1513,10 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { unsigned Opcode = Node->getOpcode(); DebugLoc dl = Node->getDebugLoc(); -#ifndef NDEBUG - DEBUG({ - dbgs() << std::string(Indent, ' ') << "Selecting: "; - Node->dump(CurDAG); - dbgs() << '\n'; - }); - Indent += 2; -#endif + DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << '\n'); if (Node->isMachineOpcode()) { -#ifndef NDEBUG - 
DEBUG({ - dbgs() << std::string(Indent-2, ' ') << "== "; - Node->dump(CurDAG); - dbgs() << '\n'; - }); - Indent -= 2; -#endif + DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n'); return NULL; // Already selected. } @@ -1823,13 +1612,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { LoReg, NVT, InFlag); InFlag = Result.getValue(2); ReplaceUses(SDValue(Node, 0), Result); -#ifndef NDEBUG - DEBUG({ - dbgs() << std::string(Indent-2, ' ') << "=> "; - Result.getNode()->dump(CurDAG); - dbgs() << '\n'; - }); -#endif + DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); } // Copy the high half of the result, if it is needed. if (!SDValue(Node, 1).use_empty()) { @@ -1852,19 +1635,9 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { InFlag = Result.getValue(2); } ReplaceUses(SDValue(Node, 1), Result); -#ifndef NDEBUG - DEBUG({ - dbgs() << std::string(Indent-2, ' ') << "=> "; - Result.getNode()->dump(CurDAG); - dbgs() << '\n'; - }); -#endif + DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); } -#ifndef NDEBUG - Indent -= 2; -#endif - return NULL; } @@ -1979,13 +1752,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { LoReg, NVT, InFlag); InFlag = Result.getValue(2); ReplaceUses(SDValue(Node, 0), Result); -#ifndef NDEBUG - DEBUG({ - dbgs() << std::string(Indent-2, ' ') << "=> "; - Result.getNode()->dump(CurDAG); - dbgs() << '\n'; - }); -#endif + DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); } // Copy the remainder (high) result, if it is needed. if (!SDValue(Node, 1).use_empty()) { @@ -2009,19 +1776,8 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { InFlag = Result.getValue(2); } ReplaceUses(SDValue(Node, 1), Result); -#ifndef NDEBUG - DEBUG({ - dbgs() << std::string(Indent-2, ' ') << "=> "; - Result.getNode()->dump(CurDAG); - dbgs() << '\n'; - }); -#endif + DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); } - -#ifndef NDEBUG - Indent -= 2; -#endif - return NULL; } @@ -2134,17 +1890,12 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDNode *ResNode = SelectCode(Node); -#ifndef NDEBUG - DEBUG({ - dbgs() << std::string(Indent-2, ' ') << "=> "; - if (ResNode == NULL || ResNode == Node) - Node->dump(CurDAG); - else - ResNode->dump(CurDAG); - dbgs() << '\n'; - }); - Indent -= 2; -#endif + DEBUG(dbgs() << "=> "; + if (ResNode == NULL || ResNode == Node) + Node->dump(CurDAG); + else + ResNode->dump(CurDAG); + dbgs() << '\n'); return ResNode; } diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 9974d8c..e2b8193 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -73,7 +73,7 @@ static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) { case X86Subtarget::isDarwin: if (TM.getSubtarget<X86Subtarget>().is64Bit()) return new X8664_MachoTargetObjectFile(); - return new X8632_MachoTargetObjectFile(); + return new TargetLoweringObjectFileMachO(); case X86Subtarget::isELF: if (TM.getSubtarget<X86Subtarget>().is64Bit()) return new X8664_ELFTargetObjectFile(TM); @@ -990,6 +990,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setTargetDAGCombine(ISD::VECTOR_SHUFFLE); setTargetDAGCombine(ISD::BUILD_VECTOR); setTargetDAGCombine(ISD::SELECT); + setTargetDAGCombine(ISD::AND); setTargetDAGCombine(ISD::SHL); setTargetDAGCombine(ISD::SRA); setTargetDAGCombine(ISD::SRL); @@ -1743,7 +1744,7 @@ EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF, // Calculate the new stack slot for the return address. 
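These hunks replace hand-rolled #ifndef NDEBUG indentation bookkeeping with plain DEBUG(...) statements. A simplified stand-in for the macro (LLVM's real DEBUG additionally gates on the -debug flag and DEBUG_TYPE):

#include <iostream>

#ifndef NDEBUG
#define DEBUG(X) do { X; } while (false)
#else
#define DEBUG(X) do { } while (false)
#endif

int main() {
  // Several statements ride in one macro argument, as in Select() above;
  // semicolons (unlike commas) do not split macro arguments.
  DEBUG(std::cerr << "Selecting: "; std::cerr << "some node" << '\n');
  return 0;
}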
int SlotSize = Is64Bit ? 8 : 4; int NewReturnAddrFI = - MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize, true,false); + MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize, false, false); EVT VT = Is64Bit ? MVT::i64 : MVT::i32; SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT); Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx, @@ -2376,7 +2377,7 @@ SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) { // Set up a frame object for the return address. uint64_t SlotSize = TD->getPointerSize(); ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize, - true, false); + false, false); FuncInfo->setRAIndex(ReturnAddrIndex); } @@ -4816,8 +4817,16 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG){ if ((EltVT.getSizeInBits() == 8 || EltVT.getSizeInBits() == 16) && isa<ConstantSDNode>(N2)) { - unsigned Opc = (EltVT.getSizeInBits() == 8) ? X86ISD::PINSRB - : X86ISD::PINSRW; + unsigned Opc; + if (VT == MVT::v8i16) + Opc = X86ISD::PINSRW; + else if (VT == MVT::v4i16) + Opc = X86ISD::MMX_PINSRW; + else if (VT == MVT::v16i8) + Opc = X86ISD::PINSRB; + else + Opc = X86ISD::PINSRB; + // Transform it so it match pinsr{b,w} which expects a GR32 as its second // argument. if (N1.getValueType() != MVT::i32) @@ -4868,7 +4877,8 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1); if (N2.getValueType() != MVT::i32) N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue()); - return DAG.getNode(X86ISD::PINSRW, dl, VT, N0, N1, N2); + return DAG.getNode(VT == MVT::v8i16 ? X86ISD::PINSRW : X86ISD::MMX_PINSRW, + dl, VT, N0, N1, N2); } return SDValue(); } @@ -5244,7 +5254,7 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) { SDValue AndNode = DAG.getNode(ISD::AND, dl, MVT::i8, ShAmt, DAG.getConstant(VTBits, MVT::i8)); - SDValue Cond = DAG.getNode(X86ISD::CMP, dl, VT, + SDValue Cond = DAG.getNode(X86ISD::CMP, dl, MVT::i32, AndNode, DAG.getConstant(0, MVT::i8)); SDValue Hi, Lo; @@ -5873,26 +5883,31 @@ SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, /// LowerToBT - Result of 'and' is compared against zero. Turn it into a BT node /// if it's possible. 
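The insert-element hunks above split PINSRW handling by vector type; v4i16 now gets a dedicated MMX_PINSRW node. The intrinsic-level counterparts of the two pinsrw forms:

#include <xmmintrin.h>  // _mm_insert_pi16 (pinsrw on an MMX register)
#include <emmintrin.h>  // _mm_insert_epi16 (pinsrw on an XMM register)

__m64 insert_v4i16(__m64 v, int x) {
  return _mm_insert_pi16(v, x, 2);  // v4i16 insert -> MMX_PINSRW
}

__m128i insert_v8i16(__m128i v, int x) {
  return _mm_insert_epi16(v, x, 2); // v8i16 insert -> PINSRW
}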
-static SDValue LowerToBT(SDValue Op0, ISD::CondCode CC, +static SDValue LowerToBT(SDValue And, ISD::CondCode CC, DebugLoc dl, SelectionDAG &DAG) { + SDValue Op0 = And.getOperand(0); + SDValue Op1 = And.getOperand(1); + if (Op0.getOpcode() == ISD::TRUNCATE) + Op0 = Op0.getOperand(0); + if (Op1.getOpcode() == ISD::TRUNCATE) + Op1 = Op1.getOperand(0); + SDValue LHS, RHS; - if (Op0.getOperand(1).getOpcode() == ISD::SHL) { - if (ConstantSDNode *Op010C = - dyn_cast<ConstantSDNode>(Op0.getOperand(1).getOperand(0))) - if (Op010C->getZExtValue() == 1) { - LHS = Op0.getOperand(0); - RHS = Op0.getOperand(1).getOperand(1); + if (Op1.getOpcode() == ISD::SHL) { + if (ConstantSDNode *And10C = dyn_cast<ConstantSDNode>(Op1.getOperand(0))) + if (And10C->getZExtValue() == 1) { + LHS = Op0; + RHS = Op1.getOperand(1); } - } else if (Op0.getOperand(0).getOpcode() == ISD::SHL) { - if (ConstantSDNode *Op000C = - dyn_cast<ConstantSDNode>(Op0.getOperand(0).getOperand(0))) - if (Op000C->getZExtValue() == 1) { - LHS = Op0.getOperand(1); - RHS = Op0.getOperand(0).getOperand(1); + } else if (Op0.getOpcode() == ISD::SHL) { + if (ConstantSDNode *And00C = dyn_cast<ConstantSDNode>(Op0.getOperand(0))) + if (And00C->getZExtValue() == 1) { + LHS = Op1; + RHS = Op0.getOperand(1); } - } else if (Op0.getOperand(1).getOpcode() == ISD::Constant) { - ConstantSDNode *AndRHS = cast<ConstantSDNode>(Op0.getOperand(1)); - SDValue AndLHS = Op0.getOperand(0); + } else if (Op1.getOpcode() == ISD::Constant) { + ConstantSDNode *AndRHS = cast<ConstantSDNode>(Op1); + SDValue AndLHS = Op0; if (AndRHS->getZExtValue() == 1 && AndLHS.getOpcode() == ISD::SRL) { LHS = AndLHS.getOperand(0); RHS = AndLHS.getOperand(1); @@ -5942,6 +5957,21 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { return NewSetCC; } + // Look for "(setcc) == / != 1" to avoid unncessary setcc. 
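LowerToBT, rewritten above to look through truncates, recognizes single-bit tests and emits the x86 BT instruction. The source shapes it matches correspond to:

#include <cstdint>

bool bit_set_shl(uint32_t x, unsigned n) {
  return (x & (1u << n)) != 0;  // the (and x, (shl 1, n)) form
}

bool bit_set_srl(uint32_t x, unsigned n) {
  return ((x >> n) & 1u) != 0;  // the (and (srl x, n), 1) form
}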
+ if (Op0.getOpcode() == X86ISD::SETCC && + Op1.getOpcode() == ISD::Constant && + (cast<ConstantSDNode>(Op1)->getZExtValue() == 1 || + cast<ConstantSDNode>(Op1)->isNullValue()) && + (CC == ISD::SETEQ || CC == ISD::SETNE)) { + X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0); + bool Invert = (CC == ISD::SETNE) ^ + cast<ConstantSDNode>(Op1)->isNullValue(); + if (Invert) + CCode = X86::GetOppositeBranchCondition(CCode); + return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, + DAG.getConstant(CCode, MVT::i8), Op0.getOperand(1)); + } + bool isFP = Op.getOperand(1).getValueType().isFloatingPoint(); unsigned X86CC = TranslateX86CC(CC, isFP, Op0, Op1, DAG); if (X86CC == X86::COND_INVALID) @@ -6444,8 +6474,7 @@ X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()), false, false, false, false, 0, CallingConv::C, false, /*isReturnValueUsed=*/false, - DAG.getExternalSymbol(bzeroEntry, IntPtr), Args, DAG, dl, - DAG.GetOrdering(Chain.getNode())); + DAG.getExternalSymbol(bzeroEntry, IntPtr), Args, DAG, dl); return CallResult.second; } @@ -7662,6 +7691,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::INSERTPS: return "X86ISD::INSERTPS"; case X86ISD::PINSRB: return "X86ISD::PINSRB"; case X86ISD::PINSRW: return "X86ISD::PINSRW"; + case X86ISD::MMX_PINSRW: return "X86ISD::MMX_PINSRW"; case X86ISD::PSHUFB: return "X86ISD::PSHUFB"; case X86ISD::FMAX: return "X86ISD::FMAX"; case X86ISD::FMIN: return "X86ISD::FMIN"; @@ -7769,7 +7799,7 @@ bool X86TargetLowering::isTruncateFree(const Type *Ty1, const Type *Ty2) const { unsigned NumBits2 = Ty2->getPrimitiveSizeInBits(); if (NumBits1 <= NumBits2) return false; - return Subtarget->is64Bit() || NumBits1 < 64; + return true; } bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { @@ -7779,7 +7809,7 @@ bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { unsigned NumBits2 = VT2.getSizeInBits(); if (NumBits1 <= NumBits2) return false; - return Subtarget->is64Bit() || NumBits1 < 64; + return true; } bool X86TargetLowering::isZExtFree(const Type *Ty1, const Type *Ty2) const { @@ -8792,10 +8822,9 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, SDValue RHS = N->getOperand(2); // If we have SSE[12] support, try to form min/max nodes. SSE min/max - // instructions have the peculiarity that if either operand is a NaN, - // they chose what we call the RHS operand (and as such are not symmetric). - // It happens that this matches the semantics of the common C idiom - // x<y?x:y and related forms, so we can recognize these cases. + // instructions match the semantics of the common C idiom x<y?x:y but not + // x<=y?x:y, because of how they handle negative zero (which can be + // ignored in unsafe-math mode). if (Subtarget->hasSSE2() && (LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64) && Cond.getOpcode() == ISD::SETCC) { @@ -8803,36 +8832,34 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, unsigned Opcode = 0; // Check for x CC y ? x : y. - if (LHS == Cond.getOperand(0) && RHS == Cond.getOperand(1)) { + if (DAG.isEqualTo(LHS, Cond.getOperand(0)) && + DAG.isEqualTo(RHS, Cond.getOperand(1))) { switch (CC) { default: break; case ISD::SETULT: - // This can be a min if we can prove that at least one of the operands - // is not a nan. - if (!FiniteOnlyFPMath()) { - if (DAG.isKnownNeverNaN(RHS)) { - // Put the potential NaN in the RHS so that SSE will preserve it. 
- std::swap(LHS, RHS); - } else if (!DAG.isKnownNeverNaN(LHS)) + // Converting this to a min would handle NaNs incorrectly, and swapping + // the operands would cause it to handle comparisons between positive + // and negative zero incorrectly. + if (!FiniteOnlyFPMath() && + (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) { + if (!UnsafeFPMath && + !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) break; + std::swap(LHS, RHS); } Opcode = X86ISD::FMIN; break; case ISD::SETOLE: - // This can be a min if we can prove that at least one of the operands - // is not a nan. - if (!FiniteOnlyFPMath()) { - if (DAG.isKnownNeverNaN(LHS)) { - // Put the potential NaN in the RHS so that SSE will preserve it. - std::swap(LHS, RHS); - } else if (!DAG.isKnownNeverNaN(RHS)) - break; - } + // Converting this to a min would handle comparisons between positive + // and negative zero incorrectly. + if (!UnsafeFPMath && + !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS)) + break; Opcode = X86ISD::FMIN; break; case ISD::SETULE: - // This can be a min, but if either operand is a NaN we need it to - // preserve the original LHS. + // Converting this to a min would handle both negative zeros and NaNs + // incorrectly, but we can swap the operands to fix both. std::swap(LHS, RHS); case ISD::SETOLT: case ISD::SETLT: @@ -8841,32 +8868,29 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, break; case ISD::SETOGE: - // This can be a max if we can prove that at least one of the operands - // is not a nan. - if (!FiniteOnlyFPMath()) { - if (DAG.isKnownNeverNaN(LHS)) { - // Put the potential NaN in the RHS so that SSE will preserve it. - std::swap(LHS, RHS); - } else if (!DAG.isKnownNeverNaN(RHS)) - break; - } + // Converting this to a max would handle comparisons between positive + // and negative zero incorrectly. + if (!UnsafeFPMath && + !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(LHS)) + break; Opcode = X86ISD::FMAX; break; case ISD::SETUGT: - // This can be a max if we can prove that at least one of the operands - // is not a nan. - if (!FiniteOnlyFPMath()) { - if (DAG.isKnownNeverNaN(RHS)) { - // Put the potential NaN in the RHS so that SSE will preserve it. - std::swap(LHS, RHS); - } else if (!DAG.isKnownNeverNaN(LHS)) + // Converting this to a max would handle NaNs incorrectly, and swapping + // the operands would cause it to handle comparisons between positive + // and negative zero incorrectly. + if (!FiniteOnlyFPMath() && + (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) { + if (!UnsafeFPMath && + !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) break; + std::swap(LHS, RHS); } Opcode = X86ISD::FMAX; break; case ISD::SETUGE: - // This can be a max, but if either operand is a NaN we need it to - // preserve the original LHS. + // Converting this to a max would handle both negative zeros and NaNs + // incorrectly, but we can swap the operands to fix both. std::swap(LHS, RHS); case ISD::SETOGT: case ISD::SETGT: @@ -8875,36 +8899,33 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, break; } // Check for x CC y ? y : x -- a min/max with reversed arms. - } else if (LHS == Cond.getOperand(1) && RHS == Cond.getOperand(0)) { + } else if (DAG.isEqualTo(LHS, Cond.getOperand(1)) && + DAG.isEqualTo(RHS, Cond.getOperand(0))) { switch (CC) { default: break; case ISD::SETOGE: - // This can be a min if we can prove that at least one of the operands - // is not a nan. 
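The rewritten min/max combine encodes two SSE facts: minss/maxss return the second operand when either input is NaN, and they treat -0.0 and +0.0 as equal. That makes x < y ? x : y an exact match for minss, while variants needing an operand swap are only safe when NaNs or signed zeros are provably absent. Checking the NaN behavior directly:

#include <xmmintrin.h>
#include <cmath>
#include <cstdio>

float min_sse(float x, float y) {
  // minss: if either input is NaN, the result is y (the second operand),
  // exactly the semantics of x < y ? x : y.
  return _mm_cvtss_f32(_mm_min_ss(_mm_set_ss(x), _mm_set_ss(y)));
}

int main() {
  std::printf("%f\n", min_sse(NAN, 2.0f)); // 2.0: NaN < y is false, take y
  std::printf("%f\n", min_sse(2.0f, NAN)); // nan: y is returned unchanged
  return 0;
}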
- if (!FiniteOnlyFPMath()) { - if (DAG.isKnownNeverNaN(RHS)) { - // Put the potential NaN in the RHS so that SSE will preserve it. - std::swap(LHS, RHS); - } else if (!DAG.isKnownNeverNaN(LHS)) + // Converting this to a min would handle comparisons between positive + // and negative zero incorrectly, and swapping the operands would + // cause it to handle NaNs incorrectly. + if (!UnsafeFPMath && + !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) { + if (!FiniteOnlyFPMath() && + (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) break; + std::swap(LHS, RHS); } Opcode = X86ISD::FMIN; break; case ISD::SETUGT: - // This can be a min if we can prove that at least one of the operands - // is not a nan. - if (!FiniteOnlyFPMath()) { - if (DAG.isKnownNeverNaN(LHS)) { - // Put the potential NaN in the RHS so that SSE will preserve it. - std::swap(LHS, RHS); - } else if (!DAG.isKnownNeverNaN(RHS)) - break; - } + // Converting this to a min would handle NaNs incorrectly. + if (!UnsafeFPMath && + (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) + break; Opcode = X86ISD::FMIN; break; case ISD::SETUGE: - // This can be a min, but if either operand is a NaN we need it to - // preserve the original LHS. + // Converting this to a min would handle both negative zeros and NaNs + // incorrectly, but we can swap the operands to fix both. std::swap(LHS, RHS); case ISD::SETOGT: case ISD::SETGT: @@ -8913,32 +8934,28 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, break; case ISD::SETULT: - // This can be a max if we can prove that at least one of the operands - // is not a nan. - if (!FiniteOnlyFPMath()) { - if (DAG.isKnownNeverNaN(LHS)) { - // Put the potential NaN in the RHS so that SSE will preserve it. - std::swap(LHS, RHS); - } else if (!DAG.isKnownNeverNaN(RHS)) - break; - } + // Converting this to a max would handle NaNs incorrectly. + if (!FiniteOnlyFPMath() && + (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) + break; Opcode = X86ISD::FMAX; break; case ISD::SETOLE: - // This can be a max if we can prove that at least one of the operands - // is not a nan. - if (!FiniteOnlyFPMath()) { - if (DAG.isKnownNeverNaN(RHS)) { - // Put the potential NaN in the RHS so that SSE will preserve it. - std::swap(LHS, RHS); - } else if (!DAG.isKnownNeverNaN(LHS)) + // Converting this to a max would handle comparisons between positive + // and negative zero incorrectly, and swapping the operands would + // cause it to handle NaNs incorrectly. + if (!UnsafeFPMath && + !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS)) { + if (!FiniteOnlyFPMath() && + (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) break; + std::swap(LHS, RHS); } Opcode = X86ISD::FMAX; break; case ISD::SETULE: - // This can be a max, but if either operand is a NaN we need it to - // preserve the original LHS. + // Converting this to a max would handle both negative zeros and NaNs + // incorrectly, but we can swap the operands to fix both. std::swap(LHS, RHS); case ISD::SETOLT: case ISD::SETLT: @@ -9157,16 +9174,64 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +/// PerformANDCombine - Look for SSE and instructions of this form: +/// (and x, (build_vector signbit,signbit,signbit,signbit)). If there +/// exists a use of a build_vector that's the bitwise complement of the mask, +/// then transform the node to +/// (and (xor x, (build_vector -1,-1,-1,-1)), (build_vector ~sb,~sb,~sb,~sb)). 
+static SDValue PerformANDCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI) { + EVT VT = N->getValueType(0); + if (!VT.isVector() || !VT.isInteger()) + return SDValue(); + + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + if (N0.getOpcode() == ISD::XOR || !N1.hasOneUse()) + return SDValue(); + + if (N1.getOpcode() == ISD::BUILD_VECTOR) { + unsigned NumElts = VT.getVectorNumElements(); + EVT EltVT = VT.getVectorElementType(); + SmallVector<SDValue, 8> Mask; + Mask.reserve(NumElts); + for (unsigned i = 0; i != NumElts; ++i) { + SDValue Arg = N1.getOperand(i); + if (Arg.getOpcode() == ISD::UNDEF) { + Mask.push_back(Arg); + continue; + } + ConstantSDNode *C = dyn_cast<ConstantSDNode>(Arg); + if (!C) + return SDValue(); + if (!C->getAPIntValue().isSignBit() && + !C->getAPIntValue().isMaxSignedValue()) + return SDValue(); + Mask.push_back(DAG.getConstant(~C->getAPIntValue(), EltVT)); + } + N1 = DAG.getNode(ISD::BUILD_VECTOR, N1.getDebugLoc(), VT, + &Mask[0], NumElts); + if (!N1.use_empty()) { + unsigned Bits = EltVT.getSizeInBits(); + Mask.clear(); + for (unsigned i = 0; i != NumElts; ++i) + Mask.push_back(DAG.getConstant(APInt::getAllOnesValue(Bits), EltVT)); + SDValue NewMask = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), + VT, &Mask[0], NumElts); + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, + DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, + N0, NewMask), N1); + } + } + + return SDValue(); +} /// PerformMulCombine - Optimize a single multiply with constant into two /// in order to implement it with two cheaper instructions, e.g. /// LEA + SHL, LEA + LEA. static SDValue PerformMulCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI) { - if (DAG.getMachineFunction(). - getFunction()->hasFnAttr(Attribute::OptimizeForSize)) - return SDValue(); - if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) return SDValue(); @@ -9305,7 +9370,7 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, } } else if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT) { if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(InVec.getOperand(2))) { - unsigned SplatIdx = cast<ShuffleVectorSDNode>(ShAmtOp)->getSplatIndex(); + unsigned SplatIdx= cast<ShuffleVectorSDNode>(ShAmtOp)->getSplatIndex(); if (C->getZExtValue() == SplatIdx) BaseShAmt = InVec.getOperand(1); } @@ -9690,6 +9755,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, *this); case ISD::SELECT: return PerformSELECTCombine(N, DAG, Subtarget); case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI); + case ISD::AND: return PerformANDCombine(N, DAG, DCI); case ISD::MUL: return PerformMulCombine(N, DAG, DCI); case ISD::SHL: case ISD::SRA: diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index cf0eb40..ffaf1cf 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -180,7 +180,7 @@ namespace llvm { /// PINSRW - Insert the lower 16-bits of a 32-bit value to a vector, /// corresponds to X86::PINSRW. - PINSRW, + PINSRW, MMX_PINSRW, /// PSHUFB - Shuffle 16 8-bit values within a vector. PSHUFB, diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td index 4ea3739..8462255 100644 --- a/lib/Target/X86/X86Instr64bit.td +++ b/lib/Target/X86/X86Instr64bit.td @@ -59,10 +59,11 @@ def tls64addr : ComplexPattern<i64, 4, "SelectTLSADDRAddr", // Pattern fragments. 
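The X86Instr64bit.td hunk that follows replaces per-width sign-extension predicates with the shared immSext8 PatLeaf (defined in the X86InstrInfo.td hunk further down). Its test is a round-trip through int8_t; standalone:

#include <cstdint>
#include <cassert>

bool fitsSExt8(int64_t V) {
  // Same check as the immSext8 PatLeaf: the value is unchanged by
  // truncating to 8 bits and sign-extending back, so the compact
  // imm8 instruction encodings apply.
  return V == (int8_t)V;
}

int main() {
  assert(fitsSExt8(127) && fitsSExt8(-128));
  assert(!fitsSExt8(128) && !fitsSExt8(-129));
  return 0;
}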
// -def i64immSExt8 : PatLeaf<(i64 imm), [{ - // i64immSExt8 predicate - True if the 64-bit immediate fits in a 8-bit - // sign extended field. - return (int64_t)N->getZExtValue() == (int8_t)N->getZExtValue(); +def i64immSExt8 : PatLeaf<(i64 immSext8)>; + +def GetLo32XForm : SDNodeXForm<imm, [{ + // Transformation function: get the low 32 bits. + return getI32Imm((unsigned)N->getZExtValue()); }]>; def i64immSExt32 : PatLeaf<(i64 imm), [{ @@ -71,6 +72,7 @@ def i64immSExt32 : PatLeaf<(i64 imm), [{ return (int64_t)N->getZExtValue() == (int32_t)N->getZExtValue(); }]>; + def i64immZExt32 : PatLeaf<(i64 imm), [{ // i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit // unsignedsign extended field. @@ -325,7 +327,7 @@ def MOV64ri32 : RIi32<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src), def MOV64rr_REV : RI<0x8B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "mov{q}\t{$src, $dst|$dst, $src}", []>; -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1 in def MOV64rm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "mov{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (load addr:$src))]>; @@ -556,7 +558,7 @@ def ADC64mi8 : RIi8<0x83, MRM2m, (outs), (ins i64mem:$dst, i64i8imm :$src2), addr:$dst)]>; def ADC64mi32 : RIi32<0x81, MRM2m, (outs), (ins i64mem:$dst, i64i32imm:$src2), "adc{q}\t{$src2, $dst|$dst, $src2}", - [(store (adde (load addr:$dst), i64immSExt8:$src2), + [(store (adde (load addr:$dst), i64immSExt32:$src2), addr:$dst)]>; } // Uses = [EFLAGS] @@ -1981,7 +1983,7 @@ def : Pat<(and GR64:$src, i64immZExt32:$imm), (i64 0), (AND32ri (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit), - imm:$imm), + (i32 (GetLo32XForm imm:$imm))), x86_subreg_32bit)>; // r & (2^32-1) ==> movz @@ -2105,34 +2107,34 @@ def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst), def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>; // (shl x (and y, 63)) ==> (shl x, y) -def : Pat<(shl GR64:$src1, (and CL:$amt, 63)), +def : Pat<(shl GR64:$src1, (and CL, 63)), (SHL64rCL GR64:$src1)>; -def : Pat<(store (shl (loadi64 addr:$dst), (and CL:$amt, 63)), addr:$dst), +def : Pat<(store (shl (loadi64 addr:$dst), (and CL, 63)), addr:$dst), (SHL64mCL addr:$dst)>; -def : Pat<(srl GR64:$src1, (and CL:$amt, 63)), +def : Pat<(srl GR64:$src1, (and CL, 63)), (SHR64rCL GR64:$src1)>; -def : Pat<(store (srl (loadi64 addr:$dst), (and CL:$amt, 63)), addr:$dst), +def : Pat<(store (srl (loadi64 addr:$dst), (and CL, 63)), addr:$dst), (SHR64mCL addr:$dst)>; -def : Pat<(sra GR64:$src1, (and CL:$amt, 63)), +def : Pat<(sra GR64:$src1, (and CL, 63)), (SAR64rCL GR64:$src1)>; -def : Pat<(store (sra (loadi64 addr:$dst), (and CL:$amt, 63)), addr:$dst), +def : Pat<(store (sra (loadi64 addr:$dst), (and CL, 63)), addr:$dst), (SAR64mCL addr:$dst)>; // Double shift patterns -def : Pat<(shrd GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm:$amt2)), +def : Pat<(shrd GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm)), (SHRD64rri8 GR64:$src1, GR64:$src2, (i8 imm:$amt1))>; def : Pat<(store (shrd (loadi64 addr:$dst), (i8 imm:$amt1), - GR64:$src2, (i8 imm:$amt2)), addr:$dst), + GR64:$src2, (i8 imm)), addr:$dst), (SHRD64mri8 addr:$dst, GR64:$src2, (i8 imm:$amt1))>; -def : Pat<(shld GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm:$amt2)), +def : Pat<(shld GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm)), (SHLD64rri8 GR64:$src1, GR64:$src2, (i8 imm:$amt1))>; def : Pat<(store (shld (loadi64 addr:$dst), (i8 imm:$amt1), - GR64:$src2, (i8 imm:$amt2)), 
addr:$dst), + GR64:$src2, (i8 imm)), addr:$dst), (SHLD64mri8 addr:$dst, GR64:$src2, (i8 imm:$amt1))>; // (or x1, x2) -> (add x1, x2) if two operands are known not to share bits. diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td index e22a903..ae24bfb 100644 --- a/lib/Target/X86/X86InstrFPStack.td +++ b/lib/Target/X86/X86InstrFPStack.td @@ -397,7 +397,7 @@ def CMOVNP_F : FPI<0xD8, AddRegFrm, (outs RST:$op), (ins), let canFoldAsLoad = 1 in { def LD_Fp32m : FpIf32<(outs RFP32:$dst), (ins f32mem:$src), ZeroArgFP, [(set RFP32:$dst, (loadf32 addr:$src))]>; -let isReMaterializable = 1, mayHaveSideEffects = 1 in +let isReMaterializable = 1 in def LD_Fp64m : FpIf64<(outs RFP64:$dst), (ins f64mem:$src), ZeroArgFP, [(set RFP64:$dst, (loadf64 addr:$src))]>; def LD_Fp80m : FpI_<(outs RFP80:$dst), (ins f80mem:$src), ZeroArgFP, diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index a0d0312..39bda04 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -276,11 +276,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::MOVDQArr, X86::MOVDQAmr, 0, 16 }, { X86::MOVPDI2DIrr, X86::MOVPDI2DImr, 0, 0 }, { X86::MOVPQIto64rr,X86::MOVPQI2QImr, 0, 0 }, - { X86::MOVPS2SSrr, X86::MOVPS2SSmr, 0, 0 }, - { X86::MOVSDrr, X86::MOVSDmr, 0, 0 }, { X86::MOVSDto64rr, X86::MOVSDto64mr, 0, 0 }, { X86::MOVSS2DIrr, X86::MOVSS2DImr, 0, 0 }, - { X86::MOVSSrr, X86::MOVSSmr, 0, 0 }, { X86::MOVUPDrr, X86::MOVUPDmr, 0, 0 }, { X86::MOVUPSrr, X86::MOVUPSmr, 0, 0 }, { X86::MUL16r, X86::MUL16m, 1, 0 }, @@ -389,12 +386,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::MOVDI2PDIrr, X86::MOVDI2PDIrm, 0 }, { X86::MOVDI2SSrr, X86::MOVDI2SSrm, 0 }, { X86::MOVDQArr, X86::MOVDQArm, 16 }, - { X86::MOVSD2PDrr, X86::MOVSD2PDrm, 0 }, - { X86::MOVSDrr, X86::MOVSDrm, 0 }, { X86::MOVSHDUPrr, X86::MOVSHDUPrm, 16 }, { X86::MOVSLDUPrr, X86::MOVSLDUPrm, 16 }, - { X86::MOVSS2PSrr, X86::MOVSS2PSrm, 0 }, - { X86::MOVSSrr, X86::MOVSSrm, 0 }, { X86::MOVSX16rr8, X86::MOVSX16rm8, 0 }, { X86::MOVSX32rr16, X86::MOVSX32rm16, 0 }, { X86::MOVSX32rr8, X86::MOVSX32rm8, 0 }, @@ -682,23 +675,20 @@ bool X86InstrInfo::isMoveInstr(const MachineInstr& MI, case X86::MOV16rr: case X86::MOV32rr: case X86::MOV64rr: - case X86::MOVSSrr: - case X86::MOVSDrr: // FP Stack register class copies case X86::MOV_Fp3232: case X86::MOV_Fp6464: case X86::MOV_Fp8080: case X86::MOV_Fp3264: case X86::MOV_Fp3280: case X86::MOV_Fp6432: case X86::MOV_Fp8032: - + + // Note that MOVSSrr and MOVSDrr are not considered copies. FR32 and FR64 + // copies are done with FsMOVAPSrr and FsMOVAPDrr. + case X86::FsMOVAPSrr: case X86::FsMOVAPDrr: case X86::MOVAPSrr: case X86::MOVAPDrr: case X86::MOVDQArr: - case X86::MOVSS2PSrr: - case X86::MOVSD2PDrr: - case X86::MOVPS2SSrr: - case X86::MOVPD2SDrr: case X86::MMX_MOVQ64rr: assert(MI.getNumOperands() >= 2 && MI.getOperand(0).isReg() && @@ -1083,7 +1073,7 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB, case X86::MOV8r0: Opc = X86::MOV8ri; break; case X86::MOV16r0: Opc = X86::MOV16ri; break; case X86::MOV32r0: Opc = X86::MOV32ri; break; - case X86::MOV64r0: Opc = X86::MOV64ri; break; + case X86::MOV64r0: Opc = X86::MOV64ri64i32; break; } Clone = false; } @@ -1860,7 +1850,7 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB, CommonRC = SrcRC; else if (!DestRC->hasSubClass(SrcRC)) { // Neither of GR64_NOREX or GR64_NOSP is a superclass of the other, - // but we want to copy then as GR64. 
Similarly, for GR32_NOREX and + // but we want to copy them as GR64. Similarly, for GR32_NOREX and // GR32_NOSP, copy as GR32. if (SrcRC->hasSuperClass(&X86::GR64RegClass) && DestRC->hasSuperClass(&X86::GR64RegClass)) diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 25cd297..cfe71a5 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -343,18 +343,37 @@ def X86_COND_O : PatLeaf<(i8 13)>; def X86_COND_P : PatLeaf<(i8 14)>; // alt. COND_PE def X86_COND_S : PatLeaf<(i8 15)>; -def i16immSExt8 : PatLeaf<(i16 imm), [{ - // i16immSExt8 predicate - True if the 16-bit immediate fits in a 8-bit - // sign extended field. - return (int16_t)N->getZExtValue() == (int8_t)N->getZExtValue(); +def immSext8 : PatLeaf<(imm), [{ + return N->getSExtValue() == (int8_t)N->getSExtValue(); }]>; -def i32immSExt8 : PatLeaf<(i32 imm), [{ - // i32immSExt8 predicate - True if the 32-bit immediate fits in a 8-bit - // sign extended field. - return (int32_t)N->getZExtValue() == (int8_t)N->getZExtValue(); +def i16immSExt8 : PatLeaf<(i16 immSext8)>; +def i32immSExt8 : PatLeaf<(i32 immSext8)>; + +/// Load patterns: these constraint the match to the right address space. +def dsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue()) + if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) + if (PT->getAddressSpace() > 255) + return false; + return true; +}]>; + +def gsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue()) + if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) + return PT->getAddressSpace() == 256; + return false; +}]>; + +def fsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue()) + if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) + return PT->getAddressSpace() == 257; + return false; }]>; + // Helper fragments for loads. // It's always safe to treat a anyext i16 load as a i32 load if the i16 is // known to be 32-bit aligned or better. Ditto for i8 to i16. 
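The new dsload/gsload/fsload fragments above key on LLVM address spaces: 256 selects %gs-relative loads, 257 %fs-relative ones, and anything above 255 is kept out of the default patterns. A clang-level way to produce such a load (assuming clang's address_space extension and its x86 segment mapping):

// Address space 256 maps to the %gs segment on x86 in this scheme.
typedef __attribute__((address_space(256))) int gs_int;

int load_gs(gs_int *p) {
  return *p;  // selected through the gsload fragment: movl %gs:(...), %eax
}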
@@ -372,8 +391,7 @@ def loadi16 : PatFrag<(ops node:$ptr), (i16 (unindexedload node:$ptr)), [{
   return false;
 }]>;
 
-def loadi16_anyext : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)),
-[{
+def loadi16_anyext : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)),[{
   LoadSDNode *LD = cast<LoadSDNode>(N);
   if (const Value *Src = LD->getSrcValue())
     if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
@@ -399,72 +417,11 @@ def loadi32 : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{
   return false;
 }]>;
 
-def nvloadi32 : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{
-  LoadSDNode *LD = cast<LoadSDNode>(N);
-  if (const Value *Src = LD->getSrcValue())
-    if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
-      if (PT->getAddressSpace() > 255)
-        return false;
-  if (LD->isVolatile())
-    return false;
-  ISD::LoadExtType ExtType = LD->getExtensionType();
-  if (ExtType == ISD::NON_EXTLOAD)
-    return true;
-  if (ExtType == ISD::EXTLOAD)
-    return LD->getAlignment() >= 4;
-  return false;
-}]>;
-
-def gsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
-  if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
-    if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
-      return PT->getAddressSpace() == 256;
-  return false;
-}]>;
-
-def fsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
-  if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
-    if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
-      return PT->getAddressSpace() == 257;
-  return false;
-}]>;
-
-def loadi8 : PatFrag<(ops node:$ptr), (i8 (load node:$ptr)), [{
-  if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
-    if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
-      if (PT->getAddressSpace() > 255)
-        return false;
-  return true;
-}]>;
-def loadi64 : PatFrag<(ops node:$ptr), (i64 (load node:$ptr)), [{
-  if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
-    if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
-      if (PT->getAddressSpace() > 255)
-        return false;
-  return true;
-}]>;
-
-def loadf32 : PatFrag<(ops node:$ptr), (f32 (load node:$ptr)), [{
-  if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
-    if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
-      if (PT->getAddressSpace() > 255)
-        return false;
-  return true;
-}]>;
-def loadf64 : PatFrag<(ops node:$ptr), (f64 (load node:$ptr)), [{
-  if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
-    if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
-      if (PT->getAddressSpace() > 255)
-        return false;
-  return true;
-}]>;
-def loadf80 : PatFrag<(ops node:$ptr), (f80 (load node:$ptr)), [{
-  if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
-    if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
-      if (PT->getAddressSpace() > 255)
-        return false;
-  return true;
-}]>;
+def loadi8  : PatFrag<(ops node:$ptr), (i8  (dsload node:$ptr))>;
+def loadi64 : PatFrag<(ops node:$ptr), (i64 (dsload node:$ptr))>;
+def loadf32 : PatFrag<(ops node:$ptr), (f32 (dsload node:$ptr))>;
+def loadf64 : PatFrag<(ops node:$ptr), (f64 (dsload node:$ptr))>;
+def loadf80 : PatFrag<(ops node:$ptr), (f80 (dsload node:$ptr))>;
 
 def sextloadi16i8  : PatFrag<(ops node:$ptr), (i16 (sextloadi8 node:$ptr))>;
 def sextloadi32i8  : PatFrag<(ops node:$ptr), (i32 (sextloadi8 node:$ptr))>;
@@ -1037,7 +994,7 @@ def MOV16rr_REV : I<0x8B, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
 def MOV32rr_REV : I<0x8B, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
                     "mov{l}\t{$src, $dst|$dst, $src}", []>;
 
-let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in {
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
 def MOV8rm  : I<0x8A, MRMSrcMem, (outs GR8 :$dst), (ins i8mem :$src),
                 "mov{b}\t{$src, $dst|$dst, $src}",
                 [(set GR8:$dst, (loadi8 addr:$src))]>;
@@ -1071,7 +1028,7 @@ def MOV8mr_NOREX : I<0x88, MRMDestMem,
                      (outs), (ins i8mem_NOREX:$dst, GR8_NOREX:$src),
                      "mov{b}\t{$src, $dst|$dst, $src}  # NOREX", []>;
 let mayLoad = 1,
-    canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
+    canFoldAsLoad = 1, isReMaterializable = 1 in
 def MOV8rm_NOREX : I<0x8A, MRMSrcMem,
                      (outs GR8_NOREX:$dst), (ins i8mem_NOREX:$src),
                      "mov{b}\t{$src, $dst|$dst, $src}  # NOREX", []>;
@@ -1156,7 +1113,7 @@ def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src),
 } // neverHasSideEffects
 
 // unsigned division/remainder
-let Defs = [AL,AH,EFLAGS], Uses = [AX] in
+let Defs = [AX,EFLAGS], Uses = [AX] in
 def DIV8r  : I<0xF6, MRM6r, (outs), (ins GR8:$src),    // AX/r8 = AL,AH
                "div{b}\t$src", []>;
 let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
@@ -4442,12 +4399,6 @@ def : Pat<(i16 (anyext GR8 :$src)), (MOVZX16rr8  GR8 :$src)>;
 def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8  GR8 :$src)>;
 def : Pat<(i32 (anyext GR16:$src)), (MOVZX32rr16 GR16:$src)>;
 
-// (and (i32 load), 255) -> (zextload i8)
-def : Pat<(i32 (and (nvloadi32 addr:$src), (i32 255))),
-          (MOVZX32rm8 addr:$src)>;
-def : Pat<(i32 (and (nvloadi32 addr:$src), (i32 65535))),
-          (MOVZX32rm16 addr:$src)>;
-
 //===----------------------------------------------------------------------===//
 // Some peepholes
 //===----------------------------------------------------------------------===//
@@ -4543,43 +4494,43 @@ def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>;
 def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>;
 
 // (shl x (and y, 31)) ==> (shl x, y)
-def : Pat<(shl GR8:$src1, (and CL:$amt, 31)),
+def : Pat<(shl GR8:$src1, (and CL, 31)),
           (SHL8rCL GR8:$src1)>;
-def : Pat<(shl GR16:$src1, (and CL:$amt, 31)),
+def : Pat<(shl GR16:$src1, (and CL, 31)),
          (SHL16rCL GR16:$src1)>;
-def : Pat<(shl GR32:$src1, (and CL:$amt, 31)),
+def : Pat<(shl GR32:$src1, (and CL, 31)),
          (SHL32rCL GR32:$src1)>;
-def : Pat<(store (shl (loadi8 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+def : Pat<(store (shl (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
          (SHL8mCL addr:$dst)>;
-def : Pat<(store (shl (loadi16 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+def : Pat<(store (shl (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
          (SHL16mCL addr:$dst)>;
-def : Pat<(store (shl (loadi32 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+def : Pat<(store (shl (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
          (SHL32mCL addr:$dst)>;
 
-def : Pat<(srl GR8:$src1, (and CL:$amt, 31)),
+def : Pat<(srl GR8:$src1, (and CL, 31)),
          (SHR8rCL GR8:$src1)>;
-def : Pat<(srl GR16:$src1, (and CL:$amt, 31)),
+def : Pat<(srl GR16:$src1, (and CL, 31)),
          (SHR16rCL GR16:$src1)>;
-def : Pat<(srl GR32:$src1, (and CL:$amt, 31)),
+def : Pat<(srl GR32:$src1, (and CL, 31)),
          (SHR32rCL GR32:$src1)>;
-def : Pat<(store (srl (loadi8 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+def : Pat<(store (srl (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
          (SHR8mCL addr:$dst)>;
-def : Pat<(store (srl (loadi16 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+def : Pat<(store (srl (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
          (SHR16mCL addr:$dst)>;
-def : Pat<(store (srl (loadi32 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+def : Pat<(store (srl (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
          (SHR32mCL addr:$dst)>;
 
-def : Pat<(sra GR8:$src1, (and CL:$amt, 31)),
+def : Pat<(sra GR8:$src1, (and CL, 31)),
          (SAR8rCL GR8:$src1)>;
-def : Pat<(sra GR16:$src1, (and CL:$amt, 31)),
+def : Pat<(sra GR16:$src1, (and CL, 31)),
          (SAR16rCL GR16:$src1)>;
-def : Pat<(sra GR32:$src1, (and CL:$amt, 31)),
+def : Pat<(sra GR32:$src1, (and CL, 31)),
          (SAR32rCL GR32:$src1)>;
-def : Pat<(store (sra (loadi8 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+def : Pat<(store (sra (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
          (SAR8mCL addr:$dst)>;
-def : Pat<(store (sra (loadi16 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+def : Pat<(store (sra (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
          (SAR16mCL addr:$dst)>;
-def : Pat<(store (sra (loadi32 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+def : Pat<(store (sra (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
          (SAR32mCL addr:$dst)>;
 
 // (or (x >> c) | (y << (32 - c))) ==> (shrd32 x, y, c)
@@ -4600,11 +4551,11 @@ def : Pat<(store (or (srl (loadi32 addr:$dst), (i8 (trunc ECX:$amt))),
                      addr:$dst),
           (SHRD32mrCL addr:$dst, GR32:$src2)>;
 
-def : Pat<(shrd GR32:$src1, (i8 imm:$amt1), GR32:$src2, (i8 imm:$amt2)),
+def : Pat<(shrd GR32:$src1, (i8 imm:$amt1), GR32:$src2, (i8 imm/*:$amt2*/)),
           (SHRD32rri8 GR32:$src1, GR32:$src2, (i8 imm:$amt1))>;
 
 def : Pat<(store (shrd (loadi32 addr:$dst), (i8 imm:$amt1),
-                       GR32:$src2, (i8 imm:$amt2)), addr:$dst),
+                       GR32:$src2, (i8 imm/*:$amt2*/)), addr:$dst),
           (SHRD32mri8 addr:$dst, GR32:$src2, (i8 imm:$amt1))>;
 
 // (or (x << c) | (y >> (32 - c))) ==> (shld32 x, y, c)
@@ -4625,11 +4576,11 @@ def : Pat<(store (or (shl (loadi32 addr:$dst), (i8 (trunc ECX:$amt))),
                      addr:$dst),
           (SHLD32mrCL addr:$dst, GR32:$src2)>;
 
-def : Pat<(shld GR32:$src1, (i8 imm:$amt1), GR32:$src2, (i8 imm:$amt2)),
+def : Pat<(shld GR32:$src1, (i8 imm:$amt1), GR32:$src2, (i8 imm/*:$amt2*/)),
           (SHLD32rri8 GR32:$src1, GR32:$src2, (i8 imm:$amt1))>;
 
 def : Pat<(store (shld (loadi32 addr:$dst), (i8 imm:$amt1),
-                       GR32:$src2, (i8 imm:$amt2)), addr:$dst),
+                       GR32:$src2, (i8 imm/*:$amt2*/)), addr:$dst),
          (SHLD32mri8 addr:$dst, GR32:$src2, (i8 imm:$amt1))>;
 
 // (or (x >> c) | (y << (16 - c))) ==> (shrd16 x, y, c)
@@ -4650,11 +4601,11 @@ def : Pat<(store (or (srl (loadi16 addr:$dst), (i8 (trunc CX:$amt))),
                      addr:$dst),
           (SHRD16mrCL addr:$dst, GR16:$src2)>;
 
-def : Pat<(shrd GR16:$src1, (i8 imm:$amt1), GR16:$src2, (i8 imm:$amt2)),
+def : Pat<(shrd GR16:$src1, (i8 imm:$amt1), GR16:$src2, (i8 imm/*:$amt2*/)),
           (SHRD16rri8 GR16:$src1, GR16:$src2, (i8 imm:$amt1))>;
 
 def : Pat<(store (shrd (loadi16 addr:$dst), (i8 imm:$amt1),
-                       GR16:$src2, (i8 imm:$amt2)), addr:$dst),
+                       GR16:$src2, (i8 imm/*:$amt2*/)), addr:$dst),
           (SHRD16mri8 addr:$dst, GR16:$src2, (i8 imm:$amt1))>;
 
 // (or (x << c) | (y >> (16 - c))) ==> (shld16 x, y, c)
@@ -4675,11 +4626,11 @@ def : Pat<(store (or (shl (loadi16 addr:$dst), (i8 (trunc CX:$amt))),
                      addr:$dst),
           (SHLD16mrCL addr:$dst, GR16:$src2)>;
 
-def : Pat<(shld GR16:$src1, (i8 imm:$amt1), GR16:$src2, (i8 imm:$amt2)),
+def : Pat<(shld GR16:$src1, (i8 imm:$amt1), GR16:$src2, (i8 imm/*:$amt2*/)),
           (SHLD16rri8 GR16:$src1, GR16:$src2, (i8 imm:$amt1))>;
 
 def : Pat<(store (shld (loadi16 addr:$dst), (i8 imm:$amt1),
-                       GR16:$src2, (i8 imm:$amt2)), addr:$dst),
+                       GR16:$src2, (i8 imm/*:$amt2*/)), addr:$dst),
           (SHLD16mri8 addr:$dst, GR16:$src2, (i8 imm:$amt1))>;
 
 // (anyext (setcc_carry)) -> (setcc_carry)
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index 89f020c..c8e0723 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -141,7 +141,7 @@ def MMX_MOVD64rrv164 : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
 let neverHasSideEffects = 1 in
 def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
                         "movq\t{$src, $dst|$dst, $src}", []>;
-let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1 in
 def MMX_MOVQ64rm : MMXI<0x6F, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
                         "movq\t{$src, $dst|$dst, $src}",
                         [(set VR64:$dst, (load_mmx addr:$src))]>;
@@ -426,13 +426,15 @@ def MMX_CVTTPS2PIrm : MMXI<0x2C, MRMSrcMem, (outs VR64:$dst),
                            (ins f64mem:$src),
 
 // Extract / Insert
-def MMX_X86pextrw : SDNode<"X86ISD::PEXTRW", SDTypeProfile<1, 2, []>, []>;
-def MMX_X86pinsrw : SDNode<"X86ISD::PINSRW", SDTypeProfile<1, 3, []>, []>;
+def MMX_X86pinsrw : SDNode<"X86ISD::MMX_PINSRW",
+                SDTypeProfile<1, 3, [SDTCisVT<0, v4i16>, SDTCisSameAs<0,1>,
+                                     SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>;
+
 def MMX_PEXTRWri  : MMXIi8<0xC5, MRMSrcReg,
                            (outs GR32:$dst), (ins VR64:$src1, i16i8imm:$src2),
                            "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                           [(set GR32:$dst, (MMX_X86pextrw (v4i16 VR64:$src1),
+                           [(set GR32:$dst, (X86pextrw (v4i16 VR64:$src1),
                                              (iPTR imm:$src2)))]>;
 let Constraints = "$src1 = $dst" in {
   def MMX_PINSRWrri : MMXIi8<0xC4, MRMSrcReg,
@@ -597,13 +599,6 @@ let AddedComplexity = 10 in {
             (MMX_PUNPCKHDQrr VR64:$src, VR64:$src)>;
 }
 
-// Patterns to perform vector shuffling with a zeroed out vector.
-let AddedComplexity = 20 in {
-  def : Pat<(bc_v2i32 (mmx_unpckl immAllZerosV,
-                       (v2i32 (scalar_to_vector (load_mmx addr:$src))))),
-            (MMX_PUNPCKLDQrm VR64:$src, VR64:$src)>;
-}
-
 // Some special case PANDN patterns.
 // FIXME: Get rid of these.
 def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v2i32 immAllOnesV))),
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 9b2140f..2743dba 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -160,6 +160,32 @@ def memopv4i16 : PatFrag<(ops node:$ptr), (v4i16 (memop64 node:$ptr))>;
 def memopv8i16 : PatFrag<(ops node:$ptr), (v8i16 (memop64 node:$ptr))>;
 def memopv2i32 : PatFrag<(ops node:$ptr), (v2i32 (memop64 node:$ptr))>;
 
+// MOVNT Support
+// Like 'store', but requires the non-temporal bit to be set
+def nontemporalstore : PatFrag<(ops node:$val, node:$ptr),
+                               (st node:$val, node:$ptr), [{
+  if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
+    return ST->isNonTemporal();
+  return false;
+}]>;
+
+def alignednontemporalstore : PatFrag<(ops node:$val, node:$ptr),
+                                      (st node:$val, node:$ptr), [{
+  if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
+    return ST->isNonTemporal() && !ST->isTruncatingStore() &&
+           ST->getAddressingMode() == ISD::UNINDEXED &&
+           ST->getAlignment() >= 16;
+  return false;
+}]>;
+
+def unalignednontemporalstore : PatFrag<(ops node:$val, node:$ptr),
+                                        (st node:$val, node:$ptr), [{
+  if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
+    return ST->isNonTemporal() &&
+           ST->getAlignment() < 16;
+  return false;
+}]>;
+
 def bc_v4f32 : PatFrag<(ops node:$in), (v4f32 (bitconvert node:$in))>;
 def bc_v2f64 : PatFrag<(ops node:$in), (v2f64 (bitconvert node:$in))>;
 def bc_v16i8 : PatFrag<(ops node:$in), (v16i8 (bitconvert node:$in))>;
@@ -344,18 +370,56 @@ let Uses = [EFLAGS], usesCustomInserter = 1 in {
 // SSE1 Instructions
 //===----------------------------------------------------------------------===//
 
-// Move Instructions
-let neverHasSideEffects = 1 in
-def MOVSSrr : SSI<0x10, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
-                  "movss\t{$src, $dst|$dst, $src}", []>;
-let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
+// Move Instructions. Register-to-register movss is not used for FR32
+// register copies because it's a partial register update; FsMOVAPSrr is
+// used instead. Register-to-register movss is not modeled as an INSERT_SUBREG
+// because INSERT_SUBREG requires that the insert be implementable in terms of
+// a copy, and, as just mentioned, we don't use movss for copies.
+let Constraints = "$src1 = $dst" in
+def MOVSSrr : SSI<0x10, MRMSrcReg,
+                  (outs VR128:$dst), (ins VR128:$src1, FR32:$src2),
+                  "movss\t{$src2, $dst|$dst, $src2}",
+                  [(set VR128:$dst,
+                        (movl VR128:$src1, (scalar_to_vector FR32:$src2)))]>;
+
+// Extract the low 32-bit value from one vector and insert it into another.
+let AddedComplexity = 15 in
+def : Pat<(v4f32 (movl VR128:$src1, VR128:$src2)),
+          (MOVSSrr VR128:$src1,
+                   (EXTRACT_SUBREG (v4f32 VR128:$src2), x86_subreg_ss))>;
+
+// Implicitly promote a 32-bit scalar to a vector.
+def : Pat<(v4f32 (scalar_to_vector FR32:$src)),
+          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, x86_subreg_ss)>;
+
+// Loading from memory automatically zeroing upper bits.
+let canFoldAsLoad = 1, isReMaterializable = 1 in
 def MOVSSrm : SSI<0x10, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
                   "movss\t{$src, $dst|$dst, $src}",
                   [(set FR32:$dst, (loadf32 addr:$src))]>;
+
+// MOVSSrm zeros the high parts of the register; represent this
+// with SUBREG_TO_REG.
+let AddedComplexity = 20 in {
+def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
+          (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), x86_subreg_ss)>;
+def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
+          (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), x86_subreg_ss)>;
+def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
+          (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), x86_subreg_ss)>;
+}
+
+// Store scalar value to memory.
 def MOVSSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
                   "movss\t{$src, $dst|$dst, $src}",
                   [(store FR32:$src, addr:$dst)]>;
 
+// Extract and store.
+def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
+                 addr:$dst),
+          (MOVSSmr addr:$dst,
+                   (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss))>;
+
 // Conversion instructions
 def CVTTSS2SIrr : SSI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins FR32:$src),
                       "cvttss2si\t{$src, $dst|$dst, $src}",
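At the source level, the shapes the rewritten MOVSS patterns above select correspond to the standard SSE move-scalar intrinsics. A minimal sketch (function names are illustrative; whether the selector emits exactly MOVSSrr/MOVSSmr depends on the surrounding DAG):

#include <xmmintrin.h>  // SSE1 intrinsics

// (store (f32 (vector_extract v, 0)), p) -> a single movss store, per the
// extract-and-store pattern above.
void store_low(float *p, __m128 v) {
  _mm_store_ss(p, v);
}

// (movl v1, (scalar_to_vector s)) -> register-form movss: the low lane of
// v1 is replaced by s, the upper three lanes are preserved.
__m128 set_low(__m128 v1, float s) {
  return _mm_move_ss(v1, _mm_set_ss(s));
}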
@@ -518,7 +582,7 @@ def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
 
 // Alias instruction to load FR32 from f128mem using movaps. Upper bits are
 // disregarded.
-let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1 in
 def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
                      "movaps\t{$src, $dst|$dst, $src}",
                      [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>;
@@ -715,7 +779,7 @@ defm MIN : sse1_fp_binop_rm<0x5D, "min", X86fmin,
 let neverHasSideEffects = 1 in
 def MOVAPSrr : PSI<0x28, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                    "movaps\t{$src, $dst|$dst, $src}", []>;
-let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1 in
 def MOVAPSrm : PSI<0x28, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                    "movaps\t{$src, $dst|$dst, $src}",
                    [(set VR128:$dst, (alignedloadv4f32 addr:$src))]>;
@@ -727,7 +791,7 @@ def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
 let neverHasSideEffects = 1 in
 def MOVUPSrr : PSI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                    "movups\t{$src, $dst|$dst, $src}", []>;
-let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1 in
 def MOVUPSrm : PSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                    "movups\t{$src, $dst|$dst, $src}",
                    [(set VR128:$dst, (loadv4f32 addr:$src))]>;
@@ -736,7 +800,7 @@ def MOVUPSmr : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                    [(store (v4f32 VR128:$src), addr:$dst)]>;
 
 // Intrinsic forms of MOVUPS load and store
-let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1 in
 def MOVUPSrm_Int : PSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                        "movups\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (int_x86_sse_loadu_ps addr:$src))]>;
@@ -796,9 +860,9 @@ def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst),
 
 let AddedComplexity = 20 in {
   def : Pat<(v4f32 (movddup VR128:$src, (undef))),
-            (MOVLHPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>;
+            (MOVLHPSrr VR128:$src, VR128:$src)>;
   def : Pat<(v2i64 (movddup VR128:$src, (undef))),
-            (MOVLHPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>;
+            (MOVLHPSrr VR128:$src, VR128:$src)>;
 }
 
@@ -1013,10 +1077,33 @@ def PREFETCHNTA : PSI<0x18, MRM0m, (outs), (ins i8mem:$src),
     "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0))]>;
 
 // Non-temporal stores
-def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+def MOVNTPSmr_Int : PSI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
                     "movntps\t{$src, $dst|$dst, $src}",
                     [(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>;
 
+let AddedComplexity = 400 in { // Prefer non-temporal versions
+def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+                    "movntps\t{$src, $dst|$dst, $src}",
+                    [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
+
+def MOVNTDQ_64mr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+                       "movntdq\t{$src, $dst|$dst, $src}",
+                       [(alignednontemporalstore (v2f64 VR128:$src), addr:$dst)]>;
+
+def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
+          (MOVNTDQ_64mr VR128:$src, addr:$dst)>;
+
+def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
+                 "movnti\t{$src, $dst|$dst, $src}",
+                 [(nontemporalstore (i32 GR32:$src), addr:$dst)]>,
+               TB, Requires<[HasSSE2]>;
+
+def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+                     "movnti\t{$src, $dst|$dst, $src}",
+                     [(nontemporalstore (i64 GR64:$src), addr:$dst)]>,
+                  TB, Requires<[HasSSE2]>;
+}
+
 // Load, store, and memory fence
 def SFENCE : PSI<0xAE, MRM7r, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>;
@@ -1035,84 +1122,73 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
 def V_SET0 : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
                  [(set VR128:$dst, (v4i32 immAllZerosV))]>;
 
-let Predicates = [HasSSE1] in {
-  def : Pat<(v2i64 immAllZerosV), (V_SET0)>;
-  def : Pat<(v8i16 immAllZerosV), (V_SET0)>;
-  def : Pat<(v16i8 immAllZerosV), (V_SET0)>;
-  def : Pat<(v2f64 immAllZerosV), (V_SET0)>;
-  def : Pat<(v4f32 immAllZerosV), (V_SET0)>;
-}
-
-// FR32 to 128-bit vector conversion.
-let isAsCheapAsAMove = 1 in
-def MOVSS2PSrr : SSI<0x10, MRMSrcReg, (outs VR128:$dst), (ins FR32:$src),
-                     "movss\t{$src, $dst|$dst, $src}",
-                     [(set VR128:$dst,
-                           (v4f32 (scalar_to_vector FR32:$src)))]>;
-def MOVSS2PSrm : SSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f32mem:$src),
-                     "movss\t{$src, $dst|$dst, $src}",
-                     [(set VR128:$dst,
-                           (v4f32 (scalar_to_vector (loadf32 addr:$src))))]>;
-
-// FIXME: may not be able to eliminate this movss with coalescing the src and
-// dest register classes are different. We really want to write this pattern
-// like this:
-// def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
-//           (f32 FR32:$src)>;
-let isAsCheapAsAMove = 1 in
-def MOVPS2SSrr : SSI<0x10, MRMSrcReg, (outs FR32:$dst), (ins VR128:$src),
-                     "movss\t{$src, $dst|$dst, $src}",
-                     [(set FR32:$dst, (vector_extract (v4f32 VR128:$src),
-                                       (iPTR 0)))]>;
-def MOVPS2SSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, VR128:$src),
-                     "movss\t{$src, $dst|$dst, $src}",
-                     [(store (f32 (vector_extract (v4f32 VR128:$src),
-                                   (iPTR 0))), addr:$dst)]>;
-
-
-// Move to lower bits of a VR128, leaving upper bits alone.
-// Three operand (but two address) aliases.
-let Constraints = "$src1 = $dst" in {
-let neverHasSideEffects = 1 in
-  def MOVLSS2PSrr : SSI<0x10, MRMSrcReg,
-                        (outs VR128:$dst), (ins VR128:$src1, FR32:$src2),
-                        "movss\t{$src2, $dst|$dst, $src2}", []>;
-
-  let AddedComplexity = 15 in
-    def MOVLPSrr : SSI<0x10, MRMSrcReg,
-                       (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                       "movss\t{$src2, $dst|$dst, $src2}",
-                       [(set VR128:$dst,
-                             (v4f32 (movl VR128:$src1, VR128:$src2)))]>;
-}
+def : Pat<(v2i64 immAllZerosV), (V_SET0)>;
+def : Pat<(v8i16 immAllZerosV), (V_SET0)>;
+def : Pat<(v16i8 immAllZerosV), (V_SET0)>;
+def : Pat<(v2f64 immAllZerosV), (V_SET0)>;
+def : Pat<(v4f32 immAllZerosV), (V_SET0)>;
 
-// Move to lower bits of a VR128 and zeroing upper bits.
-// Loading from memory automatically zeroing upper bits.
-let AddedComplexity = 20 in
-def MOVZSS2PSrm : SSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f32mem:$src),
-                      "movss\t{$src, $dst|$dst, $src}",
-                      [(set VR128:$dst, (v4f32 (X86vzmovl (v4f32 (scalar_to_vector
-                                                 (loadf32 addr:$src))))))]>;
-
-def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
-          (MOVZSS2PSrm addr:$src)>;
+def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
+          (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss))>;
 
 //===---------------------------------------------------------------------===//
 // SSE2 Instructions
 //===---------------------------------------------------------------------===//
 
-// Move Instructions
-let neverHasSideEffects = 1 in
-def MOVSDrr : SDI<0x10, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
-                  "movsd\t{$src, $dst|$dst, $src}", []>;
-let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
+// Move Instructions. Register-to-register movsd is not used for FR64
+// register copies because it's a partial register update; FsMOVAPDrr is
+// used instead. Register-to-register movsd is not modeled as an INSERT_SUBREG
+// because INSERT_SUBREG requires that the insert be implementable in terms of
+// a copy, and, as just mentioned, we don't use movsd for copies.
+let Constraints = "$src1 = $dst" in
+def MOVSDrr : SDI<0x10, MRMSrcReg,
+                  (outs VR128:$dst), (ins VR128:$src1, FR64:$src2),
+                  "movsd\t{$src2, $dst|$dst, $src2}",
+                  [(set VR128:$dst,
+                        (movl VR128:$src1, (scalar_to_vector FR64:$src2)))]>;
+
+// Extract the low 64-bit value from one vector and insert it into another.
+let AddedComplexity = 15 in
+def : Pat<(v2f64 (movl VR128:$src1, VR128:$src2)),
+          (MOVSDrr VR128:$src1,
+                   (EXTRACT_SUBREG (v2f64 VR128:$src2), x86_subreg_sd))>;
+
+// Implicitly promote a 64-bit scalar to a vector.
+def : Pat<(v2f64 (scalar_to_vector FR64:$src)),
+          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, x86_subreg_sd)>;
+
+// Loading from memory automatically zeroing upper bits.
+let canFoldAsLoad = 1, isReMaterializable = 1, AddedComplexity = 20 in
 def MOVSDrm : SDI<0x10, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
                   "movsd\t{$src, $dst|$dst, $src}",
                   [(set FR64:$dst, (loadf64 addr:$src))]>;
+
+// MOVSDrm zeros the high parts of the register; represent this
+// with SUBREG_TO_REG.
+let AddedComplexity = 20 in {
+def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
+          (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>;
+def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
+          (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>;
+def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
+          (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>;
+def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
+          (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>;
+def : Pat<(v2f64 (X86vzload addr:$src)),
+          (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>;
+}
+
+// Store scalar value to memory.
 def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
                   "movsd\t{$src, $dst|$dst, $src}",
                   [(store FR64:$src, addr:$dst)]>;
 
+// Extract and store.
+def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
+                 addr:$dst),
+          (MOVSDmr addr:$dst,
+                   (EXTRACT_SUBREG (v2f64 VR128:$src), x86_subreg_sd))>;
+
 // Conversion instructions
 def CVTTSD2SIrr : SDI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins FR64:$src),
                       "cvttsd2si\t{$src, $dst|$dst, $src}",
@@ -1166,7 +1242,8 @@ def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
                    Requires<[HasSSE2, OptForSize]>;
 
 def : Pat<(extloadf32 addr:$src),
-          (CVTSS2SDrr (MOVSSrm addr:$src))>, Requires<[HasSSE2, OptForSpeed]>;
+          (CVTSS2SDrr (MOVSSrm addr:$src))>,
+      Requires<[HasSSE2, OptForSpeed]>;
 
 // Match intrinsics which expect XMM operand(s).
 def Int_CVTSD2SIrr : SDI<0x2D, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
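The SUBREG_TO_REG load patterns above (for both MOVSSrm and MOVSDrm) encode the fact that a scalar movss/movsd load clears the upper vector lanes, which is exactly the behavior the scalar-load intrinsics expose. A small illustration, assuming an SSE2 target (function names are invented for the example):

#include <emmintrin.h>

__m128 load_low_f32(const float *p) {
  // loadf32 folded into MOVSSrm: lane 0 = *p, lanes 1-3 = 0, so the
  // X86vzmovl wrapper can be dropped for free.
  return _mm_load_ss(p);
}

__m128d load_low_f64(const double *p) {
  // The v2f64 analogue via MOVSDrm: lane 0 = *p, lane 1 = 0.
  return _mm_load_sd(p);
}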
@@ -1285,7 +1362,7 @@ def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
 
 // Alias instruction to load FR64 from f128mem using movapd. Upper bits are
 // disregarded.
-let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1 in
 def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
                      "movapd\t{$src, $dst|$dst, $src}",
                      [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>;
@@ -1483,7 +1560,7 @@ defm MIN : sse2_fp_binop_rm<0x5D, "min", X86fmin,
 let neverHasSideEffects = 1 in
 def MOVAPDrr : PDI<0x28, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                    "movapd\t{$src, $dst|$dst, $src}", []>;
-let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1 in
 def MOVAPDrm : PDI<0x28, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                    "movapd\t{$src, $dst|$dst, $src}",
                    [(set VR128:$dst, (alignedloadv2f64 addr:$src))]>;
@@ -2298,17 +2375,30 @@ def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
                        [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>;
 
 // Non-temporal stores
-def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
-                    "movntpd\t{$src, $dst|$dst, $src}",
-                    [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>;
-def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
-                    "movntdq\t{$src, $dst|$dst, $src}",
-                    [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>;
-def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
+def MOVNTPDmr_Int : PDI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+                        "movntpd\t{$src, $dst|$dst, $src}",
+                        [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>;
+def MOVNTDQmr_Int : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+                        "movntdq\t{$src, $dst|$dst, $src}",
+                        [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>;
+def MOVNTImr_Int : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
                      "movnti\t{$src, $dst|$dst, $src}",
                      [(int_x86_sse2_movnt_i addr:$dst, GR32:$src)]>,
                   TB, Requires<[HasSSE2]>;
 
+let AddedComplexity = 400 in { // Prefer non-temporal versions
+def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+                    "movntpd\t{$src, $dst|$dst, $src}",
+                    [(alignednontemporalstore(v2f64 VR128:$src), addr:$dst)]>;
+
+def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+                    "movntdq\t{$src, $dst|$dst, $src}",
+                    [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
+
+def : Pat<(alignednontemporalstore (v4i32 VR128:$src), addr:$dst),
+          (MOVNTDQmr VR128:$src, addr:$dst)>;
+}
+
 // Flush cache
 def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
                 "clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>,
@@ -2321,11 +2411,11 @@ def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
                "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>;
 
 //TODO: custom lower this so as to never even generate the noop
-def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls), (i8 imm:$sl), (i8 imm:$ss),
+def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm),
                       (i8 0)), (NOOP)>;
 def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>;
 def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>;
-def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls), (i8 imm:$sl), (i8 imm:$ss),
+def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm),
                       (i8 1)), (MFENCE)>;
 
 // Alias instructions that map zero vector to pxor / xorp* for sse.
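The non-temporal stores and the membarrier patterns above implement the classic streaming-store idiom: bypass the cache for bulk data, then order the stores with sfence before publishing. A hedged sketch with SSE intrinsics (the vector destination must be 16-byte aligned for the movntps/movntdq path; names are illustrative):

#include <emmintrin.h>

void stream_out(float *dst16 /* 16-byte aligned */, __m128 v, int *flag) {
  _mm_stream_ps(dst16, v);   // alignednontemporalstore v4f32 -> movntps
  _mm_stream_si32(flag, 1);  // nontemporalstore i32 -> movnti
  _mm_sfence();              // the (0,0,0,1,1) membarrier pattern -> SFENCE
}

void full_barrier() {
  _mm_mfence();              // the catch-all membarrier pattern -> MFENCE
}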
@@ -2337,17 +2427,6 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
 def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "",
                        [(set VR128:$dst, (v4i32 immAllOnesV))]>;
 
-// FR64 to 128-bit vector conversion.
-let isAsCheapAsAMove = 1 in
-def MOVSD2PDrr : SDI<0x10, MRMSrcReg, (outs VR128:$dst), (ins FR64:$src),
-                     "movsd\t{$src, $dst|$dst, $src}",
-                     [(set VR128:$dst,
-                           (v2f64 (scalar_to_vector FR64:$src)))]>;
-def MOVSD2PDrm : SDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
-                     "movsd\t{$src, $dst|$dst, $src}",
-                     [(set VR128:$dst,
-                           (v2f64 (scalar_to_vector (loadf64 addr:$src))))]>;
-
 def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
                       "movd\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
@@ -2376,20 +2455,9 @@ def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
                       [(store (i64 (vector_extract (v2i64 VR128:$src),
                                     (iPTR 0))), addr:$dst)]>;
 
-// FIXME: may not be able to eliminate this movss with coalescing the src and
-// dest register classes are different. We really want to write this pattern
-// like this:
-// def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
-//           (f32 FR32:$src)>;
-let isAsCheapAsAMove = 1 in
-def MOVPD2SDrr : SDI<0x10, MRMSrcReg, (outs FR64:$dst), (ins VR128:$src),
-                     "movsd\t{$src, $dst|$dst, $src}",
-                     [(set FR64:$dst, (vector_extract (v2f64 VR128:$src),
-                                       (iPTR 0)))]>;
-def MOVPD2SDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
-                     "movsd\t{$src, $dst|$dst, $src}",
-                     [(store (f64 (vector_extract (v2f64 VR128:$src),
-                                   (iPTR 0))), addr:$dst)]>;
+def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
+          (f64 (EXTRACT_SUBREG (v2f64 VR128:$src), x86_subreg_sd))>;
+
 def MOVPDI2DIrr  : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
@@ -2406,44 +2474,11 @@ def MOVSS2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
                      "movd\t{$src, $dst|$dst, $src}",
                      [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>;
 
-
-// Move to lower bits of a VR128, leaving upper bits alone.
-// Three operand (but two address) aliases.
-let Constraints = "$src1 = $dst" in {
-  let neverHasSideEffects = 1 in
-  def MOVLSD2PDrr : SDI<0x10, MRMSrcReg,
-                        (outs VR128:$dst), (ins VR128:$src1, FR64:$src2),
-                        "movsd\t{$src2, $dst|$dst, $src2}", []>;
-
-  let AddedComplexity = 15 in
-    def MOVLPDrr : SDI<0x10, MRMSrcReg,
-                       (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                       "movsd\t{$src2, $dst|$dst, $src2}",
-                       [(set VR128:$dst,
-                             (v2f64 (movl VR128:$src1, VR128:$src2)))]>;
-}
-
 // Store / copy lower 64-bits of a XMM register.
 def MOVLQ128mr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
                      "movq\t{$src, $dst|$dst, $src}",
                      [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>;
 
-// Move to lower bits of a VR128 and zeroing upper bits.
-// Loading from memory automatically zeroing upper bits.
-let AddedComplexity = 20 in {
-def MOVZSD2PDrm : SDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
-                      "movsd\t{$src, $dst|$dst, $src}",
-                      [(set VR128:$dst,
-                            (v2f64 (X86vzmovl (v2f64 (scalar_to_vector
-                                               (loadf64 addr:$src))))))]>;
-
-def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
-          (MOVZSD2PDrm addr:$src)>;
-def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
-          (MOVZSD2PDrm addr:$src)>;
-def : Pat<(v2f64 (X86vzload addr:$src)), (MOVZSD2PDrm addr:$src)>;
-}
-
 // movd / movq to XMM register zero-extends
 let AddedComplexity = 15 in {
 def MOVZDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
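The hunk that follows replaces the removed MOVL*2P* pseudo-moves with plain MOVSS/MOVSD applied to a zeroed V_SET0 input. The X86vzmovl-of-scalar_to_vector shape it matches is what the set-scalar intrinsics produce; a minimal illustration (names invented for the example):

#include <emmintrin.h>

__m128 low_only_f32(float x) {
  // (v4f32 (X86vzmovl (scalar_to_vector x))): lane 0 = x, lanes 1-3 = 0.
  return _mm_set_ss(x);
}

__m128d low_only_f64(double x) {
  // The v2f64 analogue, now selected as MOVSDrr with a V_SET0 operand.
  return _mm_set_sd(x);
}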
@@ -2989,13 +3024,15 @@ let Predicates = [HasSSE2] in {
 let AddedComplexity = 15 in {
 // Zeroing a VR128 then do a MOVS{S|D} to the lower bits.
 def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
-          (MOVLSD2PDrr (V_SET0), FR64:$src)>, Requires<[HasSSE2]>;
+          (MOVSDrr (v2f64 (V_SET0)), FR64:$src)>;
 def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
-          (MOVLSS2PSrr (V_SET0), FR32:$src)>, Requires<[HasSSE1]>;
+          (MOVSSrr (v4f32 (V_SET0)), FR32:$src)>;
 def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
-          (MOVLPSrr (V_SET0), VR128:$src)>, Requires<[HasSSE1]>;
+          (MOVSSrr (v4f32 (V_SET0)),
+                   (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss)))>;
 def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
-          (MOVLPSrr (V_SET0), VR128:$src)>, Requires<[HasSSE1]>;
+          (MOVSSrr (v4i32 (V_SET0)),
+                   (EXTRACT_SUBREG (v4i32 VR128:$src), x86_subreg_ss))>;
 }
 
 // Splat v2f64 / v2i64
@@ -3013,8 +3050,7 @@ def : Pat<(unpckh (v2i64 VR128:$src), (undef)),
 
 // Special unary SHUFPSrri case.
 def : Pat<(v4f32 (pshufd:$src3 VR128:$src1, (undef))),
           (SHUFPSrri VR128:$src1, VR128:$src1,
-                     (SHUFFLE_get_shuf_imm VR128:$src3))>,
-      Requires<[HasSSE1]>;
+                     (SHUFFLE_get_shuf_imm VR128:$src3))>;
 let AddedComplexity = 5 in
 def : Pat<(v4f32 (pshufd:$src2 VR128:$src1, (undef))),
           (PSHUFDri VR128:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>,
@@ -3060,13 +3096,13 @@ def : Pat<(v4f32 (unpckl_undef:$src2 VR128:$src, (undef))),
 }
 let AddedComplexity = 10 in {
 def : Pat<(v4f32 (unpckl_undef VR128:$src, (undef))),
-          (UNPCKLPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>;
+          (UNPCKLPSrr VR128:$src, VR128:$src)>;
 def : Pat<(v16i8 (unpckl_undef VR128:$src, (undef))),
-          (PUNPCKLBWrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+          (PUNPCKLBWrr VR128:$src, VR128:$src)>;
 def : Pat<(v8i16 (unpckl_undef VR128:$src, (undef))),
-          (PUNPCKLWDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+          (PUNPCKLWDrr VR128:$src, VR128:$src)>;
 def : Pat<(v4i32 (unpckl_undef VR128:$src, (undef))),
-          (PUNPCKLDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+          (PUNPCKLDQrr VR128:$src, VR128:$src)>;
 }
 
 // vector_shuffle v1, <undef>, <2, 2, 3, 3, ...>
@@ -3080,13 +3116,13 @@ def : Pat<(v4f32 (unpckh_undef:$src2 VR128:$src, (undef))),
 }
 let AddedComplexity = 10 in {
 def : Pat<(v4f32 (unpckh_undef VR128:$src, (undef))),
-          (UNPCKHPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>;
+          (UNPCKHPSrr VR128:$src, VR128:$src)>;
 def : Pat<(v16i8 (unpckh_undef VR128:$src, (undef))),
-          (PUNPCKHBWrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+          (PUNPCKHBWrr VR128:$src, VR128:$src)>;
 def : Pat<(v8i16 (unpckh_undef VR128:$src, (undef))),
-          (PUNPCKHWDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+          (PUNPCKHWDrr VR128:$src, VR128:$src)>;
 def : Pat<(v4i32 (unpckh_undef VR128:$src, (undef))),
-          (PUNPCKHDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+          (PUNPCKHDQrr VR128:$src, VR128:$src)>;
 }
 
 let AddedComplexity = 20 in {
@@ -3108,45 +3144,49 @@ def : Pat<(v4i32 (movhlps_undef VR128:$src1, (undef))),
 
 let AddedComplexity = 20 in {
 // vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS
 def : Pat<(v4f32 (movlp VR128:$src1, (load addr:$src2))),
-          (MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>;
+          (MOVLPSrm VR128:$src1, addr:$src2)>;
 def : Pat<(v2f64 (movlp VR128:$src1, (load addr:$src2))),
-          (MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
+          (MOVLPDrm VR128:$src1, addr:$src2)>;
 def : Pat<(v4i32 (movlp VR128:$src1, (load addr:$src2))),
-          (MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
+          (MOVLPSrm VR128:$src1, addr:$src2)>;
 def : Pat<(v2i64 (movlp VR128:$src1, (load addr:$src2))),
-          (MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
+          (MOVLPDrm VR128:$src1, addr:$src2)>;
 }
 
 // (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS
 def : Pat<(store (v4f32 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
-          (MOVLPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>;
+          (MOVLPSmr addr:$src1, VR128:$src2)>;
 def : Pat<(store (v2f64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
-          (MOVLPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
+          (MOVLPDmr addr:$src1, VR128:$src2)>;
 def : Pat<(store (v4i32 (movlp (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)),
                  addr:$src1),
-          (MOVLPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>;
+          (MOVLPSmr addr:$src1, VR128:$src2)>;
 def : Pat<(store (v2i64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
-          (MOVLPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
+          (MOVLPDmr addr:$src1, VR128:$src2)>;
 
 let AddedComplexity = 15 in {
 // Setting the lowest element in the vector.
 def : Pat<(v4i32 (movl VR128:$src1, VR128:$src2)),
-          (MOVLPSrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
+          (MOVSSrr (v4i32 VR128:$src1),
+                   (EXTRACT_SUBREG (v4i32 VR128:$src2), x86_subreg_ss))>;
 def : Pat<(v2i64 (movl VR128:$src1, VR128:$src2)),
-          (MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
+          (MOVSDrr (v2i64 VR128:$src1),
+                   (EXTRACT_SUBREG (v2i64 VR128:$src2), x86_subreg_sd))>;
 
-// vector_shuffle v1, v2 <4, 5, 2, 3> using MOVLPDrr (movsd)
+// vector_shuffle v1, v2 <4, 5, 2, 3> using movsd
 def : Pat<(v4f32 (movlp VR128:$src1, VR128:$src2)),
-          (MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
+          (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, x86_subreg_sd))>,
+      Requires<[HasSSE2]>;
 def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)),
-          (MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
+          (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, x86_subreg_sd))>,
+      Requires<[HasSSE2]>;
 }
 
 // vector_shuffle v1, v2 <4, 5, 2, 3> using SHUFPSrri (we prefer movsd, but
 // fall back to this for SSE1)
 def : Pat<(v4f32 (movlp:$src3 VR128:$src1, (v4f32 VR128:$src2))),
           (SHUFPSrri VR128:$src2, VR128:$src1,
-                     (SHUFFLE_get_shuf_imm VR128:$src3))>, Requires<[HasSSE1]>;
+                     (SHUFFLE_get_shuf_imm VR128:$src3))>;
 
 // Set lowest element and zero upper elements.
 let AddedComplexity = 15 in
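The movaps/movups patterns in the hunk below exploit an encoding quirk: movaps (0F 28) lacks the 66 operand-size prefix that movdqa (66 0F 6F) carries, so routing integer vector loads and stores through the FP moves saves one byte per instruction, as the comment notes. At the source level this covers ordinary __m128i traffic, e.g. (a sketch, names invented):

#include <emmintrin.h>

__m128i copy16(const __m128i *src /* 16-byte aligned */, __m128i *dst) {
  __m128i v = _mm_load_si128(src);  // alignedloadv2i64 -> movaps
  _mm_storeu_si128(dst, v);         // unaligned store  -> movups
  return v;
}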
@@ -3188,30 +3228,30 @@ def : Pat<(v2i32 (fp_to_sint (v2f64 VR128:$src))),
 
 // Use movaps / movups for SSE integer load / store (one byte shorter).
 def : Pat<(alignedloadv4i32 addr:$src),
-          (MOVAPSrm addr:$src)>, Requires<[HasSSE1]>;
+          (MOVAPSrm addr:$src)>;
 def : Pat<(loadv4i32 addr:$src),
-          (MOVUPSrm addr:$src)>, Requires<[HasSSE1]>;
+          (MOVUPSrm addr:$src)>;
 def : Pat<(alignedloadv2i64 addr:$src),
-          (MOVAPSrm addr:$src)>, Requires<[HasSSE2]>;
+          (MOVAPSrm addr:$src)>;
 def : Pat<(loadv2i64 addr:$src),
-          (MOVUPSrm addr:$src)>, Requires<[HasSSE2]>;
+          (MOVUPSrm addr:$src)>;
 
 def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
-          (MOVAPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
+          (MOVAPSmr addr:$dst, VR128:$src)>;
 def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
-          (MOVAPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
+          (MOVAPSmr addr:$dst, VR128:$src)>;
 def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
-          (MOVAPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
+          (MOVAPSmr addr:$dst, VR128:$src)>;
 def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
-          (MOVAPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
+          (MOVAPSmr addr:$dst, VR128:$src)>;
 def : Pat<(store (v2i64 VR128:$src), addr:$dst),
-          (MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
+          (MOVUPSmr addr:$dst, VR128:$src)>;
 def : Pat<(store (v4i32 VR128:$src), addr:$dst),
-          (MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
+          (MOVUPSmr addr:$dst, VR128:$src)>;
 def : Pat<(store (v8i16 VR128:$src), addr:$dst),
-          (MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
+          (MOVUPSmr addr:$dst, VR128:$src)>;
 def : Pat<(store (v16i8 VR128:$src), addr:$dst),
-          (MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
+          (MOVUPSmr addr:$dst, VR128:$src)>;
 
 //===----------------------------------------------------------------------===//
 // SSE4.1 Instructions
@@ -3400,7 +3440,7 @@ let Constraints = "$src1 = $dst" in {
                  (ins VR128:$src1, i128mem:$src2),
                  !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                  [(set VR128:$dst,
-                   (OpNode VR128:$src1, (memop addr:$src2)))]>, OpSize;
+                   (OpVT (OpNode VR128:$src1, (memop addr:$src2))))]>, OpSize;
     def rm_int : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
                        (ins VR128:$src1, i128mem:$src2),
                        !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
diff --git a/lib/Target/X86/X86MCAsmInfo.cpp b/lib/Target/X86/X86MCAsmInfo.cpp
index 91c0fbb..250634f 100644
--- a/lib/Target/X86/X86MCAsmInfo.cpp
+++ b/lib/Target/X86/X86MCAsmInfo.cpp
@@ -55,6 +55,11 @@ X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &Triple) {
   if (!is64Bit)
     Data64bitsDirective = 0;       // we can't emit a 64-bit unit
 
+  // Use ## as a comment string so that .s files generated by llvm can go
+  // through the GCC preprocessor without causing an error. This is needed
+  // because "clang foo.s" runs the C preprocessor, which is usually reserved
+  // for .S files on other systems. Perhaps this is because the file system
+  // wasn't always case preserving or something.
   CommentString = "##";
   PCSymbol = ".";
 
@@ -70,6 +75,8 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &Triple) {
   AsmTransCBE = x86_asm_table;
   AssemblerDialect = AsmWriterFlavor;
 
+  TextAlignFillValue = 0x90;
+
   PrivateGlobalPrefix = ".L";
   WeakRefDirective = "\t.weak\t";
   PCSymbol = ".";
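Both MCAsmInfo changes here are plain field initializations, and any MCAsmInfo subclass can opt into the same behavior. A minimal sketch (the subclass name is hypothetical; CommentString, TextAlignFillValue, and PrivateGlobalPrefix are the real fields touched in this diff):

#include "llvm/MC/MCAsmInfo.h"

namespace {
struct ToyX86AsmInfo : llvm::MCAsmInfo {
  ToyX86AsmInfo() {
    CommentString = "##";       // survives a pass through the C preprocessor
    TextAlignFillValue = 0x90;  // pad .text alignment with NOP, not 0x00
    PrivateGlobalPrefix = ".L";
  }
};
} // end anonymous namespace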
@@ -94,4 +101,6 @@ MCSection *X86ELFMCAsmInfo::getNonexecutableStackSection(MCContext &Ctx) const {
 X86MCAsmInfoCOFF::X86MCAsmInfoCOFF(const Triple &Triple) {
   AsmTransCBE = x86_asm_table;
   AssemblerDialect = AsmWriterFlavor;
-}
\ No newline at end of file
+
+  TextAlignFillValue = 0x90;
+}
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 8524236..946d6b2 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -191,6 +191,8 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
         return &X86::GR16_NOREXRegClass;
       else if (A == &X86::GR16_ABCDRegClass)
         return &X86::GR16_ABCDRegClass;
+    } else if (B == &X86::FR32RegClass) {
+      return A;
     }
     break;
   case 2:
@@ -207,6 +209,8 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
       else if (A == &X86::GR16RegClass || A == &X86::GR16_ABCDRegClass ||
                A == &X86::GR16_NOREXRegClass)
         return &X86::GR16_ABCDRegClass;
+    } else if (B == &X86::FR64RegClass) {
+      return A;
     }
     break;
   case 3:
@@ -234,6 +238,8 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
         return &X86::GR32_NOREXRegClass;
       else if (A == &X86::GR32_ABCDRegClass)
         return &X86::GR64_ABCDRegClass;
+    } else if (B == &X86::VR128RegClass) {
+      return A;
     }
     break;
   case 4:
@@ -446,8 +452,10 @@ bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
 
 bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
   const MachineFrameInfo *MFI = MF.getFrameInfo();
+  const Function *F = MF.getFunction();
   bool requiresRealignment =
-    RealignStack && (MFI->getMaxAlignment() > StackAlign);
+    RealignStack && ((MFI->getMaxAlignment() > StackAlign) ||
+                     F->hasFnAttr(Attribute::StackAlignment));
 
   // FIXME: Currently we don't support stack realignment for functions with
   //        variable-sized allocas.
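The needsStackRealignment change above means realignment can now be forced by a function attribute alone, not just by an over-aligned frame object. Condensed into a freestanding predicate (a restatement for clarity, with the operands passed in rather than read off the MachineFunction):

bool needsRealignment(bool RealignStack, unsigned MaxFrameObjAlign,
                      unsigned StackAlign, bool HasStackAlignAttr) {
  // Old behavior: only the first disjunct. New: the attribute forces it.
  return RealignStack &&
         (MaxFrameObjAlign > StackAlign || HasStackAlignAttr);
}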
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index 8fb5e92..e4bdb4e 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -35,7 +35,8 @@ namespace X86 {
   /// these indices must be kept in sync with the class indices in the
   /// X86RegisterInfo.td file.
   enum SubregIndex {
-    SUBREG_8BIT = 1, SUBREG_8BIT_HI = 2, SUBREG_16BIT = 3, SUBREG_32BIT = 4
+    SUBREG_8BIT = 1, SUBREG_8BIT_HI = 2, SUBREG_16BIT = 3, SUBREG_32BIT = 4,
+    SUBREG_SS = 1, SUBREG_SD = 2, SUBREG_XMM = 3
   };
 }
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td
index 1559bf7..ed2ce6c 100644
--- a/lib/Target/X86/X86RegisterInfo.td
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -158,22 +158,22 @@ let Namespace = "X86" in {
   def XMM15: Register<"xmm15">, DwarfRegNum<[32, -2, -2]>;
 
   // YMM Registers, used by AVX instructions
-  def YMM0: Register<"ymm0">, DwarfRegNum<[17, 21, 21]>;
-  def YMM1: Register<"ymm1">, DwarfRegNum<[18, 22, 22]>;
-  def YMM2: Register<"ymm2">, DwarfRegNum<[19, 23, 23]>;
-  def YMM3: Register<"ymm3">, DwarfRegNum<[20, 24, 24]>;
-  def YMM4: Register<"ymm4">, DwarfRegNum<[21, 25, 25]>;
-  def YMM5: Register<"ymm5">, DwarfRegNum<[22, 26, 26]>;
-  def YMM6: Register<"ymm6">, DwarfRegNum<[23, 27, 27]>;
-  def YMM7: Register<"ymm7">, DwarfRegNum<[24, 28, 28]>;
-  def YMM8: Register<"ymm8">, DwarfRegNum<[25, -2, -2]>;
-  def YMM9: Register<"ymm9">, DwarfRegNum<[26, -2, -2]>;
-  def YMM10: Register<"ymm10">, DwarfRegNum<[27, -2, -2]>;
-  def YMM11: Register<"ymm11">, DwarfRegNum<[28, -2, -2]>;
-  def YMM12: Register<"ymm12">, DwarfRegNum<[29, -2, -2]>;
-  def YMM13: Register<"ymm13">, DwarfRegNum<[30, -2, -2]>;
-  def YMM14: Register<"ymm14">, DwarfRegNum<[31, -2, -2]>;
-  def YMM15: Register<"ymm15">, DwarfRegNum<[32, -2, -2]>;
+  def YMM0: RegisterWithSubRegs<"ymm0", [XMM0]>, DwarfRegNum<[17, 21, 21]>;
+  def YMM1: RegisterWithSubRegs<"ymm1", [XMM1]>, DwarfRegNum<[18, 22, 22]>;
+  def YMM2: RegisterWithSubRegs<"ymm2", [XMM2]>, DwarfRegNum<[19, 23, 23]>;
+  def YMM3: RegisterWithSubRegs<"ymm3", [XMM3]>, DwarfRegNum<[20, 24, 24]>;
+  def YMM4: RegisterWithSubRegs<"ymm4", [XMM4]>, DwarfRegNum<[21, 25, 25]>;
+  def YMM5: RegisterWithSubRegs<"ymm5", [XMM5]>, DwarfRegNum<[22, 26, 26]>;
+  def YMM6: RegisterWithSubRegs<"ymm6", [XMM6]>, DwarfRegNum<[23, 27, 27]>;
+  def YMM7: RegisterWithSubRegs<"ymm7", [XMM7]>, DwarfRegNum<[24, 28, 28]>;
+  def YMM8: RegisterWithSubRegs<"ymm8", [XMM8]>, DwarfRegNum<[25, -2, -2]>;
+  def YMM9: RegisterWithSubRegs<"ymm9", [XMM9]>, DwarfRegNum<[26, -2, -2]>;
+  def YMM10: RegisterWithSubRegs<"ymm10", [XMM10]>, DwarfRegNum<[27, -2, -2]>;
+  def YMM11: RegisterWithSubRegs<"ymm11", [XMM11]>, DwarfRegNum<[28, -2, -2]>;
+  def YMM12: RegisterWithSubRegs<"ymm12", [XMM12]>, DwarfRegNum<[29, -2, -2]>;
+  def YMM13: RegisterWithSubRegs<"ymm13", [XMM13]>, DwarfRegNum<[30, -2, -2]>;
+  def YMM14: RegisterWithSubRegs<"ymm14", [XMM14]>, DwarfRegNum<[31, -2, -2]>;
+  def YMM15: RegisterWithSubRegs<"ymm15", [XMM15]>, DwarfRegNum<[32, -2, -2]>;
 
   // Floating point stack registers
   def ST0 : Register<"st(0)">, DwarfRegNum<[33, 12, 11]>;
@@ -238,6 +238,10 @@ def x86_subreg_8bit_hi : PatLeaf<(i32 2)>;
 def x86_subreg_16bit   : PatLeaf<(i32 3)>;
 def x86_subreg_32bit   : PatLeaf<(i32 4)>;
 
+def x86_subreg_ss  : PatLeaf<(i32 1)>;
+def x86_subreg_sd  : PatLeaf<(i32 2)>;
+def x86_subreg_xmm : PatLeaf<(i32 3)>;
+
 def : SubRegSet<1, [AX, CX, DX, BX, SP,  BP,  SI,  DI,
                     R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W],
                    [AL, CL, DL, BL, SPL, BPL, SIL, DIL,
@@ -277,11 +281,31 @@ def : SubRegSet<4, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI,
                    [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI,
                     R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D]>;
 
-def : SubRegSet<1, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
+def : SubRegSet<1, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
+                    YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15],
+                   [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+                    XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>;
+
+def : SubRegSet<2, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
+                    YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15],
+                   [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+                    XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>;
+
+def : SubRegSet<3, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
                     YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15],
                    [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
                     XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>;
 
+def : SubRegSet<1, [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+                    XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15],
+                   [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+                    XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>;
+
+def : SubRegSet<2, [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+                    XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15],
+                   [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+                    XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>;
+
 //===----------------------------------------------------------------------===//
 // Register Class Definitions... now that we have all of the pieces, define the
 // top-level register classes. The order specified in the register list is
@@ -793,6 +817,7 @@ def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128,
                           [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
                            XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]> {
+  let SubRegClassList = [FR32, FR64];
   let MethodProtos = [{
     iterator allocation_order_end(const MachineFunction &MF) const;
   }];
@@ -811,7 +836,9 @@ def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128,
 def VR256 : RegisterClass<"X86", [ v8i32, v4i64, v8f32, v4f64],256,
                           [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
                            YMM8, YMM9, YMM10, YMM11,
-                           YMM12, YMM13, YMM14, YMM15]>;
+                           YMM12, YMM13, YMM14, YMM15]> {
+  let SubRegClassList = [FR32, FR64, VR128];
+}
 
 // Status flags registers.
 def CCR : RegisterClass<"X86", [i32], 32, [EFLAGS]> {
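With the enum added above, subregister indices 1/2/3 are reused for the ss/sd/xmm views; indices are interpreted relative to the register class, which is why SUBREG_SS can share the value 1 with SUBREG_8BIT. A hedged usage sketch (assuming the TargetRegisterInfo::getSubReg interface of this period; the helper function is invented):

#include "llvm/Target/TargetRegisterInfo.h"

// Map a ymm register to its xmm subregister via the new index.
unsigned xmmViewOf(const llvm::TargetRegisterInfo &TRI, unsigned YmmReg) {
  return TRI.getSubReg(YmmReg, 3 /* X86::SUBREG_XMM */);
}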
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 5e05c2f..594a470 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -20,9 +20,9 @@
 namespace llvm {
 class GlobalValue;
 class TargetMachine;
-  
+
 /// PICStyles - The X86 backend supports a number of different styles of PIC.
-/// 
+///
 namespace PICStyles {
 enum Style {
   StubPIC,          // Used on i386-darwin in -fPIC mode.
@@ -46,7 +46,7 @@ protected:
   /// PICStyle - Which PIC style to use
   ///
   PICStyles::Style PICStyle;
-  
+
   /// X86SSELevel - MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, or
   /// none supported.
   X86SSEEnum X86SSELevel;
@@ -58,7 +58,7 @@ protected:
   /// HasCMov - True if this processor has conditional move instructions
   /// (generally pentium pro+).
   bool HasCMov;
-  
+
   /// HasX86_64 - True if the processor supports X86-64 instructions.
   ///
   bool HasX86_64;
@@ -78,8 +78,9 @@ protected:
   /// IsBTMemSlow - True if BT (bit test) of memory instructions are slow.
   bool IsBTMemSlow;
 
-  /// HasVectorUAMem - True if SIMD operations can have unaligned memory operands.
-  /// This may require setting a feature bit in the processor.
+  /// HasVectorUAMem - True if SIMD operations can have unaligned memory
+  /// operands. This may require setting a feature bit in the
+  /// processor.
   bool HasVectorUAMem;
 
   /// DarwinVers - Nonzero if this is a darwin platform: the numeric
@@ -150,20 +151,20 @@ public:
 
   bool isTargetDarwin() const { return TargetType == isDarwin; }
   bool isTargetELF() const { return TargetType == isELF; }
-  
+
   bool isTargetWindows() const { return TargetType == isWindows; }
   bool isTargetMingw() const { return TargetType == isMingw; }
   bool isTargetCygwin() const { return TargetType == isCygwin; }
   bool isTargetCygMing() const {
     return TargetType == isMingw || TargetType == isCygwin;
   }
-  
+
   /// isTargetCOFF - Return true if this is any COFF/Windows target variant.
   bool isTargetCOFF() const {
     return TargetType == isMingw || TargetType == isCygwin ||
            TargetType == isWindows;
   }
-  
+
   bool isTargetWin64() const {
     return Is64Bit && (TargetType == isMingw || TargetType == isWindows);
   }
@@ -196,11 +197,11 @@ public:
   bool isPICStyleStubAny() const {
     return PICStyle == PICStyles::StubDynamicNoPIC ||
            PICStyle == PICStyles::StubPIC;
   }
-  
+
   /// getDarwinVers - Return the darwin version number, 8 = Tiger, 9 = Leopard,
   /// 10 = Snow Leopard, etc.
   unsigned getDarwinVers() const { return DarwinVers; }
-  
+
   /// ClassifyGlobalReference - Classify a global variable reference for the
   /// current subtarget according to how we should reference it in a non-pcrel
   /// context.
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 7802f98..56ddaf8 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -51,6 +51,12 @@ extern "C" void LLVMInitializeX86Target() {
                                       createX86_32MCCodeEmitter);
   TargetRegistry::RegisterCodeEmitter(TheX86_64Target,
                                       createX86_64MCCodeEmitter);
+
+  // Register the asm backend.
+  TargetRegistry::RegisterAsmBackend(TheX86_32Target,
+                                     createX86_32AsmBackend);
+  TargetRegistry::RegisterAsmBackend(TheX86_64Target,
+                                     createX86_64AsmBackend);
 }
 
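Once LLVMInitializeX86Target() has run, the registrations above (code emitters and now asm backends) hang off the registry's Target entries. A hedged lookup sketch (signature per the TargetRegistry of this period; the triple is just an example):

#include "llvm/Target/TargetRegistry.h"
#include <string>

const llvm::Target *findX86Target() {
  std::string Error;
  // Returns 0 and sets Error if the target was not registered.
  return llvm::TargetRegistry::lookupTarget("x86_64-unknown-linux-gnu",
                                            Error);
}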
diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp
index d1ee3fc..29a0be5 100644
--- a/lib/Target/X86/X86TargetObjectFile.cpp
+++ b/lib/Target/X86/X86TargetObjectFile.cpp
@@ -18,38 +18,6 @@
 using namespace llvm;
 using namespace dwarf;
 
-const MCExpr *X8632_MachoTargetObjectFile::
-getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
-                                 MachineModuleInfo *MMI, unsigned Encoding) const {
-  // The mach-o version of this method defaults to returning a stub reference.
-
-  if (Encoding & DW_EH_PE_indirect) {
-    MachineModuleInfoMachO &MachOMMI =
-      MMI->getObjFileInfo<MachineModuleInfoMachO>();
-
-    SmallString<128> Name;
-    Mang->getNameWithPrefix(Name, GV, true);
-    Name += "$non_lazy_ptr";
-
-    // Add information about the stub reference to MachOMMI so that the stub
-    // gets emitted by the asmprinter.
-    MCSymbol *Sym = getContext().GetOrCreateSymbol(Name.str());
-    MCSymbol *&StubSym = MachOMMI.getGVStubEntry(Sym);
-    if (StubSym == 0) {
-      Name.clear();
-      Mang->getNameWithPrefix(Name, GV, false);
-      StubSym = getContext().GetOrCreateSymbol(Name.str());
-    }
-
-    return TargetLoweringObjectFile::
-      getSymbolForDwarfReference(Sym, MMI,
-                                 Encoding & ~dwarf::DW_EH_PE_indirect);
-  }
-
-  return TargetLoweringObjectFileMachO::
-    getSymbolForDwarfGlobalReference(GV, Mang, MMI, Encoding);
-}
-
 const MCExpr *X8664_MachoTargetObjectFile::
 getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
                                  MachineModuleInfo *MMI, unsigned Encoding) const {
@@ -148,35 +116,3 @@ unsigned X8664_ELFTargetObjectFile::getTTypeEncoding() const {
 
   return DW_EH_PE_absptr;
 }
-
-unsigned X8632_MachoTargetObjectFile::getPersonalityEncoding() const {
-  return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4;
-}
-
-unsigned X8632_MachoTargetObjectFile::getLSDAEncoding() const {
-  return DW_EH_PE_pcrel | DW_EH_PE_sdata4;
-}
-
-unsigned X8632_MachoTargetObjectFile::getFDEEncoding() const {
-  return DW_EH_PE_pcrel | DW_EH_PE_sdata4;
-}
-
-unsigned X8632_MachoTargetObjectFile::getTTypeEncoding() const {
-  return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4;
-}
-
-unsigned X8664_MachoTargetObjectFile::getPersonalityEncoding() const {
-  return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4;
-}
-
-unsigned X8664_MachoTargetObjectFile::getLSDAEncoding() const {
-  return DW_EH_PE_pcrel | DW_EH_PE_sdata4;
-}
-
-unsigned X8664_MachoTargetObjectFile::getFDEEncoding() const {
-  return DW_EH_PE_pcrel | DW_EH_PE_sdata4;
-}
-
-unsigned X8664_MachoTargetObjectFile::getTTypeEncoding() const {
-  return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4;
-}
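The removed overrides all composed the same llvm/Support/Dwarf.h flag bits; the personality/TType encoding they returned decomposes as indirect | pcrel | sdata4. A one-liner making the composition explicit (a reference sketch, not part of the diff):

#include "llvm/Support/Dwarf.h"

unsigned machoPersonalityEncoding() {
  using namespace llvm::dwarf;
  return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4;  // 0x9B
}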
diff --git a/lib/Target/X86/X86TargetObjectFile.h b/lib/Target/X86/X86TargetObjectFile.h
index 0fff194..0444417 100644
--- a/lib/Target/X86/X86TargetObjectFile.h
+++ b/lib/Target/X86/X86TargetObjectFile.h
@@ -17,20 +17,6 @@
 namespace llvm {
   class X86TargetMachine;
 
-  /// X8632_MachoTargetObjectFile - This TLOF implementation is used for
-  /// Darwin/x86-32.
-  class X8632_MachoTargetObjectFile : public TargetLoweringObjectFileMachO {
-  public:
-
-    virtual const MCExpr *
-    getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
-                                     MachineModuleInfo *MMI, unsigned Encoding) const;
-    virtual unsigned getPersonalityEncoding() const;
-    virtual unsigned getLSDAEncoding() const;
-    virtual unsigned getFDEEncoding() const;
-    virtual unsigned getTTypeEncoding() const;
-  };
-
   /// X8664_MachoTargetObjectFile - This TLOF implementation is used for
   /// Darwin/x86-64.
   class X8664_MachoTargetObjectFile : public TargetLoweringObjectFileMachO {
@@ -39,17 +25,13 @@ namespace llvm {
     virtual const MCExpr *
     getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
                                      MachineModuleInfo *MMI, unsigned Encoding) const;
-    virtual unsigned getPersonalityEncoding() const;
-    virtual unsigned getLSDAEncoding() const;
-    virtual unsigned getFDEEncoding() const;
-    virtual unsigned getTTypeEncoding() const;
   };
 
   class X8632_ELFTargetObjectFile : public TargetLoweringObjectFileELF {
     const X86TargetMachine &TM;
   public:
     X8632_ELFTargetObjectFile(const X86TargetMachine &tm)
-      :TM(tm) { };
+      :TM(tm) { }
     virtual unsigned getPersonalityEncoding() const;
     virtual unsigned getLSDAEncoding() const;
     virtual unsigned getFDEEncoding() const;
@@ -60,7 +42,7 @@ namespace llvm {
     const X86TargetMachine &TM;
   public:
     X8664_ELFTargetObjectFile(const X86TargetMachine &tm)
-      :TM(tm) { };
+      :TM(tm) { }
     virtual unsigned getPersonalityEncoding() const;
     virtual unsigned getLSDAEncoding() const;
     virtual unsigned getFDEEncoding() const;