diff options
Diffstat (limited to 'contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp')
-rw-r--r-- | contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp | 114 |
1 files changed, 63 insertions, 51 deletions
diff --git a/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp b/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp index f031a28..c67aa04 100644 --- a/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp +++ b/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp @@ -11,11 +11,11 @@ // //===----------------------------------------------------------------------===// -#include "X86InstrInfo.h" +#include "X86SelectionDAGInfo.h" #include "X86ISelLowering.h" +#include "X86InstrInfo.h" #include "X86RegisterInfo.h" #include "X86Subtarget.h" -#include "X86SelectionDAGInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/Target/TargetLowering.h" @@ -44,8 +44,26 @@ bool X86SelectionDAGInfo::isBaseRegConflictPossible( return false; } +namespace { + +// Represents a cover of a buffer of Size bytes with Count() blocks of type AVT +// (of size UBytes() bytes), as well as how many bytes remain (BytesLeft() is +// always smaller than the block size). +struct RepMovsRepeats { + RepMovsRepeats(uint64_t Size) : Size(Size) {} + + uint64_t Count() const { return Size / UBytes(); } + uint64_t BytesLeft() const { return Size % UBytes(); } + uint64_t UBytes() const { return AVT.getSizeInBits() / 8; } + + const uint64_t Size; + MVT AVT = MVT::i8; +}; + +} // namespace + SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset( - SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, + SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Val, SDValue Size, unsigned Align, bool isVolatile, MachinePointerInfo DstPtrInfo) const { ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); @@ -69,10 +87,10 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset( if ((Align & 3) != 0 || !ConstantSize || ConstantSize->getZExtValue() > Subtarget.getMaxInlineSizeThreshold()) { // Check to see if there is a specialized entry-point for memory zeroing. - ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src); + ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Val); - if (const char *bzeroEntry = V && - V->isNullValue() ? Subtarget.getBZeroEntry() : nullptr) { + if (const char *bzeroEntry = ValC && + ValC->isNullValue() ? Subtarget.getBZeroEntry() : nullptr) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout()); Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); @@ -85,10 +103,12 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset( Args.push_back(Entry); TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(dl).setChain(Chain) - .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), - DAG.getExternalSymbol(bzeroEntry, IntPtr), std::move(Args)) - .setDiscardResult(); + CLI.setDebugLoc(dl) + .setChain(Chain) + .setLibCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol(bzeroEntry, IntPtr), + std::move(Args)) + .setDiscardResult(); std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI); return CallResult.second; @@ -102,9 +122,8 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset( SDValue InFlag; EVT AVT; SDValue Count; - ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Src); + ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Val); unsigned BytesLeft = 0; - bool TwoRepStos = false; if (ValC) { unsigned ValReg; uint64_t Val = ValC->getZExtValue() & 255; @@ -146,7 +165,7 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset( } else { AVT = MVT::i8; Count = DAG.getIntPtrConstant(SizeVal, dl); - Chain = DAG.getCopyToReg(Chain, dl, X86::AL, Src, InFlag); + Chain = DAG.getCopyToReg(Chain, dl, X86::AL, Val, InFlag); InFlag = Chain.getValue(1); } @@ -161,20 +180,7 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset( SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag }; Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops); - if (TwoRepStos) { - InFlag = Chain.getValue(1); - Count = Size; - EVT CVT = Count.getValueType(); - SDValue Left = DAG.getNode(ISD::AND, dl, CVT, Count, - DAG.getConstant((AVT == MVT::i64) ? 7 : 3, dl, - CVT)); - Chain = DAG.getCopyToReg(Chain, dl, (CVT == MVT::i64) ? X86::RCX : X86::ECX, - Left, InFlag); - InFlag = Chain.getValue(1); - Tys = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue Ops[] = { Chain, DAG.getValueType(MVT::i8), InFlag }; - Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops); - } else if (BytesLeft) { + if (BytesLeft) { // Handle the last 1 - 7 bytes. unsigned Offset = SizeVal - BytesLeft; EVT AddrVT = Dst.getValueType(); @@ -183,7 +189,7 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset( Chain = DAG.getMemset(Chain, dl, DAG.getNode(ISD::ADD, dl, AddrVT, Dst, DAG.getConstant(Offset, dl, AddrVT)), - Src, + Val, DAG.getConstant(BytesLeft, dl, SizeVT), Align, isVolatile, false, DstPtrInfo.getWithOffset(Offset)); @@ -204,8 +210,8 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy( DAG.getMachineFunction().getSubtarget<X86Subtarget>(); if (!ConstantSize) return SDValue(); - uint64_t SizeVal = ConstantSize->getZExtValue(); - if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold()) + RepMovsRepeats Repeats(ConstantSize->getZExtValue()); + if (!AlwaysInline && Repeats.Size > Subtarget.getMaxInlineSizeThreshold()) return SDValue(); /// If not DWORD aligned, it is more efficient to call the library. However @@ -226,26 +232,31 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy( if (isBaseRegConflictPossible(DAG, ClobberSet)) return SDValue(); - MVT AVT; - if (Align & 1) - AVT = MVT::i8; - else if (Align & 2) - AVT = MVT::i16; - else if (Align & 4) - // DWORD aligned - AVT = MVT::i32; - else - // QWORD aligned - AVT = Subtarget.is64Bit() ? MVT::i64 : MVT::i32; - - unsigned UBytes = AVT.getSizeInBits() / 8; - unsigned CountVal = SizeVal / UBytes; - SDValue Count = DAG.getIntPtrConstant(CountVal, dl); - unsigned BytesLeft = SizeVal % UBytes; + // If the target has enhanced REPMOVSB, then it's at least as fast to use + // REP MOVSB instead of REP MOVS{W,D,Q}, and it avoids having to handle + // BytesLeft. + if (!Subtarget.hasERMSB() && !(Align & 1)) { + if (Align & 2) + // WORD aligned + Repeats.AVT = MVT::i16; + else if (Align & 4) + // DWORD aligned + Repeats.AVT = MVT::i32; + else + // QWORD aligned + Repeats.AVT = Subtarget.is64Bit() ? MVT::i64 : MVT::i32; + + if (Repeats.BytesLeft() > 0 && + DAG.getMachineFunction().getFunction()->optForMinSize()) { + // When agressively optimizing for size, avoid generating the code to + // handle BytesLeft. + Repeats.AVT = MVT::i8; + } + } SDValue InFlag; Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RCX : X86::ECX, - Count, InFlag); + DAG.getIntPtrConstant(Repeats.Count(), dl), InFlag); InFlag = Chain.getValue(1); Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RDI : X86::EDI, Dst, InFlag); @@ -255,14 +266,14 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy( InFlag = Chain.getValue(1); SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag }; + SDValue Ops[] = { Chain, DAG.getValueType(Repeats.AVT), InFlag }; SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops); SmallVector<SDValue, 4> Results; Results.push_back(RepMovs); - if (BytesLeft) { + if (Repeats.BytesLeft()) { // Handle the last 1 - 7 bytes. - unsigned Offset = SizeVal - BytesLeft; + unsigned Offset = Repeats.Size - Repeats.BytesLeft(); EVT DstVT = Dst.getValueType(); EVT SrcVT = Src.getValueType(); EVT SizeVT = Size.getValueType(); @@ -273,7 +284,8 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy( DAG.getNode(ISD::ADD, dl, SrcVT, Src, DAG.getConstant(Offset, dl, SrcVT)), - DAG.getConstant(BytesLeft, dl, SizeVT), + DAG.getConstant(Repeats.BytesLeft(), dl, + SizeVT), Align, isVolatile, AlwaysInline, false, DstPtrInfo.getWithOffset(Offset), SrcPtrInfo.getWithOffset(Offset))); |