diff options
Diffstat (limited to 'contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp')
-rw-r--r-- | contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp | 271 |
1 files changed, 271 insertions, 0 deletions
diff --git a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp new file mode 100644 index 0000000..6fded9c --- /dev/null +++ b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp @@ -0,0 +1,271 @@ +//===-- ARMSelectionDAGInfo.cpp - ARM SelectionDAG Info -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the ARMSelectionDAGInfo class. +// +//===----------------------------------------------------------------------===// + +#include "ARMTargetMachine.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/IR/DerivedTypes.h" +using namespace llvm; + +#define DEBUG_TYPE "arm-selectiondag-info" + +// Emit, if possible, a specialized version of the given Libcall. Typically this +// means selecting the appropriately aligned version, but we also convert memset +// of 0 into memclr. +SDValue ARMSelectionDAGInfo:: +EmitSpecializedLibcall(SelectionDAG &DAG, SDLoc dl, + SDValue Chain, + SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, + RTLIB::Libcall LC) const { + const ARMSubtarget &Subtarget = + DAG.getMachineFunction().getSubtarget<ARMSubtarget>(); + const ARMTargetLowering *TLI = Subtarget.getTargetLowering(); + + // Only use a specialized AEABI function if the default version of this + // Libcall is an AEABI function. + if (std::strncmp(TLI->getLibcallName(LC), "__aeabi", 7) != 0) + return SDValue(); + + // Translate RTLIB::Libcall to AEABILibcall. We only do this in order to be + // able to translate memset to memclr and use the value to index the function + // name array. + enum { + AEABI_MEMCPY = 0, + AEABI_MEMMOVE, + AEABI_MEMSET, + AEABI_MEMCLR + } AEABILibcall; + switch (LC) { + case RTLIB::MEMCPY: + AEABILibcall = AEABI_MEMCPY; + break; + case RTLIB::MEMMOVE: + AEABILibcall = AEABI_MEMMOVE; + break; + case RTLIB::MEMSET: + AEABILibcall = AEABI_MEMSET; + if (ConstantSDNode *ConstantSrc = dyn_cast<ConstantSDNode>(Src)) + if (ConstantSrc->getZExtValue() == 0) + AEABILibcall = AEABI_MEMCLR; + break; + default: + return SDValue(); + } + + // Choose the most-aligned libcall variant that we can + enum { + ALIGN1 = 0, + ALIGN4, + ALIGN8 + } AlignVariant; + if ((Align & 7) == 0) + AlignVariant = ALIGN8; + else if ((Align & 3) == 0) + AlignVariant = ALIGN4; + else + AlignVariant = ALIGN1; + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); + Entry.Node = Dst; + Args.push_back(Entry); + if (AEABILibcall == AEABI_MEMCLR) { + Entry.Node = Size; + Args.push_back(Entry); + } else if (AEABILibcall == AEABI_MEMSET) { + // Adjust parameters for memset, EABI uses format (ptr, size, value), + // GNU library uses (ptr, value, size) + // See RTABI section 4.3.4 + Entry.Node = Size; + Args.push_back(Entry); + + // Extend or truncate the argument to be an i32 value for the call. + if (Src.getValueType().bitsGT(MVT::i32)) + Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src); + else if (Src.getValueType().bitsLT(MVT::i32)) + Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src); + + Entry.Node = Src; + Entry.Ty = Type::getInt32Ty(*DAG.getContext()); + Entry.isSExt = false; + Args.push_back(Entry); + } else { + Entry.Node = Src; + Args.push_back(Entry); + + Entry.Node = Size; + Args.push_back(Entry); + } + + char const *FunctionNames[4][3] = { + { "__aeabi_memcpy", "__aeabi_memcpy4", "__aeabi_memcpy8" }, + { "__aeabi_memmove", "__aeabi_memmove4", "__aeabi_memmove8" }, + { "__aeabi_memset", "__aeabi_memset4", "__aeabi_memset8" }, + { "__aeabi_memclr", "__aeabi_memclr4", "__aeabi_memclr8" } + }; + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl) + .setChain(Chain) + .setCallee( + TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant], + TLI->getPointerTy(DAG.getDataLayout())), + std::move(Args), 0) + .setDiscardResult(); + std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); + + return CallResult.second; +} + +SDValue +ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, + SDValue Chain, + SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, + bool isVolatile, bool AlwaysInline, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const { + const ARMSubtarget &Subtarget = + DAG.getMachineFunction().getSubtarget<ARMSubtarget>(); + // Do repeated 4-byte loads and stores. To be improved. + // This requires 4-byte alignment. + if ((Align & 3) != 0) + return SDValue(); + // This requires the copy size to be a constant, preferably + // within a subtarget-specific limit. + ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); + if (!ConstantSize) + return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align, + RTLIB::MEMCPY); + uint64_t SizeVal = ConstantSize->getZExtValue(); + if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold()) + return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align, + RTLIB::MEMCPY); + + unsigned BytesLeft = SizeVal & 3; + unsigned NumMemOps = SizeVal >> 2; + unsigned EmittedNumMemOps = 0; + EVT VT = MVT::i32; + unsigned VTSize = 4; + unsigned i = 0; + // Emit a maximum of 4 loads in Thumb1 since we have fewer registers + const unsigned MaxLoadsInLDM = Subtarget.isThumb1Only() ? 4 : 6; + SDValue TFOps[6]; + SDValue Loads[6]; + uint64_t SrcOff = 0, DstOff = 0; + + // FIXME: We should invent a VMEMCPY pseudo-instruction that lowers to + // VLDM/VSTM and make this code emit it when appropriate. This would reduce + // pressure on the general purpose registers. However this seems harder to map + // onto the register allocator's view of the world. + + // The number of MEMCPY pseudo-instructions to emit. We use up to + // MaxLoadsInLDM registers per mcopy, which will get lowered into ldm/stm + // later on. This is a lower bound on the number of MEMCPY operations we must + // emit. + unsigned NumMEMCPYs = (NumMemOps + MaxLoadsInLDM - 1) / MaxLoadsInLDM; + + SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other, MVT::Glue); + + for (unsigned I = 0; I != NumMEMCPYs; ++I) { + // Evenly distribute registers among MEMCPY operations to reduce register + // pressure. + unsigned NextEmittedNumMemOps = NumMemOps * (I + 1) / NumMEMCPYs; + unsigned NumRegs = NextEmittedNumMemOps - EmittedNumMemOps; + + Dst = DAG.getNode(ARMISD::MEMCPY, dl, VTs, Chain, Dst, Src, + DAG.getConstant(NumRegs, dl, MVT::i32)); + Src = Dst.getValue(1); + Chain = Dst.getValue(2); + + DstPtrInfo = DstPtrInfo.getWithOffset(NumRegs * VTSize); + SrcPtrInfo = SrcPtrInfo.getWithOffset(NumRegs * VTSize); + + EmittedNumMemOps = NextEmittedNumMemOps; + } + + if (BytesLeft == 0) + return Chain; + + // Issue loads / stores for the trailing (1 - 3) bytes. + unsigned BytesLeftSave = BytesLeft; + i = 0; + while (BytesLeft) { + if (BytesLeft >= 2) { + VT = MVT::i16; + VTSize = 2; + } else { + VT = MVT::i8; + VTSize = 1; + } + + Loads[i] = DAG.getLoad(VT, dl, Chain, + DAG.getNode(ISD::ADD, dl, MVT::i32, Src, + DAG.getConstant(SrcOff, dl, MVT::i32)), + SrcPtrInfo.getWithOffset(SrcOff), + false, false, false, 0); + TFOps[i] = Loads[i].getValue(1); + ++i; + SrcOff += VTSize; + BytesLeft -= VTSize; + } + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + makeArrayRef(TFOps, i)); + + i = 0; + BytesLeft = BytesLeftSave; + while (BytesLeft) { + if (BytesLeft >= 2) { + VT = MVT::i16; + VTSize = 2; + } else { + VT = MVT::i8; + VTSize = 1; + } + + TFOps[i] = DAG.getStore(Chain, dl, Loads[i], + DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, + DAG.getConstant(DstOff, dl, MVT::i32)), + DstPtrInfo.getWithOffset(DstOff), false, false, 0); + ++i; + DstOff += VTSize; + BytesLeft -= VTSize; + } + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + makeArrayRef(TFOps, i)); +} + + +SDValue ARMSelectionDAGInfo:: +EmitTargetCodeForMemmove(SelectionDAG &DAG, SDLoc dl, + SDValue Chain, + SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, + bool isVolatile, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const { + return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align, + RTLIB::MEMMOVE); +} + + +SDValue ARMSelectionDAGInfo:: +EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, + SDValue Chain, SDValue Dst, + SDValue Src, SDValue Size, + unsigned Align, bool isVolatile, + MachinePointerInfo DstPtrInfo) const { + return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align, + RTLIB::MEMSET); +} |