diff options
Diffstat (limited to 'contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp')
-rw-r--r-- | contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 86 |
1 files changed, 65 insertions, 21 deletions
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 3c9cb17..d4fa20f 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -106,7 +106,8 @@ class VectorLegalizer { SDValue ExpandStore(SDValue Op); SDValue ExpandFNEG(SDValue Op); SDValue ExpandBITREVERSE(SDValue Op); - SDValue ExpandCTLZ_CTTZ_ZERO_UNDEF(SDValue Op); + SDValue ExpandCTLZ(SDValue Op); + SDValue ExpandCTTZ_ZERO_UNDEF(SDValue Op); /// \brief Implements vector promotion. /// @@ -332,6 +333,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::SMAX: case ISD::UMIN: case ISD::UMAX: + case ISD::SMUL_LOHI: + case ISD::UMUL_LOHI: QueryType = Node->getValueType(0); break; case ISD::FP_ROUND_INREG: @@ -362,7 +365,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { Result = Tmp1; break; } - // FALL THROUGH + LLVM_FALLTHROUGH; } case TargetLowering::Expand: Result = Expand(Op); @@ -693,9 +696,11 @@ SDValue VectorLegalizer::Expand(SDValue Op) { return UnrollVSETCC(Op); case ISD::BITREVERSE: return ExpandBITREVERSE(Op); + case ISD::CTLZ: case ISD::CTLZ_ZERO_UNDEF: + return ExpandCTLZ(Op); case ISD::CTTZ_ZERO_UNDEF: - return ExpandCTLZ_CTTZ_ZERO_UNDEF(Op); + return ExpandCTTZ_ZERO_UNDEF(Op); default: return DAG.UnrollVectorOp(Op.getNode()); } @@ -770,8 +775,8 @@ SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) { SDLoc DL(Op); EVT OrigTy = cast<VTSDNode>(Op->getOperand(1))->getVT(); - unsigned BW = VT.getScalarType().getSizeInBits(); - unsigned OrigBW = OrigTy.getScalarType().getSizeInBits(); + unsigned BW = VT.getScalarSizeInBits(); + unsigned OrigBW = OrigTy.getScalarSizeInBits(); SDValue ShiftSz = DAG.getConstant(BW - OrigBW, DL, VT); Op = Op.getOperand(0); @@ -817,8 +822,8 @@ SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op) { // Now we need sign extend. Do this by shifting the elements. Even if these // aren't legal operations, they have a better chance of being legalized // without full scalarization than the sign extension does. - unsigned EltWidth = VT.getVectorElementType().getSizeInBits(); - unsigned SrcEltWidth = SrcVT.getVectorElementType().getSizeInBits(); + unsigned EltWidth = VT.getScalarSizeInBits(); + unsigned SrcEltWidth = SrcVT.getScalarSizeInBits(); SDValue ShiftAmount = DAG.getConstant(EltWidth - SrcEltWidth, DL, VT); return DAG.getNode(ISD::SRA, DL, VT, DAG.getNode(ISD::SHL, DL, VT, Op, ShiftAmount), @@ -951,7 +956,7 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { // If the mask and the type are different sizes, unroll the vector op. This // can occur when getSetCCResultType returns something that is different in // size from the operand types. For example, v4i8 = select v4i32, v4i8, v4i8. - if (VT.getSizeInBits() != Op1.getValueType().getSizeInBits()) + if (VT.getSizeInBits() != Op1.getValueSizeInBits()) return DAG.UnrollVectorOp(Op.getNode()); // Bitcast the operands to be the same type as the mask. @@ -961,7 +966,7 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2); SDValue AllOnes = DAG.getConstant( - APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()), DL, VT); + APInt::getAllOnesValue(VT.getScalarSizeInBits()), DL, VT); SDValue NotMask = DAG.getNode(ISD::XOR, DL, VT, Mask, AllOnes); Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask); @@ -979,21 +984,20 @@ SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) { TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand) return DAG.UnrollVectorOp(Op.getNode()); - EVT SVT = VT.getScalarType(); - assert((SVT.getSizeInBits() == 64 || SVT.getSizeInBits() == 32) && - "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide"); + unsigned BW = VT.getScalarSizeInBits(); + assert((BW == 64 || BW == 32) && + "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide"); - unsigned BW = SVT.getSizeInBits(); - SDValue HalfWord = DAG.getConstant(BW/2, DL, VT); + SDValue HalfWord = DAG.getConstant(BW / 2, DL, VT); // Constants to clear the upper part of the word. // Notice that we can also use SHL+SHR, but using a constant is slightly // faster on x86. - uint64_t HWMask = (SVT.getSizeInBits()==64)?0x00000000FFFFFFFF:0x0000FFFF; + uint64_t HWMask = (BW == 64) ? 0x00000000FFFFFFFF : 0x0000FFFF; SDValue HalfWordMask = DAG.getConstant(HWMask, DL, VT); // Two to the power of half-word-size. - SDValue TWOHW = DAG.getConstantFP(1 << (BW/2), DL, Op.getValueType()); + SDValue TWOHW = DAG.getConstantFP(1 << (BW / 2), DL, Op.getValueType()); // Clear upper part of LO, lower HI SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Op.getOperand(0), HalfWord); @@ -1010,7 +1014,6 @@ SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) { return DAG.getNode(ISD::FADD, DL, Op.getValueType(), fHI, fLO); } - SDValue VectorLegalizer::ExpandFNEG(SDValue Op) { if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) { SDLoc DL(Op); @@ -1022,12 +1025,53 @@ SDValue VectorLegalizer::ExpandFNEG(SDValue Op) { return DAG.UnrollVectorOp(Op.getNode()); } -SDValue VectorLegalizer::ExpandCTLZ_CTTZ_ZERO_UNDEF(SDValue Op) { +SDValue VectorLegalizer::ExpandCTLZ(SDValue Op) { + EVT VT = Op.getValueType(); + unsigned NumBitsPerElt = VT.getScalarSizeInBits(); + + // If the non-ZERO_UNDEF version is supported we can use that instead. + if (Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF && + TLI.isOperationLegalOrCustom(ISD::CTLZ, VT)) { + SDLoc DL(Op); + return DAG.getNode(ISD::CTLZ, DL, Op.getValueType(), Op.getOperand(0)); + } + + // If CTPOP is available we can lower with a CTPOP based method: + // u16 ctlz(u16 x) { + // x |= (x >> 1); + // x |= (x >> 2); + // x |= (x >> 4); + // x |= (x >> 8); + // return ctpop(~x); + // } + // Ref: "Hacker's Delight" by Henry Warren + if (isPowerOf2_32(NumBitsPerElt) && + TLI.isOperationLegalOrCustom(ISD::CTPOP, VT) && + TLI.isOperationLegalOrCustom(ISD::SRL, VT) && + TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT) && + TLI.isOperationLegalOrCustomOrPromote(ISD::XOR, VT)) { + SDLoc DL(Op); + SDValue Res = Op.getOperand(0); + EVT ShiftTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); + + for (unsigned i = 1; i != NumBitsPerElt; i *= 2) + Res = DAG.getNode( + ISD::OR, DL, VT, Res, + DAG.getNode(ISD::SRL, DL, VT, Res, DAG.getConstant(i, DL, ShiftTy))); + + Res = DAG.getNOT(DL, Res, VT); + return DAG.getNode(ISD::CTPOP, DL, VT, Res); + } + + // Otherwise go ahead and unroll. + return DAG.UnrollVectorOp(Op.getNode()); +} + +SDValue VectorLegalizer::ExpandCTTZ_ZERO_UNDEF(SDValue Op) { // If the non-ZERO_UNDEF version is supported we can use that instead. - unsigned Opc = Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF ? ISD::CTLZ : ISD::CTTZ; - if (TLI.isOperationLegalOrCustom(Opc, Op.getValueType())) { + if (TLI.isOperationLegalOrCustom(ISD::CTTZ, Op.getValueType())) { SDLoc DL(Op); - return DAG.getNode(Opc, DL, Op.getValueType(), Op.getOperand(0)); + return DAG.getNode(ISD::CTTZ, DL, Op.getValueType(), Op.getOperand(0)); } // Otherwise go ahead and unroll. |