diff options
Diffstat (limited to 'contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp')
-rw-r--r-- | contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp | 84 |
1 files changed, 65 insertions, 19 deletions
diff --git a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp index e4c58d4..ef3318b 100644 --- a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp +++ b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp @@ -35,7 +35,7 @@ void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { unsigned CountS = (Imm >> 6) & 3; // CountS selects which input element to use. - unsigned InVal = 4+CountS; + unsigned InVal = 4 + CountS; // CountD specifies which element of destination to update. ShuffleMask[CountD] = InVal; // ZMask zaps values, potentially overriding the CountD elt. @@ -47,20 +47,20 @@ void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { // <3,1> or <6,7,2,3> void DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) { - for (unsigned i = NElts/2; i != NElts; ++i) - ShuffleMask.push_back(NElts+i); + for (unsigned i = NElts / 2; i != NElts; ++i) + ShuffleMask.push_back(NElts + i); - for (unsigned i = NElts/2; i != NElts; ++i) + for (unsigned i = NElts / 2; i != NElts; ++i) ShuffleMask.push_back(i); } // <0,2> or <0,1,4,5> void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) { - for (unsigned i = 0; i != NElts/2; ++i) + for (unsigned i = 0; i != NElts / 2; ++i) ShuffleMask.push_back(i); - for (unsigned i = 0; i != NElts/2; ++i) - ShuffleMask.push_back(NElts+i); + for (unsigned i = 0; i != NElts / 2; ++i) + ShuffleMask.push_back(NElts + i); } void DecodeMOVSLDUPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) { @@ -79,6 +79,20 @@ void DecodeMOVSHDUPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) { } } +void DecodeMOVDDUPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) { + unsigned VectorSizeInBits = VT.getSizeInBits(); + unsigned ScalarSizeInBits = VT.getScalarSizeInBits(); + unsigned NumElts = VT.getVectorNumElements(); + unsigned NumLanes = VectorSizeInBits / 128; + unsigned NumLaneElts = NumElts / NumLanes; + unsigned NumLaneSubElts = 64 / ScalarSizeInBits; + + for (unsigned l = 0; l < NumElts; l += NumLaneElts) + for (unsigned i = 0; i < NumLaneElts; i += NumLaneSubElts) + for (unsigned s = 0; s != NumLaneSubElts; s++) + ShuffleMask.push_back(l + s); +} + void DecodePSLLDQMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { unsigned VectorSizeInBits = VT.getSizeInBits(); unsigned NumElts = VectorSizeInBits / 8; @@ -189,8 +203,8 @@ void DecodeSHUFPMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { unsigned NewImm = Imm; for (unsigned l = 0; l != NumElts; l += NumLaneElts) { // each half of a lane comes from different source - for (unsigned s = 0; s != NumElts*2; s += NumElts) { - for (unsigned i = 0; i != NumLaneElts/2; ++i) { + for (unsigned s = 0; s != NumElts * 2; s += NumElts) { + for (unsigned i = 0; i != NumLaneElts / 2; ++i) { ShuffleMask.push_back(NewImm % NumLaneElts + s + l); NewImm /= NumLaneElts; } @@ -212,9 +226,9 @@ void DecodeUNPCKHMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) { unsigned NumLaneElts = NumElts / NumLanes; for (unsigned l = 0; l != NumElts; l += NumLaneElts) { - for (unsigned i = l + NumLaneElts/2, e = l + NumLaneElts; i != e; ++i) { - ShuffleMask.push_back(i); // Reads from dest/src1 - ShuffleMask.push_back(i+NumElts); // Reads from src/src2 + for (unsigned i = l + NumLaneElts / 2, e = l + NumLaneElts; i != e; ++i) { + ShuffleMask.push_back(i); // Reads from dest/src1 + ShuffleMask.push_back(i + NumElts); // Reads from src/src2 } } } @@ -232,9 +246,9 @@ void DecodeUNPCKLMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) { unsigned NumLaneElts = NumElts / NumLanes; for (unsigned l = 0; l != NumElts; l += NumLaneElts) { - for (unsigned i = l, e = l + NumLaneElts/2; i != e; ++i) { - ShuffleMask.push_back(i); // Reads from dest/src1 - ShuffleMask.push_back(i+NumElts); // Reads from src/src2 + for (unsigned i = l, e = l + NumLaneElts / 2; i != e; ++i) { + ShuffleMask.push_back(i); // Reads from dest/src1 + ShuffleMask.push_back(i + NumElts); // Reads from src/src2 } } } @@ -244,11 +258,11 @@ void DecodeVPERM2X128Mask(MVT VT, unsigned Imm, if (Imm & 0x88) return; // Not a shuffle - unsigned HalfSize = VT.getVectorNumElements()/2; + unsigned HalfSize = VT.getVectorNumElements() / 2; for (unsigned l = 0; l != 2; ++l) { - unsigned HalfBegin = ((Imm >> (l*4)) & 0x3) * HalfSize; - for (unsigned i = HalfBegin, e = HalfBegin+HalfSize; i != e; ++i) + unsigned HalfBegin = ((Imm >> (l * 4)) & 0x3) * HalfSize; + for (unsigned i = HalfBegin, e = HalfBegin + HalfSize; i != e; ++i) ShuffleMask.push_back(i); } } @@ -341,7 +355,7 @@ void DecodeBLENDMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { /// No VT provided since it only works on 256-bit, 4 element vectors. void DecodeVPERMMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { for (unsigned i = 0; i != 4; ++i) { - ShuffleMask.push_back((Imm >> (2*i)) & 3); + ShuffleMask.push_back((Imm >> (2 * i)) & 3); } } @@ -385,4 +399,36 @@ void DecodeVPERMILPMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) { } } +void DecodeZeroExtendMask(MVT SrcVT, MVT DstVT, SmallVectorImpl<int> &Mask) { + unsigned NumDstElts = DstVT.getVectorNumElements(); + unsigned SrcScalarBits = SrcVT.getScalarSizeInBits(); + unsigned DstScalarBits = DstVT.getScalarSizeInBits(); + unsigned Scale = DstScalarBits / SrcScalarBits; + assert(SrcScalarBits < DstScalarBits && + "Expected zero extension mask to increase scalar size"); + assert(SrcVT.getVectorNumElements() >= NumDstElts && + "Too many zero extension lanes"); + + for (unsigned i = 0; i != NumDstElts; i++) { + Mask.push_back(i); + for (unsigned j = 1; j != Scale; j++) + Mask.push_back(SM_SentinelZero); + } +} + +void DecodeZeroMoveLowMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) { + unsigned NumElts = VT.getVectorNumElements(); + ShuffleMask.push_back(0); + for (unsigned i = 1; i < NumElts; i++) + ShuffleMask.push_back(SM_SentinelZero); +} + +void DecodeScalarMoveMask(MVT VT, bool IsLoad, SmallVectorImpl<int> &Mask) { + // First element comes from the first element of second source. + // Remaining elements: Load zero extends / Move copies from first source. + unsigned NumElts = VT.getVectorNumElements(); + Mask.push_back(NumElts); + for (unsigned i = 1; i < NumElts; i++) + Mask.push_back(IsLoad ? static_cast<int>(SM_SentinelZero) : i); +} } // llvm namespace |