summary refs log tree commit diff stats
path: root/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp')
-rw-r--r-- contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp 84
1 files changed, 65 insertions, 19 deletions
diff --git a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index e4c58d4..ef3318b 100644
--- a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -35,7 +35,7 @@ void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
unsigned CountS = (Imm >> 6) & 3;
// CountS selects which input element to use.
- unsigned InVal = 4+CountS;
+ unsigned InVal = 4 + CountS;
// CountD specifies which element of destination to update.
ShuffleMask[CountD] = InVal;
// ZMask zaps values, potentially overriding the CountD elt.
@@ -47,20 +47,20 @@ void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
// <3,1> or <6,7,2,3>
void DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) {
- for (unsigned i = NElts/2; i != NElts; ++i)
- ShuffleMask.push_back(NElts+i);
+ for (unsigned i = NElts / 2; i != NElts; ++i)
+ ShuffleMask.push_back(NElts + i);
- for (unsigned i = NElts/2; i != NElts; ++i)
+ for (unsigned i = NElts / 2; i != NElts; ++i)
ShuffleMask.push_back(i);
}
// <0,2> or <0,1,4,5>
void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) {
- for (unsigned i = 0; i != NElts/2; ++i)
+ for (unsigned i = 0; i != NElts / 2; ++i)
ShuffleMask.push_back(i);
- for (unsigned i = 0; i != NElts/2; ++i)
- ShuffleMask.push_back(NElts+i);
+ for (unsigned i = 0; i != NElts / 2; ++i)
+ ShuffleMask.push_back(NElts + i);
}
void DecodeMOVSLDUPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
@@ -79,6 +79,20 @@ void DecodeMOVSHDUPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
}
}
+void DecodeMOVDDUPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
+ unsigned VectorSizeInBits = VT.getSizeInBits();
+ unsigned ScalarSizeInBits = VT.getScalarSizeInBits();
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned NumLanes = VectorSizeInBits / 128;
+ unsigned NumLaneElts = NumElts / NumLanes;
+ unsigned NumLaneSubElts = 64 / ScalarSizeInBits;
+
+ for (unsigned l = 0; l < NumElts; l += NumLaneElts)
+ for (unsigned i = 0; i < NumLaneElts; i += NumLaneSubElts)
+ for (unsigned s = 0; s != NumLaneSubElts; s++)
+ ShuffleMask.push_back(l + s);
+}
+
void DecodePSLLDQMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
unsigned VectorSizeInBits = VT.getSizeInBits();
unsigned NumElts = VectorSizeInBits / 8;
@@ -189,8 +203,8 @@ void DecodeSHUFPMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
unsigned NewImm = Imm;
for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
// each half of a lane comes from different source
- for (unsigned s = 0; s != NumElts*2; s += NumElts) {
- for (unsigned i = 0; i != NumLaneElts/2; ++i) {
+ for (unsigned s = 0; s != NumElts * 2; s += NumElts) {
+ for (unsigned i = 0; i != NumLaneElts / 2; ++i) {
ShuffleMask.push_back(NewImm % NumLaneElts + s + l);
NewImm /= NumLaneElts;
}
@@ -212,9 +226,9 @@ void DecodeUNPCKHMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
unsigned NumLaneElts = NumElts / NumLanes;
for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
- for (unsigned i = l + NumLaneElts/2, e = l + NumLaneElts; i != e; ++i) {
- ShuffleMask.push_back(i); // Reads from dest/src1
- ShuffleMask.push_back(i+NumElts); // Reads from src/src2
+ for (unsigned i = l + NumLaneElts / 2, e = l + NumLaneElts; i != e; ++i) {
+ ShuffleMask.push_back(i); // Reads from dest/src1
+ ShuffleMask.push_back(i + NumElts); // Reads from src/src2
}
}
}
@@ -232,9 +246,9 @@ void DecodeUNPCKLMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
unsigned NumLaneElts = NumElts / NumLanes;
for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
- for (unsigned i = l, e = l + NumLaneElts/2; i != e; ++i) {
- ShuffleMask.push_back(i); // Reads from dest/src1
- ShuffleMask.push_back(i+NumElts); // Reads from src/src2
+ for (unsigned i = l, e = l + NumLaneElts / 2; i != e; ++i) {
+ ShuffleMask.push_back(i); // Reads from dest/src1
+ ShuffleMask.push_back(i + NumElts); // Reads from src/src2
}
}
}
@@ -244,11 +258,11 @@ void DecodeVPERM2X128Mask(MVT VT, unsigned Imm,
if (Imm & 0x88)
return; // Not a shuffle
- unsigned HalfSize = VT.getVectorNumElements()/2;
+ unsigned HalfSize = VT.getVectorNumElements() / 2;
for (unsigned l = 0; l != 2; ++l) {
- unsigned HalfBegin = ((Imm >> (l*4)) & 0x3) * HalfSize;
- for (unsigned i = HalfBegin, e = HalfBegin+HalfSize; i != e; ++i)
+ unsigned HalfBegin = ((Imm >> (l * 4)) & 0x3) * HalfSize;
+ for (unsigned i = HalfBegin, e = HalfBegin + HalfSize; i != e; ++i)
ShuffleMask.push_back(i);
}
}
@@ -341,7 +355,7 @@ void DecodeBLENDMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
/// No VT provided since it only works on 256-bit, 4 element vectors.
void DecodeVPERMMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
for (unsigned i = 0; i != 4; ++i) {
- ShuffleMask.push_back((Imm >> (2*i)) & 3);
+ ShuffleMask.push_back((Imm >> (2 * i)) & 3);
}
}
@@ -385,4 +399,36 @@ void DecodeVPERMILPMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
}
}
+void DecodeZeroExtendMask(MVT SrcVT, MVT DstVT, SmallVectorImpl<int> &Mask) {
+ unsigned NumDstElts = DstVT.getVectorNumElements();
+ unsigned SrcScalarBits = SrcVT.getScalarSizeInBits();
+ unsigned DstScalarBits = DstVT.getScalarSizeInBits();
+ unsigned Scale = DstScalarBits / SrcScalarBits;
+ assert(SrcScalarBits < DstScalarBits &&
+ "Expected zero extension mask to increase scalar size");
+ assert(SrcVT.getVectorNumElements() >= NumDstElts &&
+ "Too many zero extension lanes");
+
+ for (unsigned i = 0; i != NumDstElts; i++) {
+ Mask.push_back(i);
+ for (unsigned j = 1; j != Scale; j++)
+ Mask.push_back(SM_SentinelZero);
+ }
+}
+
+void DecodeZeroMoveLowMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
+ unsigned NumElts = VT.getVectorNumElements();
+ ShuffleMask.push_back(0);
+ for (unsigned i = 1; i < NumElts; i++)
+ ShuffleMask.push_back(SM_SentinelZero);
+}
+
+void DecodeScalarMoveMask(MVT VT, bool IsLoad, SmallVectorImpl<int> &Mask) {
+ // First element comes from the first element of second source.
+ // Remaining elements: Load zero extends / Move copies from first source.
+ unsigned NumElts = VT.getVectorNumElements();
+ Mask.push_back(NumElts);
+ for (unsigned i = 1; i < NumElts; i++)
+ Mask.push_back(IsLoad ? static_cast<int>(SM_SentinelZero) : i);
+}
} // llvm namespace
OpenPOWER on IntegriCloud