summaryrefslogtreecommitdiffstats
path: root/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp')
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp196
1 files changed, 97 insertions, 99 deletions
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index f61f631..3c9cb17 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -358,8 +358,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case TargetLowering::Legal:
break;
case TargetLowering::Custom: {
- SDValue Tmp1 = TLI.LowerOperation(Op, DAG);
- if (Tmp1.getNode()) {
+ if (SDValue Tmp1 = TLI.LowerOperation(Op, DAG)) {
Result = Tmp1;
break;
}
@@ -493,21 +492,26 @@ SDValue VectorLegalizer::PromoteFP_TO_INT(SDValue Op, bool isSigned) {
SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
- SDLoc dl(Op);
LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
- SDValue Chain = LD->getChain();
- SDValue BasePTR = LD->getBasePtr();
- EVT SrcVT = LD->getMemoryVT();
- ISD::LoadExtType ExtType = LD->getExtensionType();
- SmallVector<SDValue, 8> Vals;
- SmallVector<SDValue, 8> LoadChains;
+ EVT SrcVT = LD->getMemoryVT();
+ EVT SrcEltVT = SrcVT.getScalarType();
unsigned NumElem = SrcVT.getVectorNumElements();
- EVT SrcEltVT = SrcVT.getScalarType();
- EVT DstEltVT = Op.getNode()->getValueType(0).getScalarType();
+ SDValue NewChain;
+ SDValue Value;
if (SrcVT.getVectorNumElements() > 1 && !SrcEltVT.isByteSized()) {
+ SDLoc dl(Op);
+
+ SmallVector<SDValue, 8> Vals;
+ SmallVector<SDValue, 8> LoadChains;
+
+ EVT DstEltVT = LD->getValueType(0).getScalarType();
+ SDValue Chain = LD->getChain();
+ SDValue BasePTR = LD->getBasePtr();
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+
// When elements in a vector are not byte-addressable, we cannot directly
// load each element by advancing pointer, which could only address bytes.
// Instead, we load all significant words, mask bits off, and concatenate
@@ -531,24 +535,22 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
unsigned LoadBytes = WideBytes;
if (RemainingBytes >= LoadBytes) {
- ScalarLoad = DAG.getLoad(WideVT, dl, Chain, BasePTR,
- LD->getPointerInfo().getWithOffset(Offset),
- LD->isVolatile(), LD->isNonTemporal(),
- LD->isInvariant(),
- MinAlign(LD->getAlignment(), Offset),
- LD->getAAInfo());
+ ScalarLoad =
+ DAG.getLoad(WideVT, dl, Chain, BasePTR,
+ LD->getPointerInfo().getWithOffset(Offset),
+ MinAlign(LD->getAlignment(), Offset),
+ LD->getMemOperand()->getFlags(), LD->getAAInfo());
} else {
EVT LoadVT = WideVT;
while (RemainingBytes < LoadBytes) {
LoadBytes >>= 1; // Reduce the load size by half.
LoadVT = EVT::getIntegerVT(*DAG.getContext(), LoadBytes << 3);
}
- ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR,
- LD->getPointerInfo().getWithOffset(Offset),
- LoadVT, LD->isVolatile(),
- LD->isNonTemporal(), LD->isInvariant(),
- MinAlign(LD->getAlignment(), Offset),
- LD->getAAInfo());
+ ScalarLoad =
+ DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR,
+ LD->getPointerInfo().getWithOffset(Offset), LoadVT,
+ MinAlign(LD->getAlignment(), Offset),
+ LD->getMemOperand()->getFlags(), LD->getAAInfo());
}
RemainingBytes -= LoadBytes;
@@ -614,29 +616,17 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
}
Vals.push_back(Lo);
}
- } else {
- unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8;
- for (unsigned Idx=0; Idx<NumElem; Idx++) {
- SDValue ScalarLoad = DAG.getExtLoad(ExtType, dl,
- Op.getNode()->getValueType(0).getScalarType(),
- Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride),
- SrcVT.getScalarType(),
- LD->isVolatile(), LD->isNonTemporal(), LD->isInvariant(),
- MinAlign(LD->getAlignment(), Idx * Stride), LD->getAAInfo());
-
- BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
- DAG.getConstant(Stride, dl, BasePTR.getValueType()));
+ NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
+ Value = DAG.getNode(ISD::BUILD_VECTOR, dl,
+ Op.getNode()->getValueType(0), Vals);
+ } else {
+ SDValue Scalarized = TLI.scalarizeVectorLoad(LD, DAG);
- Vals.push_back(ScalarLoad.getValue(0));
- LoadChains.push_back(ScalarLoad.getValue(1));
- }
+ NewChain = Scalarized.getValue(1);
+ Value = Scalarized.getValue(0);
}
- SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
- SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl,
- Op.getNode()->getValueType(0), Vals);
-
AddLegalizedOperand(Op.getValue(0), Value);
AddLegalizedOperand(Op.getValue(1), NewChain);
@@ -644,54 +634,37 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
}
SDValue VectorLegalizer::ExpandStore(SDValue Op) {
- SDLoc dl(Op);
StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
- SDValue Chain = ST->getChain();
- SDValue BasePTR = ST->getBasePtr();
- SDValue Value = ST->getValue();
- EVT StVT = ST->getMemoryVT();
-
- unsigned Alignment = ST->getAlignment();
- bool isVolatile = ST->isVolatile();
- bool isNonTemporal = ST->isNonTemporal();
- AAMDNodes AAInfo = ST->getAAInfo();
- unsigned NumElem = StVT.getVectorNumElements();
- // The type of the data we want to save
- EVT RegVT = Value.getValueType();
- EVT RegSclVT = RegVT.getScalarType();
- // The type of data as saved in memory.
+ EVT StVT = ST->getMemoryVT();
EVT MemSclVT = StVT.getScalarType();
-
- // Cast floats into integers
unsigned ScalarSize = MemSclVT.getSizeInBits();
// Round odd types to the next pow of two.
- if (!isPowerOf2_32(ScalarSize))
- ScalarSize = NextPowerOf2(ScalarSize);
-
- // Store Stride in bytes
- unsigned Stride = ScalarSize/8;
- // Extract each of the elements from the original vector
- // and save them into memory individually.
- SmallVector<SDValue, 8> Stores;
- for (unsigned Idx = 0; Idx < NumElem; Idx++) {
- SDValue Ex = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, RegSclVT, Value,
- DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
-
- // This scalar TruncStore may be illegal, but we legalize it later.
- SDValue Store = DAG.getTruncStore(Chain, dl, Ex, BasePTR,
- ST->getPointerInfo().getWithOffset(Idx*Stride), MemSclVT,
- isVolatile, isNonTemporal, MinAlign(Alignment, Idx*Stride),
- AAInfo);
-
- BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
- DAG.getConstant(Stride, dl, BasePTR.getValueType()));
-
- Stores.push_back(Store);
+ if (!isPowerOf2_32(ScalarSize)) {
+ // FIXME: This is completely broken and inconsistent with ExpandLoad
+ // handling.
+
+ // For sub-byte element sizes, this ends up with 0 stride between
+ // elements, so the same element just gets re-written to the same
+ // location. There seem to be tests explicitly testing for this broken
+ // behavior though.
+
+ LLVMContext &Ctx = *DAG.getContext();
+
+ EVT NewMemVT
+ = EVT::getVectorVT(Ctx,
+ MemSclVT.getIntegerVT(Ctx, NextPowerOf2(ScalarSize)),
+ StVT.getVectorNumElements());
+
+ SDValue NewVectorStore = DAG.getTruncStore(
+ ST->getChain(), SDLoc(Op), ST->getValue(), ST->getBasePtr(),
+ ST->getPointerInfo(), NewMemVT, ST->getAlignment(),
+ ST->getMemOperand()->getFlags(), ST->getAAInfo());
+ ST = cast<StoreSDNode>(NewVectorStore.getNode());
}
- SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
+
+ SDValue TF = TLI.scalarizeVectorStore(ST, DAG);
AddLegalizedOperand(Op, TF);
return TF;
}
@@ -864,10 +837,7 @@ SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op) {
int NumSrcElements = SrcVT.getVectorNumElements();
// Build up a zero vector to blend into this one.
- EVT SrcScalarVT = SrcVT.getScalarType();
- SDValue ScalarZero = DAG.getTargetConstant(0, DL, SrcScalarVT);
- SmallVector<SDValue, 4> BuildVectorOperands(NumSrcElements, ScalarZero);
- SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, DL, SrcVT, BuildVectorOperands);
+ SDValue Zero = DAG.getConstant(0, DL, SrcVT);
// Shuffle the incoming lanes into the correct position, and pull all other
// lanes from the zero vector.
@@ -885,16 +855,19 @@ SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op) {
DAG.getVectorShuffle(SrcVT, DL, Zero, Src, ShuffleMask));
}
-SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) {
- EVT VT = Op.getValueType();
-
- // Generate a byte wise shuffle mask for the BSWAP.
- SmallVector<int, 16> ShuffleMask;
+static void createBSWAPShuffleMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) {
int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8;
for (int I = 0, E = VT.getVectorNumElements(); I != E; ++I)
for (int J = ScalarSizeInBytes - 1; J >= 0; --J)
ShuffleMask.push_back((I * ScalarSizeInBytes) + J);
+}
+SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) {
+ EVT VT = Op.getValueType();
+
+ // Generate a byte wise shuffle mask for the BSWAP.
+ SmallVector<int, 16> ShuffleMask;
+ createBSWAPShuffleMask(VT, ShuffleMask);
EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, ShuffleMask.size());
// Only emit a shuffle if the mask is legal.
@@ -903,8 +876,7 @@ SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) {
SDLoc DL(Op);
Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0));
- Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT),
- ShuffleMask.data());
+ Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), ShuffleMask);
return DAG.getNode(ISD::BITCAST, DL, VT, Op);
}
@@ -915,12 +887,36 @@ SDValue VectorLegalizer::ExpandBITREVERSE(SDValue Op) {
if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType()))
return DAG.UnrollVectorOp(Op.getNode());
+ // If the vector element width is a whole number of bytes, test if it is
+ // legal to BSWAP shuffle the bytes and then perform the BITREVERSE on the
+ // byte vector. This greatly reduces the number of bit shifts necessary.
+ unsigned ScalarSizeInBits = VT.getScalarSizeInBits();
+ if (ScalarSizeInBits > 8 && (ScalarSizeInBits % 8) == 0) {
+ SmallVector<int, 16> BSWAPMask;
+ createBSWAPShuffleMask(VT, BSWAPMask);
+
+ EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, BSWAPMask.size());
+ if (TLI.isShuffleMaskLegal(BSWAPMask, ByteVT) &&
+ (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, ByteVT) ||
+ (TLI.isOperationLegalOrCustom(ISD::SHL, ByteVT) &&
+ TLI.isOperationLegalOrCustom(ISD::SRL, ByteVT) &&
+ TLI.isOperationLegalOrCustomOrPromote(ISD::AND, ByteVT) &&
+ TLI.isOperationLegalOrCustomOrPromote(ISD::OR, ByteVT)))) {
+ SDLoc DL(Op);
+ Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0));
+ Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT),
+ BSWAPMask);
+ Op = DAG.getNode(ISD::BITREVERSE, DL, ByteVT, Op);
+ return DAG.getNode(ISD::BITCAST, DL, VT, Op);
+ }
+ }
+
// If we have the appropriate vector bit operations, it is better to use them
// than unrolling and expanding each component.
if (!TLI.isOperationLegalOrCustom(ISD::SHL, VT) ||
!TLI.isOperationLegalOrCustom(ISD::SRL, VT) ||
- !TLI.isOperationLegalOrCustom(ISD::AND, VT) ||
- !TLI.isOperationLegalOrCustom(ISD::OR, VT))
+ !TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
+ !TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT))
return DAG.UnrollVectorOp(Op.getNode());
// Let LegalizeDAG handle this later.
@@ -1027,10 +1023,12 @@ SDValue VectorLegalizer::ExpandFNEG(SDValue Op) {
}
SDValue VectorLegalizer::ExpandCTLZ_CTTZ_ZERO_UNDEF(SDValue Op) {
- // If the non-ZERO_UNDEF version is supported we can let LegalizeDAG handle.
+ // If the non-ZERO_UNDEF version is supported we can use that instead.
unsigned Opc = Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF ? ISD::CTLZ : ISD::CTTZ;
- if (TLI.isOperationLegalOrCustom(Opc, Op.getValueType()))
- return Op;
+ if (TLI.isOperationLegalOrCustom(Opc, Op.getValueType())) {
+ SDLoc DL(Op);
+ return DAG.getNode(Opc, DL, Op.getValueType(), Op.getOperand(0));
+ }
// Otherwise go ahead and unroll.
return DAG.UnrollVectorOp(Op.getNode());
OpenPOWER on IntegriCloud