summaryrefslogtreecommitdiffstats
path: root/lib/Transforms/Scalar/ScalarReplAggregates.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Transforms/Scalar/ScalarReplAggregates.cpp')
-rw-r--r--lib/Transforms/Scalar/ScalarReplAggregates.cpp298
1 files changed, 252 insertions, 46 deletions
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index c3ca852..8178c27 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -219,7 +219,7 @@ namespace {
/// optimization, which scans the uses of an alloca and determines if it can
/// rewrite it in terms of a single new alloca that can be mem2reg'd.
class ConvertToScalarInfo {
- /// AllocaSize - The size of the alloca being considered.
+ /// AllocaSize - The size of the alloca being considered in bytes.
unsigned AllocaSize;
const TargetData &TD;
@@ -238,19 +238,22 @@ class ConvertToScalarInfo {
/// also declared as a vector, we do want to promote to a vector.
bool HadAVector;
+ /// HadNonMemTransferAccess - True if there is at least one access to the
+ /// alloca that is not a MemTransferInst. We don't want to turn structs into
+ /// large integers unless there is some potential for optimization.
+ bool HadNonMemTransferAccess;
+
public:
explicit ConvertToScalarInfo(unsigned Size, const TargetData &td)
- : AllocaSize(Size), TD(td) {
- IsNotTrivial = false;
- VectorTy = 0;
- HadAVector = false;
- }
+ : AllocaSize(Size), TD(td), IsNotTrivial(false), VectorTy(0),
+ HadAVector(false), HadNonMemTransferAccess(false) { }
AllocaInst *TryConvert(AllocaInst *AI);
private:
bool CanConvertToScalar(Value *V, uint64_t Offset);
- void MergeInType(const Type *In, uint64_t Offset);
+ void MergeInType(const Type *In, uint64_t Offset, bool IsLoadOrStore);
+ bool MergeInVectorType(const VectorType *VInTy, uint64_t Offset);
void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset);
Value *ConvertScalar_ExtractValue(Value *NV, const Type *ToType,
@@ -282,9 +285,14 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
<< *VectorTy << '\n');
NewTy = VectorTy; // Use the vector type.
} else {
+ unsigned BitWidth = AllocaSize * 8;
+ if (!HadAVector && !HadNonMemTransferAccess &&
+ !TD.fitsInLegalInteger(BitWidth))
+ return 0;
+
DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n");
// Create and insert the integer alloca.
- NewTy = IntegerType::get(AI->getContext(), AllocaSize*8);
+ NewTy = IntegerType::get(AI->getContext(), BitWidth);
}
AllocaInst *NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin());
ConvertUsesToScalar(AI, NewAI, 0);
@@ -294,16 +302,21 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
/// MergeInType - Add the 'In' type to the accumulated vector type (VectorTy)
/// so far at the offset specified by Offset (which is specified in bytes).
///
-/// There are two cases we handle here:
+/// There are three cases we handle here:
/// 1) A union of vector types of the same size and potentially its elements.
/// Here we turn element accesses into insert/extract element operations.
/// This promotes a <4 x float> with a store of float to the third element
/// into a <4 x float> that uses insert element.
-/// 2) A fully general blob of memory, which we turn into some (potentially
+/// 2) A union of vector types with power-of-2 size differences, e.g. a float,
+/// <2 x float> and <4 x float>. Here we turn element accesses into insert
+/// and extract element operations, and <2 x float> accesses into a cast to
+/// <2 x double>, an extract, and a cast back to <2 x float>.
+/// 3) A fully general blob of memory, which we turn into some (potentially
/// large) integer type with extract and insert operations where the loads
/// and stores would mutate the memory. We mark this by setting VectorTy
/// to VoidTy.
-void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) {
+void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset,
+ bool IsLoadOrStore) {
// If we already decided to turn this into a blob of integer memory, there is
// nothing to be done.
if (VectorTy && VectorTy->isVoidTy())
@@ -314,33 +327,33 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) {
// If the In type is a vector that is the same size as the alloca, see if it
// matches the existing VecTy.
if (const VectorType *VInTy = dyn_cast<VectorType>(In)) {
- // Remember if we saw a vector type.
- HadAVector = true;
-
- if (VInTy->getBitWidth()/8 == AllocaSize && Offset == 0) {
- // If we're storing/loading a vector of the right size, allow it as a
- // vector. If this the first vector we see, remember the type so that
- // we know the element size. If this is a subsequent access, ignore it
- // even if it is a differing type but the same size. Worst case we can
- // bitcast the resultant vectors.
- if (VectorTy == 0)
- VectorTy = VInTy;
+ if (MergeInVectorType(VInTy, Offset))
return;
- }
} else if (In->isFloatTy() || In->isDoubleTy() ||
(In->isIntegerTy() && In->getPrimitiveSizeInBits() >= 8 &&
isPowerOf2_32(In->getPrimitiveSizeInBits()))) {
+ // Full width accesses can be ignored, because they can always be turned
+ // into bitcasts.
+ unsigned EltSize = In->getPrimitiveSizeInBits()/8;
+ if (IsLoadOrStore && EltSize == AllocaSize)
+ return;
+
// If we're accessing something that could be an element of a vector, see
// if the implied vector agrees with what we already have and if Offset is
// compatible with it.
- unsigned EltSize = In->getPrimitiveSizeInBits()/8;
- if (Offset % EltSize == 0 && AllocaSize % EltSize == 0 &&
- (VectorTy == 0 ||
- cast<VectorType>(VectorTy)->getElementType()
- ->getPrimitiveSizeInBits()/8 == EltSize)) {
- if (VectorTy == 0)
+ if (Offset % EltSize == 0 && AllocaSize % EltSize == 0) {
+ if (!VectorTy) {
VectorTy = VectorType::get(In, AllocaSize/EltSize);
- return;
+ return;
+ }
+
+ unsigned CurrentEltSize = cast<VectorType>(VectorTy)->getElementType()
+ ->getPrimitiveSizeInBits()/8;
+ if (EltSize == CurrentEltSize)
+ return;
+
+ if (In->isIntegerTy() && isPowerOf2_32(AllocaSize / EltSize))
+ return;
}
}
@@ -349,6 +362,77 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) {
VectorTy = Type::getVoidTy(In->getContext());
}
+/// MergeInVectorType - Handles the vector case of MergeInType, returning true
+/// if the type was successfully merged and false otherwise.
+bool ConvertToScalarInfo::MergeInVectorType(const VectorType *VInTy,
+ uint64_t Offset) {
+ // Remember if we saw a vector type.
+ HadAVector = true;
+
+ // TODO: Support nonzero offsets?
+ if (Offset != 0)
+ return false;
+
+ // Only allow vectors that are a power-of-2 away from the size of the alloca.
+ if (!isPowerOf2_64(AllocaSize / (VInTy->getBitWidth() / 8)))
+ return false;
+
+ // If this the first vector we see, remember the type so that we know the
+ // element size.
+ if (!VectorTy) {
+ VectorTy = VInTy;
+ return true;
+ }
+
+ unsigned BitWidth = cast<VectorType>(VectorTy)->getBitWidth();
+ unsigned InBitWidth = VInTy->getBitWidth();
+
+ // Vectors of the same size can be converted using a simple bitcast.
+ if (InBitWidth == BitWidth && AllocaSize == (InBitWidth / 8))
+ return true;
+
+ const Type *ElementTy = cast<VectorType>(VectorTy)->getElementType();
+ const Type *InElementTy = cast<VectorType>(VInTy)->getElementType();
+
+ // Do not allow mixed integer and floating-point accesses from vectors of
+ // different sizes.
+ if (ElementTy->isFloatingPointTy() != InElementTy->isFloatingPointTy())
+ return false;
+
+ if (ElementTy->isFloatingPointTy()) {
+ // Only allow floating-point vectors of different sizes if they have the
+ // same element type.
+ // TODO: This could be loosened a bit, but would anything benefit?
+ if (ElementTy != InElementTy)
+ return false;
+
+ // There are no arbitrary-precision floating-point types, which limits the
+ // number of legal vector types with larger element types that we can form
+ // to bitcast and extract a subvector.
+ // TODO: We could support some more cases with mixed fp128 and double here.
+ if (!(BitWidth == 64 || BitWidth == 128) ||
+ !(InBitWidth == 64 || InBitWidth == 128))
+ return false;
+ } else {
+ assert(ElementTy->isIntegerTy() && "Vector elements must be either integer "
+ "or floating-point.");
+ unsigned BitWidth = ElementTy->getPrimitiveSizeInBits();
+ unsigned InBitWidth = InElementTy->getPrimitiveSizeInBits();
+
+ // Do not allow integer types smaller than a byte or types whose widths are
+ // not a multiple of a byte.
+ if (BitWidth < 8 || InBitWidth < 8 ||
+ BitWidth % 8 != 0 || InBitWidth % 8 != 0)
+ return false;
+ }
+
+ // Pick the largest of the two vector types.
+ if (InBitWidth > BitWidth)
+ VectorTy = VInTy;
+
+ return true;
+}
+
/// CanConvertToScalar - V is a pointer. If we can convert the pointee and all
/// its accesses to a single vector type, return true and set VecTy to
/// the new type. If we could convert the alloca into a single promotable
@@ -369,7 +453,8 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
// Don't touch MMX operations.
if (LI->getType()->isX86_MMXTy())
return false;
- MergeInType(LI->getType(), Offset);
+ HadNonMemTransferAccess = true;
+ MergeInType(LI->getType(), Offset, true);
continue;
}
@@ -379,7 +464,8 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
// Don't touch MMX operations.
if (SI->getOperand(0)->getType()->isX86_MMXTy())
return false;
- MergeInType(SI->getOperand(0)->getType(), Offset);
+ HadNonMemTransferAccess = true;
+ MergeInType(SI->getOperand(0)->getType(), Offset, true);
continue;
}
@@ -403,6 +489,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
if (!CanConvertToScalar(GEP, Offset+GEPOffset))
return false;
IsNotTrivial = true; // Can't be mem2reg'd.
+ HadNonMemTransferAccess = true;
continue;
}
@@ -414,6 +501,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
!isa<ConstantInt>(MSI->getLength()))
return false;
IsNotTrivial = true; // Can't be mem2reg'd.
+ HadNonMemTransferAccess = true;
continue;
}
@@ -575,6 +663,63 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
}
}
+/// getScaledElementType - Gets a scaled element type for a partial vector
+/// access of an alloca. The input types must be integer or floating-point
+/// scalar or vector types, and the resulting type is an integer, float or
+/// double.
+static const Type *getScaledElementType(const Type *Ty1, const Type *Ty2,
+ unsigned NewBitWidth) {
+ bool IsFP1 = Ty1->isFloatingPointTy() ||
+ (Ty1->isVectorTy() &&
+ cast<VectorType>(Ty1)->getElementType()->isFloatingPointTy());
+ bool IsFP2 = Ty2->isFloatingPointTy() ||
+ (Ty2->isVectorTy() &&
+ cast<VectorType>(Ty2)->getElementType()->isFloatingPointTy());
+
+ LLVMContext &Context = Ty1->getContext();
+
+ // Prefer floating-point types over integer types, as integer types may have
+ // been created by earlier scalar replacement.
+ if (IsFP1 || IsFP2) {
+ if (NewBitWidth == 32)
+ return Type::getFloatTy(Context);
+ if (NewBitWidth == 64)
+ return Type::getDoubleTy(Context);
+ }
+
+ return Type::getIntNTy(Context, NewBitWidth);
+}
+
+/// CreateShuffleVectorCast - Creates a shuffle vector to convert one vector
+/// to another vector of the same element type which has the same allocation
+/// size but different primitive sizes (e.g. <3 x i32> and <4 x i32>).
+static Value *CreateShuffleVectorCast(Value *FromVal, const Type *ToType,
+ IRBuilder<> &Builder) {
+ const Type *FromType = FromVal->getType();
+ const VectorType *FromVTy = cast<VectorType>(FromType);
+ const VectorType *ToVTy = cast<VectorType>(ToType);
+ assert((ToVTy->getElementType() == FromVTy->getElementType()) &&
+ "Vectors must have the same element type");
+ Value *UnV = UndefValue::get(FromType);
+ unsigned numEltsFrom = FromVTy->getNumElements();
+ unsigned numEltsTo = ToVTy->getNumElements();
+
+ SmallVector<Constant*, 3> Args;
+ const Type* Int32Ty = Builder.getInt32Ty();
+ unsigned minNumElts = std::min(numEltsFrom, numEltsTo);
+ unsigned i;
+ for (i=0; i != minNumElts; ++i)
+ Args.push_back(ConstantInt::get(Int32Ty, i));
+
+ if (i < numEltsTo) {
+ Constant* UnC = UndefValue::get(Int32Ty);
+ for (; i != numEltsTo; ++i)
+ Args.push_back(UnC);
+ }
+ Constant *Mask = ConstantVector::get(Args);
+ return Builder.CreateShuffleVector(FromVal, UnV, Mask, "tmpV");
+}
+
/// ConvertScalar_ExtractValue - Extract a value of type ToType from an integer
/// or vector value FromVal, extracting the bits from the offset specified by
/// Offset. This returns the value, which is of type ToType.
@@ -589,14 +734,46 @@ Value *ConvertToScalarInfo::
ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
uint64_t Offset, IRBuilder<> &Builder) {
// If the load is of the whole new alloca, no conversion is needed.
- if (FromVal->getType() == ToType && Offset == 0)
+ const Type *FromType = FromVal->getType();
+ if (FromType == ToType && Offset == 0)
return FromVal;
// If the result alloca is a vector type, this is either an element
// access or a bitcast to another vector type of the same size.
- if (const VectorType *VTy = dyn_cast<VectorType>(FromVal->getType())) {
- if (ToType->isVectorTy())
- return Builder.CreateBitCast(FromVal, ToType, "tmp");
+ if (const VectorType *VTy = dyn_cast<VectorType>(FromType)) {
+ unsigned ToTypeSize = TD.getTypeAllocSize(ToType);
+ if (ToTypeSize == AllocaSize) {
+ // If the two types have the same primitive size, use a bit cast.
+ // Otherwise, it is two vectors with the same element type that has
+ // the same allocation size but different number of elements so use
+ // a shuffle vector.
+ if (FromType->getPrimitiveSizeInBits() ==
+ ToType->getPrimitiveSizeInBits())
+ return Builder.CreateBitCast(FromVal, ToType, "tmp");
+ else
+ return CreateShuffleVectorCast(FromVal, ToType, Builder);
+ }
+
+ if (isPowerOf2_64(AllocaSize / ToTypeSize)) {
+ assert(!(ToType->isVectorTy() && Offset != 0) && "Can't extract a value "
+ "of a smaller vector type at a nonzero offset.");
+
+ const Type *CastElementTy = getScaledElementType(FromType, ToType,
+ ToTypeSize * 8);
+ unsigned NumCastVectorElements = AllocaSize / ToTypeSize;
+
+ LLVMContext &Context = FromVal->getContext();
+ const Type *CastTy = VectorType::get(CastElementTy,
+ NumCastVectorElements);
+ Value *Cast = Builder.CreateBitCast(FromVal, CastTy, "tmp");
+
+ unsigned EltSize = TD.getTypeAllocSizeInBits(CastElementTy);
+ unsigned Elt = Offset/EltSize;
+ assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");
+ Value *Extract = Builder.CreateExtractElement(Cast, ConstantInt::get(
+ Type::getInt32Ty(Context), Elt), "tmp");
+ return Builder.CreateBitCast(Extract, ToType, "tmp");
+ }
// Otherwise it must be an element access.
unsigned Elt = 0;
@@ -714,21 +891,49 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
// Changing the whole vector with memset or with an access of a different
// vector type?
- if (ValSize == VecSize)
- return Builder.CreateBitCast(SV, AllocaType, "tmp");
+ if (ValSize == VecSize) {
+ // If the two types have the same primitive size, use a bit cast.
+ // Otherwise, it is two vectors with the same element type that has
+ // the same allocation size but different number of elements so use
+ // a shuffle vector.
+ if (VTy->getPrimitiveSizeInBits() ==
+ SV->getType()->getPrimitiveSizeInBits())
+ return Builder.CreateBitCast(SV, AllocaType, "tmp");
+ else
+ return CreateShuffleVectorCast(SV, VTy, Builder);
+ }
- uint64_t EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType());
+ if (isPowerOf2_64(VecSize / ValSize)) {
+ assert(!(SV->getType()->isVectorTy() && Offset != 0) && "Can't insert a "
+ "value of a smaller vector type at a nonzero offset.");
- // Must be an element insertion.
- unsigned Elt = Offset/EltSize;
+ const Type *CastElementTy = getScaledElementType(VTy, SV->getType(),
+ ValSize);
+ unsigned NumCastVectorElements = VecSize / ValSize;
- if (SV->getType() != VTy->getElementType())
- SV = Builder.CreateBitCast(SV, VTy->getElementType(), "tmp");
+ LLVMContext &Context = SV->getContext();
+ const Type *OldCastTy = VectorType::get(CastElementTy,
+ NumCastVectorElements);
+ Value *OldCast = Builder.CreateBitCast(Old, OldCastTy, "tmp");
- SV = Builder.CreateInsertElement(Old, SV,
+ Value *SVCast = Builder.CreateBitCast(SV, CastElementTy, "tmp");
+
+ unsigned EltSize = TD.getTypeAllocSizeInBits(CastElementTy);
+ unsigned Elt = Offset/EltSize;
+ assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");
+ Value *Insert =
+ Builder.CreateInsertElement(OldCast, SVCast, ConstantInt::get(
+ Type::getInt32Ty(Context), Elt), "tmp");
+ return Builder.CreateBitCast(Insert, AllocaType, "tmp");
+ }
+
+ // Must be an element insertion.
+ assert(SV->getType() == VTy->getElementType());
+ uint64_t EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType());
+ unsigned Elt = Offset/EltSize;
+ return Builder.CreateInsertElement(Old, SV,
ConstantInt::get(Type::getInt32Ty(SV->getContext()), Elt),
"tmp");
- return SV;
}
// If SV is a first-class aggregate value, insert each value recursively.
@@ -1083,7 +1288,8 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
}
const Type *LoadTy = cast<PointerType>(PN->getType())->getElementType();
- PHINode *NewPN = PHINode::Create(LoadTy, PN->getName()+".ld", PN);
+ PHINode *NewPN = PHINode::Create(LoadTy, PN->getNumIncomingValues(),
+ PN->getName()+".ld", PN);
// Get the TBAA tag and alignment to use from one of the loads. It doesn't
// matter which one we get and if any differ, it doesn't matter.
OpenPOWER on IntegriCloud