diff options
Diffstat (limited to 'lib/Transforms/Scalar/ScalarReplAggregates.cpp')
-rw-r--r-- | lib/Transforms/Scalar/ScalarReplAggregates.cpp | 298 |
1 files changed, 252 insertions, 46 deletions
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index c3ca852..8178c27 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -219,7 +219,7 @@ namespace { /// optimization, which scans the uses of an alloca and determines if it can /// rewrite it in terms of a single new alloca that can be mem2reg'd. class ConvertToScalarInfo { - /// AllocaSize - The size of the alloca being considered. + /// AllocaSize - The size of the alloca being considered in bytes. unsigned AllocaSize; const TargetData &TD; @@ -238,19 +238,22 @@ class ConvertToScalarInfo { /// also declared as a vector, we do want to promote to a vector. bool HadAVector; + /// HadNonMemTransferAccess - True if there is at least one access to the + /// alloca that is not a MemTransferInst. We don't want to turn structs into + /// large integers unless there is some potential for optimization. + bool HadNonMemTransferAccess; + public: explicit ConvertToScalarInfo(unsigned Size, const TargetData &td) - : AllocaSize(Size), TD(td) { - IsNotTrivial = false; - VectorTy = 0; - HadAVector = false; - } + : AllocaSize(Size), TD(td), IsNotTrivial(false), VectorTy(0), + HadAVector(false), HadNonMemTransferAccess(false) { } AllocaInst *TryConvert(AllocaInst *AI); private: bool CanConvertToScalar(Value *V, uint64_t Offset); - void MergeInType(const Type *In, uint64_t Offset); + void MergeInType(const Type *In, uint64_t Offset, bool IsLoadOrStore); + bool MergeInVectorType(const VectorType *VInTy, uint64_t Offset); void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset); Value *ConvertScalar_ExtractValue(Value *NV, const Type *ToType, @@ -282,9 +285,14 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) { << *VectorTy << '\n'); NewTy = VectorTy; // Use the vector type. } else { + unsigned BitWidth = AllocaSize * 8; + if (!HadAVector && !HadNonMemTransferAccess && + !TD.fitsInLegalInteger(BitWidth)) + return 0; + DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n"); // Create and insert the integer alloca. - NewTy = IntegerType::get(AI->getContext(), AllocaSize*8); + NewTy = IntegerType::get(AI->getContext(), BitWidth); } AllocaInst *NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin()); ConvertUsesToScalar(AI, NewAI, 0); @@ -294,16 +302,21 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) { /// MergeInType - Add the 'In' type to the accumulated vector type (VectorTy) /// so far at the offset specified by Offset (which is specified in bytes). /// -/// There are two cases we handle here: +/// There are three cases we handle here: /// 1) A union of vector types of the same size and potentially its elements. /// Here we turn element accesses into insert/extract element operations. /// This promotes a <4 x float> with a store of float to the third element /// into a <4 x float> that uses insert element. -/// 2) A fully general blob of memory, which we turn into some (potentially +/// 2) A union of vector types with power-of-2 size differences, e.g. a float, +/// <2 x float> and <4 x float>. Here we turn element accesses into insert +/// and extract element operations, and <2 x float> accesses into a cast to +/// <2 x double>, an extract, and a cast back to <2 x float>. +/// 3) A fully general blob of memory, which we turn into some (potentially /// large) integer type with extract and insert operations where the loads /// and stores would mutate the memory. We mark this by setting VectorTy /// to VoidTy. -void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) { +void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset, + bool IsLoadOrStore) { // If we already decided to turn this into a blob of integer memory, there is // nothing to be done. if (VectorTy && VectorTy->isVoidTy()) @@ -314,33 +327,33 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) { // If the In type is a vector that is the same size as the alloca, see if it // matches the existing VecTy. if (const VectorType *VInTy = dyn_cast<VectorType>(In)) { - // Remember if we saw a vector type. - HadAVector = true; - - if (VInTy->getBitWidth()/8 == AllocaSize && Offset == 0) { - // If we're storing/loading a vector of the right size, allow it as a - // vector. If this the first vector we see, remember the type so that - // we know the element size. If this is a subsequent access, ignore it - // even if it is a differing type but the same size. Worst case we can - // bitcast the resultant vectors. - if (VectorTy == 0) - VectorTy = VInTy; + if (MergeInVectorType(VInTy, Offset)) return; - } } else if (In->isFloatTy() || In->isDoubleTy() || (In->isIntegerTy() && In->getPrimitiveSizeInBits() >= 8 && isPowerOf2_32(In->getPrimitiveSizeInBits()))) { + // Full width accesses can be ignored, because they can always be turned + // into bitcasts. + unsigned EltSize = In->getPrimitiveSizeInBits()/8; + if (IsLoadOrStore && EltSize == AllocaSize) + return; + // If we're accessing something that could be an element of a vector, see // if the implied vector agrees with what we already have and if Offset is // compatible with it. - unsigned EltSize = In->getPrimitiveSizeInBits()/8; - if (Offset % EltSize == 0 && AllocaSize % EltSize == 0 && - (VectorTy == 0 || - cast<VectorType>(VectorTy)->getElementType() - ->getPrimitiveSizeInBits()/8 == EltSize)) { - if (VectorTy == 0) + if (Offset % EltSize == 0 && AllocaSize % EltSize == 0) { + if (!VectorTy) { VectorTy = VectorType::get(In, AllocaSize/EltSize); - return; + return; + } + + unsigned CurrentEltSize = cast<VectorType>(VectorTy)->getElementType() + ->getPrimitiveSizeInBits()/8; + if (EltSize == CurrentEltSize) + return; + + if (In->isIntegerTy() && isPowerOf2_32(AllocaSize / EltSize)) + return; } } @@ -349,6 +362,77 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) { VectorTy = Type::getVoidTy(In->getContext()); } +/// MergeInVectorType - Handles the vector case of MergeInType, returning true +/// if the type was successfully merged and false otherwise. +bool ConvertToScalarInfo::MergeInVectorType(const VectorType *VInTy, + uint64_t Offset) { + // Remember if we saw a vector type. + HadAVector = true; + + // TODO: Support nonzero offsets? + if (Offset != 0) + return false; + + // Only allow vectors that are a power-of-2 away from the size of the alloca. + if (!isPowerOf2_64(AllocaSize / (VInTy->getBitWidth() / 8))) + return false; + + // If this the first vector we see, remember the type so that we know the + // element size. + if (!VectorTy) { + VectorTy = VInTy; + return true; + } + + unsigned BitWidth = cast<VectorType>(VectorTy)->getBitWidth(); + unsigned InBitWidth = VInTy->getBitWidth(); + + // Vectors of the same size can be converted using a simple bitcast. + if (InBitWidth == BitWidth && AllocaSize == (InBitWidth / 8)) + return true; + + const Type *ElementTy = cast<VectorType>(VectorTy)->getElementType(); + const Type *InElementTy = cast<VectorType>(VInTy)->getElementType(); + + // Do not allow mixed integer and floating-point accesses from vectors of + // different sizes. + if (ElementTy->isFloatingPointTy() != InElementTy->isFloatingPointTy()) + return false; + + if (ElementTy->isFloatingPointTy()) { + // Only allow floating-point vectors of different sizes if they have the + // same element type. + // TODO: This could be loosened a bit, but would anything benefit? + if (ElementTy != InElementTy) + return false; + + // There are no arbitrary-precision floating-point types, which limits the + // number of legal vector types with larger element types that we can form + // to bitcast and extract a subvector. + // TODO: We could support some more cases with mixed fp128 and double here. + if (!(BitWidth == 64 || BitWidth == 128) || + !(InBitWidth == 64 || InBitWidth == 128)) + return false; + } else { + assert(ElementTy->isIntegerTy() && "Vector elements must be either integer " + "or floating-point."); + unsigned BitWidth = ElementTy->getPrimitiveSizeInBits(); + unsigned InBitWidth = InElementTy->getPrimitiveSizeInBits(); + + // Do not allow integer types smaller than a byte or types whose widths are + // not a multiple of a byte. + if (BitWidth < 8 || InBitWidth < 8 || + BitWidth % 8 != 0 || InBitWidth % 8 != 0) + return false; + } + + // Pick the largest of the two vector types. + if (InBitWidth > BitWidth) + VectorTy = VInTy; + + return true; +} + /// CanConvertToScalar - V is a pointer. If we can convert the pointee and all /// its accesses to a single vector type, return true and set VecTy to /// the new type. If we could convert the alloca into a single promotable @@ -369,7 +453,8 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { // Don't touch MMX operations. if (LI->getType()->isX86_MMXTy()) return false; - MergeInType(LI->getType(), Offset); + HadNonMemTransferAccess = true; + MergeInType(LI->getType(), Offset, true); continue; } @@ -379,7 +464,8 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { // Don't touch MMX operations. if (SI->getOperand(0)->getType()->isX86_MMXTy()) return false; - MergeInType(SI->getOperand(0)->getType(), Offset); + HadNonMemTransferAccess = true; + MergeInType(SI->getOperand(0)->getType(), Offset, true); continue; } @@ -403,6 +489,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { if (!CanConvertToScalar(GEP, Offset+GEPOffset)) return false; IsNotTrivial = true; // Can't be mem2reg'd. + HadNonMemTransferAccess = true; continue; } @@ -414,6 +501,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { !isa<ConstantInt>(MSI->getLength())) return false; IsNotTrivial = true; // Can't be mem2reg'd. + HadNonMemTransferAccess = true; continue; } @@ -575,6 +663,63 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, } } +/// getScaledElementType - Gets a scaled element type for a partial vector +/// access of an alloca. The input types must be integer or floating-point +/// scalar or vector types, and the resulting type is an integer, float or +/// double. +static const Type *getScaledElementType(const Type *Ty1, const Type *Ty2, + unsigned NewBitWidth) { + bool IsFP1 = Ty1->isFloatingPointTy() || + (Ty1->isVectorTy() && + cast<VectorType>(Ty1)->getElementType()->isFloatingPointTy()); + bool IsFP2 = Ty2->isFloatingPointTy() || + (Ty2->isVectorTy() && + cast<VectorType>(Ty2)->getElementType()->isFloatingPointTy()); + + LLVMContext &Context = Ty1->getContext(); + + // Prefer floating-point types over integer types, as integer types may have + // been created by earlier scalar replacement. + if (IsFP1 || IsFP2) { + if (NewBitWidth == 32) + return Type::getFloatTy(Context); + if (NewBitWidth == 64) + return Type::getDoubleTy(Context); + } + + return Type::getIntNTy(Context, NewBitWidth); +} + +/// CreateShuffleVectorCast - Creates a shuffle vector to convert one vector +/// to another vector of the same element type which has the same allocation +/// size but different primitive sizes (e.g. <3 x i32> and <4 x i32>). +static Value *CreateShuffleVectorCast(Value *FromVal, const Type *ToType, + IRBuilder<> &Builder) { + const Type *FromType = FromVal->getType(); + const VectorType *FromVTy = cast<VectorType>(FromType); + const VectorType *ToVTy = cast<VectorType>(ToType); + assert((ToVTy->getElementType() == FromVTy->getElementType()) && + "Vectors must have the same element type"); + Value *UnV = UndefValue::get(FromType); + unsigned numEltsFrom = FromVTy->getNumElements(); + unsigned numEltsTo = ToVTy->getNumElements(); + + SmallVector<Constant*, 3> Args; + const Type* Int32Ty = Builder.getInt32Ty(); + unsigned minNumElts = std::min(numEltsFrom, numEltsTo); + unsigned i; + for (i=0; i != minNumElts; ++i) + Args.push_back(ConstantInt::get(Int32Ty, i)); + + if (i < numEltsTo) { + Constant* UnC = UndefValue::get(Int32Ty); + for (; i != numEltsTo; ++i) + Args.push_back(UnC); + } + Constant *Mask = ConstantVector::get(Args); + return Builder.CreateShuffleVector(FromVal, UnV, Mask, "tmpV"); +} + /// ConvertScalar_ExtractValue - Extract a value of type ToType from an integer /// or vector value FromVal, extracting the bits from the offset specified by /// Offset. This returns the value, which is of type ToType. @@ -589,14 +734,46 @@ Value *ConvertToScalarInfo:: ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType, uint64_t Offset, IRBuilder<> &Builder) { // If the load is of the whole new alloca, no conversion is needed. - if (FromVal->getType() == ToType && Offset == 0) + const Type *FromType = FromVal->getType(); + if (FromType == ToType && Offset == 0) return FromVal; // If the result alloca is a vector type, this is either an element // access or a bitcast to another vector type of the same size. - if (const VectorType *VTy = dyn_cast<VectorType>(FromVal->getType())) { - if (ToType->isVectorTy()) - return Builder.CreateBitCast(FromVal, ToType, "tmp"); + if (const VectorType *VTy = dyn_cast<VectorType>(FromType)) { + unsigned ToTypeSize = TD.getTypeAllocSize(ToType); + if (ToTypeSize == AllocaSize) { + // If the two types have the same primitive size, use a bit cast. + // Otherwise, it is two vectors with the same element type that has + // the same allocation size but different number of elements so use + // a shuffle vector. + if (FromType->getPrimitiveSizeInBits() == + ToType->getPrimitiveSizeInBits()) + return Builder.CreateBitCast(FromVal, ToType, "tmp"); + else + return CreateShuffleVectorCast(FromVal, ToType, Builder); + } + + if (isPowerOf2_64(AllocaSize / ToTypeSize)) { + assert(!(ToType->isVectorTy() && Offset != 0) && "Can't extract a value " + "of a smaller vector type at a nonzero offset."); + + const Type *CastElementTy = getScaledElementType(FromType, ToType, + ToTypeSize * 8); + unsigned NumCastVectorElements = AllocaSize / ToTypeSize; + + LLVMContext &Context = FromVal->getContext(); + const Type *CastTy = VectorType::get(CastElementTy, + NumCastVectorElements); + Value *Cast = Builder.CreateBitCast(FromVal, CastTy, "tmp"); + + unsigned EltSize = TD.getTypeAllocSizeInBits(CastElementTy); + unsigned Elt = Offset/EltSize; + assert(EltSize*Elt == Offset && "Invalid modulus in validity checking"); + Value *Extract = Builder.CreateExtractElement(Cast, ConstantInt::get( + Type::getInt32Ty(Context), Elt), "tmp"); + return Builder.CreateBitCast(Extract, ToType, "tmp"); + } // Otherwise it must be an element access. unsigned Elt = 0; @@ -714,21 +891,49 @@ ConvertScalar_InsertValue(Value *SV, Value *Old, // Changing the whole vector with memset or with an access of a different // vector type? - if (ValSize == VecSize) - return Builder.CreateBitCast(SV, AllocaType, "tmp"); + if (ValSize == VecSize) { + // If the two types have the same primitive size, use a bit cast. + // Otherwise, it is two vectors with the same element type that has + // the same allocation size but different number of elements so use + // a shuffle vector. + if (VTy->getPrimitiveSizeInBits() == + SV->getType()->getPrimitiveSizeInBits()) + return Builder.CreateBitCast(SV, AllocaType, "tmp"); + else + return CreateShuffleVectorCast(SV, VTy, Builder); + } - uint64_t EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType()); + if (isPowerOf2_64(VecSize / ValSize)) { + assert(!(SV->getType()->isVectorTy() && Offset != 0) && "Can't insert a " + "value of a smaller vector type at a nonzero offset."); - // Must be an element insertion. - unsigned Elt = Offset/EltSize; + const Type *CastElementTy = getScaledElementType(VTy, SV->getType(), + ValSize); + unsigned NumCastVectorElements = VecSize / ValSize; - if (SV->getType() != VTy->getElementType()) - SV = Builder.CreateBitCast(SV, VTy->getElementType(), "tmp"); + LLVMContext &Context = SV->getContext(); + const Type *OldCastTy = VectorType::get(CastElementTy, + NumCastVectorElements); + Value *OldCast = Builder.CreateBitCast(Old, OldCastTy, "tmp"); - SV = Builder.CreateInsertElement(Old, SV, + Value *SVCast = Builder.CreateBitCast(SV, CastElementTy, "tmp"); + + unsigned EltSize = TD.getTypeAllocSizeInBits(CastElementTy); + unsigned Elt = Offset/EltSize; + assert(EltSize*Elt == Offset && "Invalid modulus in validity checking"); + Value *Insert = + Builder.CreateInsertElement(OldCast, SVCast, ConstantInt::get( + Type::getInt32Ty(Context), Elt), "tmp"); + return Builder.CreateBitCast(Insert, AllocaType, "tmp"); + } + + // Must be an element insertion. + assert(SV->getType() == VTy->getElementType()); + uint64_t EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType()); + unsigned Elt = Offset/EltSize; + return Builder.CreateInsertElement(Old, SV, ConstantInt::get(Type::getInt32Ty(SV->getContext()), Elt), "tmp"); - return SV; } // If SV is a first-class aggregate value, insert each value recursively. @@ -1083,7 +1288,8 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) { } const Type *LoadTy = cast<PointerType>(PN->getType())->getElementType(); - PHINode *NewPN = PHINode::Create(LoadTy, PN->getName()+".ld", PN); + PHINode *NewPN = PHINode::Create(LoadTy, PN->getNumIncomingValues(), + PN->getName()+".ld", PN); // Get the TBAA tag and alignment to use from one of the loads. It doesn't // matter which one we get and if any differ, it doesn't matter. |