Diffstat (limited to 'lib/Transforms/Scalar/ScalarReplAggregates.cpp')
-rw-r--r-- | lib/Transforms/Scalar/ScalarReplAggregates.cpp | 598
1 file changed, 257 insertions(+), 341 deletions(-)
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index 7d6349c..c6d9123 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -129,11 +129,11 @@ namespace { AllocaInfo &Info); void isSafeGEP(GetElementPtrInst *GEPI, uint64_t &Offset, AllocaInfo &Info); void isSafeMemAccess(uint64_t Offset, uint64_t MemSize, - const Type *MemOpType, bool isStore, AllocaInfo &Info, + Type *MemOpType, bool isStore, AllocaInfo &Info, Instruction *TheAccess, bool AllowWholeAccess); - bool TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size); - uint64_t FindElementAndOffset(const Type *&T, uint64_t &Offset, - const Type *&IdxTy); + bool TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size); + uint64_t FindElementAndOffset(Type *&T, uint64_t &Offset, + Type *&IdxTy); void DoScalarReplacement(AllocaInst *AI, std::vector<AllocaInst*> &WorkList); @@ -145,6 +145,9 @@ namespace { SmallVector<AllocaInst*, 32> &NewElts); void RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset, SmallVector<AllocaInst*, 32> &NewElts); + void RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI, + uint64_t Offset, + SmallVector<AllocaInst*, 32> &NewElts); void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, AllocaInst *AI, SmallVector<AllocaInst*, 32> &NewElts); @@ -253,7 +256,7 @@ class ConvertToScalarInfo { /// VectorTy - This tracks the type that we should promote the vector to if /// it is possible to turn it into a vector. This starts out null, and if it /// isn't possible to turn into a vector type, it gets set to VoidTy. - const VectorType *VectorTy; + VectorType *VectorTy; /// HadNonMemTransferAccess - True if there is at least one access to the /// alloca that is not a MemTransferInst. We don't want to turn structs into @@ -269,11 +272,11 @@ public: private: bool CanConvertToScalar(Value *V, uint64_t Offset); - void MergeInTypeForLoadOrStore(const Type *In, uint64_t Offset); - bool MergeInVectorType(const VectorType *VInTy, uint64_t Offset); + void MergeInTypeForLoadOrStore(Type *In, uint64_t Offset); + bool MergeInVectorType(VectorType *VInTy, uint64_t Offset); void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset); - Value *ConvertScalar_ExtractValue(Value *NV, const Type *ToType, + Value *ConvertScalar_ExtractValue(Value *NV, Type *ToType, uint64_t Offset, IRBuilder<> &Builder); Value *ConvertScalar_InsertValue(Value *StoredVal, Value *ExistingVal, uint64_t Offset, IRBuilder<> &Builder); @@ -295,8 +298,6 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) { if (ScalarKind == Unknown) ScalarKind = Integer; - // FIXME: It should be possible to promote the vector type up to the alloca's - // size. if (ScalarKind == Vector && VectorTy->getBitWidth() != AllocaSize * 8) ScalarKind = Integer; @@ -306,7 +307,7 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) { // random stuff that doesn't use vectors (e.g. <9 x double>) because then // we just get a lot of insert/extracts. If at least one vector is // involved, then we probably really do have a union of vector/array. - const Type *NewTy; + Type *NewTy; if (ScalarKind == Vector) { assert(VectorTy && "Missing type for vector scalar."); DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n TYPE = " @@ -331,20 +332,16 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) { /// (VectorTy) so far at the offset specified by Offset (which is specified in /// bytes). 
/// -/// There are three cases we handle here: +/// There are two cases we handle here: /// 1) A union of vector types of the same size and potentially its elements. /// Here we turn element accesses into insert/extract element operations. /// This promotes a <4 x float> with a store of float to the third element /// into a <4 x float> that uses insert element. -/// 2) A union of vector types with power-of-2 size differences, e.g. a float, -/// <2 x float> and <4 x float>. Here we turn element accesses into insert -/// and extract element operations, and <2 x float> accesses into a cast to -/// <2 x double>, an extract, and a cast back to <2 x float>. -/// 3) A fully general blob of memory, which we turn into some (potentially +/// 2) A fully general blob of memory, which we turn into some (potentially /// large) integer type with extract and insert operations where the loads /// and stores would mutate the memory. We mark this by setting VectorTy /// to VoidTy. -void ConvertToScalarInfo::MergeInTypeForLoadOrStore(const Type *In, +void ConvertToScalarInfo::MergeInTypeForLoadOrStore(Type *In, uint64_t Offset) { // If we already decided to turn this into a blob of integer memory, there is // nothing to be done. @@ -355,7 +352,7 @@ void ConvertToScalarInfo::MergeInTypeForLoadOrStore(const Type *In, // If the In type is a vector that is the same size as the alloca, see if it // matches the existing VecTy. - if (const VectorType *VInTy = dyn_cast<VectorType>(In)) { + if (VectorType *VInTy = dyn_cast<VectorType>(In)) { if (MergeInVectorType(VInTy, Offset)) return; } else if (In->isFloatTy() || In->isDoubleTy() || @@ -371,20 +368,13 @@ void ConvertToScalarInfo::MergeInTypeForLoadOrStore(const Type *In, // if the implied vector agrees with what we already have and if Offset is // compatible with it. if (Offset % EltSize == 0 && AllocaSize % EltSize == 0 && - (!VectorTy || Offset * 8 < VectorTy->getPrimitiveSizeInBits())) { + (!VectorTy || EltSize == VectorTy->getElementType() + ->getPrimitiveSizeInBits()/8)) { if (!VectorTy) { ScalarKind = ImplicitVector; VectorTy = VectorType::get(In, AllocaSize/EltSize); - return; } - - unsigned CurrentEltSize = VectorTy->getElementType() - ->getPrimitiveSizeInBits()/8; - if (EltSize == CurrentEltSize) - return; - - if (In->isIntegerTy() && isPowerOf2_32(AllocaSize / EltSize)) - return; + return; } } @@ -395,74 +385,21 @@ void ConvertToScalarInfo::MergeInTypeForLoadOrStore(const Type *In, /// MergeInVectorType - Handles the vector case of MergeInTypeForLoadOrStore, /// returning true if the type was successfully merged and false otherwise. -bool ConvertToScalarInfo::MergeInVectorType(const VectorType *VInTy, +bool ConvertToScalarInfo::MergeInVectorType(VectorType *VInTy, uint64_t Offset) { - // TODO: Support nonzero offsets? - if (Offset != 0) - return false; - - // Only allow vectors that are a power-of-2 away from the size of the alloca. - if (!isPowerOf2_64(AllocaSize / (VInTy->getBitWidth() / 8))) - return false; - - // If this the first vector we see, remember the type so that we know the - // element size. - if (!VectorTy) { + if (VInTy->getBitWidth()/8 == AllocaSize && Offset == 0) { + // If we're storing/loading a vector of the right size, allow it as a + // vector. If this the first vector we see, remember the type so that + // we know the element size. If this is a subsequent access, ignore it + // even if it is a differing type but the same size. Worst case we can + // bitcast the resultant vectors. 
+ if (!VectorTy) + VectorTy = VInTy; ScalarKind = Vector; - VectorTy = VInTy; return true; } - unsigned BitWidth = VectorTy->getBitWidth(); - unsigned InBitWidth = VInTy->getBitWidth(); - - // Vectors of the same size can be converted using a simple bitcast. - if (InBitWidth == BitWidth && AllocaSize == (InBitWidth / 8)) { - ScalarKind = Vector; - return true; - } - - const Type *ElementTy = VectorTy->getElementType(); - const Type *InElementTy = VInTy->getElementType(); - - // Do not allow mixed integer and floating-point accesses from vectors of - // different sizes. - if (ElementTy->isFloatingPointTy() != InElementTy->isFloatingPointTy()) - return false; - - if (ElementTy->isFloatingPointTy()) { - // Only allow floating-point vectors of different sizes if they have the - // same element type. - // TODO: This could be loosened a bit, but would anything benefit? - if (ElementTy != InElementTy) - return false; - - // There are no arbitrary-precision floating-point types, which limits the - // number of legal vector types with larger element types that we can form - // to bitcast and extract a subvector. - // TODO: We could support some more cases with mixed fp128 and double here. - if (!(BitWidth == 64 || BitWidth == 128) || - !(InBitWidth == 64 || InBitWidth == 128)) - return false; - } else { - assert(ElementTy->isIntegerTy() && "Vector elements must be either integer " - "or floating-point."); - unsigned BitWidth = ElementTy->getPrimitiveSizeInBits(); - unsigned InBitWidth = InElementTy->getPrimitiveSizeInBits(); - - // Do not allow integer types smaller than a byte or types whose widths are - // not a multiple of a byte. - if (BitWidth < 8 || InBitWidth < 8 || - BitWidth % 8 != 0 || InBitWidth % 8 != 0) - return false; - } - - // Pick the largest of the two vector types. - ScalarKind = Vector; - if (InBitWidth > BitWidth) - VectorTy = VInTy; - - return true; + return false; } /// CanConvertToScalar - V is a pointer. If we can convert the pointee and all @@ -480,7 +417,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { if (LoadInst *LI = dyn_cast<LoadInst>(User)) { // Don't break volatile loads. - if (LI->isVolatile()) + if (!LI->isSimple()) return false; // Don't touch MMX operations. if (LI->getType()->isX86_MMXTy()) @@ -492,7 +429,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { if (StoreInst *SI = dyn_cast<StoreInst>(User)) { // Storing the pointer, not into the value? - if (SI->getOperand(0) == V || SI->isVolatile()) return false; + if (SI->getOperand(0) == V || !SI->isSimple()) return false; // Don't touch MMX operations. if (SI->getOperand(0)->getType()->isX86_MMXTy()) return false; @@ -502,7 +439,8 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { } if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) { - IsNotTrivial = true; // Can't be mem2reg'd. + if (!onlyUsedByLifetimeMarkers(BCI)) + IsNotTrivial = true; // Can't be mem2reg'd. if (!CanConvertToScalar(BCI, Offset)) return false; continue; @@ -516,7 +454,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { // Compute the offset that this GEP adds to the pointer. SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end()); uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(), - &Indices[0], Indices.size()); + Indices); // See if all uses can be converted. 
if (!CanConvertToScalar(GEP, Offset+GEPOffset)) return false; @@ -560,6 +498,14 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { continue; } + // If this is a lifetime intrinsic, we can handle it. + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) { + if (II->getIntrinsicID() == Intrinsic::lifetime_start || + II->getIntrinsicID() == Intrinsic::lifetime_end) { + continue; + } + } + // Otherwise, we cannot handle this! return false; } @@ -589,7 +535,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, // Compute the offset that this GEP adds to the pointer. SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end()); uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(), - &Indices[0], Indices.size()); + Indices); ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8); GEP->eraseFromParent(); continue; @@ -599,7 +545,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, if (LoadInst *LI = dyn_cast<LoadInst>(User)) { // The load is a bit extract from NewAI shifted right by Offset bits. - Value *LoadedVal = Builder.CreateLoad(NewAI, "tmp"); + Value *LoadedVal = Builder.CreateLoad(NewAI); Value *NewLoadVal = ConvertScalar_ExtractValue(LoadedVal, LI->getType(), Offset, Builder); LI->replaceAllUsesWith(NewLoadVal); @@ -668,8 +614,8 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, // pointer (bitcasted), then a store to our new alloca. assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?"); Value *SrcPtr = MTI->getSource(); - const PointerType* SPTy = cast<PointerType>(SrcPtr->getType()); - const PointerType* AIPTy = cast<PointerType>(NewAI->getType()); + PointerType* SPTy = cast<PointerType>(SrcPtr->getType()); + PointerType* AIPTy = cast<PointerType>(NewAI->getType()); if (SPTy->getAddressSpace() != AIPTy->getAddressSpace()) { AIPTy = PointerType::get(AIPTy->getElementType(), SPTy->getAddressSpace()); @@ -685,8 +631,8 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?"); LoadInst *SrcVal = Builder.CreateLoad(NewAI, "srcval"); - const PointerType* DPTy = cast<PointerType>(MTI->getDest()->getType()); - const PointerType* AIPTy = cast<PointerType>(NewAI->getType()); + PointerType* DPTy = cast<PointerType>(MTI->getDest()->getType()); + PointerType* AIPTy = cast<PointerType>(NewAI->getType()); if (DPTy->getAddressSpace() != AIPTy->getAddressSpace()) { AIPTy = PointerType::get(AIPTy->getElementType(), DPTy->getAddressSpace()); @@ -703,65 +649,18 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, continue; } - llvm_unreachable("Unsupported operation!"); - } -} + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) { + if (II->getIntrinsicID() == Intrinsic::lifetime_start || + II->getIntrinsicID() == Intrinsic::lifetime_end) { + // There's no need to preserve these, as the resulting alloca will be + // converted to a register anyways. + II->eraseFromParent(); + continue; + } + } -/// getScaledElementType - Gets a scaled element type for a partial vector -/// access of an alloca. The input types must be integer or floating-point -/// scalar or vector types, and the resulting type is an integer, float or -/// double. 
-static const Type *getScaledElementType(const Type *Ty1, const Type *Ty2, - unsigned NewBitWidth) { - bool IsFP1 = Ty1->isFloatingPointTy() || - (Ty1->isVectorTy() && - cast<VectorType>(Ty1)->getElementType()->isFloatingPointTy()); - bool IsFP2 = Ty2->isFloatingPointTy() || - (Ty2->isVectorTy() && - cast<VectorType>(Ty2)->getElementType()->isFloatingPointTy()); - - LLVMContext &Context = Ty1->getContext(); - - // Prefer floating-point types over integer types, as integer types may have - // been created by earlier scalar replacement. - if (IsFP1 || IsFP2) { - if (NewBitWidth == 32) - return Type::getFloatTy(Context); - if (NewBitWidth == 64) - return Type::getDoubleTy(Context); + llvm_unreachable("Unsupported operation!"); } - - return Type::getIntNTy(Context, NewBitWidth); -} - -/// CreateShuffleVectorCast - Creates a shuffle vector to convert one vector -/// to another vector of the same element type which has the same allocation -/// size but different primitive sizes (e.g. <3 x i32> and <4 x i32>). -static Value *CreateShuffleVectorCast(Value *FromVal, const Type *ToType, - IRBuilder<> &Builder) { - const Type *FromType = FromVal->getType(); - const VectorType *FromVTy = cast<VectorType>(FromType); - const VectorType *ToVTy = cast<VectorType>(ToType); - assert((ToVTy->getElementType() == FromVTy->getElementType()) && - "Vectors must have the same element type"); - Value *UnV = UndefValue::get(FromType); - unsigned numEltsFrom = FromVTy->getNumElements(); - unsigned numEltsTo = ToVTy->getNumElements(); - - SmallVector<Constant*, 3> Args; - const Type* Int32Ty = Builder.getInt32Ty(); - unsigned minNumElts = std::min(numEltsFrom, numEltsTo); - unsigned i; - for (i=0; i != minNumElts; ++i) - Args.push_back(ConstantInt::get(Int32Ty, i)); - - if (i < numEltsTo) { - Constant* UnC = UndefValue::get(Int32Ty); - for (; i != numEltsTo; ++i) - Args.push_back(UnC); - } - Constant *Mask = ConstantVector::get(Args); - return Builder.CreateShuffleVector(FromVal, UnV, Mask, "tmpV"); } /// ConvertScalar_ExtractValue - Extract a value of type ToType from an integer @@ -775,50 +674,20 @@ static Value *CreateShuffleVectorCast(Value *FromVal, const Type *ToType, /// Offset is an offset from the original alloca, in bits that need to be /// shifted to the right. Value *ConvertToScalarInfo:: -ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType, +ConvertScalar_ExtractValue(Value *FromVal, Type *ToType, uint64_t Offset, IRBuilder<> &Builder) { // If the load is of the whole new alloca, no conversion is needed. - const Type *FromType = FromVal->getType(); + Type *FromType = FromVal->getType(); if (FromType == ToType && Offset == 0) return FromVal; // If the result alloca is a vector type, this is either an element // access or a bitcast to another vector type of the same size. - if (const VectorType *VTy = dyn_cast<VectorType>(FromType)) { + if (VectorType *VTy = dyn_cast<VectorType>(FromType)) { unsigned FromTypeSize = TD.getTypeAllocSize(FromType); unsigned ToTypeSize = TD.getTypeAllocSize(ToType); - if (FromTypeSize == ToTypeSize) { - // If the two types have the same primitive size, use a bit cast. - // Otherwise, it is two vectors with the same element type that has - // the same allocation size but different number of elements so use - // a shuffle vector. 
- if (FromType->getPrimitiveSizeInBits() == - ToType->getPrimitiveSizeInBits()) - return Builder.CreateBitCast(FromVal, ToType, "tmp"); - else - return CreateShuffleVectorCast(FromVal, ToType, Builder); - } - - if (isPowerOf2_64(FromTypeSize / ToTypeSize)) { - assert(!(ToType->isVectorTy() && Offset != 0) && "Can't extract a value " - "of a smaller vector type at a nonzero offset."); - - const Type *CastElementTy = getScaledElementType(FromType, ToType, - ToTypeSize * 8); - unsigned NumCastVectorElements = FromTypeSize / ToTypeSize; - - LLVMContext &Context = FromVal->getContext(); - const Type *CastTy = VectorType::get(CastElementTy, - NumCastVectorElements); - Value *Cast = Builder.CreateBitCast(FromVal, CastTy, "tmp"); - - unsigned EltSize = TD.getTypeAllocSizeInBits(CastElementTy); - unsigned Elt = Offset/EltSize; - assert(EltSize*Elt == Offset && "Invalid modulus in validity checking"); - Value *Extract = Builder.CreateExtractElement(Cast, ConstantInt::get( - Type::getInt32Ty(Context), Elt), "tmp"); - return Builder.CreateBitCast(Extract, ToType, "tmp"); - } + if (FromTypeSize == ToTypeSize) + return Builder.CreateBitCast(FromVal, ToType); // Otherwise it must be an element access. unsigned Elt = 0; @@ -828,40 +697,39 @@ ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType, assert(EltSize*Elt == Offset && "Invalid modulus in validity checking"); } // Return the element extracted out of it. - Value *V = Builder.CreateExtractElement(FromVal, ConstantInt::get( - Type::getInt32Ty(FromVal->getContext()), Elt), "tmp"); + Value *V = Builder.CreateExtractElement(FromVal, Builder.getInt32(Elt)); if (V->getType() != ToType) - V = Builder.CreateBitCast(V, ToType, "tmp"); + V = Builder.CreateBitCast(V, ToType); return V; } // If ToType is a first class aggregate, extract out each of the pieces and // use insertvalue's to form the FCA. - if (const StructType *ST = dyn_cast<StructType>(ToType)) { + if (StructType *ST = dyn_cast<StructType>(ToType)) { const StructLayout &Layout = *TD.getStructLayout(ST); Value *Res = UndefValue::get(ST); for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) { Value *Elt = ConvertScalar_ExtractValue(FromVal, ST->getElementType(i), Offset+Layout.getElementOffsetInBits(i), Builder); - Res = Builder.CreateInsertValue(Res, Elt, i, "tmp"); + Res = Builder.CreateInsertValue(Res, Elt, i); } return Res; } - if (const ArrayType *AT = dyn_cast<ArrayType>(ToType)) { + if (ArrayType *AT = dyn_cast<ArrayType>(ToType)) { uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType()); Value *Res = UndefValue::get(AT); for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) { Value *Elt = ConvertScalar_ExtractValue(FromVal, AT->getElementType(), Offset+i*EltSize, Builder); - Res = Builder.CreateInsertValue(Res, Elt, i, "tmp"); + Res = Builder.CreateInsertValue(Res, Elt, i); } return Res; } // Otherwise, this must be a union that was converted to an integer value. - const IntegerType *NTy = cast<IntegerType>(FromVal->getType()); + IntegerType *NTy = cast<IntegerType>(FromVal->getType()); // If this is a big-endian system and the load is narrower than the // full alloca type, we need to do a shift to get the right bits. @@ -881,33 +749,31 @@ ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType, // only some bits are used. 
if (ShAmt > 0 && (unsigned)ShAmt < NTy->getBitWidth()) FromVal = Builder.CreateLShr(FromVal, - ConstantInt::get(FromVal->getType(), - ShAmt), "tmp"); + ConstantInt::get(FromVal->getType(), ShAmt)); else if (ShAmt < 0 && (unsigned)-ShAmt < NTy->getBitWidth()) FromVal = Builder.CreateShl(FromVal, - ConstantInt::get(FromVal->getType(), - -ShAmt), "tmp"); + ConstantInt::get(FromVal->getType(), -ShAmt)); // Finally, unconditionally truncate the integer to the right width. unsigned LIBitWidth = TD.getTypeSizeInBits(ToType); if (LIBitWidth < NTy->getBitWidth()) FromVal = Builder.CreateTrunc(FromVal, IntegerType::get(FromVal->getContext(), - LIBitWidth), "tmp"); + LIBitWidth)); else if (LIBitWidth > NTy->getBitWidth()) FromVal = Builder.CreateZExt(FromVal, IntegerType::get(FromVal->getContext(), - LIBitWidth), "tmp"); + LIBitWidth)); // If the result is an integer, this is a trunc or bitcast. if (ToType->isIntegerTy()) { // Should be done. } else if (ToType->isFloatingPointTy() || ToType->isVectorTy()) { // Just do a bitcast, we know the sizes match up. - FromVal = Builder.CreateBitCast(FromVal, ToType, "tmp"); + FromVal = Builder.CreateBitCast(FromVal, ToType); } else { // Otherwise must be a pointer. - FromVal = Builder.CreateIntToPtr(FromVal, ToType, "tmp"); + FromVal = Builder.CreateIntToPtr(FromVal, ToType); } assert(FromVal->getType() == ToType && "Didn't convert right?"); return FromVal; @@ -927,65 +793,30 @@ ConvertScalar_InsertValue(Value *SV, Value *Old, uint64_t Offset, IRBuilder<> &Builder) { // Convert the stored type to the actual type, shift it left to insert // then 'or' into place. - const Type *AllocaType = Old->getType(); + Type *AllocaType = Old->getType(); LLVMContext &Context = Old->getContext(); - if (const VectorType *VTy = dyn_cast<VectorType>(AllocaType)) { + if (VectorType *VTy = dyn_cast<VectorType>(AllocaType)) { uint64_t VecSize = TD.getTypeAllocSizeInBits(VTy); uint64_t ValSize = TD.getTypeAllocSizeInBits(SV->getType()); // Changing the whole vector with memset or with an access of a different // vector type? - if (ValSize == VecSize) { - // If the two types have the same primitive size, use a bit cast. - // Otherwise, it is two vectors with the same element type that has - // the same allocation size but different number of elements so use - // a shuffle vector. 
- if (VTy->getPrimitiveSizeInBits() == - SV->getType()->getPrimitiveSizeInBits()) - return Builder.CreateBitCast(SV, AllocaType, "tmp"); - else - return CreateShuffleVectorCast(SV, VTy, Builder); - } - - if (isPowerOf2_64(VecSize / ValSize)) { - assert(!(SV->getType()->isVectorTy() && Offset != 0) && "Can't insert a " - "value of a smaller vector type at a nonzero offset."); - - const Type *CastElementTy = getScaledElementType(VTy, SV->getType(), - ValSize); - unsigned NumCastVectorElements = VecSize / ValSize; - - LLVMContext &Context = SV->getContext(); - const Type *OldCastTy = VectorType::get(CastElementTy, - NumCastVectorElements); - Value *OldCast = Builder.CreateBitCast(Old, OldCastTy, "tmp"); - - Value *SVCast = Builder.CreateBitCast(SV, CastElementTy, "tmp"); - - unsigned EltSize = TD.getTypeAllocSizeInBits(CastElementTy); - unsigned Elt = Offset/EltSize; - assert(EltSize*Elt == Offset && "Invalid modulus in validity checking"); - Value *Insert = - Builder.CreateInsertElement(OldCast, SVCast, ConstantInt::get( - Type::getInt32Ty(Context), Elt), "tmp"); - return Builder.CreateBitCast(Insert, AllocaType, "tmp"); - } + if (ValSize == VecSize) + return Builder.CreateBitCast(SV, AllocaType); // Must be an element insertion. assert(SV->getType() == VTy->getElementType()); uint64_t EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType()); unsigned Elt = Offset/EltSize; - return Builder.CreateInsertElement(Old, SV, - ConstantInt::get(Type::getInt32Ty(SV->getContext()), Elt), - "tmp"); + return Builder.CreateInsertElement(Old, SV, Builder.getInt32(Elt)); } // If SV is a first-class aggregate value, insert each value recursively. - if (const StructType *ST = dyn_cast<StructType>(SV->getType())) { + if (StructType *ST = dyn_cast<StructType>(SV->getType())) { const StructLayout &Layout = *TD.getStructLayout(ST); for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) { - Value *Elt = Builder.CreateExtractValue(SV, i, "tmp"); + Value *Elt = Builder.CreateExtractValue(SV, i); Old = ConvertScalar_InsertValue(Elt, Old, Offset+Layout.getElementOffsetInBits(i), Builder); @@ -993,10 +824,10 @@ ConvertScalar_InsertValue(Value *SV, Value *Old, return Old; } - if (const ArrayType *AT = dyn_cast<ArrayType>(SV->getType())) { + if (ArrayType *AT = dyn_cast<ArrayType>(SV->getType())) { uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType()); for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) { - Value *Elt = Builder.CreateExtractValue(SV, i, "tmp"); + Value *Elt = Builder.CreateExtractValue(SV, i); Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, Builder); } return Old; @@ -1009,20 +840,19 @@ ConvertScalar_InsertValue(Value *SV, Value *Old, unsigned SrcStoreWidth = TD.getTypeStoreSizeInBits(SV->getType()); unsigned DestStoreWidth = TD.getTypeStoreSizeInBits(AllocaType); if (SV->getType()->isFloatingPointTy() || SV->getType()->isVectorTy()) - SV = Builder.CreateBitCast(SV, - IntegerType::get(SV->getContext(),SrcWidth), "tmp"); + SV = Builder.CreateBitCast(SV, IntegerType::get(SV->getContext(),SrcWidth)); else if (SV->getType()->isPointerTy()) - SV = Builder.CreatePtrToInt(SV, TD.getIntPtrType(SV->getContext()), "tmp"); + SV = Builder.CreatePtrToInt(SV, TD.getIntPtrType(SV->getContext())); // Zero extend or truncate the value if needed. 
if (SV->getType() != AllocaType) { if (SV->getType()->getPrimitiveSizeInBits() < AllocaType->getPrimitiveSizeInBits()) - SV = Builder.CreateZExt(SV, AllocaType, "tmp"); + SV = Builder.CreateZExt(SV, AllocaType); else { // Truncation may be needed if storing more than the alloca can hold // (undefined behavior). - SV = Builder.CreateTrunc(SV, AllocaType, "tmp"); + SV = Builder.CreateTrunc(SV, AllocaType); SrcWidth = DestWidth; SrcStoreWidth = DestStoreWidth; } @@ -1045,12 +875,10 @@ ConvertScalar_InsertValue(Value *SV, Value *Old, // only some bits in the structure are set. APInt Mask(APInt::getLowBitsSet(DestWidth, SrcWidth)); if (ShAmt > 0 && (unsigned)ShAmt < DestWidth) { - SV = Builder.CreateShl(SV, ConstantInt::get(SV->getType(), - ShAmt), "tmp"); + SV = Builder.CreateShl(SV, ConstantInt::get(SV->getType(), ShAmt)); Mask <<= ShAmt; } else if (ShAmt < 0 && (unsigned)-ShAmt < DestWidth) { - SV = Builder.CreateLShr(SV, ConstantInt::get(SV->getType(), - -ShAmt), "tmp"); + SV = Builder.CreateLShr(SV, ConstantInt::get(SV->getType(), -ShAmt)); Mask = Mask.lshr(-ShAmt); } @@ -1196,7 +1024,7 @@ static bool isSafeSelectToSpeculate(SelectInst *SI, const TargetData *TD) { for (Value::use_iterator UI = SI->use_begin(), UE = SI->use_end(); UI != UE; ++UI) { LoadInst *LI = dyn_cast<LoadInst>(*UI); - if (LI == 0 || LI->isVolatile()) return false; + if (LI == 0 || !LI->isSimple()) return false; // Both operands to the select need to be dereferencable, either absolutely // (e.g. allocas) or at this point because we can see other accesses to it. @@ -1237,7 +1065,7 @@ static bool isSafePHIToSpeculate(PHINode *PN, const TargetData *TD) { for (Value::use_iterator UI = PN->use_begin(), UE = PN->use_end(); UI != UE; ++UI) { LoadInst *LI = dyn_cast<LoadInst>(*UI); - if (LI == 0 || LI->isVolatile()) return false; + if (LI == 0 || !LI->isSimple()) return false; // For now we only allow loads in the same block as the PHI. This is a // common case that happens when instcombine merges two loads through a PHI. @@ -1258,17 +1086,21 @@ static bool isSafePHIToSpeculate(PHINode *PN, const TargetData *TD) { // trapping load in the predecessor if it is a critical edge. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { BasicBlock *Pred = PN->getIncomingBlock(i); + Value *InVal = PN->getIncomingValue(i); + + // If the terminator of the predecessor has side-effects (an invoke), + // there is no safe place to put a load in the predecessor. + if (Pred->getTerminator()->mayHaveSideEffects()) + return false; + + // If the value is produced by the terminator of the predecessor + // (an invoke), there is no valid place to put a load in the predecessor. + if (Pred->getTerminator() == InVal) + return false; // If the predecessor has a single successor, then the edge isn't critical. if (Pred->getTerminator()->getNumSuccessors() == 1) continue; - - Value *InVal = PN->getIncomingValue(i); - - // If the InVal is an invoke in the pred, we can't put a load on the edge. - if (InvokeInst *II = dyn_cast<InvokeInst>(InVal)) - if (II->getParent() == Pred) - return false; // If this pointer is always safe to load, or if we can prove that there is // already a load in the block, then we can move the load to the pred block. 
@@ -1295,13 +1127,13 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) { UI != UE; ++UI) { User *U = *UI; if (LoadInst *LI = dyn_cast<LoadInst>(U)) { - if (LI->isVolatile()) + if (!LI->isSimple()) return false; continue; } if (StoreInst *SI = dyn_cast<StoreInst>(U)) { - if (SI->getOperand(0) == AI || SI->isVolatile()) + if (SI->getOperand(0) == AI || !SI->isSimple()) return false; // Don't allow a store OF the AI, only INTO the AI. continue; } @@ -1343,6 +1175,13 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) { continue; } + if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) { + if (onlyUsedByLifetimeMarkers(BCI)) { + InstsToRewrite.insert(BCI); + continue; + } + } + return false; } @@ -1354,6 +1193,18 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) { // If we have instructions that need to be rewritten for this to be promotable // take care of it now. for (unsigned i = 0, e = InstsToRewrite.size(); i != e; ++i) { + if (BitCastInst *BCI = dyn_cast<BitCastInst>(InstsToRewrite[i])) { + // This could only be a bitcast used by nothing but lifetime intrinsics. + for (BitCastInst::use_iterator I = BCI->use_begin(), E = BCI->use_end(); + I != E;) { + Use &U = I.getUse(); + ++I; + cast<Instruction>(U.getUser())->eraseFromParent(); + } + BCI->eraseFromParent(); + continue; + } + if (SelectInst *SI = dyn_cast<SelectInst>(InstsToRewrite[i])) { // Selects in InstsToRewrite only have load uses. Rewrite each as two // loads with a new select. @@ -1393,7 +1244,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) { continue; } - const Type *LoadTy = cast<PointerType>(PN->getType())->getElementType(); + Type *LoadTy = cast<PointerType>(PN->getType())->getElementType(); PHINode *NewPN = PHINode::Create(LoadTy, PN->getNumIncomingValues(), PN->getName()+".ld", PN); @@ -1483,13 +1334,13 @@ bool SROA::performPromotion(Function &F) { /// ShouldAttemptScalarRepl - Decide if an alloca is a good candidate for /// SROA. It must be a struct or array type with a small number of elements. static bool ShouldAttemptScalarRepl(AllocaInst *AI) { - const Type *T = AI->getAllocatedType(); + Type *T = AI->getAllocatedType(); // Do not promote any struct into more than 32 separate vars. - if (const StructType *ST = dyn_cast<StructType>(T)) + if (StructType *ST = dyn_cast<StructType>(T)) return ST->getNumElements() <= 32; // Arrays are much less likely to be safe for SROA; only consider // them if they are very small. 
- if (const ArrayType *AT = dyn_cast<ArrayType>(T)) + if (ArrayType *AT = dyn_cast<ArrayType>(T)) return AT->getNumElements() <= 8; return false; } @@ -1594,7 +1445,7 @@ void SROA::DoScalarReplacement(AllocaInst *AI, std::vector<AllocaInst*> &WorkList) { DEBUG(dbgs() << "Found inst to SROA: " << *AI << '\n'); SmallVector<AllocaInst*, 32> ElementAllocas; - if (const StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) { + if (StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) { ElementAllocas.reserve(ST->getNumContainedTypes()); for (unsigned i = 0, e = ST->getNumContainedTypes(); i != e; ++i) { AllocaInst *NA = new AllocaInst(ST->getContainedType(i), 0, @@ -1604,9 +1455,9 @@ void SROA::DoScalarReplacement(AllocaInst *AI, WorkList.push_back(NA); // Add to worklist for recursive processing } } else { - const ArrayType *AT = cast<ArrayType>(AI->getAllocatedType()); + ArrayType *AT = cast<ArrayType>(AI->getAllocatedType()); ElementAllocas.reserve(AT->getNumElements()); - const Type *ElTy = AT->getElementType(); + Type *ElTy = AT->getElementType(); for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) { AllocaInst *NA = new AllocaInst(ElTy, 0, AI->getAlignment(), AI->getName() + "." + Twine(i), AI); @@ -1670,22 +1521,26 @@ void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset, UI.getOperandNo() == 0, Info, MI, true /*AllowWholeAccess*/); } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) { - if (LI->isVolatile()) + if (!LI->isSimple()) return MarkUnsafe(Info, User); - const Type *LIType = LI->getType(); + Type *LIType = LI->getType(); isSafeMemAccess(Offset, TD->getTypeAllocSize(LIType), LIType, false, Info, LI, true /*AllowWholeAccess*/); Info.hasALoadOrStore = true; } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) { // Store is ok if storing INTO the pointer, not storing the pointer - if (SI->isVolatile() || SI->getOperand(0) == I) + if (!SI->isSimple() || SI->getOperand(0) == I) return MarkUnsafe(Info, User); - const Type *SIType = SI->getOperand(0)->getType(); + Type *SIType = SI->getOperand(0)->getType(); isSafeMemAccess(Offset, TD->getTypeAllocSize(SIType), SIType, true, Info, SI, true /*AllowWholeAccess*/); Info.hasALoadOrStore = true; + } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) { + if (II->getIntrinsicID() != Intrinsic::lifetime_start && + II->getIntrinsicID() != Intrinsic::lifetime_end) + return MarkUnsafe(Info, User); } else if (isa<PHINode>(User) || isa<SelectInst>(User)) { isSafePHISelectUseForScalarRepl(User, Offset, Info); } else { @@ -1725,19 +1580,19 @@ void SROA::isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset, return MarkUnsafe(Info, User); isSafePHISelectUseForScalarRepl(GEPI, Offset, Info); } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) { - if (LI->isVolatile()) + if (!LI->isSimple()) return MarkUnsafe(Info, User); - const Type *LIType = LI->getType(); + Type *LIType = LI->getType(); isSafeMemAccess(Offset, TD->getTypeAllocSize(LIType), LIType, false, Info, LI, false /*AllowWholeAccess*/); Info.hasALoadOrStore = true; } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) { // Store is ok if storing INTO the pointer, not storing the pointer - if (SI->isVolatile() || SI->getOperand(0) == I) + if (!SI->isSimple() || SI->getOperand(0) == I) return MarkUnsafe(Info, User); - const Type *SIType = SI->getOperand(0)->getType(); + Type *SIType = SI->getOperand(0)->getType(); isSafeMemAccess(Offset, TD->getTypeAllocSize(SIType), SIType, true, Info, SI, false /*AllowWholeAccess*/); Info.hasALoadOrStore = 
true; @@ -1776,8 +1631,7 @@ void SROA::isSafeGEP(GetElementPtrInst *GEPI, // Compute the offset due to this GEP and check if the alloca has a // component element at that offset. SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end()); - Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), - &Indices[0], Indices.size()); + Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), Indices); if (!TypeHasComponent(Info.AI->getAllocatedType(), Offset, 0)) MarkUnsafe(Info, GEPI); } @@ -1786,14 +1640,14 @@ void SROA::isSafeGEP(GetElementPtrInst *GEPI, /// elements of the same type (which is always true for arrays). If so, /// return true with NumElts and EltTy set to the number of elements and the /// element type, respectively. -static bool isHomogeneousAggregate(const Type *T, unsigned &NumElts, - const Type *&EltTy) { - if (const ArrayType *AT = dyn_cast<ArrayType>(T)) { +static bool isHomogeneousAggregate(Type *T, unsigned &NumElts, + Type *&EltTy) { + if (ArrayType *AT = dyn_cast<ArrayType>(T)) { NumElts = AT->getNumElements(); EltTy = (NumElts == 0 ? 0 : AT->getElementType()); return true; } - if (const StructType *ST = dyn_cast<StructType>(T)) { + if (StructType *ST = dyn_cast<StructType>(T)) { NumElts = ST->getNumContainedTypes(); EltTy = (NumElts == 0 ? 0 : ST->getContainedType(0)); for (unsigned n = 1; n < NumElts; ++n) { @@ -1807,12 +1661,12 @@ static bool isHomogeneousAggregate(const Type *T, unsigned &NumElts, /// isCompatibleAggregate - Check if T1 and T2 are either the same type or are /// "homogeneous" aggregates with the same element type and number of elements. -static bool isCompatibleAggregate(const Type *T1, const Type *T2) { +static bool isCompatibleAggregate(Type *T1, Type *T2) { if (T1 == T2) return true; unsigned NumElts1, NumElts2; - const Type *EltTy1, *EltTy2; + Type *EltTy1, *EltTy2; if (isHomogeneousAggregate(T1, NumElts1, EltTy1) && isHomogeneousAggregate(T2, NumElts2, EltTy2) && NumElts1 == NumElts2 && @@ -1830,7 +1684,7 @@ static bool isCompatibleAggregate(const Type *T1, const Type *T2) { /// If AllowWholeAccess is true, then this allows uses of the entire alloca as a /// unit. If false, it only allows accesses known to be in a single element. void SROA::isSafeMemAccess(uint64_t Offset, uint64_t MemSize, - const Type *MemOpType, bool isStore, + Type *MemOpType, bool isStore, AllocaInfo &Info, Instruction *TheAccess, bool AllowWholeAccess) { // Check if this is a load/store of the entire alloca. @@ -1857,7 +1711,7 @@ void SROA::isSafeMemAccess(uint64_t Offset, uint64_t MemSize, } } // Check if the offset/size correspond to a component within the alloca type. - const Type *T = Info.AI->getAllocatedType(); + Type *T = Info.AI->getAllocatedType(); if (TypeHasComponent(T, Offset, MemSize)) { Info.hasSubelementAccess = true; return; @@ -1868,16 +1722,16 @@ void SROA::isSafeMemAccess(uint64_t Offset, uint64_t MemSize, /// TypeHasComponent - Return true if T has a component type with the /// specified offset and size. If Size is zero, do not check the size. 
-bool SROA::TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size) { - const Type *EltTy; +bool SROA::TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size) { + Type *EltTy; uint64_t EltSize; - if (const StructType *ST = dyn_cast<StructType>(T)) { + if (StructType *ST = dyn_cast<StructType>(T)) { const StructLayout *Layout = TD->getStructLayout(ST); unsigned EltIdx = Layout->getElementContainingOffset(Offset); EltTy = ST->getContainedType(EltIdx); EltSize = TD->getTypeAllocSize(EltTy); Offset -= Layout->getElementOffset(EltIdx); - } else if (const ArrayType *AT = dyn_cast<ArrayType>(T)) { + } else if (ArrayType *AT = dyn_cast<ArrayType>(T)) { EltTy = AT->getElementType(); EltSize = TD->getTypeAllocSize(EltTy); if (Offset >= AT->getNumElements() * EltSize) @@ -1924,9 +1778,17 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, // address operand will be updated, so nothing else needs to be done. continue; } + + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) { + if (II->getIntrinsicID() == Intrinsic::lifetime_start || + II->getIntrinsicID() == Intrinsic::lifetime_end) { + RewriteLifetimeIntrinsic(II, AI, Offset, NewElts); + } + continue; + } if (LoadInst *LI = dyn_cast<LoadInst>(User)) { - const Type *LIType = LI->getType(); + Type *LIType = LI->getType(); if (isCompatibleAggregate(LIType, AI->getAllocatedType())) { // Replace: @@ -1956,7 +1818,7 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, if (StoreInst *SI = dyn_cast<StoreInst>(User)) { Value *Val = SI->getOperand(0); - const Type *SIType = Val->getType(); + Type *SIType = Val->getType(); if (isCompatibleAggregate(SIType, AI->getAllocatedType())) { // Replace: // store { i32, i32 } %val, { i32, i32 }* %alloc @@ -2026,10 +1888,10 @@ void SROA::RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset, /// Sets T to the type of the element and Offset to the offset within that /// element. IdxTy is set to the type of the index result to be used in a /// GEP instruction. -uint64_t SROA::FindElementAndOffset(const Type *&T, uint64_t &Offset, - const Type *&IdxTy) { +uint64_t SROA::FindElementAndOffset(Type *&T, uint64_t &Offset, + Type *&IdxTy) { uint64_t Idx = 0; - if (const StructType *ST = dyn_cast<StructType>(T)) { + if (StructType *ST = dyn_cast<StructType>(T)) { const StructLayout *Layout = TD->getStructLayout(ST); Idx = Layout->getElementContainingOffset(Offset); T = ST->getContainedType(Idx); @@ -2037,7 +1899,7 @@ uint64_t SROA::FindElementAndOffset(const Type *&T, uint64_t &Offset, IdxTy = Type::getInt32Ty(T->getContext()); return Idx; } - const ArrayType *AT = cast<ArrayType>(T); + ArrayType *AT = cast<ArrayType>(T); T = AT->getElementType(); uint64_t EltSize = TD->getTypeAllocSize(T); Idx = Offset / EltSize; @@ -2053,13 +1915,12 @@ void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset, SmallVector<AllocaInst*, 32> &NewElts) { uint64_t OldOffset = Offset; SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end()); - Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), - &Indices[0], Indices.size()); + Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), Indices); RewriteForScalarRepl(GEPI, AI, Offset, NewElts); - const Type *T = AI->getAllocatedType(); - const Type *IdxTy; + Type *T = AI->getAllocatedType(); + Type *IdxTy; uint64_t OldIdx = FindElementAndOffset(T, OldOffset, IdxTy); if (GEPI->getOperand(0) == AI) OldIdx = ~0ULL; // Force the GEP to be rewritten. 
@@ -2073,7 +1934,7 @@ void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset, if (Idx == OldIdx) return; - const Type *i32Ty = Type::getInt32Ty(AI->getContext()); + Type *i32Ty = Type::getInt32Ty(AI->getContext()); SmallVector<Value*, 8> NewArgs; NewArgs.push_back(Constant::getNullValue(i32Ty)); while (EltOffset != 0) { @@ -2082,8 +1943,7 @@ void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset, } Instruction *Val = NewElts[Idx]; if (NewArgs.size() > 1) { - Val = GetElementPtrInst::CreateInBounds(Val, NewArgs.begin(), - NewArgs.end(), "", GEPI); + Val = GetElementPtrInst::CreateInBounds(Val, NewArgs, "", GEPI); Val->takeName(GEPI); } if (Val->getType() != GEPI->getType()) @@ -2092,6 +1952,62 @@ void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset, DeadInsts.push_back(GEPI); } +/// RewriteLifetimeIntrinsic - II is a lifetime.start/lifetime.end. Rewrite it +/// to mark the lifetime of the scalarized memory. +void SROA::RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI, + uint64_t Offset, + SmallVector<AllocaInst*, 32> &NewElts) { + ConstantInt *OldSize = cast<ConstantInt>(II->getArgOperand(0)); + // Put matching lifetime markers on everything from Offset up to + // Offset+OldSize. + Type *AIType = AI->getAllocatedType(); + uint64_t NewOffset = Offset; + Type *IdxTy; + uint64_t Idx = FindElementAndOffset(AIType, NewOffset, IdxTy); + + IRBuilder<> Builder(II); + uint64_t Size = OldSize->getLimitedValue(); + + if (NewOffset) { + // Splice the first element and index 'NewOffset' bytes in. SROA will + // split the alloca again later. + Value *V = Builder.CreateBitCast(NewElts[Idx], Builder.getInt8PtrTy()); + V = Builder.CreateGEP(V, Builder.getInt64(NewOffset)); + + IdxTy = NewElts[Idx]->getAllocatedType(); + uint64_t EltSize = TD->getTypeAllocSize(IdxTy) - NewOffset; + if (EltSize > Size) { + EltSize = Size; + Size = 0; + } else { + Size -= EltSize; + } + if (II->getIntrinsicID() == Intrinsic::lifetime_start) + Builder.CreateLifetimeStart(V, Builder.getInt64(EltSize)); + else + Builder.CreateLifetimeEnd(V, Builder.getInt64(EltSize)); + ++Idx; + } + + for (; Idx != NewElts.size() && Size; ++Idx) { + IdxTy = NewElts[Idx]->getAllocatedType(); + uint64_t EltSize = TD->getTypeAllocSize(IdxTy); + if (EltSize > Size) { + EltSize = Size; + Size = 0; + } else { + Size -= EltSize; + } + if (II->getIntrinsicID() == Intrinsic::lifetime_start) + Builder.CreateLifetimeStart(NewElts[Idx], + Builder.getInt64(EltSize)); + else + Builder.CreateLifetimeEnd(NewElts[Idx], + Builder.getInt64(EltSize)); + } + DeadInsts.push_back(II); +} + /// RewriteMemIntrinUserOfAlloca - MI is a memcpy/memset/memmove from or to AI. /// Rewrite it to copy or set the elements of the scalarized memory. void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, @@ -2139,7 +2055,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, // If the pointer is not the right type, insert a bitcast to the right // type. 
- const Type *NewTy = + Type *NewTy = PointerType::get(AI->getType()->getElementType(), AddrSpace); if (OtherPtr->getType() != NewTy) @@ -2159,16 +2075,16 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, if (OtherPtr) { Value *Idx[2] = { Zero, ConstantInt::get(Type::getInt32Ty(MI->getContext()), i) }; - OtherElt = GetElementPtrInst::CreateInBounds(OtherPtr, Idx, Idx + 2, + OtherElt = GetElementPtrInst::CreateInBounds(OtherPtr, Idx, OtherPtr->getName()+"."+Twine(i), MI); uint64_t EltOffset; - const PointerType *OtherPtrTy = cast<PointerType>(OtherPtr->getType()); - const Type *OtherTy = OtherPtrTy->getElementType(); - if (const StructType *ST = dyn_cast<StructType>(OtherTy)) { + PointerType *OtherPtrTy = cast<PointerType>(OtherPtr->getType()); + Type *OtherTy = OtherPtrTy->getElementType(); + if (StructType *ST = dyn_cast<StructType>(OtherTy)) { EltOffset = TD->getStructLayout(ST)->getElementOffset(i); } else { - const Type *EltTy = cast<SequentialType>(OtherTy)->getElementType(); + Type *EltTy = cast<SequentialType>(OtherTy)->getElementType(); EltOffset = TD->getTypeAllocSize(EltTy)*i; } @@ -2181,7 +2097,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, } Value *EltPtr = NewElts[i]; - const Type *EltTy = cast<PointerType>(EltPtr->getType())->getElementType(); + Type *EltTy = cast<PointerType>(EltPtr->getType())->getElementType(); // If we got down to a scalar, insert a load or store as appropriate. if (EltTy->isSingleValueType()) { @@ -2207,7 +2123,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, StoreVal = Constant::getNullValue(EltTy); // 0.0, null, 0, <0,0> } else { // If EltTy is a vector type, get the element type. - const Type *ValTy = EltTy->getScalarType(); + Type *ValTy = EltTy->getScalarType(); // Construct an integer with the right value. unsigned EltSize = TD->getTypeSizeInBits(ValTy); @@ -2228,8 +2144,8 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, assert(StoreVal->getType() == ValTy && "Type mismatch!"); // If the requested value was a vector constant, create it. - if (EltTy != ValTy) { - unsigned NumElts = cast<VectorType>(ValTy)->getNumElements(); + if (EltTy->isVectorTy()) { + unsigned NumElts = cast<VectorType>(EltTy)->getNumElements(); SmallVector<Constant*, 16> Elts(NumElts, StoreVal); StoreVal = ConstantVector::get(Elts); } @@ -2271,7 +2187,7 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI, // Extract each element out of the integer according to its structure offset // and store the element value to the individual alloca. Value *SrcVal = SI->getOperand(0); - const Type *AllocaEltTy = AI->getAllocatedType(); + Type *AllocaEltTy = AI->getAllocatedType(); uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy); IRBuilder<> Builder(SI); @@ -2286,12 +2202,12 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI, // There are two forms here: AI could be an array or struct. Both cases // have different ways to compute the element offset. - if (const StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) { + if (StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) { const StructLayout *Layout = TD->getStructLayout(EltSTy); for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { // Get the number of bits to shift SrcVal to get the value. 
- const Type *FieldTy = EltSTy->getElementType(i); + Type *FieldTy = EltSTy->getElementType(i); uint64_t Shift = Layout->getElementOffsetInBits(i); if (TD->isBigEndian()) @@ -2327,8 +2243,8 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI, } } else { - const ArrayType *ATy = cast<ArrayType>(AllocaEltTy); - const Type *ArrayEltTy = ATy->getElementType(); + ArrayType *ATy = cast<ArrayType>(AllocaEltTy); + Type *ArrayEltTy = ATy->getElementType(); uint64_t ElementOffset = TD->getTypeAllocSizeInBits(ArrayEltTy); uint64_t ElementSizeBits = TD->getTypeSizeInBits(ArrayEltTy); @@ -2384,7 +2300,7 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, SmallVector<AllocaInst*, 32> &NewElts) { // Extract each element out of the NewElts according to its structure offset // and form the result value. - const Type *AllocaEltTy = AI->getAllocatedType(); + Type *AllocaEltTy = AI->getAllocatedType(); uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy); DEBUG(dbgs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI @@ -2394,10 +2310,10 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, // have different ways to compute the element offset. const StructLayout *Layout = 0; uint64_t ArrayEltBitOffset = 0; - if (const StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) { + if (StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) { Layout = TD->getStructLayout(EltSTy); } else { - const Type *ArrayEltTy = cast<ArrayType>(AllocaEltTy)->getElementType(); + Type *ArrayEltTy = cast<ArrayType>(AllocaEltTy)->getElementType(); ArrayEltBitOffset = TD->getTypeAllocSizeInBits(ArrayEltTy); } @@ -2408,14 +2324,14 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, // Load the value from the alloca. If the NewElt is an aggregate, cast // the pointer to an integer of the same size before doing the load. Value *SrcField = NewElts[i]; - const Type *FieldTy = + Type *FieldTy = cast<PointerType>(SrcField->getType())->getElementType(); uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy); // Ignore zero sized fields like {}, they obviously contain no data. if (FieldSizeBits == 0) continue; - const IntegerType *FieldIntTy = IntegerType::get(LI->getContext(), + IntegerType *FieldIntTy = IntegerType::get(LI->getContext(), FieldSizeBits); if (!FieldTy->isIntegerTy() && !FieldTy->isFloatingPointTy() && !FieldTy->isVectorTy()) @@ -2468,14 +2384,14 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, /// HasPadding - Return true if the specified type has any structure or /// alignment padding in between the elements that would be split apart /// by SROA; return false otherwise. -static bool HasPadding(const Type *Ty, const TargetData &TD) { - if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { +static bool HasPadding(Type *Ty, const TargetData &TD) { + if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { Ty = ATy->getElementType(); return TD.getTypeSizeInBits(Ty) != TD.getTypeAllocSizeInBits(Ty); } // SROA currently handles only Arrays and Structs. - const StructType *STy = cast<StructType>(Ty); + StructType *STy = cast<StructType>(Ty); const StructLayout *SL = TD.getStructLayout(STy); unsigned PrevFieldBitOffset = 0; for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { @@ -2530,7 +2446,7 @@ bool SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) { // and fusion code. if (!Info.hasSubelementAccess && Info.hasALoadOrStore) { // If the struct/array just has one element, use basic SRoA. 
- if (const StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) { + if (StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) { if (ST->getNumElements() > 1) return false; } else { if (cast<ArrayType>(AI->getAllocatedType())->getNumElements() > 1) @@ -2576,7 +2492,7 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, if (LoadInst *LI = dyn_cast<LoadInst>(U)) { // Ignore non-volatile loads, they are always ok. - if (LI->isVolatile()) return false; + if (!LI->isSimple()) return false; continue; } |
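A note on the lifetime-marker handling introduced above: several of the new paths (CanConvertToScalar, tryToMakeAllocaBePromotable) key off the onlyUsedByLifetimeMarkers() helper to decide that a bitcast of the alloca feeds nothing but llvm.lifetime.start/llvm.lifetime.end and can therefore be ignored or stripped. As a minimal sketch of what that predicate amounts to (not code from this patch; written in the same LLVM 3.0-era style as the rest of the file, and the helper name here is only illustrative), it is roughly:

static bool usedOnlyByLifetimeMarkers(Value *V) {
  // Every user must be an llvm.lifetime.start or llvm.lifetime.end intrinsic.
  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI) {
    IntrinsicInst *II = dyn_cast<IntrinsicInst>(*UI);
    if (II == 0)
      return false;
    if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
        II->getIntrinsicID() != Intrinsic::lifetime_end)
      return false;
  }
  return true;
}

Once an alloca is promoted to a register the markers carry no information, which is why tryToMakeAllocaBePromotable can erase such bitcasts together with their lifetime users, and why ConvertUsesToScalar simply drops the intrinsics; only the scalar-replacement path (RewriteLifetimeIntrinsic) needs to re-emit per-element markers.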