Diffstat (limited to 'contrib/llvm/lib/Transforms/Scalar/SROA.cpp')
-rw-r--r--  contrib/llvm/lib/Transforms/Scalar/SROA.cpp  1489
1 file changed, 777 insertions(+), 712 deletions(-)
diff --git a/contrib/llvm/lib/Transforms/Scalar/SROA.cpp b/contrib/llvm/lib/Transforms/Scalar/SROA.cpp
index 640ea31..f6bb365 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -25,44 +25,47 @@
#define DEBUG_TYPE "sroa"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Constants.h"
-#include "llvm/DIBuilder.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/Operator.h"
-#include "llvm/Pass.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/PtrUseVisitor.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/DIBuilder.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/InstVisitor.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/GetElementPtrTypeIterator.h"
-#include "llvm/Support/InstVisitor.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/DataLayout.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;
STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement");
-STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced");
-STATISTIC(NumPromoted, "Number of allocas promoted to SSA values");
+STATISTIC(NumAllocaPartitions, "Number of alloca partitions formed");
+STATISTIC(MaxPartitionsPerAlloca, "Maximum number of partitions");
+STATISTIC(NumAllocaPartitionUses, "Number of alloca partition uses found");
+STATISTIC(MaxPartitionUsesPerAlloca, "Maximum number of partition uses");
+STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced");
+STATISTIC(NumPromoted, "Number of allocas promoted to SSA values");
STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion");
-STATISTIC(NumDeleted, "Number of instructions deleted");
-STATISTIC(NumVectorized, "Number of vectorized aggregates");
+STATISTIC(NumDeleted, "Number of instructions deleted");
+STATISTIC(NumVectorized, "Number of vectorized aggregates");
/// Hidden option to force the pass to not use DomTree and mem2reg, instead
/// forming SSA values through the SSAUpdater infrastructure.
@@ -70,112 +73,167 @@ static cl::opt<bool>
ForceSSAUpdater("force-ssa-updater", cl::init(false), cl::Hidden);
namespace {
-/// \brief Alloca partitioning representation.
-///
-/// This class represents a partitioning of an alloca into slices, and
-/// information about the nature of uses of each slice of the alloca. The goal
-/// is that this information is sufficient to decide if and how to split the
-/// alloca apart and replace slices with scalars. It is also intended that this
-/// structure can capture the relevant information needed both to decide about
-/// and to enact these transformations.
-class AllocaPartitioning {
+/// \brief A custom IRBuilder inserter which prefixes all names if they are
+/// preserved.
+template <bool preserveNames = true>
+class IRBuilderPrefixedInserter :
+ public IRBuilderDefaultInserter<preserveNames> {
+ std::string Prefix;
+
public:
- /// \brief A common base class for representing a half-open byte range.
- struct ByteRange {
- /// \brief The beginning offset of the range.
- uint64_t BeginOffset;
+ void SetNamePrefix(const Twine &P) { Prefix = P.str(); }
- /// \brief The ending offset, not included in the range.
- uint64_t EndOffset;
+protected:
+ void InsertHelper(Instruction *I, const Twine &Name, BasicBlock *BB,
+ BasicBlock::iterator InsertPt) const {
+ IRBuilderDefaultInserter<preserveNames>::InsertHelper(
+ I, Name.isTriviallyEmpty() ? Name : Prefix + Name, BB, InsertPt);
+ }
+};
- ByteRange() : BeginOffset(), EndOffset() {}
- ByteRange(uint64_t BeginOffset, uint64_t EndOffset)
- : BeginOffset(BeginOffset), EndOffset(EndOffset) {}
+// Specialization for not preserving the name is trivial.
+template <>
+class IRBuilderPrefixedInserter<false> :
+ public IRBuilderDefaultInserter<false> {
+public:
+ void SetNamePrefix(const Twine &P) {}
+};
- /// \brief Support for ordering ranges.
- ///
- /// This provides an ordering over ranges such that start offsets are
- /// always increasing, and within equal start offsets, the end offsets are
- /// decreasing. Thus the spanning range comes first in a cluster with the
- /// same start position.
- bool operator<(const ByteRange &RHS) const {
- if (BeginOffset < RHS.BeginOffset) return true;
- if (BeginOffset > RHS.BeginOffset) return false;
- if (EndOffset > RHS.EndOffset) return true;
- return false;
- }
+/// \brief Provide a typedef for IRBuilder that drops names in release builds.
+#ifndef NDEBUG
+typedef llvm::IRBuilder<true, ConstantFolder,
+ IRBuilderPrefixedInserter<true> > IRBuilderTy;
+#else
+typedef llvm::IRBuilder<false, ConstantFolder,
+ IRBuilderPrefixedInserter<false> > IRBuilderTy;
+#endif
+}
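
The inserter hook above lets every value created through the builder pick up a per-alloca name prefix, so rewritten IR stays attributable to its source partition, while the `<false>` specialization makes the whole mechanism compile away in release builds. A minimal standalone sketch of the same hook pattern (plain C++ with illustrative names, not LLVM's classes):

#include <iostream>
#include <string>

// Stand-in for IRBuilderDefaultInserter: performs the raw insertion.
struct DefaultInserter {
  void insert(const std::string &Name) const {
    std::cout << "created value %" << Name << "\n";
  }
};

// Stand-in for IRBuilderPrefixedInserter: same hook, but non-empty names
// get the configured prefix prepended before delegating.
struct PrefixedInserter : DefaultInserter {
  std::string Prefix;
  void setNamePrefix(const std::string &P) { Prefix = P; }
  void insert(const std::string &Name) const {
    DefaultInserter::insert(Name.empty() ? Name : Prefix + Name);
  }
};

int main() {
  PrefixedInserter I;
  I.setNamePrefix("alloca3.");
  I.insert("raw_idx"); // prints: created value %alloca3.raw_idx
}
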
- /// \brief Support comparison with a single offset to allow binary searches.
- friend bool operator<(const ByteRange &LHS, uint64_t RHSOffset) {
- return LHS.BeginOffset < RHSOffset;
- }
+namespace {
+/// \brief A common base class for representing a half-open byte range.
+struct ByteRange {
+ /// \brief The beginning offset of the range.
+ uint64_t BeginOffset;
- friend LLVM_ATTRIBUTE_UNUSED bool operator<(uint64_t LHSOffset,
- const ByteRange &RHS) {
- return LHSOffset < RHS.BeginOffset;
- }
+ /// \brief The ending offset, not included in the range.
+ uint64_t EndOffset;
- bool operator==(const ByteRange &RHS) const {
- return BeginOffset == RHS.BeginOffset && EndOffset == RHS.EndOffset;
- }
- bool operator!=(const ByteRange &RHS) const { return !operator==(RHS); }
- };
+ ByteRange() : BeginOffset(), EndOffset() {}
+ ByteRange(uint64_t BeginOffset, uint64_t EndOffset)
+ : BeginOffset(BeginOffset), EndOffset(EndOffset) {}
- /// \brief A partition of an alloca.
+ /// \brief Support for ordering ranges.
///
- /// This structure represents a contiguous partition of the alloca. These are
- /// formed by examining the uses of the alloca. During formation, they may
- /// overlap but once an AllocaPartitioning is built, the Partitions within it
- /// are all disjoint.
- struct Partition : public ByteRange {
- /// \brief Whether this partition is splittable into smaller partitions.
- ///
- /// We flag partitions as splittable when they are formed entirely due to
- /// accesses by trivially splittable operations such as memset and memcpy.
- bool IsSplittable;
+ /// This provides an ordering over ranges such that start offsets are
+ /// always increasing, and within equal start offsets, the end offsets are
+ /// decreasing. Thus the spanning range comes first in a cluster with the
+ /// same start position.
+ bool operator<(const ByteRange &RHS) const {
+ if (BeginOffset < RHS.BeginOffset) return true;
+ if (BeginOffset > RHS.BeginOffset) return false;
+ if (EndOffset > RHS.EndOffset) return true;
+ return false;
+ }
- /// \brief Test whether a partition has been marked as dead.
- bool isDead() const {
- if (BeginOffset == UINT64_MAX) {
- assert(EndOffset == UINT64_MAX);
- return true;
- }
- return false;
- }
+ /// \brief Support comparison with a single offset to allow binary searches.
+ friend bool operator<(const ByteRange &LHS, uint64_t RHSOffset) {
+ return LHS.BeginOffset < RHSOffset;
+ }
+
+ friend LLVM_ATTRIBUTE_UNUSED bool operator<(uint64_t LHSOffset,
+ const ByteRange &RHS) {
+ return LHSOffset < RHS.BeginOffset;
+ }
+
+ bool operator==(const ByteRange &RHS) const {
+ return BeginOffset == RHS.BeginOffset && EndOffset == RHS.EndOffset;
+ }
+ bool operator!=(const ByteRange &RHS) const { return !operator==(RHS); }
+};
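
The ordering defined by operator< above is worth seeing concretely: sorting puts ranges with smaller starts first, and among equal starts the widest (spanning) range first. A small standalone check, assuming plain uint64_t half-open ranges:

#include <algorithm>
#include <cstdint>
#include <cstdio>

struct Range { uint64_t Begin, End; };

// Mirrors ByteRange::operator<: increasing start offset; for equal
// starts, decreasing end offset, so a spanning range sorts first.
static bool lessRange(const Range &L, const Range &R) {
  if (L.Begin != R.Begin) return L.Begin < R.Begin;
  return L.End > R.End;
}

int main() {
  Range Rs[] = {{2, 4}, {0, 4}, {0, 16}, {2, 8}};
  std::sort(std::begin(Rs), std::end(Rs), lessRange);
  for (const Range &R : Rs)
    std::printf("[%llu,%llu) ", (unsigned long long)R.Begin,
                (unsigned long long)R.End);
  std::printf("\n"); // prints: [0,16) [0,4) [2,8) [2,4)
}
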
- /// \brief Kill a partition.
- /// This is accomplished by setting both its beginning and end offset to
- /// the maximum possible value.
- void kill() {
- assert(!isDead() && "He's Dead, Jim!");
- BeginOffset = EndOffset = UINT64_MAX;
+/// \brief A partition of an alloca.
+///
+/// This structure represents a contiguous partition of the alloca. These are
+/// formed by examining the uses of the alloca. During formation, they may
+/// overlap but once an AllocaPartitioning is built, the Partitions within it
+/// are all disjoint.
+struct Partition : public ByteRange {
+ /// \brief Whether this partition is splittable into smaller partitions.
+ ///
+ /// We flag partitions as splittable when they are formed entirely due to
+ /// accesses by trivially splittable operations such as memset and memcpy.
+ bool IsSplittable;
+
+ /// \brief Test whether a partition has been marked as dead.
+ bool isDead() const {
+ if (BeginOffset == UINT64_MAX) {
+ assert(EndOffset == UINT64_MAX);
+ return true;
}
+ return false;
+ }
- Partition() : ByteRange(), IsSplittable() {}
- Partition(uint64_t BeginOffset, uint64_t EndOffset, bool IsSplittable)
- : ByteRange(BeginOffset, EndOffset), IsSplittable(IsSplittable) {}
- };
+ /// \brief Kill a partition.
+ /// This is accomplished by setting both its beginning and end offset to
+ /// the maximum possible value.
+ void kill() {
+ assert(!isDead() && "He's Dead, Jim!");
+ BeginOffset = EndOffset = UINT64_MAX;
+ }
+
+ Partition() : ByteRange(), IsSplittable() {}
+ Partition(uint64_t BeginOffset, uint64_t EndOffset, bool IsSplittable)
+ : ByteRange(BeginOffset, EndOffset), IsSplittable(IsSplittable) {}
+};
+
+/// \brief A particular use of a partition of the alloca.
+///
+/// This structure is used to associate uses of a partition with it. They
+/// mark the range of bytes which are referenced by a particular instruction,
+/// and includes a handle to the user itself and the pointer value in use.
+/// The bounds of these uses are determined by intersecting the bounds of the
+/// memory use itself with a particular partition. As a consequence there is
+/// intentionally overlap between various uses of the same partition.
+class PartitionUse : public ByteRange {
+ /// \brief Combined storage for both the Use* and split state.
+ PointerIntPair<Use*, 1, bool> UsePtrAndIsSplit;
+
+public:
+ PartitionUse() : ByteRange(), UsePtrAndIsSplit() {}
+ PartitionUse(uint64_t BeginOffset, uint64_t EndOffset, Use *U,
+ bool IsSplit)
+ : ByteRange(BeginOffset, EndOffset), UsePtrAndIsSplit(U, IsSplit) {}
- /// \brief A particular use of a partition of the alloca.
+ /// \brief The use in question. Provides access to both user and used value.
///
- /// This structure is used to associate uses of a partition with it. They
- /// mark the range of bytes which are referenced by a particular instruction,
- /// and includes a handle to the user itself and the pointer value in use.
- /// The bounds of these uses are determined by intersecting the bounds of the
- /// memory use itself with a particular partition. As a consequence there is
- /// intentionally overlap between various uses of the same partition.
- struct PartitionUse : public ByteRange {
- /// \brief The use in question. Provides access to both user and used value.
- ///
- /// Note that this may be null if the partition use is *dead*, that is, it
- /// should be ignored.
- Use *U;
+ /// Note that this may be null if the partition use is *dead*, that is, it
+ /// should be ignored.
+ Use *getUse() const { return UsePtrAndIsSplit.getPointer(); }
- PartitionUse() : ByteRange(), U() {}
- PartitionUse(uint64_t BeginOffset, uint64_t EndOffset, Use *U)
- : ByteRange(BeginOffset, EndOffset), U(U) {}
- };
+ /// \brief Set the use for this partition use range.
+ void setUse(Use *U) { UsePtrAndIsSplit.setPointer(U); }
+
+ /// \brief Whether this use is split across multiple partitions.
+ bool isSplit() const { return UsePtrAndIsSplit.getInt(); }
+};
+}
+namespace llvm {
+template <> struct isPodLike<Partition> : llvm::true_type {};
+template <> struct isPodLike<PartitionUse> : llvm::true_type {};
+}
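
PartitionUse now stores its Use pointer and split flag in a single word via PointerIntPair, and the isPodLike specializations let containers treat both records as memcpy-able PODs. The trick PointerIntPair relies on, sketched in plain C++ (illustrative only; LLVM's template derives the number of spare bits from the pointee's alignment):

#include <cassert>
#include <cstdint>

// A pointer to a type aligned to at least 2 bytes always has a zero low
// bit, so that bit can carry a boolean flag alongside the pointer.
struct PtrBoolPair {
  uintptr_t Bits = 0;
  void set(void *P, bool B) {
    uintptr_t V = reinterpret_cast<uintptr_t>(P);
    assert((V & 1) == 0 && "pointer not sufficiently aligned");
    Bits = V | uintptr_t(B);
  }
  void *getPointer() const {
    return reinterpret_cast<void *>(Bits & ~uintptr_t(1));
  }
  bool getInt() const { return Bits & 1; }
};

int main() {
  int X = 0;
  PtrBoolPair P;
  P.set(&X, true);
  assert(P.getPointer() == &X && P.getInt());
}
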
+
+namespace {
+/// \brief Alloca partitioning representation.
+///
+/// This class represents a partitioning of an alloca into slices, and
+/// information about the nature of uses of each slice of the alloca. The goal
+/// is that this information is sufficient to decide if and how to split the
+/// alloca apart and replace slices with scalars. It is also intended that this
+/// structure can capture the relevant information needed both to decide about
+/// and to enact these transformations.
+class AllocaPartitioning {
+public:
/// \brief Construct a partitioning of a particular alloca.
///
/// Construction does most of the work for partitioning the alloca. This
@@ -334,7 +392,7 @@ private:
class UseBuilder;
friend class AllocaPartitioning::UseBuilder;
-#ifndef NDEBUG
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// \brief Handle to alloca instruction to simplify method interfaces.
AllocaInst &AI;
#endif
@@ -404,106 +462,17 @@ private:
};
}
-template <typename DerivedT, typename RetT>
-class AllocaPartitioning::BuilderBase
- : public InstVisitor<DerivedT, RetT> {
-public:
- BuilderBase(const DataLayout &TD, AllocaInst &AI, AllocaPartitioning &P)
- : TD(TD),
- AllocSize(TD.getTypeAllocSize(AI.getAllocatedType())),
- P(P) {
- enqueueUsers(AI, 0);
- }
-
-protected:
- const DataLayout &TD;
- const uint64_t AllocSize;
- AllocaPartitioning &P;
-
- SmallPtrSet<Use *, 8> VisitedUses;
+static Value *foldSelectInst(SelectInst &SI) {
+ // If the condition being selected on is a constant or the same value is
+ // being selected between, fold the select. Yes this does (rarely) happen
+ // early on.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(SI.getCondition()))
+ return SI.getOperand(1+CI->isZero());
+ if (SI.getOperand(1) == SI.getOperand(2))
+ return SI.getOperand(1);
- struct OffsetUse {
- Use *U;
- int64_t Offset;
- };
- SmallVector<OffsetUse, 8> Queue;
-
- // The active offset and use while visiting.
- Use *U;
- int64_t Offset;
-
- void enqueueUsers(Instruction &I, int64_t UserOffset) {
- for (Value::use_iterator UI = I.use_begin(), UE = I.use_end();
- UI != UE; ++UI) {
- if (VisitedUses.insert(&UI.getUse())) {
- OffsetUse OU = { &UI.getUse(), UserOffset };
- Queue.push_back(OU);
- }
- }
- }
-
- bool computeConstantGEPOffset(GetElementPtrInst &GEPI, int64_t &GEPOffset) {
- GEPOffset = Offset;
- for (gep_type_iterator GTI = gep_type_begin(GEPI), GTE = gep_type_end(GEPI);
- GTI != GTE; ++GTI) {
- ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
- if (!OpC)
- return false;
- if (OpC->isZero())
- continue;
-
- // Handle a struct index, which adds its field offset to the pointer.
- if (StructType *STy = dyn_cast<StructType>(*GTI)) {
- unsigned ElementIdx = OpC->getZExtValue();
- const StructLayout *SL = TD.getStructLayout(STy);
- uint64_t ElementOffset = SL->getElementOffset(ElementIdx);
- // Check that we can continue to model this GEP in a signed 64-bit offset.
- if (ElementOffset > INT64_MAX ||
- (GEPOffset >= 0 &&
- ((uint64_t)GEPOffset + ElementOffset) > INT64_MAX)) {
- DEBUG(dbgs() << "WARNING: Encountered a cumulative offset exceeding "
- << "what can be represented in an int64_t!\n"
- << " alloca: " << P.AI << "\n");
- return false;
- }
- if (GEPOffset < 0)
- GEPOffset = ElementOffset + (uint64_t)-GEPOffset;
- else
- GEPOffset += ElementOffset;
- continue;
- }
-
- APInt Index = OpC->getValue().sextOrTrunc(TD.getPointerSizeInBits());
- Index *= APInt(Index.getBitWidth(),
- TD.getTypeAllocSize(GTI.getIndexedType()));
- Index += APInt(Index.getBitWidth(), (uint64_t)GEPOffset,
- /*isSigned*/true);
- // Check if the result can be stored in our int64_t offset.
- if (!Index.isSignedIntN(sizeof(GEPOffset) * 8)) {
- DEBUG(dbgs() << "WARNING: Encountered a cumulative offset exceeding "
- << "what can be represented in an int64_t!\n"
- << " alloca: " << P.AI << "\n");
- return false;
- }
-
- GEPOffset = Index.getSExtValue();
- }
- return true;
- }
-
- Value *foldSelectInst(SelectInst &SI) {
- // If the condition being selected on is a constant or the same value is
- // being selected between, fold the select. Yes this does (rarely) happen
- // early on.
- if (ConstantInt *CI = dyn_cast<ConstantInt>(SI.getCondition()))
- return SI.getOperand(1+CI->isZero());
- if (SI.getOperand(1) == SI.getOperand(2)) {
- assert(*U == SI.getOperand(1));
- return SI.getOperand(1);
- }
- return 0;
- }
-};
+ return 0;
+}
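
Hoisting foldSelectInst out of BuilderBase makes it a plain helper: a select folds when its condition is a known constant (pick the matching arm, hence the `1 + CI->isZero()` operand index) or when both arms are the same value. A plain-C++ analogue of that decision:

#include <cassert>
#include <optional>

// Analogue of foldSelectInst: returns the folded value, or nothing when
// the select must be kept.
static std::optional<int> foldSelect(std::optional<bool> Cond, int TrueV,
                                     int FalseV) {
  if (Cond)
    return *Cond ? TrueV : FalseV; // constant condition
  if (TrueV == FalseV)
    return TrueV;                  // both arms identical
  return std::nullopt;             // cannot fold
}

int main() {
  assert(foldSelect(false, 1, 2) == 2);
  assert(foldSelect(std::nullopt, 7, 7) == 7);
  assert(!foldSelect(std::nullopt, 1, 2).has_value());
}
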
/// \brief Builder for the alloca partitioning.
///
@@ -511,67 +480,45 @@ protected:
/// of an alloca and splitting the partitions for each load and store at each
/// offset.
class AllocaPartitioning::PartitionBuilder
- : public BuilderBase<PartitionBuilder, bool> {
- friend class InstVisitor<PartitionBuilder, bool>;
+ : public PtrUseVisitor<PartitionBuilder> {
+ friend class PtrUseVisitor<PartitionBuilder>;
+ friend class InstVisitor<PartitionBuilder>;
+ typedef PtrUseVisitor<PartitionBuilder> Base;
+
+ const uint64_t AllocSize;
+ AllocaPartitioning &P;
SmallDenseMap<Instruction *, unsigned> MemTransferPartitionMap;
public:
- PartitionBuilder(const DataLayout &TD, AllocaInst &AI, AllocaPartitioning &P)
- : BuilderBase<PartitionBuilder, bool>(TD, AI, P) {}
-
- /// \brief Run the builder over the allocation.
- bool operator()() {
- // Note that we have to re-evaluate size on each trip through the loop as
- // the queue grows at the tail.
- for (unsigned Idx = 0; Idx < Queue.size(); ++Idx) {
- U = Queue[Idx].U;
- Offset = Queue[Idx].Offset;
- if (!visit(cast<Instruction>(U->getUser())))
- return false;
- }
- return true;
- }
+ PartitionBuilder(const DataLayout &DL, AllocaInst &AI, AllocaPartitioning &P)
+ : PtrUseVisitor<PartitionBuilder>(DL),
+ AllocSize(DL.getTypeAllocSize(AI.getAllocatedType())),
+ P(P) {}
private:
- bool markAsEscaping(Instruction &I) {
- P.PointerEscapingInstr = &I;
- return false;
- }
-
- void insertUse(Instruction &I, int64_t Offset, uint64_t Size,
+ void insertUse(Instruction &I, const APInt &Offset, uint64_t Size,
bool IsSplittable = false) {
- // Completely skip uses which have a zero size or don't overlap the
- // allocation.
- if (Size == 0 ||
- (Offset >= 0 && (uint64_t)Offset >= AllocSize) ||
- (Offset < 0 && (uint64_t)-Offset >= Size)) {
+ // Completely skip uses which have a zero size or start either before or
+ // past the end of the allocation.
+ if (Size == 0 || Offset.isNegative() || Offset.uge(AllocSize)) {
DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte use @" << Offset
- << " which starts past the end of the " << AllocSize
- << " byte alloca:\n"
+ << " which has zero size or starts outside of the "
+ << AllocSize << " byte alloca:\n"
<< " alloca: " << P.AI << "\n"
<< " use: " << I << "\n");
return;
}
- // Clamp the start to the beginning of the allocation.
- if (Offset < 0) {
- DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @" << Offset
- << " to start at the beginning of the alloca:\n"
- << " alloca: " << P.AI << "\n"
- << " use: " << I << "\n");
- Size -= (uint64_t)-Offset;
- Offset = 0;
- }
-
- uint64_t BeginOffset = Offset, EndOffset = BeginOffset + Size;
+ uint64_t BeginOffset = Offset.getZExtValue();
+ uint64_t EndOffset = BeginOffset + Size;
// Clamp the end offset to the end of the allocation. Note that this is
// formulated to handle even the case where "BeginOffset + Size" overflows.
- // NOTE! This may appear superficially to be something we could ignore
- // entirely, but that is not so! There may be PHI-node uses where some
- // instructions are dead but not others. We can't completely ignore the
- // PHI node, and so have to record at least the information here.
+ // This may appear superficially to be something we could ignore entirely,
+ // but that is not so! There may be widened loads or PHI-node uses where
+ // some instructions are dead but not others. We can't completely ignore
+ // them, and so have to record at least the information here.
assert(AllocSize >= BeginOffset); // Established above.
if (Size > AllocSize - BeginOffset) {
DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @" << Offset
@@ -585,9 +532,41 @@ private:
P.Partitions.push_back(New);
}
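
The end-offset clamp above is deliberately written as `Size > AllocSize - BeginOffset` rather than `BeginOffset + Size > AllocSize`: the subtraction cannot wrap, since BeginOffset <= AllocSize was just asserted, while the addition can. A small demonstration of the difference:

#include <cassert>
#include <cstdint>

int main() {
  uint64_t AllocSize = 16, BeginOffset = 8;
  uint64_t Size = UINT64_MAX - 4; // an absurdly large use size

  // The naive test wraps around and claims the use fits in the alloca.
  assert(BeginOffset + Size < AllocSize);

  // The overflow-safe form used above correctly detects the overrun.
  assert(Size > AllocSize - BeginOffset);
}
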
- bool handleLoadOrStore(Type *Ty, Instruction &I, int64_t Offset,
- bool IsVolatile) {
- uint64_t Size = TD.getTypeStoreSize(Ty);
+ void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset,
+ uint64_t Size, bool IsVolatile) {
+ // We allow splitting of loads and stores where the type is an integer type
+ // and cover the entire alloca. This prevents us from splitting over
+ // eagerly.
+ // FIXME: In the great blue eventually, we should eagerly split all integer
+ // loads and stores, and then have a separate step that merges adjacent
+ // alloca partitions into a single partition suitable for integer widening.
+ // Or we should skip the merge step and rely on GVN and other passes to
+ // merge adjacent loads and stores that survive mem2reg.
+ bool IsSplittable =
+ Ty->isIntegerTy() && !IsVolatile && Offset == 0 && Size >= AllocSize;
+
+ insertUse(I, Offset, Size, IsSplittable);
+ }
+
+ void visitLoadInst(LoadInst &LI) {
+ assert((!LI.isSimple() || LI.getType()->isSingleValueType()) &&
+ "All simple FCA loads should have been pre-split");
+
+ if (!IsOffsetKnown)
+ return PI.setAborted(&LI);
+
+ uint64_t Size = DL.getTypeStoreSize(LI.getType());
+ return handleLoadOrStore(LI.getType(), LI, Offset, Size, LI.isVolatile());
+ }
+
+ void visitStoreInst(StoreInst &SI) {
+ Value *ValOp = SI.getValueOperand();
+ if (ValOp == *U)
+ return PI.setEscapedAndAborted(&SI);
+ if (!IsOffsetKnown)
+ return PI.setAborted(&SI);
+
+ uint64_t Size = DL.getTypeStoreSize(ValOp->getType());
// If this memory access can be shown to *statically* extend outside the
// bounds of the allocation, its behavior is undefined, so simply
// ignore it. Note that this is more strict than the generic clamping
// behavior of insertUse. We also try to handle cases which might run the
// risk of overflow.
// risk of overflow.
// FIXME: We should instead consider the pointer to have escaped if this
// function is being instrumented for addressing bugs or race conditions.
- if (Offset < 0 || (uint64_t)Offset >= AllocSize ||
- Size > (AllocSize - (uint64_t)Offset)) {
- DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte "
- << (isa<LoadInst>(I) ? "load" : "store") << " @" << Offset
+ if (Offset.isNegative() || Size > AllocSize ||
+ Offset.ugt(AllocSize - Size)) {
+ DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte store @" << Offset
<< " which extends past the end of the " << AllocSize
<< " byte alloca:\n"
<< " alloca: " << P.AI << "\n"
- << " use: " << I << "\n");
- return true;
+ << " use: " << SI << "\n");
+ return;
}
- // We allow splitting of loads and stores where the type is an integer type
- // and which cover the entire alloca. Such integer loads and stores
- // often require decomposition into fine grained loads and stores.
- bool IsSplittable = false;
- if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
- IsSplittable = !IsVolatile && ITy->getBitWidth() == AllocSize*8;
-
- insertUse(I, Offset, Size, IsSplittable);
- return true;
- }
-
- bool visitBitCastInst(BitCastInst &BC) {
- enqueueUsers(BC, Offset);
- return true;
- }
-
- bool visitGetElementPtrInst(GetElementPtrInst &GEPI) {
- int64_t GEPOffset;
- if (!computeConstantGEPOffset(GEPI, GEPOffset))
- return markAsEscaping(GEPI);
-
- enqueueUsers(GEPI, GEPOffset);
- return true;
- }
-
- bool visitLoadInst(LoadInst &LI) {
- assert((!LI.isSimple() || LI.getType()->isSingleValueType()) &&
- "All simple FCA loads should have been pre-split");
- return handleLoadOrStore(LI.getType(), LI, Offset, LI.isVolatile());
- }
-
- bool visitStoreInst(StoreInst &SI) {
- Value *ValOp = SI.getValueOperand();
- if (ValOp == *U)
- return markAsEscaping(SI);
-
assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) &&
"All simple FCA stores should have been pre-split");
- return handleLoadOrStore(ValOp->getType(), SI, Offset, SI.isVolatile());
+ handleLoadOrStore(ValOp->getType(), SI, Offset, Size, SI.isVolatile());
}
- bool visitMemSetInst(MemSetInst &II) {
+ void visitMemSetInst(MemSetInst &II) {
assert(II.getRawDest() == *U && "Pointer use is not the destination?");
ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
- uint64_t Size = Length ? Length->getZExtValue() : AllocSize - Offset;
- insertUse(II, Offset, Size, Length);
- return true;
+ if ((Length && Length->getValue() == 0) ||
+ (IsOffsetKnown && !Offset.isNegative() && Offset.uge(AllocSize)))
+ // Zero-length mem transfer intrinsics can be ignored entirely.
+ return;
+
+ if (!IsOffsetKnown)
+ return PI.setAborted(&II);
+
+ insertUse(II, Offset,
+ Length ? Length->getLimitedValue()
+ : AllocSize - Offset.getLimitedValue(),
+ (bool)Length);
}
- bool visitMemTransferInst(MemTransferInst &II) {
+ void visitMemTransferInst(MemTransferInst &II) {
ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
- uint64_t Size = Length ? Length->getZExtValue() : AllocSize - Offset;
- if (!Size)
+ if ((Length && Length->getValue() == 0) ||
+ (IsOffsetKnown && !Offset.isNegative() && Offset.uge(AllocSize)))
// Zero-length mem transfer intrinsics can be ignored entirely.
- return true;
+ return;
+
+ if (!IsOffsetKnown)
+ return PI.setAborted(&II);
+
+ uint64_t RawOffset = Offset.getLimitedValue();
+ uint64_t Size = Length ? Length->getLimitedValue()
+ : AllocSize - RawOffset;
MemTransferOffsets &Offsets = P.MemTransferInstData[&II];
@@ -670,12 +628,12 @@ private:
Offsets.IsSplittable = Length;
if (*U == II.getRawDest()) {
- Offsets.DestBegin = Offset;
- Offsets.DestEnd = Offset + Size;
+ Offsets.DestBegin = RawOffset;
+ Offsets.DestEnd = RawOffset + Size;
}
if (*U == II.getRawSource()) {
- Offsets.SourceBegin = Offset;
- Offsets.SourceEnd = Offset + Size;
+ Offsets.SourceBegin = RawOffset;
+ Offsets.SourceEnd = RawOffset + Size;
}
// If we have set up end offsets for both the source and the destination,
@@ -688,7 +646,7 @@ private:
// In that case, we can completely elide the transfer.
if (!II.isVolatile() && Offsets.SourceBegin == Offsets.DestBegin) {
P.Partitions[PrevIdx].kill();
- return true;
+ return;
}
// Otherwise we have an offset transfer within the same alloca. We can't
@@ -701,7 +659,7 @@ private:
// For non-volatile transfers this is a no-op.
if (!II.isVolatile())
- return true;
+ return;
// Otherwise just suppress splitting.
Offsets.IsSplittable = false;
@@ -721,23 +679,25 @@ private:
"Already have intrinsic in map but haven't seen both ends");
(void)Inserted;
}
-
- return true;
}
// Disable SRoA for any intrinsics except for lifetime invariants.
- // FIXME: What about debug instrinsics? This matches old behavior, but
+ // FIXME: What about debug intrinsics? This matches old behavior, but
// doesn't make sense.
- bool visitIntrinsicInst(IntrinsicInst &II) {
+ void visitIntrinsicInst(IntrinsicInst &II) {
+ if (!IsOffsetKnown)
+ return PI.setAborted(&II);
+
if (II.getIntrinsicID() == Intrinsic::lifetime_start ||
II.getIntrinsicID() == Intrinsic::lifetime_end) {
ConstantInt *Length = cast<ConstantInt>(II.getArgOperand(0));
- uint64_t Size = std::min(AllocSize - Offset, Length->getLimitedValue());
+ uint64_t Size = std::min(AllocSize - Offset.getLimitedValue(),
+ Length->getLimitedValue());
insertUse(II, Offset, Size, true);
- return true;
+ return;
}
- return markAsEscaping(II);
+ Base::visitIntrinsicInst(II);
}
Instruction *hasUnsafePHIOrSelectUse(Instruction *Root, uint64_t &Size) {
@@ -757,14 +717,14 @@ private:
llvm::tie(UsedI, I) = Uses.pop_back_val();
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
- Size = std::max(Size, TD.getTypeStoreSize(LI->getType()));
+ Size = std::max(Size, DL.getTypeStoreSize(LI->getType()));
continue;
}
if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
Value *Op = SI->getOperand(0);
if (Op == UsedI)
return SI;
- Size = std::max(Size, TD.getTypeStoreSize(Op->getType()));
+ Size = std::max(Size, DL.getTypeStoreSize(Op->getType()));
continue;
}
@@ -785,54 +745,62 @@ private:
return 0;
}
- bool visitPHINode(PHINode &PN) {
+ void visitPHINode(PHINode &PN) {
+ if (PN.use_empty())
+ return;
+ if (!IsOffsetKnown)
+ return PI.setAborted(&PN);
+
// See if we already have computed info on this node.
std::pair<uint64_t, bool> &PHIInfo = P.PHIOrSelectSizes[&PN];
if (PHIInfo.first) {
PHIInfo.second = true;
insertUse(PN, Offset, PHIInfo.first);
- return true;
+ return;
}
// Check for an unsafe use of the PHI node.
- if (Instruction *EscapingI = hasUnsafePHIOrSelectUse(&PN, PHIInfo.first))
- return markAsEscaping(*EscapingI);
+ if (Instruction *UnsafeI = hasUnsafePHIOrSelectUse(&PN, PHIInfo.first))
+ return PI.setAborted(UnsafeI);
insertUse(PN, Offset, PHIInfo.first);
- return true;
}
- bool visitSelectInst(SelectInst &SI) {
+ void visitSelectInst(SelectInst &SI) {
+ if (SI.use_empty())
+ return;
if (Value *Result = foldSelectInst(SI)) {
if (Result == *U)
// If the result of the constant fold will be the pointer, recurse
// through the select as if we had RAUW'ed it.
- enqueueUsers(SI, Offset);
+ enqueueUsers(SI);
- return true;
+ return;
}
+ if (!IsOffsetKnown)
+ return PI.setAborted(&SI);
// See if we already have computed info on this node.
std::pair<uint64_t, bool> &SelectInfo = P.PHIOrSelectSizes[&SI];
if (SelectInfo.first) {
SelectInfo.second = true;
insertUse(SI, Offset, SelectInfo.first);
- return true;
+ return;
}
// Check for an unsafe use of the select.
- if (Instruction *EscapingI = hasUnsafePHIOrSelectUse(&SI, SelectInfo.first))
- return markAsEscaping(*EscapingI);
+ if (Instruction *UnsafeI = hasUnsafePHIOrSelectUse(&SI, SelectInfo.first))
+ return PI.setAborted(UnsafeI);
insertUse(SI, Offset, SelectInfo.first);
- return true;
}
/// \brief Disable SROA entirely if there are unhandled users of the alloca.
- bool visitInstruction(Instruction &I) { return markAsEscaping(I); }
+ void visitInstruction(Instruction &I) {
+ PI.setAborted(&I);
+ }
};
-
/// \brief Use adder for the alloca partitioning.
///
/// This class adds the uses of an alloca to all of the partitions which they
@@ -851,26 +819,22 @@ private:
/// partition space is pre-sorted, and do a logarithmic search for the
/// partition needed, making the total visit a classical ((N + M) * log(N))
/// complexity operation.
-class AllocaPartitioning::UseBuilder : public BuilderBase<UseBuilder> {
+class AllocaPartitioning::UseBuilder : public PtrUseVisitor<UseBuilder> {
+ friend class PtrUseVisitor<UseBuilder>;
friend class InstVisitor<UseBuilder>;
+ typedef PtrUseVisitor<UseBuilder> Base;
+
+ const uint64_t AllocSize;
+ AllocaPartitioning &P;
/// \brief Set to de-duplicate dead instructions found in the use walk.
SmallPtrSet<Instruction *, 4> VisitedDeadInsts;
public:
UseBuilder(const DataLayout &TD, AllocaInst &AI, AllocaPartitioning &P)
- : BuilderBase<UseBuilder>(TD, AI, P) {}
-
- /// \brief Run the builder over the allocation.
- void operator()() {
- // Note that we have to re-evaluate size on each trip through the loop as
- // the queue grows at the tail.
- for (unsigned Idx = 0; Idx < Queue.size(); ++Idx) {
- U = Queue[Idx].U;
- Offset = Queue[Idx].Offset;
- this->visit(cast<Instruction>(U->getUser()));
- }
- }
+ : PtrUseVisitor<UseBuilder>(TD),
+ AllocSize(TD.getTypeAllocSize(AI.getAllocatedType())),
+ P(P) {}
private:
void markAsDead(Instruction &I) {
@@ -878,20 +842,14 @@ private:
P.DeadUsers.push_back(&I);
}
- void insertUse(Instruction &User, int64_t Offset, uint64_t Size) {
+ void insertUse(Instruction &User, const APInt &Offset, uint64_t Size) {
// If the use has a zero size or extends outside of the allocation, record
// it as a dead use for elimination later.
- if (Size == 0 || (uint64_t)Offset >= AllocSize ||
- (Offset < 0 && (uint64_t)-Offset >= Size))
+ if (Size == 0 || Offset.isNegative() || Offset.uge(AllocSize))
return markAsDead(User);
- // Clamp the start to the beginning of the allocation.
- if (Offset < 0) {
- Size -= (uint64_t)-Offset;
- Offset = 0;
- }
-
- uint64_t BeginOffset = Offset, EndOffset = BeginOffset + Size;
+ uint64_t BeginOffset = Offset.getZExtValue();
+ uint64_t EndOffset = BeginOffset + Size;
// Clamp the end offset to the end of the allocation. Note that this is
// formulated to handle even the case where "BeginOffset + Size" overflows.
@@ -900,13 +858,14 @@ private:
EndOffset = AllocSize;
// NB: This only works if we have zero overlapping partitions.
- iterator B = std::lower_bound(P.begin(), P.end(), BeginOffset);
- if (B != P.begin() && llvm::prior(B)->EndOffset > BeginOffset)
- B = llvm::prior(B);
- for (iterator I = B, E = P.end(); I != E && I->BeginOffset < EndOffset;
- ++I) {
+ iterator I = std::lower_bound(P.begin(), P.end(), BeginOffset);
+ if (I != P.begin() && llvm::prior(I)->EndOffset > BeginOffset)
+ I = llvm::prior(I);
+ iterator E = P.end();
+ bool IsSplit = llvm::next(I) != E && llvm::next(I)->BeginOffset < EndOffset;
+ for (; I != E && I->BeginOffset < EndOffset; ++I) {
PartitionUse NewPU(std::max(I->BeginOffset, BeginOffset),
- std::min(I->EndOffset, EndOffset), U);
+ std::min(I->EndOffset, EndOffset), U, IsSplit);
P.use_push_back(I, NewPU);
if (isa<PHINode>(U->getUser()) || isa<SelectInst>(U->getUser()))
P.PHIOrSelectOpMap[U]
@@ -914,59 +873,63 @@ private:
}
}
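
The lookup above is the logarithmic search the class comment promises: std::lower_bound over the sorted, disjoint partitions using the ByteRange-versus-offset operator<, then one step back when the previous partition still covers the start offset. The same dance with plain ranges:

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <vector>

struct Range { uint64_t Begin, End; };

// Heterogeneous comparison, mirroring operator<(const ByteRange&, uint64_t),
// which is what allows binary-searching partitions by a raw offset.
static bool operator<(const Range &L, uint64_t Off) { return L.Begin < Off; }

int main() {
  // Disjoint, sorted partitions of a 16-byte alloca.
  std::vector<Range> Parts = {{0, 4}, {4, 12}, {12, 16}};
  uint64_t BeginOffset = 6;

  auto I = std::lower_bound(Parts.begin(), Parts.end(), BeginOffset);
  // lower_bound lands on [12,16); step back because the prior partition
  // still covers offset 6, exactly as the code above adjusts via prior().
  if (I != Parts.begin() && std::prev(I)->End > BeginOffset)
    --I;
  assert(I->Begin == 4 && I->End == 12);
}
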
- void handleLoadOrStore(Type *Ty, Instruction &I, int64_t Offset) {
- uint64_t Size = TD.getTypeStoreSize(Ty);
-
- // If this memory access can be shown to *statically* extend outside the
- // bounds of the allocation, its behavior is undefined, so simply
- // ignore it. Note that this is more strict than the generic clamping
- // behavior of insertUse.
- if (Offset < 0 || (uint64_t)Offset >= AllocSize ||
- Size > (AllocSize - (uint64_t)Offset))
- return markAsDead(I);
-
- insertUse(I, Offset, Size);
- }
-
void visitBitCastInst(BitCastInst &BC) {
if (BC.use_empty())
return markAsDead(BC);
- enqueueUsers(BC, Offset);
+ return Base::visitBitCastInst(BC);
}
void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
if (GEPI.use_empty())
return markAsDead(GEPI);
- int64_t GEPOffset;
- if (!computeConstantGEPOffset(GEPI, GEPOffset))
- llvm_unreachable("Unable to compute constant offset for use");
-
- enqueueUsers(GEPI, GEPOffset);
+ return Base::visitGetElementPtrInst(GEPI);
}
void visitLoadInst(LoadInst &LI) {
- handleLoadOrStore(LI.getType(), LI, Offset);
+ assert(IsOffsetKnown);
+ uint64_t Size = DL.getTypeStoreSize(LI.getType());
+ insertUse(LI, Offset, Size);
}
void visitStoreInst(StoreInst &SI) {
- handleLoadOrStore(SI.getOperand(0)->getType(), SI, Offset);
+ assert(IsOffsetKnown);
+ uint64_t Size = DL.getTypeStoreSize(SI.getOperand(0)->getType());
+
+ // If this memory access can be shown to *statically* extend outside the
+ // bounds of the allocation, its behavior is undefined, so simply
+ // ignore it. Note that this is more strict than the generic clamping
+ // behavior of insertUse.
+ if (Offset.isNegative() || Size > AllocSize ||
+ Offset.ugt(AllocSize - Size))
+ return markAsDead(SI);
+
+ insertUse(SI, Offset, Size);
}
void visitMemSetInst(MemSetInst &II) {
ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
- uint64_t Size = Length ? Length->getZExtValue() : AllocSize - Offset;
- insertUse(II, Offset, Size);
+ if ((Length && Length->getValue() == 0) ||
+ (IsOffsetKnown && !Offset.isNegative() && Offset.uge(AllocSize)))
+ return markAsDead(II);
+
+ assert(IsOffsetKnown);
+ insertUse(II, Offset, Length ? Length->getLimitedValue()
+ : AllocSize - Offset.getLimitedValue());
}
void visitMemTransferInst(MemTransferInst &II) {
ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
- uint64_t Size = Length ? Length->getZExtValue() : AllocSize - Offset;
- if (!Size)
+ if ((Length && Length->getValue() == 0) ||
+ (IsOffsetKnown && !Offset.isNegative() && Offset.uge(AllocSize)))
return markAsDead(II);
- MemTransferOffsets &Offsets = P.MemTransferInstData[&II];
+ assert(IsOffsetKnown);
+ uint64_t Size = Length ? Length->getLimitedValue()
+ : AllocSize - Offset.getLimitedValue();
+
+ const MemTransferOffsets &Offsets = P.MemTransferInstData[&II];
if (!II.isVolatile() && Offsets.DestEnd && Offsets.SourceEnd &&
Offsets.DestBegin == Offsets.SourceBegin)
return markAsDead(II); // Skip identity transfers without side-effects.
@@ -975,34 +938,39 @@ private:
}
void visitIntrinsicInst(IntrinsicInst &II) {
+ assert(IsOffsetKnown);
assert(II.getIntrinsicID() == Intrinsic::lifetime_start ||
II.getIntrinsicID() == Intrinsic::lifetime_end);
ConstantInt *Length = cast<ConstantInt>(II.getArgOperand(0));
- insertUse(II, Offset,
- std::min(AllocSize - Offset, Length->getLimitedValue()));
+ insertUse(II, Offset, std::min(Length->getLimitedValue(),
+ AllocSize - Offset.getLimitedValue()));
}
- void insertPHIOrSelect(Instruction &User, uint64_t Offset) {
+ void insertPHIOrSelect(Instruction &User, const APInt &Offset) {
uint64_t Size = P.PHIOrSelectSizes.lookup(&User).first;
// For PHI and select operands outside the alloca, we can't nuke the entire
// phi or select -- the other side might still be relevant, so we special
// case them here and use a separate structure to track the operands
// themselves which should be replaced with undef.
- if (Offset >= AllocSize) {
+ if ((Offset.isNegative() && Offset.uge(Size)) ||
+ (!Offset.isNegative() && Offset.uge(AllocSize))) {
P.DeadOperands.push_back(U);
return;
}
insertUse(User, Offset, Size);
}
+
void visitPHINode(PHINode &PN) {
if (PN.use_empty())
return markAsDead(PN);
+ assert(IsOffsetKnown);
insertPHIOrSelect(PN, Offset);
}
+
void visitSelectInst(SelectInst &SI) {
if (SI.use_empty())
return markAsDead(SI);
@@ -1011,7 +979,7 @@ private:
if (Result == *U)
// If the result of the constant fold will be the pointer, recurse
// through the select as if we had RAUW'ed it.
- enqueueUsers(SI, Offset);
+ enqueueUsers(SI);
else
// Otherwise the operand to the select is dead, and we can replace it
// with undef.
@@ -1020,6 +988,7 @@ private:
return;
}
+ assert(IsOffsetKnown);
insertPHIOrSelect(SI, Offset);
}
@@ -1126,13 +1095,20 @@ void AllocaPartitioning::splitAndMergePartitions() {
AllocaPartitioning::AllocaPartitioning(const DataLayout &TD, AllocaInst &AI)
:
-#ifndef NDEBUG
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
AI(AI),
#endif
PointerEscapingInstr(0) {
PartitionBuilder PB(TD, AI, *this);
- if (!PB())
+ PartitionBuilder::PtrInfo PtrI = PB.visitPtr(AI);
+ if (PtrI.isEscaped() || PtrI.isAborted()) {
+ // FIXME: We should sink the escape vs. abort info into the caller nicely,
+ // possibly by just storing the PtrInfo in the AllocaPartitioning.
+ PointerEscapingInstr = PtrI.getEscapingInst() ? PtrI.getEscapingInst()
+ : PtrI.getAbortingInst();
+ assert(PointerEscapingInstr && "Did not track a bad instruction");
return;
+ }
// Sort the uses. This arranges for the offsets to be in ascending order,
// and the sizes to be in descending order.
@@ -1162,31 +1138,45 @@ AllocaPartitioning::AllocaPartitioning(const DataLayout &TD, AllocaInst &AI)
splitAndMergePartitions();
}
+ // Record how many partitions we end up with.
+ NumAllocaPartitions += Partitions.size();
+ MaxPartitionsPerAlloca = std::max<unsigned>(Partitions.size(), MaxPartitionsPerAlloca);
+
// Now build up the user lists for each of these disjoint partitions by
// re-walking the recursive users of the alloca.
Uses.resize(Partitions.size());
UseBuilder UB(TD, AI, *this);
- UB();
+ PtrI = UB.visitPtr(AI);
+ assert(!PtrI.isEscaped() && "Previously analyzed pointer now escapes!");
+ assert(!PtrI.isAborted() && "Early aborted the visit of the pointer.");
+
+ unsigned NumUses = 0;
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
+ for (unsigned Idx = 0, Size = Uses.size(); Idx != Size; ++Idx)
+ NumUses += Uses[Idx].size();
+#endif
+ NumAllocaPartitionUses += NumUses;
+ MaxPartitionUsesPerAlloca = std::max<unsigned>(NumUses, MaxPartitionUsesPerAlloca);
}
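
The constructor now drives both walks through PtrUseVisitor::visitPtr and reads the outcome off the returned PtrInfo. A plain-C++ sketch of that three-state result (clean, aborted, or escaped), using only the PtrInfo surface this diff itself exercises; the names and string stand-ins are illustrative:

#include <cassert>

// Stand-in for PtrUseVisitor's PtrInfo: records why a pointer walk stopped.
struct PtrInfo {
  const char *EscapingInst = nullptr; // stand-ins for Instruction*
  const char *AbortingInst = nullptr;
  bool isEscaped() const { return EscapingInst != nullptr; }
  bool isAborted() const { return AbortingInst != nullptr; }
  void setAborted(const char *I) { AbortingInst = I; }
  void setEscapedAndAborted(const char *I) { EscapingInst = AbortingInst = I; }
};

int main() {
  PtrInfo PI;
  PI.setAborted("gep with non-constant index");
  // Mirrors the constructor above: record the offending instruction and
  // bail out of partitioning this alloca.
  const char *Bad = PI.isEscaped() ? PI.EscapingInst : PI.AbortingInst;
  assert(PI.isAborted() && Bad != nullptr);
}
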
Type *AllocaPartitioning::getCommonType(iterator I) const {
Type *Ty = 0;
for (const_use_iterator UI = use_begin(I), UE = use_end(I); UI != UE; ++UI) {
- if (!UI->U)
+ Use *U = UI->getUse();
+ if (!U)
continue; // Skip dead uses.
- if (isa<IntrinsicInst>(*UI->U->getUser()))
+ if (isa<IntrinsicInst>(*U->getUser()))
continue;
if (UI->BeginOffset != I->BeginOffset || UI->EndOffset != I->EndOffset)
continue;
Type *UserTy = 0;
- if (LoadInst *LI = dyn_cast<LoadInst>(UI->U->getUser())) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser()))
UserTy = LI->getType();
- } else if (StoreInst *SI = dyn_cast<StoreInst>(UI->U->getUser())) {
+ else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser()))
UserTy = SI->getValueOperand()->getType();
- } else {
+ else
return 0; // Bail if we have weird uses.
- }
if (IntegerType *ITy = dyn_cast<IntegerType>(UserTy)) {
// If the type is larger than the partition, skip it. We only encounter
@@ -1222,13 +1212,13 @@ void AllocaPartitioning::print(raw_ostream &OS, const_iterator I,
void AllocaPartitioning::printUsers(raw_ostream &OS, const_iterator I,
StringRef Indent) const {
- for (const_use_iterator UI = use_begin(I), UE = use_end(I);
- UI != UE; ++UI) {
- if (!UI->U)
+ for (const_use_iterator UI = use_begin(I), UE = use_end(I); UI != UE; ++UI) {
+ if (!UI->getUse())
continue; // Skip dead uses.
OS << Indent << " [" << UI->BeginOffset << "," << UI->EndOffset << ") "
- << "used by: " << *UI->U->getUser() << "\n";
- if (MemTransferInst *II = dyn_cast<MemTransferInst>(UI->U->getUser())) {
+ << "used by: " << *UI->getUse()->getUser() << "\n";
+ if (MemTransferInst *II =
+ dyn_cast<MemTransferInst>(UI->getUse()->getUser())) {
const MemTransferOffsets &MTO = MemTransferInstData.lookup(II);
bool IsDest;
if (!MTO.IsSplittable)
@@ -1251,8 +1241,7 @@ void AllocaPartitioning::print(raw_ostream &OS) const {
}
OS << "Partitioning of alloca: " << AI << "\n";
- unsigned Num = 0;
- for (const_iterator I = begin(), E = end(); I != E; ++I, ++Num) {
+ for (const_iterator I = begin(), E = end(); I != E; ++I) {
print(OS, I);
printUsers(OS, I);
}
@@ -1323,18 +1312,18 @@ public:
for (SmallVector<DbgValueInst *, 4>::const_iterator I = DVIs.begin(),
E = DVIs.end(); I != E; ++I) {
DbgValueInst *DVI = *I;
- Value *Arg = NULL;
+ Value *Arg = 0;
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
// If an argument is zero extended then use argument directly. The ZExt
// may be zapped by an optimization pass in future.
if (ZExtInst *ZExt = dyn_cast<ZExtInst>(SI->getOperand(0)))
Arg = dyn_cast<Argument>(ZExt->getOperand(0));
- if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
+ else if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
Arg = dyn_cast<Argument>(SExt->getOperand(0));
if (!Arg)
- Arg = SI->getOperand(0);
+ Arg = SI->getValueOperand();
} else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
- Arg = LI->getOperand(0);
+ Arg = LI->getPointerOperand();
} else {
continue;
}
@@ -1358,7 +1347,7 @@ namespace {
/// 1) It takes allocations of aggregates and analyzes the ways in which they
/// are used to try to split them into smaller allocations, ideally of
/// a single scalar data type. It will split up memcpy and memset accesses
-/// as necessary and try to isolate invidual scalar accesses.
+/// as necessary and try to isolate individual scalar accesses.
/// 2) It will transform accesses into forms which are suitable for SSA value
/// promotion. This can be replacing a memset with a scalar store of an
/// integer value, or it can involve speculating operations on a PHI or
@@ -1460,11 +1449,11 @@ public:
// may be grown during speculation. However, we never need to re-visit the
// new uses, and so we can use the initial size bound.
for (unsigned Idx = 0, Size = P.use_size(PI); Idx != Size; ++Idx) {
- const AllocaPartitioning::PartitionUse &PU = P.getUse(PI, Idx);
- if (!PU.U)
+ const PartitionUse &PU = P.getUse(PI, Idx);
+ if (!PU.getUse())
continue; // Skip dead use.
- visit(cast<Instruction>(PU.U->getUser()));
+ visit(cast<Instruction>(PU.getUse()->getUser()));
}
}
@@ -1520,8 +1509,7 @@ private:
// We can only transform this if it is safe to push the loads into the
// predecessor blocks. The only thing to watch out for is that we can't put
// a possibly trapping load in the predecessor if it is a critical edge.
- for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num;
- ++Idx) {
+ for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {
TerminatorInst *TI = PN.getIncomingBlock(Idx)->getTerminator();
Value *InVal = PN.getIncomingValue(Idx);
@@ -1559,12 +1547,12 @@ private:
assert(!Loads.empty());
Type *LoadTy = cast<PointerType>(PN.getType())->getElementType();
- IRBuilder<> PHIBuilder(&PN);
+ IRBuilderTy PHIBuilder(&PN);
PHINode *NewPN = PHIBuilder.CreatePHI(LoadTy, PN.getNumIncomingValues(),
PN.getName() + ".sroa.speculated");
// Get the TBAA tag and alignment to use from one of the loads. It doesn't
- // matter which one we get and if any differ, it doesn't matter.
+ // matter which one we get and if any differ.
LoadInst *SomeLoad = cast<LoadInst>(Loads.back());
MDNode *TBAATag = SomeLoad->getMetadata(LLVMContext::MD_tbaa);
unsigned Align = SomeLoad->getAlignment();
@@ -1582,7 +1570,7 @@ private:
TerminatorInst *TI = Pred->getTerminator();
Use *InUse = &PN.getOperandUse(PN.getOperandNumForIncomingValue(Idx));
Value *InVal = PN.getIncomingValue(Idx);
- IRBuilder<> PredBuilder(TI);
+ IRBuilderTy PredBuilder(TI);
LoadInst *Load
= PredBuilder.CreateLoad(InVal, (PN.getName() + ".sroa.speculate.load." +
@@ -1609,8 +1597,8 @@ private:
// inside the load.
AllocaPartitioning::use_iterator UI
= P.findPartitionUseForPHIOrSelectOperand(InUse);
- assert(isa<PHINode>(*UI->U->getUser()));
- UI->U = &Load->getOperandUse(Load->getPointerOperandIndex());
+ assert(isa<PHINode>(*UI->getUse()->getUser()));
+ UI->setUse(&Load->getOperandUse(Load->getPointerOperandIndex()));
}
DEBUG(dbgs() << " speculated to: " << *NewPN << "\n");
}
@@ -1657,16 +1645,16 @@ private:
void visitSelectInst(SelectInst &SI) {
DEBUG(dbgs() << " original: " << SI << "\n");
- IRBuilder<> IRB(&SI);
// If the select isn't safe to speculate, just use simple logic to emit it.
SmallVector<LoadInst *, 4> Loads;
if (!isSafeSelectToSpeculate(SI, Loads))
return;
+ IRBuilderTy IRB(&SI);
Use *Ops[2] = { &SI.getOperandUse(1), &SI.getOperandUse(2) };
AllocaPartitioning::iterator PIs[2];
- AllocaPartitioning::PartitionUse PUs[2];
+ PartitionUse PUs[2];
for (unsigned i = 0, e = 2; i != e; ++i) {
PIs[i] = P.findPartitionForPHIOrSelectOperand(Ops[i]);
if (PIs[i] != P.end()) {
@@ -1677,7 +1665,7 @@ private:
PUs[i] = *UI;
// Clear out the use here so that the offsets into the use list remain
// stable but this use is ignored when rewriting.
- UI->U = 0;
+ UI->setUse(0);
}
}
@@ -1709,8 +1697,8 @@ private:
for (unsigned i = 0, e = 2; i != e; ++i) {
if (PIs[i] != P.end()) {
Use *LoadUse = &Loads[i]->getOperandUse(0);
- assert(PUs[i].U->get() == LoadUse->get());
- PUs[i].U = LoadUse;
+ assert(PUs[i].getUse()->get() == LoadUse->get());
+ PUs[i].setUse(LoadUse);
P.use_push_back(PIs[i], PUs[i]);
}
}
@@ -1723,51 +1711,12 @@ private:
};
}
-/// \brief Accumulate the constant offsets in a GEP into a single APInt offset.
-///
-/// If the provided GEP is all-constant, the total byte offset formed by the
-/// GEP is computed and Offset is set to it. If the GEP has any non-constant
-/// operands, the function returns false and the value of Offset is unmodified.
-static bool accumulateGEPOffsets(const DataLayout &TD, GEPOperator &GEP,
- APInt &Offset) {
- APInt GEPOffset(Offset.getBitWidth(), 0);
- for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
- GTI != GTE; ++GTI) {
- ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
- if (!OpC)
- return false;
- if (OpC->isZero()) continue;
-
- // Handle a struct index, which adds its field offset to the pointer.
- if (StructType *STy = dyn_cast<StructType>(*GTI)) {
- unsigned ElementIdx = OpC->getZExtValue();
- const StructLayout *SL = TD.getStructLayout(STy);
- GEPOffset += APInt(Offset.getBitWidth(),
- SL->getElementOffset(ElementIdx));
- continue;
- }
-
- APInt TypeSize(Offset.getBitWidth(),
- TD.getTypeAllocSize(GTI.getIndexedType()));
- if (VectorType *VTy = dyn_cast<VectorType>(*GTI)) {
- assert((VTy->getScalarSizeInBits() % 8) == 0 &&
- "vector element size is not a multiple of 8, cannot GEP over it");
- TypeSize = VTy->getScalarSizeInBits() / 8;
- }
-
- GEPOffset += OpC->getValue().sextOrTrunc(Offset.getBitWidth()) * TypeSize;
- }
- Offset = GEPOffset;
- return true;
-}
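
This hand-rolled accumulator is dropped because GEPOperator::accumulateConstantOffset (used below in getAdjustedPtr) performs the same computation: sum each struct field's StructLayout offset plus each array or vector index scaled by its element's alloc size. Worked by hand for a hypothetical all-constant GEP into {i32, [4 x i64]} with indices 0, 1, 2, assuming the usual layout (the i64 array starts at byte 8 after padding):

#include <cassert>
#include <cstdint>

int main() {
  uint64_t Offset = 0;
  Offset += 0 * 40; // index 0 steps over whole 40-byte struct objects
  Offset += 8;      // field 1: StructLayout offset (4 bytes i32 + 4 padding)
  Offset += 2 * 8;  // array index 2, scaled by the 8-byte element size
  assert(Offset == 24);
}
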
-
/// \brief Build a GEP out of a base pointer and indices.
///
/// This will return the BasePtr if that is valid, or build a new GEP
/// instruction using the IRBuilder if GEP-ing is needed.
-static Value *buildGEP(IRBuilder<> &IRB, Value *BasePtr,
- SmallVectorImpl<Value *> &Indices,
- const Twine &Prefix) {
+static Value *buildGEP(IRBuilderTy &IRB, Value *BasePtr,
+ SmallVectorImpl<Value *> &Indices) {
if (Indices.empty())
return BasePtr;
@@ -1776,7 +1725,7 @@ static Value *buildGEP(IRBuilder<> &IRB, Value *BasePtr,
if (Indices.size() == 1 && cast<ConstantInt>(Indices.back())->isZero())
return BasePtr;
- return IRB.CreateInBoundsGEP(BasePtr, Indices, Prefix + ".idx");
+ return IRB.CreateInBoundsGEP(BasePtr, Indices, "idx");
}
/// \brief Get a natural GEP off of the BasePtr walking through Ty toward
@@ -1788,12 +1737,11 @@ static Value *buildGEP(IRBuilder<> &IRB, Value *BasePtr,
/// TargetTy. If we can't find one with the same type, we at least try to use
/// one with the same size. If none of that works, we just produce the GEP as
/// indicated by Indices to have the correct offset.
-static Value *getNaturalGEPWithType(IRBuilder<> &IRB, const DataLayout &TD,
+static Value *getNaturalGEPWithType(IRBuilderTy &IRB, const DataLayout &TD,
Value *BasePtr, Type *Ty, Type *TargetTy,
- SmallVectorImpl<Value *> &Indices,
- const Twine &Prefix) {
+ SmallVectorImpl<Value *> &Indices) {
if (Ty == TargetTy)
- return buildGEP(IRB, BasePtr, Indices, Prefix);
+ return buildGEP(IRB, BasePtr, Indices);
// See if we can descend into a struct and locate a field with the correct
// type.
@@ -1820,20 +1768,19 @@ static Value *getNaturalGEPWithType(IRBuilder<> &IRB, const DataLayout &TD,
if (ElementTy != TargetTy)
Indices.erase(Indices.end() - NumLayers, Indices.end());
- return buildGEP(IRB, BasePtr, Indices, Prefix);
+ return buildGEP(IRB, BasePtr, Indices);
}
/// \brief Recursively compute indices for a natural GEP.
///
/// This is the recursive step for getNaturalGEPWithOffset that walks down the
/// element types adding appropriate indices for the GEP.
-static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD,
+static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &TD,
Value *Ptr, Type *Ty, APInt &Offset,
Type *TargetTy,
- SmallVectorImpl<Value *> &Indices,
- const Twine &Prefix) {
+ SmallVectorImpl<Value *> &Indices) {
if (Offset == 0)
- return getNaturalGEPWithType(IRB, TD, Ptr, Ty, TargetTy, Indices, Prefix);
+ return getNaturalGEPWithType(IRB, TD, Ptr, Ty, TargetTy, Indices);
// We can't recurse through pointer types.
if (Ty->isPointerTy())
@@ -1843,7 +1790,7 @@ static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD,
// extremely poorly defined currently. The long-term goal is to remove GEPing
// over a vector from the IR completely.
if (VectorType *VecTy = dyn_cast<VectorType>(Ty)) {
- unsigned ElementSizeInBits = VecTy->getScalarSizeInBits();
+ unsigned ElementSizeInBits = TD.getTypeSizeInBits(VecTy->getScalarType());
if (ElementSizeInBits % 8)
return 0; // GEPs over non-multiple of 8 size vector elements are invalid.
APInt ElementSize(Offset.getBitWidth(), ElementSizeInBits / 8);
@@ -1853,7 +1800,7 @@ static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD,
Offset -= NumSkippedElements * ElementSize;
Indices.push_back(IRB.getInt(NumSkippedElements));
return getNaturalGEPRecursively(IRB, TD, Ptr, VecTy->getElementType(),
- Offset, TargetTy, Indices, Prefix);
+ Offset, TargetTy, Indices);
}
if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
@@ -1866,7 +1813,7 @@ static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD,
Offset -= NumSkippedElements * ElementSize;
Indices.push_back(IRB.getInt(NumSkippedElements));
return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy,
- Indices, Prefix);
+ Indices);
}
StructType *STy = dyn_cast<StructType>(Ty);
@@ -1885,7 +1832,7 @@ static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD,
Indices.push_back(IRB.getInt32(Index));
return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy,
- Indices, Prefix);
+ Indices);
}
/// \brief Get a natural GEP from a base pointer to a particular offset and
@@ -1898,10 +1845,9 @@ static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD,
/// Indices, and setting Ty to the result subtype.
///
/// If no natural GEP can be constructed, this function returns null.
-static Value *getNaturalGEPWithOffset(IRBuilder<> &IRB, const DataLayout &TD,
+static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &TD,
Value *Ptr, APInt Offset, Type *TargetTy,
- SmallVectorImpl<Value *> &Indices,
- const Twine &Prefix) {
+ SmallVectorImpl<Value *> &Indices) {
PointerType *Ty = cast<PointerType>(Ptr->getType());
// Don't consider any GEPs through an i8* as natural unless the TargetTy is
@@ -1920,7 +1866,7 @@ static Value *getNaturalGEPWithOffset(IRBuilder<> &IRB, const DataLayout &TD,
Offset -= NumSkippedElements * ElementSize;
Indices.push_back(IRB.getInt(NumSkippedElements));
return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy,
- Indices, Prefix);
+ Indices);
}
/// \brief Compute an adjusted pointer from Ptr by Offset bytes where the
@@ -1935,12 +1881,11 @@ static Value *getNaturalGEPWithOffset(IRBuilder<> &IRB, const DataLayout &TD,
/// The strategy for finding the more natural GEPs is to peel off layers of the
/// pointer, walking back through bit casts and GEPs, searching for a base
/// pointer from which we can compute a natural GEP with the desired
-/// properities. The algorithm tries to fold as many constant indices into
+/// properties. The algorithm tries to fold as many constant indices into
/// a single GEP as possible, thus making each GEP more independent of the
/// surrounding code.
-static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD,
- Value *Ptr, APInt Offset, Type *PointerTy,
- const Twine &Prefix) {
+static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &TD,
+ Value *Ptr, APInt Offset, Type *PointerTy) {
// Even though we don't look through PHI nodes, we could be called on an
// instruction in an unreachable block, which may be on a cycle.
SmallPtrSet<Value *, 4> Visited;
@@ -1963,7 +1908,7 @@ static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD,
// First fold any existing GEPs into the offset.
while (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) {
APInt GEPOffset(Offset.getBitWidth(), 0);
- if (!accumulateGEPOffsets(TD, *GEP, GEPOffset))
+ if (!GEP->accumulateConstantOffset(TD, GEPOffset))
break;
Offset += GEPOffset;
Ptr = GEP->getPointerOperand();
@@ -1974,7 +1919,7 @@ static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD,
// See if we can perform a natural GEP here.
Indices.clear();
if (Value *P = getNaturalGEPWithOffset(IRB, TD, Ptr, Offset, TargetTy,
- Indices, Prefix)) {
+ Indices)) {
if (P->getType() == PointerTy) {
// Zap any offset pointer that we ended up computing in previous rounds.
if (OffsetPtr && OffsetPtr->use_empty())
@@ -2009,19 +1954,19 @@ static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD,
if (!OffsetPtr) {
if (!Int8Ptr) {
Int8Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy(),
- Prefix + ".raw_cast");
+ "raw_cast");
Int8PtrOffset = Offset;
}
OffsetPtr = Int8PtrOffset == 0 ? Int8Ptr :
IRB.CreateInBoundsGEP(Int8Ptr, IRB.getInt(Int8PtrOffset),
- Prefix + ".raw_idx");
+ "raw_idx");
}
Ptr = OffsetPtr;
// On the off chance we were targeting i8*, guard the bitcast here.
if (Ptr->getType() != PointerTy)
- Ptr = IRB.CreateBitCast(Ptr, PointerTy, Prefix + ".cast");
+ Ptr = IRB.CreateBitCast(Ptr, PointerTy, "cast");
return Ptr;
}
@@ -2035,6 +1980,10 @@ static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD,
static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) {
if (OldTy == NewTy)
return true;
+ if (IntegerType *OldITy = dyn_cast<IntegerType>(OldTy))
+ if (IntegerType *NewITy = dyn_cast<IntegerType>(NewTy))
+ if (NewITy->getBitWidth() >= OldITy->getBitWidth())
+ return true;
if (DL.getTypeSizeInBits(NewTy) != DL.getTypeSizeInBits(OldTy))
return false;
if (!NewTy->isSingleValueType() || !OldTy->isSingleValueType())
@@ -2057,12 +2006,16 @@ static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) {
/// This will try various different casting techniques, such as bitcasts,
/// inttoptr, and ptrtoint casts. Use the \c canConvertValue predicate to test
/// two types for viability with this routine.
-static Value *convertValue(const DataLayout &DL, IRBuilder<> &IRB, Value *V,
+static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
Type *Ty) {
assert(canConvertValue(DL, V->getType(), Ty) &&
"Value not convertable to type");
if (V->getType() == Ty)
return V;
+ if (IntegerType *OldITy = dyn_cast<IntegerType>(V->getType()))
+ if (IntegerType *NewITy = dyn_cast<IntegerType>(Ty))
+ if (NewITy->getBitWidth() > OldITy->getBitWidth())
+ return IRB.CreateZExt(V, NewITy);
if (V->getType()->isIntegerTy() && Ty->isPointerTy())
return IRB.CreateIntToPtr(V, Ty);
if (V->getType()->isPointerTy() && Ty->isIntegerTy())
@@ -2090,19 +2043,19 @@ static bool isVectorPromotionViable(const DataLayout &TD,
if (!Ty)
return false;
- uint64_t VecSize = TD.getTypeSizeInBits(Ty);
- uint64_t ElementSize = Ty->getScalarSizeInBits();
+ uint64_t ElementSize = TD.getTypeSizeInBits(Ty->getScalarType());
// While the definition of LLVM vectors is bitpacked, we don't support sizes
// that aren't byte sized.
if (ElementSize % 8)
return false;
- assert((VecSize % 8) == 0 && "vector size not a multiple of element size?");
- VecSize /= 8;
+ assert((TD.getTypeSizeInBits(Ty) % 8) == 0 &&
+ "vector size not a multiple of element size?");
ElementSize /= 8;
for (; I != E; ++I) {
- if (!I->U)
+ Use *U = I->getUse();
+ if (!U)
continue; // Skip dead use.
uint64_t BeginOffset = I->BeginOffset - PartitionBeginOffset;
@@ -2116,30 +2069,34 @@ static bool isVectorPromotionViable(const DataLayout &TD,
EndIndex > Ty->getNumElements())
return false;
- // FIXME: We should build shuffle vector instructions to handle
- // non-element-sized accesses.
- if ((EndOffset - BeginOffset) != ElementSize &&
- (EndOffset - BeginOffset) != VecSize)
- return false;
+ assert(EndIndex > BeginIndex && "Empty vector!");
+ uint64_t NumElements = EndIndex - BeginIndex;
+ Type *PartitionTy
+ = (NumElements == 1) ? Ty->getElementType()
+ : VectorType::get(Ty->getElementType(), NumElements);
- if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) {
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
if (MI->isVolatile())
return false;
- if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I->U->getUser())) {
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U->getUser())) {
const AllocaPartitioning::MemTransferOffsets &MTO
= P.getMemTransferOffsets(*MTI);
if (!MTO.IsSplittable)
return false;
}
- } else if (I->U->get()->getType()->getPointerElementType()->isStructTy()) {
+ } else if (U->get()->getType()->getPointerElementType()->isStructTy()) {
// Disable vector promotion when there are loads or stores of an FCA.
return false;
- } else if (LoadInst *LI = dyn_cast<LoadInst>(I->U->getUser())) {
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
if (LI->isVolatile())
return false;
- } else if (StoreInst *SI = dyn_cast<StoreInst>(I->U->getUser())) {
+ if (!canConvertValue(TD, PartitionTy, LI->getType()))
+ return false;
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
if (SI->isVolatile())
return false;
+ if (!canConvertValue(TD, SI->getValueOperand()->getType(), PartitionTy))
+ return false;
} else {
return false;
}
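Annotation: the rewritten loop above derives a natural type for each use of the candidate vector: a one-element slice maps to the scalar element type, anything wider to a sub-vector, and every load or store must then pass canConvertValue against that type. A sketch of the type selection (plain C++ over type names; pickPartitionTy is invented):

    #include <string>

    // Name the type a viable load/store of the slice must convert to/from:
    // the element type for one element, otherwise a sub-vector of the same
    // element type (e.g. "<2 x float>").
    std::string pickPartitionTy(unsigned BeginIndex, unsigned EndIndex,
                                const std::string &EltTy) {
      unsigned NumElements = EndIndex - BeginIndex; // asserted non-zero above
      if (NumElements == 1)
        return EltTy;
      return "<" + std::to_string(NumElements) + " x " + EltTy + ">";
    }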
@@ -2178,13 +2135,14 @@ static bool isIntegerWideningViable(const DataLayout &TD,
uint64_t Size = TD.getTypeStoreSize(AllocaTy);
- // Check the uses to ensure the uses are (likely) promoteable integer uses.
+ // Check the uses to ensure the uses are (likely) promotable integer uses.
// Also ensure that the alloca has a covering load or store. We don't want
- // to widen the integer operotains only to fail to promote due to some other
+ // to widen the integer operations only to fail to promote due to some other
// unsplittable entry (which we may make splittable later).
bool WholeAllocaOp = false;
for (; I != E; ++I) {
- if (!I->U)
+ Use *U = I->getUse();
+ if (!U)
continue; // Skip dead use.
uint64_t RelBegin = I->BeginOffset - AllocBeginOffset;
@@ -2195,7 +2153,7 @@ static bool isIntegerWideningViable(const DataLayout &TD,
if (RelEnd > Size)
return false;
- if (LoadInst *LI = dyn_cast<LoadInst>(I->U->getUser())) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
if (LI->isVolatile())
return false;
if (RelBegin == 0 && RelEnd == Size)
@@ -2210,7 +2168,7 @@ static bool isIntegerWideningViable(const DataLayout &TD,
if (RelBegin != 0 || RelEnd != Size ||
!canConvertValue(TD, AllocaTy, LI->getType()))
return false;
- } else if (StoreInst *SI = dyn_cast<StoreInst>(I->U->getUser())) {
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
Type *ValueTy = SI->getValueOperand()->getType();
if (SI->isVolatile())
return false;
@@ -2226,16 +2184,16 @@ static bool isIntegerWideningViable(const DataLayout &TD,
if (RelBegin != 0 || RelEnd != Size ||
!canConvertValue(TD, ValueTy, AllocaTy))
return false;
- } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) {
+ } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
if (MI->isVolatile() || !isa<Constant>(MI->getLength()))
return false;
- if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I->U->getUser())) {
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U->getUser())) {
const AllocaPartitioning::MemTransferOffsets &MTO
= P.getMemTransferOffsets(*MTI);
if (!MTO.IsSplittable)
return false;
}
- } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I->U->getUser())) {
+ } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
II->getIntrinsicID() != Intrinsic::lifetime_end)
return false;
@@ -2246,7 +2204,7 @@ static bool isIntegerWideningViable(const DataLayout &TD,
return WholeAllocaOp;
}
-static Value *extractInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *V,
+static Value *extractInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
IntegerType *Ty, uint64_t Offset,
const Twine &Name) {
DEBUG(dbgs() << " start: " << *V << "\n");
@@ -2269,7 +2227,7 @@ static Value *extractInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *V,
return V;
}
-static Value *insertInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *Old,
+static Value *insertInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *Old,
Value *V, uint64_t Offset, const Twine &Name) {
IntegerType *IntTy = cast<IntegerType>(Old->getType());
IntegerType *Ty = cast<IntegerType>(V->getType());
@@ -2300,6 +2258,84 @@ static Value *insertInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *Old,
return V;
}
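Annotation: insertInteger, like the extractInteger helper above it, reduces sub-integer access to shift-and-mask arithmetic on the wide value. A runnable little-endian model in plain C++, eliding the big-endian offset adjustment the real helpers perform:

    #include <cassert>
    #include <cstdint>

    // Extract an N-byte integer at a byte offset: lshr then trunc.
    uint64_t extractInt(uint64_t Whole, unsigned Bytes, unsigned Offset) {
      assert(Bytes >= 1 && Bytes + Offset <= 8);
      uint64_t Mask =
          Bytes == 8 ? ~UINT64_C(0) : (UINT64_C(1) << (Bytes * 8)) - 1;
      return (Whole >> (Offset * 8)) & Mask;
    }

    // Insert an N-byte integer at a byte offset: zext+shl the value, mask
    // the hole out of the old value, then or the two halves together.
    uint64_t insertInt(uint64_t Old, uint64_t V, unsigned Bytes,
                       unsigned Offset) {
      assert(Bytes >= 1 && Bytes + Offset <= 8);
      uint64_t Mask =
          Bytes == 8 ? ~UINT64_C(0) : (UINT64_C(1) << (Bytes * 8)) - 1;
      return (Old & ~(Mask << (Offset * 8))) | ((V & Mask) << (Offset * 8));
    }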
+static Value *extractVector(IRBuilderTy &IRB, Value *V,
+ unsigned BeginIndex, unsigned EndIndex,
+ const Twine &Name) {
+ VectorType *VecTy = cast<VectorType>(V->getType());
+ unsigned NumElements = EndIndex - BeginIndex;
+ assert(NumElements <= VecTy->getNumElements() && "Too many elements!");
+
+ if (NumElements == VecTy->getNumElements())
+ return V;
+
+ if (NumElements == 1) {
+ V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex),
+ Name + ".extract");
+ DEBUG(dbgs() << " extract: " << *V << "\n");
+ return V;
+ }
+
+ SmallVector<Constant*, 8> Mask;
+ Mask.reserve(NumElements);
+ for (unsigned i = BeginIndex; i != EndIndex; ++i)
+ Mask.push_back(IRB.getInt32(i));
+ V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()),
+ ConstantVector::get(Mask),
+ Name + ".extract");
+ DEBUG(dbgs() << " shuffle: " << *V << "\n");
+ return V;
+}
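Annotation: for a multi-element slice the helper above emits a single shufflevector whose mask simply enumerates the slice; extracting elements [2,6) of an 8-wide vector uses mask <2, 3, 4, 5>. A sketch of the mask construction (extractMask is an invented name):

    #include <vector>

    // Build the mask extractVector feeds to CreateShuffleVector: lane i of
    // the result selects element BeginIndex + i of the source vector.
    std::vector<int> extractMask(unsigned BeginIndex, unsigned EndIndex) {
      std::vector<int> Mask;
      for (unsigned i = BeginIndex; i != EndIndex; ++i)
        Mask.push_back(static_cast<int>(i));
      return Mask; // e.g. {2, 3, 4, 5} for BeginIndex=2, EndIndex=6
    }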
+
+static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V,
+ unsigned BeginIndex, const Twine &Name) {
+ VectorType *VecTy = cast<VectorType>(Old->getType());
+ assert(VecTy && "Can only insert a vector into a vector");
+
+ VectorType *Ty = dyn_cast<VectorType>(V->getType());
+ if (!Ty) {
+ // Single element to insert.
+ V = IRB.CreateInsertElement(Old, V, IRB.getInt32(BeginIndex),
+ Name + ".insert");
+ DEBUG(dbgs() << " insert: " << *V << "\n");
+ return V;
+ }
+
+ assert(Ty->getNumElements() <= VecTy->getNumElements() &&
+ "Too many elements!");
+ if (Ty->getNumElements() == VecTy->getNumElements()) {
+ assert(V->getType() == VecTy && "Vector type mismatch");
+ return V;
+ }
+ unsigned EndIndex = BeginIndex + Ty->getNumElements();
+
+  // When inserting a smaller vector into a larger one, we first use a
+  // shufflevector to widen it with undef elements, and then a second
+  // shufflevector to select between the loaded vector and the incoming
+  // vector.
+ SmallVector<Constant*, 8> Mask;
+ Mask.reserve(VecTy->getNumElements());
+ for (unsigned i = 0; i != VecTy->getNumElements(); ++i)
+ if (i >= BeginIndex && i < EndIndex)
+ Mask.push_back(IRB.getInt32(i - BeginIndex));
+ else
+ Mask.push_back(UndefValue::get(IRB.getInt32Ty()));
+ V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()),
+ ConstantVector::get(Mask),
+ Name + ".expand");
+ DEBUG(dbgs() << " shuffle1: " << *V << "\n");
+
+ Mask.clear();
+ for (unsigned i = 0; i != VecTy->getNumElements(); ++i)
+ if (i >= BeginIndex && i < EndIndex)
+ Mask.push_back(IRB.getInt32(i));
+ else
+ Mask.push_back(IRB.getInt32(i + VecTy->getNumElements()));
+ V = IRB.CreateShuffleVector(V, Old, ConstantVector::get(Mask),
+                              Name + ".insert");
+ DEBUG(dbgs() << " shuffle2: " << *V << "\n");
+ return V;
+}
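Annotation: the two shuffles above are easiest to see with concrete numbers. Inserting a 4-wide value at index 2 of an 8-wide vector first expands it with undef lanes (mask <u,u,0,1,2,3,u,u>) and then selects between the expanded value and Old (mask <8,9,2,3,4,5,14,15>, where lanes >= 8 read from the second operand). A runnable sketch printing both masks, with -1 denoting an undef lane:

    #include <cstdio>
    #include <vector>

    int main() {
      const unsigned DstLanes = 8, Begin = 2, End = 6; // 4-wide insert at lane 2
      std::vector<int> Expand, Select;
      for (unsigned i = 0; i != DstLanes; ++i) {
        bool InSlice = i >= Begin && i < End;
        Expand.push_back(InSlice ? int(i - Begin) : -1);        // widen with undef
        Select.push_back(InSlice ? int(i) : int(i + DstLanes)); // else Old's lane
      }
      for (int M : Expand) std::printf("%2d ", M); // -1 -1  0  1  2  3 -1 -1
      std::printf("\n");
      for (int M : Select) std::printf("%2d ", M); //  8  9  2  3  4  5 14 15
      std::printf("\n");
      return 0;
    }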
+
namespace {
/// \brief Visitor to rewrite instructions using a partition of an alloca to
/// use a new alloca.
@@ -2321,7 +2357,7 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter,
// If we are rewriting an alloca partition which can be written as pure
// vector operations, we stash extra information here. When VecTy is
- // non-null, we have some strict guarantees about the rewriten alloca:
+ // non-null, we have some strict guarantees about the rewritten alloca:
// - The new alloca is exactly the size of the vector type here.
// - The accesses all either map to the entire vector or to a single
// element.
@@ -2340,11 +2376,13 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter,
// The offset of the partition user currently being rewritten.
uint64_t BeginOffset, EndOffset;
+ bool IsSplit;
Use *OldUse;
Instruction *OldPtr;
- // The name prefix to use when rewriting instructions for this alloca.
- std::string NamePrefix;
+  // Utility IR builder whose name prefix is set up for each visited use, and
+  // whose insertion point is set to that user.
+ IRBuilderTy IRB;
public:
AllocaPartitionRewriter(const DataLayout &TD, AllocaPartitioning &P,
@@ -2357,7 +2395,8 @@ public:
NewAllocaEndOffset(NewEndOffset),
NewAllocaTy(NewAI.getAllocatedType()),
VecTy(), ElementTy(), ElementSize(), IntTy(),
- BeginOffset(), EndOffset() {
+ BeginOffset(), EndOffset(), IsSplit(), OldUse(), OldPtr(),
+ IRB(NewAI.getContext(), ConstantFolder()) {
}
/// \brief Visit the users of the alloca partition and rewrite them.
@@ -2369,9 +2408,9 @@ public:
++NumVectorized;
VecTy = cast<VectorType>(NewAI.getAllocatedType());
ElementTy = VecTy->getElementType();
- assert((VecTy->getScalarSizeInBits() % 8) == 0 &&
+ assert((TD.getTypeSizeInBits(VecTy->getScalarType()) % 8) == 0 &&
"Only multiple-of-8 sized vector elements are viable");
- ElementSize = VecTy->getScalarSizeInBits() / 8;
+ ElementSize = TD.getTypeSizeInBits(VecTy->getScalarType()) / 8;
} else if (isIntegerWideningViable(TD, NewAI.getAllocatedType(),
NewAllocaBeginOffset, P, I, E)) {
IntTy = Type::getIntNTy(NewAI.getContext(),
@@ -2379,14 +2418,21 @@ public:
}
bool CanSROA = true;
for (; I != E; ++I) {
- if (!I->U)
+ if (!I->getUse())
continue; // Skip dead uses.
BeginOffset = I->BeginOffset;
EndOffset = I->EndOffset;
- OldUse = I->U;
- OldPtr = cast<Instruction>(I->U->get());
- NamePrefix = (Twine(NewAI.getName()) + "." + Twine(BeginOffset)).str();
- CanSROA &= visit(cast<Instruction>(I->U->getUser()));
+ IsSplit = I->isSplit();
+ OldUse = I->getUse();
+ OldPtr = cast<Instruction>(OldUse->get());
+
+ Instruction *OldUserI = cast<Instruction>(OldUse->getUser());
+ IRB.SetInsertPoint(OldUserI);
+ IRB.SetCurrentDebugLocation(OldUserI->getDebugLoc());
+ IRB.SetNamePrefix(Twine(NewAI.getName()) + "." + Twine(BeginOffset) +
+ ".");
+
+ CanSROA &= visit(cast<Instruction>(OldUse->getUser()));
}
if (VecTy) {
assert(CanSROA);
@@ -2408,14 +2454,10 @@ private:
llvm_unreachable("No rewrite rule for this instruction!");
}
- Twine getName(const Twine &Suffix) {
- return NamePrefix + Suffix;
- }
-
- Value *getAdjustedAllocaPtr(IRBuilder<> &IRB, Type *PointerTy) {
+ Value *getAdjustedAllocaPtr(IRBuilderTy &IRB, Type *PointerTy) {
assert(BeginOffset >= NewAllocaBeginOffset);
APInt Offset(TD.getPointerSizeInBits(), BeginOffset - NewAllocaBeginOffset);
- return getAdjustedPtr(IRB, TD, &NewAI, Offset, PointerTy, getName(""));
+ return getAdjustedPtr(IRB, TD, &NewAI, Offset, PointerTy);
}
/// \brief Compute suitable alignment to access an offset into the new alloca.
@@ -2450,13 +2492,13 @@ private:
return getOffsetTypeAlign(Ty, BeginOffset - NewAllocaBeginOffset);
}
- ConstantInt *getIndex(IRBuilder<> &IRB, uint64_t Offset) {
+ unsigned getIndex(uint64_t Offset) {
assert(VecTy && "Can only call getIndex when rewriting a vector");
uint64_t RelOffset = Offset - NewAllocaBeginOffset;
assert(RelOffset / ElementSize < UINT32_MAX && "Index out of bounds");
uint32_t Index = RelOffset / ElementSize;
assert(Index * ElementSize == RelOffset);
- return IRB.getInt32(Index);
+ return Index;
}
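Annotation: getIndex now returns a plain unsigned, and the arithmetic is worth spelling out: with 4-byte elements, a use covering bytes [8,16) of the new alloca maps to indices 2 and 4. A minimal model (byteOffsetToIndex is an invented name):

    #include <cassert>
    #include <cstdint>

    // Byte offset within the new alloca -> vector element index. Offsets
    // that are not element-aligned would fire the assert, as in the real
    // helper above.
    unsigned byteOffsetToIndex(uint64_t Offset, uint64_t AllocaBegin,
                               uint64_t ElementSize) {
      uint64_t RelOffset = Offset - AllocaBegin;
      assert(RelOffset % ElementSize == 0 && "offset not element-aligned");
      return static_cast<unsigned>(RelOffset / ElementSize);
    }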
void deleteIfTriviallyDead(Value *V) {
@@ -2465,28 +2507,27 @@ private:
Pass.DeadInsts.insert(I);
}
- Value *rewriteVectorizedLoadInst(IRBuilder<> &IRB, LoadInst &LI, Value *OldOp) {
+ Value *rewriteVectorizedLoadInst() {
+ unsigned BeginIndex = getIndex(BeginOffset);
+ unsigned EndIndex = getIndex(EndOffset);
+ assert(EndIndex > BeginIndex && "Empty vector!");
+
Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- getName(".load"));
- if (LI.getType() == VecTy->getElementType() ||
- BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset) {
- V = IRB.CreateExtractElement(V, getIndex(IRB, BeginOffset),
- getName(".extract"));
- }
- return V;
+ "load");
+ return extractVector(IRB, V, BeginIndex, EndIndex, "vec");
}
- Value *rewriteIntegerLoad(IRBuilder<> &IRB, LoadInst &LI) {
+ Value *rewriteIntegerLoad(LoadInst &LI) {
assert(IntTy && "We cannot insert an integer to the alloca");
assert(!LI.isVolatile());
Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- getName(".load"));
+ "load");
V = convertValue(TD, IRB, V, IntTy);
assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
if (Offset > 0 || EndOffset < NewAllocaEndOffset)
V = extractInteger(TD, IRB, V, cast<IntegerType>(LI.getType()), Offset,
- getName(".extract"));
+ "extract");
return V;
}
@@ -2494,58 +2535,39 @@ private:
DEBUG(dbgs() << " original: " << LI << "\n");
Value *OldOp = LI.getOperand(0);
assert(OldOp == OldPtr);
- IRBuilder<> IRB(&LI);
uint64_t Size = EndOffset - BeginOffset;
- bool IsSplitIntLoad = Size < TD.getTypeStoreSize(LI.getType());
-
- // If this memory access can be shown to *statically* extend outside the
- // bounds of the original allocation it's behavior is undefined. Rather
- // than trying to transform it, just replace it with undef.
- // FIXME: We should do something more clever for functions being
- // instrumented by asan.
- // FIXME: Eventually, once ASan and friends can flush out bugs here, this
- // should be transformed to a load of null making it unreachable.
- uint64_t OldAllocSize = TD.getTypeAllocSize(OldAI.getAllocatedType());
- if (TD.getTypeStoreSize(LI.getType()) > OldAllocSize) {
- LI.replaceAllUsesWith(UndefValue::get(LI.getType()));
- Pass.DeadInsts.insert(&LI);
- deleteIfTriviallyDead(OldOp);
- DEBUG(dbgs() << " to: undef!!\n");
- return true;
- }
- Type *TargetTy = IsSplitIntLoad ? Type::getIntNTy(LI.getContext(), Size * 8)
- : LI.getType();
+ Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), Size * 8)
+ : LI.getType();
bool IsPtrAdjusted = false;
Value *V;
if (VecTy) {
- V = rewriteVectorizedLoadInst(IRB, LI, OldOp);
+ V = rewriteVectorizedLoadInst();
} else if (IntTy && LI.getType()->isIntegerTy()) {
- V = rewriteIntegerLoad(IRB, LI);
+ V = rewriteIntegerLoad(LI);
} else if (BeginOffset == NewAllocaBeginOffset &&
canConvertValue(TD, NewAllocaTy, LI.getType())) {
V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- LI.isVolatile(), getName(".load"));
+ LI.isVolatile(), "load");
} else {
Type *LTy = TargetTy->getPointerTo();
V = IRB.CreateAlignedLoad(getAdjustedAllocaPtr(IRB, LTy),
getPartitionTypeAlign(TargetTy),
- LI.isVolatile(), getName(".load"));
+ LI.isVolatile(), "load");
IsPtrAdjusted = true;
}
V = convertValue(TD, IRB, V, TargetTy);
- if (IsSplitIntLoad) {
+ if (IsSplit) {
assert(!LI.isVolatile());
assert(LI.getType()->isIntegerTy() &&
"Only integer type loads and stores are split");
+ assert(Size < TD.getTypeStoreSize(LI.getType()) &&
+ "Split load isn't smaller than original load");
assert(LI.getType()->getIntegerBitWidth() ==
TD.getTypeStoreSizeInBits(LI.getType()) &&
"Non-byte-multiple bit width");
- assert(LI.getType()->getIntegerBitWidth() ==
- TD.getTypeAllocSizeInBits(OldAI.getAllocatedType()) &&
- "Only alloca-wide loads can be split and recomposed");
// Move the insertion point just past the load so that we can refer to it.
IRB.SetInsertPoint(llvm::next(BasicBlock::iterator(&LI)));
// Create a placeholder value with the same type as LI to use as the
@@ -2555,7 +2577,7 @@ private:
Value *Placeholder
= new LoadInst(UndefValue::get(LI.getType()->getPointerTo()));
V = insertInteger(TD, IRB, Placeholder, V, BeginOffset,
- getName(".insert"));
+ "insert");
LI.replaceAllUsesWith(V);
Placeholder->replaceAllUsesWith(&LI);
delete Placeholder;
@@ -2569,19 +2591,24 @@ private:
return !LI.isVolatile() && !IsPtrAdjusted;
}
- bool rewriteVectorizedStoreInst(IRBuilder<> &IRB, Value *V,
+ bool rewriteVectorizedStoreInst(Value *V,
StoreInst &SI, Value *OldOp) {
- if (V->getType() == ElementTy ||
- BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset) {
- if (V->getType() != ElementTy)
- V = convertValue(TD, IRB, V, ElementTy);
- LoadInst *LI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- getName(".load"));
- V = IRB.CreateInsertElement(LI, V, getIndex(IRB, BeginOffset),
- getName(".insert"));
- } else if (V->getType() != VecTy) {
- V = convertValue(TD, IRB, V, VecTy);
- }
+ unsigned BeginIndex = getIndex(BeginOffset);
+ unsigned EndIndex = getIndex(EndOffset);
+ assert(EndIndex > BeginIndex && "Empty vector!");
+ unsigned NumElements = EndIndex - BeginIndex;
+ assert(NumElements <= VecTy->getNumElements() && "Too many elements!");
+ Type *PartitionTy
+ = (NumElements == 1) ? ElementTy
+ : VectorType::get(ElementTy, NumElements);
+ if (V->getType() != PartitionTy)
+ V = convertValue(TD, IRB, V, PartitionTy);
+
+ // Mix in the existing elements.
+ Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ "load");
+ V = insertVector(IRB, Old, V, BeginIndex, "vec");
+
StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
Pass.DeadInsts.insert(&SI);
@@ -2590,17 +2617,17 @@ private:
return true;
}
- bool rewriteIntegerStore(IRBuilder<> &IRB, Value *V, StoreInst &SI) {
+ bool rewriteIntegerStore(Value *V, StoreInst &SI) {
assert(IntTy && "We cannot extract an integer from the alloca");
assert(!SI.isVolatile());
if (TD.getTypeSizeInBits(V->getType()) != IntTy->getBitWidth()) {
Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- getName(".oldload"));
+ "oldload");
Old = convertValue(TD, IRB, Old, IntTy);
assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
V = insertInteger(TD, IRB, Old, SI.getValueOperand(), Offset,
- getName(".insert"));
+ "insert");
}
V = convertValue(TD, IRB, V, NewAllocaTy);
StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
@@ -2614,7 +2641,6 @@ private:
DEBUG(dbgs() << " original: " << SI << "\n");
Value *OldOp = SI.getOperand(1);
assert(OldOp == OldPtr);
- IRBuilder<> IRB(&SI);
Value *V = SI.getValueOperand();
@@ -2627,23 +2653,21 @@ private:
uint64_t Size = EndOffset - BeginOffset;
if (Size < TD.getTypeStoreSize(V->getType())) {
assert(!SI.isVolatile());
+ assert(IsSplit && "A seemingly split store isn't splittable");
assert(V->getType()->isIntegerTy() &&
"Only integer type loads and stores are split");
assert(V->getType()->getIntegerBitWidth() ==
TD.getTypeStoreSizeInBits(V->getType()) &&
"Non-byte-multiple bit width");
- assert(V->getType()->getIntegerBitWidth() ==
- TD.getTypeSizeInBits(OldAI.getAllocatedType()) &&
- "Only alloca-wide stores can be split and recomposed");
IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), Size * 8);
V = extractInteger(TD, IRB, V, NarrowTy, BeginOffset,
- getName(".extract"));
+ "extract");
}
if (VecTy)
- return rewriteVectorizedStoreInst(IRB, V, SI, OldOp);
+ return rewriteVectorizedStoreInst(V, SI, OldOp);
if (IntTy && V->getType()->isIntegerTy())
- return rewriteIntegerStore(IRB, V, SI);
+ return rewriteIntegerStore(V, SI);
StoreInst *NewSI;
if (BeginOffset == NewAllocaBeginOffset &&
@@ -2665,9 +2689,42 @@ private:
return NewSI->getPointerOperand() == &NewAI && !SI.isVolatile();
}
+ /// \brief Compute an integer value from splatting an i8 across the given
+ /// number of bytes.
+ ///
+ /// Note that this routine assumes an i8 is a byte. If that isn't true, don't
+ /// call this routine.
+ /// FIXME: Heed the advice above.
+ ///
+ /// \param V The i8 value to splat.
+ /// \param Size The number of bytes in the output (assuming i8 is one byte)
+ Value *getIntegerSplat(Value *V, unsigned Size) {
+ assert(Size > 0 && "Expected a positive number of bytes.");
+ IntegerType *VTy = cast<IntegerType>(V->getType());
+ assert(VTy->getBitWidth() == 8 && "Expected an i8 value for the byte");
+ if (Size == 1)
+ return V;
+
+ Type *SplatIntTy = Type::getIntNTy(VTy->getContext(), Size*8);
+ V = IRB.CreateMul(IRB.CreateZExt(V, SplatIntTy, "zext"),
+ ConstantExpr::getUDiv(
+ Constant::getAllOnesValue(SplatIntTy),
+ ConstantExpr::getZExt(
+ Constant::getAllOnesValue(V->getType()),
+ SplatIntTy)),
+ "isplat");
+ return V;
+ }
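Annotation: the constant expression above computes the classic byte-splat multiplier: allOnes(iN) udiv zext(allOnes(i8)) is 0x0101...01, so multiplying the byte by it replicates it into every byte of the wide integer. A runnable check of the identity for a 64-bit splat:

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    int main() {
      // getAllOnesValue(i64) udiv zext(getAllOnesValue(i8))
      uint64_t Multiplier = ~UINT64_C(0) / UINT64_C(0xFF);
      assert(Multiplier == UINT64_C(0x0101010101010101));
      uint64_t Splat = UINT64_C(0xAB) * Multiplier; // the zext'ed memset byte
      std::printf("%016llx\n", (unsigned long long)Splat); // abababababababab
      return 0;
    }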
+
+ /// \brief Compute a vector splat for a given element value.
+ Value *getVectorSplat(Value *V, unsigned NumElements) {
+ V = IRB.CreateVectorSplat(NumElements, V, "vsplat");
+ DEBUG(dbgs() << " splat: " << *V << "\n");
+ return V;
+ }
+
bool visitMemSetInst(MemSetInst &II) {
DEBUG(dbgs() << " original: " << II << "\n");
- IRBuilder<> IRB(&II);
assert(II.getRawDest() == OldPtr);
// If the memset has a variable size, it cannot be split, just adjust the
@@ -2693,7 +2750,8 @@ private:
(BeginOffset != NewAllocaBeginOffset ||
EndOffset != NewAllocaEndOffset ||
!AllocaTy->isSingleValueType() ||
- !TD.isLegalInteger(TD.getTypeSizeInBits(ScalarTy)))) {
+ !TD.isLegalInteger(TD.getTypeSizeInBits(ScalarTy)) ||
+ TD.getTypeSizeInBits(ScalarTy)%8 != 0)) {
Type *SizeTy = II.getLength()->getType();
Constant *Size = ConstantInt::get(SizeTy, EndOffset - BeginOffset);
CallInst *New
@@ -2709,53 +2767,61 @@ private:
// If we can represent this as a simple value, we have to build the actual
// value to store, which requires expanding the byte present in memset to
// a sensible representation for the alloca type. This is essentially
- // splatting the byte to a sufficiently wide integer, bitcasting to the
- // desired scalar type, and splatting it across any desired vector type.
- uint64_t Size = EndOffset - BeginOffset;
- Value *V = II.getValue();
- IntegerType *VTy = cast<IntegerType>(V->getType());
- Type *SplatIntTy = Type::getIntNTy(VTy->getContext(), Size*8);
- if (Size*8 > VTy->getBitWidth())
- V = IRB.CreateMul(IRB.CreateZExt(V, SplatIntTy, getName(".zext")),
- ConstantExpr::getUDiv(
- Constant::getAllOnesValue(SplatIntTy),
- ConstantExpr::getZExt(
- Constant::getAllOnesValue(V->getType()),
- SplatIntTy)),
- getName(".isplat"));
-
- // If this is an element-wide memset of a vectorizable alloca, insert it.
- if (VecTy && (BeginOffset > NewAllocaBeginOffset ||
- EndOffset < NewAllocaEndOffset)) {
- if (V->getType() != ScalarTy)
- V = convertValue(TD, IRB, V, ScalarTy);
- StoreInst *Store = IRB.CreateAlignedStore(
- IRB.CreateInsertElement(IRB.CreateAlignedLoad(&NewAI,
- NewAI.getAlignment(),
- getName(".load")),
- V, getIndex(IRB, BeginOffset),
- getName(".insert")),
- &NewAI, NewAI.getAlignment());
- (void)Store;
- DEBUG(dbgs() << " to: " << *Store << "\n");
- return true;
- }
+ // splatting the byte to a sufficiently wide integer, splatting it across
+ // any desired vector width, and bitcasting to the final type.
+ Value *V;
+
+ if (VecTy) {
+ // If this is a memset of a vectorized alloca, insert it.
+ assert(ElementTy == ScalarTy);
+
+ unsigned BeginIndex = getIndex(BeginOffset);
+ unsigned EndIndex = getIndex(EndOffset);
+ assert(EndIndex > BeginIndex && "Empty vector!");
+ unsigned NumElements = EndIndex - BeginIndex;
+ assert(NumElements <= VecTy->getNumElements() && "Too many elements!");
+
+ Value *Splat =
+ getIntegerSplat(II.getValue(), TD.getTypeSizeInBits(ElementTy) / 8);
+ Splat = convertValue(TD, IRB, Splat, ElementTy);
+ if (NumElements > 1)
+ Splat = getVectorSplat(Splat, NumElements);
- // If this is a memset on an alloca where we can widen stores, insert the
- // set integer.
- if (IntTy && (BeginOffset > NewAllocaBeginOffset ||
- EndOffset < NewAllocaEndOffset)) {
- assert(!II.isVolatile());
Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- getName(".oldload"));
- Old = convertValue(TD, IRB, Old, IntTy);
- assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
- uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
- V = insertInteger(TD, IRB, Old, V, Offset, getName(".insert"));
- }
+ "oldload");
+ V = insertVector(IRB, Old, Splat, BeginIndex, "vec");
+ } else if (IntTy) {
+ // If this is a memset on an alloca where we can widen stores, insert the
+ // set integer.
+ assert(!II.isVolatile());
+
+ uint64_t Size = EndOffset - BeginOffset;
+ V = getIntegerSplat(II.getValue(), Size);
+
+ if (IntTy && (BeginOffset != NewAllocaBeginOffset ||
+                    EndOffset != NewAllocaEndOffset)) {
+ Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ "oldload");
+ Old = convertValue(TD, IRB, Old, IntTy);
+ assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
+ uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
+ V = insertInteger(TD, IRB, Old, V, Offset, "insert");
+ } else {
+ assert(V->getType() == IntTy &&
+ "Wrong type for an alloca wide integer!");
+ }
+ V = convertValue(TD, IRB, V, AllocaTy);
+ } else {
+ // Established these invariants above.
+ assert(BeginOffset == NewAllocaBeginOffset);
+ assert(EndOffset == NewAllocaEndOffset);
+
+ V = getIntegerSplat(II.getValue(), TD.getTypeSizeInBits(ScalarTy) / 8);
+ if (VectorType *AllocaVecTy = dyn_cast<VectorType>(AllocaTy))
+ V = getVectorSplat(V, AllocaVecTy->getNumElements());
- if (V->getType() != AllocaTy)
V = convertValue(TD, IRB, V, AllocaTy);
+ }
Value *New = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(),
II.isVolatile());
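Annotation: the rewritten memset now dispatches on the partition's promotion strategy instead of special-casing sizes: a vector partition inserts a (possibly vector-)splatted element, an integer-widened partition merges the splat into the old wide value, and a whole-alloca access just converts the splat to the alloca type. A condensed sketch of that dispatch (plain C++; the enum and names are invented, and this paraphrases rather than reproduces the patch):

    #include <string>

    enum class Strategy { Vector, WideInteger, WholeAlloca };

    // Which sequence the rewritten visitMemSetInst emits for a splattable set.
    std::string memsetLowering(Strategy S, bool CoversWholePartition) {
      switch (S) {
      case Strategy::Vector:      // splat element, widen, insertVector
        return CoversWholePartition ? "store splat vector"
                                    : "load old, insertVector(splat), store";
      case Strategy::WideInteger: // splat bytes, insertInteger into old value
        return CoversWholePartition ? "store splat integer"
                                    : "load old, insertInteger(splat), store";
      case Strategy::WholeAlloca: // begin/end match the new alloca exactly
        return "store convertValue(splat, AllocaTy)";
      }
      return "unreachable";
    }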
@@ -2769,7 +2835,6 @@ private:
// them into two categories: split intrinsics and unsplit intrinsics.
DEBUG(dbgs() << " original: " << II << "\n");
- IRBuilder<> IRB(&II);
assert(II.getRawSource() == OldPtr || II.getRawDest() == OldPtr);
bool IsDest = II.getRawDest() == OldPtr;
@@ -2840,37 +2905,21 @@ private:
// Record this instruction for deletion.
Pass.DeadInsts.insert(&II);
- bool IsWholeAlloca = BeginOffset == NewAllocaBeginOffset &&
- EndOffset == NewAllocaEndOffset;
- bool IsVectorElement = VecTy && !IsWholeAlloca;
- uint64_t Size = EndOffset - BeginOffset;
- IntegerType *SubIntTy
- = IntTy ? Type::getIntNTy(IntTy->getContext(), Size*8) : 0;
-
- Type *OtherPtrTy = IsDest ? II.getRawSource()->getType()
- : II.getRawDest()->getType();
- if (!EmitMemCpy) {
- if (IsVectorElement)
- OtherPtrTy = VecTy->getElementType()->getPointerTo();
- else if (IntTy && !IsWholeAlloca)
- OtherPtrTy = SubIntTy->getPointerTo();
- else
- OtherPtrTy = NewAI.getType();
- }
-
- // Compute the other pointer, folding as much as possible to produce
- // a single, simple GEP in most cases.
- Value *OtherPtr = IsDest ? II.getRawSource() : II.getRawDest();
- OtherPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy,
- getName("." + OtherPtr->getName()));
-
// Strip all inbounds GEPs and pointer casts to try to dig out any root
// alloca that should be re-examined after rewriting this instruction.
+ Value *OtherPtr = IsDest ? II.getRawSource() : II.getRawDest();
if (AllocaInst *AI
= dyn_cast<AllocaInst>(OtherPtr->stripInBoundsOffsets()))
Pass.Worklist.insert(AI);
if (EmitMemCpy) {
+ Type *OtherPtrTy = IsDest ? II.getRawSource()->getType()
+ : II.getRawDest()->getType();
+
+ // Compute the other pointer, folding as much as possible to produce
+ // a single, simple GEP in most cases.
+ OtherPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy);
+
Value *OurPtr
= getAdjustedAllocaPtr(IRB, IsDest ? II.getRawDest()->getType()
: II.getRawSource()->getType());
@@ -2891,48 +2940,63 @@ private:
if (!Align)
Align = 1;
- Value *SrcPtr = OtherPtr;
+ bool IsWholeAlloca = BeginOffset == NewAllocaBeginOffset &&
+ EndOffset == NewAllocaEndOffset;
+ uint64_t Size = EndOffset - BeginOffset;
+ unsigned BeginIndex = VecTy ? getIndex(BeginOffset) : 0;
+ unsigned EndIndex = VecTy ? getIndex(EndOffset) : 0;
+ unsigned NumElements = EndIndex - BeginIndex;
+ IntegerType *SubIntTy
+ = IntTy ? Type::getIntNTy(IntTy->getContext(), Size*8) : 0;
+
+ Type *OtherPtrTy = NewAI.getType();
+ if (VecTy && !IsWholeAlloca) {
+ if (NumElements == 1)
+ OtherPtrTy = VecTy->getElementType();
+ else
+ OtherPtrTy = VectorType::get(VecTy->getElementType(), NumElements);
+
+ OtherPtrTy = OtherPtrTy->getPointerTo();
+ } else if (IntTy && !IsWholeAlloca) {
+ OtherPtrTy = SubIntTy->getPointerTo();
+ }
+
+ Value *SrcPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy);
Value *DstPtr = &NewAI;
if (!IsDest)
std::swap(SrcPtr, DstPtr);
Value *Src;
- if (IsVectorElement && !IsDest) {
- // We have to extract rather than load.
- Src = IRB.CreateExtractElement(
- IRB.CreateAlignedLoad(SrcPtr, Align, getName(".copyload")),
- getIndex(IRB, BeginOffset),
- getName(".copyextract"));
+ if (VecTy && !IsWholeAlloca && !IsDest) {
+ Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ "load");
+ Src = extractVector(IRB, Src, BeginIndex, EndIndex, "vec");
} else if (IntTy && !IsWholeAlloca && !IsDest) {
Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- getName(".load"));
+ "load");
Src = convertValue(TD, IRB, Src, IntTy);
assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
- Src = extractInteger(TD, IRB, Src, SubIntTy, Offset, getName(".extract"));
+ Src = extractInteger(TD, IRB, Src, SubIntTy, Offset, "extract");
} else {
Src = IRB.CreateAlignedLoad(SrcPtr, Align, II.isVolatile(),
- getName(".copyload"));
+ "copyload");
}
- if (IntTy && !IsWholeAlloca && IsDest) {
+ if (VecTy && !IsWholeAlloca && IsDest) {
Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- getName(".oldload"));
+ "oldload");
+ Src = insertVector(IRB, Old, Src, BeginIndex, "vec");
+ } else if (IntTy && !IsWholeAlloca && IsDest) {
+ Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ "oldload");
Old = convertValue(TD, IRB, Old, IntTy);
assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
- Src = insertInteger(TD, IRB, Old, Src, Offset, getName(".insert"));
+ Src = insertInteger(TD, IRB, Old, Src, Offset, "insert");
Src = convertValue(TD, IRB, Src, NewAllocaTy);
}
- if (IsVectorElement && IsDest) {
- // We have to insert into a loaded copy before storing.
- Src = IRB.CreateInsertElement(
- IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".load")),
- Src, getIndex(IRB, BeginOffset),
- getName(".insert"));
- }
-
StoreInst *Store = cast<StoreInst>(
IRB.CreateAlignedStore(Src, DstPtr, Align, II.isVolatile()));
(void)Store;
@@ -2944,7 +3008,6 @@ private:
assert(II.getIntrinsicID() == Intrinsic::lifetime_start ||
II.getIntrinsicID() == Intrinsic::lifetime_end);
DEBUG(dbgs() << " original: " << II << "\n");
- IRBuilder<> IRB(&II);
assert(II.getArgOperand(1) == OldPtr);
// Record this instruction for deletion.
@@ -2960,6 +3023,7 @@ private:
else
New = IRB.CreateLifetimeEnd(Ptr, Size);
+ (void)New;
DEBUG(dbgs() << " to: " << *New << "\n");
return true;
}
@@ -2971,7 +3035,9 @@ private:
// as local as possible to the PHI. To do that, we re-use the location of
// the old pointer, which necessarily must be in the right position to
// dominate the PHI.
- IRBuilder<> PtrBuilder(cast<Instruction>(OldPtr));
+ IRBuilderTy PtrBuilder(cast<Instruction>(OldPtr));
+ PtrBuilder.SetNamePrefix(Twine(NewAI.getName()) + "." + Twine(BeginOffset) +
+ ".");
Value *NewPtr = getAdjustedAllocaPtr(PtrBuilder, OldPtr->getType());
// Replace the operands which were using the old pointer.
@@ -2984,7 +3050,6 @@ private:
bool visitSelectInst(SelectInst &SI) {
DEBUG(dbgs() << " original: " << SI << "\n");
- IRBuilder<> IRB(&SI);
// Find the operand we need to rewrite here.
bool IsTrueVal = SI.getTrueValue() == OldPtr;
@@ -3059,7 +3124,7 @@ private:
class OpSplitter {
protected:
/// The builder used to form new instructions.
- IRBuilder<> IRB;
+ IRBuilderTy IRB;
/// The indices which to be used with insert- or extractvalue to select the
/// appropriate value within the aggregate.
SmallVector<unsigned, 4> Indices;
@@ -3136,9 +3201,8 @@ private:
void emitFunc(Type *Ty, Value *&Agg, const Twine &Name) {
assert(Ty->isSingleValueType());
// Load the single value and insert it using the indices.
- Value *Load = IRB.CreateLoad(IRB.CreateInBoundsGEP(Ptr, GEPIndices,
- Name + ".gep"),
- Name + ".load");
+ Value *GEP = IRB.CreateInBoundsGEP(Ptr, GEPIndices, Name + ".gep");
+ Value *Load = IRB.CreateLoad(GEP, Name + ".load");
Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");
DEBUG(dbgs() << " to: " << *Load << "\n");
}
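Annotation: emitFunc above is the leaf case of OpSplitter's recursion: the splitter walks the aggregate type, carrying an index path, and emits one scalar GEP plus load (or store) per leaf. A standalone model of that walk in plain C++ (Node and split are invented; the real code recurses over StructType and ArrayType):

    #include <cstdio>
    #include <vector>

    // A toy aggregate: either a scalar leaf or a list of fields.
    struct Node {
      bool IsLeaf;
      std::vector<Node> Fields;
    };

    // Mirrors the splitter's recursion: descend into each field, extending
    // the index path, and act on leaves with the accumulated indices (which
    // the real code feeds to CreateInBoundsGEP and insert/extractvalue).
    void split(const Node &N, std::vector<unsigned> &Indices) {
      if (N.IsLeaf) {
        std::printf("emit scalar op at indices:");
        for (unsigned I : Indices)
          std::printf(" %u", I);
        std::printf("\n");
        return;
      }
      for (unsigned i = 0; i != N.Fields.size(); ++i) {
        Indices.push_back(i);   // descend into field i
        split(N.Fields[i], Indices);
        Indices.pop_back();     // restore for the next sibling
      }
    }

    int main() {
      Node Leaf{true, {}};
      Node Agg{false, {Leaf, Node{false, {Leaf, Leaf}}}}; // {i32, {i32, i32}}
      std::vector<unsigned> Indices;
      split(Agg, Indices);
      return 0;
    }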
@@ -3272,12 +3336,13 @@ static Type *getTypePartition(const DataLayout &TD, Type *Ty,
Type *ElementTy = SeqTy->getElementType();
uint64_t ElementSize = TD.getTypeAllocSize(ElementTy);
uint64_t NumSkippedElements = Offset / ElementSize;
- if (ArrayType *ArrTy = dyn_cast<ArrayType>(SeqTy))
+ if (ArrayType *ArrTy = dyn_cast<ArrayType>(SeqTy)) {
if (NumSkippedElements >= ArrTy->getNumElements())
return 0;
- if (VectorType *VecTy = dyn_cast<VectorType>(SeqTy))
+ } else if (VectorType *VecTy = dyn_cast<VectorType>(SeqTy)) {
if (NumSkippedElements >= VecTy->getNumElements())
return 0;
+ }
Offset -= NumSkippedElements * ElementSize;
// First check if we need to recurse.
@@ -3375,7 +3440,7 @@ bool SROA::rewriteAllocaPartition(AllocaInst &AI,
for (AllocaPartitioning::use_iterator UI = P.use_begin(PI),
UE = P.use_end(PI);
UI != UE && !IsLive; ++UI)
- if (UI->U)
+ if (UI->getUse())
IsLive = true;
if (!IsLive)
return false; // No live uses left of this partition.
@@ -3411,7 +3476,7 @@ bool SROA::rewriteAllocaPartition(AllocaInst &AI,
// Check for the case where we're going to rewrite to a new alloca of the
// exact same type as the original, and with the same access offsets. In that
// case, re-use the existing alloca, but still run through the rewriter to
- // performe phi and select speculation.
+ // perform phi and select speculation.
AllocaInst *NewAI;
if (AllocaTy == AI.getAllocatedType()) {
assert(PI->BeginOffset == 0 &&
@@ -3578,7 +3643,7 @@ void SROA::deleteDeadInstructions(SmallPtrSet<AllocaInst*, 4> &DeletedAllocas) {
/// If there is a domtree available, we attempt to promote using the full power
/// of mem2reg. Otherwise, we build and use the AllocaPromoter above which is
/// based on the SSAUpdater utilities. This function returns whether any
-/// promotion occured.
+/// promotion occurred.
bool SROA::promoteAllocas(Function &F) {
if (PromotableAllocas.empty())
return false;