Diffstat (limited to 'contrib/llvm/lib/Analysis')
-rw-r--r--  contrib/llvm/lib/Analysis/AliasAnalysis.cpp | 78
-rw-r--r--  contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp | 5
-rw-r--r--  contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp | 2
-rw-r--r--  contrib/llvm/lib/Analysis/AliasDebugger.cpp | 2
-rw-r--r--  contrib/llvm/lib/Analysis/AliasSetTracker.cpp | 9
-rw-r--r--  contrib/llvm/lib/Analysis/Analysis.cpp | 7
-rw-r--r--  contrib/llvm/lib/Analysis/AssumptionCache.cpp | 15
-rw-r--r--  contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp | 195
-rw-r--r--  contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp | 8
-rw-r--r--  contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp | 54
-rw-r--r--  contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp | 84
-rw-r--r--  contrib/llvm/lib/Analysis/CFGPrinter.cpp | 4
-rw-r--r--  contrib/llvm/lib/Analysis/CFLAliasAnalysis.cpp | 301
-rw-r--r--  contrib/llvm/lib/Analysis/CodeMetrics.cpp | 1
-rw-r--r--  contrib/llvm/lib/Analysis/ConstantFolding.cpp | 287
-rw-r--r--  contrib/llvm/lib/Analysis/CostModel.cpp | 3
-rw-r--r--  contrib/llvm/lib/Analysis/Delinearization.cpp | 6
-rw-r--r--  contrib/llvm/lib/Analysis/DependenceAnalysis.cpp | 90
-rw-r--r--  contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp | 337
-rw-r--r--  contrib/llvm/lib/Analysis/FunctionTargetTransformInfo.cpp | 50
-rw-r--r--  contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp | 52
-rw-r--r--  contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp | 14
-rw-r--r--  contrib/llvm/lib/Analysis/IPA/InlineCost.cpp | 195
-rw-r--r--  contrib/llvm/lib/Analysis/IVUsers.cpp | 15
-rw-r--r--  contrib/llvm/lib/Analysis/InstructionSimplify.cpp | 348
-rw-r--r--  contrib/llvm/lib/Analysis/IteratedDominanceFrontier.cpp | 95
-rw-r--r--  contrib/llvm/lib/Analysis/JumpInstrTableInfo.cpp | 55
-rw-r--r--  contrib/llvm/lib/Analysis/LazyValueInfo.cpp | 118
-rw-r--r--  contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp | 6
-rw-r--r--  contrib/llvm/lib/Analysis/LibCallSemantics.cpp | 27
-rw-r--r--  contrib/llvm/lib/Analysis/Lint.cpp | 560
-rw-r--r--  contrib/llvm/lib/Analysis/Loads.cpp | 28
-rw-r--r--  contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp | 1427
-rw-r--r--  contrib/llvm/lib/Analysis/LoopInfo.cpp | 105
-rw-r--r--  contrib/llvm/lib/Analysis/LoopPass.cpp | 8
-rw-r--r--  contrib/llvm/lib/Analysis/MemDepPrinter.cpp | 19
-rw-r--r--  contrib/llvm/lib/Analysis/MemDerefPrinter.cpp | 70
-rw-r--r--  contrib/llvm/lib/Analysis/MemoryBuiltins.cpp | 67
-rw-r--r--  contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp | 187
-rw-r--r--  contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp | 68
-rw-r--r--  contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp | 7
-rw-r--r--  contrib/llvm/lib/Analysis/PHITransAddr.cpp | 7
-rw-r--r--  contrib/llvm/lib/Analysis/RegionPass.cpp | 27
-rw-r--r--  contrib/llvm/lib/Analysis/RegionPrinter.cpp | 2
-rw-r--r--  contrib/llvm/lib/Analysis/ScalarEvolution.cpp | 1251
-rw-r--r--  contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp | 3
-rw-r--r--  contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp | 223
-rw-r--r--  contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp | 7
-rw-r--r--  contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp | 603
-rw-r--r--  contrib/llvm/lib/Analysis/TargetTransformInfo.cpp | 656
-rw-r--r--  contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp | 22
-rw-r--r--  contrib/llvm/lib/Analysis/ValueTracking.cpp | 1290
52 files changed, 6442 insertions, 2658 deletions
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysis.cpp b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp
index 6eea817..f54e234 100644
--- a/contrib/llvm/lib/Analysis/AliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp
@@ -27,6 +27,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DataLayout.h"
@@ -37,7 +38,6 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
#include "llvm/Pass.h"
-#include "llvm/Target/TargetLibraryInfo.h"
using namespace llvm;
// Register the AliasAnalysis interface, providing a nice name to refer to.
@@ -82,6 +82,23 @@ void AliasAnalysis::addEscapingUse(Use &U) {
AA->addEscapingUse(U);
}
+AliasAnalysis::ModRefResult
+AliasAnalysis::getModRefInfo(Instruction *I, ImmutableCallSite Call) {
+ // We may have two calls
+ if (auto CS = ImmutableCallSite(I)) {
+ // Check if the two calls modify the same memory
+ return getModRefInfo(Call, CS);
+ } else {
+ // Otherwise, check if the call modifies or references the
+ // location this memory access defines. The best we can say
+ // is that if the call references what this instruction
+ // defines, it must be clobbered by this location.
+ const AliasAnalysis::Location DefLoc = AA->getLocation(I);
+ if (getModRefInfo(Call, DefLoc) != AliasAnalysis::NoModRef)
+ return AliasAnalysis::ModRef;
+ }
+ return AliasAnalysis::NoModRef;
+}
AliasAnalysis::ModRefResult
AliasAnalysis::getModRefInfo(ImmutableCallSite CS,
@@ -330,7 +347,7 @@ AliasAnalysis::getModRefInfo(const LoadInst *L, const Location &Loc) {
// If the load address doesn't alias the given address, it doesn't read
// or write the specified memory.
- if (!alias(getLocation(L), Loc))
+ if (Loc.Ptr && !alias(getLocation(L), Loc))
return NoModRef;
// Otherwise, a load just reads.
@@ -343,15 +360,18 @@ AliasAnalysis::getModRefInfo(const StoreInst *S, const Location &Loc) {
if (!S->isUnordered())
return ModRef;
- // If the store address cannot alias the pointer in question, then the
- // specified memory cannot be modified by the store.
- if (!alias(getLocation(S), Loc))
- return NoModRef;
+ if (Loc.Ptr) {
+ // If the store address cannot alias the pointer in question, then the
+ // specified memory cannot be modified by the store.
+ if (!alias(getLocation(S), Loc))
+ return NoModRef;
- // If the pointer is a pointer to constant memory, then it could not have been
- // modified by this store.
- if (pointsToConstantMemory(Loc))
- return NoModRef;
+ // If the pointer is a pointer to constant memory, then it could not have
+ // been modified by this store.
+ if (pointsToConstantMemory(Loc))
+ return NoModRef;
+
+ }
// Otherwise, a store just writes.
return Mod;
@@ -359,15 +379,18 @@ AliasAnalysis::getModRefInfo(const StoreInst *S, const Location &Loc) {
AliasAnalysis::ModRefResult
AliasAnalysis::getModRefInfo(const VAArgInst *V, const Location &Loc) {
- // If the va_arg address cannot alias the pointer in question, then the
- // specified memory cannot be accessed by the va_arg.
- if (!alias(getLocation(V), Loc))
- return NoModRef;
- // If the pointer is a pointer to constant memory, then it could not have been
- // modified by this va_arg.
- if (pointsToConstantMemory(Loc))
- return NoModRef;
+ if (Loc.Ptr) {
+ // If the va_arg address cannot alias the pointer in question, then the
+ // specified memory cannot be accessed by the va_arg.
+ if (!alias(getLocation(V), Loc))
+ return NoModRef;
+
+ // If the pointer is a pointer to constant memory, then it could not have
+ // been modified by this va_arg.
+ if (pointsToConstantMemory(Loc))
+ return NoModRef;
+ }
// Otherwise, a va_arg reads and writes.
return ModRef;
@@ -380,7 +403,7 @@ AliasAnalysis::getModRefInfo(const AtomicCmpXchgInst *CX, const Location &Loc) {
return ModRef;
// If the cmpxchg address does not alias the location, it does not access it.
- if (!alias(getLocation(CX), Loc))
+ if (Loc.Ptr && !alias(getLocation(CX), Loc))
return NoModRef;
return ModRef;
@@ -393,7 +416,7 @@ AliasAnalysis::getModRefInfo(const AtomicRMWInst *RMW, const Location &Loc) {
return ModRef;
// If the atomicrmw address does not alias the location, it does not access it.
- if (!alias(getLocation(RMW), Loc))
+ if (Loc.Ptr && !alias(getLocation(RMW), Loc))
return NoModRef;
return ModRef;
@@ -407,9 +430,10 @@ AliasAnalysis::ModRefResult
AliasAnalysis::callCapturesBefore(const Instruction *I,
const AliasAnalysis::Location &MemLoc,
DominatorTree *DT) {
- if (!DT || !DL) return AliasAnalysis::ModRef;
+ if (!DT)
+ return AliasAnalysis::ModRef;
- const Value *Object = GetUnderlyingObject(MemLoc.Ptr, DL);
+ const Value *Object = GetUnderlyingObject(MemLoc.Ptr, *DL);
if (!isIdentifiedObject(Object) || isa<GlobalValue>(Object) ||
isa<Constant>(Object))
return AliasAnalysis::ModRef;
@@ -462,10 +486,10 @@ AliasAnalysis::~AliasAnalysis() {}
/// InitializeAliasAnalysis - Subclasses must call this method to initialize the
/// AliasAnalysis interface before any other methods are called.
///
-void AliasAnalysis::InitializeAliasAnalysis(Pass *P) {
- DataLayoutPass *DLP = P->getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
- TLI = P->getAnalysisIfAvailable<TargetLibraryInfo>();
+void AliasAnalysis::InitializeAliasAnalysis(Pass *P, const DataLayout *NewDL) {
+ DL = NewDL;
+ auto *TLIP = P->getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
+ TLI = TLIP ? &TLIP->getTLI() : nullptr;
AA = &P->getAnalysis<AliasAnalysis>();
}
@@ -494,7 +518,7 @@ bool AliasAnalysis::canBasicBlockModify(const BasicBlock &BB,
/// execution of the specified instructions to mod\ref (according to the
/// mode) the location Loc. The instructions to consider are all
/// of the instructions in the range of [I1,I2] INCLUSIVE.
-/// I1 and I2 must be in the same basic block.
+/// I1 and I2 must be in the same basic block.
bool AliasAnalysis::canInstructionRangeModRef(const Instruction &I1,
const Instruction &I2,
const Location &Loc,
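The first new hunk above adds a getModRefInfo overload taking an arbitrary instruction and a call site: it dispatches to the call/call query when the instruction is itself a call, and to a call/location query otherwise. A minimal sketch of a hypothetical caller (the helper and its name are not part of the patch; AA, I and Call are assumed to come from the surrounding pass):

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

// Returns true if the call may read or write memory touched by I (or, when I
// is itself a call, if the two calls may interact at all).
static bool mayModOrRef(AliasAnalysis &AA, Instruction *I,
                        ImmutableCallSite Call) {
  return AA.getModRefInfo(I, Call) != AliasAnalysis::NoModRef;
}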
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp b/contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp
index b860914..a1bfba1 100644
--- a/contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp
+++ b/contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp
@@ -14,6 +14,7 @@
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -43,7 +44,7 @@ namespace {
errs() << " " << Val << " " << Desc << " responses ("
<< Val*100/Sum << "%)\n";
}
- ~AliasAnalysisCounter() {
+ ~AliasAnalysisCounter() override {
unsigned AASum = No+May+Partial+Must;
unsigned MRSum = NoMR+JustRef+JustMod+MR;
if (AASum + MRSum) { // Print a report if any counted queries occurred...
@@ -76,7 +77,7 @@ namespace {
bool runOnModule(Module &M) override {
this->M = &M;
- InitializeAliasAnalysis(this);
+ InitializeAliasAnalysis(this, &M.getDataLayout());
return false;
}
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp b/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
index fe4bd4c..273eacc 100644
--- a/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
+++ b/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -158,7 +158,7 @@ bool AAEval::runOnFunction(Function &F) {
if (EvalAAMD && isa<StoreInst>(&*I))
Stores.insert(&*I);
Instruction &Inst = *I;
- if (CallSite CS = cast<Value>(&Inst)) {
+ if (auto CS = CallSite(&Inst)) {
Value *Callee = CS.getCalledValue();
// Skip actual functions for direct function calls.
if (!isa<Function>(Callee) && isInterestingPointer(Callee))
diff --git a/contrib/llvm/lib/Analysis/AliasDebugger.cpp b/contrib/llvm/lib/Analysis/AliasDebugger.cpp
index 5d61cf9..f98b578 100644
--- a/contrib/llvm/lib/Analysis/AliasDebugger.cpp
+++ b/contrib/llvm/lib/Analysis/AliasDebugger.cpp
@@ -44,7 +44,7 @@ namespace {
}
bool runOnModule(Module &M) override {
- InitializeAliasAnalysis(this); // set up super class
+ InitializeAliasAnalysis(this, &M.getDataLayout()); // set up super class
for(Module::global_iterator I = M.global_begin(),
E = M.global_end(); I != E; ++I) {
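Both runOnModule hunks above follow the same migration: the DataLayout is now taken from the Module and passed to InitializeAliasAnalysis explicitly, matching the new two-argument signature introduced in AliasAnalysis.cpp. A skeletal, hypothetical AliasAnalysis subclass showing that convention (MyAA is a placeholder name; AliasAnalysisCounter and AliasDebugger do exactly this):

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
using namespace llvm;

namespace {
struct MyAA : public ModulePass, public AliasAnalysis {
  static char ID;
  MyAA() : ModulePass(ID) {}

  bool runOnModule(Module &M) override {
    InitializeAliasAnalysis(this, &M.getDataLayout()); // set up the AA base
    return false;                                      // nothing modified
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AliasAnalysis::getAnalysisUsage(AU);
    AU.setPreservesAll();
  }
};
}
char MyAA::ID = 0;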
diff --git a/contrib/llvm/lib/Analysis/AliasSetTracker.cpp b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp
index 45442b0..50890c1 100644
--- a/contrib/llvm/lib/Analysis/AliasSetTracker.cpp
+++ b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp
@@ -182,12 +182,13 @@ bool AliasSet::aliasesPointer(const Value *Ptr, uint64_t Size,
return false;
}
-bool AliasSet::aliasesUnknownInst(Instruction *Inst, AliasAnalysis &AA) const {
+bool AliasSet::aliasesUnknownInst(const Instruction *Inst,
+ AliasAnalysis &AA) const {
if (!Inst->mayReadOrWriteMemory())
return false;
for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i) {
- CallSite C1 = getUnknownInst(i), C2 = Inst;
+ ImmutableCallSite C1(getUnknownInst(i)), C2(Inst);
if (!C1 || !C2 ||
AA.getModRefInfo(C1, C2) != AliasAnalysis::NoModRef ||
AA.getModRefInfo(C2, C1) != AliasAnalysis::NoModRef)
@@ -242,7 +243,7 @@ AliasSet *AliasSetTracker::findAliasSetForPointer(const Value *Ptr,
/// containsPointer - Return true if the specified location is represented by
/// this alias set, false otherwise. This does not modify the AST object or
/// alias sets.
-bool AliasSetTracker::containsPointer(Value *Ptr, uint64_t Size,
+bool AliasSetTracker::containsPointer(const Value *Ptr, uint64_t Size,
const AAMDNodes &AAInfo) const {
for (const_iterator I = begin(), E = end(); I != E; ++I)
if (!I->Forward && I->aliasesPointer(Ptr, Size, AAInfo, AA))
@@ -250,7 +251,7 @@ bool AliasSetTracker::containsPointer(Value *Ptr, uint64_t Size,
return false;
}
-bool AliasSetTracker::containsUnknown(Instruction *Inst) const {
+bool AliasSetTracker::containsUnknown(const Instruction *Inst) const {
for (const_iterator I = begin(), E = end(); I != E; ++I)
if (!I->Forward && I->aliasesUnknownInst(Inst, AA))
return true;
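aliasesUnknownInst above now builds ImmutableCallSites for its pairwise mod/ref check instead of mutable CallSites, which is what allows it to accept a const Instruction. A standalone sketch of that pairwise check (the helper is hypothetical; I1 and I2 stand for two tracked memory-touching instructions):

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

static bool mayInteract(AliasAnalysis &AA, const Instruction *I1,
                        const Instruction *I2) {
  ImmutableCallSite C1(I1), C2(I2);
  if (!C1 || !C2)
    return true; // at least one is not a call; stay conservative
  return AA.getModRefInfo(C1, C2) != AliasAnalysis::NoModRef ||
         AA.getModRefInfo(C2, C1) != AliasAnalysis::NoModRef;
}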
diff --git a/contrib/llvm/lib/Analysis/Analysis.cpp b/contrib/llvm/lib/Analysis/Analysis.cpp
index f64bf0e..842ff0a 100644
--- a/contrib/llvm/lib/Analysis/Analysis.cpp
+++ b/contrib/llvm/lib/Analysis/Analysis.cpp
@@ -37,6 +37,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeCFLAliasAnalysisPass(Registry);
initializeDependenceAnalysisPass(Registry);
initializeDelinearizationPass(Registry);
+ initializeDivergenceAnalysisPass(Registry);
initializeDominanceFrontierPass(Registry);
initializeDomViewerPass(Registry);
initializeDomPrinterPass(Registry);
@@ -49,12 +50,12 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeIVUsersPass(Registry);
initializeInstCountPass(Registry);
initializeIntervalPartitionPass(Registry);
- initializeJumpInstrTableInfoPass(Registry);
initializeLazyValueInfoPass(Registry);
initializeLibCallAliasAnalysisPass(Registry);
initializeLintPass(Registry);
- initializeLoopInfoPass(Registry);
+ initializeLoopInfoWrapperPassPass(Registry);
initializeMemDepPrinterPass(Registry);
+ initializeMemDerefPrinterPass(Registry);
initializeMemoryDependenceAnalysisPass(Registry);
initializeModuleDebugInfoPrinterPass(Registry);
initializePostDominatorTreePass(Registry);
@@ -65,7 +66,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeRegionOnlyPrinterPass(Registry);
initializeScalarEvolutionPass(Registry);
initializeScalarEvolutionAliasAnalysisPass(Registry);
- initializeTargetTransformInfoAnalysisGroup(Registry);
+ initializeTargetTransformInfoWrapperPassPass(Registry);
initializeTypeBasedAliasAnalysisPass(Registry);
initializeScopedNoAliasAAPass(Registry);
}
diff --git a/contrib/llvm/lib/Analysis/AssumptionCache.cpp b/contrib/llvm/lib/Analysis/AssumptionCache.cpp
index da5ba18..f468a43 100644
--- a/contrib/llvm/lib/Analysis/AssumptionCache.cpp
+++ b/contrib/llvm/lib/Analysis/AssumptionCache.cpp
@@ -18,6 +18,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
@@ -73,6 +74,20 @@ void AssumptionCache::registerAssumption(CallInst *CI) {
#endif
}
+char AssumptionAnalysis::PassID;
+
+PreservedAnalyses AssumptionPrinterPass::run(Function &F,
+ AnalysisManager<Function> *AM) {
+ AssumptionCache &AC = AM->getResult<AssumptionAnalysis>(F);
+
+ OS << "Cached assumptions for function: " << F.getName() << "\n";
+ for (auto &VH : AC.assumptions())
+ if (VH)
+ OS << " " << *cast<CallInst>(VH)->getArgOperand(0) << "\n";
+
+ return PreservedAnalyses::all();
+}
+
void AssumptionCacheTracker::FunctionCallbackVH::deleted() {
auto I = ACT->AssumptionCaches.find_as(cast<Function>(getValPtr()));
if (I != ACT->AssumptionCaches.end())
diff --git a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index a9efc5a..a61faca 100644
--- a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -23,6 +23,7 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -38,7 +39,6 @@
#include "llvm/IR/Operator.h"
#include "llvm/Pass.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/TargetLibraryInfo.h"
#include <algorithm>
using namespace llvm;
@@ -103,7 +103,7 @@ static uint64_t getObjectSize(const Value *V, const DataLayout &DL,
const TargetLibraryInfo &TLI,
bool RoundToAlign = false) {
uint64_t Size;
- if (getObjectSize(V, Size, &DL, &TLI, RoundToAlign))
+ if (getObjectSize(V, Size, DL, &TLI, RoundToAlign))
return Size;
return AliasAnalysis::UnknownSize;
}
@@ -221,7 +221,7 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
case Instruction::Or:
// X|C == X+C if all the bits in C are unset in X. Otherwise we can't
// analyze it.
- if (!MaskedValueIsZero(BOp->getOperand(0), RHSC->getValue(), &DL, 0, AC,
+ if (!MaskedValueIsZero(BOp->getOperand(0), RHSC->getValue(), DL, 0, AC,
BOp, DT))
break;
// FALL THROUGH.
@@ -292,7 +292,7 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
static const Value *
DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
SmallVectorImpl<VariableGEPIndex> &VarIndices,
- bool &MaxLookupReached, const DataLayout *DL,
+ bool &MaxLookupReached, const DataLayout &DL,
AssumptionCache *AC, DominatorTree *DT) {
// Limit recursion depth to limit compile time in crazy cases.
unsigned MaxLookup = MaxLookupSearchDepth;
@@ -341,16 +341,6 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
if (!GEPOp->getOperand(0)->getType()->getPointerElementType()->isSized())
return V;
- // If we are lacking DataLayout information, we can't compute the offets of
- // elements computed by GEPs. However, we can handle bitcast equivalent
- // GEPs.
- if (!DL) {
- if (!GEPOp->hasAllZeroIndices())
- return V;
- V = GEPOp->getOperand(0);
- continue;
- }
-
unsigned AS = GEPOp->getPointerAddressSpace();
// Walk the indices of the GEP, accumulating them into BaseOff/VarIndices.
gep_type_iterator GTI = gep_type_begin(GEPOp);
@@ -363,30 +353,30 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
if (FieldNo == 0) continue;
- BaseOffs += DL->getStructLayout(STy)->getElementOffset(FieldNo);
+ BaseOffs += DL.getStructLayout(STy)->getElementOffset(FieldNo);
continue;
}
// For an array/pointer, add the element offset, explicitly scaled.
if (ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) {
if (CIdx->isZero()) continue;
- BaseOffs += DL->getTypeAllocSize(*GTI)*CIdx->getSExtValue();
+ BaseOffs += DL.getTypeAllocSize(*GTI) * CIdx->getSExtValue();
continue;
}
- uint64_t Scale = DL->getTypeAllocSize(*GTI);
+ uint64_t Scale = DL.getTypeAllocSize(*GTI);
ExtensionKind Extension = EK_NotExtended;
// If the integer type is smaller than the pointer size, it is implicitly
// sign extended to pointer size.
unsigned Width = Index->getType()->getIntegerBitWidth();
- if (DL->getPointerSizeInBits(AS) > Width)
+ if (DL.getPointerSizeInBits(AS) > Width)
Extension = EK_SignExt;
// Use GetLinearExpression to decompose the index into a C1*V+C2 form.
APInt IndexScale(Width, 0), IndexOffset(Width, 0);
- Index = GetLinearExpression(Index, IndexScale, IndexOffset, Extension,
- *DL, 0, AC, DT);
+ Index = GetLinearExpression(Index, IndexScale, IndexOffset, Extension, DL,
+ 0, AC, DT);
// The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale.
// This gives us an aggregate computation of (C1*Scale)*V + C2*Scale.
@@ -408,7 +398,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
// Make sure that we have a scale that makes sense for this target's
// pointer size.
- if (unsigned ShiftBits = 64 - DL->getPointerSizeInBits(AS)) {
+ if (unsigned ShiftBits = 64 - DL.getPointerSizeInBits(AS)) {
Scale <<= ShiftBits;
Scale = (int64_t)Scale >> ShiftBits;
}
@@ -461,14 +451,12 @@ namespace {
initializeBasicAliasAnalysisPass(*PassRegistry::getPassRegistry());
}
- void initializePass() override {
- InitializeAliasAnalysis(this);
- }
+ bool doInitialization(Module &M) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AliasAnalysis>();
AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<TargetLibraryInfo>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
}
AliasResult alias(const Location &LocA, const Location &LocB) override {
@@ -591,7 +579,7 @@ INITIALIZE_AG_PASS_BEGIN(BasicAliasAnalysis, AliasAnalysis, "basicaa",
"Basic Alias Analysis (stateless AA impl)",
false, true, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_AG_PASS_END(BasicAliasAnalysis, AliasAnalysis, "basicaa",
"Basic Alias Analysis (stateless AA impl)",
false, true, false)
@@ -612,7 +600,7 @@ BasicAliasAnalysis::pointsToConstantMemory(const Location &Loc, bool OrLocal) {
SmallVector<const Value *, 16> Worklist;
Worklist.push_back(Loc.Ptr);
do {
- const Value *V = GetUnderlyingObject(Worklist.pop_back_val(), DL);
+ const Value *V = GetUnderlyingObject(Worklist.pop_back_val(), *DL);
if (!Visited.insert(V).second) {
Visited.clear();
return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
@@ -649,8 +637,8 @@ BasicAliasAnalysis::pointsToConstantMemory(const Location &Loc, bool OrLocal) {
Visited.clear();
return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
}
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- Worklist.push_back(PN->getIncomingValue(i));
+ for (Value *IncValue : PN->incoming_values())
+ Worklist.push_back(IncValue);
continue;
}
@@ -706,7 +694,7 @@ BasicAliasAnalysis::getModRefBehavior(const Function *F) {
return DoesNotAccessMemory;
// For intrinsics, we can check the table.
- if (unsigned iid = F->getIntrinsicID()) {
+ if (Intrinsic::ID iid = F->getIntrinsicID()) {
#define GET_INTRINSIC_MODREF_BEHAVIOR
#include "llvm/IR/Intrinsics.gen"
#undef GET_INTRINSIC_MODREF_BEHAVIOR
@@ -718,7 +706,8 @@ BasicAliasAnalysis::getModRefBehavior(const Function *F) {
if (F->onlyReadsMemory())
Min = OnlyReadsMemory;
- const TargetLibraryInfo &TLI = getAnalysis<TargetLibraryInfo>();
+ const TargetLibraryInfo &TLI =
+ getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
if (isMemsetPattern16(F, TLI))
Min = OnlyAccessesArgumentPointees;
@@ -730,7 +719,8 @@ AliasAnalysis::Location
BasicAliasAnalysis::getArgLocation(ImmutableCallSite CS, unsigned ArgIdx,
ModRefResult &Mask) {
Location Loc = AliasAnalysis::getArgLocation(CS, ArgIdx, Mask);
- const TargetLibraryInfo &TLI = getAnalysis<TargetLibraryInfo>();
+ const TargetLibraryInfo &TLI =
+ getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction());
if (II != nullptr)
switch (II->getIntrinsicID()) {
@@ -813,6 +803,11 @@ static bool isAssumeIntrinsic(ImmutableCallSite CS) {
return false;
}
+bool BasicAliasAnalysis::doInitialization(Module &M) {
+ InitializeAliasAnalysis(this, &M.getDataLayout());
+ return true;
+}
+
/// getModRefInfo - Check to see if the specified callsite can clobber the
/// specified memory object. Since we only look at local properties of this
/// function, we really can't say much about this query. We do, however, use
@@ -823,7 +818,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
assert(notDifferentParent(CS.getInstruction(), Loc.Ptr) &&
"AliasAnalysis query involving multiple functions!");
- const Value *Object = GetUnderlyingObject(Loc.Ptr, DL);
+ const Value *Object = GetUnderlyingObject(Loc.Ptr, *DL);
// If this is a tail call and Loc.Ptr points to a stack location, we know that
// the tail call cannot access or modify the local stack.
@@ -888,6 +883,99 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS1,
return AliasAnalysis::getModRefInfo(CS1, CS2);
}
+/// \brief Provide ad-hoc rules to disambiguate accesses through two GEP
+/// operators, both having the exact same pointer operand.
+static AliasAnalysis::AliasResult
+aliasSameBasePointerGEPs(const GEPOperator *GEP1, uint64_t V1Size,
+ const GEPOperator *GEP2, uint64_t V2Size,
+ const DataLayout &DL) {
+
+ assert(GEP1->getPointerOperand() == GEP2->getPointerOperand() &&
+ "Expected GEPs with the same pointer operand");
+
+ // Try to determine whether GEP1 and GEP2 index through arrays, into structs,
+ // such that the struct field accesses provably cannot alias.
+ // We also need at least two indices (the pointer, and the struct field).
+ if (GEP1->getNumIndices() != GEP2->getNumIndices() ||
+ GEP1->getNumIndices() < 2)
+ return AliasAnalysis::MayAlias;
+
+ // If we don't know the size of the accesses through both GEPs, we can't
+ // determine whether the struct fields accessed can't alias.
+ if (V1Size == AliasAnalysis::UnknownSize ||
+ V2Size == AliasAnalysis::UnknownSize)
+ return AliasAnalysis::MayAlias;
+
+ ConstantInt *C1 =
+ dyn_cast<ConstantInt>(GEP1->getOperand(GEP1->getNumOperands() - 1));
+ ConstantInt *C2 =
+ dyn_cast<ConstantInt>(GEP2->getOperand(GEP2->getNumOperands() - 1));
+
+ // If the last (struct) indices aren't constants, we can't say anything.
+ // If they're identical, the other indices might be also be dynamically
+ // equal, so the GEPs can alias.
+ if (!C1 || !C2 || C1 == C2)
+ return AliasAnalysis::MayAlias;
+
+ // Find the last-indexed type of the GEP, i.e., the type you'd get if
+ // you stripped the last index.
+ // On the way, look at each indexed type. If there's something other
+ // than an array, different indices can lead to different final types.
+ SmallVector<Value *, 8> IntermediateIndices;
+
+ // Insert the first index; we don't need to check the type indexed
+ // through it as it only drops the pointer indirection.
+ assert(GEP1->getNumIndices() > 1 && "Not enough GEP indices to examine");
+ IntermediateIndices.push_back(GEP1->getOperand(1));
+
+ // Insert all the remaining indices but the last one.
+ // Also, check that they all index through arrays.
+ for (unsigned i = 1, e = GEP1->getNumIndices() - 1; i != e; ++i) {
+ if (!isa<ArrayType>(GetElementPtrInst::getIndexedType(
+ GEP1->getSourceElementType(), IntermediateIndices)))
+ return AliasAnalysis::MayAlias;
+ IntermediateIndices.push_back(GEP1->getOperand(i + 1));
+ }
+
+ StructType *LastIndexedStruct =
+ dyn_cast<StructType>(GetElementPtrInst::getIndexedType(
+ GEP1->getSourceElementType(), IntermediateIndices));
+
+ if (!LastIndexedStruct)
+ return AliasAnalysis::MayAlias;
+
+ // We know that:
+ // - both GEPs begin indexing from the exact same pointer;
+ // - the last indices in both GEPs are constants, indexing into a struct;
+ // - said indices are different, hence, the pointed-to fields are different;
+ // - both GEPs only index through arrays prior to that.
+ //
+ // This lets us determine that the struct that GEP1 indexes into and the
+ // struct that GEP2 indexes into must either precisely overlap or be
+ // completely disjoint. Because they cannot partially overlap, indexing into
+ // different non-overlapping fields of the struct will never alias.
+
+ // Therefore, the only remaining thing needed to show that both GEPs can't
+ // alias is that the fields are not overlapping.
+ const StructLayout *SL = DL.getStructLayout(LastIndexedStruct);
+ const uint64_t StructSize = SL->getSizeInBytes();
+ const uint64_t V1Off = SL->getElementOffset(C1->getZExtValue());
+ const uint64_t V2Off = SL->getElementOffset(C2->getZExtValue());
+
+ auto EltsDontOverlap = [StructSize](uint64_t V1Off, uint64_t V1Size,
+ uint64_t V2Off, uint64_t V2Size) {
+ return V1Off < V2Off && V1Off + V1Size <= V2Off &&
+ ((V2Off + V2Size <= StructSize) ||
+ (V2Off + V2Size - StructSize <= V1Off));
+ };
+
+ if (EltsDontOverlap(V1Off, V1Size, V2Off, V2Size) ||
+ EltsDontOverlap(V2Off, V2Size, V1Off, V1Size))
+ return AliasAnalysis::NoAlias;
+
+ return AliasAnalysis::MayAlias;
+}
+
/// aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP instruction
/// against another pointer. We know that V1 is a GEP, but we don't know
/// anything about V2. UnderlyingV1 is GetUnderlyingObject(GEP1, DL),
@@ -947,10 +1035,10 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
SmallVector<VariableGEPIndex, 4> GEP2VariableIndices;
const Value *GEP2BasePtr =
DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices,
- GEP2MaxLookupReached, DL, AC2, DT);
+ GEP2MaxLookupReached, *DL, AC2, DT);
const Value *GEP1BasePtr =
DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices,
- GEP1MaxLookupReached, DL, AC1, DT);
+ GEP1MaxLookupReached, *DL, AC1, DT);
// DecomposeGEPExpression and GetUnderlyingObject should return the
// same result except when DecomposeGEPExpression has no DataLayout.
if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) {
@@ -979,14 +1067,14 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
// about the relation of the resulting pointer.
const Value *GEP1BasePtr =
DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices,
- GEP1MaxLookupReached, DL, AC1, DT);
+ GEP1MaxLookupReached, *DL, AC1, DT);
int64_t GEP2BaseOffset;
bool GEP2MaxLookupReached;
SmallVector<VariableGEPIndex, 4> GEP2VariableIndices;
const Value *GEP2BasePtr =
DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices,
- GEP2MaxLookupReached, DL, AC2, DT);
+ GEP2MaxLookupReached, *DL, AC2, DT);
// DecomposeGEPExpression and GetUnderlyingObject should return the
// same result except when DecomposeGEPExpression has no DataLayout.
@@ -995,6 +1083,17 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
"DecomposeGEPExpression and GetUnderlyingObject disagree!");
return MayAlias;
}
+
+ // If we know the two GEPs are based off of the exact same pointer (and not
+ // just the same underlying object), see if that tells us anything about
+ // the resulting pointers.
+ if (DL && GEP1->getPointerOperand() == GEP2->getPointerOperand()) {
+ AliasResult R = aliasSameBasePointerGEPs(GEP1, V1Size, GEP2, V2Size, *DL);
+ // If we couldn't find anything interesting, don't abandon just yet.
+ if (R != MayAlias)
+ return R;
+ }
+
// If the max search depth is reached the result is undefined
if (GEP2MaxLookupReached || GEP1MaxLookupReached)
return MayAlias;
@@ -1025,7 +1124,7 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
const Value *GEP1BasePtr =
DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices,
- GEP1MaxLookupReached, DL, AC1, DT);
+ GEP1MaxLookupReached, *DL, AC1, DT);
// DecomposeGEPExpression and GetUnderlyingObject should return the
// same result except when DecomposeGEPExpression has no DataLayout.
@@ -1094,7 +1193,7 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
const Value *V = GEP1VariableIndices[i].V;
bool SignKnownZero, SignKnownOne;
- ComputeSignBit(const_cast<Value *>(V), SignKnownZero, SignKnownOne, DL,
+ ComputeSignBit(const_cast<Value *>(V), SignKnownZero, SignKnownOne, *DL,
0, AC1, nullptr, DT);
// Zero-extension widens the variable, and so forces the sign
@@ -1239,8 +1338,7 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
SmallPtrSet<Value*, 4> UniqueSrc;
SmallVector<Value*, 4> V1Srcs;
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- Value *PV1 = PN->getIncomingValue(i);
+ for (Value *PV1 : PN->incoming_values()) {
if (isa<PHINode>(PV1))
// If any of the source itself is a PHI, return MayAlias conservatively
// to avoid compile time explosion. The worst possible case is if both
@@ -1290,6 +1388,11 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
V1 = V1->stripPointerCasts();
V2 = V2->stripPointerCasts();
+ // If V1 or V2 is undef, the result is NoAlias because we can always pick a
+ // value for undef that aliases nothing in the program.
+ if (isa<UndefValue>(V1) || isa<UndefValue>(V2))
+ return NoAlias;
+
// Are we checking for alias of the same value?
// Because we look 'through' phi nodes we could look at "Value" pointers from
// different iterations. We must therefore make sure that this is not the
@@ -1303,8 +1406,8 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
return NoAlias; // Scalars cannot alias each other
// Figure out what objects these things are pointing to if we can.
- const Value *O1 = GetUnderlyingObject(V1, DL, MaxLookupSearchDepth);
- const Value *O2 = GetUnderlyingObject(V2, DL, MaxLookupSearchDepth);
+ const Value *O1 = GetUnderlyingObject(V1, *DL, MaxLookupSearchDepth);
+ const Value *O2 = GetUnderlyingObject(V2, *DL, MaxLookupSearchDepth);
// Null values in the default address space don't point to any object, so they
// don't alias any other pointer.
@@ -1427,6 +1530,9 @@ bool BasicAliasAnalysis::isValueEqualInPotentialCycles(const Value *V,
if (!Inst)
return true;
+ if (VisitedPhiBBs.empty())
+ return true;
+
if (VisitedPhiBBs.size() > MaxNumPhiBBsValueReachabilityCheck)
return false;
@@ -1434,7 +1540,8 @@ bool BasicAliasAnalysis::isValueEqualInPotentialCycles(const Value *V,
DominatorTreeWrapperPass *DTWP =
getAnalysisIfAvailable<DominatorTreeWrapperPass>();
DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
- LoopInfo *LI = getAnalysisIfAvailable<LoopInfo>();
+ auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
+ LoopInfo *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
// Make sure that the visited phis cannot reach the Value. This ensures that
// the Values cannot come from different iterations of a potential cycle the
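As a worked example for the new aliasSameBasePointerGEPs above: with a struct of two i32 fields (8 bytes), a 4-byte access of field 0 at offset 0 and a 4-byte access of field 1 at offset 4 can never overlap, so the two GEPs are reported NoAlias. The field-overlap predicate as a standalone sketch with those hypothetical numbers:

#include <cassert>
#include <cstdint>

// Same check as the EltsDontOverlap lambda above, lifted out as a free function.
static bool eltsDontOverlap(uint64_t StructSize, uint64_t V1Off,
                            uint64_t V1Size, uint64_t V2Off, uint64_t V2Size) {
  return V1Off < V2Off && V1Off + V1Size <= V2Off &&
         ((V2Off + V2Size <= StructSize) ||
          (V2Off + V2Size - StructSize <= V1Off));
}

int main() {
  // struct { i32 a; i32 b; }: field a occupies [0,4), field b occupies [4,8).
  assert(eltsDontOverlap(/*StructSize=*/8, /*V1Off=*/0, /*V1Size=*/4,
                         /*V2Off=*/4, /*V2Size=*/4));
  return 0;
}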
diff --git a/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp b/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp
index 8ed8e3e..3d819eb 100644
--- a/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp
+++ b/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp
@@ -85,7 +85,7 @@ struct DOTGraphTraits<BlockFrequencyInfo*> : public DefaultDOTGraphTraits {
std::string Result;
raw_string_ostream OS(Result);
- OS << Node->getName().str() << ":";
+ OS << Node->getName() << ":";
switch (ViewBlockFreqPropagationDAG) {
case GVDT_Fraction:
Graph->printBlockFreq(OS, Node);
@@ -108,7 +108,7 @@ struct DOTGraphTraits<BlockFrequencyInfo*> : public DefaultDOTGraphTraits {
INITIALIZE_PASS_BEGIN(BlockFrequencyInfo, "block-freq",
"Block Frequency Analysis", true, true)
INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfo)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(BlockFrequencyInfo, "block-freq",
"Block Frequency Analysis", true, true)
@@ -123,13 +123,13 @@ BlockFrequencyInfo::~BlockFrequencyInfo() {}
void BlockFrequencyInfo::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<BranchProbabilityInfo>();
- AU.addRequired<LoopInfo>();
+ AU.addRequired<LoopInfoWrapperPass>();
AU.setPreservesAll();
}
bool BlockFrequencyInfo::runOnFunction(Function &F) {
BranchProbabilityInfo &BPI = getAnalysis<BranchProbabilityInfo>();
- LoopInfo &LI = getAnalysis<LoopInfo>();
+ LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
if (!BFI)
BFI.reset(new ImplType);
BFI->doFunction(&F, &BPI, &LI);
diff --git a/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp b/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
index 278073c..456cee1 100644
--- a/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
+++ b/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
@@ -331,32 +331,35 @@ bool BlockFrequencyInfoImplBase::addLoopSuccessorsToDist(
return true;
}
-/// \brief Get the maximum allowed loop scale.
-///
-/// Gives the maximum number of estimated iterations allowed for a loop. Very
-/// large numbers cause problems downstream (even within 64-bits).
-static Scaled64 getMaxLoopScale() { return Scaled64(1, 12); }
-
/// \brief Compute the loop scale for a loop.
void BlockFrequencyInfoImplBase::computeLoopScale(LoopData &Loop) {
// Compute loop scale.
DEBUG(dbgs() << "compute-loop-scale: " << getLoopName(Loop) << "\n");
+ // Infinite loops need special handling. If we give the back edge an infinite
+ // mass, they may saturate all the other scales in the function down to 1,
+ // making all the other region temperatures look exactly the same. Choose an
+ // arbitrary scale to avoid these issues.
+ //
+ // FIXME: An alternate way would be to select a symbolic scale which is later
+ // replaced to be the maximum of all computed scales plus 1. This would
+ // appropriately describe the loop as having a large scale, without skewing
+ // the final frequency computation.
+ const Scaled64 InifiniteLoopScale(1, 12);
+
// LoopScale == 1 / ExitMass
// ExitMass == HeadMass - BackedgeMass
BlockMass ExitMass = BlockMass::getFull() - Loop.BackedgeMass;
- // Block scale stores the inverse of the scale.
- Loop.Scale = ExitMass.toScaled().inverse();
+ // Block scale stores the inverse of the scale. If this is an infinite loop,
+ // its exit mass will be zero. In this case, use an arbitrary scale for the
+ // loop scale.
+ Loop.Scale =
+ ExitMass.isEmpty() ? InifiniteLoopScale : ExitMass.toScaled().inverse();
DEBUG(dbgs() << " - exit-mass = " << ExitMass << " (" << BlockMass::getFull()
<< " - " << Loop.BackedgeMass << ")\n"
<< " - scale = " << Loop.Scale << "\n");
-
- if (Loop.Scale > getMaxLoopScale()) {
- Loop.Scale = getMaxLoopScale();
- DEBUG(dbgs() << " - reduced-to-max-scale: " << getMaxLoopScale() << "\n");
- }
}
/// \brief Package up a loop.
@@ -424,15 +427,24 @@ static void convertFloatingToInteger(BlockFrequencyInfoImplBase &BFI,
const Scaled64 &Min, const Scaled64 &Max) {
// Scale the Factor to a size that creates integers. Ideally, integers would
// be scaled so that Max == UINT64_MAX so that they can be best
- // differentiated. However, the register allocator currently deals poorly
- // with large numbers. Instead, push Min up a little from 1 to give some
- // room to differentiate small, unequal numbers.
- //
- // TODO: fix issues downstream so that ScalingFactor can be
- // Scaled64(1,64)/Max.
- Scaled64 ScalingFactor = Min.inverse();
- if ((Max / Min).lg() < 60)
+ // differentiated. However, in the presence of large frequency values, small
+ // frequencies are scaled down to 1, making it impossible to differentiate
+ // small, unequal numbers. When the spread between Min and Max frequencies
+ // fits well within MaxBits, we make the scale be at least 8.
+ const unsigned MaxBits = 64;
+ const unsigned SpreadBits = (Max / Min).lg();
+ Scaled64 ScalingFactor;
+ if (SpreadBits <= MaxBits - 3) {
+ // If the values are small enough, make the scaling factor at least 8 to
+ // allow distinguishing small values.
+ ScalingFactor = Min.inverse();
ScalingFactor <<= 3;
+ } else {
+ // If the values need more than MaxBits to be represented, saturate small
+ // frequency values down to 1 by using a scaling factor that benefits large
+ // frequency values.
+ ScalingFactor = Scaled64(1, MaxBits) / Max;
+ }
// Translate the floats to integers.
DEBUG(dbgs() << "float-to-int: min = " << Min << ", max = " << Max
diff --git a/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp
index 2b39d47..091943b 100644
--- a/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp
@@ -21,6 +21,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -28,7 +29,7 @@ using namespace llvm;
INITIALIZE_PASS_BEGIN(BranchProbabilityInfo, "branch-prob",
"Branch Probability Analysis", false, true)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(BranchProbabilityInfo, "branch-prob",
"Branch Probability Analysis", false, true)
@@ -114,11 +115,6 @@ static const uint32_t NORMAL_WEIGHT = 16;
// Minimum weight of an edge. Please note, that weight is NEVER 0.
static const uint32_t MIN_WEIGHT = 1;
-static uint32_t getMaxWeightFor(BasicBlock *BB) {
- return UINT32_MAX / BB->getTerminator()->getNumSuccessors();
-}
-
-
/// \brief Calculate edge weights for successors lead to unreachable.
///
/// Predict that a successor which leads necessarily to an
@@ -184,15 +180,18 @@ bool BranchProbabilityInfo::calcMetadataWeights(BasicBlock *BB) {
if (!WeightsNode)
return false;
+ // Check that the number of successors is manageable.
+ assert(TI->getNumSuccessors() < UINT32_MAX && "Too many successors");
+
// Ensure there are weights for all of the successors. Note that the first
// operand to the metadata node is a name, not a weight.
if (WeightsNode->getNumOperands() != TI->getNumSuccessors() + 1)
return false;
- // Build up the final weights that will be used in a temporary buffer, but
- // don't add them until all weihts are present. Each weight value is clamped
- // to [1, getMaxWeightFor(BB)].
- uint32_t WeightLimit = getMaxWeightFor(BB);
+ // Build up the final weights that will be used in a temporary buffer.
+ // Compute the sum of all weights to later decide whether they need to
+ // be scaled to fit in 32 bits.
+ uint64_t WeightSum = 0;
SmallVector<uint32_t, 2> Weights;
Weights.reserve(TI->getNumSuccessors());
for (unsigned i = 1, e = WeightsNode->getNumOperands(); i != e; ++i) {
@@ -200,12 +199,26 @@ bool BranchProbabilityInfo::calcMetadataWeights(BasicBlock *BB) {
mdconst::dyn_extract<ConstantInt>(WeightsNode->getOperand(i));
if (!Weight)
return false;
- Weights.push_back(
- std::max<uint32_t>(1, Weight->getLimitedValue(WeightLimit)));
+ assert(Weight->getValue().getActiveBits() <= 32 &&
+ "Too many bits for uint32_t");
+ Weights.push_back(Weight->getZExtValue());
+ WeightSum += Weights.back();
}
assert(Weights.size() == TI->getNumSuccessors() && "Checked above");
- for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
- setEdgeWeight(BB, i, Weights[i]);
+
+ // If the sum of weights does not fit in 32 bits, scale every weight down
+ // accordingly.
+ uint64_t ScalingFactor =
+ (WeightSum > UINT32_MAX) ? WeightSum / UINT32_MAX + 1 : 1;
+
+ WeightSum = 0;
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
+ uint32_t W = Weights[i] / ScalingFactor;
+ WeightSum += W;
+ setEdgeWeight(BB, i, W);
+ }
+ assert(WeightSum <= UINT32_MAX &&
+ "Expected weights to scale down to 32 bits");
return true;
}
@@ -378,6 +391,14 @@ bool BranchProbabilityInfo::calcZeroHeuristics(BasicBlock *BB) {
if (!CV)
return false;
+ // If the LHS is the result of AND'ing a value with a single bit bitmask,
+ // we don't have information about probabilities.
+ if (Instruction *LHS = dyn_cast<Instruction>(CI->getOperand(0)))
+ if (LHS->getOpcode() == Instruction::And)
+ if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(LHS->getOperand(1)))
+ if (AndRHS->getUniqueInteger().isPowerOf2())
+ return false;
+
bool isProb;
if (CV->isZero()) {
switch (CI->getPredicate()) {
@@ -484,7 +505,7 @@ bool BranchProbabilityInfo::calcInvokeHeuristics(BasicBlock *BB) {
}
void BranchProbabilityInfo::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<LoopInfo>();
+ AU.addRequired<LoopInfoWrapperPass>();
AU.setPreservesAll();
}
@@ -492,31 +513,29 @@ bool BranchProbabilityInfo::runOnFunction(Function &F) {
DEBUG(dbgs() << "---- Branch Probability Info : " << F.getName()
<< " ----\n\n");
LastF = &F; // Store the last function we ran on for printing.
- LI = &getAnalysis<LoopInfo>();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
assert(PostDominatedByUnreachable.empty());
assert(PostDominatedByColdCall.empty());
// Walk the basic blocks in post-order so that we can build up state about
// the successors of a block iteratively.
- for (po_iterator<BasicBlock *> I = po_begin(&F.getEntryBlock()),
- E = po_end(&F.getEntryBlock());
- I != E; ++I) {
- DEBUG(dbgs() << "Computing probabilities for " << I->getName() << "\n");
- if (calcUnreachableHeuristics(*I))
+ for (auto BB : post_order(&F.getEntryBlock())) {
+ DEBUG(dbgs() << "Computing probabilities for " << BB->getName() << "\n");
+ if (calcUnreachableHeuristics(BB))
continue;
- if (calcMetadataWeights(*I))
+ if (calcMetadataWeights(BB))
continue;
- if (calcColdCallHeuristics(*I))
+ if (calcColdCallHeuristics(BB))
continue;
- if (calcLoopBranchHeuristics(*I))
+ if (calcLoopBranchHeuristics(BB))
continue;
- if (calcPointerHeuristics(*I))
+ if (calcPointerHeuristics(BB))
continue;
- if (calcZeroHeuristics(*I))
+ if (calcZeroHeuristics(BB))
continue;
- if (calcFloatingPointHeuristics(*I))
+ if (calcFloatingPointHeuristics(BB))
continue;
- calcInvokeHeuristics(*I);
+ calcInvokeHeuristics(BB);
}
PostDominatedByUnreachable.clear();
@@ -546,7 +565,7 @@ uint32_t BranchProbabilityInfo::getSumForBlock(const BasicBlock *BB) const {
uint32_t PrevSum = Sum;
Sum += Weight;
- assert(Sum > PrevSum); (void) PrevSum;
+ assert(Sum >= PrevSum); (void) PrevSum;
}
return Sum;
@@ -609,14 +628,17 @@ uint32_t BranchProbabilityInfo::getEdgeWeight(const BasicBlock *Src,
uint32_t BranchProbabilityInfo::
getEdgeWeight(const BasicBlock *Src, const BasicBlock *Dst) const {
uint32_t Weight = 0;
+ bool FoundWeight = false;
DenseMap<Edge, uint32_t>::const_iterator MapI;
for (succ_const_iterator I = succ_begin(Src), E = succ_end(Src); I != E; ++I)
if (*I == Dst) {
MapI = Weights.find(std::make_pair(Src, I.getSuccessorIndex()));
- if (MapI != Weights.end())
+ if (MapI != Weights.end()) {
+ FoundWeight = true;
Weight += MapI->second;
+ }
}
- return (Weight == 0) ? DEFAULT_WEIGHT : Weight;
+ return (!FoundWeight) ? DEFAULT_WEIGHT : Weight;
}
/// Set the edge weight for a given edge specified by PredBlock and an index
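calcMetadataWeights above now keeps the raw 32-bit metadata weights and, only when their sum overflows 32 bits, divides every weight by a common factor so the per-terminator sum fits again. The same scaling as a standalone sketch over a plain vector:

#include <cstdint>
#include <vector>

// Scale raw !prof branch weights so their sum fits in a uint32_t, mirroring
// the ScalingFactor computation in calcMetadataWeights above.
static std::vector<uint32_t>
scaleBranchWeights(const std::vector<uint32_t> &Weights) {
  uint64_t Sum = 0;
  for (uint32_t W : Weights)
    Sum += W;
  const uint64_t ScalingFactor = (Sum > UINT32_MAX) ? Sum / UINT32_MAX + 1 : 1;
  std::vector<uint32_t> Scaled;
  Scaled.reserve(Weights.size());
  for (uint32_t W : Weights)
    Scaled.push_back(W / ScalingFactor); // very small weights may round to 0
  return Scaled;
}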
diff --git a/contrib/llvm/lib/Analysis/CFGPrinter.cpp b/contrib/llvm/lib/Analysis/CFGPrinter.cpp
index 89787f8..c86f1f5 100644
--- a/contrib/llvm/lib/Analysis/CFGPrinter.cpp
+++ b/contrib/llvm/lib/Analysis/CFGPrinter.cpp
@@ -77,7 +77,7 @@ namespace {
}
bool runOnFunction(Function &F) override {
- std::string Filename = "cfg." + F.getName().str() + ".dot";
+ std::string Filename = ("cfg." + F.getName() + ".dot").str();
errs() << "Writing '" << Filename << "'...";
std::error_code EC;
@@ -111,7 +111,7 @@ namespace {
}
bool runOnFunction(Function &F) override {
- std::string Filename = "cfg." + F.getName().str() + ".dot";
+ std::string Filename = ("cfg." + F.getName() + ".dot").str();
errs() << "Writing '" << Filename << "'...";
std::error_code EC;
diff --git a/contrib/llvm/lib/Analysis/CFLAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/CFLAliasAnalysis.cpp
index 88bb84a..84b31df 100644
--- a/contrib/llvm/lib/Analysis/CFLAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/CFLAliasAnalysis.cpp
@@ -43,14 +43,19 @@
#include "llvm/Pass.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <forward_list>
+#include <memory>
#include <tuple>
using namespace llvm;
+#define DEBUG_TYPE "cfl-aa"
+
// Try to go from a Value* to a Function*. Never returns nullptr.
static Optional<Function *> parentFunctionOfValue(Value *);
@@ -74,7 +79,7 @@ static Optional<Value *> getTargetValue(Instruction *);
static bool hasUsefulEdges(Instruction *);
const StratifiedIndex StratifiedLink::SetSentinel =
- std::numeric_limits<StratifiedIndex>::max();
+ std::numeric_limits<StratifiedIndex>::max();
namespace {
// StratifiedInfo Attribute things.
@@ -82,11 +87,13 @@ typedef unsigned StratifiedAttr;
LLVM_CONSTEXPR unsigned MaxStratifiedAttrIndex = NumStratifiedAttrs;
LLVM_CONSTEXPR unsigned AttrAllIndex = 0;
LLVM_CONSTEXPR unsigned AttrGlobalIndex = 1;
-LLVM_CONSTEXPR unsigned AttrFirstArgIndex = 2;
+LLVM_CONSTEXPR unsigned AttrUnknownIndex = 2;
+LLVM_CONSTEXPR unsigned AttrFirstArgIndex = 3;
LLVM_CONSTEXPR unsigned AttrLastArgIndex = MaxStratifiedAttrIndex;
LLVM_CONSTEXPR unsigned AttrMaxNumArgs = AttrLastArgIndex - AttrFirstArgIndex;
LLVM_CONSTEXPR StratifiedAttr AttrNone = 0;
+LLVM_CONSTEXPR StratifiedAttr AttrUnknown = 1 << AttrUnknownIndex;
LLVM_CONSTEXPR StratifiedAttr AttrAll = ~AttrNone;
// \brief StratifiedSets call for knowledge of "direction", so this is how we
@@ -141,9 +148,8 @@ struct FunctionInfo {
// Lots of functions have < 4 returns. Adjust as necessary.
SmallVector<Value *, 4> ReturnedValues;
- FunctionInfo(StratifiedSets<Value *> &&S,
- SmallVector<Value *, 4> &&RV)
- : Sets(std::move(S)), ReturnedValues(std::move(RV)) {}
+ FunctionInfo(StratifiedSets<Value *> &&S, SmallVector<Value *, 4> &&RV)
+ : Sets(std::move(S)), ReturnedValues(std::move(RV)) {}
};
struct CFLAliasAnalysis;
@@ -155,7 +161,7 @@ struct FunctionHandle : public CallbackVH {
assert(CFLAA != nullptr);
}
- virtual ~FunctionHandle() {}
+ ~FunctionHandle() override {}
void deleted() override { removeSelfFromCache(); }
void allUsesReplacedWith(Value *) override { removeSelfFromCache(); }
@@ -183,7 +189,7 @@ public:
initializeCFLAliasAnalysisPass(*PassRegistry::getPassRegistry());
}
- virtual ~CFLAliasAnalysis() {}
+ ~CFLAliasAnalysis() override {}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AliasAnalysis::getAnalysisUsage(AU);
@@ -226,14 +232,22 @@ public:
// Comparisons between global variables and other constants should be
// handled by BasicAA.
+ // TODO: ConstantExpr handling -- CFLAA may report NoAlias when comparing
+ // a GlobalValue and ConstantExpr, but every query needs to have at least
+ // one Value tied to a Function, and neither GlobalValues nor ConstantExprs
+ // are.
if (isa<Constant>(LocA.Ptr) && isa<Constant>(LocB.Ptr)) {
- return MayAlias;
+ return AliasAnalysis::alias(LocA, LocB);
}
- return query(LocA, LocB);
+ AliasResult QueryResult = query(LocA, LocB);
+ if (QueryResult == MayAlias)
+ return AliasAnalysis::alias(LocA, LocB);
+
+ return QueryResult;
}
- void initializePass() override { InitializeAliasAnalysis(this); }
+ bool doInitialization(Module &M) override;
};
void FunctionHandle::removeSelfFromCache() {
@@ -256,9 +270,19 @@ public:
llvm_unreachable("Unsupported instruction encountered");
}
+ void visitPtrToIntInst(PtrToIntInst &Inst) {
+ auto *Ptr = Inst.getOperand(0);
+ Output.push_back(Edge(Ptr, Ptr, EdgeType::Assign, AttrUnknown));
+ }
+
+ void visitIntToPtrInst(IntToPtrInst &Inst) {
+ auto *Ptr = &Inst;
+ Output.push_back(Edge(Ptr, Ptr, EdgeType::Assign, AttrUnknown));
+ }
+
void visitCastInst(CastInst &Inst) {
- Output.push_back(Edge(&Inst, Inst.getOperand(0), EdgeType::Assign,
- AttrNone));
+ Output.push_back(
+ Edge(&Inst, Inst.getOperand(0), EdgeType::Assign, AttrNone));
}
void visitBinaryOperator(BinaryOperator &Inst) {
@@ -281,8 +305,7 @@ public:
}
void visitPHINode(PHINode &Inst) {
- for (unsigned I = 0, E = Inst.getNumIncomingValues(); I != E; ++I) {
- Value *Val = Inst.getIncomingValue(I);
+ for (Value *Val : Inst.incoming_values()) {
Output.push_back(Edge(&Inst, Val, EdgeType::Assign, AttrNone));
}
}
@@ -295,8 +318,11 @@ public:
}
void visitSelectInst(SelectInst &Inst) {
- auto *Condition = Inst.getCondition();
- Output.push_back(Edge(&Inst, Condition, EdgeType::Assign, AttrNone));
+ // Condition is not processed here (The actual statement producing
+ // the condition result is processed elsewhere). For select, the
+ // condition is evaluated, but not loaded, stored, or assigned
+ // simply as a result of being the condition of a select.
+
auto *TrueVal = Inst.getTrueValue();
Output.push_back(Edge(&Inst, TrueVal, EdgeType::Assign, AttrNone));
auto *FalseVal = Inst.getFalseValue();
@@ -367,7 +393,7 @@ public:
// I put this here to give us an upper bound on time taken by IPA. Is it
// really (realistically) needed? Keep in mind that we do have an n^2 algo.
- if (std::distance(Args.begin(), Args.end()) > (int) MaxSupportedArgs)
+ if (std::distance(Args.begin(), Args.end()) > (int)MaxSupportedArgs)
return false;
// Exit early if we'll fail anyway
@@ -419,7 +445,7 @@ public:
}
if (AddEdge)
Output.push_back(Edge(FuncValue, ArgVal, EdgeType::Assign,
- StratifiedAttrs().flip()));
+ StratifiedAttrs().flip()));
}
if (Parameters.size() != Arguments.size())
@@ -561,8 +587,7 @@ private:
EdgeTypeT Weight;
Node Other;
- Edge(const EdgeTypeT &W, const Node &N)
- : Weight(W), Other(N) {}
+ Edge(const EdgeTypeT &W, const Node &N) : Weight(W), Other(N) {}
bool operator==(const Edge &E) const {
return Weight == E.Weight && Other == E.Other;
@@ -725,6 +750,25 @@ static Level directionOfEdgeType(EdgeType);
static void buildGraphFrom(CFLAliasAnalysis &, Function *,
SmallVectorImpl<Value *> &, NodeMapT &, GraphT &);
+// Gets the edges of a ConstantExpr as if it was an Instruction. This
+// function also acts on any nested ConstantExprs, adding the edges
+// of those to the given SmallVector as well.
+static void constexprToEdges(CFLAliasAnalysis &, ConstantExpr &,
+ SmallVectorImpl<Edge> &);
+
+// Given an Instruction, this will add it to the graph, along with any
+// Instructions that are potentially only available from said Instruction
+// For example, given the following line:
+// %0 = load i16* getelementptr ([1 x i16]* @a, 0, 0), align 2
+// addInstructionToGraph would add both the `load` and `getelementptr`
+// instructions to the graph appropriately.
+static void addInstructionToGraph(CFLAliasAnalysis &, Instruction &,
+ SmallVectorImpl<Value *> &, NodeMapT &,
+ GraphT &);
+
+// Notes whether it would be pointless to add the given Value to our sets.
+static bool canSkipAddingToSets(Value *Val);
+
// Builds the graph + StratifiedSets for a function.
static FunctionInfo buildSetsFrom(CFLAliasAnalysis &, Function *);
@@ -768,13 +812,16 @@ static Optional<StratifiedAttr> valueToAttrIndex(Value *Val) {
return AttrGlobalIndex;
if (auto *Arg = dyn_cast<Argument>(Val))
- if (!Arg->hasNoAliasAttr())
+ // Only pointer arguments should have the argument attribute,
+ // because things can't escape through scalars without us seeing a
+ // cast, and thus, interaction with them doesn't matter.
+ if (!Arg->hasNoAliasAttr() && Arg->getType()->isPointerTy())
return argNumberToAttrIndex(Arg->getArgNo());
return NoneType();
}
static StratifiedAttr argNumberToAttrIndex(unsigned ArgNum) {
- if (ArgNum > AttrMaxNumArgs)
+ if (ArgNum >= AttrMaxNumArgs)
return AttrAllIndex;
return ArgNum + AttrFirstArgIndex;
}
@@ -793,6 +840,8 @@ static EdgeType flipWeight(EdgeType Initial) {
static void argsToEdges(CFLAliasAnalysis &Analysis, Instruction *Inst,
SmallVectorImpl<Edge> &Output) {
+ assert(hasUsefulEdges(Inst) &&
+ "Expected instructions to have 'useful' edges");
GetEdgesVisitor v(Analysis, Output);
v.visit(Inst);
}
@@ -809,13 +858,41 @@ static Level directionOfEdgeType(EdgeType Weight) {
llvm_unreachable("Incomplete switch coverage");
}
-// Aside: We may remove graph construction entirely, because it doesn't really
-// buy us much that we don't already have. I'd like to add interprocedural
-// analysis prior to this however, in case that somehow requires the graph
-// produced by this for efficient execution
-static void buildGraphFrom(CFLAliasAnalysis &Analysis, Function *Fn,
- SmallVectorImpl<Value *> &ReturnedValues,
- NodeMapT &Map, GraphT &Graph) {
+static void constexprToEdges(CFLAliasAnalysis &Analysis,
+ ConstantExpr &CExprToCollapse,
+ SmallVectorImpl<Edge> &Results) {
+ SmallVector<ConstantExpr *, 4> Worklist;
+ Worklist.push_back(&CExprToCollapse);
+
+ SmallVector<Edge, 8> ConstexprEdges;
+ while (!Worklist.empty()) {
+ auto *CExpr = Worklist.pop_back_val();
+ std::unique_ptr<Instruction> Inst(CExpr->getAsInstruction());
+
+ if (!hasUsefulEdges(Inst.get()))
+ continue;
+
+ ConstexprEdges.clear();
+ argsToEdges(Analysis, Inst.get(), ConstexprEdges);
+ for (auto &Edge : ConstexprEdges) {
+ if (Edge.From == Inst.get())
+ Edge.From = CExpr;
+ else if (auto *Nested = dyn_cast<ConstantExpr>(Edge.From))
+ Worklist.push_back(Nested);
+
+ if (Edge.To == Inst.get())
+ Edge.To = CExpr;
+ else if (auto *Nested = dyn_cast<ConstantExpr>(Edge.To))
+ Worklist.push_back(Nested);
+ }
+
+ Results.append(ConstexprEdges.begin(), ConstexprEdges.end());
+ }
+}
+
+static void addInstructionToGraph(CFLAliasAnalysis &Analysis, Instruction &Inst,
+ SmallVectorImpl<Value *> &ReturnedValues,
+ NodeMapT &Map, GraphT &Graph) {
const auto findOrInsertNode = [&Map, &Graph](Value *Val) {
auto Pair = Map.insert(std::make_pair(Val, GraphT::Node()));
auto &Iter = Pair.first;
@@ -826,42 +903,86 @@ static void buildGraphFrom(CFLAliasAnalysis &Analysis, Function *Fn,
return Iter->second;
};
+ // We don't want the edges of most "return" instructions, but we *do* want
+ // to know what can be returned.
+ if (isa<ReturnInst>(&Inst))
+ ReturnedValues.push_back(&Inst);
+
+ if (!hasUsefulEdges(&Inst))
+ return;
+
SmallVector<Edge, 8> Edges;
- for (auto &Bb : Fn->getBasicBlockList()) {
- for (auto &Inst : Bb.getInstList()) {
- // We don't want the edges of most "return" instructions, but we *do* want
- // to know what can be returned.
- if (auto *Ret = dyn_cast<ReturnInst>(&Inst))
- ReturnedValues.push_back(Ret);
-
- if (!hasUsefulEdges(&Inst))
- continue;
+ argsToEdges(Analysis, &Inst, Edges);
+
+ // In the case of an unused alloca (or similar), edges may be empty. Note
+ // that it exists so we can potentially answer NoAlias.
+ if (Edges.empty()) {
+ auto MaybeVal = getTargetValue(&Inst);
+ assert(MaybeVal.hasValue());
+ auto *Target = *MaybeVal;
+ findOrInsertNode(Target);
+ return;
+ }
- Edges.clear();
- argsToEdges(Analysis, &Inst, Edges);
+ const auto addEdgeToGraph = [&Graph, &findOrInsertNode](const Edge &E) {
+ auto To = findOrInsertNode(E.To);
+ auto From = findOrInsertNode(E.From);
+ auto FlippedWeight = flipWeight(E.Weight);
+ auto Attrs = E.AdditionalAttrs;
+ Graph.addEdge(From, To, std::make_pair(E.Weight, Attrs),
+ std::make_pair(FlippedWeight, Attrs));
+ };
- // In the case of an unused alloca (or similar), edges may be empty. Note
- // that it exists so we can potentially answer NoAlias.
- if (Edges.empty()) {
- auto MaybeVal = getTargetValue(&Inst);
- assert(MaybeVal.hasValue());
- auto *Target = *MaybeVal;
- findOrInsertNode(Target);
- continue;
- }
+ SmallVector<ConstantExpr *, 4> ConstantExprs;
+ for (const Edge &E : Edges) {
+ addEdgeToGraph(E);
+ if (auto *Constexpr = dyn_cast<ConstantExpr>(E.To))
+ ConstantExprs.push_back(Constexpr);
+ if (auto *Constexpr = dyn_cast<ConstantExpr>(E.From))
+ ConstantExprs.push_back(Constexpr);
+ }
- for (const Edge &E : Edges) {
- auto To = findOrInsertNode(E.To);
- auto From = findOrInsertNode(E.From);
- auto FlippedWeight = flipWeight(E.Weight);
- auto Attrs = E.AdditionalAttrs;
- Graph.addEdge(From, To, std::make_pair(E.Weight, Attrs),
- std::make_pair(FlippedWeight, Attrs));
- }
- }
+ for (ConstantExpr *CE : ConstantExprs) {
+ Edges.clear();
+ constexprToEdges(Analysis, *CE, Edges);
+ std::for_each(Edges.begin(), Edges.end(), addEdgeToGraph);
}
}
+// Aside: We may remove graph construction entirely, because it doesn't really
+// buy us much that we don't already have. I'd like to add interprocedural
+// analysis prior to this however, in case that somehow requires the graph
+// produced by this for efficient execution
+static void buildGraphFrom(CFLAliasAnalysis &Analysis, Function *Fn,
+ SmallVectorImpl<Value *> &ReturnedValues,
+ NodeMapT &Map, GraphT &Graph) {
+ for (auto &Bb : Fn->getBasicBlockList())
+ for (auto &Inst : Bb.getInstList())
+ addInstructionToGraph(Analysis, Inst, ReturnedValues, Map, Graph);
+}
+
+static bool canSkipAddingToSets(Value *Val) {
+ // Constants can share instances, which may falsely unify multiple
+ // sets, e.g. in
+ // store i32* null, i32** %ptr1
+ // store i32* null, i32** %ptr2
+ // clearly ptr1 and ptr2 should not be unified into the same set, so
+ // we should filter out the (potentially shared) instance of
+ // i32* null.
+ if (isa<Constant>(Val)) {
+ bool Container = isa<ConstantVector>(Val) || isa<ConstantArray>(Val) ||
+ isa<ConstantStruct>(Val);
+ // TODO: Because all of these things are constant, we can determine whether
+ // the data is *actually* mutable at graph building time. This will probably
+ // come for free/cheap with offset awareness.
+ bool CanStoreMutableData =
+ isa<GlobalValue>(Val) || isa<ConstantExpr>(Val) || Container;
+ return !CanStoreMutableData;
+ }
+
+ return false;
+}
+
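The failure mode this guards against is easy to reproduce with a toy union-find (plain C++, not LLVM's StratifiedSets; the names below are illustrative only): if the single shared null constant is allowed to join value sets, two otherwise unrelated pointers collapse into one set and the NoAlias answer is lost.

    #include <cstdio>
    #include <map>
    #include <string>

    // Minimal union-find standing in for set unification during the graph walk.
    struct UnionFind {
      std::map<std::string, std::string> Parent;
      std::string find(const std::string &X) {
        if (!Parent.count(X))
          Parent[X] = X;
        if (Parent[X] != X)
          Parent[X] = find(Parent[X]);
        return Parent[X];
      }
      void unite(const std::string &A, const std::string &B) {
        Parent[find(A)] = find(B);
      }
    };

    int main() {
      UnionFind Unfiltered, Filtered;
      // store i32* null, i32** %ptr1 ; store i32* null, i32** %ptr2
      Unfiltered.unite("ptr1", "null");
      Unfiltered.unite("ptr2", "null");
      std::printf("unfiltered: ptr1 and ptr2 %s\n",
                  Unfiltered.find("ptr1") == Unfiltered.find("ptr2") ? "unified"
                                                                     : "separate");
      // With canSkipAddingToSets, the shared null never joins a set.
      std::printf("filtered:   ptr1 and ptr2 %s\n",
                  Filtered.find("ptr1") == Filtered.find("ptr2") ? "unified"
                                                                 : "separate");
      return 0;
    }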
static FunctionInfo buildSetsFrom(CFLAliasAnalysis &Analysis, Function *Fn) {
NodeMapT Map;
GraphT Graph;
@@ -893,7 +1014,7 @@ static FunctionInfo buildSetsFrom(CFLAliasAnalysis &Analysis, Function *Fn) {
while (!Worklist.empty()) {
auto Node = Worklist.pop_back_val();
auto *CurValue = findValueOrDie(Node);
- if (isa<Constant>(CurValue) && !isa<GlobalValue>(CurValue))
+ if (canSkipAddingToSets(CurValue))
continue;
for (const auto &EdgeTuple : Graph.edgesFor(Node)) {
@@ -902,7 +1023,7 @@ static FunctionInfo buildSetsFrom(CFLAliasAnalysis &Analysis, Function *Fn) {
auto &OtherNode = std::get<1>(EdgeTuple);
auto *OtherValue = findValueOrDie(OtherNode);
- if (isa<Constant>(OtherValue) && !isa<GlobalValue>(OtherValue))
+ if (canSkipAddingToSets(OtherValue))
continue;
bool Added;
@@ -918,16 +1039,16 @@ static FunctionInfo buildSetsFrom(CFLAliasAnalysis &Analysis, Function *Fn) {
break;
}
- if (Added) {
- auto Aliasing = Weight.second;
- if (auto MaybeCurIndex = valueToAttrIndex(CurValue))
- Aliasing.set(*MaybeCurIndex);
- if (auto MaybeOtherIndex = valueToAttrIndex(OtherValue))
- Aliasing.set(*MaybeOtherIndex);
- Builder.noteAttributes(CurValue, Aliasing);
- Builder.noteAttributes(OtherValue, Aliasing);
+ auto Aliasing = Weight.second;
+ if (auto MaybeCurIndex = valueToAttrIndex(CurValue))
+ Aliasing.set(*MaybeCurIndex);
+ if (auto MaybeOtherIndex = valueToAttrIndex(OtherValue))
+ Aliasing.set(*MaybeOtherIndex);
+ Builder.noteAttributes(CurValue, Aliasing);
+ Builder.noteAttributes(OtherValue, Aliasing);
+
+ if (Added)
Worklist.push_back(OtherNode);
- }
}
}
}
@@ -937,7 +1058,12 @@ static FunctionInfo buildSetsFrom(CFLAliasAnalysis &Analysis, Function *Fn) {
// things that were present during construction being present in the graph.
// So, we add all present arguments here.
for (auto &Arg : Fn->args()) {
- Builder.add(&Arg);
+ if (!Builder.add(&Arg))
+ continue;
+
+ auto Attrs = valueToAttrIndex(&Arg);
+ if (Attrs.hasValue())
+ Builder.noteAttributes(&Arg, *Attrs);
}
return FunctionInfo(Builder.build(), std::move(ReturnedValues));
@@ -964,8 +1090,10 @@ CFLAliasAnalysis::query(const AliasAnalysis::Location &LocA,
auto MaybeFnA = parentFunctionOfValue(ValA);
auto MaybeFnB = parentFunctionOfValue(ValB);
if (!MaybeFnA.hasValue() && !MaybeFnB.hasValue()) {
- llvm_unreachable("Don't know how to extract the parent function "
- "from values A or B");
+ // The only times this is known to happen are when globals + InlineAsm
+ // are involved.
+ DEBUG(dbgs() << "CFLAA: could not extract parent function information.\n");
+ return AliasAnalysis::MayAlias;
}
if (MaybeFnA.hasValue()) {
@@ -991,23 +1119,36 @@ CFLAliasAnalysis::query(const AliasAnalysis::Location &LocA,
auto SetA = *MaybeA;
auto SetB = *MaybeB;
-
- if (SetA.Index == SetB.Index)
- return AliasAnalysis::PartialAlias;
-
auto AttrsA = Sets.getLink(SetA.Index).Attrs;
auto AttrsB = Sets.getLink(SetB.Index).Attrs;
+
// Stratified set attributes are used as markers to signify whether a member
- // of a StratifiedSet (or a member of a set above the current set) has
+ // of a StratifiedSet (or a member of a set above the current set) has
// interacted with either arguments or globals. "Interacted with" meaning
- // its value may be different depending on the value of an argument or
+ // its value may be different depending on the value of an argument or
// global. The thought behind this is that, because arguments and globals
// may alias each other, if AttrsA and AttrsB have touched args/globals,
- // we must conservatively say that they alias. However, if at least one of
- // the sets has no values that could legally be altered by changing the value
+ // we must conservatively say that they alias. However, if at least one of
+ // the sets has no values that could legally be altered by changing the value
// of an argument or global, then we don't have to be as conservative.
if (AttrsA.any() && AttrsB.any())
return AliasAnalysis::MayAlias;
+ // We currently unify things even if the accesses to them may not be in
+ // bounds, so we can't return partial alias here because we don't
+ // know whether the pointer is really within the object or not.
+ // IE Given an out of bounds GEP and an alloca'd pointer, we may
+ // unify the two. We can't return partial alias for this case.
+ // Since we do not currently track enough information to differentiate the
+ // two cases, conservatively return MayAlias when the set indices match.
+
+ if (SetA.Index == SetB.Index)
+ return AliasAnalysis::MayAlias;
+
return AliasAnalysis::NoAlias;
}
+
+bool CFLAliasAnalysis::doInitialization(Module &M) {
+ InitializeAliasAnalysis(this, &M.getDataLayout());
+ return true;
+}
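The args/globals reasoning in the query() hunk above boils down to the fact that the same memory can be named by both a global and a pointer argument; a plain C++ illustration (not LLVM code, names invented here):

    #include <cstdio>

    int G;

    // P and &G refer to the same object for the call below, so a set that has
    // touched an argument and a set that has touched a global cannot safely be
    // reported as NoAlias.
    static void storeThroughArg(int *P) { *P = 42; }

    int main() {
      storeThroughArg(&G);
      std::printf("G = %d\n", G); // 42: the argument aliased the global
      return 0;
    }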
diff --git a/contrib/llvm/lib/Analysis/CodeMetrics.cpp b/contrib/llvm/lib/Analysis/CodeMetrics.cpp
index fa5683c..46a2c43 100644
--- a/contrib/llvm/lib/Analysis/CodeMetrics.cpp
+++ b/contrib/llvm/lib/Analysis/CodeMetrics.cpp
@@ -21,6 +21,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#define DEBUG_TYPE "code-metrics"
diff --git a/contrib/llvm/lib/Analysis/ConstantFolding.cpp b/contrib/llvm/lib/Analysis/ConstantFolding.cpp
index fd8f2ae..2f4c6a9 100644
--- a/contrib/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/contrib/llvm/lib/Analysis/ConstantFolding.cpp
@@ -20,6 +20,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Config/config.h"
#include "llvm/IR/Constants.h"
@@ -33,7 +34,6 @@
#include "llvm/IR/Operator.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetLibraryInfo.h"
#include <cerrno>
#include <cmath>
@@ -50,8 +50,7 @@ using namespace llvm;
/// Constant fold bitcast, symbolically evaluating it with DataLayout.
/// This always returns a non-null constant, but it may be a
/// ConstantExpr if unfoldable.
-static Constant *FoldBitCast(Constant *C, Type *DestTy,
- const DataLayout &TD) {
+static Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) {
// Catch the obvious splat cases.
if (C->isNullValue() && !DestTy->isX86_MMXTy())
return Constant::getNullValue(DestTy);
@@ -84,11 +83,11 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
// Now that we know that the input value is a vector of integers, just shift
// and insert them into our result.
- unsigned BitShift = TD.getTypeAllocSizeInBits(SrcEltTy);
+ unsigned BitShift = DL.getTypeAllocSizeInBits(SrcEltTy);
APInt Result(IT->getBitWidth(), 0);
for (unsigned i = 0; i != NumSrcElts; ++i) {
Result <<= BitShift;
- if (TD.isLittleEndian())
+ if (DL.isLittleEndian())
Result |= CDV->getElementAsInteger(NumSrcElts-i-1);
else
Result |= CDV->getElementAsInteger(i);
@@ -106,7 +105,7 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
// vector so the code below can handle it uniformly.
if (isa<ConstantFP>(C) || isa<ConstantInt>(C)) {
Constant *Ops = C; // don't take the address of C!
- return FoldBitCast(ConstantVector::get(Ops), DestTy, TD);
+ return FoldBitCast(ConstantVector::get(Ops), DestTy, DL);
}
// If this is a bitcast from constant vector -> vector, fold it.
@@ -138,7 +137,7 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
Type *DestIVTy =
VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumDstElt);
// Recursively handle this integer conversion, if possible.
- C = FoldBitCast(C, DestIVTy, TD);
+ C = FoldBitCast(C, DestIVTy, DL);
// Finally, IR can handle this now that #elts line up.
return ConstantExpr::getBitCast(C, DestTy);
@@ -162,7 +161,7 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
// of the same size, and that their #elements is not the same. Do the
// conversion here, which depends on whether the input or output has
// more elements.
- bool isLittleEndian = TD.isLittleEndian();
+ bool isLittleEndian = DL.isLittleEndian();
SmallVector<Constant*, 32> Result;
if (NumDstElt < NumSrcElt) {
@@ -198,7 +197,7 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
// Handle: bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
unsigned Ratio = NumDstElt/NumSrcElt;
- unsigned DstBitSize = TD.getTypeSizeInBits(DstEltTy);
+ unsigned DstBitSize = DL.getTypeSizeInBits(DstEltTy);
// Loop over each source value, expanding into multiple results.
for (unsigned i = 0; i != NumSrcElt; ++i) {
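For the case named in the comment above (bitcast of <2 x i64> <i64 0, i64 1> to <4 x i32>), the expansion loop is just a byte-level reinterpretation; a host-side sketch of the little-endian result (ordinary C++, assuming a little-endian host, not part of the patch):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
      uint64_t Src[2] = {0, 1};           // <2 x i64> <i64 0, i64 1>
      uint32_t Dst[4];                    // <4 x i32>
      std::memcpy(Dst, Src, sizeof(Src)); // same bytes, narrower elements
      std::printf("<4 x i32> <%u, %u, %u, %u>\n", (unsigned)Dst[0],
                  (unsigned)Dst[1], (unsigned)Dst[2], (unsigned)Dst[3]);
      // Little endian: prints <4 x i32> <0, 0, 1, 0>; each i64 yields its low
      // half first, which is what the Ratio/DstBitSize loop computes.
      return 0;
    }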
@@ -235,10 +234,10 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
/// If this constant is a constant offset from a global, return the global and
/// the constant. Because of constantexprs, this function is recursive.
static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
- APInt &Offset, const DataLayout &TD) {
+ APInt &Offset, const DataLayout &DL) {
// Trivial case, constant is the global.
if ((GV = dyn_cast<GlobalValue>(C))) {
- unsigned BitWidth = TD.getPointerTypeSizeInBits(GV->getType());
+ unsigned BitWidth = DL.getPointerTypeSizeInBits(GV->getType());
Offset = APInt(BitWidth, 0);
return true;
}
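As a worked instance of what this helper accumulates, take the GEP quoted in the next hunk, i32* getelementptr ([5 x i32]* @a, i32 0, i32 5): assuming a 4-byte i32, the reported global is @a and the offset is 20 bytes. The same arithmetic in plain C++:

    #include <cstdint>
    #include <cstdio>

    int main() {
      const uint64_t ArrayAllocSize = 5 * 4; // alloc size of [5 x i32]
      const uint64_t EltAllocSize = 4;       // alloc size of i32
      // The first index scales by the pointee ([5 x i32]), the second by the
      // array element type.
      uint64_t Offset = 0 * ArrayAllocSize + 5 * EltAllocSize;
      std::printf("GV = @a, Offset = %llu bytes\n",
                  (unsigned long long)Offset); // 20
      return 0;
    }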
@@ -251,22 +250,22 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
if (CE->getOpcode() == Instruction::PtrToInt ||
CE->getOpcode() == Instruction::BitCast ||
CE->getOpcode() == Instruction::AddrSpaceCast)
- return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, TD);
+ return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, DL);
// i32* getelementptr ([5 x i32]* @a, i32 0, i32 5)
GEPOperator *GEP = dyn_cast<GEPOperator>(CE);
if (!GEP)
return false;
- unsigned BitWidth = TD.getPointerTypeSizeInBits(GEP->getType());
+ unsigned BitWidth = DL.getPointerTypeSizeInBits(GEP->getType());
APInt TmpOffset(BitWidth, 0);
// If the base isn't a global+constant, we aren't either.
- if (!IsConstantOffsetFromGlobal(CE->getOperand(0), GV, TmpOffset, TD))
+ if (!IsConstantOffsetFromGlobal(CE->getOperand(0), GV, TmpOffset, DL))
return false;
// Otherwise, add any offset that our operands provide.
- if (!GEP->accumulateConstantOffset(TD, TmpOffset))
+ if (!GEP->accumulateConstantOffset(DL, TmpOffset))
return false;
Offset = TmpOffset;
@@ -276,11 +275,11 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
/// Recursive helper to read bits out of global. C is the constant being copied
/// out of. ByteOffset is an offset into C. CurPtr is the pointer to copy
/// results into and BytesLeft is the number of bytes left in
-/// the CurPtr buffer. TD is the target data.
+/// the CurPtr buffer. DL is the DataLayout.
static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
unsigned char *CurPtr, unsigned BytesLeft,
- const DataLayout &TD) {
- assert(ByteOffset <= TD.getTypeAllocSize(C->getType()) &&
+ const DataLayout &DL) {
+ assert(ByteOffset <= DL.getTypeAllocSize(C->getType()) &&
"Out of range access");
// If this element is zero or undefined, we can just return since *CurPtr is
@@ -298,7 +297,7 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
for (unsigned i = 0; i != BytesLeft && ByteOffset != IntBytes; ++i) {
int n = ByteOffset;
- if (!TD.isLittleEndian())
+ if (!DL.isLittleEndian())
n = IntBytes - n - 1;
CurPtr[i] = (unsigned char)(Val >> (n * 8));
++ByteOffset;
@@ -308,22 +307,22 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
if (CFP->getType()->isDoubleTy()) {
- C = FoldBitCast(C, Type::getInt64Ty(C->getContext()), TD);
- return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, TD);
+ C = FoldBitCast(C, Type::getInt64Ty(C->getContext()), DL);
+ return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, DL);
}
if (CFP->getType()->isFloatTy()){
- C = FoldBitCast(C, Type::getInt32Ty(C->getContext()), TD);
- return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, TD);
+ C = FoldBitCast(C, Type::getInt32Ty(C->getContext()), DL);
+ return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, DL);
}
if (CFP->getType()->isHalfTy()){
- C = FoldBitCast(C, Type::getInt16Ty(C->getContext()), TD);
- return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, TD);
+ C = FoldBitCast(C, Type::getInt16Ty(C->getContext()), DL);
+ return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, DL);
}
return false;
}
if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C)) {
- const StructLayout *SL = TD.getStructLayout(CS->getType());
+ const StructLayout *SL = DL.getStructLayout(CS->getType());
unsigned Index = SL->getElementContainingOffset(ByteOffset);
uint64_t CurEltOffset = SL->getElementOffset(Index);
ByteOffset -= CurEltOffset;
@@ -331,11 +330,11 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
while (1) {
// If the element access is to the element itself and not to tail padding,
// read the bytes from the element.
- uint64_t EltSize = TD.getTypeAllocSize(CS->getOperand(Index)->getType());
+ uint64_t EltSize = DL.getTypeAllocSize(CS->getOperand(Index)->getType());
if (ByteOffset < EltSize &&
!ReadDataFromGlobal(CS->getOperand(Index), ByteOffset, CurPtr,
- BytesLeft, TD))
+ BytesLeft, DL))
return false;
++Index;
@@ -362,7 +361,7 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
if (isa<ConstantArray>(C) || isa<ConstantVector>(C) ||
isa<ConstantDataSequential>(C)) {
Type *EltTy = C->getType()->getSequentialElementType();
- uint64_t EltSize = TD.getTypeAllocSize(EltTy);
+ uint64_t EltSize = DL.getTypeAllocSize(EltTy);
uint64_t Index = ByteOffset / EltSize;
uint64_t Offset = ByteOffset - Index * EltSize;
uint64_t NumElts;
@@ -373,7 +372,7 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
for (; Index != NumElts; ++Index) {
if (!ReadDataFromGlobal(C->getAggregateElement(Index), Offset, CurPtr,
- BytesLeft, TD))
+ BytesLeft, DL))
return false;
uint64_t BytesWritten = EltSize - Offset;
@@ -390,9 +389,9 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
if (CE->getOpcode() == Instruction::IntToPtr &&
- CE->getOperand(0)->getType() == TD.getIntPtrType(CE->getType())) {
+ CE->getOperand(0)->getType() == DL.getIntPtrType(CE->getType())) {
return ReadDataFromGlobal(CE->getOperand(0), ByteOffset, CurPtr,
- BytesLeft, TD);
+ BytesLeft, DL);
}
}
@@ -401,7 +400,7 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
}
static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
- const DataLayout &TD) {
+ const DataLayout &DL) {
PointerType *PTy = cast<PointerType>(C->getType());
Type *LoadTy = PTy->getElementType();
IntegerType *IntType = dyn_cast<IntegerType>(LoadTy);
@@ -423,14 +422,13 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
MapTy = Type::getInt64PtrTy(C->getContext(), AS);
else if (LoadTy->isVectorTy()) {
MapTy = PointerType::getIntNPtrTy(C->getContext(),
- TD.getTypeAllocSizeInBits(LoadTy),
- AS);
+ DL.getTypeAllocSizeInBits(LoadTy), AS);
} else
return nullptr;
- C = FoldBitCast(C, MapTy, TD);
- if (Constant *Res = FoldReinterpretLoadFromConstPtr(C, TD))
- return FoldBitCast(Res, LoadTy, TD);
+ C = FoldBitCast(C, MapTy, DL);
+ if (Constant *Res = FoldReinterpretLoadFromConstPtr(C, DL))
+ return FoldBitCast(Res, LoadTy, DL);
return nullptr;
}
@@ -440,7 +438,7 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
GlobalValue *GVal;
APInt Offset;
- if (!IsConstantOffsetFromGlobal(C, GVal, Offset, TD))
+ if (!IsConstantOffsetFromGlobal(C, GVal, Offset, DL))
return nullptr;
GlobalVariable *GV = dyn_cast<GlobalVariable>(GVal);
@@ -455,16 +453,16 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
// If we're not accessing anything in this constant, the result is undefined.
if (Offset.getZExtValue() >=
- TD.getTypeAllocSize(GV->getInitializer()->getType()))
+ DL.getTypeAllocSize(GV->getInitializer()->getType()))
return UndefValue::get(IntType);
unsigned char RawBytes[32] = {0};
if (!ReadDataFromGlobal(GV->getInitializer(), Offset.getZExtValue(), RawBytes,
- BytesLoaded, TD))
+ BytesLoaded, DL))
return nullptr;
APInt ResultVal = APInt(IntType->getBitWidth(), 0);
- if (TD.isLittleEndian()) {
+ if (DL.isLittleEndian()) {
ResultVal = RawBytes[BytesLoaded - 1];
for (unsigned i = 1; i != BytesLoaded; ++i) {
ResultVal <<= 8;
@@ -482,9 +480,7 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
}
static Constant *ConstantFoldLoadThroughBitcast(ConstantExpr *CE,
- const DataLayout *DL) {
- if (!DL)
- return nullptr;
+ const DataLayout &DL) {
auto *DestPtrTy = dyn_cast<PointerType>(CE->getType());
if (!DestPtrTy)
return nullptr;
@@ -499,7 +495,7 @@ static Constant *ConstantFoldLoadThroughBitcast(ConstantExpr *CE,
// If the type sizes are the same and a cast is legal, just directly
// cast the constant.
- if (DL->getTypeSizeInBits(DestTy) == DL->getTypeSizeInBits(SrcTy)) {
+ if (DL.getTypeSizeInBits(DestTy) == DL.getTypeSizeInBits(SrcTy)) {
Instruction::CastOps Cast = Instruction::BitCast;
// If we are going from a pointer to int or vice versa, we spell the cast
// differently.
@@ -530,7 +526,7 @@ static Constant *ConstantFoldLoadThroughBitcast(ConstantExpr *CE,
/// Return the value that a load from C would produce if it is constant and
/// determinable. If this is not determinable, return null.
Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C,
- const DataLayout *TD) {
+ const DataLayout &DL) {
// First, try the easy cases:
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C))
if (GV->isConstant() && GV->hasDefinitiveInitializer())
@@ -552,13 +548,13 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C,
}
if (CE->getOpcode() == Instruction::BitCast)
- if (Constant *LoadedC = ConstantFoldLoadThroughBitcast(CE, TD))
+ if (Constant *LoadedC = ConstantFoldLoadThroughBitcast(CE, DL))
return LoadedC;
// Instead of loading constant c string, use corresponding integer value
// directly if string length is small enough.
StringRef Str;
- if (TD && getConstantStringInfo(CE, Str) && !Str.empty()) {
+ if (getConstantStringInfo(CE, Str) && !Str.empty()) {
unsigned StrLen = Str.size();
Type *Ty = cast<PointerType>(CE->getType())->getElementType();
unsigned NumBits = Ty->getPrimitiveSizeInBits();
@@ -568,7 +564,7 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C,
(isa<IntegerType>(Ty) || Ty->isFloatingPointTy())) {
APInt StrVal(NumBits, 0);
APInt SingleChar(NumBits, 0);
- if (TD->isLittleEndian()) {
+ if (DL.isLittleEndian()) {
for (signed i = StrLen-1; i >= 0; i--) {
SingleChar = (uint64_t) Str[i] & UCHAR_MAX;
StrVal = (StrVal << 8) | SingleChar;
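A standalone restatement of the little-endian branch above: folding an i32 load from the constant string "abcd" produces 0x64636261, with the first character landing in the lowest byte (plain C++, not the patch itself):

    #include <cstdint>
    #include <cstdio>

    int main() {
      const char *Str = "abcd"; // i32 load from a constant c"abcd\00"
      uint64_t StrVal = 0;
      // Walk the string backwards so the first character ends up in the lowest
      // byte, mirroring the loop above.
      for (int i = 3; i >= 0; --i)
        StrVal = (StrVal << 8) | (unsigned char)Str[i];
      std::printf("folded load: 0x%llx\n",
                  (unsigned long long)StrVal); // 0x64636261
      return 0;
    }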
@@ -593,7 +589,7 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C,
// If this load comes from anywhere in a constant global, and if the global
// is all undef or zero, we know what it loads.
if (GlobalVariable *GV =
- dyn_cast<GlobalVariable>(GetUnderlyingObject(CE, TD))) {
+ dyn_cast<GlobalVariable>(GetUnderlyingObject(CE, DL))) {
if (GV->isConstant() && GV->hasDefinitiveInitializer()) {
Type *ResTy = cast<PointerType>(C->getType())->getElementType();
if (GV->getInitializer()->isNullValue())
@@ -604,16 +600,15 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C,
}
// Try hard to fold loads from bitcasted strange and non-type-safe things.
- if (TD)
- return FoldReinterpretLoadFromConstPtr(CE, *TD);
- return nullptr;
+ return FoldReinterpretLoadFromConstPtr(CE, DL);
}
-static Constant *ConstantFoldLoadInst(const LoadInst *LI, const DataLayout *TD){
+static Constant *ConstantFoldLoadInst(const LoadInst *LI,
+ const DataLayout &DL) {
if (LI->isVolatile()) return nullptr;
if (Constant *C = dyn_cast<Constant>(LI->getOperand(0)))
- return ConstantFoldLoadFromConstPtr(C, TD);
+ return ConstantFoldLoadFromConstPtr(C, DL);
return nullptr;
}
@@ -623,16 +618,16 @@ static Constant *ConstantFoldLoadInst(const LoadInst *LI, const DataLayout *TD){
/// these together. The DataLayout is always available and is provided as DL.
static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0,
- Constant *Op1, const DataLayout *DL){
+ Constant *Op1,
+ const DataLayout &DL) {
// SROA
// Fold (and 0xffffffff00000000, (shl x, 32)) -> shl.
// Fold (lshr (or X, Y), 32) -> (lshr [X/Y], 32) if one doesn't contribute
// bits.
-
- if (Opc == Instruction::And && DL) {
- unsigned BitWidth = DL->getTypeSizeInBits(Op0->getType()->getScalarType());
+ if (Opc == Instruction::And) {
+ unsigned BitWidth = DL.getTypeSizeInBits(Op0->getType()->getScalarType());
APInt KnownZero0(BitWidth, 0), KnownOne0(BitWidth, 0);
APInt KnownZero1(BitWidth, 0), KnownOne1(BitWidth, 0);
computeKnownBits(Op0, KnownZero0, KnownOne0, DL);
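The first fold listed above works because the mask is redundant once the shift has cleared the low bits; a quick host-side check of that identity (not LLVM code):

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint64_t X = 0x12345678u;
      uint64_t Shl = X << 32;
      uint64_t Masked = Shl & 0xffffffff00000000ull;
      std::printf("%s\n", Shl == Masked ? "the 'and' is redundant"
                                        : "the 'and' matters");
      return 0;
    }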
@@ -655,14 +650,13 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0,
// If the constant expr is something like &A[123] - &A[4].f, fold this into a
// constant. This happens frequently when iterating over a global array.
- if (Opc == Instruction::Sub && DL) {
+ if (Opc == Instruction::Sub) {
GlobalValue *GV1, *GV2;
APInt Offs1, Offs2;
- if (IsConstantOffsetFromGlobal(Op0, GV1, Offs1, *DL))
- if (IsConstantOffsetFromGlobal(Op1, GV2, Offs2, *DL) &&
- GV1 == GV2) {
- unsigned OpSize = DL->getTypeSizeInBits(Op0->getType());
+ if (IsConstantOffsetFromGlobal(Op0, GV1, Offs1, DL))
+ if (IsConstantOffsetFromGlobal(Op1, GV2, Offs2, DL) && GV1 == GV2) {
+ unsigned OpSize = DL.getTypeSizeInBits(Op0->getType());
// (&GV+C1) - (&GV+C2) -> C1-C2, pointer arithmetic cannot overflow.
// PtrToInt may change the bitwidth so we have to convert to the right size
@@ -677,21 +671,19 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0,
/// If array indices are not pointer-sized integers, explicitly cast them so
/// that they aren't implicitly casted by the getelementptr.
-static Constant *CastGEPIndices(ArrayRef<Constant *> Ops,
- Type *ResultTy, const DataLayout *TD,
+static Constant *CastGEPIndices(Type *SrcTy, ArrayRef<Constant *> Ops,
+ Type *ResultTy, const DataLayout &DL,
const TargetLibraryInfo *TLI) {
- if (!TD)
- return nullptr;
-
- Type *IntPtrTy = TD->getIntPtrType(ResultTy);
+ Type *IntPtrTy = DL.getIntPtrType(ResultTy);
bool Any = false;
SmallVector<Constant*, 32> NewIdxs;
for (unsigned i = 1, e = Ops.size(); i != e; ++i) {
if ((i == 1 ||
!isa<StructType>(GetElementPtrInst::getIndexedType(
- Ops[0]->getType(),
- Ops.slice(1, i - 1)))) &&
+ cast<PointerType>(Ops[0]->getType()->getScalarType())
+ ->getElementType(),
+ Ops.slice(1, i - 1)))) &&
Ops[i]->getType() != IntPtrTy) {
Any = true;
NewIdxs.push_back(ConstantExpr::getCast(CastInst::getCastOpcode(Ops[i],
@@ -706,9 +698,9 @@ static Constant *CastGEPIndices(ArrayRef<Constant *> Ops,
if (!Any)
return nullptr;
- Constant *C = ConstantExpr::getGetElementPtr(Ops[0], NewIdxs);
+ Constant *C = ConstantExpr::getGetElementPtr(SrcTy, Ops[0], NewIdxs);
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
- if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
+ if (Constant *Folded = ConstantFoldConstantExpression(CE, DL, TLI))
C = Folded;
}
@@ -732,15 +724,15 @@ static Constant* StripPtrCastKeepAS(Constant* Ptr) {
}
/// If we can symbolically evaluate the GEP constant expression, do so.
-static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
- Type *ResultTy, const DataLayout *TD,
+static Constant *SymbolicallyEvaluateGEP(Type *SrcTy, ArrayRef<Constant *> Ops,
+ Type *ResultTy, const DataLayout &DL,
const TargetLibraryInfo *TLI) {
Constant *Ptr = Ops[0];
- if (!TD || !Ptr->getType()->getPointerElementType()->isSized() ||
+ if (!Ptr->getType()->getPointerElementType()->isSized() ||
!Ptr->getType()->isPointerTy())
return nullptr;
- Type *IntPtrTy = TD->getIntPtrType(Ptr->getType());
+ Type *IntPtrTy = DL.getIntPtrType(Ptr->getType());
Type *ResultElementTy = ResultTy->getPointerElementType();
// If this is a constant expr gep that is effectively computing an
@@ -760,19 +752,19 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
Res = ConstantExpr::getSub(Res, CE->getOperand(1));
Res = ConstantExpr::getIntToPtr(Res, ResultTy);
if (ConstantExpr *ResCE = dyn_cast<ConstantExpr>(Res))
- Res = ConstantFoldConstantExpression(ResCE, TD, TLI);
+ Res = ConstantFoldConstantExpression(ResCE, DL, TLI);
return Res;
}
}
return nullptr;
}
- unsigned BitWidth = TD->getTypeSizeInBits(IntPtrTy);
+ unsigned BitWidth = DL.getTypeSizeInBits(IntPtrTy);
APInt Offset =
- APInt(BitWidth, TD->getIndexedOffset(Ptr->getType(),
- makeArrayRef((Value *const*)
- Ops.data() + 1,
- Ops.size() - 1)));
+ APInt(BitWidth,
+ DL.getIndexedOffset(
+ Ptr->getType(),
+ makeArrayRef((Value * const *)Ops.data() + 1, Ops.size() - 1)));
Ptr = StripPtrCastKeepAS(Ptr);
// If this is a GEP of a GEP, fold it all into a single GEP.
@@ -790,8 +782,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
break;
Ptr = cast<Constant>(GEP->getOperand(0));
- Offset += APInt(BitWidth,
- TD->getIndexedOffset(Ptr->getType(), NestedOps));
+ Offset += APInt(BitWidth, DL.getIndexedOffset(Ptr->getType(), NestedOps));
Ptr = StripPtrCastKeepAS(Ptr);
}
@@ -831,7 +822,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
}
// Determine which element of the array the offset points into.
- APInt ElemSize(BitWidth, TD->getTypeAllocSize(ATy->getElementType()));
+ APInt ElemSize(BitWidth, DL.getTypeAllocSize(ATy->getElementType()));
if (ElemSize == 0)
// The element size is 0. This may be [0 x Ty]*, so just use a zero
// index for this level and proceed to the next level to see if it can
@@ -850,7 +841,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
// can't re-form this GEP in a regular form, so bail out. The pointer
// operand likely went through casts that are necessary to make the GEP
// sensible.
- const StructLayout &SL = *TD->getStructLayout(STy);
+ const StructLayout &SL = *DL.getStructLayout(STy);
if (Offset.uge(SL.getSizeInBytes()))
break;
@@ -875,14 +866,14 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
return nullptr;
// Create a GEP.
- Constant *C = ConstantExpr::getGetElementPtr(Ptr, NewIdxs);
+ Constant *C = ConstantExpr::getGetElementPtr(SrcTy, Ptr, NewIdxs);
assert(C->getType()->getPointerElementType() == Ty &&
"Computed GetElementPtr has unexpected type!");
// If we ended up indexing a member with a type that doesn't match
// the type of what the original indices indexed, add a cast.
if (Ty != ResultElementTy)
- C = FoldBitCast(C, ResultTy, *TD);
+ C = FoldBitCast(C, ResultTy, DL);
return C;
}
@@ -898,15 +889,13 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
/// Note that this fails if not all of the operands are constant. Otherwise,
/// this function can only fail when attempting to fold instructions like loads
/// and stores, which have no constant expression form.
-Constant *llvm::ConstantFoldInstruction(Instruction *I,
- const DataLayout *TD,
+Constant *llvm::ConstantFoldInstruction(Instruction *I, const DataLayout &DL,
const TargetLibraryInfo *TLI) {
// Handle PHI nodes quickly here...
if (PHINode *PN = dyn_cast<PHINode>(I)) {
Constant *CommonValue = nullptr;
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- Value *Incoming = PN->getIncomingValue(i);
+ for (Value *Incoming : PN->incoming_values()) {
// If the incoming value is undef then skip it. Note that while we could
// skip the value if it is equal to the phi node itself we choose not to
// because that would break the rule that constant folding only applies if
@@ -919,7 +908,7 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I,
return nullptr;
// Fold the PHI's operands.
if (ConstantExpr *NewC = dyn_cast<ConstantExpr>(C))
- C = ConstantFoldConstantExpression(NewC, TD, TLI);
+ C = ConstantFoldConstantExpression(NewC, DL, TLI);
// If the incoming value is a different constant to
// the one we saw previously, then give up.
if (CommonValue && C != CommonValue)
@@ -942,17 +931,17 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I,
// Fold the Instruction's operands.
if (ConstantExpr *NewCE = dyn_cast<ConstantExpr>(Op))
- Op = ConstantFoldConstantExpression(NewCE, TD, TLI);
+ Op = ConstantFoldConstantExpression(NewCE, DL, TLI);
Ops.push_back(Op);
}
if (const CmpInst *CI = dyn_cast<CmpInst>(I))
return ConstantFoldCompareInstOperands(CI->getPredicate(), Ops[0], Ops[1],
- TD, TLI);
+ DL, TLI);
if (const LoadInst *LI = dyn_cast<LoadInst>(I))
- return ConstantFoldLoadInst(LI, TD);
+ return ConstantFoldLoadInst(LI, DL);
if (InsertValueInst *IVI = dyn_cast<InsertValueInst>(I)) {
return ConstantExpr::getInsertValue(
@@ -967,11 +956,11 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I,
EVI->getIndices());
}
- return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Ops, TD, TLI);
+ return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Ops, DL, TLI);
}
static Constant *
-ConstantFoldConstantExpressionImpl(const ConstantExpr *CE, const DataLayout *TD,
+ConstantFoldConstantExpressionImpl(const ConstantExpr *CE, const DataLayout &DL,
const TargetLibraryInfo *TLI,
SmallPtrSetImpl<ConstantExpr *> &FoldedOps) {
SmallVector<Constant *, 8> Ops;
@@ -982,25 +971,25 @@ ConstantFoldConstantExpressionImpl(const ConstantExpr *CE, const DataLayout *TD,
// a ConstantExpr, we don't have to process it again.
if (ConstantExpr *NewCE = dyn_cast<ConstantExpr>(NewC)) {
if (FoldedOps.insert(NewCE).second)
- NewC = ConstantFoldConstantExpressionImpl(NewCE, TD, TLI, FoldedOps);
+ NewC = ConstantFoldConstantExpressionImpl(NewCE, DL, TLI, FoldedOps);
}
Ops.push_back(NewC);
}
if (CE->isCompare())
return ConstantFoldCompareInstOperands(CE->getPredicate(), Ops[0], Ops[1],
- TD, TLI);
- return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(), Ops, TD, TLI);
+ DL, TLI);
+ return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(), Ops, DL, TLI);
}
/// Attempt to fold the constant expression
/// using the specified DataLayout. If successful, the constant result is
/// returned; if not, null is returned.
Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE,
- const DataLayout *TD,
+ const DataLayout &DL,
const TargetLibraryInfo *TLI) {
SmallPtrSet<ConstantExpr *, 4> FoldedOps;
- return ConstantFoldConstantExpressionImpl(CE, TD, TLI, FoldedOps);
+ return ConstantFoldConstantExpressionImpl(CE, DL, TLI, FoldedOps);
}
/// Attempt to constant fold an instruction with the
@@ -1015,12 +1004,12 @@ Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE,
///
Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
ArrayRef<Constant *> Ops,
- const DataLayout *TD,
+ const DataLayout &DL,
const TargetLibraryInfo *TLI) {
// Handle easy binops first.
if (Instruction::isBinaryOp(Opcode)) {
if (isa<ConstantExpr>(Ops[0]) || isa<ConstantExpr>(Ops[1])) {
- if (Constant *C = SymbolicallyEvaluateBinop(Opcode, Ops[0], Ops[1], TD))
+ if (Constant *C = SymbolicallyEvaluateBinop(Opcode, Ops[0], Ops[1], DL))
return C;
}
@@ -1040,10 +1029,10 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
// If the input is a inttoptr, eliminate the pair. This requires knowing
// the width of a pointer, so it can't be done in ConstantExpr::getCast.
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0])) {
- if (TD && CE->getOpcode() == Instruction::IntToPtr) {
+ if (CE->getOpcode() == Instruction::IntToPtr) {
Constant *Input = CE->getOperand(0);
unsigned InWidth = Input->getType()->getScalarSizeInBits();
- unsigned PtrWidth = TD->getPointerTypeSizeInBits(CE->getType());
+ unsigned PtrWidth = DL.getPointerTypeSizeInBits(CE->getType());
if (PtrWidth < InWidth) {
Constant *Mask =
ConstantInt::get(CE->getContext(),
@@ -1061,15 +1050,15 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
// This requires knowing the width of a pointer, so it can't be done in
// ConstantExpr::getCast.
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0])) {
- if (TD && CE->getOpcode() == Instruction::PtrToInt) {
+ if (CE->getOpcode() == Instruction::PtrToInt) {
Constant *SrcPtr = CE->getOperand(0);
- unsigned SrcPtrSize = TD->getPointerTypeSizeInBits(SrcPtr->getType());
+ unsigned SrcPtrSize = DL.getPointerTypeSizeInBits(SrcPtr->getType());
unsigned MidIntSize = CE->getType()->getScalarSizeInBits();
if (MidIntSize >= SrcPtrSize) {
unsigned SrcAS = SrcPtr->getType()->getPointerAddressSpace();
if (SrcAS == DestTy->getPointerAddressSpace())
- return FoldBitCast(CE->getOperand(0), DestTy, *TD);
+ return FoldBitCast(CE->getOperand(0), DestTy, DL);
}
}
}
@@ -1087,9 +1076,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
case Instruction::AddrSpaceCast:
return ConstantExpr::getCast(Opcode, Ops[0], DestTy);
case Instruction::BitCast:
- if (TD)
- return FoldBitCast(Ops[0], DestTy, *TD);
- return ConstantExpr::getBitCast(Ops[0], DestTy);
+ return FoldBitCast(Ops[0], DestTy, DL);
case Instruction::Select:
return ConstantExpr::getSelect(Ops[0], Ops[1], Ops[2]);
case Instruction::ExtractElement:
@@ -1098,13 +1085,15 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
return ConstantExpr::getInsertElement(Ops[0], Ops[1], Ops[2]);
case Instruction::ShuffleVector:
return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2]);
- case Instruction::GetElementPtr:
- if (Constant *C = CastGEPIndices(Ops, DestTy, TD, TLI))
+ case Instruction::GetElementPtr: {
+ Type *SrcTy = nullptr;
+ if (Constant *C = CastGEPIndices(SrcTy, Ops, DestTy, DL, TLI))
return C;
- if (Constant *C = SymbolicallyEvaluateGEP(Ops, DestTy, TD, TLI))
+ if (Constant *C = SymbolicallyEvaluateGEP(SrcTy, Ops, DestTy, DL, TLI))
return C;
- return ConstantExpr::getGetElementPtr(Ops[0], Ops.slice(1));
+ return ConstantExpr::getGetElementPtr(SrcTy, Ops[0], Ops.slice(1));
+ }
}
}
@@ -1113,43 +1102,44 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
/// returns a constant expression of the specified operands.
Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
Constant *Ops0, Constant *Ops1,
- const DataLayout *TD,
+ const DataLayout &DL,
const TargetLibraryInfo *TLI) {
// fold: icmp (inttoptr x), null -> icmp x, 0
// fold: icmp (ptrtoint x), 0 -> icmp x, null
// fold: icmp (inttoptr x), (inttoptr y) -> icmp trunc/zext x, trunc/zext y
// fold: icmp (ptrtoint x), (ptrtoint y) -> icmp x, y
//
- // ConstantExpr::getCompare cannot do this, because it doesn't have TD
+ // FIXME: The following comment is out of date; the DataLayout is always available now.
+ // ConstantExpr::getCompare cannot do this, because it doesn't have DL
// around to know if bit truncation is happening.
if (ConstantExpr *CE0 = dyn_cast<ConstantExpr>(Ops0)) {
- if (TD && Ops1->isNullValue()) {
+ if (Ops1->isNullValue()) {
if (CE0->getOpcode() == Instruction::IntToPtr) {
- Type *IntPtrTy = TD->getIntPtrType(CE0->getType());
+ Type *IntPtrTy = DL.getIntPtrType(CE0->getType());
// Convert the integer value to the right size to ensure we get the
// proper extension or truncation.
Constant *C = ConstantExpr::getIntegerCast(CE0->getOperand(0),
IntPtrTy, false);
Constant *Null = Constant::getNullValue(C->getType());
- return ConstantFoldCompareInstOperands(Predicate, C, Null, TD, TLI);
+ return ConstantFoldCompareInstOperands(Predicate, C, Null, DL, TLI);
}
// Only do this transformation if the int is intptrty in size, otherwise
// there is a truncation or extension that we aren't modeling.
if (CE0->getOpcode() == Instruction::PtrToInt) {
- Type *IntPtrTy = TD->getIntPtrType(CE0->getOperand(0)->getType());
+ Type *IntPtrTy = DL.getIntPtrType(CE0->getOperand(0)->getType());
if (CE0->getType() == IntPtrTy) {
Constant *C = CE0->getOperand(0);
Constant *Null = Constant::getNullValue(C->getType());
- return ConstantFoldCompareInstOperands(Predicate, C, Null, TD, TLI);
+ return ConstantFoldCompareInstOperands(Predicate, C, Null, DL, TLI);
}
}
}
if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(Ops1)) {
- if (TD && CE0->getOpcode() == CE1->getOpcode()) {
+ if (CE0->getOpcode() == CE1->getOpcode()) {
if (CE0->getOpcode() == Instruction::IntToPtr) {
- Type *IntPtrTy = TD->getIntPtrType(CE0->getType());
+ Type *IntPtrTy = DL.getIntPtrType(CE0->getType());
// Convert the integer value to the right size to ensure we get the
// proper extension or truncation.
@@ -1157,20 +1147,17 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
IntPtrTy, false);
Constant *C1 = ConstantExpr::getIntegerCast(CE1->getOperand(0),
IntPtrTy, false);
- return ConstantFoldCompareInstOperands(Predicate, C0, C1, TD, TLI);
+ return ConstantFoldCompareInstOperands(Predicate, C0, C1, DL, TLI);
}
// Only do this transformation if the int is intptrty in size, otherwise
// there is a truncation or extension that we aren't modeling.
if (CE0->getOpcode() == Instruction::PtrToInt) {
- Type *IntPtrTy = TD->getIntPtrType(CE0->getOperand(0)->getType());
+ Type *IntPtrTy = DL.getIntPtrType(CE0->getOperand(0)->getType());
if (CE0->getType() == IntPtrTy &&
CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType()) {
- return ConstantFoldCompareInstOperands(Predicate,
- CE0->getOperand(0),
- CE1->getOperand(0),
- TD,
- TLI);
+ return ConstantFoldCompareInstOperands(
+ Predicate, CE0->getOperand(0), CE1->getOperand(0), DL, TLI);
}
}
}
@@ -1180,16 +1167,14 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
// icmp ne (or x, y), 0 -> (icmp ne x, 0) | (icmp ne y, 0)
if ((Predicate == ICmpInst::ICMP_EQ || Predicate == ICmpInst::ICMP_NE) &&
CE0->getOpcode() == Instruction::Or && Ops1->isNullValue()) {
- Constant *LHS =
- ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(0), Ops1,
- TD, TLI);
- Constant *RHS =
- ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(1), Ops1,
- TD, TLI);
+ Constant *LHS = ConstantFoldCompareInstOperands(
+ Predicate, CE0->getOperand(0), Ops1, DL, TLI);
+ Constant *RHS = ConstantFoldCompareInstOperands(
+ Predicate, CE0->getOperand(1), Ops1, DL, TLI);
unsigned OpC =
Predicate == ICmpInst::ICMP_EQ ? Instruction::And : Instruction::Or;
Constant *Ops[] = { LHS, RHS };
- return ConstantFoldInstOperands(OpC, LHS->getType(), Ops, TD, TLI);
+ return ConstantFoldInstOperands(OpC, LHS->getType(), Ops, DL, TLI);
}
}
@@ -1451,26 +1436,16 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID,
default: break;
case Intrinsic::fabs:
return ConstantFoldFP(fabs, V, Ty);
-#if HAVE_LOG2
case Intrinsic::log2:
- return ConstantFoldFP(log2, V, Ty);
-#endif
-#if HAVE_LOG
+ return ConstantFoldFP(Log2, V, Ty);
case Intrinsic::log:
return ConstantFoldFP(log, V, Ty);
-#endif
-#if HAVE_LOG10
case Intrinsic::log10:
return ConstantFoldFP(log10, V, Ty);
-#endif
-#if HAVE_EXP
case Intrinsic::exp:
return ConstantFoldFP(exp, V, Ty);
-#endif
-#if HAVE_EXP2
case Intrinsic::exp2:
return ConstantFoldFP(exp2, V, Ty);
-#endif
case Intrinsic::floor:
return ConstantFoldFP(floor, V, Ty);
case Intrinsic::ceil:
@@ -1568,8 +1543,8 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID,
APFloat Val(APFloat::IEEEhalf, Op->getValue());
bool lost = false;
- APFloat::opStatus status =
- Val.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &lost);
+ APFloat::opStatus status = Val.convert(
+ Ty->getFltSemantics(), APFloat::rmNearestTiesToEven, &lost);
// Conversion is always precise.
(void)status;
diff --git a/contrib/llvm/lib/Analysis/CostModel.cpp b/contrib/llvm/lib/Analysis/CostModel.cpp
index 1b74f8c..b529c1a 100644
--- a/contrib/llvm/lib/Analysis/CostModel.cpp
+++ b/contrib/llvm/lib/Analysis/CostModel.cpp
@@ -83,7 +83,8 @@ CostModelAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
bool
CostModelAnalysis::runOnFunction(Function &F) {
this->F = &F;
- TTI = getAnalysisIfAvailable<TargetTransformInfo>();
+ auto *TTIWP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
+ TTI = TTIWP ? &TTIWP->getTTI(F) : nullptr;
return false;
}
diff --git a/contrib/llvm/lib/Analysis/Delinearization.cpp b/contrib/llvm/lib/Analysis/Delinearization.cpp
index 9334ceb..d603b7b 100644
--- a/contrib/llvm/lib/Analysis/Delinearization.cpp
+++ b/contrib/llvm/lib/Analysis/Delinearization.cpp
@@ -59,14 +59,14 @@ public:
void Delinearization::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
- AU.addRequired<LoopInfo>();
+ AU.addRequired<LoopInfoWrapperPass>();
AU.addRequired<ScalarEvolution>();
}
bool Delinearization::runOnFunction(Function &F) {
this->F = &F;
SE = &getAnalysis<ScalarEvolution>();
- LI = &getAnalysis<LoopInfo>();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
return false;
}
@@ -141,7 +141,7 @@ char Delinearization::ID = 0;
static const char delinearization_name[] = "Delinearization";
INITIALIZE_PASS_BEGIN(Delinearization, DL_NAME, delinearization_name, true,
true)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(Delinearization, DL_NAME, delinearization_name, true, true)
FunctionPass *llvm::createDelinearizationPass() { return new Delinearization; }
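The same LoopInfo-to-LoopInfoWrapperPass migration recurs in the DependenceAnalysis diff below; a minimal sketch of the new idiom for a legacy FunctionPass (ExamplePass is invented for illustration and is not part of either file):

    #include "llvm/Analysis/LoopInfo.h"
    #include "llvm/Pass.h"
    using namespace llvm;

    namespace {
    struct ExamplePass : public FunctionPass {
      static char ID;
      ExamplePass() : FunctionPass(ID) {}
      void getAnalysisUsage(AnalysisUsage &AU) const override {
        AU.addRequired<LoopInfoWrapperPass>(); // was: AU.addRequired<LoopInfo>()
        AU.setPreservesAll();
      }
      bool runOnFunction(Function &F) override {
        // was: LI = &getAnalysis<LoopInfo>();
        LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
        (void)LI;
        return false;
      }
    };
    char ExamplePass::ID = 0;
    } // end anonymous namespace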
diff --git a/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp
index 092df5c..808a38b 100644
--- a/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -52,6 +52,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/DependenceAnalysis.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -59,6 +60,7 @@
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -114,7 +116,7 @@ Delinearize("da-delinearize", cl::init(false), cl::Hidden, cl::ZeroOrMore,
INITIALIZE_PASS_BEGIN(DependenceAnalysis, "da",
"Dependence Analysis", true, true)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(DependenceAnalysis, "da",
@@ -132,7 +134,7 @@ bool DependenceAnalysis::runOnFunction(Function &F) {
this->F = &F;
AA = &getAnalysis<AliasAnalysis>();
SE = &getAnalysis<ScalarEvolution>();
- LI = &getAnalysis<LoopInfo>();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
return false;
}
@@ -145,7 +147,7 @@ void DependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequiredTransitive<AliasAnalysis>();
AU.addRequiredTransitive<ScalarEvolution>();
- AU.addRequiredTransitive<LoopInfo>();
+ AU.addRequiredTransitive<LoopInfoWrapperPass>();
}
@@ -225,13 +227,11 @@ bool Dependence::isScalar(unsigned level) const {
//===----------------------------------------------------------------------===//
// FullDependence methods
-FullDependence::FullDependence(Instruction *Source,
- Instruction *Destination,
+FullDependence::FullDependence(Instruction *Source, Instruction *Destination,
bool PossiblyLoopIndependent,
- unsigned CommonLevels) :
- Dependence(Source, Destination),
- Levels(CommonLevels),
- LoopIndependent(PossiblyLoopIndependent) {
+ unsigned CommonLevels)
+ : Dependence(Source, Destination), Levels(CommonLevels),
+ LoopIndependent(PossiblyLoopIndependent) {
Consistent = true;
DV = CommonLevels ? new DVEntry[CommonLevels] : nullptr;
}
@@ -625,14 +625,12 @@ void Dependence::dump(raw_ostream &OS) const {
OS << "!\n";
}
-
-
-static
-AliasAnalysis::AliasResult underlyingObjectsAlias(AliasAnalysis *AA,
- const Value *A,
- const Value *B) {
- const Value *AObj = GetUnderlyingObject(A);
- const Value *BObj = GetUnderlyingObject(B);
+static AliasAnalysis::AliasResult underlyingObjectsAlias(AliasAnalysis *AA,
+ const DataLayout &DL,
+ const Value *A,
+ const Value *B) {
+ const Value *AObj = GetUnderlyingObject(A, DL);
+ const Value *BObj = GetUnderlyingObject(B, DL);
return AA->alias(AObj, AA->getTypeStoreSize(AObj->getType()),
BObj, AA->getTypeStoreSize(BObj->getType()));
}
@@ -832,6 +830,14 @@ bool DependenceAnalysis::checkSrcSubscript(const SCEV *Src,
return isLoopInvariant(Src, LoopNest);
const SCEV *Start = AddRec->getStart();
const SCEV *Step = AddRec->getStepRecurrence(*SE);
+ const SCEV *UB = SE->getBackedgeTakenCount(AddRec->getLoop());
+ if (!isa<SCEVCouldNotCompute>(UB)) {
+ if (SE->getTypeSizeInBits(Start->getType()) <
+ SE->getTypeSizeInBits(UB->getType())) {
+ if (!AddRec->getNoWrapFlags())
+ return false;
+ }
+ }
if (!isLoopInvariant(Step, LoopNest))
return false;
Loops.set(mapSrcLoop(AddRec->getLoop()));
@@ -850,6 +856,14 @@ bool DependenceAnalysis::checkDstSubscript(const SCEV *Dst,
return isLoopInvariant(Dst, LoopNest);
const SCEV *Start = AddRec->getStart();
const SCEV *Step = AddRec->getStepRecurrence(*SE);
+ const SCEV *UB = SE->getBackedgeTakenCount(AddRec->getLoop());
+ if (!isa<SCEVCouldNotCompute>(UB)) {
+ if (SE->getTypeSizeInBits(Start->getType()) <
+ SE->getTypeSizeInBits(UB->getType())) {
+ if (!AddRec->getNoWrapFlags())
+ return false;
+ }
+ }
if (!isLoopInvariant(Step, LoopNest))
return false;
Loops.set(mapDstLoop(AddRec->getLoop()));
@@ -944,13 +958,15 @@ bool DependenceAnalysis::isKnownPredicate(ICmpInst::Predicate Pred,
// All subscripts are all the same type.
// Loop bound may be smaller (e.g., a char).
// Should zero extend loop bound, since it's always >= 0.
-// This routine collects upper bound and extends if needed.
+// This routine collects the upper bound and extends or truncates it if needed.
+// Truncating is safe when subscripts are known not to wrap. Cases without
+// nowrap flags should have been rejected earlier.
// Return null if no bound available.
const SCEV *DependenceAnalysis::collectUpperBound(const Loop *L,
Type *T) const {
if (SE->hasLoopInvariantBackedgeTakenCount(L)) {
const SCEV *UB = SE->getBackedgeTakenCount(L);
- return SE->getNoopOrZeroExtend(UB, T);
+ return SE->getTruncateOrZeroExtend(UB, T);
}
return nullptr;
}
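A small illustration of why the truncation is gated on the no-wrap checks added earlier (made-up numbers, plain C++): truncating a wide backedge-taken count to a narrower subscript type only describes the real trip count if the subscript cannot wrap.

    #include <cstdint>
    #include <cstdio>

    int main() {
      int64_t BackedgeTakenCount = 300;                     // i64 bound
      uint8_t TruncatedBound = (uint8_t)BackedgeTakenCount; // i8 subscript type
      std::printf("i64 bound %lld truncates to i8 bound %u\n",
                  (long long)BackedgeTakenCount, (unsigned)TruncatedBound); // 44
      return 0;
    }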
@@ -3314,7 +3330,8 @@ DependenceAnalysis::depends(Instruction *Src, Instruction *Dst,
Value *SrcPtr = getPointerOperand(Src);
Value *DstPtr = getPointerOperand(Dst);
- switch (underlyingObjectsAlias(AA, DstPtr, SrcPtr)) {
+ switch (underlyingObjectsAlias(AA, F->getParent()->getDataLayout(), DstPtr,
+ SrcPtr)) {
case AliasAnalysis::MayAlias:
case AliasAnalysis::PartialAlias:
// cannot analyse objects if we don't understand their aliasing.
@@ -3347,9 +3364,9 @@ DependenceAnalysis::depends(Instruction *Src, Instruction *Dst,
DEBUG(dbgs() << " SrcPtrSCEV = " << *SrcPtrSCEV << "\n");
DEBUG(dbgs() << " DstPtrSCEV = " << *DstPtrSCEV << "\n");
- UsefulGEP =
- isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) &&
- isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent()));
+ UsefulGEP = isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) &&
+ isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent())) &&
+ (SrcGEP->getNumOperands() == DstGEP->getNumOperands());
}
unsigned Pairs = UsefulGEP ? SrcGEP->idx_end() - SrcGEP->idx_begin() : 1;
SmallVector<Subscript, 4> Pair(Pairs);
@@ -3472,8 +3489,7 @@ DependenceAnalysis::depends(Instruction *Src, Instruction *Dst,
LI->getLoopFor(Dst->getParent()),
Pair[SI].Loops);
Result.Consistent = false;
- }
- else if (Pair[SI].Classification == Subscript::ZIV) {
+ } else if (Pair[SI].Classification == Subscript::ZIV) {
// always separable
Separable.set(SI);
}
@@ -3525,8 +3541,8 @@ DependenceAnalysis::depends(Instruction *Src, Instruction *Dst,
DEBUG(dbgs() << ", SIV\n");
unsigned Level;
const SCEV *SplitIter = nullptr;
- if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level,
- Result, NewConstraint, SplitIter))
+ if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level, Result, NewConstraint,
+ SplitIter))
return nullptr;
break;
}
@@ -3574,8 +3590,8 @@ DependenceAnalysis::depends(Instruction *Src, Instruction *Dst,
unsigned Level;
const SCEV *SplitIter = nullptr;
DEBUG(dbgs() << "SIV\n");
- if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level,
- Result, NewConstraint, SplitIter))
+ if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, Result, NewConstraint,
+ SplitIter))
return nullptr;
ConstrainedLevels.set(Level);
if (intersectConstraints(&Constraints[Level], &NewConstraint)) {
@@ -3651,8 +3667,10 @@ DependenceAnalysis::depends(Instruction *Src, Instruction *Dst,
// update Result.DV from constraint vector
DEBUG(dbgs() << " updating\n");
- for (int SJ = ConstrainedLevels.find_first();
- SJ >= 0; SJ = ConstrainedLevels.find_next(SJ)) {
+ for (int SJ = ConstrainedLevels.find_first(); SJ >= 0;
+ SJ = ConstrainedLevels.find_next(SJ)) {
+ if (SJ > (int)CommonLevels)
+ break;
updateDirection(Result.DV[SJ - 1], Constraints[SJ]);
if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE)
return nullptr;
@@ -3759,8 +3777,8 @@ const SCEV *DependenceAnalysis::getSplitIteration(const Dependence &Dep,
assert(isLoadOrStore(Dst));
Value *SrcPtr = getPointerOperand(Src);
Value *DstPtr = getPointerOperand(Dst);
- assert(underlyingObjectsAlias(AA, DstPtr, SrcPtr) ==
- AliasAnalysis::MustAlias);
+ assert(underlyingObjectsAlias(AA, F->getParent()->getDataLayout(), DstPtr,
+ SrcPtr) == AliasAnalysis::MustAlias);
// establish loop nesting levels
establishNestingLevels(Src, Dst);
@@ -3775,9 +3793,9 @@ const SCEV *DependenceAnalysis::getSplitIteration(const Dependence &Dep,
SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()) {
const SCEV *SrcPtrSCEV = SE->getSCEV(SrcGEP->getPointerOperand());
const SCEV *DstPtrSCEV = SE->getSCEV(DstGEP->getPointerOperand());
- UsefulGEP =
- isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) &&
- isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent()));
+ UsefulGEP = isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) &&
+ isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent())) &&
+ (SrcGEP->getNumOperands() == DstGEP->getNumOperands());
}
unsigned Pairs = UsefulGEP ? SrcGEP->idx_end() - SrcGEP->idx_begin() : 1;
SmallVector<Subscript, 4> Pair(Pairs);
diff --git a/contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp b/contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp
new file mode 100644
index 0000000..e5ee295
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp
@@ -0,0 +1,337 @@
+//===- DivergenceAnalysis.cpp ------ Divergence Analysis ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines divergence analysis which determines whether a branch in a
+// GPU program is divergent. It can help branch optimizations such as jump
+// threading and loop unswitching to make better decisions.
+//
+// GPU programs typically use the SIMD execution model, where multiple threads
+// in the same execution group have to execute in lock-step. Therefore, if the
+// code contains divergent branches (i.e., threads in a group do not agree on
+// which path of the branch to take), the group of threads has to execute all
+// the paths from that branch with different subsets of threads enabled until
+// they converge at the immediately post-dominating BB of the paths.
+//
+// Due to this execution model, some optimizations such as jump
+// threading and loop unswitching can be unfortunately harmful when performed on
+// divergent branches. Therefore, an analysis that computes which branches in a
+// GPU program are divergent can help the compiler to selectively run these
+// optimizations.
+//
+// This file defines divergence analysis which computes a conservative but
+// non-trivial approximation of all divergent branches in a GPU program. It
+// partially implements the approach described in
+//
+// Divergence Analysis
+// Sampaio, Souza, Collange, Pereira
+// TOPLAS '13
+//
+// The divergence analysis identifies the sources of divergence (e.g., special
+// variables that hold the thread ID), and recursively marks variables that are
+// data or sync dependent on a source of divergence as divergent.
+//
+// While data dependency is a well-known concept, the notion of sync dependency
+// is worth more explanation. Sync dependence characterizes the control flow
+// aspect of the propagation of branch divergence. For example,
+//
+// %cond = icmp slt i32 %tid, 10
+// br i1 %cond, label %then, label %else
+// then:
+// br label %merge
+// else:
+// br label %merge
+// merge:
+// %a = phi i32 [ 0, %then ], [ 1, %else ]
+//
+// Suppose %tid holds the thread ID. Although %a is not data dependent on %tid
+// because %tid is not on its use-def chains, %a is sync dependent on %tid
+// because the branch "br i1 %cond" depends on %tid and affects which value %a
+// is assigned to.
+//
+// The current implementation has the following limitations:
+// 1. intra-procedural. It conservatively considers the arguments of a
+// non-kernel-entry function and the return value of a function call as
+// divergent.
+// 2. memory as black box. It conservatively considers values loaded from
+// generic or local address as divergent. This can be improved by leveraging
+// pointer analysis.
+//===----------------------------------------------------------------------===//
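A host-side analogue of the %a example above (ordinary C++, not GPU code and not part of the new file): evaluating the branch for every thread ID in one execution group shows %a taking different values across the group, which is exactly what makes it divergent.

    #include <cstdio>

    int main() {
      const int GroupSize = 32;
      int A[GroupSize];
      for (int Tid = 0; Tid < GroupSize; ++Tid) {
        bool Cond = Tid < 10;  // %cond = icmp slt i32 %tid, 10
        A[Tid] = Cond ? 0 : 1; // %a = phi i32 [ 0, %then ], [ 1, %else ]
      }
      bool Uniform = true;
      for (int Tid = 1; Tid < GroupSize; ++Tid)
        Uniform = Uniform && (A[Tid] == A[0]);
      std::printf("%%a is %s across the group\n",
                  Uniform ? "uniform" : "divergent");
      return 0;
    }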
+
+#include <vector>
+#include "llvm/IR/Dominators.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "divergence"
+
+namespace {
+class DivergenceAnalysis : public FunctionPass {
+public:
+ static char ID;
+
+ DivergenceAnalysis() : FunctionPass(ID) {
+ initializeDivergenceAnalysisPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<PostDominatorTree>();
+ AU.setPreservesAll();
+ }
+
+ bool runOnFunction(Function &F) override;
+
+  // Print all divergent values in the function.
+ void print(raw_ostream &OS, const Module *) const override;
+
+ // Returns true if V is divergent.
+ bool isDivergent(const Value *V) const { return DivergentValues.count(V); }
+ // Returns true if V is uniform/non-divergent.
+ bool isUniform(const Value *V) const { return !isDivergent(V); }
+
+private:
+ // Stores all divergent values.
+ DenseSet<const Value *> DivergentValues;
+};
+} // End of anonymous namespace
+
+// Register this pass.
+char DivergenceAnalysis::ID = 0;
+INITIALIZE_PASS_BEGIN(DivergenceAnalysis, "divergence", "Divergence Analysis",
+ false, true)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(PostDominatorTree)
+INITIALIZE_PASS_END(DivergenceAnalysis, "divergence", "Divergence Analysis",
+ false, true)
+
+namespace {
+
+class DivergencePropagator {
+public:
+ DivergencePropagator(Function &F, TargetTransformInfo &TTI,
+ DominatorTree &DT, PostDominatorTree &PDT,
+ DenseSet<const Value *> &DV)
+ : F(F), TTI(TTI), DT(DT), PDT(PDT), DV(DV) {}
+ void populateWithSourcesOfDivergence();
+ void propagate();
+
+private:
+ // A helper function that explores data dependents of V.
+ void exploreDataDependency(Value *V);
+ // A helper function that explores sync dependents of TI.
+ void exploreSyncDependency(TerminatorInst *TI);
+ // Computes the influence region from Start to End. This region includes all
+ // basic blocks on any path from Start to End.
+ void computeInfluenceRegion(BasicBlock *Start, BasicBlock *End,
+ DenseSet<BasicBlock *> &InfluenceRegion);
+  // Finds all users of I that are outside the influence region, and adds
+  // these users to Worklist.
+ void findUsersOutsideInfluenceRegion(
+ Instruction &I, const DenseSet<BasicBlock *> &InfluenceRegion);
+
+ Function &F;
+ TargetTransformInfo &TTI;
+ DominatorTree &DT;
+ PostDominatorTree &PDT;
+ std::vector<Value *> Worklist; // Stack for DFS.
+ DenseSet<const Value *> &DV; // Stores all divergent values.
+};
+
+void DivergencePropagator::populateWithSourcesOfDivergence() {
+ Worklist.clear();
+ DV.clear();
+ for (auto &I : inst_range(F)) {
+ if (TTI.isSourceOfDivergence(&I)) {
+ Worklist.push_back(&I);
+ DV.insert(&I);
+ }
+ }
+ for (auto &Arg : F.args()) {
+ if (TTI.isSourceOfDivergence(&Arg)) {
+ Worklist.push_back(&Arg);
+ DV.insert(&Arg);
+ }
+ }
+}
+
+void DivergencePropagator::exploreSyncDependency(TerminatorInst *TI) {
+ // Propagation rule 1: if branch TI is divergent, all PHINodes in TI's
+ // immediate post dominator are divergent. This rule handles if-then-else
+ // patterns. For example,
+ //
+ // if (tid < 5)
+ // a1 = 1;
+ // else
+ // a2 = 2;
+ // a = phi(a1, a2); // sync dependent on (tid < 5)
+ BasicBlock *ThisBB = TI->getParent();
+ BasicBlock *IPostDom = PDT.getNode(ThisBB)->getIDom()->getBlock();
+ if (IPostDom == nullptr)
+ return;
+
+ for (auto I = IPostDom->begin(); isa<PHINode>(I); ++I) {
+ // A PHINode is uniform if it returns the same value no matter which path is
+ // taken.
+ if (!cast<PHINode>(I)->hasConstantValue() && DV.insert(I).second)
+ Worklist.push_back(I);
+ }
+
+ // Propagation rule 2: if a value defined in a loop is used outside, the user
+ // is sync dependent on the condition of the loop exits that dominate the
+ // user. For example,
+ //
+ // int i = 0;
+ // do {
+ // i++;
+ // if (foo(i)) ... // uniform
+ // } while (i < tid);
+ // if (bar(i)) ... // divergent
+ //
+ // A program may contain unstructured loops. Therefore, we cannot leverage
+ // LoopInfo, which only recognizes natural loops.
+ //
+ // The algorithm used here handles both natural and unstructured loops. Given
+ // a branch TI, we first compute its influence region, the union of all simple
+ // paths from TI to its immediate post dominator (IPostDom). Then, we search
+ // for all the values defined in the influence region but used outside. All
+ // these users are sync dependent on TI.
+ DenseSet<BasicBlock *> InfluenceRegion;
+ computeInfluenceRegion(ThisBB, IPostDom, InfluenceRegion);
+ // An insight that can speed up the search process is that all the in-region
+ // values that are used outside must dominate TI. Therefore, instead of
+  // searching every basic block in the influence region, we search all the
+ // dominators of TI until it is outside the influence region.
+ BasicBlock *InfluencedBB = ThisBB;
+ while (InfluenceRegion.count(InfluencedBB)) {
+ for (auto &I : *InfluencedBB)
+ findUsersOutsideInfluenceRegion(I, InfluenceRegion);
+ DomTreeNode *IDomNode = DT.getNode(InfluencedBB)->getIDom();
+ if (IDomNode == nullptr)
+ break;
+ InfluencedBB = IDomNode->getBlock();
+ }
+}
+
+void DivergencePropagator::findUsersOutsideInfluenceRegion(
+ Instruction &I, const DenseSet<BasicBlock *> &InfluenceRegion) {
+ for (User *U : I.users()) {
+ Instruction *UserInst = cast<Instruction>(U);
+ if (!InfluenceRegion.count(UserInst->getParent())) {
+ if (DV.insert(UserInst).second)
+ Worklist.push_back(UserInst);
+ }
+ }
+}
+
+void DivergencePropagator::computeInfluenceRegion(
+ BasicBlock *Start, BasicBlock *End,
+ DenseSet<BasicBlock *> &InfluenceRegion) {
+ assert(PDT.properlyDominates(End, Start) &&
+ "End does not properly dominate Start");
+ std::vector<BasicBlock *> InfluenceStack;
+ InfluenceStack.push_back(Start);
+ InfluenceRegion.insert(Start);
+ while (!InfluenceStack.empty()) {
+ BasicBlock *BB = InfluenceStack.back();
+ InfluenceStack.pop_back();
+ for (BasicBlock *Succ : successors(BB)) {
+ if (End != Succ && InfluenceRegion.insert(Succ).second)
+ InfluenceStack.push_back(Succ);
+ }
+ }
+}
+
+void DivergencePropagator::exploreDataDependency(Value *V) {
+ // Follow def-use chains of V.
+ for (User *U : V->users()) {
+ Instruction *UserInst = cast<Instruction>(U);
+ if (DV.insert(UserInst).second)
+ Worklist.push_back(UserInst);
+ }
+}
+
+void DivergencePropagator::propagate() {
+ // Traverse the dependency graph using DFS.
+ while (!Worklist.empty()) {
+ Value *V = Worklist.back();
+ Worklist.pop_back();
+ if (TerminatorInst *TI = dyn_cast<TerminatorInst>(V)) {
+ // Terminators with less than two successors won't introduce sync
+ // dependency. Ignore them.
+ if (TI->getNumSuccessors() > 1)
+ exploreSyncDependency(TI);
+ }
+ exploreDataDependency(V);
+ }
+}
+
+} // end anonymous namespace
+
+FunctionPass *llvm::createDivergenceAnalysisPass() {
+ return new DivergenceAnalysis();
+}
+
+bool DivergenceAnalysis::runOnFunction(Function &F) {
+ auto *TTIWP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
+ if (TTIWP == nullptr)
+ return false;
+
+ TargetTransformInfo &TTI = TTIWP->getTTI(F);
+ // Fast path: if the target does not have branch divergence, we do not mark
+ // any branch as divergent.
+ if (!TTI.hasBranchDivergence())
+ return false;
+
+ DivergentValues.clear();
+ DivergencePropagator DP(F, TTI,
+ getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
+ getAnalysis<PostDominatorTree>(), DivergentValues);
+ DP.populateWithSourcesOfDivergence();
+ DP.propagate();
+ return false;
+}
+
+void DivergenceAnalysis::print(raw_ostream &OS, const Module *) const {
+ if (DivergentValues.empty())
+ return;
+ const Value *FirstDivergentValue = *DivergentValues.begin();
+ const Function *F;
+ if (const Argument *Arg = dyn_cast<Argument>(FirstDivergentValue)) {
+ F = Arg->getParent();
+ } else if (const Instruction *I =
+ dyn_cast<Instruction>(FirstDivergentValue)) {
+ F = I->getParent()->getParent();
+ } else {
+ llvm_unreachable("Only arguments and instructions can be divergent");
+ }
+
+ // Dumps all divergent values in F, arguments and then instructions.
+ for (auto &Arg : F->args()) {
+ if (DivergentValues.count(&Arg))
+ OS << "DIVERGENT: " << Arg << "\n";
+ }
+ // Iterate instructions using inst_range to ensure a deterministic order.
+ for (auto &I : inst_range(F)) {
+ if (DivergentValues.count(&I))
+ OS << "DIVERGENT:" << I << "\n";
+ }
+}
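To make the propagation above concrete, here is a minimal, self-contained sketch (not part of this patch) of the data-dependence half of the algorithm: seed a worklist with the sources of divergence, then mark everything reachable through def-use edges as divergent. The toy graph and the value names are invented for illustration; sync dependence is omitted.

// Illustrative sketch only: worklist propagation over a toy def-use graph.
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>

int main() {
  // Toy def-use edges: value -> users. %tid is the only source of divergence.
  std::unordered_map<std::string, std::vector<std::string>> Users = {
      {"tid", {"cond"}}, {"cond", {"br"}}, {"br", {}}, {"x", {"y"}}, {"y", {}}};
  // Seed the worklist and the divergent set with the sources of divergence.
  std::vector<std::string> Worklist = {"tid"};
  std::unordered_set<std::string> Divergent(Worklist.begin(), Worklist.end());
  // DFS over users: everything reachable from a source becomes divergent.
  while (!Worklist.empty()) {
    std::string V = Worklist.back();
    Worklist.pop_back();
    for (const std::string &U : Users[V])
      if (Divergent.insert(U).second)
        Worklist.push_back(U);
  }
  // Divergent now holds {tid, cond, br}; x and y stay uniform.
  return 0;
}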
diff --git a/contrib/llvm/lib/Analysis/FunctionTargetTransformInfo.cpp b/contrib/llvm/lib/Analysis/FunctionTargetTransformInfo.cpp
deleted file mode 100644
index a686bec..0000000
--- a/contrib/llvm/lib/Analysis/FunctionTargetTransformInfo.cpp
+++ /dev/null
@@ -1,50 +0,0 @@
-//===- llvm/Analysis/FunctionTargetTransformInfo.h --------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass wraps a TargetTransformInfo in a FunctionPass so that it can
-// forward along the current Function so that we can make target specific
-// decisions based on the particular subtarget specified for each Function.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/InitializePasses.h"
-#include "llvm/Analysis/FunctionTargetTransformInfo.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "function-tti"
-static const char ftti_name[] = "Function TargetTransformInfo";
-INITIALIZE_PASS_BEGIN(FunctionTargetTransformInfo, "function_tti", ftti_name, false, true)
-INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
-INITIALIZE_PASS_END(FunctionTargetTransformInfo, "function_tti", ftti_name, false, true)
-char FunctionTargetTransformInfo::ID = 0;
-
-namespace llvm {
-FunctionPass *createFunctionTargetTransformInfoPass() {
- return new FunctionTargetTransformInfo();
-}
-}
-
-FunctionTargetTransformInfo::FunctionTargetTransformInfo()
- : FunctionPass(ID), Fn(nullptr), TTI(nullptr) {
- initializeFunctionTargetTransformInfoPass(*PassRegistry::getPassRegistry());
-}
-
-void FunctionTargetTransformInfo::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- AU.addRequired<TargetTransformInfo>();
-}
-
-void FunctionTargetTransformInfo::releaseMemory() {}
-
-bool FunctionTargetTransformInfo::runOnFunction(Function &F) {
- Fn = &F;
- TTI = &getAnalysis<TargetTransformInfo>();
- return false;
-}
diff --git a/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp b/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp
index ded1de7..65ba1c7 100644
--- a/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp
+++ b/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp
@@ -49,7 +49,7 @@ public:
explicit CGPassManager()
: ModulePass(ID), PMDataManager() { }
- /// run - Execute all of the passes scheduled for execution. Keep track of
+ /// Execute all of the passes scheduled for execution. Keep track of
/// whether any of the passes modifies the module, and if so, return true.
bool runOnModule(Module &M) override;
@@ -142,9 +142,8 @@ bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC,
FPPassManager *FPP = (FPPassManager*)P;
// Run pass P on all functions in the current SCC.
- for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end();
- I != E; ++I) {
- if (Function *F = (*I)->getFunction()) {
+ for (CallGraphNode *CGN : CurSCC) {
+ if (Function *F = CGN->getFunction()) {
dumpPassInfo(P, EXECUTION_MSG, ON_FUNCTION_MSG, F->getName());
{
TimeRegion PassTimer(getPassTimer(FPP));
@@ -165,7 +164,7 @@ bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC,
}
-/// RefreshCallGraph - Scan the functions in the specified CFG and resync the
+/// Scan the functions in the specified CFG and resync the
/// callgraph with the call sites found in it. This is used after
/// FunctionPasses have potentially munged the callgraph, and can be used after
/// CallGraphSCC passes to verify that they correctly updated the callgraph.
@@ -181,9 +180,8 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC,
DEBUG(dbgs() << "CGSCCPASSMGR: Refreshing SCC with " << CurSCC.size()
<< " nodes:\n";
- for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end();
- I != E; ++I)
- (*I)->dump();
+ for (CallGraphNode *CGN : CurSCC)
+ CGN->dump();
);
bool MadeChange = false;
@@ -214,10 +212,13 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC,
// list of the same call.
CallSites.count(I->first) ||
- // If the call edge is not from a call or invoke, then the function
- // pass RAUW'd a call with another value. This can happen when
- // constant folding happens of well known functions etc.
- !CallSite(I->first)) {
+            // If the call edge is not from a call or invoke, or it is an
+            // intrinsic call, then the function pass RAUW'd a call with
+ // another value. This can happen when constant folding happens
+ // of well known functions etc.
+ !CallSite(I->first) ||
+ (CallSite(I->first).getCalledFunction() &&
+ CallSite(I->first).getCalledFunction()->isIntrinsic())) {
assert(!CheckingMode &&
"CallGraphSCCPass did not update the CallGraph correctly!");
@@ -357,9 +358,8 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC,
DEBUG(if (MadeChange) {
dbgs() << "CGSCCPASSMGR: Refreshed SCC is now:\n";
- for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end();
- I != E; ++I)
- (*I)->dump();
+ for (CallGraphNode *CGN : CurSCC)
+ CGN->dump();
if (DevirtualizedCall)
dbgs() << "CGSCCPASSMGR: Refresh devirtualized a call!\n";
@@ -372,15 +372,15 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC,
return DevirtualizedCall;
}
-/// RunAllPassesOnSCC - Execute the body of the entire pass manager on the
-/// specified SCC. This keeps track of whether a function pass devirtualizes
+/// Execute the body of the entire pass manager on the specified SCC.
+/// This keeps track of whether a function pass devirtualizes
/// any calls and returns it in DevirtualizedCall.
bool CGPassManager::RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG,
bool &DevirtualizedCall) {
bool Changed = false;
- // CallGraphUpToDate - Keep track of whether the callgraph is known to be
- // up-to-date or not. The CGSSC pass manager runs two types of passes:
+ // Keep track of whether the callgraph is known to be up-to-date or not.
+ // The CGSSC pass manager runs two types of passes:
// CallGraphSCC Passes and other random function passes. Because other
// random function passes are not CallGraph aware, they may clobber the
// call graph by introducing new calls or deleting other ones. This flag
@@ -433,7 +433,7 @@ bool CGPassManager::RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG,
return Changed;
}
-/// run - Execute all of the passes scheduled for execution. Keep track of
+/// Execute all of the passes scheduled for execution. Keep track of
/// whether any of the passes modifies the module, and if so, return true.
bool CGPassManager::runOnModule(Module &M) {
CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
@@ -519,7 +519,7 @@ bool CGPassManager::doFinalization(CallGraph &CG) {
// CallGraphSCC Implementation
//===----------------------------------------------------------------------===//
-/// ReplaceNode - This informs the SCC and the pass manager that the specified
+/// This informs the SCC and the pass manager that the specified
/// Old node has been deleted, and New is to be used in its place.
void CallGraphSCC::ReplaceNode(CallGraphNode *Old, CallGraphNode *New) {
assert(Old != New && "Should not replace node with self");
@@ -578,8 +578,8 @@ void CallGraphSCCPass::assignPassManager(PMStack &PMS,
CGP->add(this);
}
-/// getAnalysisUsage - For this class, we declare that we require and preserve
-/// the call graph. If the derived class implements this method, it should
+/// For this class, we declare that we require and preserve the call graph.
+/// If the derived class implements this method, it should
/// always explicitly call the implementation here.
void CallGraphSCCPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<CallGraphWrapperPass>();
@@ -609,9 +609,9 @@ namespace {
bool runOnSCC(CallGraphSCC &SCC) override {
Out << Banner;
- for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
- if ((*I)->getFunction())
- (*I)->getFunction()->print(Out);
+ for (CallGraphNode *CGN : SCC) {
+ if (CGN->getFunction())
+ CGN->getFunction()->print(Out);
else
Out << "\nPrinting <null> Function\n";
}
diff --git a/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp b/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp
index 607c068..018ae99 100644
--- a/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp
+++ b/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp
@@ -96,7 +96,7 @@ namespace {
}
bool runOnModule(Module &M) override {
- InitializeAliasAnalysis(this);
+ InitializeAliasAnalysis(this, &M.getDataLayout());
// Find non-addr taken globals.
AnalyzeGlobals(M);
@@ -269,7 +269,7 @@ bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V,
} else if (Operator::getOpcode(I) == Instruction::BitCast) {
if (AnalyzeUsesOfPointer(I, Readers, Writers, OkayStoreDest))
return true;
- } else if (CallSite CS = I) {
+ } else if (auto CS = CallSite(I)) {
// Make sure that this is just the function being called, not that it is
// passing into the function.
if (!CS.isCallee(&U)) {
@@ -322,7 +322,8 @@ bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) {
continue;
// Check the value being stored.
- Value *Ptr = GetUnderlyingObject(SI->getOperand(0));
+ Value *Ptr = GetUnderlyingObject(SI->getOperand(0),
+ GV->getParent()->getDataLayout());
if (!isAllocLikeFn(Ptr, TLI))
return false; // Too hard to analyze.
@@ -481,8 +482,8 @@ AliasAnalysis::AliasResult
GlobalsModRef::alias(const Location &LocA,
const Location &LocB) {
// Get the base object these pointers point to.
- const Value *UV1 = GetUnderlyingObject(LocA.Ptr);
- const Value *UV2 = GetUnderlyingObject(LocB.Ptr);
+ const Value *UV1 = GetUnderlyingObject(LocA.Ptr, *DL);
+ const Value *UV2 = GetUnderlyingObject(LocB.Ptr, *DL);
// If either of the underlying values is a global, they may be non-addr-taken
// globals, which we can answer queries about.
@@ -540,8 +541,9 @@ GlobalsModRef::getModRefInfo(ImmutableCallSite CS,
// If we are asking for mod/ref info of a direct call with a pointer to a
// global we are tracking, return information if we have it.
+ const DataLayout &DL = CS.getCaller()->getParent()->getDataLayout();
if (const GlobalValue *GV =
- dyn_cast<GlobalValue>(GetUnderlyingObject(Loc.Ptr)))
+ dyn_cast<GlobalValue>(GetUnderlyingObject(Loc.Ptr, DL)))
if (GV->hasLocalLinkage())
if (const Function *F = CS.getCalledFunction())
if (NonAddressTakenGlobals.count(GV))
diff --git a/contrib/llvm/lib/Analysis/IPA/InlineCost.cpp b/contrib/llvm/lib/Analysis/IPA/InlineCost.cpp
index 86e7fc2..2bd959d 100644
--- a/contrib/llvm/lib/Analysis/IPA/InlineCost.cpp
+++ b/contrib/llvm/lib/Analysis/IPA/InlineCost.cpp
@@ -45,9 +45,6 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
typedef InstVisitor<CallAnalyzer, bool> Base;
friend class InstVisitor<CallAnalyzer, bool>;
- // DataLayout if available, or null.
- const DataLayout *const DL;
-
/// The TargetTransformInfo available for this compilation.
const TargetTransformInfo &TTI;
@@ -67,6 +64,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
bool ContainsNoDuplicateCall;
bool HasReturn;
bool HasIndirectBr;
+ bool HasFrameEscape;
/// Number of bytes allocated statically by the callee.
uint64_t AllocatedSize;
@@ -145,18 +143,18 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
bool visitUnreachableInst(UnreachableInst &I);
public:
- CallAnalyzer(const DataLayout *DL, const TargetTransformInfo &TTI,
- AssumptionCacheTracker *ACT, Function &Callee, int Threshold)
- : DL(DL), TTI(TTI), ACT(ACT), F(Callee), Threshold(Threshold), Cost(0),
+ CallAnalyzer(const TargetTransformInfo &TTI, AssumptionCacheTracker *ACT,
+ Function &Callee, int Threshold)
+ : TTI(TTI), ACT(ACT), F(Callee), Threshold(Threshold), Cost(0),
IsCallerRecursive(false), IsRecursiveCall(false),
ExposesReturnsTwice(false), HasDynamicAlloca(false),
ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
- AllocatedSize(0), NumInstructions(0), NumVectorInstructions(0),
- FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0),
- NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
- NumConstantPtrCmps(0), NumConstantPtrDiffs(0),
- NumInstructionsSimplified(0), SROACostSavings(0),
- SROACostSavingsLost(0) {}
+ HasFrameEscape(false), AllocatedSize(0), NumInstructions(0),
+ NumVectorInstructions(0), FiftyPercentVectorBonus(0),
+ TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0),
+ NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0),
+ NumConstantPtrDiffs(0), NumInstructionsSimplified(0),
+ SROACostSavings(0), SROACostSavingsLost(0) {}
bool analyzeCall(CallSite CS);
@@ -244,10 +242,8 @@ bool CallAnalyzer::isGEPOffsetConstant(GetElementPtrInst &GEP) {
/// Returns false if unable to compute the offset for any reason. Respects any
/// simplified values known during the analysis of this callsite.
bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
- if (!DL)
- return false;
-
- unsigned IntPtrWidth = DL->getPointerSizeInBits();
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ unsigned IntPtrWidth = DL.getPointerSizeInBits();
assert(IntPtrWidth == Offset.getBitWidth());
for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
@@ -263,12 +259,12 @@ bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
// Handle a struct index, which adds its field offset to the pointer.
if (StructType *STy = dyn_cast<StructType>(*GTI)) {
unsigned ElementIdx = OpC->getZExtValue();
- const StructLayout *SL = DL->getStructLayout(STy);
+ const StructLayout *SL = DL.getStructLayout(STy);
Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
continue;
}
- APInt TypeSize(IntPtrWidth, DL->getTypeAllocSize(GTI.getIndexedType()));
+ APInt TypeSize(IntPtrWidth, DL.getTypeAllocSize(GTI.getIndexedType()));
Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
}
return true;
@@ -289,9 +285,9 @@ bool CallAnalyzer::visitAlloca(AllocaInst &I) {
// Accumulate the allocated size.
if (I.isStaticAlloca()) {
+ const DataLayout &DL = F.getParent()->getDataLayout();
Type *Ty = I.getAllocatedType();
- AllocatedSize += (DL ? DL->getTypeAllocSize(Ty) :
- Ty->getPrimitiveSizeInBits());
+ AllocatedSize += DL.getTypeAllocSize(Ty);
}
// We will happily inline static alloca instructions.
@@ -327,7 +323,7 @@ bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
// Try to fold GEPs of constant-offset call site argument pointers. This
// requires target data and inbounds GEPs.
- if (DL && I.isInBounds()) {
+ if (I.isInBounds()) {
// Check if we have a base + offset for the pointer.
Value *Ptr = I.getPointerOperand();
std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Ptr);
@@ -396,7 +392,6 @@ bool CallAnalyzer::visitBitCast(BitCastInst &I) {
}
bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
- const DataLayout *DL = I.getDataLayout();
// Propagate constants through ptrtoint.
Constant *COp = dyn_cast<Constant>(I.getOperand(0));
if (!COp)
@@ -410,7 +405,8 @@ bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
// Track base/offset pairs when converted to a plain integer provided the
// integer is large enough to represent the pointer.
unsigned IntegerSize = I.getType()->getScalarSizeInBits();
- if (DL && IntegerSize >= DL->getPointerSizeInBits()) {
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ if (IntegerSize >= DL.getPointerSizeInBits()) {
std::pair<Value *, APInt> BaseAndOffset
= ConstantOffsetPtrs.lookup(I.getOperand(0));
if (BaseAndOffset.first)
@@ -433,7 +429,6 @@ bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
}
bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
- const DataLayout *DL = I.getDataLayout();
// Propagate constants through ptrtoint.
Constant *COp = dyn_cast<Constant>(I.getOperand(0));
if (!COp)
@@ -448,7 +443,8 @@ bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
// modifications provided the integer is not too large.
Value *Op = I.getOperand(0);
unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
- if (DL && IntegerSize <= DL->getPointerSizeInBits()) {
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ if (IntegerSize <= DL.getPointerSizeInBits()) {
std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
if (BaseAndOffset.first)
ConstantOffsetPtrs[&I] = BaseAndOffset;
@@ -485,12 +481,14 @@ bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) {
Constant *COp = dyn_cast<Constant>(Operand);
if (!COp)
COp = SimplifiedValues.lookup(Operand);
- if (COp)
+ if (COp) {
+ const DataLayout &DL = F.getParent()->getDataLayout();
if (Constant *C = ConstantFoldInstOperands(I.getOpcode(), I.getType(),
COp, DL)) {
SimplifiedValues[&I] = C;
return true;
}
+ }
// Disable any SROA on the argument to arbitrary unary operators.
disableSROA(Operand);
@@ -595,13 +593,20 @@ bool CallAnalyzer::visitSub(BinaryOperator &I) {
bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+ const DataLayout &DL = F.getParent()->getDataLayout();
if (!isa<Constant>(LHS))
if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
LHS = SimpleLHS;
if (!isa<Constant>(RHS))
if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
RHS = SimpleRHS;
- Value *SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, DL);
+ Value *SimpleV = nullptr;
+ if (auto FI = dyn_cast<FPMathOperator>(&I))
+ SimpleV =
+ SimplifyFPBinOp(I.getOpcode(), LHS, RHS, FI->getFastMathFlags(), DL);
+ else
+ SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, DL);
+
if (Constant *C = dyn_cast_or_null<Constant>(SimpleV)) {
SimplifiedValues[&I] = C;
return true;
@@ -617,7 +622,7 @@ bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
bool CallAnalyzer::visitLoad(LoadInst &I) {
Value *SROAArg;
DenseMap<Value *, int>::iterator CostIt;
- if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
+ if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) {
if (I.isSimple()) {
accumulateSROACost(CostIt, InlineConstants::InstrCost);
return true;
@@ -632,7 +637,7 @@ bool CallAnalyzer::visitLoad(LoadInst &I) {
bool CallAnalyzer::visitStore(StoreInst &I) {
Value *SROAArg;
DenseMap<Value *, int>::iterator CostIt;
- if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
+ if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) {
if (I.isSimple()) {
accumulateSROACost(CostIt, InlineConstants::InstrCost);
return true;
@@ -713,8 +718,7 @@ bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) {
bool CallAnalyzer::visitCallSite(CallSite CS) {
if (CS.hasFnAttr(Attribute::ReturnsTwice) &&
- !F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::ReturnsTwice)) {
+ !F.hasFnAttribute(Attribute::ReturnsTwice)) {
// This aborts the entire analysis.
ExposesReturnsTwice = true;
return false;
@@ -740,6 +744,9 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
case Intrinsic::memmove:
// SROA can usually chew through these intrinsics, but they aren't free.
return false;
+ case Intrinsic::frameescape:
+ HasFrameEscape = true;
+ return false;
}
}
@@ -783,7 +790,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
// during devirtualization and so we want to give it a hefty bonus for
// inlining, but cap that bonus in the event that inlining wouldn't pan
// out. Pretend to inline the function, with a custom threshold.
- CallAnalyzer CA(DL, TTI, ACT, *F, InlineConstants::IndirectCallThreshold);
+ CallAnalyzer CA(TTI, ACT, *F, InlineConstants::IndirectCallThreshold);
if (CA.analyzeCall(CS)) {
// We were able to inline the indirect call! Subtract the cost from the
// bonus we want to apply, but don't go below zero.
@@ -907,6 +914,25 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
++NumVectorInstructions;
+ // If the instruction is floating point, and the target says this operation is
+ // expensive or the function has the "use-soft-float" attribute, this may
+ // eventually become a library call. Treat the cost as such.
+ if (I->getType()->isFloatingPointTy()) {
+ bool hasSoftFloatAttr = false;
+
+ // If the function has the "use-soft-float" attribute, mark it as expensive.
+ if (F.hasFnAttribute("use-soft-float")) {
+ Attribute Attr = F.getFnAttribute("use-soft-float");
+ StringRef Val = Attr.getValueAsString();
+ if (Val == "true")
+ hasSoftFloatAttr = true;
+ }
+
+ if (TTI.getFPOpCost(I->getType()) == TargetTransformInfo::TCC_Expensive ||
+ hasSoftFloatAttr)
+ Cost += InlineConstants::CallPenalty;
+ }
+
// If the instruction simplified to a constant, there is no cost to this
// instruction. Visit the instructions using our InstVisitor to account for
// all of the per-instruction logic. The visit tree returns true if we
@@ -919,7 +945,7 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
// If the visit this instruction detected an uninlinable pattern, abort.
if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
- HasIndirectBr)
+ HasIndirectBr || HasFrameEscape)
return false;
// If the caller is a recursive function then we don't want to inline
@@ -929,16 +955,9 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller)
return false;
- if (NumVectorInstructions > NumInstructions/2)
- VectorBonus = FiftyPercentVectorBonus;
- else if (NumVectorInstructions > NumInstructions/10)
- VectorBonus = TenPercentVectorBonus;
- else
- VectorBonus = 0;
-
- // Check if we've past the threshold so we don't spin in huge basic
- // blocks that will never inline.
- if (Cost > (Threshold + VectorBonus))
+      // Check if we've passed the maximum possible threshold so we don't spin
+ // huge basic blocks that will never inline.
+ if (Cost > Threshold)
return false;
}
@@ -952,10 +971,11 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
/// returns 0 if V is not a pointer, and returns the constant '0' if there are
/// no constant offsets applied.
ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
- if (!DL || !V->getType()->isPointerTy())
+ if (!V->getType()->isPointerTy())
return nullptr;
- unsigned IntPtrWidth = DL->getPointerSizeInBits();
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ unsigned IntPtrWidth = DL.getPointerSizeInBits();
APInt Offset = APInt::getNullValue(IntPtrWidth);
// Even though we don't look through PHI nodes, we could be called on an
@@ -979,7 +999,7 @@ ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
assert(V->getType()->isPointerTy() && "Unexpected operand type!");
} while (Visited.insert(V).second);
- Type *IntPtrTy = DL->getIntPtrType(V->getContext());
+ Type *IntPtrTy = DL.getIntPtrType(V->getContext());
return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset));
}
@@ -993,33 +1013,42 @@ ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
bool CallAnalyzer::analyzeCall(CallSite CS) {
++NumCallsAnalyzed;
- // Track whether the post-inlining function would have more than one basic
- // block. A single basic block is often intended for inlining. Balloon the
- // threshold by 50% until we pass the single-BB phase.
- bool SingleBB = true;
- int SingleBBBonus = Threshold / 2;
- Threshold += SingleBBBonus;
-
// Perform some tweaks to the cost and threshold based on the direct
// callsite information.
// We want to more aggressively inline vector-dense kernels, so up the
// threshold, and we'll lower it if the % of vector instructions gets too
- // low.
+  // low. Note that these bonuses are somewhat arbitrary and evolved over time
+ // by accident as much as because they are principled bonuses.
+ //
+ // FIXME: It would be nice to remove all such bonuses. At least it would be
+ // nice to base the bonus values on something more scientific.
assert(NumInstructions == 0);
assert(NumVectorInstructions == 0);
- FiftyPercentVectorBonus = Threshold;
- TenPercentVectorBonus = Threshold / 2;
+ FiftyPercentVectorBonus = 3 * Threshold / 2;
+ TenPercentVectorBonus = 3 * Threshold / 4;
+ const DataLayout &DL = F.getParent()->getDataLayout();
+
+ // Track whether the post-inlining function would have more than one basic
+ // block. A single basic block is often intended for inlining. Balloon the
+ // threshold by 50% until we pass the single-BB phase.
+ bool SingleBB = true;
+ int SingleBBBonus = Threshold / 2;
+
+  // Speculatively apply all possible bonuses to Threshold. If Cost ever
+  // exceeds this Threshold, then since Cost cannot decrease, we can stop
+  // processing the rest of the function body.
+ Threshold += (SingleBBBonus + FiftyPercentVectorBonus);
// Give out bonuses per argument, as the instructions setting them up will
// be gone after inlining.
for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) {
- if (DL && CS.isByValArgument(I)) {
+ if (CS.isByValArgument(I)) {
// We approximate the number of loads and stores needed by dividing the
// size of the byval type by the target's pointer size.
PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
- unsigned TypeSize = DL->getTypeSizeInBits(PTy->getElementType());
- unsigned PointerSize = DL->getPointerSizeInBits();
+ unsigned TypeSize = DL.getTypeSizeInBits(PTy->getElementType());
+ unsigned PointerSize = DL.getPointerSizeInBits();
// Ceiling division.
unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
@@ -1053,9 +1082,9 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
Instruction *Instr = CS.getInstruction();
if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) {
if (isa<UnreachableInst>(II->getNormalDest()->begin()))
- Threshold = 1;
+ Threshold = 0;
} else if (isa<UnreachableInst>(++BasicBlock::iterator(Instr)))
- Threshold = 1;
+ Threshold = 0;
// If this function uses the coldcc calling convention, prefer not to inline
// it.
@@ -1127,7 +1156,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
// Bail out the moment we cross the threshold. This means we'll under-count
// the cost, but only when undercounting doesn't matter.
- if (Cost > (Threshold + VectorBonus))
+ if (Cost > Threshold)
break;
BasicBlock *BB = BBWorklist[Idx];
@@ -1147,7 +1176,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
// returns false, and we can bail on out.
if (!analyzeBlock(BB, EphValues)) {
if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
- HasIndirectBr)
+ HasIndirectBr || HasFrameEscape)
return false;
// If the caller is a recursive function then we don't want to inline
@@ -1205,7 +1234,13 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall)
return false;
- Threshold += VectorBonus;
+ // We applied the maximum possible vector bonus at the beginning. Now,
+ // subtract the excess bonus, if any, from the Threshold before
+ // comparing against Cost.
+ if (NumVectorInstructions <= NumInstructions / 10)
+ Threshold -= FiftyPercentVectorBonus;
+ else if (NumVectorInstructions <= NumInstructions / 2)
+ Threshold -= (FiftyPercentVectorBonus - TenPercentVectorBonus);
return Cost < Threshold;
}
@@ -1220,19 +1255,19 @@ void CallAnalyzer::dump() {
DEBUG_PRINT_STAT(NumConstantPtrCmps);
DEBUG_PRINT_STAT(NumConstantPtrDiffs);
DEBUG_PRINT_STAT(NumInstructionsSimplified);
+ DEBUG_PRINT_STAT(NumInstructions);
DEBUG_PRINT_STAT(SROACostSavings);
DEBUG_PRINT_STAT(SROACostSavingsLost);
DEBUG_PRINT_STAT(ContainsNoDuplicateCall);
DEBUG_PRINT_STAT(Cost);
DEBUG_PRINT_STAT(Threshold);
- DEBUG_PRINT_STAT(VectorBonus);
#undef DEBUG_PRINT_STAT
}
#endif
INITIALIZE_PASS_BEGIN(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis",
true, true)
-INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_END(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis",
true, true)
@@ -1246,12 +1281,12 @@ InlineCostAnalysis::~InlineCostAnalysis() {}
void InlineCostAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<TargetTransformInfo>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
CallGraphSCCPass::getAnalysisUsage(AU);
}
bool InlineCostAnalysis::runOnSCC(CallGraphSCC &SCC) {
- TTI = &getAnalysis<TargetTransformInfo>();
+ TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>();
ACT = &getAnalysis<AssumptionCacheTracker>();
return false;
}
@@ -1262,16 +1297,18 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, int Threshold) {
/// \brief Test that two functions either have or have not the given attribute
/// at the same time.
-static bool attributeMatches(Function *F1, Function *F2,
- Attribute::AttrKind Attr) {
- return F1->hasFnAttribute(Attr) == F2->hasFnAttribute(Attr);
+template<typename AttrKind>
+static bool attributeMatches(Function *F1, Function *F2, AttrKind Attr) {
+ return F1->getFnAttribute(Attr) == F2->getFnAttribute(Attr);
}
/// \brief Test that there are no attribute conflicts between Caller and Callee
/// that prevent inlining.
static bool functionsHaveCompatibleAttributes(Function *Caller,
Function *Callee) {
- return attributeMatches(Caller, Callee, Attribute::SanitizeAddress) &&
+ return attributeMatches(Caller, Callee, "target-cpu") &&
+ attributeMatches(Caller, Callee, "target-features") &&
+ attributeMatches(Caller, Callee, Attribute::SanitizeAddress) &&
attributeMatches(Caller, Callee, Attribute::SanitizeMemory) &&
attributeMatches(Caller, Callee, Attribute::SanitizeThread);
}
@@ -1309,8 +1346,7 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee,
DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
<< "...\n");
- CallAnalyzer CA(Callee->getDataLayout(), *TTI,
- ACT, *Callee, Threshold);
+ CallAnalyzer CA(TTIWP->getTTI(*Callee), ACT, *Callee, Threshold);
bool ShouldInline = CA.analyzeCall(CS);
DEBUG(CA.dump());
@@ -1325,9 +1361,7 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee,
}
bool InlineCostAnalysis::isInlineViable(Function &F) {
- bool ReturnsTwice =
- F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::ReturnsTwice);
+ bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice);
for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
// Disallow inlining of functions which contain indirect branches or
// blockaddresses.
@@ -1349,6 +1383,13 @@ bool InlineCostAnalysis::isInlineViable(Function &F) {
if (!ReturnsTwice && CS.isCall() &&
cast<CallInst>(CS.getInstruction())->canReturnTwice())
return false;
+
+ // Disallow inlining functions that call @llvm.frameescape. Doing this
+ // correctly would require major changes to the inliner.
+ if (CS.getCalledFunction() &&
+ CS.getCalledFunction()->getIntrinsicID() ==
+ llvm::Intrinsic::frameescape)
+ return false;
}
}
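A small standalone sketch (not part of the patch) of the threshold bookkeeping introduced above: the maximum vector and single-BB bonuses are added to Threshold up front so the analysis can bail out as soon as Cost exceeds it, and the unearned part of the vector bonus is subtracted again before the final comparison. All numbers below are made up, and the single-BB bonus adjustment is omitted for brevity.

// Illustrative sketch only: speculative bonuses and their later correction.
#include <cstdio>

int main() {
  int Threshold = 225;                             // base inline threshold
  int SingleBBBonus = Threshold / 2;               // 112
  int FiftyPercentVectorBonus = 3 * Threshold / 2; // 337
  int TenPercentVectorBonus = 3 * Threshold / 4;   // 168
  // Apply the maximum possible bonuses up front (674 total here).
  Threshold += SingleBBBonus + FiftyPercentVectorBonus;

  int NumInstructions = 100, NumVectorInstructions = 15, Cost = 400;

  // Subtract whatever part of the vector bonus the callee did not earn.
  if (NumVectorInstructions <= NumInstructions / 10)
    Threshold -= FiftyPercentVectorBonus;
  else if (NumVectorInstructions <= NumInstructions / 2)
    Threshold -= FiftyPercentVectorBonus - TenPercentVectorBonus;

  // Here: Threshold = 674 - 169 = 505, so Cost 400 would still be inlined.
  std::printf("inline? %s (Cost=%d, Threshold=%d)\n",
              Cost < Threshold ? "yes" : "no", Cost, Threshold);
  return 0;
}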
diff --git a/contrib/llvm/lib/Analysis/IVUsers.cpp b/contrib/llvm/lib/Analysis/IVUsers.cpp
index 6b5f370..b88b249 100644
--- a/contrib/llvm/lib/Analysis/IVUsers.cpp
+++ b/contrib/llvm/lib/Analysis/IVUsers.cpp
@@ -22,6 +22,7 @@
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -33,7 +34,7 @@ using namespace llvm;
char IVUsers::ID = 0;
INITIALIZE_PASS_BEGIN(IVUsers, "iv-users",
"Induction Variable Users", false, true)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_END(IVUsers, "iv-users",
@@ -113,6 +114,8 @@ static bool isSimplifiedLoopNest(BasicBlock *BB, const DominatorTree *DT,
/// return true. Otherwise, return false.
bool IVUsers::AddUsersImpl(Instruction *I,
SmallPtrSetImpl<Loop*> &SimpleLoopNests) {
+ const DataLayout &DL = I->getModule()->getDataLayout();
+
// Add this IV user to the Processed set before returning false to ensure that
// all IV users are members of the set. See IVUsers::isIVUserOrOperand.
if (!Processed.insert(I).second)
@@ -124,14 +127,14 @@ bool IVUsers::AddUsersImpl(Instruction *I,
// IVUsers is used by LSR which assumes that all SCEV expressions are safe to
// pass to SCEVExpander. Expressions are not safe to expand if they represent
// operations that are not safe to speculate, namely integer division.
- if (!isa<PHINode>(I) && !isSafeToSpeculativelyExecute(I, DL))
+ if (!isa<PHINode>(I) && !isSafeToSpeculativelyExecute(I))
return false;
// LSR is not APInt clean, do not touch integers bigger than 64-bits.
// Also avoid creating IVs of non-native types. For example, we don't want a
// 64-bit IV in 32-bit code just because the loop has one 64-bit cast.
uint64_t Width = SE->getTypeSizeInBits(I->getType());
- if (Width > 64 || (DL && !DL->isLegalInteger(Width)))
+ if (Width > 64 || !DL.isLegalInteger(Width))
return false;
// Get the symbolic expression for this instruction.
@@ -241,7 +244,7 @@ IVUsers::IVUsers()
}
void IVUsers::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<LoopInfo>();
+ AU.addRequired<LoopInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<ScalarEvolution>();
AU.setPreservesAll();
@@ -250,11 +253,9 @@ void IVUsers::getAnalysisUsage(AnalysisUsage &AU) const {
bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) {
L = l;
- LI = &getAnalysis<LoopInfo>();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
SE = &getAnalysis<ScalarEvolution>();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
// Find all uses of induction variables in this loop, and categorize
// them by stride. Start by finding all of the PHI nodes in the header for
diff --git a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
index 3fbbd7c..097b99e 100644
--- a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -45,13 +45,13 @@ STATISTIC(NumReassoc, "Number of reassociations");
namespace {
struct Query {
- const DataLayout *DL;
+ const DataLayout &DL;
const TargetLibraryInfo *TLI;
const DominatorTree *DT;
AssumptionCache *AC;
const Instruction *CxtI;
- Query(const DataLayout *DL, const TargetLibraryInfo *tli,
+ Query(const DataLayout &DL, const TargetLibraryInfo *tli,
const DominatorTree *dt, AssumptionCache *ac = nullptr,
const Instruction *cxti = nullptr)
: DL(DL), TLI(tli), DT(dt), AC(ac), CxtI(cxti) {}
@@ -61,6 +61,8 @@ struct Query {
static Value *SimplifyAndInst(Value *, Value *, const Query &, unsigned);
static Value *SimplifyBinOp(unsigned, Value *, Value *, const Query &,
unsigned);
+static Value *SimplifyFPBinOp(unsigned, Value *, Value *, const FastMathFlags &,
+ const Query &, unsigned);
static Value *SimplifyCmpInst(unsigned, Value *, Value *, const Query &,
unsigned);
static Value *SimplifyOrInst(Value *, Value *, const Query &, unsigned);
@@ -467,8 +469,7 @@ static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS,
// Evaluate the BinOp on the incoming phi values.
Value *CommonValue = nullptr;
- for (unsigned i = 0, e = PI->getNumIncomingValues(); i != e; ++i) {
- Value *Incoming = PI->getIncomingValue(i);
+ for (Value *Incoming : PI->incoming_values()) {
// If the incoming value is the phi node itself, it can safely be skipped.
if (Incoming == PI) continue;
Value *V = PI == LHS ?
@@ -508,8 +509,7 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
// Evaluate the BinOp on the incoming phi values.
Value *CommonValue = nullptr;
- for (unsigned i = 0, e = PI->getNumIncomingValues(); i != e; ++i) {
- Value *Incoming = PI->getIncomingValue(i);
+ for (Value *Incoming : PI->incoming_values()) {
// If the incoming value is the phi node itself, it can safely be skipped.
if (Incoming == PI) continue;
Value *V = SimplifyCmpInst(Pred, Incoming, RHS, Q, MaxRecurse);
@@ -582,7 +582,7 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
}
Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
- const DataLayout *DL, const TargetLibraryInfo *TLI,
+ const DataLayout &DL, const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, Query(DL, TLI, DT, AC, CxtI),
@@ -599,17 +599,11 @@ Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
/// This is very similar to GetPointerBaseWithConstantOffset except it doesn't
/// follow non-inbounds geps. This allows it to remain usable for icmp ult/etc.
/// folding.
-static Constant *stripAndComputeConstantOffsets(const DataLayout *DL,
- Value *&V,
+static Constant *stripAndComputeConstantOffsets(const DataLayout &DL, Value *&V,
bool AllowNonInbounds = false) {
assert(V->getType()->getScalarType()->isPointerTy());
- // Without DataLayout, just be conservative for now. Theoretically, more could
- // be done in this case.
- if (!DL)
- return ConstantInt::get(IntegerType::get(V->getContext(), 64), 0);
-
- Type *IntPtrTy = DL->getIntPtrType(V->getType())->getScalarType();
+ Type *IntPtrTy = DL.getIntPtrType(V->getType())->getScalarType();
APInt Offset = APInt::getNullValue(IntPtrTy->getIntegerBitWidth());
// Even though we don't look through PHI nodes, we could be called on an
@@ -619,7 +613,7 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout *DL,
do {
if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
if ((!AllowNonInbounds && !GEP->isInBounds()) ||
- !GEP->accumulateConstantOffset(*DL, Offset))
+ !GEP->accumulateConstantOffset(DL, Offset))
break;
V = GEP->getPointerOperand();
} else if (Operator::getOpcode(V) == Instruction::BitCast) {
@@ -644,8 +638,8 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout *DL,
/// \brief Compute the constant difference between two pointer values.
/// If the difference is not a constant, returns zero.
-static Constant *computePointerDifference(const DataLayout *DL,
- Value *LHS, Value *RHS) {
+static Constant *computePointerDifference(const DataLayout &DL, Value *LHS,
+ Value *RHS) {
Constant *LHSOffset = stripAndComputeConstantOffsets(DL, LHS);
Constant *RHSOffset = stripAndComputeConstantOffsets(DL, RHS);
@@ -781,7 +775,7 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
}
Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
- const DataLayout *DL, const TargetLibraryInfo *TLI,
+ const DataLayout &DL, const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, Query(DL, TLI, DT, AC, CxtI),
@@ -960,7 +954,7 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const Query &Q,
}
Value *llvm::SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
- const DataLayout *DL,
+ const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
@@ -969,7 +963,7 @@ Value *llvm::SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
}
Value *llvm::SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
- const DataLayout *DL,
+ const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
@@ -978,7 +972,7 @@ Value *llvm::SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
}
Value *llvm::SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF,
- const DataLayout *DL,
+ const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
@@ -986,7 +980,7 @@ Value *llvm::SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF,
RecursionLimit);
}
-Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const DataLayout *DL,
+Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
@@ -1090,7 +1084,7 @@ static Value *SimplifySDivInst(Value *Op0, Value *Op1, const Query &Q,
return nullptr;
}
-Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const DataLayout *DL,
+Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
@@ -1108,7 +1102,7 @@ static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const Query &Q,
return nullptr;
}
-Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const DataLayout *DL,
+Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
@@ -1116,8 +1110,8 @@ Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const DataLayout *DL,
RecursionLimit);
}
-static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const Query &Q,
- unsigned) {
+static Value *SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+ const Query &Q, unsigned) {
// undef / X -> undef (the undef could be a snan).
if (match(Op0, m_Undef()))
return Op0;
@@ -1126,14 +1120,21 @@ static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const Query &Q,
if (match(Op1, m_Undef()))
return Op1;
+ // 0 / X -> 0
+ // Requires that NaNs are off (X could be zero) and signed zeroes are
+ // ignored (X could be positive or negative, so the output sign is unknown).
+ if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op0, m_AnyZero()))
+ return Op0;
+
return nullptr;
}
-Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, const DataLayout *DL,
+Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+ const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
- return ::SimplifyFDivInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI),
+ return ::SimplifyFDivInst(Op0, Op1, FMF, Query(DL, TLI, DT, AC, CxtI),
RecursionLimit);
}
@@ -1208,7 +1209,7 @@ static Value *SimplifySRemInst(Value *Op0, Value *Op1, const Query &Q,
return nullptr;
}
-Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const DataLayout *DL,
+Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
@@ -1226,7 +1227,7 @@ static Value *SimplifyURemInst(Value *Op0, Value *Op1, const Query &Q,
return nullptr;
}
-Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const DataLayout *DL,
+Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
@@ -1234,8 +1235,8 @@ Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const DataLayout *DL,
RecursionLimit);
}
-static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const Query &,
- unsigned) {
+static Value *SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+ const Query &, unsigned) {
// undef % X -> undef (the undef could be a snan).
if (match(Op0, m_Undef()))
return Op0;
@@ -1244,14 +1245,21 @@ static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const Query &,
if (match(Op1, m_Undef()))
return Op1;
+ // 0 % X -> 0
+ // Requires that NaNs are off (X could be zero) and signed zeroes are
+ // ignored (X could be positive or negative, so the output sign is unknown).
+ if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op0, m_AnyZero()))
+ return Op0;
+
return nullptr;
}
-Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, const DataLayout *DL,
+Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+ const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
- return ::SimplifyFRemInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI),
+ return ::SimplifyFRemInst(Op0, Op1, FMF, Query(DL, TLI, DT, AC, CxtI),
RecursionLimit);
}
@@ -1371,7 +1379,7 @@ static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
}
Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
- const DataLayout *DL, const TargetLibraryInfo *TLI,
+ const DataLayout &DL, const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, Query(DL, TLI, DT, AC, CxtI),
@@ -1395,7 +1403,7 @@ static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
}
Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
- const DataLayout *DL,
+ const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
@@ -1429,7 +1437,7 @@ static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
}
Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
- const DataLayout *DL,
+ const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
@@ -1580,9 +1588,11 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q,
// A & (-A) = A if A is a power of two or zero.
if (match(Op0, m_Neg(m_Specific(Op1))) ||
match(Op1, m_Neg(m_Specific(Op0)))) {
- if (isKnownToBeAPowerOfTwo(Op0, /*OrZero*/ true, 0, Q.AC, Q.CxtI, Q.DT))
+ if (isKnownToBeAPowerOfTwo(Op0, Q.DL, /*OrZero*/ true, 0, Q.AC, Q.CxtI,
+ Q.DT))
return Op0;
- if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/ true, 0, Q.AC, Q.CxtI, Q.DT))
+ if (isKnownToBeAPowerOfTwo(Op1, Q.DL, /*OrZero*/ true, 0, Q.AC, Q.CxtI,
+ Q.DT))
return Op1;
}
@@ -1627,7 +1637,7 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q,
return nullptr;
}
-Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const DataLayout *DL,
+Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
@@ -1815,7 +1825,7 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q,
return nullptr;
}
-Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const DataLayout *DL,
+Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
@@ -1872,7 +1882,7 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const Query &Q,
return nullptr;
}
-Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const DataLayout *DL,
+Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
@@ -1932,10 +1942,10 @@ static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred,
// If the C and C++ standards are ever made sufficiently restrictive in this
// area, it may be possible to update LLVM's semantics accordingly and reinstate
// this optimization.
-static Constant *computePointerICmp(const DataLayout *DL,
+static Constant *computePointerICmp(const DataLayout &DL,
const TargetLibraryInfo *TLI,
- CmpInst::Predicate Pred,
- Value *LHS, Value *RHS) {
+ CmpInst::Predicate Pred, Value *LHS,
+ Value *RHS) {
// First, skip past any trivial no-ops.
LHS = LHS->stripPointerCasts();
RHS = RHS->stripPointerCasts();
@@ -2353,8 +2363,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// Turn icmp (ptrtoint x), (ptrtoint/constant) into a compare of the input
// if the integer type is the same size as the pointer type.
- if (MaxRecurse && Q.DL && isa<PtrToIntInst>(LI) &&
- Q.DL->getTypeSizeInBits(SrcTy) == DstTy->getPrimitiveSizeInBits()) {
+ if (MaxRecurse && isa<PtrToIntInst>(LI) &&
+ Q.DL.getTypeSizeInBits(SrcTy) == DstTy->getPrimitiveSizeInBits()) {
if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
// Transfer the cast to the constant.
if (Value *V = SimplifyICmpInst(Pred, SrcOp,
@@ -2966,10 +2976,12 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// what constant folding can make out of it.
Constant *Null = Constant::getNullValue(GLHS->getPointerOperandType());
SmallVector<Value *, 4> IndicesLHS(GLHS->idx_begin(), GLHS->idx_end());
- Constant *NewLHS = ConstantExpr::getGetElementPtr(Null, IndicesLHS);
+ Constant *NewLHS = ConstantExpr::getGetElementPtr(
+ GLHS->getSourceElementType(), Null, IndicesLHS);
SmallVector<Value *, 4> IndicesRHS(GRHS->idx_begin(), GRHS->idx_end());
- Constant *NewRHS = ConstantExpr::getGetElementPtr(Null, IndicesRHS);
+ Constant *NewRHS = ConstantExpr::getGetElementPtr(
+ GLHS->getSourceElementType(), Null, IndicesRHS);
return ConstantExpr::getICmp(Pred, NewLHS, NewRHS);
}
}
@@ -3008,7 +3020,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
}
Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const DataLayout *DL,
+ const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
Instruction *CxtI) {
@@ -3038,8 +3050,13 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (Pred == FCmpInst::FCMP_TRUE)
return ConstantInt::get(GetCompareTy(LHS), 1);
- if (isa<UndefValue>(RHS)) // fcmp pred X, undef -> undef
- return UndefValue::get(GetCompareTy(LHS));
+ // fcmp pred x, undef and fcmp pred undef, x
+ // fold to true if unordered, false if ordered
+ if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS)) {
+ // Choosing NaN for the undef will always make unordered comparison succeed
+ // and ordered comparison fail.
+ return ConstantInt::get(GetCompareTy(LHS), CmpInst::isUnordered(Pred));
+ }
// fcmp x,x -> true/false. Not all compares are foldable.
if (LHS == RHS) {
@@ -3050,44 +3067,57 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
}
// Handle fcmp with constant RHS
- if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS)) {
// If the constant is a nan, see if we can fold the comparison based on it.
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) {
- if (CFP->getValueAPF().isNaN()) {
- if (FCmpInst::isOrdered(Pred)) // True "if ordered and foo"
+ if (CFP->getValueAPF().isNaN()) {
+ if (FCmpInst::isOrdered(Pred)) // True "if ordered and foo"
+ return ConstantInt::getFalse(CFP->getContext());
+ assert(FCmpInst::isUnordered(Pred) &&
+ "Comparison must be either ordered or unordered!");
+ // True if unordered.
+ return ConstantInt::getTrue(CFP->getContext());
+ }
+ // Check whether the constant is an infinity.
+ if (CFP->getValueAPF().isInfinity()) {
+ if (CFP->getValueAPF().isNegative()) {
+ switch (Pred) {
+ case FCmpInst::FCMP_OLT:
+ // No value is ordered and less than negative infinity.
return ConstantInt::getFalse(CFP->getContext());
- assert(FCmpInst::isUnordered(Pred) &&
- "Comparison must be either ordered or unordered!");
- // True if unordered.
- return ConstantInt::getTrue(CFP->getContext());
- }
- // Check whether the constant is an infinity.
- if (CFP->getValueAPF().isInfinity()) {
- if (CFP->getValueAPF().isNegative()) {
- switch (Pred) {
- case FCmpInst::FCMP_OLT:
- // No value is ordered and less than negative infinity.
- return ConstantInt::getFalse(CFP->getContext());
- case FCmpInst::FCMP_UGE:
- // All values are unordered with or at least negative infinity.
- return ConstantInt::getTrue(CFP->getContext());
- default:
- break;
- }
- } else {
- switch (Pred) {
- case FCmpInst::FCMP_OGT:
- // No value is ordered and greater than infinity.
- return ConstantInt::getFalse(CFP->getContext());
- case FCmpInst::FCMP_ULE:
- // All values are unordered with and at most infinity.
- return ConstantInt::getTrue(CFP->getContext());
- default:
- break;
- }
+ case FCmpInst::FCMP_UGE:
+ // All values are unordered with or at least negative infinity.
+ return ConstantInt::getTrue(CFP->getContext());
+ default:
+ break;
+ }
+ } else {
+ switch (Pred) {
+ case FCmpInst::FCMP_OGT:
+ // No value is ordered and greater than infinity.
+ return ConstantInt::getFalse(CFP->getContext());
+ case FCmpInst::FCMP_ULE:
+ // All values are unordered with and at most infinity.
+ return ConstantInt::getTrue(CFP->getContext());
+ default:
+ break;
}
}
}
+ if (CFP->getValueAPF().isZero()) {
+ switch (Pred) {
+ case FCmpInst::FCMP_UGE:
+ if (CannotBeOrderedLessThanZero(LHS))
+ return ConstantInt::getTrue(CFP->getContext());
+ break;
+ case FCmpInst::FCMP_OLT:
+ // X < 0
+ if (CannotBeOrderedLessThanZero(LHS))
+ return ConstantInt::getFalse(CFP->getContext());
+ break;
+ default:
+ break;
+ }
+ }
}
// If the comparison is with the result of a select instruction, check whether
@@ -3106,7 +3136,7 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
}
Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const DataLayout *DL,
+ const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
@@ -3201,7 +3231,7 @@ static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal,
}
Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
- const DataLayout *DL,
+ const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
@@ -3211,17 +3241,18 @@ Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
/// SimplifyGEPInst - Given operands for an GetElementPtrInst, see if we can
/// fold the result. If not, this returns null.
-static Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const Query &Q, unsigned) {
+static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops,
+ const Query &Q, unsigned) {
// The type of the GEP pointer operand.
- PointerType *PtrTy = cast<PointerType>(Ops[0]->getType()->getScalarType());
- unsigned AS = PtrTy->getAddressSpace();
+ unsigned AS =
+ cast<PointerType>(Ops[0]->getType()->getScalarType())->getAddressSpace();
// getelementptr P -> P.
if (Ops.size() == 1)
return Ops[0];
// Compute the (pointer) type returned by the GEP instruction.
- Type *LastType = GetElementPtrInst::getIndexedType(PtrTy, Ops.slice(1));
+ Type *LastType = GetElementPtrInst::getIndexedType(SrcTy, Ops.slice(1));
Type *GEPTy = PointerType::get(LastType, AS);
if (VectorType *VT = dyn_cast<VectorType>(Ops[0]->getType()))
GEPTy = VectorType::get(GEPTy, VT->getNumElements());
@@ -3234,11 +3265,11 @@ static Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const Query &Q, unsigned) {
if (match(Ops[1], m_Zero()))
return Ops[0];
- Type *Ty = PtrTy->getElementType();
- if (Q.DL && Ty->isSized()) {
+ Type *Ty = SrcTy;
+ if (Ty->isSized()) {
Value *P;
uint64_t C;
- uint64_t TyAllocSize = Q.DL->getTypeAllocSize(Ty);
+ uint64_t TyAllocSize = Q.DL.getTypeAllocSize(Ty);
// getelementptr P, N -> P if P points to a type of zero size.
if (TyAllocSize == 0)
return Ops[0];
@@ -3246,7 +3277,7 @@ static Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const Query &Q, unsigned) {
// The following transforms are only safe if the ptrtoint cast
// doesn't truncate the pointers.
if (Ops[1]->getType()->getScalarSizeInBits() ==
- Q.DL->getPointerSizeInBits(AS)) {
+ Q.DL.getPointerSizeInBits(AS)) {
auto PtrToIntOrZero = [GEPTy](Value *P) -> Value * {
if (match(P, m_Zero()))
return Constant::getNullValue(GEPTy);
@@ -3288,14 +3319,17 @@ static Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const Query &Q, unsigned) {
if (!isa<Constant>(Ops[i]))
return nullptr;
- return ConstantExpr::getGetElementPtr(cast<Constant>(Ops[0]), Ops.slice(1));
+ return ConstantExpr::getGetElementPtr(SrcTy, cast<Constant>(Ops[0]),
+ Ops.slice(1));
}
-Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops, const DataLayout *DL,
+Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops, const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
- return ::SimplifyGEPInst(Ops, Query(DL, TLI, DT, AC, CxtI), RecursionLimit);
+ return ::SimplifyGEPInst(
+ cast<PointerType>(Ops[0]->getType()->getScalarType())->getElementType(),
+ Ops, Query(DL, TLI, DT, AC, CxtI), RecursionLimit);
}
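For context, a sketch of a caller after the signature change (not part of the patch; trySimplifyGEP is a made-up name): the DataLayout is now a mandatory reference and is naturally taken from the instruction's module, as the surrounding code does elsewhere.

  #include "llvm/ADT/SmallVector.h"
  #include "llvm/Analysis/InstructionSimplify.h"
  #include "llvm/IR/Instructions.h"
  #include "llvm/IR/Module.h"
  using namespace llvm;

  static void trySimplifyGEP(GetElementPtrInst *GEPI) {
    SmallVector<Value *, 8> Ops(GEPI->op_begin(), GEPI->op_end());
    const DataLayout &DL = GEPI->getModule()->getDataLayout();
    // TLI/DT/AC remain optional and may be null; the DataLayout no longer is.
    if (Value *V = SimplifyGEPInst(Ops, DL, /*TLI=*/nullptr, /*DT=*/nullptr,
                                   /*AC=*/nullptr, GEPI))
      GEPI->replaceAllUsesWith(V);
  }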
/// SimplifyInsertValueInst - Given operands for an InsertValueInst, see if we
@@ -3328,7 +3362,7 @@ static Value *SimplifyInsertValueInst(Value *Agg, Value *Val,
}
Value *llvm::SimplifyInsertValueInst(
- Value *Agg, Value *Val, ArrayRef<unsigned> Idxs, const DataLayout *DL,
+ Value *Agg, Value *Val, ArrayRef<unsigned> Idxs, const DataLayout &DL,
const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
return ::SimplifyInsertValueInst(Agg, Val, Idxs, Query(DL, TLI, DT, AC, CxtI),
@@ -3341,8 +3375,7 @@ static Value *SimplifyPHINode(PHINode *PN, const Query &Q) {
// with the common value.
Value *CommonValue = nullptr;
bool HasUndefInput = false;
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- Value *Incoming = PN->getIncomingValue(i);
+ for (Value *Incoming : PN->incoming_values()) {
// If the incoming value is the phi node itself, it can safely be skipped.
if (Incoming == PN) continue;
if (isa<UndefValue>(Incoming)) {
@@ -3376,7 +3409,7 @@ static Value *SimplifyTruncInst(Value *Op, Type *Ty, const Query &Q, unsigned) {
return nullptr;
}
-Value *llvm::SimplifyTruncInst(Value *Op, Type *Ty, const DataLayout *DL,
+Value *llvm::SimplifyTruncInst(Value *Op, Type *Ty, const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
@@ -3408,10 +3441,12 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
return SimplifyFMulInst (LHS, RHS, FastMathFlags(), Q, MaxRecurse);
case Instruction::SDiv: return SimplifySDivInst(LHS, RHS, Q, MaxRecurse);
case Instruction::UDiv: return SimplifyUDivInst(LHS, RHS, Q, MaxRecurse);
- case Instruction::FDiv: return SimplifyFDivInst(LHS, RHS, Q, MaxRecurse);
+ case Instruction::FDiv:
+ return SimplifyFDivInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
case Instruction::SRem: return SimplifySRemInst(LHS, RHS, Q, MaxRecurse);
case Instruction::URem: return SimplifyURemInst(LHS, RHS, Q, MaxRecurse);
- case Instruction::FRem: return SimplifyFRemInst(LHS, RHS, Q, MaxRecurse);
+ case Instruction::FRem:
+ return SimplifyFRemInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
case Instruction::Shl:
return SimplifyShlInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false,
Q, MaxRecurse);
@@ -3451,14 +3486,42 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
}
}
+/// SimplifyFPBinOp - Given operands for a BinaryOperator, see if we can
+/// fold the result. If not, this returns null.
+/// In contrast to SimplifyBinOp, try to use FastMathFlag when folding the
+/// result. In case we don't need FastMathFlags, simply fall to SimplifyBinOp.
+static Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+ const FastMathFlags &FMF, const Query &Q,
+ unsigned MaxRecurse) {
+ switch (Opcode) {
+ case Instruction::FAdd:
+ return SimplifyFAddInst(LHS, RHS, FMF, Q, MaxRecurse);
+ case Instruction::FSub:
+ return SimplifyFSubInst(LHS, RHS, FMF, Q, MaxRecurse);
+ case Instruction::FMul:
+ return SimplifyFMulInst(LHS, RHS, FMF, Q, MaxRecurse);
+ default:
+ return SimplifyBinOp(Opcode, LHS, RHS, Q, MaxRecurse);
+ }
+}
+
Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
- const DataLayout *DL, const TargetLibraryInfo *TLI,
+ const DataLayout &DL, const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
return ::SimplifyBinOp(Opcode, LHS, RHS, Query(DL, TLI, DT, AC, CxtI),
RecursionLimit);
}
+Value *llvm::SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+ const FastMathFlags &FMF, const DataLayout &DL,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifyFPBinOp(Opcode, LHS, RHS, FMF, Query(DL, TLI, DT, AC, CxtI),
+ RecursionLimit);
+}
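A possible caller of the new FMF-aware entry point (a sketch under the assumption that the caller guards with FPMathOperator before reading fast-math flags; trySimplify is a made-up name):

  #include "llvm/Analysis/InstructionSimplify.h"
  #include "llvm/IR/Instructions.h"
  #include "llvm/IR/Module.h"
  #include "llvm/IR/Operator.h"
  using namespace llvm;

  static Value *trySimplify(BinaryOperator &I, const TargetLibraryInfo *TLI,
                            const DominatorTree *DT, AssumptionCache *AC) {
    const DataLayout &DL = I.getModule()->getDataLayout();
    // Floating-point operators carry fast-math flags; route them through the
    // FMF-aware entry point, everything else through plain SimplifyBinOp.
    if (isa<FPMathOperator>(&I))
      return SimplifyFPBinOp(I.getOpcode(), I.getOperand(0), I.getOperand(1),
                             I.getFastMathFlags(), DL, TLI, DT, AC, &I);
    return SimplifyBinOp(I.getOpcode(), I.getOperand(0), I.getOperand(1), DL,
                         TLI, DT, AC, &I);
  }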
+
/// SimplifyCmpInst - Given operands for a CmpInst, see if we can
/// fold the result.
static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
@@ -3469,7 +3532,7 @@ static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
}
Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const DataLayout *DL, const TargetLibraryInfo *TLI,
+ const DataLayout &DL, const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
return ::SimplifyCmpInst(Predicate, LHS, RHS, Query(DL, TLI, DT, AC, CxtI),
@@ -3493,14 +3556,53 @@ static bool IsIdempotent(Intrinsic::ID ID) {
}
template <typename IterTy>
-static Value *SimplifyIntrinsic(Intrinsic::ID IID, IterTy ArgBegin, IterTy ArgEnd,
+static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd,
const Query &Q, unsigned MaxRecurse) {
+ Intrinsic::ID IID = F->getIntrinsicID();
+ unsigned NumOperands = std::distance(ArgBegin, ArgEnd);
+ Type *ReturnType = F->getReturnType();
+
+ // Binary Ops
+ if (NumOperands == 2) {
+ Value *LHS = *ArgBegin;
+ Value *RHS = *(ArgBegin + 1);
+ if (IID == Intrinsic::usub_with_overflow ||
+ IID == Intrinsic::ssub_with_overflow) {
+ // X - X -> { 0, false }
+ if (LHS == RHS)
+ return Constant::getNullValue(ReturnType);
+
+ // X - undef -> undef
+ // undef - X -> undef
+ if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS))
+ return UndefValue::get(ReturnType);
+ }
+
+ if (IID == Intrinsic::uadd_with_overflow ||
+ IID == Intrinsic::sadd_with_overflow) {
+ // X + undef -> undef
+ if (isa<UndefValue>(RHS))
+ return UndefValue::get(ReturnType);
+ }
+
+ if (IID == Intrinsic::umul_with_overflow ||
+ IID == Intrinsic::smul_with_overflow) {
+ // X * 0 -> { 0, false }
+ if (match(RHS, m_Zero()))
+ return Constant::getNullValue(ReturnType);
+
+ // X * undef -> { 0, false }
+ if (match(RHS, m_Undef()))
+ return Constant::getNullValue(ReturnType);
+ }
+ }
+
// Perform idempotent optimizations
if (!IsIdempotent(IID))
return nullptr;
// Unary Ops
- if (std::distance(ArgBegin, ArgEnd) == 1)
+ if (NumOperands == 1)
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(*ArgBegin))
if (II->getIntrinsicID() == IID)
return II;
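One detail behind the "{ 0, false }" folds above (illustrative only, not part of the patch): the *.with.overflow intrinsics return the aggregate { iN, i1 }, so the all-zero value of that struct type is exactly a zero result with a cleared overflow bit. A tiny sketch with a made-up helper name:

  #include "llvm/IR/Constants.h"
  #include "llvm/IR/DerivedTypes.h"
  #include "llvm/IR/LLVMContext.h"
  using namespace llvm;

  static Constant *makeNoOverflowZero(LLVMContext &Ctx, unsigned Bits) {
    // { iBits, i1 }: an all-zero aggregate encodes the pair (0, false).
    Type *Elts[] = {IntegerType::get(Ctx, Bits), Type::getInt1Ty(Ctx)};
    return Constant::getNullValue(StructType::get(Ctx, Elts));
  }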
@@ -3524,9 +3626,8 @@ static Value *SimplifyCall(Value *V, IterTy ArgBegin, IterTy ArgEnd,
if (!F)
return nullptr;
- if (unsigned IID = F->getIntrinsicID())
- if (Value *Ret =
- SimplifyIntrinsic((Intrinsic::ID) IID, ArgBegin, ArgEnd, Q, MaxRecurse))
+ if (F->isIntrinsic())
+ if (Value *Ret = SimplifyIntrinsic(F, ArgBegin, ArgEnd, Q, MaxRecurse))
return Ret;
if (!canConstantFoldCallTo(F))
@@ -3545,7 +3646,7 @@ static Value *SimplifyCall(Value *V, IterTy ArgBegin, IterTy ArgEnd,
}
Value *llvm::SimplifyCall(Value *V, User::op_iterator ArgBegin,
- User::op_iterator ArgEnd, const DataLayout *DL,
+ User::op_iterator ArgEnd, const DataLayout &DL,
const TargetLibraryInfo *TLI, const DominatorTree *DT,
AssumptionCache *AC, const Instruction *CxtI) {
return ::SimplifyCall(V, ArgBegin, ArgEnd, Query(DL, TLI, DT, AC, CxtI),
@@ -3553,7 +3654,7 @@ Value *llvm::SimplifyCall(Value *V, User::op_iterator ArgBegin,
}
Value *llvm::SimplifyCall(Value *V, ArrayRef<Value *> Args,
- const DataLayout *DL, const TargetLibraryInfo *TLI,
+ const DataLayout &DL, const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
return ::SimplifyCall(V, Args.begin(), Args.end(),
@@ -3562,7 +3663,7 @@ Value *llvm::SimplifyCall(Value *V, ArrayRef<Value *> Args,
/// SimplifyInstruction - See if we can compute a simplified version of this
/// instruction. If not, this returns null.
-Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout *DL,
+Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC) {
Value *Result;
@@ -3608,8 +3709,8 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout *DL,
AC, I);
break;
case Instruction::FDiv:
- Result = SimplifyFDivInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT,
- AC, I);
+ Result = SimplifyFDivInst(I->getOperand(0), I->getOperand(1),
+ I->getFastMathFlags(), DL, TLI, DT, AC, I);
break;
case Instruction::SRem:
Result = SimplifySRemInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT,
@@ -3620,8 +3721,8 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout *DL,
AC, I);
break;
case Instruction::FRem:
- Result = SimplifyFRemInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT,
- AC, I);
+ Result = SimplifyFRemInst(I->getOperand(0), I->getOperand(1),
+ I->getFastMathFlags(), DL, TLI, DT, AC, I);
break;
case Instruction::Shl:
Result = SimplifyShlInst(I->getOperand(0), I->getOperand(1),
@@ -3710,12 +3811,12 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout *DL,
/// This routine returns 'true' only when *it* simplifies something. The passed
/// in simplified value does not count toward this.
static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV,
- const DataLayout *DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT,
AssumptionCache *AC) {
bool Simplified = false;
SmallSetVector<Instruction *, 8> Worklist;
+ const DataLayout &DL = I->getModule()->getDataLayout();
// If we have an explicit value to collapse to, do that round of the
// simplification loop by hand initially.
@@ -3763,19 +3864,18 @@ static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV,
return Simplified;
}
-bool llvm::recursivelySimplifyInstruction(Instruction *I, const DataLayout *DL,
+bool llvm::recursivelySimplifyInstruction(Instruction *I,
const TargetLibraryInfo *TLI,
const DominatorTree *DT,
AssumptionCache *AC) {
- return replaceAndRecursivelySimplifyImpl(I, nullptr, DL, TLI, DT, AC);
+ return replaceAndRecursivelySimplifyImpl(I, nullptr, TLI, DT, AC);
}
bool llvm::replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV,
- const DataLayout *DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT,
AssumptionCache *AC) {
assert(I != SimpleV && "replaceAndRecursivelySimplify(X,X) is not valid!");
assert(SimpleV && "Must provide a simplified value.");
- return replaceAndRecursivelySimplifyImpl(I, SimpleV, DL, TLI, DT, AC);
+ return replaceAndRecursivelySimplifyImpl(I, SimpleV, TLI, DT, AC);
}
diff --git a/contrib/llvm/lib/Analysis/IteratedDominanceFrontier.cpp b/contrib/llvm/lib/Analysis/IteratedDominanceFrontier.cpp
new file mode 100644
index 0000000..9f1edd2
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/IteratedDominanceFrontier.cpp
@@ -0,0 +1,95 @@
+//===- IteratedDominanceFrontier.cpp - Compute IDF ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \brief Compute iterated dominance frontiers using a linear time algorithm.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/IteratedDominanceFrontier.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Dominators.h"
+#include <queue>
+
+using namespace llvm;
+
+void IDFCalculator::calculate(SmallVectorImpl<BasicBlock *> &PHIBlocks) {
+ // If we haven't computed dominator tree levels, do so now.
+ if (DomLevels.empty()) {
+ for (auto DFI = df_begin(DT.getRootNode()), DFE = df_end(DT.getRootNode());
+ DFI != DFE; ++DFI) {
+ DomLevels[*DFI] = DFI.getPathLength() - 1;
+ }
+ }
+
+ // Use a priority queue keyed on dominator tree level so that inserted nodes
+ // are handled from the bottom of the dominator tree upwards.
+ typedef std::pair<DomTreeNode *, unsigned> DomTreeNodePair;
+ typedef std::priority_queue<DomTreeNodePair, SmallVector<DomTreeNodePair, 32>,
+ less_second> IDFPriorityQueue;
+ IDFPriorityQueue PQ;
+
+ for (BasicBlock *BB : *DefBlocks) {
+ if (DomTreeNode *Node = DT.getNode(BB))
+ PQ.push(std::make_pair(Node, DomLevels.lookup(Node)));
+ }
+
+ SmallVector<DomTreeNode *, 32> Worklist;
+ SmallPtrSet<DomTreeNode *, 32> VisitedPQ;
+ SmallPtrSet<DomTreeNode *, 32> VisitedWorklist;
+
+ while (!PQ.empty()) {
+ DomTreeNodePair RootPair = PQ.top();
+ PQ.pop();
+ DomTreeNode *Root = RootPair.first;
+ unsigned RootLevel = RootPair.second;
+
+ // Walk all dominator tree children of Root, inspecting their CFG edges with
+ // targets elsewhere on the dominator tree. Only targets whose level is at
+ // most Root's level are added to the iterated dominance frontier of the
+ // definition set.
+
+ Worklist.clear();
+ Worklist.push_back(Root);
+ VisitedWorklist.insert(Root);
+
+ while (!Worklist.empty()) {
+ DomTreeNode *Node = Worklist.pop_back_val();
+ BasicBlock *BB = Node->getBlock();
+
+ for (auto Succ : successors(BB)) {
+ DomTreeNode *SuccNode = DT.getNode(Succ);
+
+ // Quickly skip all CFG edges that are also dominator tree edges instead
+ // of catching them below.
+ if (SuccNode->getIDom() == Node)
+ continue;
+
+ unsigned SuccLevel = DomLevels.lookup(SuccNode);
+ if (SuccLevel > RootLevel)
+ continue;
+
+ if (!VisitedPQ.insert(SuccNode).second)
+ continue;
+
+ BasicBlock *SuccBB = SuccNode->getBlock();
+ if (useLiveIn && !LiveInBlocks->count(SuccBB))
+ continue;
+
+ PHIBlocks.emplace_back(SuccBB);
+ if (!DefBlocks->count(SuccBB))
+ PQ.push(std::make_pair(SuccNode, SuccLevel));
+ }
+
+ for (auto DomChild : *Node) {
+ if (VisitedWorklist.insert(DomChild).second)
+ Worklist.push_back(DomChild);
+ }
+ }
+ }
+}
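A brief usage sketch for the new calculator (not part of the patch; it assumes the IDFCalculator interface declared in llvm/Analysis/IteratedDominanceFrontier.h, which this diff does not show):

  #include "llvm/ADT/SmallPtrSet.h"
  #include "llvm/ADT/SmallVector.h"
  #include "llvm/Analysis/IteratedDominanceFrontier.h"
  #include "llvm/IR/Dominators.h"
  using namespace llvm;

  static void computePHIBlocks(DominatorTree &DT,
                               const SmallPtrSetImpl<BasicBlock *> &DefBlocks,
                               SmallVectorImpl<BasicBlock *> &PHIBlocks) {
    IDFCalculator IDF(DT);
    IDF.setDefiningBlocks(DefBlocks); // blocks containing definitions
    IDF.calculate(PHIBlocks);         // filled with blocks that need PHIs
  }

Keying the priority queue on dominator tree level means processing always starts from the deepest pending definition, which is what the linear-time claim in the file header refers to.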
diff --git a/contrib/llvm/lib/Analysis/JumpInstrTableInfo.cpp b/contrib/llvm/lib/Analysis/JumpInstrTableInfo.cpp
deleted file mode 100644
index 7aae2a5..0000000
--- a/contrib/llvm/lib/Analysis/JumpInstrTableInfo.cpp
+++ /dev/null
@@ -1,55 +0,0 @@
-//===-- JumpInstrTableInfo.cpp: Info for Jump-Instruction Tables ----------===//
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// \brief Information about jump-instruction tables that have been created by
-/// JumpInstrTables pass.
-///
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "jiti"
-
-#include "llvm/Analysis/JumpInstrTableInfo.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Type.h"
-#include "llvm/Support/MathExtras.h"
-
-using namespace llvm;
-
-INITIALIZE_PASS(JumpInstrTableInfo, "jump-instr-table-info",
- "Jump-Instruction Table Info", true, true)
-char JumpInstrTableInfo::ID = 0;
-
-ImmutablePass *llvm::createJumpInstrTableInfoPass() {
- return new JumpInstrTableInfo();
-}
-
-ModulePass *llvm::createJumpInstrTableInfoPass(unsigned Bound) {
- // This cast is always safe, since Bound is always in a subset of uint64_t.
- uint64_t B = static_cast<uint64_t>(Bound);
- return new JumpInstrTableInfo(B);
-}
-
-JumpInstrTableInfo::JumpInstrTableInfo(uint64_t ByteAlign)
- : ImmutablePass(ID), Tables(), ByteAlignment(ByteAlign) {
- if (!llvm::isPowerOf2_64(ByteAlign)) {
- // Note that we don't explicitly handle overflow here, since we handle the 0
- // case explicitly when a caller actually tries to create jumptable entries,
- // and this is the return value on overflow.
- ByteAlignment = llvm::NextPowerOf2(ByteAlign);
- }
-
- initializeJumpInstrTableInfoPass(*PassRegistry::getPassRegistry());
-}
-
-JumpInstrTableInfo::~JumpInstrTableInfo() {}
-
-void JumpInstrTableInfo::insertEntry(FunctionType *TableFunTy, Function *Target,
- Function *Jump) {
- Tables[TableFunTy].push_back(JumpPair(Target, Jump));
-}
diff --git a/contrib/llvm/lib/Analysis/LazyValueInfo.cpp b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp
index 4de56f1..e6f586a 100644
--- a/contrib/llvm/lib/Analysis/LazyValueInfo.cpp
+++ b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/ConstantRange.h"
@@ -29,7 +30,6 @@
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLibraryInfo.h"
#include <map>
#include <stack>
using namespace llvm;
@@ -41,7 +41,7 @@ char LazyValueInfo::ID = 0;
INITIALIZE_PASS_BEGIN(LazyValueInfo, "lazy-value-info",
"Lazy Value Information Analysis", false, true)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(LazyValueInfo, "lazy-value-info",
"Lazy Value Information Analysis", false, true)
@@ -191,7 +191,7 @@ public:
/// Merge the specified lattice value into this one, updating this
/// one and returning true if anything changed.
- bool mergeIn(const LVILatticeVal &RHS) {
+ bool mergeIn(const LVILatticeVal &RHS, const DataLayout &DL) {
if (RHS.isUndefined() || isOverdefined()) return false;
if (RHS.isOverdefined()) return markOverdefined();
@@ -215,11 +215,9 @@ public:
// Unless we can prove that the two Constants are different, we must
// move to overdefined.
- // FIXME: use DataLayout/TargetLibraryInfo for smarter constant folding.
- if (ConstantInt *Res = dyn_cast<ConstantInt>(
- ConstantFoldCompareInstOperands(CmpInst::ICMP_NE,
- getConstant(),
- RHS.getNotConstant())))
+ if (ConstantInt *Res =
+ dyn_cast<ConstantInt>(ConstantFoldCompareInstOperands(
+ CmpInst::ICMP_NE, getConstant(), RHS.getNotConstant(), DL)))
if (Res->isOne())
return markNotConstant(RHS.getNotConstant());
@@ -241,11 +239,9 @@ public:
// Unless we can prove that the two Constants are different, we must
// move to overdefined.
- // FIXME: use DataLayout/TargetLibraryInfo for smarter constant folding.
- if (ConstantInt *Res = dyn_cast<ConstantInt>(
- ConstantFoldCompareInstOperands(CmpInst::ICMP_NE,
- getNotConstant(),
- RHS.getConstant())))
+ if (ConstantInt *Res =
+ dyn_cast<ConstantInt>(ConstantFoldCompareInstOperands(
+ CmpInst::ICMP_NE, getNotConstant(), RHS.getConstant(), DL)))
if (Res->isOne())
return false;
@@ -346,21 +342,17 @@ namespace {
/// Push BV onto BlockValueStack unless it's already in there.
/// Returns true on success.
bool pushBlockValue(const std::pair<BasicBlock *, Value *> &BV) {
- if (BlockValueSet.count(BV))
+ if (!BlockValueSet.insert(BV).second)
return false; // It's already in the stack.
BlockValueStack.push(BV);
- BlockValueSet.insert(BV);
return true;
}
- /// A pointer to the cache of @llvm.assume calls.
- AssumptionCache *AC;
- /// An optional DL pointer.
- const DataLayout *DL;
- /// An optional DT pointer.
- DominatorTree *DT;
-
+ AssumptionCache *AC; ///< A pointer to the cache of @llvm.assume calls.
+ const DataLayout &DL; ///< A mandatory DataLayout
+ DominatorTree *DT; ///< An optional DT pointer.
+
friend struct LVIValueHandle;
void insertResult(Value *Val, BasicBlock *BB, const LVILatticeVal &Result) {
@@ -426,7 +418,7 @@ namespace {
OverDefinedCache.clear();
}
- LazyValueInfoCache(AssumptionCache *AC, const DataLayout *DL = nullptr,
+ LazyValueInfoCache(AssumptionCache *AC, const DataLayout &DL,
DominatorTree *DT = nullptr)
: AC(AC), DL(DL), DT(DT) {}
};
@@ -579,11 +571,13 @@ bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) {
static bool InstructionDereferencesPointer(Instruction *I, Value *Ptr) {
if (LoadInst *L = dyn_cast<LoadInst>(I)) {
return L->getPointerAddressSpace() == 0 &&
- GetUnderlyingObject(L->getPointerOperand()) == Ptr;
+ GetUnderlyingObject(L->getPointerOperand(),
+ L->getModule()->getDataLayout()) == Ptr;
}
if (StoreInst *S = dyn_cast<StoreInst>(I)) {
return S->getPointerAddressSpace() == 0 &&
- GetUnderlyingObject(S->getPointerOperand()) == Ptr;
+ GetUnderlyingObject(S->getPointerOperand(),
+ S->getModule()->getDataLayout()) == Ptr;
}
if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) {
if (MI->isVolatile()) return false;
@@ -593,11 +587,13 @@ static bool InstructionDereferencesPointer(Instruction *I, Value *Ptr) {
if (!Len || Len->isZero()) return false;
if (MI->getDestAddressSpace() == 0)
- if (GetUnderlyingObject(MI->getRawDest()) == Ptr)
+ if (GetUnderlyingObject(MI->getRawDest(),
+ MI->getModule()->getDataLayout()) == Ptr)
return true;
if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI))
if (MTI->getSourceAddressSpace() == 0)
- if (GetUnderlyingObject(MTI->getRawSource()) == Ptr)
+ if (GetUnderlyingObject(MTI->getRawSource(),
+ MTI->getModule()->getDataLayout()) == Ptr)
return true;
}
return false;
@@ -614,10 +610,11 @@ bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV,
if (isKnownNonNull(Val)) {
NotNull = true;
} else {
- Value *UnderlyingVal = GetUnderlyingObject(Val);
+ const DataLayout &DL = BB->getModule()->getDataLayout();
+ Value *UnderlyingVal = GetUnderlyingObject(Val, DL);
// If 'GetUnderlyingObject' didn't converge, skip it. It won't converge
// inside InstructionDereferencesPointer either.
- if (UnderlyingVal == GetUnderlyingObject(UnderlyingVal, nullptr, 1)) {
+ if (UnderlyingVal == GetUnderlyingObject(UnderlyingVal, DL, 1)) {
for (Instruction &I : *BB) {
if (InstructionDereferencesPointer(&I, UnderlyingVal)) {
NotNull = true;
@@ -651,7 +648,7 @@ bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV,
if (EdgesMissing)
continue;
- Result.mergeIn(EdgeResult);
+ Result.mergeIn(EdgeResult, DL);
// If we hit overdefined, exit early. The BlockVals entry is already set
// to overdefined.
@@ -696,7 +693,7 @@ bool LazyValueInfoCache::solveBlockValuePHINode(LVILatticeVal &BBLV,
if (EdgesMissing)
continue;
- Result.mergeIn(EdgeResult);
+ Result.mergeIn(EdgeResult, DL);
// If we hit overdefined, exit early. The BlockVals entry is already set
// to overdefined.
@@ -735,7 +732,7 @@ void LazyValueInfoCache::mergeAssumeBlockValueConstantRange(Value *Val,
if (!AssumeVH)
continue;
auto *I = cast<CallInst>(AssumeVH);
- if (!isValidAssumeForContext(I, BBI, DL, DT))
+ if (!isValidAssumeForContext(I, BBI, DT))
continue;
Value *C = I->getArgOperand(0);
@@ -745,7 +742,7 @@ void LazyValueInfoCache::mergeAssumeBlockValueConstantRange(Value *Val,
if (BBLV.isOverdefined())
BBLV = Result;
else
- BBLV.mergeIn(Result);
+ BBLV.mergeIn(Result, DL);
}
}
}
@@ -857,10 +854,10 @@ bool getValueFromFromCondition(Value *Val, ICmpInst *ICI,
ConstantInt *CI = dyn_cast<ConstantInt>(ICI->getOperand(1));
if (CI && (ICI->getOperand(0) == Val || NegOffset)) {
- // Calculate the range of values that would satisfy the comparison.
+ // Calculate the range of values that are allowed by the comparison
ConstantRange CmpRange(CI->getValue());
ConstantRange TrueValues =
- ConstantRange::makeICmpRegion(ICI->getPredicate(), CmpRange);
+ ConstantRange::makeAllowedICmpRegion(ICI->getPredicate(), CmpRange);
if (NegOffset) // Apply the offset from above.
TrueValues = TrueValues.subtract(NegOffset->getValue());
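For reference, a sketch of what makeAllowedICmpRegion computes (not part of the patch; allowedULT10 is a made-up name). It returns the set of left-hand values for which the comparison can hold, and with a singleton right-hand range the result is exact:

  #include "llvm/ADT/APInt.h"
  #include "llvm/IR/ConstantRange.h"
  #include "llvm/IR/InstrTypes.h"
  using namespace llvm;

  static ConstantRange allowedULT10() {
    // RHS is the singleton range {10}; the values of x that can satisfy
    // "x ult 10" on i8 are exactly [0, 10).
    ConstantRange Ten(APInt(8, 10));
    return ConstantRange::makeAllowedICmpRegion(CmpInst::ICMP_ULT, Ten);
  }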
@@ -1104,27 +1101,27 @@ void LazyValueInfoCache::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
/// This lazily constructs the LazyValueInfoCache.
static LazyValueInfoCache &getCache(void *&PImpl, AssumptionCache *AC,
- const DataLayout *DL = nullptr,
+ const DataLayout *DL,
DominatorTree *DT = nullptr) {
- if (!PImpl)
- PImpl = new LazyValueInfoCache(AC, DL, DT);
+ if (!PImpl) {
+ assert(DL && "getCache() called with a null DataLayout");
+ PImpl = new LazyValueInfoCache(AC, *DL, DT);
+ }
return *static_cast<LazyValueInfoCache*>(PImpl);
}
bool LazyValueInfo::runOnFunction(Function &F) {
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ const DataLayout &DL = F.getParent()->getDataLayout();
DominatorTreeWrapperPass *DTWP =
getAnalysisIfAvailable<DominatorTreeWrapperPass>();
DT = DTWP ? &DTWP->getDomTree() : nullptr;
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
-
- TLI = &getAnalysis<TargetLibraryInfo>();
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
if (PImpl)
- getCache(PImpl, AC, DL, DT).clear();
+ getCache(PImpl, AC, &DL, DT).clear();
// Fully lazy.
return false;
@@ -1133,21 +1130,22 @@ bool LazyValueInfo::runOnFunction(Function &F) {
void LazyValueInfo::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<TargetLibraryInfo>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
}
void LazyValueInfo::releaseMemory() {
// If the cache was allocated, free it.
if (PImpl) {
- delete &getCache(PImpl, AC);
+ delete &getCache(PImpl, AC, nullptr);
PImpl = nullptr;
}
}
Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB,
Instruction *CxtI) {
+ const DataLayout &DL = BB->getModule()->getDataLayout();
LVILatticeVal Result =
- getCache(PImpl, AC, DL, DT).getValueInBlock(V, BB, CxtI);
+ getCache(PImpl, AC, &DL, DT).getValueInBlock(V, BB, CxtI);
if (Result.isConstant())
return Result.getConstant();
@@ -1164,8 +1162,9 @@ Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB,
Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB,
BasicBlock *ToBB,
Instruction *CxtI) {
+ const DataLayout &DL = FromBB->getModule()->getDataLayout();
LVILatticeVal Result =
- getCache(PImpl, AC, DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI);
+ getCache(PImpl, AC, &DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI);
if (Result.isConstant())
return Result.getConstant();
@@ -1177,9 +1176,10 @@ Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB,
return nullptr;
}
-static LazyValueInfo::Tristate
-getPredicateResult(unsigned Pred, Constant *C, LVILatticeVal &Result,
- const DataLayout *DL, TargetLibraryInfo *TLI) {
+static LazyValueInfo::Tristate getPredicateResult(unsigned Pred, Constant *C,
+ LVILatticeVal &Result,
+ const DataLayout &DL,
+ TargetLibraryInfo *TLI) {
// If we know the value is a constant, evaluate the conditional.
Constant *Res = nullptr;
@@ -1250,8 +1250,9 @@ LazyValueInfo::Tristate
LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C,
BasicBlock *FromBB, BasicBlock *ToBB,
Instruction *CxtI) {
+ const DataLayout &DL = FromBB->getModule()->getDataLayout();
LVILatticeVal Result =
- getCache(PImpl, AC, DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI);
+ getCache(PImpl, AC, &DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI);
return getPredicateResult(Pred, C, Result, DL, TLI);
}
@@ -1259,18 +1260,23 @@ LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C,
LazyValueInfo::Tristate
LazyValueInfo::getPredicateAt(unsigned Pred, Value *V, Constant *C,
Instruction *CxtI) {
- LVILatticeVal Result = getCache(PImpl, AC, DL, DT).getValueAt(V, CxtI);
+ const DataLayout &DL = CxtI->getModule()->getDataLayout();
+ LVILatticeVal Result = getCache(PImpl, AC, &DL, DT).getValueAt(V, CxtI);
return getPredicateResult(Pred, C, Result, DL, TLI);
}
void LazyValueInfo::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
BasicBlock *NewSucc) {
- if (PImpl)
- getCache(PImpl, AC, DL, DT).threadEdge(PredBB, OldSucc, NewSucc);
+ if (PImpl) {
+ const DataLayout &DL = PredBB->getModule()->getDataLayout();
+ getCache(PImpl, AC, &DL, DT).threadEdge(PredBB, OldSucc, NewSucc);
+ }
}
void LazyValueInfo::eraseBlock(BasicBlock *BB) {
- if (PImpl)
- getCache(PImpl, AC, DL, DT).eraseBlock(BB);
+ if (PImpl) {
+ const DataLayout &DL = BB->getModule()->getDataLayout();
+ getCache(PImpl, AC, &DL, DT).eraseBlock(BB);
+ }
}
diff --git a/contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp
index 016f8c5..f6025e3 100644
--- a/contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp
@@ -36,7 +36,11 @@ void LibCallAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll(); // Does not transform code
}
-
+bool LibCallAliasAnalysis::runOnFunction(Function &F) {
+ // set up super class
+ InitializeAliasAnalysis(this, &F.getParent()->getDataLayout());
+ return false;
+}
/// AnalyzeLibCallDetails - Given a call to a function with the specified
/// LibCallFunctionInfo, see if we can improve the mod/ref footprint of the call
diff --git a/contrib/llvm/lib/Analysis/LibCallSemantics.cpp b/contrib/llvm/lib/Analysis/LibCallSemantics.cpp
index 23639e7..e98540b 100644
--- a/contrib/llvm/lib/Analysis/LibCallSemantics.cpp
+++ b/contrib/llvm/lib/Analysis/LibCallSemantics.cpp
@@ -15,6 +15,7 @@
#include "llvm/Analysis/LibCallSemantics.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Function.h"
using namespace llvm;
@@ -61,3 +62,29 @@ LibCallInfo::getFunctionInfo(const Function *F) const {
return Map->lookup(F->getName());
}
+/// See if the given exception handling personality function is one that we
+/// understand. If so, return a description of it; otherwise return Unknown.
+EHPersonality llvm::classifyEHPersonality(const Value *Pers) {
+ const Function *F = dyn_cast<Function>(Pers->stripPointerCasts());
+ if (!F)
+ return EHPersonality::Unknown;
+ return StringSwitch<EHPersonality>(F->getName())
+ .Case("__gnat_eh_personality", EHPersonality::GNU_Ada)
+ .Case("__gxx_personality_v0", EHPersonality::GNU_CXX)
+ .Case("__gcc_personality_v0", EHPersonality::GNU_C)
+ .Case("__objc_personality_v0", EHPersonality::GNU_ObjC)
+ .Case("_except_handler3", EHPersonality::MSVC_X86SEH)
+ .Case("_except_handler4", EHPersonality::MSVC_X86SEH)
+ .Case("__C_specific_handler", EHPersonality::MSVC_Win64SEH)
+ .Case("__CxxFrameHandler3", EHPersonality::MSVC_CXX)
+ .Default(EHPersonality::Unknown);
+}
+
+bool llvm::canSimplifyInvokeNoUnwind(const InvokeInst *II) {
+ const LandingPadInst *LP = II->getLandingPadInst();
+ EHPersonality Personality = classifyEHPersonality(LP->getPersonalityFn());
+ // We can't simplify any invokes to nounwind functions if the personality
+ // function wants to catch asynch exceptions. The nounwind attribute only
+ // implies that the function does not throw synchronous exceptions.
+ return !isAsynchronousEHPersonality(Personality);
+}
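A short usage sketch (not part of the patch; invokeIsRemovable is a made-up wrapper, and it assumes these declarations live in llvm/Analysis/LibCallSemantics.h alongside the definitions added here): a transform would combine the callee's nounwind attribute with the personality classification before dropping an invoke's landing pad.

  #include "llvm/Analysis/LibCallSemantics.h"
  #include "llvm/IR/Function.h"
  #include "llvm/IR/Instructions.h"
  using namespace llvm;

  static bool invokeIsRemovable(const InvokeInst *II) {
    const Function *Callee = II->getCalledFunction();
    if (!Callee || !Callee->doesNotThrow())
      return false;
    // nounwind only rules out synchronous exceptions; an asynchronous (SEH)
    // personality may still transfer control to the landing pad.
    return canSimplifyInvokeNoUnwind(II);
  }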
diff --git a/contrib/llvm/lib/Analysis/Lint.cpp b/contrib/llvm/lib/Analysis/Lint.cpp
index b5c7245..65a90d7 100644
--- a/contrib/llvm/lib/Analysis/Lint.cpp
+++ b/contrib/llvm/lib/Analysis/Lint.cpp
@@ -36,12 +36,14 @@
#include "llvm/Analysis/Lint.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
@@ -49,19 +51,18 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Pass.h"
-#include "llvm/PassManager.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLibraryInfo.h"
using namespace llvm;
namespace {
namespace MemRef {
- static unsigned Read = 1;
- static unsigned Write = 2;
- static unsigned Callee = 4;
- static unsigned Branchee = 8;
+ static const unsigned Read = 1;
+ static const unsigned Write = 2;
+ static const unsigned Callee = 4;
+ static const unsigned Branchee = 8;
}
class Lint : public FunctionPass, public InstVisitor<Lint> {
@@ -73,6 +74,8 @@ namespace {
void visitMemoryReference(Instruction &I, Value *Ptr,
uint64_t Size, unsigned Align,
Type *Ty, unsigned Flags);
+ void visitEHBeginCatch(IntrinsicInst *II);
+ void visitEHEndCatch(IntrinsicInst *II);
void visitCallInst(CallInst &I);
void visitInvokeInst(InvokeInst &I);
@@ -95,8 +98,8 @@ namespace {
void visitInsertElementInst(InsertElementInst &I);
void visitUnreachableInst(UnreachableInst &I);
- Value *findValue(Value *V, bool OffsetOk) const;
- Value *findValueImpl(Value *V, bool OffsetOk,
+ Value *findValue(Value *V, const DataLayout &DL, bool OffsetOk) const;
+ Value *findValueImpl(Value *V, const DataLayout &DL, bool OffsetOk,
SmallPtrSetImpl<Value *> &Visited) const;
public:
@@ -104,7 +107,6 @@ namespace {
AliasAnalysis *AA;
AssumptionCache *AC;
DominatorTree *DT;
- const DataLayout *DL;
TargetLibraryInfo *TLI;
std::string Messages;
@@ -121,32 +123,38 @@ namespace {
AU.setPreservesAll();
AU.addRequired<AliasAnalysis>();
AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<TargetLibraryInfo>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
}
void print(raw_ostream &O, const Module *M) const override {}
- void WriteValue(const Value *V) {
- if (!V) return;
- if (isa<Instruction>(V)) {
- MessagesStr << *V << '\n';
- } else {
- V->printAsOperand(MessagesStr, true, Mod);
- MessagesStr << '\n';
+ void WriteValues(ArrayRef<const Value *> Vs) {
+ for (const Value *V : Vs) {
+ if (!V)
+ continue;
+ if (isa<Instruction>(V)) {
+ MessagesStr << *V << '\n';
+ } else {
+ V->printAsOperand(MessagesStr, true, Mod);
+ MessagesStr << '\n';
+ }
}
}
- // CheckFailed - A check failed, so print out the condition and the message
- // that failed. This provides a nice place to put a breakpoint if you want
- // to see why something is not correct.
- void CheckFailed(const Twine &Message,
- const Value *V1 = nullptr, const Value *V2 = nullptr,
- const Value *V3 = nullptr, const Value *V4 = nullptr) {
- MessagesStr << Message.str() << "\n";
- WriteValue(V1);
- WriteValue(V2);
- WriteValue(V3);
- WriteValue(V4);
+  /// \brief A check failed, so print out the condition and the message.
+ ///
+ /// This provides a nice place to put a breakpoint if you want to see why
+ /// something is not correct.
+ void CheckFailed(const Twine &Message) { MessagesStr << Message << '\n'; }
+
+ /// \brief A check failed (with values to print).
+ ///
+ /// This calls the Message-only version so that the above is easier to set
+ /// a breakpoint on.
+ template <typename T1, typename... Ts>
+ void CheckFailed(const Twine &Message, const T1 &V1, const Ts &...Vs) {
+ CheckFailed(Message);
+ WriteValues({V1, Vs...});
}
};
}
@@ -155,23 +163,15 @@ char Lint::ID = 0;
INITIALIZE_PASS_BEGIN(Lint, "lint", "Statically lint-checks LLVM IR",
false, true)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR",
false, true)
// Assert - We know that cond should be true, if not print an error message.
-#define Assert(C, M) \
- do { if (!(C)) { CheckFailed(M); return; } } while (0)
-#define Assert1(C, M, V1) \
- do { if (!(C)) { CheckFailed(M, V1); return; } } while (0)
-#define Assert2(C, M, V1, V2) \
- do { if (!(C)) { CheckFailed(M, V1, V2); return; } } while (0)
-#define Assert3(C, M, V1, V2, V3) \
- do { if (!(C)) { CheckFailed(M, V1, V2, V3); return; } } while (0)
-#define Assert4(C, M, V1, V2, V3, V4) \
- do { if (!(C)) { CheckFailed(M, V1, V2, V3, V4); return; } } while (0)
+#define Assert(C, ...) \
+ do { if (!(C)) { CheckFailed(__VA_ARGS__); return; } } while (0)
// Lint::run - This is the main Analysis entry point for a
// function.
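The Assert1..Assert4 macros collapse into one variadic form because CheckFailed now takes a parameter pack and forwards it as a single braced list. A reduced, standalone sketch of the same pattern (plain ints instead of llvm::Value*; ASSERT_LINT and checkPositive are made-up names):

  #include <initializer_list>
  #include <iostream>

  static void WriteValues(std::initializer_list<int> Vs) {
    for (int V : Vs)
      std::cerr << "  value: " << V << '\n';
  }

  static void CheckFailed(const char *Message) { std::cerr << Message << '\n'; }

  template <typename T1, typename... Ts>
  static void CheckFailed(const char *Message, const T1 &V1, const Ts &...Vs) {
    CheckFailed(Message);     // single overload to hang a breakpoint on
    WriteValues({V1, Vs...}); // pack expansion into one braced list
  }

  #define ASSERT_LINT(C, ...)                                                  \
    do {                                                                       \
      if (!(C)) {                                                              \
        CheckFailed(__VA_ARGS__);                                              \
        return;                                                                \
      }                                                                        \
    } while (0)

  static void checkPositive(int X) {
    ASSERT_LINT(X > 0, "Unusual: non-positive value", X);
  }

  int main() { checkPositive(-1); }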
@@ -181,9 +181,7 @@ bool Lint::runOnFunction(Function &F) {
AA = &getAnalysis<AliasAnalysis>();
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
- TLI = &getAnalysis<TargetLibraryInfo>();
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
visit(F);
dbgs() << MessagesStr.str();
Messages.clear();
@@ -193,8 +191,8 @@ bool Lint::runOnFunction(Function &F) {
void Lint::visitFunction(Function &F) {
// This isn't undefined behavior, it's just a little unusual, and it's a
// fairly common mistake to neglect to name a function.
- Assert1(F.hasName() || F.hasLocalLinkage(),
- "Unusual: Unnamed function with non-local linkage", &F);
+ Assert(F.hasName() || F.hasLocalLinkage(),
+ "Unusual: Unnamed function with non-local linkage", &F);
// TODO: Check for irreducible control flow.
}
@@ -202,27 +200,30 @@ void Lint::visitFunction(Function &F) {
void Lint::visitCallSite(CallSite CS) {
Instruction &I = *CS.getInstruction();
Value *Callee = CS.getCalledValue();
+ const DataLayout &DL = CS->getModule()->getDataLayout();
visitMemoryReference(I, Callee, AliasAnalysis::UnknownSize,
0, nullptr, MemRef::Callee);
- if (Function *F = dyn_cast<Function>(findValue(Callee, /*OffsetOk=*/false))) {
- Assert1(CS.getCallingConv() == F->getCallingConv(),
- "Undefined behavior: Caller and callee calling convention differ",
- &I);
+ if (Function *F = dyn_cast<Function>(findValue(Callee, DL,
+ /*OffsetOk=*/false))) {
+ Assert(CS.getCallingConv() == F->getCallingConv(),
+ "Undefined behavior: Caller and callee calling convention differ",
+ &I);
FunctionType *FT = F->getFunctionType();
unsigned NumActualArgs = CS.arg_size();
- Assert1(FT->isVarArg() ?
- FT->getNumParams() <= NumActualArgs :
- FT->getNumParams() == NumActualArgs,
- "Undefined behavior: Call argument count mismatches callee "
- "argument count", &I);
+ Assert(FT->isVarArg() ? FT->getNumParams() <= NumActualArgs
+ : FT->getNumParams() == NumActualArgs,
+ "Undefined behavior: Call argument count mismatches callee "
+ "argument count",
+ &I);
- Assert1(FT->getReturnType() == I.getType(),
- "Undefined behavior: Call return type mismatches "
- "callee return type", &I);
+ Assert(FT->getReturnType() == I.getType(),
+ "Undefined behavior: Call return type mismatches "
+ "callee return type",
+ &I);
// Check argument types (in case the callee was casted) and attributes.
// TODO: Verify that caller and callee attributes are compatible.
@@ -232,9 +233,10 @@ void Lint::visitCallSite(CallSite CS) {
Value *Actual = *AI;
if (PI != PE) {
Argument *Formal = PI++;
- Assert1(Formal->getType() == Actual->getType(),
- "Undefined behavior: Call argument type mismatches "
- "callee parameter type", &I);
+ Assert(Formal->getType() == Actual->getType(),
+ "Undefined behavior: Call argument type mismatches "
+ "callee parameter type",
+ &I);
// Check that noalias arguments don't alias other arguments. This is
// not fully precise because we don't know the sizes of the dereferenced
@@ -243,9 +245,9 @@ void Lint::visitCallSite(CallSite CS) {
for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI)
if (AI != BI && (*BI)->getType()->isPointerTy()) {
AliasAnalysis::AliasResult Result = AA->alias(*AI, *BI);
- Assert1(Result != AliasAnalysis::MustAlias &&
- Result != AliasAnalysis::PartialAlias,
- "Unusual: noalias argument aliases another argument", &I);
+ Assert(Result != AliasAnalysis::MustAlias &&
+ Result != AliasAnalysis::PartialAlias,
+ "Unusual: noalias argument aliases another argument", &I);
}
// Check that an sret argument points to valid memory.
@@ -253,8 +255,8 @@ void Lint::visitCallSite(CallSite CS) {
Type *Ty =
cast<PointerType>(Formal->getType())->getElementType();
visitMemoryReference(I, Actual, AA->getTypeStoreSize(Ty),
- DL ? DL->getABITypeAlignment(Ty) : 0,
- Ty, MemRef::Read | MemRef::Write);
+ DL.getABITypeAlignment(Ty), Ty,
+ MemRef::Read | MemRef::Write);
}
}
}
@@ -263,10 +265,11 @@ void Lint::visitCallSite(CallSite CS) {
if (CS.isCall() && cast<CallInst>(CS.getInstruction())->isTailCall())
for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
AI != AE; ++AI) {
- Value *Obj = findValue(*AI, /*OffsetOk=*/true);
- Assert1(!isa<AllocaInst>(Obj),
- "Undefined behavior: Call with \"tail\" keyword references "
- "alloca", &I);
+ Value *Obj = findValue(*AI, DL, /*OffsetOk=*/true);
+ Assert(!isa<AllocaInst>(Obj),
+ "Undefined behavior: Call with \"tail\" keyword references "
+ "alloca",
+ &I);
}
@@ -291,13 +294,13 @@ void Lint::visitCallSite(CallSite CS) {
// overlap is not distinguished from the case where nothing is known.
uint64_t Size = 0;
if (const ConstantInt *Len =
- dyn_cast<ConstantInt>(findValue(MCI->getLength(),
- /*OffsetOk=*/false)))
+ dyn_cast<ConstantInt>(findValue(MCI->getLength(), DL,
+ /*OffsetOk=*/false)))
if (Len->getValue().isIntN(32))
Size = Len->getValue().getZExtValue();
- Assert1(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) !=
- AliasAnalysis::MustAlias,
- "Undefined behavior: memcpy source and destination overlap", &I);
+ Assert(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) !=
+ AliasAnalysis::MustAlias,
+ "Undefined behavior: memcpy source and destination overlap", &I);
break;
}
case Intrinsic::memmove: {
@@ -321,9 +324,9 @@ void Lint::visitCallSite(CallSite CS) {
}
case Intrinsic::vastart:
- Assert1(I.getParent()->getParent()->isVarArg(),
- "Undefined behavior: va_start called in a non-varargs function",
- &I);
+ Assert(I.getParent()->getParent()->isVarArg(),
+ "Undefined behavior: va_start called in a non-varargs function",
+ &I);
visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
0, nullptr, MemRef::Read | MemRef::Write);
@@ -346,6 +349,13 @@ void Lint::visitCallSite(CallSite CS) {
visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
0, nullptr, MemRef::Read | MemRef::Write);
break;
+
+ case Intrinsic::eh_begincatch:
+ visitEHBeginCatch(II);
+ break;
+ case Intrinsic::eh_endcatch:
+ visitEHEndCatch(II);
+ break;
}
}
@@ -359,14 +369,13 @@ void Lint::visitInvokeInst(InvokeInst &I) {
void Lint::visitReturnInst(ReturnInst &I) {
Function *F = I.getParent()->getParent();
- Assert1(!F->doesNotReturn(),
- "Unusual: Return statement in function with noreturn attribute",
- &I);
+ Assert(!F->doesNotReturn(),
+ "Unusual: Return statement in function with noreturn attribute", &I);
if (Value *V = I.getReturnValue()) {
- Value *Obj = findValue(V, /*OffsetOk=*/true);
- Assert1(!isa<AllocaInst>(Obj),
- "Unusual: Returning alloca value", &I);
+ Value *Obj =
+ findValue(V, F->getParent()->getDataLayout(), /*OffsetOk=*/true);
+ Assert(!isa<AllocaInst>(Obj), "Unusual: Returning alloca value", &I);
}
}
@@ -380,45 +389,47 @@ void Lint::visitMemoryReference(Instruction &I,
if (Size == 0)
return;
- Value *UnderlyingObject = findValue(Ptr, /*OffsetOk=*/true);
- Assert1(!isa<ConstantPointerNull>(UnderlyingObject),
- "Undefined behavior: Null pointer dereference", &I);
- Assert1(!isa<UndefValue>(UnderlyingObject),
- "Undefined behavior: Undef pointer dereference", &I);
- Assert1(!isa<ConstantInt>(UnderlyingObject) ||
- !cast<ConstantInt>(UnderlyingObject)->isAllOnesValue(),
- "Unusual: All-ones pointer dereference", &I);
- Assert1(!isa<ConstantInt>(UnderlyingObject) ||
- !cast<ConstantInt>(UnderlyingObject)->isOne(),
- "Unusual: Address one pointer dereference", &I);
+ Value *UnderlyingObject =
+ findValue(Ptr, I.getModule()->getDataLayout(), /*OffsetOk=*/true);
+ Assert(!isa<ConstantPointerNull>(UnderlyingObject),
+ "Undefined behavior: Null pointer dereference", &I);
+ Assert(!isa<UndefValue>(UnderlyingObject),
+ "Undefined behavior: Undef pointer dereference", &I);
+ Assert(!isa<ConstantInt>(UnderlyingObject) ||
+ !cast<ConstantInt>(UnderlyingObject)->isAllOnesValue(),
+ "Unusual: All-ones pointer dereference", &I);
+ Assert(!isa<ConstantInt>(UnderlyingObject) ||
+ !cast<ConstantInt>(UnderlyingObject)->isOne(),
+ "Unusual: Address one pointer dereference", &I);
if (Flags & MemRef::Write) {
if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(UnderlyingObject))
- Assert1(!GV->isConstant(),
- "Undefined behavior: Write to read-only memory", &I);
- Assert1(!isa<Function>(UnderlyingObject) &&
- !isa<BlockAddress>(UnderlyingObject),
- "Undefined behavior: Write to text section", &I);
+ Assert(!GV->isConstant(), "Undefined behavior: Write to read-only memory",
+ &I);
+ Assert(!isa<Function>(UnderlyingObject) &&
+ !isa<BlockAddress>(UnderlyingObject),
+ "Undefined behavior: Write to text section", &I);
}
if (Flags & MemRef::Read) {
- Assert1(!isa<Function>(UnderlyingObject),
- "Unusual: Load from function body", &I);
- Assert1(!isa<BlockAddress>(UnderlyingObject),
- "Undefined behavior: Load from block address", &I);
+ Assert(!isa<Function>(UnderlyingObject), "Unusual: Load from function body",
+ &I);
+ Assert(!isa<BlockAddress>(UnderlyingObject),
+ "Undefined behavior: Load from block address", &I);
}
if (Flags & MemRef::Callee) {
- Assert1(!isa<BlockAddress>(UnderlyingObject),
- "Undefined behavior: Call to block address", &I);
+ Assert(!isa<BlockAddress>(UnderlyingObject),
+ "Undefined behavior: Call to block address", &I);
}
if (Flags & MemRef::Branchee) {
- Assert1(!isa<Constant>(UnderlyingObject) ||
- isa<BlockAddress>(UnderlyingObject),
- "Undefined behavior: Branch to non-blockaddress", &I);
+ Assert(!isa<Constant>(UnderlyingObject) ||
+ isa<BlockAddress>(UnderlyingObject),
+ "Undefined behavior: Branch to non-blockaddress", &I);
}
// Check for buffer overflows and misalignment.
// Only handles memory references that read/write something simple like an
// alloca instruction or a global variable.
+ auto &DL = I.getModule()->getDataLayout();
int64_t Offset = 0;
if (Value *Base = GetPointerBaseWithConstantOffset(Ptr, Offset, DL)) {
// OK, so the access is to a constant offset from Ptr. Check that Ptr is
@@ -429,37 +440,37 @@ void Lint::visitMemoryReference(Instruction &I,
if (AllocaInst *AI = dyn_cast<AllocaInst>(Base)) {
Type *ATy = AI->getAllocatedType();
- if (DL && !AI->isArrayAllocation() && ATy->isSized())
- BaseSize = DL->getTypeAllocSize(ATy);
+ if (!AI->isArrayAllocation() && ATy->isSized())
+ BaseSize = DL.getTypeAllocSize(ATy);
BaseAlign = AI->getAlignment();
- if (DL && BaseAlign == 0 && ATy->isSized())
- BaseAlign = DL->getABITypeAlignment(ATy);
+ if (BaseAlign == 0 && ATy->isSized())
+ BaseAlign = DL.getABITypeAlignment(ATy);
} else if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) {
// If the global may be defined differently in another compilation unit
// then don't warn about funky memory accesses.
if (GV->hasDefinitiveInitializer()) {
Type *GTy = GV->getType()->getElementType();
- if (DL && GTy->isSized())
- BaseSize = DL->getTypeAllocSize(GTy);
+ if (GTy->isSized())
+ BaseSize = DL.getTypeAllocSize(GTy);
BaseAlign = GV->getAlignment();
- if (DL && BaseAlign == 0 && GTy->isSized())
- BaseAlign = DL->getABITypeAlignment(GTy);
+ if (BaseAlign == 0 && GTy->isSized())
+ BaseAlign = DL.getABITypeAlignment(GTy);
}
}
// Accesses from before the start or after the end of the object are not
// defined.
- Assert1(Size == AliasAnalysis::UnknownSize ||
- BaseSize == AliasAnalysis::UnknownSize ||
- (Offset >= 0 && Offset + Size <= BaseSize),
- "Undefined behavior: Buffer overflow", &I);
+ Assert(Size == AliasAnalysis::UnknownSize ||
+ BaseSize == AliasAnalysis::UnknownSize ||
+ (Offset >= 0 && Offset + Size <= BaseSize),
+ "Undefined behavior: Buffer overflow", &I);
// Accesses that say that the memory is more aligned than it is are not
// defined.
- if (DL && Align == 0 && Ty && Ty->isSized())
- Align = DL->getABITypeAlignment(Ty);
- Assert1(!BaseAlign || Align <= MinAlign(BaseAlign, Offset),
- "Undefined behavior: Memory reference address is misaligned", &I);
+ if (Align == 0 && Ty && Ty->isSized())
+ Align = DL.getABITypeAlignment(Ty);
+ Assert(!BaseAlign || Align <= MinAlign(BaseAlign, Offset),
+ "Undefined behavior: Memory reference address is misaligned", &I);
}
}
@@ -477,39 +488,219 @@ void Lint::visitStoreInst(StoreInst &I) {
}
void Lint::visitXor(BinaryOperator &I) {
- Assert1(!isa<UndefValue>(I.getOperand(0)) ||
- !isa<UndefValue>(I.getOperand(1)),
- "Undefined result: xor(undef, undef)", &I);
+ Assert(!isa<UndefValue>(I.getOperand(0)) || !isa<UndefValue>(I.getOperand(1)),
+ "Undefined result: xor(undef, undef)", &I);
}
void Lint::visitSub(BinaryOperator &I) {
- Assert1(!isa<UndefValue>(I.getOperand(0)) ||
- !isa<UndefValue>(I.getOperand(1)),
- "Undefined result: sub(undef, undef)", &I);
+ Assert(!isa<UndefValue>(I.getOperand(0)) || !isa<UndefValue>(I.getOperand(1)),
+ "Undefined result: sub(undef, undef)", &I);
}
void Lint::visitLShr(BinaryOperator &I) {
- if (ConstantInt *CI =
- dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false)))
- Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
- "Undefined result: Shift count out of range", &I);
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(
+ findValue(I.getOperand(1), I.getModule()->getDataLayout(),
+ /*OffsetOk=*/false)))
+ Assert(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
+ "Undefined result: Shift count out of range", &I);
}
void Lint::visitAShr(BinaryOperator &I) {
- if (ConstantInt *CI =
- dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false)))
- Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
- "Undefined result: Shift count out of range", &I);
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(
+ I.getOperand(1), I.getModule()->getDataLayout(), /*OffsetOk=*/false)))
+ Assert(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
+ "Undefined result: Shift count out of range", &I);
}
void Lint::visitShl(BinaryOperator &I) {
- if (ConstantInt *CI =
- dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false)))
- Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
- "Undefined result: Shift count out of range", &I);
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(
+ I.getOperand(1), I.getModule()->getDataLayout(), /*OffsetOk=*/false)))
+ Assert(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
+ "Undefined result: Shift count out of range", &I);
+}
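+
+// Illustrative input (a sketch, not exact IR) that the three shift visitors
+// above would flag:
+//   %r = shl i32 %x, 37
+// The constant shift count 37 is not less than the 32-bit width of the type,
+// so the lint reports "Undefined result: Shift count out of range".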
+
+static bool
+allPredsCameFromLandingPad(BasicBlock *BB,
+ SmallSet<BasicBlock *, 4> &VisitedBlocks) {
+ VisitedBlocks.insert(BB);
+ if (BB->isLandingPad())
+ return true;
+ // If we find a block with no predecessors, the search failed.
+ if (pred_empty(BB))
+ return false;
+ for (BasicBlock *Pred : predecessors(BB)) {
+ if (VisitedBlocks.count(Pred))
+ continue;
+ if (!allPredsCameFromLandingPad(Pred, VisitedBlocks))
+ return false;
+ }
+ return true;
+}
+
+static bool
+allSuccessorsReachEndCatch(BasicBlock *BB, BasicBlock::iterator InstBegin,
+ IntrinsicInst **SecondBeginCatch,
+ SmallSet<BasicBlock *, 4> &VisitedBlocks) {
+ VisitedBlocks.insert(BB);
+ for (BasicBlock::iterator I = InstBegin, E = BB->end(); I != E; ++I) {
+ IntrinsicInst *IC = dyn_cast<IntrinsicInst>(I);
+ if (IC && IC->getIntrinsicID() == Intrinsic::eh_endcatch)
+ return true;
+ // If we find another begincatch while looking for an endcatch,
+ // that's also an error.
+ if (IC && IC->getIntrinsicID() == Intrinsic::eh_begincatch) {
+ *SecondBeginCatch = IC;
+ return false;
+ }
+ }
+
+ // If we reach a block with no successors while searching, the
+ // search has failed.
+ if (succ_empty(BB))
+ return false;
+ // Otherwise, search all of the successors.
+ for (BasicBlock *Succ : successors(BB)) {
+ if (VisitedBlocks.count(Succ))
+ continue;
+ if (!allSuccessorsReachEndCatch(Succ, Succ->begin(), SecondBeginCatch,
+ VisitedBlocks))
+ return false;
+ }
+ return true;
+}
+
+void Lint::visitEHBeginCatch(IntrinsicInst *II) {
+ // The checks in this function make a potentially dubious assumption about
+ // the CFG, namely that any block involved in a catch is only used for the
+ // catch. This will very likely be true of IR generated by a front end,
+ // but it may cease to be true, for example, if the IR is run through a
+ // pass which combines similar blocks.
+ //
+ // In general, if we encounter a block that isn't dominated by the catch
+ // block while we are searching the catch block's successors for a call
+ // to end catch intrinsic, then it is possible that it will be legal for
+ // a path through this block to never reach a call to llvm.eh.endcatch.
+ // An analogous statement could be made about our search for a landing
+ // pad among the catch block's predecessors.
+ //
+ // What is actually required is that no path is possible at runtime that
+ // reaches a call to llvm.eh.begincatch without having previously visited
+ // a landingpad instruction and that no path is possible at runtime that
+ // calls llvm.eh.begincatch and does not subsequently call llvm.eh.endcatch
+ // (mentally adjusting for the fact that in reality these calls will be
+ // removed before code generation).
+ //
+ // Because this is a lint check, we take a pessimistic approach and warn if
+ // the control flow is potentially incorrect.
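+ //
+ // As a rough illustration (the IR is sketched, not exact), a well-formed
+ // catch has the shape:
+ //
+ //   lpad:                                  ; landingpad block
+ //     %lp = landingpad ...
+ //     call ... @llvm.eh.begincatch(...)
+ //     ...                                  ; catch handler body
+ //     call ... @llvm.eh.endcatch(...)
+ //
+ // Every path into the begincatch goes through the landingpad and every
+ // path out of it reaches the endcatch, so neither warning below fires.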
+
+ SmallSet<BasicBlock *, 4> VisitedBlocks;
+ BasicBlock *CatchBB = II->getParent();
+
+ // The begin catch must occur in a landing pad block or all paths
+ // to it must have come from a landing pad.
+ Assert(allPredsCameFromLandingPad(CatchBB, VisitedBlocks),
+ "llvm.eh.begincatch may be reachable without passing a landingpad",
+ II);
+
+ // Reset the visited block list.
+ VisitedBlocks.clear();
+
+ IntrinsicInst *SecondBeginCatch = nullptr;
+
+ // This has to be called before the asserts below. Otherwise, the first
+ // assert below can never be hit.
+ bool EndCatchFound = allSuccessorsReachEndCatch(
+ CatchBB, std::next(static_cast<BasicBlock::iterator>(II)),
+ &SecondBeginCatch, VisitedBlocks);
+ Assert(
+ SecondBeginCatch == nullptr,
+ "llvm.eh.begincatch may be called a second time before llvm.eh.endcatch",
+ II, SecondBeginCatch);
+ Assert(EndCatchFound,
+ "Some paths from llvm.eh.begincatch may not reach llvm.eh.endcatch",
+ II);
+}
+
+static bool allPredCameFromBeginCatch(
+ BasicBlock *BB, BasicBlock::reverse_iterator InstRbegin,
+ IntrinsicInst **SecondEndCatch, SmallSet<BasicBlock *, 4> &VisitedBlocks) {
+ VisitedBlocks.insert(BB);
+ // Look for a begincatch in this block.
+ for (BasicBlock::reverse_iterator RI = InstRbegin, RE = BB->rend(); RI != RE;
+ ++RI) {
+ IntrinsicInst *IC = dyn_cast<IntrinsicInst>(&*RI);
+ if (IC && IC->getIntrinsicID() == Intrinsic::eh_begincatch)
+ return true;
+ // If we find another end catch before we find a begin catch, that's
+ // an error.
+ if (IC && IC->getIntrinsicID() == Intrinsic::eh_endcatch) {
+ *SecondEndCatch = IC;
+ return false;
+ }
+ // If we encounter a landingpad instruction, the search failed.
+ if (isa<LandingPadInst>(*RI))
+ return false;
+ }
+ // If while searching we find a block with no predecessors,
+ // the search failed.
+ if (pred_empty(BB))
+ return false;
+ // Search any predecessors we haven't seen before.
+ for (BasicBlock *Pred : predecessors(BB)) {
+ if (VisitedBlocks.count(Pred))
+ continue;
+ if (!allPredCameFromBeginCatch(Pred, Pred->rbegin(), SecondEndCatch,
+ VisitedBlocks))
+ return false;
+ }
+ return true;
+}
+
+void Lint::visitEHEndCatch(IntrinsicInst *II) {
+ // The check in this function makes a potentially dubious assumption about
+ // the CFG, namely that any block involved in a catch is only used for the
+ // catch. This will very likely be true of IR generated by a front end,
+ // but it may cease to be true, for example, if the IR is run through a
+ // pass which combines similar blocks.
+ //
+ // In general, if we encounter a block that isn't post-dominated by the
+ // end catch block while we are searching the end catch block's predecessors
+ // for a call to the begin catch intrinsic, then it is possible that it will
+ // be legal for a path to reach the end catch block without ever having
+ // called llvm.eh.begincatch.
+ //
+ // What is actually required is that no path is possible at runtime that
+ // reaches a call to llvm.eh.endcatch without having previously visited
+ // a call to llvm.eh.begincatch (mentally adjusting for the fact that in
+ // reality these calls will be removed before code generation).
+ //
+ // Because this is a lint check, we take a pessimistic approach and warn if
+ // the control flow is potentially incorrect.
+
+ BasicBlock *EndCatchBB = II->getParent();
+
+ // All paths to the end catch call must pass through a begin catch call.
+
+ // If llvm.eh.begincatch wasn't called in the current block, we'll use this
+ // helper to recursively look for it in predecessors.
+ SmallSet<BasicBlock *, 4> VisitedBlocks;
+ IntrinsicInst *SecondEndCatch = nullptr;
+
+ // This has to be called before the asserts below. Otherwise, the first
+ // assert below can never be hit.
+ bool BeginCatchFound =
+ allPredCameFromBeginCatch(EndCatchBB, BasicBlock::reverse_iterator(II),
+ &SecondEndCatch, VisitedBlocks);
+ Assert(
+ SecondEndCatch == nullptr,
+ "llvm.eh.endcatch may be called a second time after llvm.eh.begincatch",
+ II, SecondEndCatch);
+ Assert(BeginCatchFound,
+ "llvm.eh.endcatch may be reachable without passing llvm.eh.begincatch",
+ II);
}
-static bool isZero(Value *V, const DataLayout *DL, DominatorTree *DT,
+static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT,
AssumptionCache *AC) {
// Assume undef could be zero.
if (isa<UndefValue>(V))
@@ -550,30 +741,30 @@ static bool isZero(Value *V, const DataLayout *DL, DominatorTree *DT,
}
void Lint::visitSDiv(BinaryOperator &I) {
- Assert1(!isZero(I.getOperand(1), DL, DT, AC),
- "Undefined behavior: Division by zero", &I);
+ Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC),
+ "Undefined behavior: Division by zero", &I);
}
void Lint::visitUDiv(BinaryOperator &I) {
- Assert1(!isZero(I.getOperand(1), DL, DT, AC),
- "Undefined behavior: Division by zero", &I);
+ Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC),
+ "Undefined behavior: Division by zero", &I);
}
void Lint::visitSRem(BinaryOperator &I) {
- Assert1(!isZero(I.getOperand(1), DL, DT, AC),
- "Undefined behavior: Division by zero", &I);
+ Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC),
+ "Undefined behavior: Division by zero", &I);
}
void Lint::visitURem(BinaryOperator &I) {
- Assert1(!isZero(I.getOperand(1), DL, DT, AC),
- "Undefined behavior: Division by zero", &I);
+ Assert(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC),
+ "Undefined behavior: Division by zero", &I);
}
void Lint::visitAllocaInst(AllocaInst &I) {
if (isa<ConstantInt>(I.getArraySize()))
// This isn't undefined behavior, it's just an obvious pessimization.
- Assert1(&I.getParent()->getParent()->getEntryBlock() == I.getParent(),
- "Pessimization: Static alloca outside of entry block", &I);
+ Assert(&I.getParent()->getParent()->getEntryBlock() == I.getParent(),
+ "Pessimization: Static alloca outside of entry block", &I);
// TODO: Check for an unusual size (MSB set?)
}
@@ -587,32 +778,33 @@ void Lint::visitIndirectBrInst(IndirectBrInst &I) {
visitMemoryReference(I, I.getAddress(), AliasAnalysis::UnknownSize, 0,
nullptr, MemRef::Branchee);
- Assert1(I.getNumDestinations() != 0,
- "Undefined behavior: indirectbr with no destinations", &I);
+ Assert(I.getNumDestinations() != 0,
+ "Undefined behavior: indirectbr with no destinations", &I);
}
void Lint::visitExtractElementInst(ExtractElementInst &I) {
- if (ConstantInt *CI =
- dyn_cast<ConstantInt>(findValue(I.getIndexOperand(),
- /*OffsetOk=*/false)))
- Assert1(CI->getValue().ult(I.getVectorOperandType()->getNumElements()),
- "Undefined result: extractelement index out of range", &I);
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(
+ findValue(I.getIndexOperand(), I.getModule()->getDataLayout(),
+ /*OffsetOk=*/false)))
+ Assert(CI->getValue().ult(I.getVectorOperandType()->getNumElements()),
+ "Undefined result: extractelement index out of range", &I);
}
void Lint::visitInsertElementInst(InsertElementInst &I) {
- if (ConstantInt *CI =
- dyn_cast<ConstantInt>(findValue(I.getOperand(2),
- /*OffsetOk=*/false)))
- Assert1(CI->getValue().ult(I.getType()->getNumElements()),
- "Undefined result: insertelement index out of range", &I);
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(
+ findValue(I.getOperand(2), I.getModule()->getDataLayout(),
+ /*OffsetOk=*/false)))
+ Assert(CI->getValue().ult(I.getType()->getNumElements()),
+ "Undefined result: insertelement index out of range", &I);
}
void Lint::visitUnreachableInst(UnreachableInst &I) {
// This isn't undefined behavior, it's merely suspicious.
- Assert1(&I == I.getParent()->begin() ||
- std::prev(BasicBlock::iterator(&I))->mayHaveSideEffects(),
- "Unusual: unreachable immediately preceded by instruction without "
- "side effects", &I);
+ Assert(&I == I.getParent()->begin() ||
+ std::prev(BasicBlock::iterator(&I))->mayHaveSideEffects(),
+ "Unusual: unreachable immediately preceded by instruction without "
+ "side effects",
+ &I);
}
/// findValue - Look through bitcasts and simple memory reference patterns
@@ -622,13 +814,13 @@ void Lint::visitUnreachableInst(UnreachableInst &I) {
/// Most analysis passes don't require this logic, because instcombine
/// will simplify most of these kinds of things away. But it's a goal of
/// this Lint pass to be useful even on non-optimized IR.
-Value *Lint::findValue(Value *V, bool OffsetOk) const {
+Value *Lint::findValue(Value *V, const DataLayout &DL, bool OffsetOk) const {
SmallPtrSet<Value *, 4> Visited;
- return findValueImpl(V, OffsetOk, Visited);
+ return findValueImpl(V, DL, OffsetOk, Visited);
}
/// findValueImpl - Implementation helper for findValue.
-Value *Lint::findValueImpl(Value *V, bool OffsetOk,
+Value *Lint::findValueImpl(Value *V, const DataLayout &DL, bool OffsetOk,
SmallPtrSetImpl<Value *> &Visited) const {
// Detect self-referential values.
if (!Visited.insert(V).second)
@@ -649,7 +841,7 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk,
break;
if (Value *U = FindAvailableLoadedValue(L->getPointerOperand(),
BB, BBI, 6, AA))
- return findValueImpl(U, OffsetOk, Visited);
+ return findValueImpl(U, DL, OffsetOk, Visited);
if (BBI != BB->begin()) break;
BB = BB->getUniquePredecessor();
if (!BB) break;
@@ -658,40 +850,38 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk,
} else if (PHINode *PN = dyn_cast<PHINode>(V)) {
if (Value *W = PN->hasConstantValue())
if (W != V)
- return findValueImpl(W, OffsetOk, Visited);
+ return findValueImpl(W, DL, OffsetOk, Visited);
} else if (CastInst *CI = dyn_cast<CastInst>(V)) {
if (CI->isNoopCast(DL))
- return findValueImpl(CI->getOperand(0), OffsetOk, Visited);
+ return findValueImpl(CI->getOperand(0), DL, OffsetOk, Visited);
} else if (ExtractValueInst *Ex = dyn_cast<ExtractValueInst>(V)) {
if (Value *W = FindInsertedValue(Ex->getAggregateOperand(),
Ex->getIndices()))
if (W != V)
- return findValueImpl(W, OffsetOk, Visited);
+ return findValueImpl(W, DL, OffsetOk, Visited);
} else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
// Same as above, but for ConstantExpr instead of Instruction.
if (Instruction::isCast(CE->getOpcode())) {
if (CastInst::isNoopCast(Instruction::CastOps(CE->getOpcode()),
- CE->getOperand(0)->getType(),
- CE->getType(),
- DL ? DL->getIntPtrType(V->getType()) :
- Type::getInt64Ty(V->getContext())))
- return findValueImpl(CE->getOperand(0), OffsetOk, Visited);
+ CE->getOperand(0)->getType(), CE->getType(),
+ DL.getIntPtrType(V->getType())))
+ return findValueImpl(CE->getOperand(0), DL, OffsetOk, Visited);
} else if (CE->getOpcode() == Instruction::ExtractValue) {
ArrayRef<unsigned> Indices = CE->getIndices();
if (Value *W = FindInsertedValue(CE->getOperand(0), Indices))
if (W != V)
- return findValueImpl(W, OffsetOk, Visited);
+ return findValueImpl(W, DL, OffsetOk, Visited);
}
}
// As a last resort, try SimplifyInstruction or constant folding.
if (Instruction *Inst = dyn_cast<Instruction>(V)) {
if (Value *W = SimplifyInstruction(Inst, DL, TLI, DT, AC))
- return findValueImpl(W, OffsetOk, Visited);
+ return findValueImpl(W, DL, OffsetOk, Visited);
} else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
if (Value *W = ConstantFoldConstantExpression(CE, DL, TLI))
if (W != V)
- return findValueImpl(W, OffsetOk, Visited);
+ return findValueImpl(W, DL, OffsetOk, Visited);
}
return V;
@@ -711,7 +901,7 @@ void llvm::lintFunction(const Function &f) {
Function &F = const_cast<Function&>(f);
assert(!F.isDeclaration() && "Cannot lint external functions");
- FunctionPassManager FPM(F.getParent());
+ legacy::FunctionPassManager FPM(F.getParent());
Lint *V = new Lint();
FPM.add(V);
FPM.run(F);
@@ -720,7 +910,7 @@ void llvm::lintFunction(const Function &f) {
/// lintModule - Check a module for errors, printing messages on stderr.
///
void llvm::lintModule(const Module &M) {
- PassManager PM;
+ legacy::PassManager PM;
Lint *V = new Lint();
PM.add(V);
PM.run(const_cast<Module&>(M));
diff --git a/contrib/llvm/lib/Analysis/Loads.cpp b/contrib/llvm/lib/Analysis/Loads.cpp
index 5042eb9..aed3b04 100644
--- a/contrib/llvm/lib/Analysis/Loads.cpp
+++ b/contrib/llvm/lib/Analysis/Loads.cpp
@@ -19,6 +19,7 @@
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
using namespace llvm;
@@ -62,7 +63,8 @@ static bool AreEquivalentAddressValues(const Value *A, const Value *B) {
/// This uses the pointee type to determine how many bytes need to be safe to
/// load from the pointer.
bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
- unsigned Align, const DataLayout *DL) {
+ unsigned Align) {
+ const DataLayout &DL = ScanFrom->getModule()->getDataLayout();
int64_t ByteOffset = 0;
Value *Base = V;
Base = GetPointerBaseWithConstantOffset(V, ByteOffset, DL);
@@ -87,19 +89,19 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
}
PointerType *AddrTy = cast<PointerType>(V->getType());
- uint64_t LoadSize = DL ? DL->getTypeStoreSize(AddrTy->getElementType()) : 0;
+ uint64_t LoadSize = DL.getTypeStoreSize(AddrTy->getElementType());
// If we found a base allocated type from either an alloca or global variable,
// try to see if we are definitively within the allocated region. We need to
// know the size of the base type and the loaded type to do anything in this
- // case, so only try this when we have the DataLayout available.
- if (BaseType && BaseType->isSized() && DL) {
+ // case.
+ if (BaseType && BaseType->isSized()) {
if (BaseAlign == 0)
- BaseAlign = DL->getPrefTypeAlignment(BaseType);
+ BaseAlign = DL.getPrefTypeAlignment(BaseType);
if (Align <= BaseAlign) {
// Check if the load is within the bounds of the underlying object.
- if (ByteOffset + LoadSize <= DL->getTypeAllocSize(BaseType) &&
+ if (ByteOffset + LoadSize <= DL.getTypeAllocSize(BaseType) &&
(Align == 0 || (ByteOffset % Align) == 0))
return true;
}
@@ -133,16 +135,13 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
else
continue;
- // Handle trivial cases even w/o DataLayout or other work.
+ // Handle trivial cases.
if (AccessedPtr == V)
return true;
- if (!DL)
- continue;
-
auto *AccessedTy = cast<PointerType>(AccessedPtr->getType());
if (AreEquivalentAddressValues(AccessedPtr->stripPointerCasts(), V) &&
- LoadSize <= DL->getTypeStoreSize(AccessedTy->getElementType()))
+ LoadSize <= DL.getTypeStoreSize(AccessedTy->getElementType()))
return true;
}
return false;
@@ -176,13 +175,10 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
Type *AccessTy = cast<PointerType>(Ptr->getType())->getElementType();
- // Try to get the DataLayout for this module. This may be null, in which case
- // the optimizations will be limited.
- const DataLayout *DL = ScanBB->getDataLayout();
+ const DataLayout &DL = ScanBB->getModule()->getDataLayout();
// Try to get the store size for the type.
- uint64_t AccessSize = DL ? DL->getTypeStoreSize(AccessTy)
- : AA ? AA->getTypeStoreSize(AccessTy) : 0;
+ uint64_t AccessSize = DL.getTypeStoreSize(AccessTy);
Value *StrippedPtr = Ptr->stripPointerCasts();
diff --git a/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp
new file mode 100644
index 0000000..b70de00
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -0,0 +1,1427 @@
+//===- LoopAccessAnalysis.cpp - Loop Access Analysis Implementation --------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The implementation of the loop memory dependence analysis that was
+// originally developed for the loop vectorizer.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/LoopAccessAnalysis.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/VectorUtils.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "loop-accesses"
+
+static cl::opt<unsigned, true>
+VectorizationFactor("force-vector-width", cl::Hidden,
+ cl::desc("Sets the SIMD width. Zero is autoselect."),
+ cl::location(VectorizerParams::VectorizationFactor));
+unsigned VectorizerParams::VectorizationFactor;
+
+static cl::opt<unsigned, true>
+VectorizationInterleave("force-vector-interleave", cl::Hidden,
+ cl::desc("Sets the vectorization interleave count. "
+ "Zero is autoselect."),
+ cl::location(
+ VectorizerParams::VectorizationInterleave));
+unsigned VectorizerParams::VectorizationInterleave;
+
+static cl::opt<unsigned, true> RuntimeMemoryCheckThreshold(
+ "runtime-memory-check-threshold", cl::Hidden,
+ cl::desc("When performing memory disambiguation checks at runtime do not "
+ "generate more than this number of comparisons (default = 8)."),
+ cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8));
+unsigned VectorizerParams::RuntimeMemoryCheckThreshold;
+
+/// Maximum SIMD width.
+const unsigned VectorizerParams::MaxVectorWidth = 64;
+
+/// \brief We collect interesting dependences up to this threshold.
+static cl::opt<unsigned> MaxInterestingDependence(
+ "max-interesting-dependences", cl::Hidden,
+ cl::desc("Maximum number of interesting dependences collected by "
+ "loop-access analysis (default = 100)"),
+ cl::init(100));
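+
+// These thresholds are ordinary cl::opt flags, so they can be overridden on
+// the command line of tools that link this analysis, e.g. (an illustrative
+// invocation; the exact pass plumbing may differ):
+//   opt -analyze -loop-accesses -force-vector-width=4 \
+//       -runtime-memory-check-threshold=16 input.ll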
+
+bool VectorizerParams::isInterleaveForced() {
+ return ::VectorizationInterleave.getNumOccurrences() > 0;
+}
+
+void LoopAccessReport::emitAnalysis(const LoopAccessReport &Message,
+ const Function *TheFunction,
+ const Loop *TheLoop,
+ const char *PassName) {
+ DebugLoc DL = TheLoop->getStartLoc();
+ if (const Instruction *I = Message.getInstr())
+ DL = I->getDebugLoc();
+ emitOptimizationRemarkAnalysis(TheFunction->getContext(), PassName,
+ *TheFunction, DL, Message.str());
+}
+
+Value *llvm::stripIntegerCast(Value *V) {
+ if (CastInst *CI = dyn_cast<CastInst>(V))
+ if (CI->getOperand(0)->getType()->isIntegerTy())
+ return CI->getOperand(0);
+ return V;
+}
+
+const SCEV *llvm::replaceSymbolicStrideSCEV(ScalarEvolution *SE,
+ const ValueToValueMap &PtrToStride,
+ Value *Ptr, Value *OrigPtr) {
+
+ const SCEV *OrigSCEV = SE->getSCEV(Ptr);
+
+ // If there is an entry in the map return the SCEV of the pointer with the
+ // symbolic stride replaced by one.
+ ValueToValueMap::const_iterator SI =
+ PtrToStride.find(OrigPtr ? OrigPtr : Ptr);
+ if (SI != PtrToStride.end()) {
+ Value *StrideVal = SI->second;
+
+ // Strip casts.
+ StrideVal = stripIntegerCast(StrideVal);
+
+ // Replace symbolic stride by one.
+ Value *One = ConstantInt::get(StrideVal->getType(), 1);
+ ValueToValueMap RewriteMap;
+ RewriteMap[StrideVal] = One;
+
+ const SCEV *ByOne =
+ SCEVParameterRewriter::rewrite(OrigSCEV, *SE, RewriteMap, true);
+ DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV << " by: " << *ByOne
+ << "\n");
+ return ByOne;
+ }
+
+ // Otherwise, just return the SCEV of the original pointer.
+ return SE->getSCEV(Ptr);
+}
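+
+// For example (illustrative SCEVs): if Ptr has the SCEV
+//   {%A,+,(4 * %Stride)}<%loop>
+// and %Stride has an entry in PtrToStride, the rewrite above substitutes the
+// constant 1 for %Stride, yielding {%A,+,4}<%loop>, i.e. a unit-strided
+// access that the downstream stride and dependence checks can reason about.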
+
+void LoopAccessInfo::RuntimePointerCheck::insert(
+ ScalarEvolution *SE, Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId,
+ unsigned ASId, const ValueToValueMap &Strides) {
+ // Get the stride replaced scev.
+ const SCEV *Sc = replaceSymbolicStrideSCEV(SE, Strides, Ptr);
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
+ assert(AR && "Invalid addrec expression");
+ const SCEV *Ex = SE->getBackedgeTakenCount(Lp);
+ const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE);
+ Pointers.push_back(Ptr);
+ Starts.push_back(AR->getStart());
+ Ends.push_back(ScEnd);
+ IsWritePtr.push_back(WritePtr);
+ DependencySetId.push_back(DepSetId);
+ AliasSetId.push_back(ASId);
+}
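+
+// Rough example of what gets recorded above (illustrative values): for a
+// pointer whose stride-replaced SCEV is {%A,+,4} in a loop with backedge-taken
+// count %n, Start is %A and ScEnd is %A + 4 * %n, and the runtime check that
+// is emitted later compares such start/end bounds for overlap between
+// pointers from different dependence sets.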
+
+bool LoopAccessInfo::RuntimePointerCheck::needsChecking(
+ unsigned I, unsigned J, const SmallVectorImpl<int> *PtrPartition) const {
+ // No need to check if two readonly pointers intersect.
+ if (!IsWritePtr[I] && !IsWritePtr[J])
+ return false;
+
+ // Only need to check pointers between two different dependency sets.
+ if (DependencySetId[I] == DependencySetId[J])
+ return false;
+
+ // Only need to check pointers in the same alias set.
+ if (AliasSetId[I] != AliasSetId[J])
+ return false;
+
+ // If PtrPartition is set omit checks between pointers of the same partition.
+ // Partition number -1 means that the pointer is used in multiple partitions.
+ // In this case we can't omit the check.
+ if (PtrPartition && (*PtrPartition)[I] != -1 &&
+ (*PtrPartition)[I] == (*PtrPartition)[J])
+ return false;
+
+ return true;
+}
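+
+// For example: two pointers that are only ever read never need a runtime
+// check, and neither do accesses that landed in the same dependence set,
+// since their dependence is analyzed directly; a write and a read that share
+// an alias set but sit in different dependence sets do need a check.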
+
+void LoopAccessInfo::RuntimePointerCheck::print(
+ raw_ostream &OS, unsigned Depth,
+ const SmallVectorImpl<int> *PtrPartition) const {
+ unsigned NumPointers = Pointers.size();
+ if (NumPointers == 0)
+ return;
+
+ OS.indent(Depth) << "Run-time memory checks:\n";
+ unsigned N = 0;
+ for (unsigned I = 0; I < NumPointers; ++I)
+ for (unsigned J = I + 1; J < NumPointers; ++J)
+ if (needsChecking(I, J, PtrPartition)) {
+ OS.indent(Depth) << N++ << ":\n";
+ OS.indent(Depth + 2) << *Pointers[I];
+ if (PtrPartition)
+ OS << " (Partition: " << (*PtrPartition)[I] << ")";
+ OS << "\n";
+ OS.indent(Depth + 2) << *Pointers[J];
+ if (PtrPartition)
+ OS << " (Partition: " << (*PtrPartition)[J] << ")";
+ OS << "\n";
+ }
+}
+
+bool LoopAccessInfo::RuntimePointerCheck::needsAnyChecking(
+ const SmallVectorImpl<int> *PtrPartition) const {
+ unsigned NumPointers = Pointers.size();
+
+ for (unsigned I = 0; I < NumPointers; ++I)
+ for (unsigned J = I + 1; J < NumPointers; ++J)
+ if (needsChecking(I, J, PtrPartition))
+ return true;
+ return false;
+}
+
+namespace {
+/// \brief Analyses memory accesses in a loop.
+///
+/// Checks whether run time pointer checks are needed and builds sets for data
+/// dependence checking.
+class AccessAnalysis {
+public:
+ /// \brief Read or write access location.
+ typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
+ typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet;
+
+ AccessAnalysis(const DataLayout &Dl, AliasAnalysis *AA, LoopInfo *LI,
+ MemoryDepChecker::DepCandidates &DA)
+ : DL(Dl), AST(*AA), LI(LI), DepCands(DA), IsRTCheckNeeded(false) {}
+
+ /// \brief Register a load and whether it is only read from.
+ void addLoad(AliasAnalysis::Location &Loc, bool IsReadOnly) {
+ Value *Ptr = const_cast<Value*>(Loc.Ptr);
+ AST.add(Ptr, AliasAnalysis::UnknownSize, Loc.AATags);
+ Accesses.insert(MemAccessInfo(Ptr, false));
+ if (IsReadOnly)
+ ReadOnlyPtr.insert(Ptr);
+ }
+
+ /// \brief Register a store.
+ void addStore(AliasAnalysis::Location &Loc) {
+ Value *Ptr = const_cast<Value*>(Loc.Ptr);
+ AST.add(Ptr, AliasAnalysis::UnknownSize, Loc.AATags);
+ Accesses.insert(MemAccessInfo(Ptr, true));
+ }
+
+ /// \brief Check whether we can check the pointers at runtime for
+ /// non-intersection.
+ bool canCheckPtrAtRT(LoopAccessInfo::RuntimePointerCheck &RtCheck,
+ unsigned &NumComparisons, ScalarEvolution *SE,
+ Loop *TheLoop, const ValueToValueMap &Strides,
+ bool ShouldCheckStride = false);
+
+ /// \brief Goes over all memory accesses, checks whether a RT check is needed
+ /// and builds sets of dependent accesses.
+ void buildDependenceSets() {
+ processMemAccesses();
+ }
+
+ bool isRTCheckNeeded() { return IsRTCheckNeeded; }
+
+ bool isDependencyCheckNeeded() { return !CheckDeps.empty(); }
+
+ /// We decided that no dependence analysis would be used. Reset the state.
+ void resetDepChecks(MemoryDepChecker &DepChecker) {
+ CheckDeps.clear();
+ DepChecker.clearInterestingDependences();
+ }
+
+ MemAccessInfoSet &getDependenciesToCheck() { return CheckDeps; }
+
+private:
+ typedef SetVector<MemAccessInfo> PtrAccessSet;
+
+ /// \brief Go over all memory accesses and check whether runtime pointer
+ /// checks are needed and build sets of dependency check candidates.
+ void processMemAccesses();
+
+ /// Set of all accesses.
+ PtrAccessSet Accesses;
+
+ const DataLayout &DL;
+
+ /// Set of accesses that need a further dependence check.
+ MemAccessInfoSet CheckDeps;
+
+ /// Set of pointers that are read only.
+ SmallPtrSet<Value*, 16> ReadOnlyPtr;
+
+ /// An alias set tracker to partition the access set by underlying object and
+ /// intrinsic property (such as TBAA metadata).
+ AliasSetTracker AST;
+
+ LoopInfo *LI;
+
+ /// Sets of potentially dependent accesses - members of one set share an
+ /// underlying pointer. The set "CheckDeps" identfies which sets really need a
+ /// dependence check.
+ MemoryDepChecker::DepCandidates &DepCands;
+
+ bool IsRTCheckNeeded;
+};
+
+} // end anonymous namespace
+
+/// \brief Check whether a pointer can participate in a runtime bounds check.
+static bool hasComputableBounds(ScalarEvolution *SE,
+ const ValueToValueMap &Strides, Value *Ptr) {
+ const SCEV *PtrScev = replaceSymbolicStrideSCEV(SE, Strides, Ptr);
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
+ if (!AR)
+ return false;
+
+ return AR->isAffine();
+}
+
+/// \brief Check the stride of the pointer and ensure that it does not wrap in
+/// the address space.
+static int isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp,
+ const ValueToValueMap &StridesMap);
+
+bool AccessAnalysis::canCheckPtrAtRT(
+ LoopAccessInfo::RuntimePointerCheck &RtCheck, unsigned &NumComparisons,
+ ScalarEvolution *SE, Loop *TheLoop, const ValueToValueMap &StridesMap,
+ bool ShouldCheckStride) {
+ // Find pointers with computable bounds. We are going to use this information
+ // to place a runtime bound check.
+ bool CanDoRT = true;
+
+ bool IsDepCheckNeeded = isDependencyCheckNeeded();
+ NumComparisons = 0;
+
+ // We assign consecutive ids to accesses from different alias sets.
+ // Accesses between different groups don't need to be checked.
+ unsigned ASId = 1;
+ for (auto &AS : AST) {
+ unsigned NumReadPtrChecks = 0;
+ unsigned NumWritePtrChecks = 0;
+
+ // We assign consecutive ids to accesses from different dependence sets.
+ // Accesses within the same set don't need a runtime check.
+ unsigned RunningDepId = 1;
+ DenseMap<Value *, unsigned> DepSetId;
+
+ for (auto A : AS) {
+ Value *Ptr = A.getValue();
+ bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true));
+ MemAccessInfo Access(Ptr, IsWrite);
+
+ if (IsWrite)
+ ++NumWritePtrChecks;
+ else
+ ++NumReadPtrChecks;
+
+ if (hasComputableBounds(SE, StridesMap, Ptr) &&
+ // When we run after a failing dependency check we have to make sure
+ // we don't have wrapping pointers.
+ (!ShouldCheckStride ||
+ isStridedPtr(SE, Ptr, TheLoop, StridesMap) == 1)) {
+ // The id of the dependence set.
+ unsigned DepId;
+
+ if (IsDepCheckNeeded) {
+ Value *Leader = DepCands.getLeaderValue(Access).getPointer();
+ unsigned &LeaderId = DepSetId[Leader];
+ if (!LeaderId)
+ LeaderId = RunningDepId++;
+ DepId = LeaderId;
+ } else
+ // Each access has its own dependence set.
+ DepId = RunningDepId++;
+
+ RtCheck.insert(SE, TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap);
+
+ DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
+ } else {
+ DEBUG(dbgs() << "LAA: Can't find bounds for ptr:" << *Ptr << '\n');
+ CanDoRT = false;
+ }
+ }
+
+ if (IsDepCheckNeeded && CanDoRT && RunningDepId == 2)
+ NumComparisons += 0; // Only one dependence set.
+ else {
+ NumComparisons += (NumWritePtrChecks * (NumReadPtrChecks +
+ NumWritePtrChecks - 1));
+ }
+
+ ++ASId;
+ }
+
+ // If the pointers that we would use for the bounds comparison have different
+ // address spaces, assume the values aren't directly comparable, so we can't
+ // use them for the runtime check. We also have to assume they could
+ // overlap. In the future there should be metadata for whether address spaces
+ // are disjoint.
+ unsigned NumPointers = RtCheck.Pointers.size();
+ for (unsigned i = 0; i < NumPointers; ++i) {
+ for (unsigned j = i + 1; j < NumPointers; ++j) {
+ // Only need to check pointers between two different dependency sets.
+ if (RtCheck.DependencySetId[i] == RtCheck.DependencySetId[j])
+ continue;
+ // Only need to check pointers in the same alias set.
+ if (RtCheck.AliasSetId[i] != RtCheck.AliasSetId[j])
+ continue;
+
+ Value *PtrI = RtCheck.Pointers[i];
+ Value *PtrJ = RtCheck.Pointers[j];
+
+ unsigned ASi = PtrI->getType()->getPointerAddressSpace();
+ unsigned ASj = PtrJ->getType()->getPointerAddressSpace();
+ if (ASi != ASj) {
+ DEBUG(dbgs() << "LAA: Runtime check would require comparison between"
+ " different address spaces\n");
+ return false;
+ }
+ }
+ }
+
+ return CanDoRT;
+}
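+
+// Worked example for the comparison count above (illustrative numbers): an
+// alias set with 2 write pointers and 3 read pointers spread over several
+// dependence sets contributes 2 * (3 + 2 - 1) = 8 pointer-pair comparisons,
+// whereas a set whose accesses all fall into a single dependence set
+// contributes none when dependence checking is enabled.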
+
+void AccessAnalysis::processMemAccesses() {
+ // We process the set twice: first we process read-write pointers, last we
+ // process read-only pointers. This allows us to skip dependence tests for
+ // read-only pointers.
+
+ DEBUG(dbgs() << "LAA: Processing memory accesses...\n");
+ DEBUG(dbgs() << " AST: "; AST.dump());
+ DEBUG(dbgs() << "LAA: Accesses(" << Accesses.size() << "):\n");
+ DEBUG({
+ for (auto A : Accesses)
+ dbgs() << "\t" << *A.getPointer() << " (" <<
+ (A.getInt() ? "write" : (ReadOnlyPtr.count(A.getPointer()) ?
+ "read-only" : "read")) << ")\n";
+ });
+
+ // The AliasSetTracker has nicely partitioned our pointers by metadata
+ // compatibility and potential for underlying-object overlap. As a result, we
+ // only need to check for potential pointer dependencies within each alias
+ // set.
+ for (auto &AS : AST) {
+ // Note that both the alias-set tracker and the alias sets themselves use
+ // linked lists internally, so the iteration order here is deterministic
+ // (matching the original instruction order within each set).
+
+ bool SetHasWrite = false;
+
+ // Map of pointers to last access encountered.
+ typedef DenseMap<Value*, MemAccessInfo> UnderlyingObjToAccessMap;
+ UnderlyingObjToAccessMap ObjToLastAccess;
+
+ // Set of accesses to check after all writes have been processed.
+ PtrAccessSet DeferredAccesses;
+
+ // Iterate over each alias set twice, once to process read/write pointers,
+ // and then to process read-only pointers.
+ for (int SetIteration = 0; SetIteration < 2; ++SetIteration) {
+ bool UseDeferred = SetIteration > 0;
+ PtrAccessSet &S = UseDeferred ? DeferredAccesses : Accesses;
+
+ for (auto AV : AS) {
+ Value *Ptr = AV.getValue();
+
+ // For a single memory access in AliasSetTracker, Accesses may contain
+ // both read and write, and they both need to be handled for CheckDeps.
+ for (auto AC : S) {
+ if (AC.getPointer() != Ptr)
+ continue;
+
+ bool IsWrite = AC.getInt();
+
+ // If we're using the deferred access set, then it contains only
+ // reads.
+ bool IsReadOnlyPtr = ReadOnlyPtr.count(Ptr) && !IsWrite;
+ if (UseDeferred && !IsReadOnlyPtr)
+ continue;
+ // Otherwise, the pointer must be in the PtrAccessSet, either as a
+ // read or a write.
+ assert(((IsReadOnlyPtr && UseDeferred) || IsWrite ||
+ S.count(MemAccessInfo(Ptr, false))) &&
+ "Alias-set pointer not in the access set?");
+
+ MemAccessInfo Access(Ptr, IsWrite);
+ DepCands.insert(Access);
+
+ // Memorize read-only pointers for later processing and skip them in
+ // the first round (they need to be checked after we have seen all
+ // write pointers). Note: we also mark pointers that are not
+ // consecutive as "read-only" pointers (so that we check
+ // "a[b[i]] +="). Hence, we need the second check for "!IsWrite".
+ if (!UseDeferred && IsReadOnlyPtr) {
+ DeferredAccesses.insert(Access);
+ continue;
+ }
+
+ // If this is a write - check other reads and writes for conflicts. If
+ // this is a read, only check other writes for conflicts (but only if
+ // there is no other write to the ptr - this is an optimization to
+ // catch "a[i] = a[i] + " without having to do a dependence check).
+ if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) {
+ CheckDeps.insert(Access);
+ IsRTCheckNeeded = true;
+ }
+
+ if (IsWrite)
+ SetHasWrite = true;
+
+ // Create sets of pointers connected by a shared alias set and
+ // underlying object.
+ typedef SmallVector<Value *, 16> ValueVector;
+ ValueVector TempObjects;
+
+ GetUnderlyingObjects(Ptr, TempObjects, DL, LI);
+ DEBUG(dbgs() << "Underlying objects for pointer " << *Ptr << "\n");
+ for (Value *UnderlyingObj : TempObjects) {
+ UnderlyingObjToAccessMap::iterator Prev =
+ ObjToLastAccess.find(UnderlyingObj);
+ if (Prev != ObjToLastAccess.end())
+ DepCands.unionSets(Access, Prev->second);
+
+ ObjToLastAccess[UnderlyingObj] = Access;
+ DEBUG(dbgs() << " " << *UnderlyingObj << "\n");
+ }
+ }
+ }
+ }
+ }
+}
+
+static bool isInBoundsGep(Value *Ptr) {
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr))
+ return GEP->isInBounds();
+ return false;
+}
+
+/// \brief Check whether the access through \p Ptr has a constant stride.
+static int isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp,
+ const ValueToValueMap &StridesMap) {
+ const Type *Ty = Ptr->getType();
+ assert(Ty->isPointerTy() && "Unexpected non-ptr");
+
+ // Make sure that the pointer does not point to aggregate types.
+ const PointerType *PtrTy = cast<PointerType>(Ty);
+ if (PtrTy->getElementType()->isAggregateType()) {
+ DEBUG(dbgs() << "LAA: Bad stride - Not a pointer to a scalar type"
+ << *Ptr << "\n");
+ return 0;
+ }
+
+ const SCEV *PtrScev = replaceSymbolicStrideSCEV(SE, StridesMap, Ptr);
+
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
+ if (!AR) {
+ DEBUG(dbgs() << "LAA: Bad stride - Not an AddRecExpr pointer "
+ << *Ptr << " SCEV: " << *PtrScev << "\n");
+ return 0;
+ }
+
+ // The access function must stride over the innermost loop.
+ if (Lp != AR->getLoop()) {
+ DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop " <<
+ *Ptr << " SCEV: " << *PtrScev << "\n");
+ }
+
+ // The address calculation must not wrap. Otherwise, a dependence could be
+ // inverted.
+ // An inbounds getelementptr that is an AddRec with a unit stride
+ // cannot wrap per definition. The unit stride requirement is checked later.
+ // A getelementptr without an inbounds attribute and unit stride would have
+ // to access the pointer value "0" which is undefined behavior in address
+ // space 0, therefore we can also vectorize this case.
+ bool IsInBoundsGEP = isInBoundsGep(Ptr);
+ bool IsNoWrapAddRec = AR->getNoWrapFlags(SCEV::NoWrapMask);
+ bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0;
+ if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) {
+ DEBUG(dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "
+ << *Ptr << " SCEV: " << *PtrScev << "\n");
+ return 0;
+ }
+
+ // Check the step is constant.
+ const SCEV *Step = AR->getStepRecurrence(*SE);
+
+ // Calculate the pointer stride and check if it is consecutive.
+ const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
+ if (!C) {
+ DEBUG(dbgs() << "LAA: Bad stride - Not a constant strided " << *Ptr <<
+ " SCEV: " << *PtrScev << "\n");
+ return 0;
+ }
+
+ auto &DL = Lp->getHeader()->getModule()->getDataLayout();
+ int64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());
+ const APInt &APStepVal = C->getValue()->getValue();
+
+ // Huge step value - give up.
+ if (APStepVal.getBitWidth() > 64)
+ return 0;
+
+ int64_t StepVal = APStepVal.getSExtValue();
+
+ // Strided access.
+ int64_t Stride = StepVal / Size;
+ int64_t Rem = StepVal % Size;
+ if (Rem)
+ return 0;
+
+ // If the SCEV could wrap but we have an inbounds gep with a unit stride we
+ // know we can't "wrap around the address space". In case of address space
+ // zero we know that this won't happen without triggering undefined behavior.
+ if (!IsNoWrapAddRec && (IsInBoundsGEP || IsInAddressSpaceZero) &&
+ Stride != 1 && Stride != -1)
+ return 0;
+
+ return Stride;
+}
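+
+// Worked example for the stride computation above (illustrative numbers):
+// for an i32 element type (alloc size 4) and a SCEV step of +8 bytes per
+// iteration, StepVal / Size = 2 with zero remainder, so a stride of 2 is
+// returned (provided the AddRec is known not to wrap); a step of +6 bytes
+// would leave a remainder and the access would be rejected as non-strided.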
+
+bool MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
+ switch (Type) {
+ case NoDep:
+ case Forward:
+ case BackwardVectorizable:
+ return true;
+
+ case Unknown:
+ case ForwardButPreventsForwarding:
+ case Backward:
+ case BackwardVectorizableButPreventsForwarding:
+ return false;
+ }
+ llvm_unreachable("unexpected DepType!");
+}
+
+bool MemoryDepChecker::Dependence::isInterestingDependence(DepType Type) {
+ switch (Type) {
+ case NoDep:
+ case Forward:
+ return false;
+
+ case BackwardVectorizable:
+ case Unknown:
+ case ForwardButPreventsForwarding:
+ case Backward:
+ case BackwardVectorizableButPreventsForwarding:
+ return true;
+ }
+ llvm_unreachable("unexpected DepType!");
+}
+
+bool MemoryDepChecker::Dependence::isPossiblyBackward() const {
+ switch (Type) {
+ case NoDep:
+ case Forward:
+ case ForwardButPreventsForwarding:
+ return false;
+
+ case Unknown:
+ case BackwardVectorizable:
+ case Backward:
+ case BackwardVectorizableButPreventsForwarding:
+ return true;
+ }
+ llvm_unreachable("unexpected DepType!");
+}
+
+bool MemoryDepChecker::couldPreventStoreLoadForward(unsigned Distance,
+ unsigned TypeByteSize) {
+ // If loads occur at a distance that is not a multiple of a feasible vector
+ // factor store-load forwarding does not take place.
+ // Positive dependences might cause troubles because vectorizing them might
+ // prevent store-load forwarding making vectorized code run a lot slower.
+ // a[i] = a[i-3] ^ a[i-8];
+ // The stores to a[i:i+1] don't align with the stores to a[i-3:i-2] and
+ // hence on your typical architecture store-load forwarding does not take
+ // place. Vectorizing in such cases does not make sense.
+ // Store-load forwarding distance.
+ const unsigned NumCyclesForStoreLoadThroughMemory = 8*TypeByteSize;
+ // Maximum vector factor.
+ unsigned MaxVFWithoutSLForwardIssues =
+ VectorizerParams::MaxVectorWidth * TypeByteSize;
+ if (MaxSafeDepDistBytes < MaxVFWithoutSLForwardIssues)
+ MaxVFWithoutSLForwardIssues = MaxSafeDepDistBytes;
+
+ for (unsigned vf = 2*TypeByteSize; vf <= MaxVFWithoutSLForwardIssues;
+ vf *= 2) {
+ if (Distance % vf && Distance / vf < NumCyclesForStoreLoadThroughMemory) {
+ MaxVFWithoutSLForwardIssues = (vf >>=1);
+ break;
+ }
+ }
+
+ if (MaxVFWithoutSLForwardIssues < 2*TypeByteSize) {
+ DEBUG(dbgs() << "LAA: Distance " << Distance <<
+ " that could cause a store-load forwarding conflict\n");
+ return true;
+ }
+
+ if (MaxVFWithoutSLForwardIssues < MaxSafeDepDistBytes &&
+ MaxVFWithoutSLForwardIssues !=
+ VectorizerParams::MaxVectorWidth * TypeByteSize)
+ MaxSafeDepDistBytes = MaxVFWithoutSLForwardIssues;
+ return false;
+}
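+
+// Worked example for the check above (illustrative numbers): with 4-byte
+// elements and Distance = 12, as in a[i] = a[i-3], the loop starts at
+// vf = 8; 12 % 8 != 0 and 12 / 8 = 1 < 32 cycles, so the maximum VF is
+// halved to 4, which is below 2 * TypeByteSize = 8, and the function reports
+// a potential store-load forwarding conflict.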
+
+MemoryDepChecker::Dependence::DepType
+MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
+ const MemAccessInfo &B, unsigned BIdx,
+ const ValueToValueMap &Strides) {
+ assert (AIdx < BIdx && "Must pass arguments in program order");
+
+ Value *APtr = A.getPointer();
+ Value *BPtr = B.getPointer();
+ bool AIsWrite = A.getInt();
+ bool BIsWrite = B.getInt();
+
+ // Two reads are independent.
+ if (!AIsWrite && !BIsWrite)
+ return Dependence::NoDep;
+
+ // We cannot check pointers in different address spaces.
+ if (APtr->getType()->getPointerAddressSpace() !=
+ BPtr->getType()->getPointerAddressSpace())
+ return Dependence::Unknown;
+
+ const SCEV *AScev = replaceSymbolicStrideSCEV(SE, Strides, APtr);
+ const SCEV *BScev = replaceSymbolicStrideSCEV(SE, Strides, BPtr);
+
+ int StrideAPtr = isStridedPtr(SE, APtr, InnermostLoop, Strides);
+ int StrideBPtr = isStridedPtr(SE, BPtr, InnermostLoop, Strides);
+
+ const SCEV *Src = AScev;
+ const SCEV *Sink = BScev;
+
+ // If the induction step is negative we have to invert source and sink of the
+ // dependence.
+ if (StrideAPtr < 0) {
+ //Src = BScev;
+ //Sink = AScev;
+ std::swap(APtr, BPtr);
+ std::swap(Src, Sink);
+ std::swap(AIsWrite, BIsWrite);
+ std::swap(AIdx, BIdx);
+ std::swap(StrideAPtr, StrideBPtr);
+ }
+
+ const SCEV *Dist = SE->getMinusSCEV(Sink, Src);
+
+ DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink
+ << "(Induction step: " << StrideAPtr << ")\n");
+ DEBUG(dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to "
+ << *InstMap[BIdx] << ": " << *Dist << "\n");
+
+ // Need consecutive accesses. We don't want to vectorize
+ // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in
+ // the address space.
+ if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr){
+ DEBUG(dbgs() << "Non-consecutive pointer access\n");
+ return Dependence::Unknown;
+ }
+
+ const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
+ if (!C) {
+ DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n");
+ ShouldRetryWithRuntimeCheck = true;
+ return Dependence::Unknown;
+ }
+
+ Type *ATy = APtr->getType()->getPointerElementType();
+ Type *BTy = BPtr->getType()->getPointerElementType();
+ auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
+ unsigned TypeByteSize = DL.getTypeAllocSize(ATy);
+
+ // Negative distances are not plausible dependencies.
+ const APInt &Val = C->getValue()->getValue();
+ if (Val.isNegative()) {
+ bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
+ if (IsTrueDataDependence &&
+ (couldPreventStoreLoadForward(Val.abs().getZExtValue(), TypeByteSize) ||
+ ATy != BTy))
+ return Dependence::ForwardButPreventsForwarding;
+
+ DEBUG(dbgs() << "LAA: Dependence is negative: NoDep\n");
+ return Dependence::Forward;
+ }
+
+ // Write to the same location with the same size.
+ // Could be improved to assert type sizes are the same (i32 == float, etc).
+ if (Val == 0) {
+ if (ATy == BTy)
+ return Dependence::NoDep;
+ DEBUG(dbgs() << "LAA: Zero dependence difference but different types\n");
+ return Dependence::Unknown;
+ }
+
+ assert(Val.isStrictlyPositive() && "Expect a positive value");
+
+ if (ATy != BTy) {
+ DEBUG(dbgs() <<
+ "LAA: ReadWrite-Write positive dependency with different types\n");
+ return Dependence::Unknown;
+ }
+
+ unsigned Distance = (unsigned) Val.getZExtValue();
+
+ // Bail out early if passed-in parameters make vectorization not feasible.
+ unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ?
+ VectorizerParams::VectorizationFactor : 1);
+ unsigned ForcedUnroll = (VectorizerParams::VectorizationInterleave ?
+ VectorizerParams::VectorizationInterleave : 1);
+
+ // The distance must be bigger than the size needed for a vectorized version
+ // of the operation and the size of the vectorized operation must not be
+ // bigger than the current maximum size.
+ if (Distance < 2*TypeByteSize ||
+ 2*TypeByteSize > MaxSafeDepDistBytes ||
+ Distance < TypeByteSize * ForcedUnroll * ForcedFactor) {
+ DEBUG(dbgs() << "LAA: Failure because of Positive distance "
+ << Val.getSExtValue() << '\n');
+ return Dependence::Backward;
+ }
+
+ // Positive distance bigger than max vectorization factor.
+ MaxSafeDepDistBytes = Distance < MaxSafeDepDistBytes ?
+ Distance : MaxSafeDepDistBytes;
+
+ bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
+ if (IsTrueDataDependence &&
+ couldPreventStoreLoadForward(Distance, TypeByteSize))
+ return Dependence::BackwardVectorizableButPreventsForwarding;
+
+ DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue() <<
+ " with max VF = " << MaxSafeDepDistBytes / TypeByteSize << '\n');
+
+ return Dependence::BackwardVectorizable;
+}
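+
+// Worked example for the final distance check above (illustrative numbers):
+// for i32 accesses (TypeByteSize = 4) and a constant distance of 4 bytes,
+// such as the pair a[i] and a[i+1], Distance < 2 * TypeByteSize holds, so
+// the dependence is classified as Backward and vectorization is not safe.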
+
+bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
+ MemAccessInfoSet &CheckDeps,
+ const ValueToValueMap &Strides) {
+
+ MaxSafeDepDistBytes = -1U;
+ while (!CheckDeps.empty()) {
+ MemAccessInfo CurAccess = *CheckDeps.begin();
+
+ // Get the relevant memory access set.
+ EquivalenceClasses<MemAccessInfo>::iterator I =
+ AccessSets.findValue(AccessSets.getLeaderValue(CurAccess));
+
+ // Check accesses within this set.
+ EquivalenceClasses<MemAccessInfo>::member_iterator AI, AE;
+ AI = AccessSets.member_begin(I), AE = AccessSets.member_end();
+
+ // Check every access pair.
+ while (AI != AE) {
+ CheckDeps.erase(*AI);
+ EquivalenceClasses<MemAccessInfo>::member_iterator OI = std::next(AI);
+ while (OI != AE) {
+ // Check every accessing instruction pair in program order.
+ for (std::vector<unsigned>::iterator I1 = Accesses[*AI].begin(),
+ I1E = Accesses[*AI].end(); I1 != I1E; ++I1)
+ for (std::vector<unsigned>::iterator I2 = Accesses[*OI].begin(),
+ I2E = Accesses[*OI].end(); I2 != I2E; ++I2) {
+ auto A = std::make_pair(&*AI, *I1);
+ auto B = std::make_pair(&*OI, *I2);
+
+ assert(*I1 != *I2);
+ if (*I1 > *I2)
+ std::swap(A, B);
+
+ Dependence::DepType Type =
+ isDependent(*A.first, A.second, *B.first, B.second, Strides);
+ SafeForVectorization &= Dependence::isSafeForVectorization(Type);
+
+ // Gather dependences unless we accumulated MaxInterestingDependence
+ // dependences. In that case return as soon as we find the first
+ // unsafe dependence. This puts a limit on this quadratic
+ // algorithm.
+ if (RecordInterestingDependences) {
+ if (Dependence::isInterestingDependence(Type))
+ InterestingDependences.push_back(
+ Dependence(A.second, B.second, Type));
+
+ if (InterestingDependences.size() >= MaxInterestingDependence) {
+ RecordInterestingDependences = false;
+ InterestingDependences.clear();
+ DEBUG(dbgs() << "Too many dependences, stopped recording\n");
+ }
+ }
+ if (!RecordInterestingDependences && !SafeForVectorization)
+ return false;
+ }
+ ++OI;
+ }
+ AI++;
+ }
+ }
+
+ DEBUG(dbgs() << "Total Interesting Dependences: "
+ << InterestingDependences.size() << "\n");
+ return SafeForVectorization;
+}
+
+SmallVector<Instruction *, 4>
+MemoryDepChecker::getInstructionsForAccess(Value *Ptr, bool isWrite) const {
+ MemAccessInfo Access(Ptr, isWrite);
+ auto &IndexVector = Accesses.find(Access)->second;
+
+ SmallVector<Instruction *, 4> Insts;
+ std::transform(IndexVector.begin(), IndexVector.end(),
+ std::back_inserter(Insts),
+ [&](unsigned Idx) { return this->InstMap[Idx]; });
+ return Insts;
+}
+
+const char *MemoryDepChecker::Dependence::DepName[] = {
+ "NoDep", "Unknown", "Forward", "ForwardButPreventsForwarding", "Backward",
+ "BackwardVectorizable", "BackwardVectorizableButPreventsForwarding"};
+
+void MemoryDepChecker::Dependence::print(
+ raw_ostream &OS, unsigned Depth,
+ const SmallVectorImpl<Instruction *> &Instrs) const {
+ OS.indent(Depth) << DepName[Type] << ":\n";
+ OS.indent(Depth + 2) << *Instrs[Source] << " -> \n";
+ OS.indent(Depth + 2) << *Instrs[Destination] << "\n";
+}
+
+bool LoopAccessInfo::canAnalyzeLoop() {
+ // We need to have a loop header.
+ DEBUG(dbgs() << "LAA: Found a loop: " <<
+ TheLoop->getHeader()->getName() << '\n');
+
+ // We can only analyze innermost loops.
+ if (!TheLoop->empty()) {
+ DEBUG(dbgs() << "LAA: loop is not the innermost loop\n");
+ emitAnalysis(LoopAccessReport() << "loop is not the innermost loop");
+ return false;
+ }
+
+ // We must have a single backedge.
+ if (TheLoop->getNumBackEdges() != 1) {
+ DEBUG(dbgs() << "LAA: loop control flow is not understood by analyzer\n");
+ emitAnalysis(
+ LoopAccessReport() <<
+ "loop control flow is not understood by analyzer");
+ return false;
+ }
+
+ // We must have a single exiting block.
+ if (!TheLoop->getExitingBlock()) {
+ DEBUG(dbgs() << "LAA: loop control flow is not understood by analyzer\n");
+ emitAnalysis(
+ LoopAccessReport() <<
+ "loop control flow is not understood by analyzer");
+ return false;
+ }
+
+ // We only handle bottom-tested loops, i.e. loops in which the condition is
+ // checked at the end of each iteration. With that we can assume that all
+ // instructions in the loop are executed the same number of times.
+ if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
+ DEBUG(dbgs() << "LAA: loop control flow is not understood by analyzer\n");
+ emitAnalysis(
+ LoopAccessReport() <<
+ "loop control flow is not understood by analyzer");
+ return false;
+ }
+
+ // ScalarEvolution needs to be able to find the exit count.
+ const SCEV *ExitCount = SE->getBackedgeTakenCount(TheLoop);
+ if (ExitCount == SE->getCouldNotCompute()) {
+ emitAnalysis(LoopAccessReport() <<
+ "could not determine number of loop iterations");
+ DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n");
+ return false;
+ }
+
+ return true;
+}
+
+void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {
+
+ typedef SmallVector<Value*, 16> ValueVector;
+ typedef SmallPtrSet<Value*, 16> ValueSet;
+
+ // Holds the Load and Store *instructions*.
+ ValueVector Loads;
+ ValueVector Stores;
+
+ // Holds all the different accesses in the loop.
+ unsigned NumReads = 0;
+ unsigned NumReadWrites = 0;
+
+ PtrRtCheck.Pointers.clear();
+ PtrRtCheck.Need = false;
+
+ const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
+
+ // For each block.
+ for (Loop::block_iterator bb = TheLoop->block_begin(),
+ be = TheLoop->block_end(); bb != be; ++bb) {
+
+ // Scan the BB and collect legal loads and stores.
+ for (BasicBlock::iterator it = (*bb)->begin(), e = (*bb)->end(); it != e;
+ ++it) {
+
+ // If this is a load, save it. If this instruction can read from memory
+ // but is not a load, then we quit. Notice that we don't handle function
+ // calls that read or write.
+ if (it->mayReadFromMemory()) {
+ // Many math library functions read the rounding mode. We will only
+ // vectorize a loop if it contains known function calls that don't set
+ // the flag. Therefore, it is safe to ignore this read from memory.
+ CallInst *Call = dyn_cast<CallInst>(it);
+ if (Call && getIntrinsicIDForCall(Call, TLI))
+ continue;
+
+ // If the function has an explicit vectorized counterpart, we can safely
+ // assume that it can be vectorized.
+ if (Call && !Call->isNoBuiltin() && Call->getCalledFunction() &&
+ TLI->isFunctionVectorizable(Call->getCalledFunction()->getName()))
+ continue;
+
+ LoadInst *Ld = dyn_cast<LoadInst>(it);
+ if (!Ld || (!Ld->isSimple() && !IsAnnotatedParallel)) {
+ emitAnalysis(LoopAccessReport(Ld)
+ << "read with atomic ordering or volatile read");
+ DEBUG(dbgs() << "LAA: Found a non-simple load.\n");
+ CanVecMem = false;
+ return;
+ }
+ NumLoads++;
+ Loads.push_back(Ld);
+ DepChecker.addAccess(Ld);
+ continue;
+ }
+
+ // Save 'store' instructions. Abort if other instructions write to memory.
+ if (it->mayWriteToMemory()) {
+ StoreInst *St = dyn_cast<StoreInst>(it);
+ if (!St) {
+ emitAnalysis(LoopAccessReport(it) <<
+ "instruction cannot be vectorized");
+ CanVecMem = false;
+ return;
+ }
+ if (!St->isSimple() && !IsAnnotatedParallel) {
+ emitAnalysis(LoopAccessReport(St)
+ << "write with atomic ordering or volatile write");
+ DEBUG(dbgs() << "LAA: Found a non-simple store.\n");
+ CanVecMem = false;
+ return;
+ }
+ NumStores++;
+ Stores.push_back(St);
+ DepChecker.addAccess(St);
+ }
+ } // Next instr.
+ } // Next block.
+
+ // Now we have two lists that hold the loads and the stores.
+ // Next, we find the pointers that they use.
+
+ // Check if we see any stores. If there are no stores, then we don't
+ // care if the pointers are *restrict*.
+ if (!Stores.size()) {
+ DEBUG(dbgs() << "LAA: Found a read-only loop!\n");
+ CanVecMem = true;
+ return;
+ }
+
+ MemoryDepChecker::DepCandidates DependentAccesses;
+ AccessAnalysis Accesses(TheLoop->getHeader()->getModule()->getDataLayout(),
+ AA, LI, DependentAccesses);
+
+ // Holds the analyzed pointers. We don't want to call GetUnderlyingObjects
+ // multiple times on the same object. If the ptr is accessed twice, once
+ // for read and once for write, it will only appear once (on the write
+ // list). This is okay, since we are going to check for conflicts between
+ // writes and between reads and writes, but not between reads and reads.
+ ValueSet Seen;
+
+ ValueVector::iterator I, IE;
+ for (I = Stores.begin(), IE = Stores.end(); I != IE; ++I) {
+ StoreInst *ST = cast<StoreInst>(*I);
+ Value* Ptr = ST->getPointerOperand();
+ // Check for store to loop invariant address.
+ StoreToLoopInvariantAddress |= isUniform(Ptr);
+ // If we did *not* see this pointer before, insert it to the read-write
+ // list. At this phase it is only a 'write' list.
+ if (Seen.insert(Ptr).second) {
+ ++NumReadWrites;
+
+ AliasAnalysis::Location Loc = AA->getLocation(ST);
+ // The TBAA metadata could have a control dependency on the predication
+ // condition, so we cannot rely on it when determining whether or not we
+ // need runtime pointer checks.
+ if (blockNeedsPredication(ST->getParent(), TheLoop, DT))
+ Loc.AATags.TBAA = nullptr;
+
+ Accesses.addStore(Loc);
+ }
+ }
+
+ if (IsAnnotatedParallel) {
+ DEBUG(dbgs()
+ << "LAA: A loop annotated parallel, ignore memory dependency "
+ << "checks.\n");
+ CanVecMem = true;
+ return;
+ }
+
+ for (I = Loads.begin(), IE = Loads.end(); I != IE; ++I) {
+ LoadInst *LD = cast<LoadInst>(*I);
+ Value* Ptr = LD->getPointerOperand();
+ // If we did *not* see this pointer before, insert it to the
+ // read list. If we *did* see it before, then it is already in
+ // the read-write list. This allows us to vectorize expressions
+ // such as A[i] += x, because the address of A[i] is a read-write
+ // pointer. This only works if the index of A[i] is consecutive.
+ // If the address of i is unknown (for example A[B[i]]) then we may
+ // read a few words, modify, and write a few words, and some of the
+ // words may be written to the same address.
+ bool IsReadOnlyPtr = false;
+ if (Seen.insert(Ptr).second || !isStridedPtr(SE, Ptr, TheLoop, Strides)) {
+ ++NumReads;
+ IsReadOnlyPtr = true;
+ }
+
+ AliasAnalysis::Location Loc = AA->getLocation(LD);
+ // The TBAA metadata could have a control dependency on the predication
+ // condition, so we cannot rely on it when determining whether or not we
+ // need runtime pointer checks.
+ if (blockNeedsPredication(LD->getParent(), TheLoop, DT))
+ Loc.AATags.TBAA = nullptr;
+
+ Accesses.addLoad(Loc, IsReadOnlyPtr);
+ }
+
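
The read-write/read-only split above is easiest to see on concrete loops. The two functions below are an illustrative sketch only, not part of this patch; the array names and signatures are made up to show when a load pointer reuses the write-list entry and when it must also be tracked as a read.

    // Illustration of the classification performed above (not patch code).
    void accumulate(int *A, int x, int n) {
      // The address of A[i] is loaded and stored through the same pointer,
      // so it lands on the read-write list during the store walk. The index
      // is consecutive, so dependence checking can still prove safety.
      for (int i = 0; i < n; ++i)
        A[i] += x;
    }

    void scatter(int *A, const int *B, int x, int n) {
      // The address A[B[i]] is not a strided pointer, so its load is also
      // added to the read list: different iterations may read and write the
      // same element, which the dependence/runtime checks must account for.
      for (int i = 0; i < n; ++i)
        A[B[i]] += x;
    }
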
+ // If we write (or read-write) to a single destination and there are no
+  // other reads in this loop, then it is safe to vectorize.
+ if (NumReadWrites == 1 && NumReads == 0) {
+ DEBUG(dbgs() << "LAA: Found a write-only loop!\n");
+ CanVecMem = true;
+ return;
+ }
+
+ // Build dependence sets and check whether we need a runtime pointer bounds
+ // check.
+ Accesses.buildDependenceSets();
+ bool NeedRTCheck = Accesses.isRTCheckNeeded();
+
+ // Find pointers with computable bounds. We are going to use this information
+ // to place a runtime bound check.
+ bool CanDoRT = false;
+ if (NeedRTCheck)
+ CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons, SE, TheLoop,
+ Strides);
+
+ DEBUG(dbgs() << "LAA: We need to do " << NumComparisons <<
+ " pointer comparisons.\n");
+
+  // If we only have one set of dependences to check pointers among, we don't
+  // need a runtime check.
+ if (NumComparisons == 0 && NeedRTCheck)
+ NeedRTCheck = false;
+
+ // Check that we found the bounds for the pointer.
+ if (CanDoRT)
+ DEBUG(dbgs() << "LAA: We can perform a memory runtime check if needed.\n");
+ else if (NeedRTCheck) {
+ emitAnalysis(LoopAccessReport() << "cannot identify array bounds");
+ DEBUG(dbgs() << "LAA: We can't vectorize because we can't find " <<
+ "the array bounds.\n");
+ PtrRtCheck.reset();
+ CanVecMem = false;
+ return;
+ }
+
+ PtrRtCheck.Need = NeedRTCheck;
+
+ CanVecMem = true;
+ if (Accesses.isDependencyCheckNeeded()) {
+ DEBUG(dbgs() << "LAA: Checking memory dependencies\n");
+ CanVecMem = DepChecker.areDepsSafe(
+ DependentAccesses, Accesses.getDependenciesToCheck(), Strides);
+ MaxSafeDepDistBytes = DepChecker.getMaxSafeDepDistBytes();
+
+ if (!CanVecMem && DepChecker.shouldRetryWithRuntimeCheck()) {
+ DEBUG(dbgs() << "LAA: Retrying with memory checks\n");
+ NeedRTCheck = true;
+
+ // Clear the dependency checks. We assume they are not needed.
+ Accesses.resetDepChecks(DepChecker);
+
+ PtrRtCheck.reset();
+ PtrRtCheck.Need = true;
+
+ CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NumComparisons, SE,
+ TheLoop, Strides, true);
+ // Check that we found the bounds for the pointer.
+ if (!CanDoRT && NumComparisons > 0) {
+ emitAnalysis(LoopAccessReport()
+ << "cannot check memory dependencies at runtime");
+ DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n");
+ PtrRtCheck.reset();
+ CanVecMem = false;
+ return;
+ }
+
+ CanVecMem = true;
+ }
+ }
+
+ if (CanVecMem)
+ DEBUG(dbgs() << "LAA: No unsafe dependent memory operations in loop. We"
+ << (NeedRTCheck ? "" : " don't")
+ << " need a runtime memory check.\n");
+ else {
+ emitAnalysis(LoopAccessReport() <<
+ "unsafe dependent memory operations in loop");
+ DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n");
+ }
+}
+
+bool LoopAccessInfo::blockNeedsPredication(BasicBlock *BB, Loop *TheLoop,
+ DominatorTree *DT) {
+ assert(TheLoop->contains(BB) && "Unknown block used");
+
+ // Blocks that do not dominate the latch need predication.
+ BasicBlock* Latch = TheLoop->getLoopLatch();
+ return !DT->dominates(BB, Latch);
+}
+
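
As a concrete illustration of the dominance test above (this loop is not taken from the patch): a store guarded by a condition inside the loop body sits in a block that does not dominate the latch, so it needs predication and its TBAA metadata is ignored when deciding on runtime pointer checks.

    // Illustrative only: the block containing "A[i] = 0" is entered only when
    // C[i] > 0, while the latch is reached on every iteration, so that block
    // does not dominate the latch and blockNeedsPredication() returns true.
    void conditional_store(int *A, const int *C, int n) {
      for (int i = 0; i < n; ++i) {
        if (C[i] > 0)
          A[i] = 0;
      }
    }
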
+void LoopAccessInfo::emitAnalysis(LoopAccessReport &Message) {
+ assert(!Report && "Multiple reports generated");
+ Report = Message;
+}
+
+bool LoopAccessInfo::isUniform(Value *V) const {
+ return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop));
+}
+
+// FIXME: this function is currently a duplicate of the one in
+// LoopVectorize.cpp.
+static Instruction *getFirstInst(Instruction *FirstInst, Value *V,
+ Instruction *Loc) {
+ if (FirstInst)
+ return FirstInst;
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ return I->getParent() == Loc->getParent() ? I : nullptr;
+ return nullptr;
+}
+
+std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeCheck(
+ Instruction *Loc, const SmallVectorImpl<int> *PtrPartition) const {
+ if (!PtrRtCheck.Need)
+ return std::make_pair(nullptr, nullptr);
+
+ unsigned NumPointers = PtrRtCheck.Pointers.size();
+ SmallVector<TrackingVH<Value> , 2> Starts;
+ SmallVector<TrackingVH<Value> , 2> Ends;
+
+ LLVMContext &Ctx = Loc->getContext();
+ SCEVExpander Exp(*SE, DL, "induction");
+ Instruction *FirstInst = nullptr;
+
+ for (unsigned i = 0; i < NumPointers; ++i) {
+ Value *Ptr = PtrRtCheck.Pointers[i];
+ const SCEV *Sc = SE->getSCEV(Ptr);
+
+ if (SE->isLoopInvariant(Sc, TheLoop)) {
+ DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" <<
+ *Ptr <<"\n");
+ Starts.push_back(Ptr);
+ Ends.push_back(Ptr);
+ } else {
+ DEBUG(dbgs() << "LAA: Adding RT check for range:" << *Ptr << '\n');
+ unsigned AS = Ptr->getType()->getPointerAddressSpace();
+
+ // Use this type for pointer arithmetic.
+ Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS);
+
+ Value *Start = Exp.expandCodeFor(PtrRtCheck.Starts[i], PtrArithTy, Loc);
+ Value *End = Exp.expandCodeFor(PtrRtCheck.Ends[i], PtrArithTy, Loc);
+ Starts.push_back(Start);
+ Ends.push_back(End);
+ }
+ }
+
+ IRBuilder<> ChkBuilder(Loc);
+ // Our instructions might fold to a constant.
+ Value *MemoryRuntimeCheck = nullptr;
+ for (unsigned i = 0; i < NumPointers; ++i) {
+ for (unsigned j = i+1; j < NumPointers; ++j) {
+ if (!PtrRtCheck.needsChecking(i, j, PtrPartition))
+ continue;
+
+ unsigned AS0 = Starts[i]->getType()->getPointerAddressSpace();
+ unsigned AS1 = Starts[j]->getType()->getPointerAddressSpace();
+
+ assert((AS0 == Ends[j]->getType()->getPointerAddressSpace()) &&
+ (AS1 == Ends[i]->getType()->getPointerAddressSpace()) &&
+ "Trying to bounds check pointers with different address spaces");
+
+ Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0);
+ Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1);
+
+ Value *Start0 = ChkBuilder.CreateBitCast(Starts[i], PtrArithTy0, "bc");
+ Value *Start1 = ChkBuilder.CreateBitCast(Starts[j], PtrArithTy1, "bc");
+ Value *End0 = ChkBuilder.CreateBitCast(Ends[i], PtrArithTy1, "bc");
+ Value *End1 = ChkBuilder.CreateBitCast(Ends[j], PtrArithTy0, "bc");
+
+ Value *Cmp0 = ChkBuilder.CreateICmpULE(Start0, End1, "bound0");
+ FirstInst = getFirstInst(FirstInst, Cmp0, Loc);
+ Value *Cmp1 = ChkBuilder.CreateICmpULE(Start1, End0, "bound1");
+ FirstInst = getFirstInst(FirstInst, Cmp1, Loc);
+ Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict");
+ FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
+ if (MemoryRuntimeCheck) {
+ IsConflict = ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict,
+ "conflict.rdx");
+ FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
+ }
+ MemoryRuntimeCheck = IsConflict;
+ }
+ }
+
+ if (!MemoryRuntimeCheck)
+ return std::make_pair(nullptr, nullptr);
+
+ // We have to do this trickery because the IRBuilder might fold the check to a
+  // constant expression, in which case there is no Instruction anchored in
+  // the block.
+ Instruction *Check = BinaryOperator::CreateAnd(MemoryRuntimeCheck,
+ ConstantInt::getTrue(Ctx));
+ ChkBuilder.Insert(Check, "memcheck.conflict");
+ FirstInst = getFirstInst(FirstInst, Check, Loc);
+ return std::make_pair(FirstInst, Check);
+}
+
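
The pairwise IR emitted above boils down to an interval-overlap test between the start and end bounds recorded for each pointer. The helper below is a plain C++ sketch of that predicate, not code from the patch; the unsigned less-or-equal comparisons mirror the CreateICmpULE calls.

    #include <cstdint>

    // Two checked pointer ranges may conflict exactly when each one starts no
    // later than the other one ends; "bound0", "bound1" and "found.conflict"
    // in the generated IR compute the same condition.
    bool mayConflict(uint64_t Start0, uint64_t End0,
                     uint64_t Start1, uint64_t End1) {
      return Start0 <= End1 && Start1 <= End0;
    }
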
+LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
+ const DataLayout &DL,
+ const TargetLibraryInfo *TLI, AliasAnalysis *AA,
+ DominatorTree *DT, LoopInfo *LI,
+ const ValueToValueMap &Strides)
+ : DepChecker(SE, L), NumComparisons(0), TheLoop(L), SE(SE), DL(DL),
+ TLI(TLI), AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0),
+ MaxSafeDepDistBytes(-1U), CanVecMem(false),
+ StoreToLoopInvariantAddress(false) {
+ if (canAnalyzeLoop())
+ analyzeLoop(Strides);
+}
+
+void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
+ if (CanVecMem) {
+ if (PtrRtCheck.Need)
+ OS.indent(Depth) << "Memory dependences are safe with run-time checks\n";
+ else
+ OS.indent(Depth) << "Memory dependences are safe\n";
+ }
+
+ if (Report)
+ OS.indent(Depth) << "Report: " << Report->str() << "\n";
+
+ if (auto *InterestingDependences = DepChecker.getInterestingDependences()) {
+ OS.indent(Depth) << "Interesting Dependences:\n";
+ for (auto &Dep : *InterestingDependences) {
+ Dep.print(OS, Depth + 2, DepChecker.getMemoryInstructions());
+ OS << "\n";
+ }
+ } else
+ OS.indent(Depth) << "Too many interesting dependences, not recorded\n";
+
+  // List the pairs of accesses that need run-time checks to prove independence.
+ PtrRtCheck.print(OS, Depth);
+ OS << "\n";
+
+ OS.indent(Depth) << "Store to invariant address was "
+ << (StoreToLoopInvariantAddress ? "" : "not ")
+ << "found in loop.\n";
+}
+
+const LoopAccessInfo &
+LoopAccessAnalysis::getInfo(Loop *L, const ValueToValueMap &Strides) {
+ auto &LAI = LoopAccessInfoMap[L];
+
+#ifndef NDEBUG
+ assert((!LAI || LAI->NumSymbolicStrides == Strides.size()) &&
+ "Symbolic strides changed for loop");
+#endif
+
+ if (!LAI) {
+ const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
+ LAI = llvm::make_unique<LoopAccessInfo>(L, SE, DL, TLI, AA, DT, LI,
+ Strides);
+#ifndef NDEBUG
+ LAI->NumSymbolicStrides = Strides.size();
+#endif
+ }
+ return *LAI.get();
+}
+
+void LoopAccessAnalysis::print(raw_ostream &OS, const Module *M) const {
+ LoopAccessAnalysis &LAA = *const_cast<LoopAccessAnalysis *>(this);
+
+ ValueToValueMap NoSymbolicStrides;
+
+ for (Loop *TopLevelLoop : *LI)
+ for (Loop *L : depth_first(TopLevelLoop)) {
+ OS.indent(2) << L->getHeader()->getName() << ":\n";
+ auto &LAI = LAA.getInfo(L, NoSymbolicStrides);
+ LAI.print(OS, 4);
+ }
+}
+
+bool LoopAccessAnalysis::runOnFunction(Function &F) {
+ SE = &getAnalysis<ScalarEvolution>();
+ auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
+ TLI = TLIP ? &TLIP->getTLI() : nullptr;
+ AA = &getAnalysis<AliasAnalysis>();
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+
+ return false;
+}
+
+void LoopAccessAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<ScalarEvolution>();
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+
+ AU.setPreservesAll();
+}
+
+char LoopAccessAnalysis::ID = 0;
+static const char laa_name[] = "Loop Access Analysis";
+#define LAA_NAME "loop-accesses"
+
+INITIALIZE_PASS_BEGIN(LoopAccessAnalysis, LAA_NAME, laa_name, false, true)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_END(LoopAccessAnalysis, LAA_NAME, laa_name, false, true)
+
+namespace llvm {
+ Pass *createLAAPass() {
+ return new LoopAccessAnalysis();
+ }
+}
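
For context, a consumer of the new pass would look roughly like the sketch below. getInfo and addRuntimeCheck appear in this diff; MyLoopTransform, transformWithGuard and the canVectorizeMemory() accessor (assumed from the CanVecMem member) are hypothetical and only serve to show the calling pattern.

    // Hedged sketch of a legacy-PM client of LoopAccessAnalysis.
    bool MyLoopTransform::processLoop(Loop *L) {
      const ValueToValueMap NoSymbolicStrides;
      const LoopAccessInfo &LAI =
          getAnalysis<LoopAccessAnalysis>().getInfo(L, NoSymbolicStrides);
      if (!LAI.canVectorizeMemory())   // assumed accessor for CanVecMem
        return false;
      // Materialize the bound checks before the preheader terminator if the
      // analysis decided they are needed (assumes a preheader exists).
      auto Checks = LAI.addRuntimeCheck(
          L->getLoopPreheader()->getTerminator(), nullptr);
      return transformWithGuard(L, Checks.second);   // hypothetical helper
    }
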
diff --git a/contrib/llvm/lib/Analysis/LoopInfo.cpp b/contrib/llvm/lib/Analysis/LoopInfo.cpp
index b1f62c4..6b6faf8 100644
--- a/contrib/llvm/lib/Analysis/LoopInfo.cpp
+++ b/contrib/llvm/lib/Analysis/LoopInfo.cpp
@@ -26,8 +26,10 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include <algorithm>
using namespace llvm;
@@ -45,11 +47,6 @@ static cl::opt<bool,true>
VerifyLoopInfoX("verify-loop-info", cl::location(VerifyLoopInfo),
cl::desc("Verify loop info (time consuming)"));
-char LoopInfo::ID = 0;
-INITIALIZE_PASS_BEGIN(LoopInfo, "loops", "Natural Loop Information", true, true)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_END(LoopInfo, "loops", "Natural Loop Information", true, true)
-
// Loop identifier metadata name.
static const char *const LoopMDName = "llvm.loop";
@@ -59,20 +56,16 @@ static const char *const LoopMDName = "llvm.loop";
/// isLoopInvariant - Return true if the specified value is loop invariant
///
-bool Loop::isLoopInvariant(Value *V) const {
- if (Instruction *I = dyn_cast<Instruction>(V))
+bool Loop::isLoopInvariant(const Value *V) const {
+ if (const Instruction *I = dyn_cast<Instruction>(V))
return !contains(I);
return true; // All non-instructions are loop invariant
}
/// hasLoopInvariantOperands - Return true if all the operands of the
/// specified instruction are loop invariant.
-bool Loop::hasLoopInvariantOperands(Instruction *I) const {
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
- if (!isLoopInvariant(I->getOperand(i)))
- return false;
-
- return true;
+bool Loop::hasLoopInvariantOperands(const Instruction *I) const {
+ return all_of(I->operands(), [this](Value *V) { return isLoopInvariant(V); });
}
/// makeLoopInvariant - If the given value is an instruction inside of the
@@ -609,15 +602,6 @@ Loop *UnloopUpdater::getNearestLoop(BasicBlock *BB, Loop *BBLoop) {
return NearLoop;
}
-//===----------------------------------------------------------------------===//
-// LoopInfo implementation
-//
-bool LoopInfo::runOnFunction(Function &) {
- releaseMemory();
- LI.Analyze(getAnalysis<DominatorTreeWrapperPass>().getDomTree());
- return false;
-}
-
/// updateUnloop - The last backedge has been removed from a loop--now the
/// "unloop". Find a new parent for the blocks contained within unloop and
/// update the loop tree. We don't necessarily have valid dominators at this
@@ -631,7 +615,8 @@ void LoopInfo::updateUnloop(Loop *Unloop) {
if (!Unloop->getParentLoop()) {
// Since BBLoop had no parent, Unloop blocks are no longer in a loop.
for (Loop::block_iterator I = Unloop->block_begin(),
- E = Unloop->block_end(); I != E; ++I) {
+ E = Unloop->block_end();
+ I != E; ++I) {
// Don't reparent blocks in subloops.
if (getLoopFor(*I) != Unloop)
@@ -639,21 +624,21 @@ void LoopInfo::updateUnloop(Loop *Unloop) {
// Blocks no longer have a parent but are still referenced by Unloop until
// the Unloop object is deleted.
- LI.changeLoopFor(*I, nullptr);
+ changeLoopFor(*I, nullptr);
}
// Remove the loop from the top-level LoopInfo object.
- for (LoopInfo::iterator I = LI.begin();; ++I) {
- assert(I != LI.end() && "Couldn't find loop");
+ for (iterator I = begin();; ++I) {
+ assert(I != end() && "Couldn't find loop");
if (*I == Unloop) {
- LI.removeLoop(I);
+ removeLoop(I);
break;
}
}
// Move all of the subloops to the top-level.
while (!Unloop->empty())
- LI.addTopLevelLoop(Unloop->removeChildLoop(std::prev(Unloop->end())));
+ addTopLevelLoop(Unloop->removeChildLoop(std::prev(Unloop->end())));
return;
}
@@ -680,35 +665,59 @@ void LoopInfo::updateUnloop(Loop *Unloop) {
}
}
-void LoopInfo::verifyAnalysis() const {
- // LoopInfo is a FunctionPass, but verifying every loop in the function
- // each time verifyAnalysis is called is very expensive. The
- // -verify-loop-info option can enable this. In order to perform some
- // checking by default, LoopPass has been taught to call verifyLoop
- // manually during loop pass sequences.
+char LoopAnalysis::PassID;
+
+LoopInfo LoopAnalysis::run(Function &F, AnalysisManager<Function> *AM) {
+ // FIXME: Currently we create a LoopInfo from scratch for every function.
+ // This may prove to be too wasteful due to deallocating and re-allocating
+ // memory each time for the underlying map and vector datastructures. At some
+ // point it may prove worthwhile to use a freelist and recycle LoopInfo
+ // objects. I don't want to add that kind of complexity until the scope of
+ // the problem is better understood.
+ LoopInfo LI;
+ LI.Analyze(AM->getResult<DominatorTreeAnalysis>(F));
+ return LI;
+}
- if (!VerifyLoopInfo) return;
+PreservedAnalyses LoopPrinterPass::run(Function &F,
+ AnalysisManager<Function> *AM) {
+ AM->getResult<LoopAnalysis>(F).print(OS);
+ return PreservedAnalyses::all();
+}
- DenseSet<const Loop*> Loops;
- for (iterator I = begin(), E = end(); I != E; ++I) {
- assert(!(*I)->getParentLoop() && "Top-level loop has a parent!");
- (*I)->verifyLoopNest(&Loops);
- }
+//===----------------------------------------------------------------------===//
+// LoopInfo implementation
+//
- // Verify that blocks are mapped to valid loops.
- for (DenseMap<BasicBlock*, Loop*>::const_iterator I = LI.BBMap.begin(),
- E = LI.BBMap.end(); I != E; ++I) {
- assert(Loops.count(I->second) && "orphaned loop");
- assert(I->second->contains(I->first) && "orphaned block");
- }
+char LoopInfoWrapperPass::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopInfoWrapperPass, "loops", "Natural Loop Information",
+ true, true)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(LoopInfoWrapperPass, "loops", "Natural Loop Information",
+ true, true)
+
+bool LoopInfoWrapperPass::runOnFunction(Function &) {
+ releaseMemory();
+ LI.Analyze(getAnalysis<DominatorTreeWrapperPass>().getDomTree());
+ return false;
+}
+
+void LoopInfoWrapperPass::verifyAnalysis() const {
+ // LoopInfoWrapperPass is a FunctionPass, but verifying every loop in the
+ // function each time verifyAnalysis is called is very expensive. The
+ // -verify-loop-info option can enable this. In order to perform some
+ // checking by default, LoopPass has been taught to call verifyLoop manually
+ // during loop pass sequences.
+ if (VerifyLoopInfo)
+ LI.verify();
}
-void LoopInfo::getAnalysisUsage(AnalysisUsage &AU) const {
+void LoopInfoWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequired<DominatorTreeWrapperPass>();
}
-void LoopInfo::print(raw_ostream &OS, const Module*) const {
+void LoopInfoWrapperPass::print(raw_ostream &OS, const Module *) const {
LI.print(OS);
}
diff --git a/contrib/llvm/lib/Analysis/LoopPass.cpp b/contrib/llvm/lib/Analysis/LoopPass.cpp
index 190abc7..e9fcf02 100644
--- a/contrib/llvm/lib/Analysis/LoopPass.cpp
+++ b/contrib/llvm/lib/Analysis/LoopPass.cpp
@@ -18,6 +18,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Timer.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
#define DEBUG_TYPE "loop-pass-manager"
@@ -187,14 +188,15 @@ static void addLoopIntoQueue(Loop *L, std::deque<Loop *> &LQ) {
void LPPassManager::getAnalysisUsage(AnalysisUsage &Info) const {
// LPPassManager needs LoopInfo. In the long term LoopInfo class will
// become part of LPPassManager.
- Info.addRequired<LoopInfo>();
+ Info.addRequired<LoopInfoWrapperPass>();
Info.setPreservesAll();
}
/// run - Execute all of the passes scheduled for execution. Keep track of
/// whether any of the passes modifies the function, and if so, return true.
bool LPPassManager::runOnFunction(Function &F) {
- LI = &getAnalysis<LoopInfo>();
+ auto &LIWP = getAnalysis<LoopInfoWrapperPass>();
+ LI = &LIWP.getLoopInfo();
bool Changed = false;
// Collect inherited analysis from Module level pass manager.
@@ -262,7 +264,7 @@ bool LPPassManager::runOnFunction(Function &F) {
// loop in the function every time. That level of checking can be
// enabled with the -verify-loop-info option.
{
- TimeRegion PassTimer(getPassTimer(LI));
+ TimeRegion PassTimer(getPassTimer(&LIWP));
CurrentLoop->verifyLoop();
}
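
The change above is the mechanical update every legacy consumer needs after LoopInfo stops being a pass itself: require the wrapper pass and fetch the analysis result from it. A hedged sketch for a hypothetical pass (MyPass and walkLoops are made-up names):

    void MyPass::getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<LoopInfoWrapperPass>();  // was: AU.addRequired<LoopInfo>();
    }

    bool MyPass::runOnFunction(Function &F) {
      LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
      return walkLoops(LI);                   // hypothetical helper
    }
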
diff --git a/contrib/llvm/lib/Analysis/MemDepPrinter.cpp b/contrib/llvm/lib/Analysis/MemDepPrinter.cpp
index ffc9fe6..da3b829 100644
--- a/contrib/llvm/lib/Analysis/MemDepPrinter.cpp
+++ b/contrib/llvm/lib/Analysis/MemDepPrinter.cpp
@@ -96,8 +96,8 @@ bool MemDepPrinter::runOnFunction(Function &F) {
// All this code uses non-const interfaces because MemDep is not
// const-friendly, though nothing is actually modified.
- for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
- Instruction *Inst = &*I;
+ for (auto &I : inst_range(F)) {
+ Instruction *Inst = &I;
if (!Inst->mayReadFromMemory() && !Inst->mayWriteToMemory())
continue;
@@ -106,7 +106,7 @@ bool MemDepPrinter::runOnFunction(Function &F) {
if (!Res.isNonLocal()) {
Deps[Inst].insert(std::make_pair(getInstTypePair(Res),
static_cast<BasicBlock *>(nullptr)));
- } else if (CallSite CS = cast<Value>(Inst)) {
+ } else if (auto CS = CallSite(Inst)) {
const MemoryDependenceAnalysis::NonLocalDepInfo &NLDI =
MDA.getNonLocalCallDependency(CS);
@@ -135,8 +135,8 @@ bool MemDepPrinter::runOnFunction(Function &F) {
}
void MemDepPrinter::print(raw_ostream &OS, const Module *M) const {
- for (const_inst_iterator I = inst_begin(*F), E = inst_end(*F); I != E; ++I) {
- const Instruction *Inst = &*I;
+ for (const auto &I : inst_range(*F)) {
+ const Instruction *Inst = &I;
DepSetMap::const_iterator DI = Deps.find(Inst);
if (DI == Deps.end())
@@ -144,11 +144,10 @@ void MemDepPrinter::print(raw_ostream &OS, const Module *M) const {
const DepSet &InstDeps = DI->second;
- for (DepSet::const_iterator I = InstDeps.begin(), E = InstDeps.end();
- I != E; ++I) {
- const Instruction *DepInst = I->first.getPointer();
- DepType type = I->first.getInt();
- const BasicBlock *DepBB = I->second;
+ for (const auto &I : InstDeps) {
+ const Instruction *DepInst = I.first.getPointer();
+ DepType type = I.first.getInt();
+ const BasicBlock *DepBB = I.second;
OS << " ";
OS << DepTypeStr[type];
diff --git a/contrib/llvm/lib/Analysis/MemDerefPrinter.cpp b/contrib/llvm/lib/Analysis/MemDerefPrinter.cpp
new file mode 100644
index 0000000..fa292a2
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/MemDerefPrinter.cpp
@@ -0,0 +1,70 @@
+//===- MemDerefPrinter.cpp - Printer for isDereferenceablePointer ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Passes.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+ struct MemDerefPrinter : public FunctionPass {
+ SmallVector<Value *, 4> Vec;
+
+ static char ID; // Pass identification, replacement for typeid
+ MemDerefPrinter() : FunctionPass(ID) {
+ initializeMemDerefPrinterPass(*PassRegistry::getPassRegistry());
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ }
+ bool runOnFunction(Function &F) override;
+ void print(raw_ostream &OS, const Module * = nullptr) const override;
+ void releaseMemory() override {
+ Vec.clear();
+ }
+ };
+}
+
+char MemDerefPrinter::ID = 0;
+INITIALIZE_PASS_BEGIN(MemDerefPrinter, "print-memderefs",
+ "Memory Dereferenciblity of pointers in function", false, true)
+INITIALIZE_PASS_END(MemDerefPrinter, "print-memderefs",
+ "Memory Dereferenciblity of pointers in function", false, true)
+
+FunctionPass *llvm::createMemDerefPrinter() {
+ return new MemDerefPrinter();
+}
+
+bool MemDerefPrinter::runOnFunction(Function &F) {
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ for (auto &I: inst_range(F)) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
+ Value *PO = LI->getPointerOperand();
+ if (isDereferenceablePointer(PO, DL))
+ Vec.push_back(PO);
+ }
+ }
+ return false;
+}
+
+void MemDerefPrinter::print(raw_ostream &OS, const Module *M) const {
+ OS << "The following are dereferenceable:\n";
+ for (auto &V: Vec) {
+ V->print(OS);
+ OS << "\n\n";
+ }
+}
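
The new printer is essentially a thin driver over isDereferenceablePointer with the module's DataLayout, as the sketch below restates; only the wrapper function here is hypothetical, the two calls it makes are the ones used in the file above.

    // Minimal restatement of the query exercised by MemDerefPrinter.
    static bool loadPointerIsDereferenceable(LoadInst *LI) {
      const DataLayout &DL = LI->getModule()->getDataLayout();
      return isDereferenceablePointer(LI->getPointerOperand(), DL);
    }
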
diff --git a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp
index 08b41fe..8ddac8f 100644
--- a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp
+++ b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp
@@ -15,6 +15,7 @@
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GlobalVariable.h"
@@ -25,7 +26,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -206,7 +206,7 @@ const CallInst *llvm::extractMallocCall(const Value *I,
return isMallocLikeFn(I, TLI) ? dyn_cast<CallInst>(I) : nullptr;
}
-static Value *computeArraySize(const CallInst *CI, const DataLayout *DL,
+static Value *computeArraySize(const CallInst *CI, const DataLayout &DL,
const TargetLibraryInfo *TLI,
bool LookThroughSExt = false) {
if (!CI)
@@ -214,12 +214,12 @@ static Value *computeArraySize(const CallInst *CI, const DataLayout *DL,
// The size of the malloc's result type must be known to determine array size.
Type *T = getMallocAllocatedType(CI, TLI);
- if (!T || !T->isSized() || !DL)
+ if (!T || !T->isSized())
return nullptr;
- unsigned ElementSize = DL->getTypeAllocSize(T);
+ unsigned ElementSize = DL.getTypeAllocSize(T);
if (StructType *ST = dyn_cast<StructType>(T))
- ElementSize = DL->getStructLayout(ST)->getSizeInBytes();
+ ElementSize = DL.getStructLayout(ST)->getSizeInBytes();
// If malloc call's arg can be determined to be a multiple of ElementSize,
// return the multiple. Otherwise, return NULL.
@@ -232,23 +232,6 @@ static Value *computeArraySize(const CallInst *CI, const DataLayout *DL,
return nullptr;
}
-/// isArrayMalloc - Returns the corresponding CallInst if the instruction
-/// is a call to malloc whose array size can be determined and the array size
-/// is not constant 1. Otherwise, return NULL.
-const CallInst *llvm::isArrayMalloc(const Value *I,
- const DataLayout *DL,
- const TargetLibraryInfo *TLI) {
- const CallInst *CI = extractMallocCall(I, TLI);
- Value *ArraySize = computeArraySize(CI, DL, TLI);
-
- if (ConstantInt *ConstSize = dyn_cast_or_null<ConstantInt>(ArraySize))
- if (ConstSize->isOne())
- return CI;
-
- // CI is a non-array malloc or we can't figure out that it is an array malloc.
- return nullptr;
-}
-
/// getMallocType - Returns the PointerType resulting from the malloc call.
/// The PointerType depends on the number of bitcast uses of the malloc call:
/// 0: PointerType is the calls' return type.
@@ -297,7 +280,7 @@ Type *llvm::getMallocAllocatedType(const CallInst *CI,
/// then return that multiple. For non-array mallocs, the multiple is
/// constant 1. Otherwise, return NULL for mallocs whose array size cannot be
/// determined.
-Value *llvm::getMallocArraySize(CallInst *CI, const DataLayout *DL,
+Value *llvm::getMallocArraySize(CallInst *CI, const DataLayout &DL,
const TargetLibraryInfo *TLI,
bool LookThroughSExt) {
assert(isMallocLikeFn(CI, TLI) && "getMallocArraySize and not malloc call");
@@ -319,7 +302,7 @@ const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) {
if (!CI || isa<IntrinsicInst>(CI))
return nullptr;
Function *Callee = CI->getCalledFunction();
- if (Callee == nullptr || !Callee->isDeclaration())
+ if (Callee == nullptr)
return nullptr;
StringRef FnName = Callee->getName();
@@ -367,11 +350,8 @@ const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) {
/// object size in Size if successful, and false otherwise.
/// If RoundToAlign is true, then Size is rounded up to the alignment of allocas,
/// byval arguments, and global variables.
-bool llvm::getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout *DL,
+bool llvm::getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL,
const TargetLibraryInfo *TLI, bool RoundToAlign) {
- if (!DL)
- return false;
-
ObjectSizeOffsetVisitor Visitor(DL, TLI, Ptr->getContext(), RoundToAlign);
SizeOffsetType Data = Visitor.compute(const_cast<Value*>(Ptr));
if (!Visitor.bothKnown(Data))
@@ -399,17 +379,17 @@ APInt ObjectSizeOffsetVisitor::align(APInt Size, uint64_t Align) {
return Size;
}
-ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout *DL,
+ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout &DL,
const TargetLibraryInfo *TLI,
LLVMContext &Context,
bool RoundToAlign)
-: DL(DL), TLI(TLI), RoundToAlign(RoundToAlign) {
+ : DL(DL), TLI(TLI), RoundToAlign(RoundToAlign) {
// Pointer size must be rechecked for each object visited since it could have
// a different address space.
}
SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) {
- IntTyBits = DL->getPointerTypeSizeInBits(V->getType());
+ IntTyBits = DL.getPointerTypeSizeInBits(V->getType());
Zero = APInt::getNullValue(IntTyBits);
V = V->stripPointerCasts();
@@ -449,7 +429,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitAllocaInst(AllocaInst &I) {
if (!I.getAllocatedType()->isSized())
return unknown();
- APInt Size(IntTyBits, DL->getTypeAllocSize(I.getAllocatedType()));
+ APInt Size(IntTyBits, DL.getTypeAllocSize(I.getAllocatedType()));
if (!I.isArrayAllocation())
return std::make_pair(align(Size, I.getAlignment()), Zero);
@@ -468,7 +448,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitArgument(Argument &A) {
return unknown();
}
PointerType *PT = cast<PointerType>(A.getType());
- APInt Size(IntTyBits, DL->getTypeAllocSize(PT->getElementType()));
+ APInt Size(IntTyBits, DL.getTypeAllocSize(PT->getElementType()));
return std::make_pair(align(Size, A.getParamAlignment()), Zero);
}
@@ -541,7 +521,7 @@ ObjectSizeOffsetVisitor::visitExtractValueInst(ExtractValueInst&) {
SizeOffsetType ObjectSizeOffsetVisitor::visitGEPOperator(GEPOperator &GEP) {
SizeOffsetType PtrData = compute(GEP.getPointerOperand());
APInt Offset(IntTyBits, 0);
- if (!bothKnown(PtrData) || !GEP.accumulateConstantOffset(*DL, Offset))
+ if (!bothKnown(PtrData) || !GEP.accumulateConstantOffset(DL, Offset))
return unknown();
return std::make_pair(PtrData.first, PtrData.second + Offset);
@@ -557,7 +537,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitGlobalVariable(GlobalVariable &GV){
if (!GV.hasDefinitiveInitializer())
return unknown();
- APInt Size(IntTyBits, DL->getTypeAllocSize(GV.getType()->getElementType()));
+ APInt Size(IntTyBits, DL.getTypeAllocSize(GV.getType()->getElementType()));
return std::make_pair(align(Size, GV.getAlignment()), Zero);
}
@@ -593,19 +573,18 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitInstruction(Instruction &I) {
return unknown();
}
-ObjectSizeOffsetEvaluator::ObjectSizeOffsetEvaluator(const DataLayout *DL,
- const TargetLibraryInfo *TLI,
- LLVMContext &Context,
- bool RoundToAlign)
-: DL(DL), TLI(TLI), Context(Context), Builder(Context, TargetFolder(DL)),
- RoundToAlign(RoundToAlign) {
+ObjectSizeOffsetEvaluator::ObjectSizeOffsetEvaluator(
+ const DataLayout &DL, const TargetLibraryInfo *TLI, LLVMContext &Context,
+ bool RoundToAlign)
+ : DL(DL), TLI(TLI), Context(Context), Builder(Context, TargetFolder(DL)),
+ RoundToAlign(RoundToAlign) {
// IntTy and Zero must be set for each compute() since the address space may
// be different for later objects.
}
SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute(Value *V) {
// XXX - Are vectors of pointers possible here?
- IntTy = cast<IntegerType>(DL->getIntPtrType(V->getType()));
+ IntTy = cast<IntegerType>(DL.getIntPtrType(V->getType()));
Zero = ConstantInt::get(IntTy, 0);
SizeOffsetEvalType Result = compute_(V);
@@ -687,7 +666,7 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitAllocaInst(AllocaInst &I) {
assert(I.isArrayAllocation());
Value *ArraySize = I.getArraySize();
Value *Size = ConstantInt::get(ArraySize->getType(),
- DL->getTypeAllocSize(I.getAllocatedType()));
+ DL.getTypeAllocSize(I.getAllocatedType()));
Size = Builder.CreateMul(Size, ArraySize);
return std::make_pair(Size, Zero);
}
@@ -739,7 +718,7 @@ ObjectSizeOffsetEvaluator::visitGEPOperator(GEPOperator &GEP) {
if (!bothKnown(PtrData))
return unknown();
- Value *Offset = EmitGEPOffset(&Builder, *DL, &GEP, /*NoAssumptions=*/true);
+ Value *Offset = EmitGEPOffset(&Builder, DL, &GEP, /*NoAssumptions=*/true);
Offset = Builder.CreateAdd(PtrData.second, Offset);
return std::make_pair(PtrData.first, Offset);
}
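
With getObjectSize now taking a const DataLayout reference, callers no longer probe for a DataLayoutPass and simply pass the module's layout. A hedged sketch of the updated calling pattern (the surrounding helper is hypothetical; the getObjectSize signature is the one in this patch):

    static bool accessFitsInObject(const Value *Ptr, uint64_t AccessSize,
                                   const Module &M,
                                   const TargetLibraryInfo *TLI) {
      uint64_t ObjSize;
      if (!getObjectSize(Ptr, ObjSize, M.getDataLayout(), TLI))
        return false;                 // object size unknown at compile time
      return AccessSize <= ObjSize;
    }
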
diff --git a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
index c505aa4..3c1826a 100644
--- a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -65,7 +65,7 @@ INITIALIZE_PASS_END(MemoryDependenceAnalysis, "memdep",
"Memory Dependence Analysis", false, true)
MemoryDependenceAnalysis::MemoryDependenceAnalysis()
- : FunctionPass(ID), PredCache() {
+ : FunctionPass(ID) {
initializeMemoryDependenceAnalysisPass(*PassRegistry::getPassRegistry());
}
MemoryDependenceAnalysis::~MemoryDependenceAnalysis() {
@@ -79,11 +79,9 @@ void MemoryDependenceAnalysis::releaseMemory() {
ReverseLocalDeps.clear();
ReverseNonLocalDeps.clear();
ReverseNonLocalPtrDeps.clear();
- PredCache->clear();
+ PredCache.clear();
}
-
-
/// getAnalysisUsage - Does not modify anything. It uses Alias Analysis.
///
void MemoryDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
@@ -95,13 +93,9 @@ void MemoryDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
bool MemoryDependenceAnalysis::runOnFunction(Function &F) {
AA = &getAnalysis<AliasAnalysis>();
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
DominatorTreeWrapperPass *DTWP =
getAnalysisIfAvailable<DominatorTreeWrapperPass>();
DT = DTWP ? &DTWP->getDomTree() : nullptr;
- if (!PredCache)
- PredCache.reset(new PredIteratorCache());
return false;
}
@@ -227,7 +221,7 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
continue;
}
- if (CallSite InstCS = cast<Value>(Inst)) {
+ if (auto InstCS = CallSite(Inst)) {
// Debug intrinsics don't cause dependences.
if (isa<DbgInfoIntrinsic>(Inst)) continue;
// If these two calls do not interfere, look past it.
@@ -265,22 +259,17 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
///
/// MemLocBase, MemLocOffset are lazily computed here the first time the
/// base/offs of memloc is needed.
-static bool
-isLoadLoadClobberIfExtendedToFullWidth(const AliasAnalysis::Location &MemLoc,
- const Value *&MemLocBase,
- int64_t &MemLocOffs,
- const LoadInst *LI,
- const DataLayout *DL) {
- // If we have no target data, we can't do this.
- if (!DL) return false;
+static bool isLoadLoadClobberIfExtendedToFullWidth(
+ const AliasAnalysis::Location &MemLoc, const Value *&MemLocBase,
+ int64_t &MemLocOffs, const LoadInst *LI) {
+ const DataLayout &DL = LI->getModule()->getDataLayout();
// If we haven't already computed the base/offset of MemLoc, do so now.
if (!MemLocBase)
MemLocBase = GetPointerBaseWithConstantOffset(MemLoc.Ptr, MemLocOffs, DL);
- unsigned Size = MemoryDependenceAnalysis::
- getLoadLoadClobberFullWidthSize(MemLocBase, MemLocOffs, MemLoc.Size,
- LI, *DL);
+ unsigned Size = MemoryDependenceAnalysis::getLoadLoadClobberFullWidthSize(
+ MemLocBase, MemLocOffs, MemLoc.Size, LI);
return Size != 0;
}
@@ -291,23 +280,23 @@ isLoadLoadClobberIfExtendedToFullWidth(const AliasAnalysis::Location &MemLoc,
/// 2) safe for the target, and 3) would provide the specified memory
/// location value, then this function returns the size in bytes of the
/// load width to use. If not, this returns zero.
-unsigned MemoryDependenceAnalysis::
-getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs,
- unsigned MemLocSize, const LoadInst *LI,
- const DataLayout &DL) {
+unsigned MemoryDependenceAnalysis::getLoadLoadClobberFullWidthSize(
+ const Value *MemLocBase, int64_t MemLocOffs, unsigned MemLocSize,
+ const LoadInst *LI) {
// We can only extend simple integer loads.
if (!isa<IntegerType>(LI->getType()) || !LI->isSimple()) return 0;
// Load widening is hostile to ThreadSanitizer: it may cause false positives
// or make the reports more cryptic (access sizes are wrong).
- if (LI->getParent()->getParent()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::SanitizeThread))
+ if (LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeThread))
return 0;
+ const DataLayout &DL = LI->getModule()->getDataLayout();
+
// Get the base of this load.
int64_t LIOffs = 0;
const Value *LIBase =
- GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, &DL);
+ GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, DL);
// If the two pointers are not based on the same pointer, we can't tell that
// they are related.
@@ -346,9 +335,9 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs,
!DL.fitsInLegalInteger(NewLoadByteSize*8))
return 0;
- if (LIOffs+NewLoadByteSize > MemLocEnd &&
- LI->getParent()->getParent()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::SanitizeAddress))
+ if (LIOffs + NewLoadByteSize > MemLocEnd &&
+ LI->getParent()->getParent()->hasFnAttribute(
+ Attribute::SanitizeAddress))
// We will be reading past the location accessed by the original program.
// While this is safe in a regular build, Address Safety analysis tools
// may start reporting false warnings. So, don't do widening.
@@ -362,6 +351,17 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs,
}
}
+static bool isVolatile(Instruction *Inst) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
+ return LI->isVolatile();
+ else if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
+ return SI->isVolatile();
+ else if (AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(Inst))
+ return AI->isVolatile();
+ return false;
+}
+
+
/// getPointerDependencyFrom - Return the instruction on which a memory
/// location depends. If isLoad is true, this routine ignores may-aliases with
/// read-only operations. If isLoad is false, this routine ignores may-aliases
@@ -405,14 +405,19 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
// by every program that can detect any optimisation of that kind: either
// it is racy (undefined) or there is a release followed by an acquire
// between the pair of accesses under consideration.
- bool HasSeenAcquire = false;
+ // If the load is invariant, we "know" that it doesn't alias *any* write. We
+ // do want to respect mustalias results since defs are useful for value
+ // forwarding, but any mayalias write can be assumed to be noalias.
+ // Arguably, this logic should be pushed inside AliasAnalysis itself.
if (isLoad && QueryInst) {
LoadInst *LI = dyn_cast<LoadInst>(QueryInst);
if (LI && LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr)
isInvariantLoad = true;
}
+ const DataLayout &DL = BB->getModule()->getDataLayout();
+
// Walk backwards through the basic block, looking for dependencies.
while (ScanIt != BB->begin()) {
Instruction *Inst = --ScanIt;
@@ -448,14 +453,28 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
// does not alias with when this atomic load indicates that another thread may
// be accessing the location.
if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+
+      // While volatile accesses cannot be eliminated, they do not have to
+      // clobber non-aliasing locations; normal accesses, for example, can be
+      // safely reordered with volatile accesses.
+ if (LI->isVolatile()) {
+ if (!QueryInst)
+ // Original QueryInst *may* be volatile
+ return MemDepResult::getClobber(LI);
+ if (isVolatile(QueryInst))
+ // Ordering required if QueryInst is itself volatile
+ return MemDepResult::getClobber(LI);
+ // Otherwise, volatile doesn't imply any special ordering
+ }
+
// Atomic loads have complications involved.
// A Monotonic (or higher) load is OK if the query inst is itself not atomic.
- // An Acquire (or higher) load sets the HasSeenAcquire flag, so that any
- // release store will know to return getClobber.
// FIXME: This is overly conservative.
- if (!LI->isUnordered()) {
+ if (LI->isAtomic() && LI->getOrdering() > Unordered) {
if (!QueryInst)
return MemDepResult::getClobber(LI);
+ if (LI->getOrdering() != Monotonic)
+ return MemDepResult::getClobber(LI);
if (auto *QueryLI = dyn_cast<LoadInst>(QueryInst)) {
if (!QueryLI->isSimple())
return MemDepResult::getClobber(LI);
@@ -465,18 +484,8 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
} else if (QueryInst->mayReadOrWriteMemory()) {
return MemDepResult::getClobber(LI);
}
-
- if (isAtLeastAcquire(LI->getOrdering()))
- HasSeenAcquire = true;
}
- // FIXME: this is overly conservative.
- // While volatile access cannot be eliminated, they do not have to clobber
- // non-aliasing locations, as normal accesses can for example be reordered
- // with volatile accesses.
- if (LI->isVolatile())
- return MemDepResult::getClobber(LI);
-
AliasAnalysis::Location LoadLoc = AA->getLocation(LI);
// If we found a pointer, check if it could be the same as our pointer.
@@ -490,12 +499,12 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
// location is 1 byte at P+1). If so, return it as a load/load
// clobber result, allowing the client to decide to widen the load if
// it wants to.
- if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType()))
- if (LI->getAlignment()*8 > ITy->getPrimitiveSizeInBits() &&
+ if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) {
+ if (LI->getAlignment() * 8 > ITy->getPrimitiveSizeInBits() &&
isLoadLoadClobberIfExtendedToFullWidth(MemLoc, MemLocBase,
- MemLocOffset, LI, DL))
+ MemLocOffset, LI))
return MemDepResult::getClobber(Inst);
-
+ }
continue;
}
@@ -534,12 +543,12 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
// Atomic stores have complications involved.
// A Monotonic store is OK if the query inst is itself not atomic.
- // A Release (or higher) store further requires that no acquire load
- // has been seen.
// FIXME: This is overly conservative.
if (!SI->isUnordered()) {
if (!QueryInst)
return MemDepResult::getClobber(SI);
+ if (SI->getOrdering() != Monotonic)
+ return MemDepResult::getClobber(SI);
if (auto *QueryLI = dyn_cast<LoadInst>(QueryInst)) {
if (!QueryLI->isSimple())
return MemDepResult::getClobber(SI);
@@ -549,9 +558,6 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
} else if (QueryInst->mayReadOrWriteMemory()) {
return MemDepResult::getClobber(SI);
}
-
- if (HasSeenAcquire && isAtLeastRelease(SI->getOrdering()))
- return MemDepResult::getClobber(SI);
}
// FIXME: this is overly conservative.
@@ -597,6 +603,8 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
if (AccessPtr == Inst || AA->isMustAlias(Inst, AccessPtr))
return MemDepResult::getDef(Inst);
+ if (isInvariantLoad)
+ continue;
// Be conservative if the accessed pointer may alias the allocation.
if (AA->alias(Inst, AccessPtr) != AliasAnalysis::NoAlias)
return MemDepResult::getClobber(Inst);
@@ -607,6 +615,9 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
continue;
}
+ if (isInvariantLoad)
+ continue;
+
// See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
AliasAnalysis::ModRefResult MR = AA->getModRefInfo(Inst, MemLoc);
// If necessary, perform additional analysis.
@@ -757,8 +768,8 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
} else {
// Seed DirtyBlocks with each of the preds of QueryInst's block.
BasicBlock *QueryBB = QueryCS.getInstruction()->getParent();
- for (BasicBlock **PI = PredCache->GetPreds(QueryBB); *PI; ++PI)
- DirtyBlocks.push_back(*PI);
+ for (BasicBlock *Pred : PredCache.get(QueryBB))
+ DirtyBlocks.push_back(Pred);
++NumUncacheNonLocal;
}
@@ -843,8 +854,8 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
// If the block *is* completely transparent to the load, we need to check
// the predecessors of this block. Add them to our worklist.
- for (BasicBlock **PI = PredCache->GetPreds(DirtyBB); *PI; ++PI)
- DirtyBlocks.push_back(*PI);
+ for (BasicBlock *Pred : PredCache.get(DirtyBB))
+ DirtyBlocks.push_back(Pred);
}
}
@@ -861,23 +872,7 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
void MemoryDependenceAnalysis::
getNonLocalPointerDependency(Instruction *QueryInst,
SmallVectorImpl<NonLocalDepResult> &Result) {
-
- auto getLocation = [](AliasAnalysis *AA, Instruction *Inst) {
- if (auto *I = dyn_cast<LoadInst>(Inst))
- return AA->getLocation(I);
- else if (auto *I = dyn_cast<StoreInst>(Inst))
- return AA->getLocation(I);
- else if (auto *I = dyn_cast<VAArgInst>(Inst))
- return AA->getLocation(I);
- else if (auto *I = dyn_cast<AtomicCmpXchgInst>(Inst))
- return AA->getLocation(I);
- else if (auto *I = dyn_cast<AtomicRMWInst>(Inst))
- return AA->getLocation(I);
- else
- llvm_unreachable("unsupported memory instruction");
- };
-
- const AliasAnalysis::Location Loc = getLocation(AA, QueryInst);
+ const AliasAnalysis::Location Loc = AA->getLocation(QueryInst);
bool isLoad = isa<LoadInst>(QueryInst);
BasicBlock *FromBB = QueryInst->getParent();
assert(FromBB);
@@ -890,14 +885,7 @@ getNonLocalPointerDependency(Instruction *QueryInst,
// Doing so would require piping through the QueryInst all the way through.
// TODO: volatiles can't be elided, but they can be reordered with other
// non-volatile accesses.
- auto isVolatile = [](Instruction *Inst) {
- if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
- return LI->isVolatile();
- } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
- return SI->isVolatile();
- }
- return false;
- };
+
// We currently give up on any instruction which is ordered, but we do handle
// atomic instructions which are unordered.
// TODO: Handle ordered instructions
@@ -915,8 +903,7 @@ getNonLocalPointerDependency(Instruction *QueryInst,
const_cast<Value *>(Loc.Ptr)));
return;
}
-
-
+ const DataLayout &DL = FromBB->getModule()->getDataLayout();
PHITransAddr Address(const_cast<Value *>(Loc.Ptr), DL, AC);
// This is the set of blocks we've inspected, and the pointer we consider in
@@ -924,7 +911,7 @@ getNonLocalPointerDependency(Instruction *QueryInst,
// a block with multiple different pointers. This can happen during PHI
// translation.
DenseMap<BasicBlock*, Value*> Visited;
- if (!getNonLocalPointerDepFromBB(Address, Loc, isLoad, FromBB,
+ if (!getNonLocalPointerDepFromBB(QueryInst, Address, Loc, isLoad, FromBB,
Result, Visited, true))
return;
Result.clear();
@@ -938,7 +925,8 @@ getNonLocalPointerDependency(Instruction *QueryInst,
/// lookup (which may use dirty cache info if available). If we do a lookup,
/// add the result to the cache.
MemDepResult MemoryDependenceAnalysis::
-GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc,
+GetNonLocalInfoForBlock(Instruction *QueryInst,
+ const AliasAnalysis::Location &Loc,
bool isLoad, BasicBlock *BB,
NonLocalDepInfo *Cache, unsigned NumSortedEntries) {
@@ -979,7 +967,8 @@ GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc,
}
// Scan the block for the dependency.
- MemDepResult Dep = getPointerDependencyFrom(Loc, isLoad, ScanPos, BB);
+ MemDepResult Dep = getPointerDependencyFrom(Loc, isLoad, ScanPos, BB,
+ QueryInst);
// If we had a dirty entry for the block, update it. Otherwise, just add
// a new entry.
@@ -1052,7 +1041,8 @@ SortNonLocalDepInfoCache(MemoryDependenceAnalysis::NonLocalDepInfo &Cache,
/// not compute dependence information for some reason. This should be treated
/// as a clobber dependence on the first instruction in the predecessor block.
bool MemoryDependenceAnalysis::
-getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
+getNonLocalPointerDepFromBB(Instruction *QueryInst,
+ const PHITransAddr &Pointer,
const AliasAnalysis::Location &Loc,
bool isLoad, BasicBlock *StartBB,
SmallVectorImpl<NonLocalDepResult> &Result,
@@ -1091,7 +1081,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
} else if (CacheInfo->Size > Loc.Size) {
// This query's Size is less than the cached one. Conservatively restart
// the query using the greater size.
- return getNonLocalPointerDepFromBB(Pointer,
+ return getNonLocalPointerDepFromBB(QueryInst, Pointer,
Loc.getWithNewSize(CacheInfo->Size),
isLoad, StartBB, Result, Visited,
SkipFirstBlock);
@@ -1111,7 +1101,8 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
CacheInfo->NonLocalDeps.clear();
}
if (Loc.AATags)
- return getNonLocalPointerDepFromBB(Pointer, Loc.getWithoutAATags(),
+ return getNonLocalPointerDepFromBB(QueryInst,
+ Pointer, Loc.getWithoutAATags(),
isLoad, StartBB, Result, Visited,
SkipFirstBlock);
}
@@ -1214,7 +1205,8 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
// Get the dependency info for Pointer in BB. If we have cached
// information, we will use it, otherwise we compute it.
DEBUG(AssertSorted(*Cache, NumSortedEntries));
- MemDepResult Dep = GetNonLocalInfoForBlock(Loc, isLoad, BB, Cache,
+ MemDepResult Dep = GetNonLocalInfoForBlock(QueryInst,
+ Loc, isLoad, BB, Cache,
NumSortedEntries);
// If we got a Def or Clobber, add this to the list of results.
@@ -1238,13 +1230,13 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
if (!Pointer.NeedsPHITranslationFromBlock(BB)) {
SkipFirstBlock = false;
SmallVector<BasicBlock*, 16> NewBlocks;
- for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) {
+ for (BasicBlock *Pred : PredCache.get(BB)) {
// Verify that we haven't looked at this block yet.
std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool>
- InsertRes = Visited.insert(std::make_pair(*PI, Pointer.getAddr()));
+ InsertRes = Visited.insert(std::make_pair(Pred, Pointer.getAddr()));
if (InsertRes.second) {
// First time we've looked at *PI.
- NewBlocks.push_back(*PI);
+ NewBlocks.push_back(Pred);
continue;
}
@@ -1280,8 +1272,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
Cache = nullptr;
PredList.clear();
- for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) {
- BasicBlock *Pred = *PI;
+ for (BasicBlock *Pred : PredCache.get(BB)) {
PredList.push_back(std::make_pair(Pred, Pointer));
// Get the PHI translated pointer in this predecessor. This can fail if
@@ -1348,7 +1339,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
// result conflicted with the Visited list; we have to conservatively
// assume it is unknown, but this also does not block PRE of the load.
if (!CanTranslate ||
- getNonLocalPointerDepFromBB(PredPointer,
+ getNonLocalPointerDepFromBB(QueryInst, PredPointer,
Loc.getWithNewPtr(PredPtrVal),
isLoad, Pred,
Result, Visited)) {
@@ -1471,7 +1462,7 @@ void MemoryDependenceAnalysis::invalidateCachedPointerInfo(Value *Ptr) {
/// This needs to be done when the CFG changes, e.g., due to splitting
/// critical edges.
void MemoryDependenceAnalysis::invalidateCachedPredecessors() {
- PredCache->clear();
+ PredCache.clear();
}
/// removeInstruction - Remove an instruction from the dependence analysis,
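
The reworked handling of volatile and atomic loads in getPointerDependencyFrom is easier to read when folded into a single predicate. The helper below is a condensed restatement of the new branches, not code from the patch; isVolatile refers to the static helper added above.

    static bool scannedLoadMustClobber(LoadInst *LI, Instruction *QueryInst) {
      // Volatile loads only force ordering against a volatile (or unknown)
      // query instruction; other accesses may be reordered around them.
      if (LI->isVolatile() && (!QueryInst || isVolatile(QueryInst)))
        return true;
      // Atomic loads: anything stronger than monotonic is conservatively a
      // clobber, as is any query that is non-simple or touches memory.
      if (LI->isAtomic() && LI->getOrdering() > Unordered) {
        if (!QueryInst || LI->getOrdering() != Monotonic)
          return true;
        if (auto *QueryLI = dyn_cast<LoadInst>(QueryInst))
          return !QueryLI->isSimple();
        if (auto *QuerySI = dyn_cast<StoreInst>(QueryInst))
          return !QuerySI->isSimple();
        return QueryInst->mayReadOrWriteMemory();
      }
      return false;  // fall through to the usual alias-based checks
    }
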
diff --git a/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp b/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp
index f645558..36c4714 100644
--- a/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp
+++ b/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp
@@ -55,28 +55,72 @@ bool ModuleDebugInfoPrinter::runOnModule(Module &M) {
return false;
}
+static void printFile(raw_ostream &O, StringRef Filename, StringRef Directory,
+ unsigned Line = 0) {
+ if (Filename.empty())
+ return;
+
+ O << " from ";
+ if (!Directory.empty())
+ O << Directory << "/";
+ O << Filename;
+ if (Line)
+ O << ":" << Line;
+}
+
void ModuleDebugInfoPrinter::print(raw_ostream &O, const Module *M) const {
- for (DICompileUnit CU : Finder.compile_units()) {
- O << "Compile Unit: ";
- CU.print(O);
+ // Printing the nodes directly isn't particularly helpful (since they
+ // reference other nodes that won't be printed, particularly for the
+ // filenames), so just print a few useful things.
+ for (DICompileUnit *CU : Finder.compile_units()) {
+ O << "Compile unit: ";
+ if (const char *Lang = dwarf::LanguageString(CU->getSourceLanguage()))
+ O << Lang;
+ else
+ O << "unknown-language(" << CU->getSourceLanguage() << ")";
+ printFile(O, CU->getFilename(), CU->getDirectory());
O << '\n';
}
- for (DISubprogram S : Finder.subprograms()) {
- O << "Subprogram: ";
- S.print(O);
+ for (DISubprogram *S : Finder.subprograms()) {
+ O << "Subprogram: " << S->getName();
+ printFile(O, S->getFilename(), S->getDirectory(), S->getLine());
+ if (!S->getLinkageName().empty())
+ O << " ('" << S->getLinkageName() << "')";
O << '\n';
}
- for (DIGlobalVariable GV : Finder.global_variables()) {
- O << "GlobalVariable: ";
- GV.print(O);
+ for (const DIGlobalVariable *GV : Finder.global_variables()) {
+ O << "Global variable: " << GV->getName();
+ printFile(O, GV->getFilename(), GV->getDirectory(), GV->getLine());
+ if (!GV->getLinkageName().empty())
+ O << " ('" << GV->getLinkageName() << "')";
O << '\n';
}
- for (DIType T : Finder.types()) {
- O << "Type: ";
- T.print(O);
+ for (const DIType *T : Finder.types()) {
+ O << "Type:";
+ if (!T->getName().empty())
+ O << ' ' << T->getName();
+ printFile(O, T->getFilename(), T->getDirectory(), T->getLine());
+ if (auto *BT = dyn_cast<DIBasicType>(T)) {
+ O << " ";
+ if (const char *Encoding =
+ dwarf::AttributeEncodingString(BT->getEncoding()))
+ O << Encoding;
+ else
+ O << "unknown-encoding(" << BT->getEncoding() << ')';
+ } else {
+ O << ' ';
+ if (const char *Tag = dwarf::TagString(T->getTag()))
+ O << Tag;
+ else
+ O << "unknown-tag(" << T->getTag() << ")";
+ }
+ if (auto *CT = dyn_cast<DICompositeType>(T)) {
+ if (auto *S = CT->getRawIdentifier())
+ O << " (identifier: '" << S->getString() << "')";
+ }
O << '\n';
}
}
diff --git a/contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp
index c214d3c..203e1da 100644
--- a/contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp
@@ -16,6 +16,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
using namespace llvm;
@@ -33,11 +34,11 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {}
- void initializePass() override {
+ bool doInitialization(Module &M) override {
// Note: NoAA does not call InitializeAliasAnalysis because it's
// special and does not support chaining.
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
+ DL = &M.getDataLayout();
+ return true;
}
AliasResult alias(const Location &LocA, const Location &LocB) override {
diff --git a/contrib/llvm/lib/Analysis/PHITransAddr.cpp b/contrib/llvm/lib/Analysis/PHITransAddr.cpp
index a534418..177684f 100644
--- a/contrib/llvm/lib/Analysis/PHITransAddr.cpp
+++ b/contrib/llvm/lib/Analysis/PHITransAddr.cpp
@@ -404,10 +404,9 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB,
GEPOps.push_back(OpVal);
}
- GetElementPtrInst *Result =
- GetElementPtrInst::Create(GEPOps[0], makeArrayRef(GEPOps).slice(1),
- InVal->getName()+".phi.trans.insert",
- PredBB->getTerminator());
+ GetElementPtrInst *Result = GetElementPtrInst::Create(
+ GEP->getSourceElementType(), GEPOps[0], makeArrayRef(GEPOps).slice(1),
+ InVal->getName() + ".phi.trans.insert", PredBB->getTerminator());
Result->setIsInBounds(GEP->isInBounds());
NewInsts.push_back(Result);
return Result;
diff --git a/contrib/llvm/lib/Analysis/RegionPass.cpp b/contrib/llvm/lib/Analysis/RegionPass.cpp
index 6fa7b2e..5e1cdd4 100644
--- a/contrib/llvm/lib/Analysis/RegionPass.cpp
+++ b/contrib/llvm/lib/Analysis/RegionPass.cpp
@@ -17,6 +17,7 @@
#include "llvm/Analysis/RegionIterator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Timer.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
#define DEBUG_TYPE "regionpassmgr"
@@ -83,9 +84,11 @@ bool RGPassManager::runOnFunction(Function &F) {
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
RegionPass *P = (RegionPass*)getContainedPass(Index);
- dumpPassInfo(P, EXECUTION_MSG, ON_REGION_MSG,
- CurrentRegion->getNameStr());
- dumpRequiredSet(P);
+ if (isPassDebuggingExecutionsOrMore()) {
+ dumpPassInfo(P, EXECUTION_MSG, ON_REGION_MSG,
+ CurrentRegion->getNameStr());
+ dumpRequiredSet(P);
+ }
initializeAnalysisImpl(P);
@@ -96,11 +99,13 @@ bool RGPassManager::runOnFunction(Function &F) {
Changed |= P->runOnRegion(CurrentRegion, *this);
}
- if (Changed)
- dumpPassInfo(P, MODIFICATION_MSG, ON_REGION_MSG,
- skipThisRegion ? "<deleted>" :
- CurrentRegion->getNameStr());
- dumpPreservedSet(P);
+ if (isPassDebuggingExecutionsOrMore()) {
+ if (Changed)
+ dumpPassInfo(P, MODIFICATION_MSG, ON_REGION_MSG,
+ skipThisRegion ? "<deleted>" :
+ CurrentRegion->getNameStr());
+ dumpPreservedSet(P);
+ }
if (!skipThisRegion) {
// Manually check that this region is still healthy. This is done
@@ -120,8 +125,8 @@ bool RGPassManager::runOnFunction(Function &F) {
removeNotPreservedAnalysis(P);
recordAvailableAnalysis(P);
removeDeadPasses(P,
- skipThisRegion ? "<deleted>" :
- CurrentRegion->getNameStr(),
+ (!isPassDebuggingExecutionsOrMore() || skipThisRegion) ?
+ "<deleted>" : CurrentRegion->getNameStr(),
ON_REGION_MSG);
if (skipThisRegion)
@@ -194,7 +199,7 @@ public:
bool runOnRegion(Region *R, RGPassManager &RGM) override {
Out << Banner;
- for (const auto &BB : R->blocks()) {
+ for (const auto *BB : R->blocks()) {
if (BB)
BB->print(Out);
else
diff --git a/contrib/llvm/lib/Analysis/RegionPrinter.cpp b/contrib/llvm/lib/Analysis/RegionPrinter.cpp
index ad83113..d7f5109 100644
--- a/contrib/llvm/lib/Analysis/RegionPrinter.cpp
+++ b/contrib/llvm/lib/Analysis/RegionPrinter.cpp
@@ -123,7 +123,7 @@ struct DOTGraphTraits<RegionInfoPass*> : public DOTGraphTraits<RegionNode*> {
const RegionInfo &RI = *static_cast<const RegionInfo*>(R.getRegionInfo());
- for (const auto &BB : R.blocks())
+ for (auto *BB : R.blocks())
if (RI.getRegionFor(BB) == &R)
O.indent(2 * (depth + 1)) << "Node"
<< static_cast<const void*>(RI.getTopLevelRegion()->getBBNode(BB))
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
index ee42737..0e9f812 100644
--- a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -68,6 +68,7 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
@@ -87,7 +88,6 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLibraryInfo.h"
#include <algorithm>
using namespace llvm;
@@ -117,9 +117,9 @@ VerifySCEV("verify-scev",
INITIALIZE_PASS_BEGIN(ScalarEvolution, "scalar-evolution",
"Scalar Evolution Analysis", false, true)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(ScalarEvolution, "scalar-evolution",
"Scalar Evolution Analysis", false, true)
char ScalarEvolution::ID = 0;
@@ -726,6 +726,13 @@ public:
return;
}
+ // A simple case when N/1. The quotient is N.
+ if (Denominator->isOne()) {
+ *Quotient = Numerator;
+ *Remainder = D.Zero;
+ return;
+ }
+
// Split the Denominator when it is a product.
if (const SCEVMulExpr *T = dyn_cast<const SCEVMulExpr>(Denominator)) {
const SCEV *Q, *R;
@@ -788,6 +795,14 @@ public:
assert(Numerator->isAffine() && "Numerator should be affine");
divide(SE, Numerator->getStart(), Denominator, &StartQ, &StartR);
divide(SE, Numerator->getStepRecurrence(SE), Denominator, &StepQ, &StepR);
+ // Bail out if the types do not match.
+ Type *Ty = Denominator->getType();
+ if (Ty != StartQ->getType() || Ty != StartR->getType() ||
+ Ty != StepQ->getType() || Ty != StepR->getType()) {
+ Quotient = Zero;
+ Remainder = Numerator;
+ return;
+ }
Quotient = SE.getAddRecExpr(StartQ, StepQ, Numerator->getLoop(),
Numerator->getNoWrapFlags());
Remainder = SE.getAddRecExpr(StartR, StepR, Numerator->getLoop(),
@@ -1102,13 +1117,14 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
return getTruncateOrZeroExtend(SZ->getOperand(), Ty);
// trunc(x1+x2+...+xN) --> trunc(x1)+trunc(x2)+...+trunc(xN) if we can
- // eliminate all the truncates.
+ // eliminate all the truncates, or we replace other casts with truncates.
if (const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Op)) {
SmallVector<const SCEV *, 4> Operands;
bool hasTrunc = false;
for (unsigned i = 0, e = SA->getNumOperands(); i != e && !hasTrunc; ++i) {
const SCEV *S = getTruncateExpr(SA->getOperand(i), Ty);
- hasTrunc = isa<SCEVTruncateExpr>(S);
+ if (!isa<SCEVCastExpr>(SA->getOperand(i)))
+ hasTrunc = isa<SCEVTruncateExpr>(S);
Operands.push_back(S);
}
if (!hasTrunc)
@@ -1117,13 +1133,14 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
}
// trunc(x1*x2*...*xN) --> trunc(x1)*trunc(x2)*...*trunc(xN) if we can
- // eliminate all the truncates.
+ // eliminate all the truncates, or we replace other casts with truncates.
if (const SCEVMulExpr *SM = dyn_cast<SCEVMulExpr>(Op)) {
SmallVector<const SCEV *, 4> Operands;
bool hasTrunc = false;
for (unsigned i = 0, e = SM->getNumOperands(); i != e && !hasTrunc; ++i) {
const SCEV *S = getTruncateExpr(SM->getOperand(i), Ty);
- hasTrunc = isa<SCEVTruncateExpr>(S);
+ if (!isa<SCEVCastExpr>(SM->getOperand(i)))
+ hasTrunc = isa<SCEVTruncateExpr>(S);
Operands.push_back(S);
}
if (!hasTrunc)
@@ -1148,6 +1165,262 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
return S;
}
+// Get the limit of a recurrence such that incrementing by Step cannot cause
+// signed overflow as long as the value of the recurrence within the
+// loop does not exceed this limit before incrementing.
+static const SCEV *getSignedOverflowLimitForStep(const SCEV *Step,
+ ICmpInst::Predicate *Pred,
+ ScalarEvolution *SE) {
+ unsigned BitWidth = SE->getTypeSizeInBits(Step->getType());
+ if (SE->isKnownPositive(Step)) {
+ *Pred = ICmpInst::ICMP_SLT;
+ return SE->getConstant(APInt::getSignedMinValue(BitWidth) -
+ SE->getSignedRange(Step).getSignedMax());
+ }
+ if (SE->isKnownNegative(Step)) {
+ *Pred = ICmpInst::ICMP_SGT;
+ return SE->getConstant(APInt::getSignedMaxValue(BitWidth) -
+ SE->getSignedRange(Step).getSignedMin());
+ }
+ return nullptr;
+}
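
As a concrete reading of the limit computed here (a sketch, not part of the patch; the 8-bit width and the step bound of 3 are assumptions chosen for the example): the limit is INT8_MIN minus the step's signed maximum, which wraps to +125, and any recurrence value strictly below 125 can be incremented by the step without signed overflow.

#include "llvm/ADT/APInt.h"
#include <cassert>

int main() {
  using llvm::APInt;
  const unsigned BitWidth = 8;
  APInt StepMax(BitWidth, 3); // assumed getSignedMax() of the step's range
  // Same arithmetic as getSignedOverflowLimitForStep for a known-positive step.
  APInt Limit = APInt::getSignedMinValue(BitWidth) - StepMax;
  assert(Limit.getSExtValue() == 125 && "-128 - 3 wraps to +125 in 8 bits");
  // Every value V with V <s 125 satisfies V + 3 <= 127, so no signed overflow.
  return 0;
}
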
+
+// Get the limit of a recurrence such that incrementing by Step cannot cause
+// unsigned overflow as long as the value of the recurrence within the loop does
+// not exceed this limit before incrementing.
+static const SCEV *getUnsignedOverflowLimitForStep(const SCEV *Step,
+ ICmpInst::Predicate *Pred,
+ ScalarEvolution *SE) {
+ unsigned BitWidth = SE->getTypeSizeInBits(Step->getType());
+ *Pred = ICmpInst::ICMP_ULT;
+
+ return SE->getConstant(APInt::getMinValue(BitWidth) -
+ SE->getUnsignedRange(Step).getUnsignedMax());
+}
+
+namespace {
+
+struct ExtendOpTraitsBase {
+ typedef const SCEV *(ScalarEvolution::*GetExtendExprTy)(const SCEV *, Type *);
+};
+
+// Used to make code generic over signed and unsigned overflow.
+template <typename ExtendOp> struct ExtendOpTraits {
+ // Members present:
+ //
+ // static const SCEV::NoWrapFlags WrapType;
+ //
+ // static const ExtendOpTraitsBase::GetExtendExprTy GetExtendExpr;
+ //
+ // static const SCEV *getOverflowLimitForStep(const SCEV *Step,
+ // ICmpInst::Predicate *Pred,
+ // ScalarEvolution *SE);
+};
+
+template <>
+struct ExtendOpTraits<SCEVSignExtendExpr> : public ExtendOpTraitsBase {
+ static const SCEV::NoWrapFlags WrapType = SCEV::FlagNSW;
+
+ static const GetExtendExprTy GetExtendExpr;
+
+ static const SCEV *getOverflowLimitForStep(const SCEV *Step,
+ ICmpInst::Predicate *Pred,
+ ScalarEvolution *SE) {
+ return getSignedOverflowLimitForStep(Step, Pred, SE);
+ }
+};
+
+const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits<
+ SCEVSignExtendExpr>::GetExtendExpr = &ScalarEvolution::getSignExtendExpr;
+
+template <>
+struct ExtendOpTraits<SCEVZeroExtendExpr> : public ExtendOpTraitsBase {
+ static const SCEV::NoWrapFlags WrapType = SCEV::FlagNUW;
+
+ static const GetExtendExprTy GetExtendExpr;
+
+ static const SCEV *getOverflowLimitForStep(const SCEV *Step,
+ ICmpInst::Predicate *Pred,
+ ScalarEvolution *SE) {
+ return getUnsignedOverflowLimitForStep(Step, Pred, SE);
+ }
+};
+
+const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits<
+ SCEVZeroExtendExpr>::GetExtendExpr = &ScalarEvolution::getZeroExtendExpr;
+}
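
The traits specializations above let a single template body handle both the sign- and zero-extend cases. The shape of that dispatch, reduced to a standalone sketch with made-up names and 8-bit toy limits (not the SCEV types themselves):

#include <cstdio>

// Hypothetical stand-ins for the two extension kinds.
struct SignExtendTag {};
struct ZeroExtendTag {};

// Primary template left undefined, exactly like ExtendOpTraits above.
template <typename ExtendOp> struct DemoTraits;

template <> struct DemoTraits<SignExtendTag> {
  static const char *name() { return "sext"; }
  static long limit(long stepMax) { return 128 - stepMax; } // INT8_MIN - max, after 8-bit wrap
};

template <> struct DemoTraits<ZeroExtendTag> {
  static const char *name() { return "zext"; }
  static long limit(long stepMax) { return 256 - stepMax; } // 0 - max, after 8-bit wrap
};

// One generic function body, parameterized the same way getPreStartForExtend is.
template <typename ExtendOp> void report(long stepMax) {
  std::printf("%s limit for step <= %ld is %ld\n", DemoTraits<ExtendOp>::name(),
              stepMax, DemoTraits<ExtendOp>::limit(stepMax));
}

int main() {
  report<SignExtendTag>(3);
  report<ZeroExtendTag>(3);
  return 0;
}
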
+
+// The recurrence AR has been shown to have no signed/unsigned wrap or something
+// close to it. Typically, if we can prove NSW/NUW for AR, then we can just as
+// easily prove NSW/NUW for its preincrement or postincrement sibling. This
+// allows normalizing a sign/zero extended AddRec as such: {sext/zext(Step +
+// Start),+,Step} => {(Step + sext/zext(Start),+,Step} As a result, the
+// Start),+,Step} => {Step + sext/zext(Start),+,Step}. As a result, the
+// expression "Step + sext/zext(PreIncAR)" is congruent with
+// "sext/zext(PostIncAR)"
+template <typename ExtendOpTy>
+static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty,
+ ScalarEvolution *SE) {
+ auto WrapType = ExtendOpTraits<ExtendOpTy>::WrapType;
+ auto GetExtendExpr = ExtendOpTraits<ExtendOpTy>::GetExtendExpr;
+
+ const Loop *L = AR->getLoop();
+ const SCEV *Start = AR->getStart();
+ const SCEV *Step = AR->getStepRecurrence(*SE);
+
+ // Check for a simple looking step prior to loop entry.
+ const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Start);
+ if (!SA)
+ return nullptr;
+
+ // Create an AddExpr for "PreStart" after subtracting Step. Full SCEV
+ // subtraction is expensive. For this purpose, perform a quick and dirty
+ // difference, by checking for Step in the operand list.
+ SmallVector<const SCEV *, 4> DiffOps;
+ for (const SCEV *Op : SA->operands())
+ if (Op != Step)
+ DiffOps.push_back(Op);
+
+ if (DiffOps.size() == SA->getNumOperands())
+ return nullptr;
+
+ // Try to prove `WrapType` (SCEV::FlagNSW or SCEV::FlagNUW) on `PreStart` +
+ // `Step`:
+
+ // 1. NSW/NUW flags on the step increment.
+ const SCEV *PreStart = SE->getAddExpr(DiffOps, SA->getNoWrapFlags());
+ const SCEVAddRecExpr *PreAR = dyn_cast<SCEVAddRecExpr>(
+ SE->getAddRecExpr(PreStart, Step, L, SCEV::FlagAnyWrap));
+
+ // "{S,+,X} is <nsw>/<nuw>" and "the backedge is taken at least once" implies
+ // "S+X does not sign/unsign-overflow".
+ //
+
+ const SCEV *BECount = SE->getBackedgeTakenCount(L);
+ if (PreAR && PreAR->getNoWrapFlags(WrapType) &&
+ !isa<SCEVCouldNotCompute>(BECount) && SE->isKnownPositive(BECount))
+ return PreStart;
+
+ // 2. Direct overflow check on the step operation's expression.
+ unsigned BitWidth = SE->getTypeSizeInBits(AR->getType());
+ Type *WideTy = IntegerType::get(SE->getContext(), BitWidth * 2);
+ const SCEV *OperandExtendedStart =
+ SE->getAddExpr((SE->*GetExtendExpr)(PreStart, WideTy),
+ (SE->*GetExtendExpr)(Step, WideTy));
+ if ((SE->*GetExtendExpr)(Start, WideTy) == OperandExtendedStart) {
+ if (PreAR && AR->getNoWrapFlags(WrapType)) {
+ // If we know `AR` == {`PreStart`+`Step`,+,`Step`} is `WrapType` (FlagNSW
+ // or FlagNUW) and that `PreStart` + `Step` is `WrapType` too, then
+ // `PreAR` == {`PreStart`,+,`Step`} is also `WrapType`. Cache this fact.
+ const_cast<SCEVAddRecExpr *>(PreAR)->setNoWrapFlags(WrapType);
+ }
+ return PreStart;
+ }
+
+ // 3. Loop precondition.
+ ICmpInst::Predicate Pred;
+ const SCEV *OverflowLimit =
+ ExtendOpTraits<ExtendOpTy>::getOverflowLimitForStep(Step, &Pred, SE);
+
+ if (OverflowLimit &&
+ SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit)) {
+ return PreStart;
+ }
+ return nullptr;
+}
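
A small numeric reading of the pre/post-increment relationship used above (values assumed purely for illustration): if the post-increment recurrence is {Start,+,Step} with Start = PreStart + Step, then extending the post-increment value equals Step plus the extended pre-increment value at every iteration where neither recurrence has wrapped.

#include <cassert>
#include <cstdint>

int main() {
  // Assumed example: Step = 4, PreStart = 6, so Start = PreStart + Step = 10.
  const int Step = 4, PreStart = 6, Start = PreStart + Step;
  for (int i = 0; i < 40; ++i) {
    int PreVal = PreStart + i * Step; // value of {6,+,4} at iteration i
    int PostVal = Start + i * Step;   // value of {10,+,4} at iteration i
    if (PostVal > INT8_MAX)
      continue; // past this point the 8-bit recurrence would have wrapped
    // Sign-extending the narrow post-increment value is the same as
    // sign-extending the pre-increment value and adding Step afterwards,
    // which is the congruence getPreStartForExtend relies on.
    int32_t ExtPost = static_cast<int32_t>(static_cast<int8_t>(PostVal));
    int32_t ExtPre = static_cast<int32_t>(static_cast<int8_t>(PreVal));
    assert(ExtPost == Step + ExtPre);
  }
  return 0;
}
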
+
+// Get the normalized zero or sign extended expression for this AddRec's Start.
+template <typename ExtendOpTy>
+static const SCEV *getExtendAddRecStart(const SCEVAddRecExpr *AR, Type *Ty,
+ ScalarEvolution *SE) {
+ auto GetExtendExpr = ExtendOpTraits<ExtendOpTy>::GetExtendExpr;
+
+ const SCEV *PreStart = getPreStartForExtend<ExtendOpTy>(AR, Ty, SE);
+ if (!PreStart)
+ return (SE->*GetExtendExpr)(AR->getStart(), Ty);
+
+ return SE->getAddExpr((SE->*GetExtendExpr)(AR->getStepRecurrence(*SE), Ty),
+ (SE->*GetExtendExpr)(PreStart, Ty));
+}
+
+// Try to prove away overflow by looking at "nearby" add recurrences. A
+// motivating example for this rule: if we know `{0,+,4}` is `ult` `-1` and it
+// does not itself wrap then we can conclude that `{1,+,4}` is `nuw`.
+//
+// Formally:
+//
+// {S,+,X} == {S-T,+,X} + T
+// => Ext({S,+,X}) == Ext({S-T,+,X} + T)
+//
+// If ({S-T,+,X} + T) does not overflow ... (1)
+//
+// RHS == Ext({S-T,+,X} + T) == Ext({S-T,+,X}) + Ext(T)
+//
+// If {S-T,+,X} does not overflow ... (2)
+//
+// RHS == Ext({S-T,+,X}) + Ext(T) == {Ext(S-T),+,Ext(X)} + Ext(T)
+// == {Ext(S-T)+Ext(T),+,Ext(X)}
+//
+// If (S-T)+T does not overflow ... (3)
+//
+// RHS == {Ext(S-T)+Ext(T),+,Ext(X)} == {Ext(S-T+T),+,Ext(X)}
+// == {Ext(S),+,Ext(X)} == LHS
+//
+// Thus, if (1), (2) and (3) are true for some T, then
+// Ext({S,+,X}) == {Ext(S),+,Ext(X)}
+//
+// (3) is implied by (1) -- "(S-T)+T does not overflow" is simply "({S-T,+,X}+T)
+// does not overflow" restricted to the 0th iteration. Therefore we only need
+// to check for (1) and (2).
+//
+// In the current context, S is `Start`, X is `Step`, Ext is `ExtendOpTy` and T
+// is `Delta` (defined below).
+//
+template <typename ExtendOpTy>
+bool ScalarEvolution::proveNoWrapByVaryingStart(const SCEV *Start,
+ const SCEV *Step,
+ const Loop *L) {
+ auto WrapType = ExtendOpTraits<ExtendOpTy>::WrapType;
+
+ // We restrict `Start` to a constant to prevent SCEV from spending too much
+ // time here. It is correct (but more expensive) to continue with a
+ // non-constant `Start` and do a general SCEV subtraction to compute
+ // `PreStart` below.
+ //
+ const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start);
+ if (!StartC)
+ return false;
+
+ APInt StartAI = StartC->getValue()->getValue();
+
+ for (unsigned Delta : {-2, -1, 1, 2}) {
+ const SCEV *PreStart = getConstant(StartAI - Delta);
+
+ // Give up if we don't already have the add recurrence we need because
+ // actually constructing an add recurrence is relatively expensive.
+ const SCEVAddRecExpr *PreAR = [&]() {
+ FoldingSetNodeID ID;
+ ID.AddInteger(scAddRecExpr);
+ ID.AddPointer(PreStart);
+ ID.AddPointer(Step);
+ ID.AddPointer(L);
+ void *IP = nullptr;
+ return static_cast<SCEVAddRecExpr *>(
+ this->UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
+ }();
+
+ if (PreAR && PreAR->getNoWrapFlags(WrapType)) { // proves (2)
+ const SCEV *DeltaS = getConstant(StartC->getType(), Delta);
+ ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
+ const SCEV *Limit = ExtendOpTraits<ExtendOpTy>::getOverflowLimitForStep(
+ DeltaS, &Pred, this);
+ if (Limit && isKnownPredicate(Pred, PreAR, Limit)) // proves (1)
+ return true;
+ }
+ }
+
+ return false;
+}
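
The motivating example from the comment can be walked through with small numbers (a sketch under assumed ranges, not part of the patch): over i8, take PreAR = {0,+,4} known nuw, Delta = 1, so the unsigned overflow limit for a step of 1 is 255; whenever PreAR is unsigned-below that limit, adding 1 cannot wrap, so the shifted recurrence {1,+,4} stays in range too, which is exactly conditions (1) and (2) with T = 1.

#include <cassert>

int main() {
  // Assumed 8-bit example: PreAR = {0,+,4} is <nuw> (condition (2)), Delta = 1,
  // and the unsigned overflow limit for a step of 1 is 0 - 1 == 255 (wrapped).
  const unsigned Limit = 255;
  unsigned Pre = 0; // value of {0,+,4}
  for (int i = 0; i < 60; ++i) {
    if (Pre >= Limit || Pre + 4 > 255)
      break; // stop before either recurrence could wrap
    // Condition (1): Pre <u 255, so Pre + 1 does not wrap; the shifted
    // recurrence {1,+,4} therefore takes the value Pre + 1 without wrapping.
    unsigned Shifted = Pre + 1;
    assert(Shifted <= 255 && "{1,+,4} stayed in range because {0,+,4} did");
    Pre += 4;
  }
  return 0;
}
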
+
const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
Type *Ty) {
assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
@@ -1201,9 +1474,9 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
// If we have special knowledge that this addrec won't overflow,
// we don't need to do any further analysis.
if (AR->getNoWrapFlags(SCEV::FlagNUW))
- return getAddRecExpr(getZeroExtendExpr(Start, Ty),
- getZeroExtendExpr(Step, Ty),
- L, AR->getNoWrapFlags());
+ return getAddRecExpr(
+ getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
+ getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
// Check whether the backedge-taken count is SCEVCouldNotCompute.
// Note that this serves two purposes: It filters out loops that are
@@ -1240,9 +1513,9 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
// Cache knowledge of AR NUW, which is propagated to this AddRec.
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
// Return the expression with the addrec on the outside.
- return getAddRecExpr(getZeroExtendExpr(Start, Ty),
- getZeroExtendExpr(Step, Ty),
- L, AR->getNoWrapFlags());
+ return getAddRecExpr(
+ getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
+ getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
}
// Similar to above, only this time treat the step value as signed.
// This covers loops that count down.
@@ -1255,9 +1528,9 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
// Negative step causes unsigned wrap, but it still can't self-wrap.
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
// Return the expression with the addrec on the outside.
- return getAddRecExpr(getZeroExtendExpr(Start, Ty),
- getSignExtendExpr(Step, Ty),
- L, AR->getNoWrapFlags());
+ return getAddRecExpr(
+ getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
+ getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
}
}
@@ -1275,9 +1548,9 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
// Cache knowledge of AR NUW, which is propagated to this AddRec.
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
// Return the expression with the addrec on the outside.
- return getAddRecExpr(getZeroExtendExpr(Start, Ty),
- getZeroExtendExpr(Step, Ty),
- L, AR->getNoWrapFlags());
+ return getAddRecExpr(
+ getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
+ getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
}
} else if (isKnownNegative(Step)) {
const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) -
@@ -1290,12 +1563,19 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
// Negative step causes unsigned wrap, but it still can't self-wrap.
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
// Return the expression with the addrec on the outside.
- return getAddRecExpr(getZeroExtendExpr(Start, Ty),
- getSignExtendExpr(Step, Ty),
- L, AR->getNoWrapFlags());
+ return getAddRecExpr(
+ getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
+ getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
}
}
}
+
+ if (proveNoWrapByVaryingStart<SCEVZeroExtendExpr>(Start, Step, L)) {
+ const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
+ return getAddRecExpr(
+ getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this),
+ getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
+ }
}
// The cast wasn't folded; create an explicit cast node.
@@ -1307,104 +1587,6 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
return S;
}
-// Get the limit of a recurrence such that incrementing by Step cannot cause
-// signed overflow as long as the value of the recurrence within the loop does
-// not exceed this limit before incrementing.
-static const SCEV *getOverflowLimitForStep(const SCEV *Step,
- ICmpInst::Predicate *Pred,
- ScalarEvolution *SE) {
- unsigned BitWidth = SE->getTypeSizeInBits(Step->getType());
- if (SE->isKnownPositive(Step)) {
- *Pred = ICmpInst::ICMP_SLT;
- return SE->getConstant(APInt::getSignedMinValue(BitWidth) -
- SE->getSignedRange(Step).getSignedMax());
- }
- if (SE->isKnownNegative(Step)) {
- *Pred = ICmpInst::ICMP_SGT;
- return SE->getConstant(APInt::getSignedMaxValue(BitWidth) -
- SE->getSignedRange(Step).getSignedMin());
- }
- return nullptr;
-}
-
-// The recurrence AR has been shown to have no signed wrap. Typically, if we can
-// prove NSW for AR, then we can just as easily prove NSW for its preincrement
-// or postincrement sibling. This allows normalizing a sign extended AddRec as
-// such: {sext(Step + Start),+,Step} => {(Step + sext(Start),+,Step} As a
-// result, the expression "Step + sext(PreIncAR)" is congruent with
-// "sext(PostIncAR)"
-static const SCEV *getPreStartForSignExtend(const SCEVAddRecExpr *AR,
- Type *Ty,
- ScalarEvolution *SE) {
- const Loop *L = AR->getLoop();
- const SCEV *Start = AR->getStart();
- const SCEV *Step = AR->getStepRecurrence(*SE);
-
- // Check for a simple looking step prior to loop entry.
- const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Start);
- if (!SA)
- return nullptr;
-
- // Create an AddExpr for "PreStart" after subtracting Step. Full SCEV
- // subtraction is expensive. For this purpose, perform a quick and dirty
- // difference, by checking for Step in the operand list.
- SmallVector<const SCEV *, 4> DiffOps;
- for (const SCEV *Op : SA->operands())
- if (Op != Step)
- DiffOps.push_back(Op);
-
- if (DiffOps.size() == SA->getNumOperands())
- return nullptr;
-
- // This is a postinc AR. Check for overflow on the preinc recurrence using the
- // same three conditions that getSignExtendedExpr checks.
-
- // 1. NSW flags on the step increment.
- const SCEV *PreStart = SE->getAddExpr(DiffOps, SA->getNoWrapFlags());
- const SCEVAddRecExpr *PreAR = dyn_cast<SCEVAddRecExpr>(
- SE->getAddRecExpr(PreStart, Step, L, SCEV::FlagAnyWrap));
-
- if (PreAR && PreAR->getNoWrapFlags(SCEV::FlagNSW))
- return PreStart;
-
- // 2. Direct overflow check on the step operation's expression.
- unsigned BitWidth = SE->getTypeSizeInBits(AR->getType());
- Type *WideTy = IntegerType::get(SE->getContext(), BitWidth * 2);
- const SCEV *OperandExtendedStart =
- SE->getAddExpr(SE->getSignExtendExpr(PreStart, WideTy),
- SE->getSignExtendExpr(Step, WideTy));
- if (SE->getSignExtendExpr(Start, WideTy) == OperandExtendedStart) {
- // Cache knowledge of PreAR NSW.
- if (PreAR)
- const_cast<SCEVAddRecExpr *>(PreAR)->setNoWrapFlags(SCEV::FlagNSW);
- // FIXME: this optimization needs a unit test
- DEBUG(dbgs() << "SCEV: untested prestart overflow check\n");
- return PreStart;
- }
-
- // 3. Loop precondition.
- ICmpInst::Predicate Pred;
- const SCEV *OverflowLimit = getOverflowLimitForStep(Step, &Pred, SE);
-
- if (OverflowLimit &&
- SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit)) {
- return PreStart;
- }
- return nullptr;
-}
-
-// Get the normalized sign-extended expression for this AddRec's Start.
-static const SCEV *getSignExtendAddRecStart(const SCEVAddRecExpr *AR,
- Type *Ty,
- ScalarEvolution *SE) {
- const SCEV *PreStart = getPreStartForSignExtend(AR, Ty, SE);
- if (!PreStart)
- return SE->getSignExtendExpr(AR->getStart(), Ty);
-
- return SE->getAddExpr(SE->getSignExtendExpr(AR->getStepRecurrence(*SE), Ty),
- SE->getSignExtendExpr(PreStart, Ty));
-}
-
const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
Type *Ty) {
assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
@@ -1483,9 +1665,9 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
// If we have special knowledge that this addrec won't overflow,
// we don't need to do any further analysis.
if (AR->getNoWrapFlags(SCEV::FlagNSW))
- return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
- getSignExtendExpr(Step, Ty),
- L, SCEV::FlagNSW);
+ return getAddRecExpr(
+ getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
+ getSignExtendExpr(Step, Ty), L, SCEV::FlagNSW);
// Check whether the backedge-taken count is SCEVCouldNotCompute.
// Note that this serves two purposes: It filters out loops that are
@@ -1522,9 +1704,9 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
// Cache knowledge of AR NSW, which is propagated to this AddRec.
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
// Return the expression with the addrec on the outside.
- return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
- getSignExtendExpr(Step, Ty),
- L, AR->getNoWrapFlags());
+ return getAddRecExpr(
+ getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
+ getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
}
// Similar to above, only this time treat the step value as unsigned.
// This covers loops that count up with an unsigned step.
@@ -1533,12 +1715,20 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
getMulExpr(WideMaxBECount,
getZeroExtendExpr(Step, WideTy)));
if (SAdd == OperandExtendedAdd) {
- // Cache knowledge of AR NSW, which is propagated to this AddRec.
- const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
+ // If AR wraps around then
+ //
+ // abs(Step) * MaxBECount > unsigned-max(AR->getType())
+ // => SAdd != OperandExtendedAdd
+ //
+ // Thus (AR is not NW => SAdd != OperandExtendedAdd) <=>
+ // (SAdd == OperandExtendedAdd => AR is NW)
+
+ const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
+
// Return the expression with the addrec on the outside.
- return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
- getZeroExtendExpr(Step, Ty),
- L, AR->getNoWrapFlags());
+ return getAddRecExpr(
+ getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
+ getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
}
}
@@ -1547,7 +1737,8 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
// with the start value and the backedge is guarded by a comparison
// with the post-inc value, the addrec is safe.
ICmpInst::Predicate Pred;
- const SCEV *OverflowLimit = getOverflowLimitForStep(Step, &Pred, this);
+ const SCEV *OverflowLimit =
+ getSignedOverflowLimitForStep(Step, &Pred, this);
if (OverflowLimit &&
(isLoopBackedgeGuardedByCond(L, Pred, AR, OverflowLimit) ||
(isLoopEntryGuardedByCond(L, Pred, Start, OverflowLimit) &&
@@ -1555,9 +1746,9 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
OverflowLimit)))) {
// Cache knowledge of AR NSW, then propagate NSW to the wide AddRec.
const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
- return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
- getSignExtendExpr(Step, Ty),
- L, AR->getNoWrapFlags());
+ return getAddRecExpr(
+ getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
+ getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
}
}
// If Start and Step are constants, check if we can apply this
@@ -1576,6 +1767,13 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
return getAddExpr(Start, getSignExtendExpr(NewAR, Ty));
}
}
+
+ if (proveNoWrapByVaryingStart<SCEVSignExtendExpr>(Start, Step, L)) {
+ const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
+ return getAddRecExpr(
+ getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this),
+ getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags());
+ }
}
// The cast wasn't folded; create an explicit cast node.
@@ -2169,8 +2367,7 @@ static bool containsConstantSomewhere(const SCEV *StartExpr) {
if (isa<SCEVAddExpr>(*CurrentExpr) || isa<SCEVMulExpr>(*CurrentExpr)) {
const auto *CurrentNAry = cast<SCEVNAryExpr>(CurrentExpr);
- for (const SCEV *Operand : CurrentNAry->operands())
- Ops.push_back(Operand);
+ Ops.append(CurrentNAry->op_begin(), CurrentNAry->op_end());
}
}
return false;
@@ -2729,6 +2926,56 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
return S;
}
+const SCEV *
+ScalarEvolution::getGEPExpr(Type *PointeeType, const SCEV *BaseExpr,
+ const SmallVectorImpl<const SCEV *> &IndexExprs,
+ bool InBounds) {
+ // getSCEV(Base)->getType() has the same address space as Base->getType()
+ // because SCEV::getType() preserves the address space.
+ Type *IntPtrTy = getEffectiveSCEVType(BaseExpr->getType());
+ // FIXME(PR23527): Don't blindly transfer the inbounds flag from the GEP
+ // instruction to its SCEV, because the Instruction may be guarded by control
+ // flow and the no-overflow bits may not be valid for the expression in any
+ // context.
+ SCEV::NoWrapFlags Wrap = InBounds ? SCEV::FlagNSW : SCEV::FlagAnyWrap;
+
+ const SCEV *TotalOffset = getConstant(IntPtrTy, 0);
+ // The address space is unimportant. The first thing we do on CurTy is getting
+ // its element type.
+ Type *CurTy = PointerType::getUnqual(PointeeType);
+ for (const SCEV *IndexExpr : IndexExprs) {
+ // Compute the (potentially symbolic) offset in bytes for this index.
+ if (StructType *STy = dyn_cast<StructType>(CurTy)) {
+ // For a struct, add the member offset.
+ ConstantInt *Index = cast<SCEVConstant>(IndexExpr)->getValue();
+ unsigned FieldNo = Index->getZExtValue();
+ const SCEV *FieldOffset = getOffsetOfExpr(IntPtrTy, STy, FieldNo);
+
+ // Add the field offset to the running total offset.
+ TotalOffset = getAddExpr(TotalOffset, FieldOffset);
+
+ // Update CurTy to the type of the field at Index.
+ CurTy = STy->getTypeAtIndex(Index);
+ } else {
+ // Update CurTy to its element type.
+ CurTy = cast<SequentialType>(CurTy)->getElementType();
+ // For an array, add the element offset, explicitly scaled.
+ const SCEV *ElementSize = getSizeOfExpr(IntPtrTy, CurTy);
+ // Getelementptr indices are signed.
+ IndexExpr = getTruncateOrSignExtend(IndexExpr, IntPtrTy);
+
+ // Multiply the index by the element size to compute the element offset.
+ const SCEV *LocalOffset = getMulExpr(IndexExpr, ElementSize, Wrap);
+
+ // Add the element offset to the running total offset.
+ TotalOffset = getAddExpr(TotalOffset, LocalOffset);
+ }
+ }
+
+ // Add the total offset from all the GEP indices to the base.
+ return getAddExpr(BaseExpr, TotalOffset, Wrap);
+}
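
The byte-offset arithmetic performed by this helper can be checked by hand on a small example (layout numbers assumed for a typical 64-bit target, purely illustrative): for gep {i32, [4 x i64]}* %base, i64 1, i32 1, i64 2, the offset is 1 * 40 + 8 + 2 * 8 = 64 bytes.

#include <cassert>
#include <cstdint>

int main() {
  // Assumed DataLayout facts (illustrative only): sizeof({i32, [4 x i64]}) = 40,
  // field 1 sits at byte offset 8, and sizeof(i64) = 8.
  const int64_t StructSize = 40, Field1Offset = 8, I64Size = 8;

  // Mirrors the loop in getGEPExpr: sequential indices are scaled by the
  // element size, struct indices add the field offset from the struct layout.
  int64_t TotalOffset = 0;
  TotalOffset += 1 * StructSize; // first index steps over whole objects
  TotalOffset += Field1Offset;   // struct index 1 -> field offset
  TotalOffset += 2 * I64Size;    // array index 2 -> 2 * element size

  assert(TotalOffset == 64 && "result is base + 64 bytes");
  return 0;
}
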
+
const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS,
const SCEV *RHS) {
SmallVector<const SCEV *, 2> Ops;
@@ -2950,39 +3197,23 @@ const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS,
}
const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) {
- // If we have DataLayout, we can bypass creating a target-independent
+ // We can bypass creating a target-independent
// constant expression and then folding it back into a ConstantInt.
// This is just a compile-time optimization.
- if (DL)
- return getConstant(IntTy, DL->getTypeAllocSize(AllocTy));
-
- Constant *C = ConstantExpr::getSizeOf(AllocTy);
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- if (Constant *Folded = ConstantFoldConstantExpression(CE, DL, TLI))
- C = Folded;
- Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
- assert(Ty == IntTy && "Effective SCEV type doesn't match");
- return getTruncateOrZeroExtend(getSCEV(C), Ty);
+ return getConstant(IntTy,
+ F->getParent()->getDataLayout().getTypeAllocSize(AllocTy));
}
const SCEV *ScalarEvolution::getOffsetOfExpr(Type *IntTy,
StructType *STy,
unsigned FieldNo) {
- // If we have DataLayout, we can bypass creating a target-independent
+ // We can bypass creating a target-independent
// constant expression and then folding it back into a ConstantInt.
// This is just a compile-time optimization.
- if (DL) {
- return getConstant(IntTy,
- DL->getStructLayout(STy)->getElementOffset(FieldNo));
- }
-
- Constant *C = ConstantExpr::getOffsetOf(STy, FieldNo);
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- if (Constant *Folded = ConstantFoldConstantExpression(CE, DL, TLI))
- C = Folded;
-
- Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy));
- return getTruncateOrZeroExtend(getSCEV(C), Ty);
+ return getConstant(
+ IntTy,
+ F->getParent()->getDataLayout().getStructLayout(STy)->getElementOffset(
+ FieldNo));
}
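
Both helpers now go straight to the Module's DataLayout. A hedged sketch of the two queries they rely on, using a made-up struct type and a hard-coded layout string where real code would use M.getDataLayout() (LLVM 3.7-era API assumed):

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include <cstdio>

using namespace llvm;

int main() {
  LLVMContext Ctx;
  // Illustrative layout description for a common 64-bit target.
  DataLayout DL("e-m:e-i64:64-f80:128-n8:16:32:64-S128");

  // %pair = type { i32, i64 }
  StructType *Pair = StructType::create(
      Ctx, {Type::getInt32Ty(Ctx), Type::getInt64Ty(Ctx)}, "pair");

  // getSizeOfExpr boils down to getTypeAllocSize ...
  uint64_t Size = DL.getTypeAllocSize(Pair); // 16 with this layout
  // ... and getOffsetOfExpr to getStructLayout()->getElementOffset.
  uint64_t Off1 = DL.getStructLayout(Pair)->getElementOffset(1); // 8

  std::printf("sizeof(pair) = %llu, offsetof(field 1) = %llu\n",
              (unsigned long long)Size, (unsigned long long)Off1);
  return 0;
}
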
const SCEV *ScalarEvolution::getUnknown(Value *V) {
@@ -3024,19 +3255,7 @@ bool ScalarEvolution::isSCEVable(Type *Ty) const {
/// for which isSCEVable must return true.
uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const {
assert(isSCEVable(Ty) && "Type is not SCEVable!");
-
- // If we have a DataLayout, use it!
- if (DL)
- return DL->getTypeSizeInBits(Ty);
-
- // Integer types have fixed sizes.
- if (Ty->isIntegerTy())
- return Ty->getPrimitiveSizeInBits();
-
- // The only other support type is pointer. Without DataLayout, conservatively
- // assume pointers are 64-bit.
- assert(Ty->isPointerTy() && "isSCEVable permitted a non-SCEVable type!");
- return 64;
+ return F->getParent()->getDataLayout().getTypeSizeInBits(Ty);
}
/// getEffectiveSCEVType - Return a type with the same bitwidth as
@@ -3052,12 +3271,7 @@ Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const {
// The only other support type is pointer.
assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!");
-
- if (DL)
- return DL->getIntPtrType(Ty);
-
- // Without DataLayout, conservatively assume pointers are 64-bit.
- return Type::getInt64Ty(getContext());
+ return F->getParent()->getDataLayout().getIntPtrType(Ty);
}
const SCEV *ScalarEvolution::getCouldNotCompute() {
@@ -3444,10 +3658,12 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
// If the increment doesn't overflow, then neither the addrec nor
// the post-increment will overflow.
if (const AddOperator *OBO = dyn_cast<AddOperator>(BEValueV)) {
- if (OBO->hasNoUnsignedWrap())
- Flags = setFlags(Flags, SCEV::FlagNUW);
- if (OBO->hasNoSignedWrap())
- Flags = setFlags(Flags, SCEV::FlagNSW);
+ if (OBO->getOperand(0) == PN) {
+ if (OBO->hasNoUnsignedWrap())
+ Flags = setFlags(Flags, SCEV::FlagNUW);
+ if (OBO->hasNoSignedWrap())
+ Flags = setFlags(Flags, SCEV::FlagNSW);
+ }
} else if (GEPOperator *GEP = dyn_cast<GEPOperator>(BEValueV)) {
// If the increment is an inbounds GEP, then we know the address
// space cannot be wrapped around. We cannot make any guarantee
@@ -3455,7 +3671,7 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
// unsigned but we may have a negative index from the base
// pointer. We can guarantee that no unsigned wrap occurs if the
// indices form a positive value.
- if (GEP->isInBounds()) {
+ if (GEP->isInBounds() && GEP->getOperand(0) == PN) {
Flags = setFlags(Flags, SCEV::FlagNW);
const SCEV *Ptr = getSCEV(GEP->getPointerOperand());
@@ -3521,7 +3737,8 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
// PHI's incoming blocks are in a different loop, in which case doing so
// risks breaking LCSSA form. Instcombine would normally zap these, but
// it doesn't have DominatorTree information, so it may miss cases.
- if (Value *V = SimplifyInstruction(PN, DL, TLI, DT, AC))
+ if (Value *V =
+ SimplifyInstruction(PN, F->getParent()->getDataLayout(), TLI, DT, AC))
if (LI->replacementPreservesLCSSAForm(PN, V))
return getSCEV(V);
@@ -3533,52 +3750,16 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
/// operations. This allows them to be analyzed by regular SCEV code.
///
const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
- Type *IntPtrTy = getEffectiveSCEVType(GEP->getType());
Value *Base = GEP->getOperand(0);
// Don't attempt to analyze GEPs over unsized objects.
if (!Base->getType()->getPointerElementType()->isSized())
return getUnknown(GEP);
- // Don't blindly transfer the inbounds flag from the GEP instruction to the
- // Add expression, because the Instruction may be guarded by control flow
- // and the no-overflow bits may not be valid for the expression in any
- // context.
- SCEV::NoWrapFlags Wrap = GEP->isInBounds() ? SCEV::FlagNSW : SCEV::FlagAnyWrap;
-
- const SCEV *TotalOffset = getConstant(IntPtrTy, 0);
- gep_type_iterator GTI = gep_type_begin(GEP);
- for (GetElementPtrInst::op_iterator I = std::next(GEP->op_begin()),
- E = GEP->op_end();
- I != E; ++I) {
- Value *Index = *I;
- // Compute the (potentially symbolic) offset in bytes for this index.
- if (StructType *STy = dyn_cast<StructType>(*GTI++)) {
- // For a struct, add the member offset.
- unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
- const SCEV *FieldOffset = getOffsetOfExpr(IntPtrTy, STy, FieldNo);
-
- // Add the field offset to the running total offset.
- TotalOffset = getAddExpr(TotalOffset, FieldOffset);
- } else {
- // For an array, add the element offset, explicitly scaled.
- const SCEV *ElementSize = getSizeOfExpr(IntPtrTy, *GTI);
- const SCEV *IndexS = getSCEV(Index);
- // Getelementptr indices are signed.
- IndexS = getTruncateOrSignExtend(IndexS, IntPtrTy);
-
- // Multiply the index by the element size to compute the element offset.
- const SCEV *LocalOffset = getMulExpr(IndexS, ElementSize, Wrap);
-
- // Add the element offset to the running total offset.
- TotalOffset = getAddExpr(TotalOffset, LocalOffset);
- }
- }
-
- // Get the SCEV for the GEP base.
- const SCEV *BaseS = getSCEV(Base);
-
- // Add the total offset from all the GEP indices to the base.
- return getAddExpr(BaseS, TotalOffset, Wrap);
+ SmallVector<const SCEV *, 4> IndexExprs;
+ for (auto Index = GEP->idx_begin(); Index != GEP->idx_end(); ++Index)
+ IndexExprs.push_back(getSCEV(*Index));
+ return getGEPExpr(GEP->getSourceElementType(), getSCEV(Base), IndexExprs,
+ GEP->isInBounds());
}
/// GetMinTrailingZeros - Determine the minimum number of zero bits that S is
@@ -3653,7 +3834,8 @@ ScalarEvolution::GetMinTrailingZeros(const SCEV *S) {
// For a SCEVUnknown, ask ValueTracking.
unsigned BitWidth = getTypeSizeInBits(U->getType());
APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
- computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, AC, nullptr, DT);
+ computeKnownBits(U->getValue(), Zeros, Ones,
+ F->getParent()->getDataLayout(), 0, AC, nullptr, DT);
return Zeros.countTrailingOnes();
}
@@ -3688,79 +3870,93 @@ static Optional<ConstantRange> GetRangeFromMetadata(Value *V) {
return None;
}
-/// getUnsignedRange - Determine the unsigned range for a particular SCEV.
+/// getRange - Determine the range for a particular SCEV. If SignHint is
+/// HINT_RANGE_UNSIGNED (resp. HINT_RANGE_SIGNED) then getRange prefers ranges
+/// with a "cleaner" unsigned (resp. signed) representation.
///
ConstantRange
-ScalarEvolution::getUnsignedRange(const SCEV *S) {
+ScalarEvolution::getRange(const SCEV *S,
+ ScalarEvolution::RangeSignHint SignHint) {
+ DenseMap<const SCEV *, ConstantRange> &Cache =
+ SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED ? UnsignedRanges
+ : SignedRanges;
+
// See if we've computed this range already.
- DenseMap<const SCEV *, ConstantRange>::iterator I = UnsignedRanges.find(S);
- if (I != UnsignedRanges.end())
+ DenseMap<const SCEV *, ConstantRange>::iterator I = Cache.find(S);
+ if (I != Cache.end())
return I->second;
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
- return setUnsignedRange(C, ConstantRange(C->getValue()->getValue()));
+ return setRange(C, SignHint, ConstantRange(C->getValue()->getValue()));
unsigned BitWidth = getTypeSizeInBits(S->getType());
ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true);
- // If the value has known zeros, the maximum unsigned value will have those
- // known zeros as well.
+ // If the value has known zeros, the maximum value will have those known zeros
+ // as well.
uint32_t TZ = GetMinTrailingZeros(S);
- if (TZ != 0)
- ConservativeResult =
- ConstantRange(APInt::getMinValue(BitWidth),
- APInt::getMaxValue(BitWidth).lshr(TZ).shl(TZ) + 1);
+ if (TZ != 0) {
+ if (SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED)
+ ConservativeResult =
+ ConstantRange(APInt::getMinValue(BitWidth),
+ APInt::getMaxValue(BitWidth).lshr(TZ).shl(TZ) + 1);
+ else
+ ConservativeResult = ConstantRange(
+ APInt::getSignedMinValue(BitWidth),
+ APInt::getSignedMaxValue(BitWidth).ashr(TZ).shl(TZ) + 1);
+ }
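
A quick check of the trailing-zero clamp with assumed numbers: if S is known to have at least two trailing zero bits at width 8, the unsigned branch produces the range [0, 253), i.e. a maximum of 252, the largest 8-bit multiple of 4.

#include "llvm/ADT/APInt.h"
#include <cassert>

int main() {
  using llvm::APInt;
  const unsigned BitWidth = 8;
  const unsigned TZ = 2; // assumed result of GetMinTrailingZeros(S)
  // Same expression as the unsigned branch above: clear the low TZ bits of
  // the all-ones value to get the largest value S can still take.
  APInt UpperInclusive = APInt::getMaxValue(BitWidth).lshr(TZ).shl(TZ);
  assert(UpperInclusive == 252 && "largest 8-bit multiple of 4");
  // The ConstantRange built above is then [0, 252 + 1) == [0, 253).
  return 0;
}
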
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
- ConstantRange X = getUnsignedRange(Add->getOperand(0));
+ ConstantRange X = getRange(Add->getOperand(0), SignHint);
for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i)
- X = X.add(getUnsignedRange(Add->getOperand(i)));
- return setUnsignedRange(Add, ConservativeResult.intersectWith(X));
+ X = X.add(getRange(Add->getOperand(i), SignHint));
+ return setRange(Add, SignHint, ConservativeResult.intersectWith(X));
}
if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
- ConstantRange X = getUnsignedRange(Mul->getOperand(0));
+ ConstantRange X = getRange(Mul->getOperand(0), SignHint);
for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i)
- X = X.multiply(getUnsignedRange(Mul->getOperand(i)));
- return setUnsignedRange(Mul, ConservativeResult.intersectWith(X));
+ X = X.multiply(getRange(Mul->getOperand(i), SignHint));
+ return setRange(Mul, SignHint, ConservativeResult.intersectWith(X));
}
if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) {
- ConstantRange X = getUnsignedRange(SMax->getOperand(0));
+ ConstantRange X = getRange(SMax->getOperand(0), SignHint);
for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i)
- X = X.smax(getUnsignedRange(SMax->getOperand(i)));
- return setUnsignedRange(SMax, ConservativeResult.intersectWith(X));
+ X = X.smax(getRange(SMax->getOperand(i), SignHint));
+ return setRange(SMax, SignHint, ConservativeResult.intersectWith(X));
}
if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) {
- ConstantRange X = getUnsignedRange(UMax->getOperand(0));
+ ConstantRange X = getRange(UMax->getOperand(0), SignHint);
for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i)
- X = X.umax(getUnsignedRange(UMax->getOperand(i)));
- return setUnsignedRange(UMax, ConservativeResult.intersectWith(X));
+ X = X.umax(getRange(UMax->getOperand(i), SignHint));
+ return setRange(UMax, SignHint, ConservativeResult.intersectWith(X));
}
if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
- ConstantRange X = getUnsignedRange(UDiv->getLHS());
- ConstantRange Y = getUnsignedRange(UDiv->getRHS());
- return setUnsignedRange(UDiv, ConservativeResult.intersectWith(X.udiv(Y)));
+ ConstantRange X = getRange(UDiv->getLHS(), SignHint);
+ ConstantRange Y = getRange(UDiv->getRHS(), SignHint);
+ return setRange(UDiv, SignHint,
+ ConservativeResult.intersectWith(X.udiv(Y)));
}
if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) {
- ConstantRange X = getUnsignedRange(ZExt->getOperand());
- return setUnsignedRange(ZExt,
- ConservativeResult.intersectWith(X.zeroExtend(BitWidth)));
+ ConstantRange X = getRange(ZExt->getOperand(), SignHint);
+ return setRange(ZExt, SignHint,
+ ConservativeResult.intersectWith(X.zeroExtend(BitWidth)));
}
if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) {
- ConstantRange X = getUnsignedRange(SExt->getOperand());
- return setUnsignedRange(SExt,
- ConservativeResult.intersectWith(X.signExtend(BitWidth)));
+ ConstantRange X = getRange(SExt->getOperand(), SignHint);
+ return setRange(SExt, SignHint,
+ ConservativeResult.intersectWith(X.signExtend(BitWidth)));
}
if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) {
- ConstantRange X = getUnsignedRange(Trunc->getOperand());
- return setUnsignedRange(Trunc,
- ConservativeResult.intersectWith(X.truncate(BitWidth)));
+ ConstantRange X = getRange(Trunc->getOperand(), SignHint);
+ return setRange(Trunc, SignHint,
+ ConservativeResult.intersectWith(X.truncate(BitWidth)));
}
if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
@@ -3773,143 +3969,6 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) {
ConservativeResult.intersectWith(
ConstantRange(C->getValue()->getValue(), APInt(BitWidth, 0)));
- // TODO: non-affine addrec
- if (AddRec->isAffine()) {
- Type *Ty = AddRec->getType();
- const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop());
- if (!isa<SCEVCouldNotCompute>(MaxBECount) &&
- getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) {
- MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty);
-
- const SCEV *Start = AddRec->getStart();
- const SCEV *Step = AddRec->getStepRecurrence(*this);
-
- ConstantRange StartRange = getUnsignedRange(Start);
- ConstantRange StepRange = getSignedRange(Step);
- ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount);
- ConstantRange EndRange =
- StartRange.add(MaxBECountRange.multiply(StepRange));
-
- // Check for overflow. This must be done with ConstantRange arithmetic
- // because we could be called from within the ScalarEvolution overflow
- // checking code.
- ConstantRange ExtStartRange = StartRange.zextOrTrunc(BitWidth*2+1);
- ConstantRange ExtStepRange = StepRange.sextOrTrunc(BitWidth*2+1);
- ConstantRange ExtMaxBECountRange =
- MaxBECountRange.zextOrTrunc(BitWidth*2+1);
- ConstantRange ExtEndRange = EndRange.zextOrTrunc(BitWidth*2+1);
- if (ExtStartRange.add(ExtMaxBECountRange.multiply(ExtStepRange)) !=
- ExtEndRange)
- return setUnsignedRange(AddRec, ConservativeResult);
-
- APInt Min = APIntOps::umin(StartRange.getUnsignedMin(),
- EndRange.getUnsignedMin());
- APInt Max = APIntOps::umax(StartRange.getUnsignedMax(),
- EndRange.getUnsignedMax());
- if (Min.isMinValue() && Max.isMaxValue())
- return setUnsignedRange(AddRec, ConservativeResult);
- return setUnsignedRange(AddRec,
- ConservativeResult.intersectWith(ConstantRange(Min, Max+1)));
- }
- }
-
- return setUnsignedRange(AddRec, ConservativeResult);
- }
-
- if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
- // Check if the IR explicitly contains !range metadata.
- Optional<ConstantRange> MDRange = GetRangeFromMetadata(U->getValue());
- if (MDRange.hasValue())
- ConservativeResult = ConservativeResult.intersectWith(MDRange.getValue());
-
- // For a SCEVUnknown, ask ValueTracking.
- APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
- computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, AC, nullptr, DT);
- if (Ones == ~Zeros + 1)
- return setUnsignedRange(U, ConservativeResult);
- return setUnsignedRange(U,
- ConservativeResult.intersectWith(ConstantRange(Ones, ~Zeros + 1)));
- }
-
- return setUnsignedRange(S, ConservativeResult);
-}
-
-/// getSignedRange - Determine the signed range for a particular SCEV.
-///
-ConstantRange
-ScalarEvolution::getSignedRange(const SCEV *S) {
- // See if we've computed this range already.
- DenseMap<const SCEV *, ConstantRange>::iterator I = SignedRanges.find(S);
- if (I != SignedRanges.end())
- return I->second;
-
- if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
- return setSignedRange(C, ConstantRange(C->getValue()->getValue()));
-
- unsigned BitWidth = getTypeSizeInBits(S->getType());
- ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true);
-
- // If the value has known zeros, the maximum signed value will have those
- // known zeros as well.
- uint32_t TZ = GetMinTrailingZeros(S);
- if (TZ != 0)
- ConservativeResult =
- ConstantRange(APInt::getSignedMinValue(BitWidth),
- APInt::getSignedMaxValue(BitWidth).ashr(TZ).shl(TZ) + 1);
-
- if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
- ConstantRange X = getSignedRange(Add->getOperand(0));
- for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i)
- X = X.add(getSignedRange(Add->getOperand(i)));
- return setSignedRange(Add, ConservativeResult.intersectWith(X));
- }
-
- if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
- ConstantRange X = getSignedRange(Mul->getOperand(0));
- for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i)
- X = X.multiply(getSignedRange(Mul->getOperand(i)));
- return setSignedRange(Mul, ConservativeResult.intersectWith(X));
- }
-
- if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) {
- ConstantRange X = getSignedRange(SMax->getOperand(0));
- for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i)
- X = X.smax(getSignedRange(SMax->getOperand(i)));
- return setSignedRange(SMax, ConservativeResult.intersectWith(X));
- }
-
- if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) {
- ConstantRange X = getSignedRange(UMax->getOperand(0));
- for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i)
- X = X.umax(getSignedRange(UMax->getOperand(i)));
- return setSignedRange(UMax, ConservativeResult.intersectWith(X));
- }
-
- if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
- ConstantRange X = getSignedRange(UDiv->getLHS());
- ConstantRange Y = getSignedRange(UDiv->getRHS());
- return setSignedRange(UDiv, ConservativeResult.intersectWith(X.udiv(Y)));
- }
-
- if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) {
- ConstantRange X = getSignedRange(ZExt->getOperand());
- return setSignedRange(ZExt,
- ConservativeResult.intersectWith(X.zeroExtend(BitWidth)));
- }
-
- if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) {
- ConstantRange X = getSignedRange(SExt->getOperand());
- return setSignedRange(SExt,
- ConservativeResult.intersectWith(X.signExtend(BitWidth)));
- }
-
- if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) {
- ConstantRange X = getSignedRange(Trunc->getOperand());
- return setSignedRange(Trunc,
- ConservativeResult.intersectWith(X.truncate(BitWidth)));
- }
-
- if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
// If there's no signed wrap, and all the operands have the same sign or
// zero, the value won't ever change sign.
if (AddRec->getNoWrapFlags(SCEV::FlagNSW)) {
@@ -3935,41 +3994,66 @@ ScalarEvolution::getSignedRange(const SCEV *S) {
const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop());
if (!isa<SCEVCouldNotCompute>(MaxBECount) &&
getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) {
+
+ // Check for overflow. This must be done with ConstantRange arithmetic
+ // because we could be called from within the ScalarEvolution overflow
+ // checking code.
+
MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty);
+ ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount);
+ ConstantRange ZExtMaxBECountRange =
+ MaxBECountRange.zextOrTrunc(BitWidth * 2 + 1);
const SCEV *Start = AddRec->getStart();
const SCEV *Step = AddRec->getStepRecurrence(*this);
+ ConstantRange StepSRange = getSignedRange(Step);
+ ConstantRange SExtStepSRange = StepSRange.sextOrTrunc(BitWidth * 2 + 1);
+
+ ConstantRange StartURange = getUnsignedRange(Start);
+ ConstantRange EndURange =
+ StartURange.add(MaxBECountRange.multiply(StepSRange));
+
+ // Check for unsigned overflow.
+ ConstantRange ZExtStartURange =
+ StartURange.zextOrTrunc(BitWidth * 2 + 1);
+ ConstantRange ZExtEndURange = EndURange.zextOrTrunc(BitWidth * 2 + 1);
+ if (ZExtStartURange.add(ZExtMaxBECountRange.multiply(SExtStepSRange)) ==
+ ZExtEndURange) {
+ APInt Min = APIntOps::umin(StartURange.getUnsignedMin(),
+ EndURange.getUnsignedMin());
+ APInt Max = APIntOps::umax(StartURange.getUnsignedMax(),
+ EndURange.getUnsignedMax());
+ bool IsFullRange = Min.isMinValue() && Max.isMaxValue();
+ if (!IsFullRange)
+ ConservativeResult =
+ ConservativeResult.intersectWith(ConstantRange(Min, Max + 1));
+ }
- ConstantRange StartRange = getSignedRange(Start);
- ConstantRange StepRange = getSignedRange(Step);
- ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount);
- ConstantRange EndRange =
- StartRange.add(MaxBECountRange.multiply(StepRange));
-
- // Check for overflow. This must be done with ConstantRange arithmetic
- // because we could be called from within the ScalarEvolution overflow
- // checking code.
- ConstantRange ExtStartRange = StartRange.sextOrTrunc(BitWidth*2+1);
- ConstantRange ExtStepRange = StepRange.sextOrTrunc(BitWidth*2+1);
- ConstantRange ExtMaxBECountRange =
- MaxBECountRange.zextOrTrunc(BitWidth*2+1);
- ConstantRange ExtEndRange = EndRange.sextOrTrunc(BitWidth*2+1);
- if (ExtStartRange.add(ExtMaxBECountRange.multiply(ExtStepRange)) !=
- ExtEndRange)
- return setSignedRange(AddRec, ConservativeResult);
-
- APInt Min = APIntOps::smin(StartRange.getSignedMin(),
- EndRange.getSignedMin());
- APInt Max = APIntOps::smax(StartRange.getSignedMax(),
- EndRange.getSignedMax());
- if (Min.isMinSignedValue() && Max.isMaxSignedValue())
- return setSignedRange(AddRec, ConservativeResult);
- return setSignedRange(AddRec,
- ConservativeResult.intersectWith(ConstantRange(Min, Max+1)));
+ ConstantRange StartSRange = getSignedRange(Start);
+ ConstantRange EndSRange =
+ StartSRange.add(MaxBECountRange.multiply(StepSRange));
+
+ // Check for signed overflow. This must be done with ConstantRange
+ // arithmetic because we could be called from within the ScalarEvolution
+ // overflow checking code.
+ ConstantRange SExtStartSRange =
+ StartSRange.sextOrTrunc(BitWidth * 2 + 1);
+ ConstantRange SExtEndSRange = EndSRange.sextOrTrunc(BitWidth * 2 + 1);
+ if (SExtStartSRange.add(ZExtMaxBECountRange.multiply(SExtStepSRange)) ==
+ SExtEndSRange) {
+ APInt Min = APIntOps::smin(StartSRange.getSignedMin(),
+ EndSRange.getSignedMin());
+ APInt Max = APIntOps::smax(StartSRange.getSignedMax(),
+ EndSRange.getSignedMax());
+ bool IsFullRange = Min.isMinSignedValue() && Max.isMaxSignedValue();
+ if (!IsFullRange)
+ ConservativeResult =
+ ConservativeResult.intersectWith(ConstantRange(Min, Max + 1));
+ }
}
}
- return setSignedRange(AddRec, ConservativeResult);
+ return setRange(AddRec, SignHint, ConservativeResult);
}
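
The widened comparisons above work in BitWidth * 2 + 1 bits because that width is enough to represent Start + MaxBECount * Step exactly for any operand values, so equality of the extended expressions really does prove the narrow computation did not wrap. A small arithmetic sanity check with an assumed 4-bit width:

#include <cassert>
#include <cstdint>

int main() {
  // Assumed BitWidth = 4: unsigned values lie in [0, 15], signed in [-8, 7].
  const int64_t BitWidth = 4;
  const int64_t MaxUnsigned = (1 << BitWidth) - 1;  // 15
  const int64_t MaxAbsSigned = 1 << (BitWidth - 1); // 8

  // Largest possible magnitude of Start + MaxBECount * Step when Start and
  // MaxBECount are zero-extended and Step is sign-extended.
  const int64_t WorstCase = MaxUnsigned + MaxUnsigned * MaxAbsSigned; // 135

  // 2 * BitWidth + 1 = 9 signed bits cover [-256, 255], so the extended
  // computation can never wrap, and a mismatch must mean the narrow one did.
  const int64_t ExtBits = 2 * BitWidth + 1;
  assert(WorstCase < (1 << (ExtBits - 1)) && "fits in 9 signed bits");
  return 0;
}
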
if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
@@ -3978,18 +4062,31 @@ ScalarEvolution::getSignedRange(const SCEV *S) {
if (MDRange.hasValue())
ConservativeResult = ConservativeResult.intersectWith(MDRange.getValue());
- // For a SCEVUnknown, ask ValueTracking.
- if (!U->getValue()->getType()->isIntegerTy() && !DL)
- return setSignedRange(U, ConservativeResult);
- unsigned NS = ComputeNumSignBits(U->getValue(), DL, 0, AC, nullptr, DT);
- if (NS <= 1)
- return setSignedRange(U, ConservativeResult);
- return setSignedRange(U, ConservativeResult.intersectWith(
- ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1),
- APInt::getSignedMaxValue(BitWidth).ashr(NS - 1)+1)));
+ // Split here to avoid paying the compile-time cost of calling both
+ // computeKnownBits and ComputeNumSignBits. This restriction can be lifted
+ // if needed.
+ const DataLayout &DL = F->getParent()->getDataLayout();
+ if (SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED) {
+ // For a SCEVUnknown, ask ValueTracking.
+ APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
+ computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, AC, nullptr, DT);
+ if (Ones != ~Zeros + 1)
+ ConservativeResult =
+ ConservativeResult.intersectWith(ConstantRange(Ones, ~Zeros + 1));
+ } else {
+ assert(SignHint == ScalarEvolution::HINT_RANGE_SIGNED &&
+ "generalize as needed!");
+ unsigned NS = ComputeNumSignBits(U->getValue(), DL, 0, AC, nullptr, DT);
+ if (NS > 1)
+ ConservativeResult = ConservativeResult.intersectWith(
+ ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1),
+ APInt::getSignedMaxValue(BitWidth).ashr(NS - 1) + 1));
+ }
+
+ return setRange(U, SignHint, ConservativeResult);
}
- return setSignedRange(S, ConservativeResult);
+ return setRange(S, SignHint, ConservativeResult);
}
/// createSCEV - We know that there is no SCEV for the specified value.
@@ -4088,8 +4185,8 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
unsigned TZ = A.countTrailingZeros();
unsigned BitWidth = A.getBitWidth();
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- computeKnownBits(U->getOperand(0), KnownZero, KnownOne, DL, 0, AC,
- nullptr, DT);
+ computeKnownBits(U->getOperand(0), KnownZero, KnownOne,
+ F->getParent()->getDataLayout(), 0, AC, nullptr, DT);
APInt EffectiveMask =
APInt::getLowBitsSet(BitWidth, BitWidth - LZ - TZ).shl(TZ);
@@ -4280,9 +4377,10 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
case ICmpInst::ICMP_SGE:
// a >s b ? a+x : b+x -> smax(a, b)+x
// a >s b ? b+x : a+x -> smin(a, b)+x
- if (LHS->getType() == U->getType()) {
- const SCEV *LS = getSCEV(LHS);
- const SCEV *RS = getSCEV(RHS);
+ if (getTypeSizeInBits(LHS->getType()) <=
+ getTypeSizeInBits(U->getType())) {
+ const SCEV *LS = getNoopOrSignExtend(getSCEV(LHS), U->getType());
+ const SCEV *RS = getNoopOrSignExtend(getSCEV(RHS), U->getType());
const SCEV *LA = getSCEV(U->getOperand(1));
const SCEV *RA = getSCEV(U->getOperand(2));
const SCEV *LDiff = getMinusSCEV(LA, LS);
@@ -4303,9 +4401,10 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
case ICmpInst::ICMP_UGE:
// a >u b ? a+x : b+x -> umax(a, b)+x
// a >u b ? b+x : a+x -> umin(a, b)+x
- if (LHS->getType() == U->getType()) {
- const SCEV *LS = getSCEV(LHS);
- const SCEV *RS = getSCEV(RHS);
+ if (getTypeSizeInBits(LHS->getType()) <=
+ getTypeSizeInBits(U->getType())) {
+ const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), U->getType());
+ const SCEV *RS = getNoopOrZeroExtend(getSCEV(RHS), U->getType());
const SCEV *LA = getSCEV(U->getOperand(1));
const SCEV *RA = getSCEV(U->getOperand(2));
const SCEV *LDiff = getMinusSCEV(LA, LS);
@@ -4320,11 +4419,11 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
break;
case ICmpInst::ICMP_NE:
// n != 0 ? n+x : 1+x -> umax(n, 1)+x
- if (LHS->getType() == U->getType() &&
- isa<ConstantInt>(RHS) &&
- cast<ConstantInt>(RHS)->isZero()) {
- const SCEV *One = getConstant(LHS->getType(), 1);
- const SCEV *LS = getSCEV(LHS);
+ if (getTypeSizeInBits(LHS->getType()) <=
+ getTypeSizeInBits(U->getType()) &&
+ isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()) {
+ const SCEV *One = getConstant(U->getType(), 1);
+ const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), U->getType());
const SCEV *LA = getSCEV(U->getOperand(1));
const SCEV *RA = getSCEV(U->getOperand(2));
const SCEV *LDiff = getMinusSCEV(LA, LS);
@@ -4335,11 +4434,11 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
break;
case ICmpInst::ICMP_EQ:
// n == 0 ? 1+x : n+x -> umax(n, 1)+x
- if (LHS->getType() == U->getType() &&
- isa<ConstantInt>(RHS) &&
- cast<ConstantInt>(RHS)->isZero()) {
- const SCEV *One = getConstant(LHS->getType(), 1);
- const SCEV *LS = getSCEV(LHS);
+ if (getTypeSizeInBits(LHS->getType()) <=
+ getTypeSizeInBits(U->getType()) &&
+ isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()) {
+ const SCEV *One = getConstant(U->getType(), 1);
+ const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), U->getType());
const SCEV *LA = getSCEV(U->getOperand(1));
const SCEV *RA = getSCEV(U->getOperand(2));
const SCEV *LDiff = getMinusSCEV(LA, One);
@@ -5238,12 +5337,9 @@ static bool canConstantEvolve(Instruction *I, const Loop *L) {
if (!L->contains(I)) return false;
if (isa<PHINode>(I)) {
- if (L->getHeader() == I->getParent())
- return true;
- else
- // We don't currently keep track of the control flow needed to evaluate
- // PHIs, so we cannot handle PHIs inside of loops.
- return false;
+ // We don't currently keep track of the control flow needed to evaluate
+ // PHIs, so we cannot handle PHIs inside of loops.
+ return L->getHeader() == I->getParent();
}
// If we won't be able to constant fold this expression even if the operands
@@ -5314,7 +5410,7 @@ static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) {
/// reason, return null.
static Constant *EvaluateExpression(Value *V, const Loop *L,
DenseMap<Instruction *, Constant *> &Vals,
- const DataLayout *DL,
+ const DataLayout &DL,
const TargetLibraryInfo *TLI) {
// Convenient constant check, but redundant for recursive calls.
if (Constant *C = dyn_cast<Constant>(V)) return C;
@@ -5403,6 +5499,7 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
unsigned NumIterations = BEs.getZExtValue(); // must be in range
unsigned IterationNum = 0;
+ const DataLayout &DL = F->getParent()->getDataLayout();
for (; ; ++IterationNum) {
if (IterationNum == NumIterations)
return RetVal = CurrentIterVals[PN]; // Got exit value!
@@ -5410,8 +5507,8 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
// Compute the value of the PHIs for the next iteration.
// EvaluateExpression adds non-phi values to the CurrentIterVals map.
DenseMap<Instruction *, Constant *> NextIterVals;
- Constant *NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL,
- TLI);
+ Constant *NextPHI =
+ EvaluateExpression(BEValue, L, CurrentIterVals, DL, TLI);
if (!NextPHI)
return nullptr; // Couldn't evaluate!
NextIterVals[PN] = NextPHI;
@@ -5487,12 +5584,11 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,
// Okay, we find a PHI node that defines the trip count of this loop. Execute
// the loop symbolically to determine when the condition gets a value of
// "ExitWhen".
-
unsigned MaxIterations = MaxBruteForceIterations; // Limit analysis.
+ const DataLayout &DL = F->getParent()->getDataLayout();
for (unsigned IterationNum = 0; IterationNum != MaxIterations;++IterationNum){
- ConstantInt *CondVal =
- dyn_cast_or_null<ConstantInt>(EvaluateExpression(Cond, L, CurrentIterVals,
- DL, TLI));
+ ConstantInt *CondVal = dyn_cast_or_null<ConstantInt>(
+ EvaluateExpression(Cond, L, CurrentIterVals, DL, TLI));
// Couldn't symbolically evaluate.
if (!CondVal) return getCouldNotCompute();
@@ -5623,7 +5719,7 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) {
if (PTy->getElementType()->isStructTy())
C2 = ConstantExpr::getIntegerCast(
C2, Type::getInt32Ty(C->getContext()), true);
- C = ConstantExpr::getGetElementPtr(C, C2);
+ C = ConstantExpr::getGetElementPtr(PTy->getElementType(), C, C2);
} else
C = ConstantExpr::getAdd(C, C2);
}
@@ -5725,16 +5821,16 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
// Check to see if getSCEVAtScope actually made an improvement.
if (MadeImprovement) {
Constant *C = nullptr;
+ const DataLayout &DL = F->getParent()->getDataLayout();
if (const CmpInst *CI = dyn_cast<CmpInst>(I))
- C = ConstantFoldCompareInstOperands(CI->getPredicate(),
- Operands[0], Operands[1], DL,
- TLI);
+ C = ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0],
+ Operands[1], DL, TLI);
else if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
if (!LI->isVolatile())
C = ConstantFoldLoadFromConstPtr(Operands[0], DL);
} else
- C = ConstantFoldInstOperands(I->getOpcode(), I->getType(),
- Operands, DL, TLI);
+ C = ConstantFoldInstOperands(I->getOpcode(), I->getType(), Operands,
+ DL, TLI);
if (!C) return V;
return getSCEV(C);
}
@@ -6016,7 +6112,7 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit) {
dyn_cast<ConstantInt>(ConstantExpr::getICmp(CmpInst::ICMP_ULT,
R1->getValue(),
R2->getValue()))) {
- if (CB->getZExtValue() == false)
+ if (!CB->getZExtValue())
std::swap(R1, R2); // R1 is the minimum root now.
// We can only use this value if the chrec ends up with an exact zero
@@ -6631,6 +6727,65 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L,
return true;
}
+ struct ClearWalkingBEDominatingCondsOnExit {
+ ScalarEvolution &SE;
+
+ explicit ClearWalkingBEDominatingCondsOnExit(ScalarEvolution &SE)
+ : SE(SE) {}
+
+ ~ClearWalkingBEDominatingCondsOnExit() {
+ SE.WalkingBEDominatingConds = false;
+ }
+ };
+
+ // We don't want more than one activation of the following loop on the stack
+ // -- that can lead to O(n!) time complexity.
+ if (WalkingBEDominatingConds)
+ return false;
+
+ WalkingBEDominatingConds = true;
+ ClearWalkingBEDominatingCondsOnExit ClearOnExit(*this);
+
+ // If the loop is not reachable from the entry block, we risk running into an
+ // infinite loop as we walk up into the dom tree. These loops do not matter
+ // anyway, so we just return a conservative answer when we see them.
+ if (!DT->isReachableFromEntry(L->getHeader()))
+ return false;
+
+ for (DomTreeNode *DTN = (*DT)[Latch], *HeaderDTN = (*DT)[L->getHeader()];
+ DTN != HeaderDTN;
+ DTN = DTN->getIDom()) {
+
+ assert(DTN && "should reach the loop header before reaching the root!");
+
+ BasicBlock *BB = DTN->getBlock();
+ BasicBlock *PBB = BB->getSinglePredecessor();
+ if (!PBB)
+ continue;
+
+ BranchInst *ContinuePredicate = dyn_cast<BranchInst>(PBB->getTerminator());
+ if (!ContinuePredicate || !ContinuePredicate->isConditional())
+ continue;
+
+ Value *Condition = ContinuePredicate->getCondition();
+
+ // If we have an edge `E` within the loop body that dominates the only
+ // latch, the condition guarding `E` also guards the backedge. This
+ // reasoning works only for loops with a single latch.
+
+ BasicBlockEdge DominatingEdge(PBB, BB);
+ if (DominatingEdge.isSingleEdge()) {
+ // We're constructively (and conservatively) enumerating edges within the
+ // loop body that dominate the latch. The dominator tree better agree
+ // with us on this:
+ assert(DT->dominates(DominatingEdge, Latch) && "should be!");
+
+ if (isImpliedCond(Pred, LHS, RHS, Condition,
+ BB != ContinuePredicate->getSuccessor(0)))
+ return true;
+ }
+ }
+
return false;
}
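A rough standalone sketch of the reentrancy guard introduced in the hunk above (for illustration only, not part of the patch; the names are made up stand-ins for WalkingBEDominatingConds and ClearWalkingBEDominatingCondsOnExit): a flag marks one activation, and an RAII object guarantees the flag is cleared on every exit path, so a recursive entry can bail out instead of blowing up the running time.

    struct Analysis {
      bool Walking = false;                 // mirrors WalkingBEDominatingConds

      struct ClearOnExit {                  // mirrors the RAII struct in the patch
        Analysis &A;
        explicit ClearOnExit(Analysis &A) : A(A) {}
        ~ClearOnExit() { A.Walking = false; }
      };

      bool query() {
        if (Walking)
          return false;                     // re-entered: give a conservative answer
        Walking = true;
        ClearOnExit Clear(*this);           // flag is cleared on every return
        // ... walk the dominator tree; nested calls to query() now return early ...
        return false;
      }
    };

Because the destructor runs on every return, the flag cannot be left set even if new early exits are added to the walk later.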
@@ -6726,15 +6881,6 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
ICmpInst *ICI = dyn_cast<ICmpInst>(FoundCondValue);
if (!ICI) return false;
- // Bail if the ICmp's operands' types are wider than the needed type
- // before attempting to call getSCEV on them. This avoids infinite
- // recursion, since the analysis of widening casts can require loop
- // exit condition information for overflow checking, which would
- // lead back here.
- if (getTypeSizeInBits(LHS->getType()) <
- getTypeSizeInBits(ICI->getOperand(0)->getType()))
- return false;
-
// Now that we found a conditional branch that dominates the loop or controls
// the loop latch. Check to see if it is the comparison we are looking for.
ICmpInst::Predicate FoundPred;
@@ -6746,9 +6892,17 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
const SCEV *FoundLHS = getSCEV(ICI->getOperand(0));
const SCEV *FoundRHS = getSCEV(ICI->getOperand(1));
- // Balance the types. The case where FoundLHS' type is wider than
- // LHS' type is checked for above.
- if (getTypeSizeInBits(LHS->getType()) >
+ // Balance the types.
+ if (getTypeSizeInBits(LHS->getType()) <
+ getTypeSizeInBits(FoundLHS->getType())) {
+ if (CmpInst::isSigned(Pred)) {
+ LHS = getSignExtendExpr(LHS, FoundLHS->getType());
+ RHS = getSignExtendExpr(RHS, FoundLHS->getType());
+ } else {
+ LHS = getZeroExtendExpr(LHS, FoundLHS->getType());
+ RHS = getZeroExtendExpr(RHS, FoundLHS->getType());
+ }
+ } else if (getTypeSizeInBits(LHS->getType()) >
getTypeSizeInBits(FoundLHS->getType())) {
if (CmpInst::isSigned(FoundPred)) {
FoundLHS = getSignExtendExpr(FoundLHS, LHS->getType());
@@ -6874,6 +7028,9 @@ bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS,
const SCEV *FoundLHS,
const SCEV *FoundRHS) {
+ if (isImpliedCondOperandsViaRanges(Pred, LHS, RHS, FoundLHS, FoundRHS))
+ return true;
+
return isImpliedCondOperandsHelper(Pred, LHS, RHS,
FoundLHS, FoundRHS) ||
// ~x < ~y --> x > y
@@ -7011,8 +7168,49 @@ ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred,
return false;
}
-// Verify if an linear IV with positive stride can overflow when in a
-// less-than comparison, knowing the invariant term of the comparison, the
+/// isImpliedCondOperandsViaRanges - helper function for isImpliedCondOperands.
+/// Tries to get cases like "X `sgt` 0 => X - 1 `sgt` -1".
+bool ScalarEvolution::isImpliedCondOperandsViaRanges(ICmpInst::Predicate Pred,
+ const SCEV *LHS,
+ const SCEV *RHS,
+ const SCEV *FoundLHS,
+ const SCEV *FoundRHS) {
+ if (!isa<SCEVConstant>(RHS) || !isa<SCEVConstant>(FoundRHS))
+ // The restriction on `FoundRHS` can be lifted easily -- it exists only to
+ // reduce the compile time impact of this optimization.
+ return false;
+
+ const SCEVAddExpr *AddLHS = dyn_cast<SCEVAddExpr>(LHS);
+ if (!AddLHS || AddLHS->getOperand(1) != FoundLHS ||
+ !isa<SCEVConstant>(AddLHS->getOperand(0)))
+ return false;
+
+ APInt ConstFoundRHS = cast<SCEVConstant>(FoundRHS)->getValue()->getValue();
+
+ // `FoundLHSRange` is the range we know `FoundLHS` to be in by virtue of the
+ // antecedent "`FoundLHS` `Pred` `FoundRHS`".
+ ConstantRange FoundLHSRange =
+ ConstantRange::makeAllowedICmpRegion(Pred, ConstFoundRHS);
+
+ // Since `LHS` is `FoundLHS` + `AddLHS->getOperand(0)`, we can compute a range
+ // for `LHS`:
+ APInt Addend =
+ cast<SCEVConstant>(AddLHS->getOperand(0))->getValue()->getValue();
+ ConstantRange LHSRange = FoundLHSRange.add(ConstantRange(Addend));
+
+ // We can also compute the range of values for `LHS` that satisfy the
+ // consequent, "`LHS` `Pred` `RHS`":
+ APInt ConstRHS = cast<SCEVConstant>(RHS)->getValue()->getValue();
+ ConstantRange SatisfyingLHSRange =
+ ConstantRange::makeSatisfyingICmpRegion(Pred, ConstRHS);
+
+ // The antecedent implies the consequent if every value of `LHS` that
+ // satisfies the antecedent also satisfies the consequent.
+ return SatisfyingLHSRange.contains(LHSRange);
+}
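A concrete instance of the range reasoning above, worked through with the same ConstantRange calls (a sketch for illustration, not part of the patch; the header paths and the 32-bit width are assumptions):

    #include "llvm/IR/ConstantRange.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    // Does "X >s 0" imply "X - 1 >s -1"?  Here FoundLHS is X, FoundRHS is 0,
    // LHS is X + (-1), RHS is -1, and Pred is ICMP_SGT.
    bool impliedExample() {
      APInt ConstFoundRHS(32, 0);
      ConstantRange FoundLHSRange =
          ConstantRange::makeAllowedICmpRegion(ICmpInst::ICMP_SGT, ConstFoundRHS);
      // FoundLHSRange is [1, INT_MIN): all strictly positive i32 values.

      APInt Addend(32, -1, /*isSigned=*/true);
      ConstantRange LHSRange = FoundLHSRange.add(ConstantRange(Addend));
      // LHSRange is [0, INT_MAX): the possible values of X - 1.

      APInt ConstRHS(32, -1, /*isSigned=*/true);
      ConstantRange SatisfyingLHSRange =
          ConstantRange::makeSatisfyingICmpRegion(ICmpInst::ICMP_SGT, ConstRHS);
      // Every value in LHSRange is > -1, so the implication holds.
      return SatisfyingLHSRange.contains(LHSRange);
    }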
+
+// Verify if a linear IV with positive stride can overflow when in a
+// less-than comparison, knowing the invariant term of the comparison, the
// stride and the knowledge of NSW/NUW flags on the recurrence.
bool ScalarEvolution::doesIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride,
bool IsSigned, bool NoWrap) {
@@ -7040,7 +7238,7 @@ bool ScalarEvolution::doesIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride,
return (MaxValue - MaxStrideMinusOne).ult(MaxRHS);
}
-// Verify if an linear IV with negative stride can overflow when in a
+// Verify if a linear IV with negative stride can overflow when in a
// greater-than comparison, knowing the invariant term of the comparison,
// the stride and the knowledge of NSW/NUW flags on the recurrence.
bool ScalarEvolution::doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride,
@@ -7071,7 +7269,7 @@ bool ScalarEvolution::doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride,
// Compute the backedge taken count knowing the interval difference, the
// stride and presence of the equality in the comparison.
-const SCEV *ScalarEvolution::computeBECount(const SCEV *Delta, const SCEV *Step,
+const SCEV *ScalarEvolution::computeBECount(const SCEV *Delta, const SCEV *Step,
bool Equality) {
const SCEV *One = getConstant(Step->getType(), 1);
Delta = Equality ? getAddExpr(Delta, Step)
@@ -7111,7 +7309,7 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
// Avoid proven overflow cases: this will ensure that the backedge taken count
// will not generate any unsigned overflow. Relaxed no-overflow conditions
- // exploit NoWrapFlags, allowing to optimize in presence of undefined
+ // exploit NoWrapFlags, allowing to optimize in presence of undefined
// behaviors like the case of C language.
if (!Stride->isOne() && doesIVOverflowOnLT(RHS, Stride, IsSigned, NoWrap))
return getCouldNotCompute();
@@ -7191,7 +7389,7 @@ ScalarEvolution::HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
// Avoid proven overflow cases: this will ensure that the backedge taken count
// will not generate any unsigned overflow. Relaxed no-overflow conditions
- // exploit NoWrapFlags, allowing to optimize in presence of undefined
+ // exploit NoWrapFlags, allowing to optimize in presence of undefined
// behaviors like the case of C language.
if (!Stride->isOne() && doesIVOverflowOnGT(RHS, Stride, IsSigned, NoWrap))
return getCouldNotCompute();
@@ -7239,7 +7437,7 @@ ScalarEvolution::HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
if (isa<SCEVConstant>(BECount))
MaxBECount = BECount;
else
- MaxBECount = computeBECount(getConstant(MaxStart - MinEnd),
+ MaxBECount = computeBECount(getConstant(MaxStart - MinEnd),
getConstant(MinStride), false);
if (isa<SCEVCouldNotCompute>(MaxBECount))
@@ -7339,7 +7537,7 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
if (ConstantInt *CB =
dyn_cast<ConstantInt>(ConstantExpr::getICmp(ICmpInst::ICMP_ULT,
R1->getValue(), R2->getValue()))) {
- if (CB->getZExtValue() == false)
+ if (!CB->getZExtValue())
std::swap(R1, R2); // R1 is the minimum root now.
// Make sure the root is not off by one. The returned iteration should
@@ -7858,18 +8056,16 @@ ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se)
//===----------------------------------------------------------------------===//
ScalarEvolution::ScalarEvolution()
- : FunctionPass(ID), ValuesAtScopes(64), LoopDispositions(64),
- BlockDispositions(64), FirstUnknown(nullptr) {
+ : FunctionPass(ID), WalkingBEDominatingConds(false), ValuesAtScopes(64),
+ LoopDispositions(64), BlockDispositions(64), FirstUnknown(nullptr) {
initializeScalarEvolutionPass(*PassRegistry::getPassRegistry());
}
bool ScalarEvolution::runOnFunction(Function &F) {
this->F = &F;
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- LI = &getAnalysis<LoopInfo>();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
- TLI = &getAnalysis<TargetLibraryInfo>();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
return false;
}
@@ -7892,6 +8088,7 @@ void ScalarEvolution::releaseMemory() {
}
assert(PendingLoopPredicates.empty() && "isImpliedCond garbage");
+ assert(!WalkingBEDominatingConds && "isLoopBackedgeGuardedByCond garbage!");
BackedgeTakenCounts.clear();
ConstantEvolutionLoopExitValue.clear();
@@ -7907,9 +8104,9 @@ void ScalarEvolution::releaseMemory() {
void ScalarEvolution::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequired<AssumptionCacheTracker>();
- AU.addRequiredTransitive<LoopInfo>();
+ AU.addRequiredTransitive<LoopInfoWrapperPass>();
AU.addRequiredTransitive<DominatorTreeWrapperPass>();
- AU.addRequired<TargetLibraryInfo>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
}
bool ScalarEvolution::hasLoopInvariantBackedgeTakenCount(const Loop *L) {
@@ -7969,6 +8166,12 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const {
OS << " --> ";
const SCEV *SV = SE.getSCEV(&*I);
SV->print(OS);
+ if (!isa<SCEVCouldNotCompute>(SV)) {
+ OS << " U: ";
+ SE.getUnsignedRange(SV).print(OS);
+ OS << " S: ";
+ SE.getSignedRange(SV).print(OS);
+ }
const Loop *L = LI->getLoopFor((*I).getParent());
@@ -7976,6 +8179,12 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const {
if (AtUse != SV) {
OS << " --> ";
AtUse->print(OS);
+ if (!isa<SCEVCouldNotCompute>(AtUse)) {
+ OS << " U: ";
+ SE.getUnsignedRange(AtUse).print(OS);
+ OS << " S: ";
+ SE.getSignedRange(AtUse).print(OS);
+ }
}
if (L) {
@@ -8000,17 +8209,17 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const {
ScalarEvolution::LoopDisposition
ScalarEvolution::getLoopDisposition(const SCEV *S, const Loop *L) {
- SmallVector<std::pair<const Loop *, LoopDisposition>, 2> &Values = LoopDispositions[S];
- for (unsigned u = 0; u < Values.size(); u++) {
- if (Values[u].first == L)
- return Values[u].second;
+ auto &Values = LoopDispositions[S];
+ for (auto &V : Values) {
+ if (V.getPointer() == L)
+ return V.getInt();
}
- Values.push_back(std::make_pair(L, LoopVariant));
+ Values.emplace_back(L, LoopVariant);
LoopDisposition D = computeLoopDisposition(S, L);
- SmallVector<std::pair<const Loop *, LoopDisposition>, 2> &Values2 = LoopDispositions[S];
- for (unsigned u = Values2.size(); u > 0; u--) {
- if (Values2[u - 1].first == L) {
- Values2[u - 1].second = D;
+ auto &Values2 = LoopDispositions[S];
+ for (auto &V : make_range(Values2.rbegin(), Values2.rend())) {
+ if (V.getPointer() == L) {
+ V.setInt(D);
break;
}
}
@@ -8106,17 +8315,17 @@ bool ScalarEvolution::hasComputableLoopEvolution(const SCEV *S, const Loop *L) {
ScalarEvolution::BlockDisposition
ScalarEvolution::getBlockDisposition(const SCEV *S, const BasicBlock *BB) {
- SmallVector<std::pair<const BasicBlock *, BlockDisposition>, 2> &Values = BlockDispositions[S];
- for (unsigned u = 0; u < Values.size(); u++) {
- if (Values[u].first == BB)
- return Values[u].second;
+ auto &Values = BlockDispositions[S];
+ for (auto &V : Values) {
+ if (V.getPointer() == BB)
+ return V.getInt();
}
- Values.push_back(std::make_pair(BB, DoesNotDominateBlock));
+ Values.emplace_back(BB, DoesNotDominateBlock);
BlockDisposition D = computeBlockDisposition(S, BB);
- SmallVector<std::pair<const BasicBlock *, BlockDisposition>, 2> &Values2 = BlockDispositions[S];
- for (unsigned u = Values2.size(); u > 0; u--) {
- if (Values2[u - 1].first == BB) {
- Values2[u - 1].second = D;
+ auto &Values2 = BlockDispositions[S];
+ for (auto &V : make_range(Values2.rbegin(), Values2.rend())) {
+ if (V.getPointer() == BB) {
+ V.setInt(D);
break;
}
}
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
index 5c339ee..ccec0a8 100644
--- a/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
@@ -22,6 +22,7 @@
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
using namespace llvm;
@@ -79,7 +80,7 @@ ScalarEvolutionAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
bool
ScalarEvolutionAliasAnalysis::runOnFunction(Function &F) {
- InitializeAliasAnalysis(this);
+ InitializeAliasAnalysis(this, &F.getParent()->getDataLayout());
SE = &getAnalysis<ScalarEvolution>();
return false;
}
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
index 7e9e351..0bd427b 100644
--- a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -23,7 +23,9 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -204,11 +206,9 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
/// TODO: When ScalarEvolution gets a SCEVSDivExpr, this can be made
/// unnecessary; in its place, just signed-divide Ops[i] by the scale and
/// check to see if the divide was folded.
-static bool FactorOutConstant(const SCEV *&S,
- const SCEV *&Remainder,
- const SCEV *Factor,
- ScalarEvolution &SE,
- const DataLayout *DL) {
+static bool FactorOutConstant(const SCEV *&S, const SCEV *&Remainder,
+ const SCEV *Factor, ScalarEvolution &SE,
+ const DataLayout &DL) {
// Everything is divisible by one.
if (Factor->isOne())
return true;
@@ -248,35 +248,17 @@ static bool FactorOutConstant(const SCEV *&S,
// In a Mul, check if there is a constant operand which is a multiple
// of the given factor.
if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
- if (DL) {
- // With DataLayout, the size is known. Check if there is a constant
- // operand which is a multiple of the given factor. If so, we can
- // factor it.
- const SCEVConstant *FC = cast<SCEVConstant>(Factor);
- if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0)))
- if (!C->getValue()->getValue().srem(FC->getValue()->getValue())) {
- SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end());
- NewMulOps[0] =
- SE.getConstant(C->getValue()->getValue().sdiv(
- FC->getValue()->getValue()));
- S = SE.getMulExpr(NewMulOps);
- return true;
- }
- } else {
- // Without DataLayout, check if Factor can be factored out of any of the
- // Mul's operands. If so, we can just remove it.
- for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) {
- const SCEV *SOp = M->getOperand(i);
- const SCEV *Remainder = SE.getConstant(SOp->getType(), 0);
- if (FactorOutConstant(SOp, Remainder, Factor, SE, DL) &&
- Remainder->isZero()) {
- SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end());
- NewMulOps[i] = SOp;
- S = SE.getMulExpr(NewMulOps);
- return true;
- }
+ // The size is known; check if there is a constant operand which is a multiple
+ // of the given factor. If so, we can factor it.
+ const SCEVConstant *FC = cast<SCEVConstant>(Factor);
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0)))
+ if (!C->getValue()->getValue().srem(FC->getValue()->getValue())) {
+ SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end());
+ NewMulOps[0] = SE.getConstant(
+ C->getValue()->getValue().sdiv(FC->getValue()->getValue()));
+ S = SE.getMulExpr(NewMulOps);
+ return true;
}
- }
}
// In an AddRec, check if both start and step are divisible.
@@ -393,7 +375,8 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
PointerType *PTy,
Type *Ty,
Value *V) {
- Type *ElTy = PTy->getElementType();
+ Type *OriginalElTy = PTy->getElementType();
+ Type *ElTy = OriginalElTy;
SmallVector<Value *, 4> GepIndices;
SmallVector<const SCEV *, 8> Ops(op_begin, op_end);
bool AnyNonZeroIndices = false;
@@ -402,9 +385,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
// without the other.
SplitAddRecs(Ops, Ty, SE);
- Type *IntPtrTy = SE.DL
- ? SE.DL->getIntPtrType(PTy)
- : Type::getInt64Ty(PTy->getContext());
+ Type *IntPtrTy = DL.getIntPtrType(PTy);
// Descend down the pointer's type and attempt to convert the other
// operands into GEP indices, at each level. The first index in a GEP
@@ -422,7 +403,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
const SCEV *Op = Ops[i];
const SCEV *Remainder = SE.getConstant(Ty, 0);
- if (FactorOutConstant(Op, Remainder, ElSize, SE, SE.DL)) {
+ if (FactorOutConstant(Op, Remainder, ElSize, SE, DL)) {
// Op now has ElSize factored out.
ScaledOps.push_back(Op);
if (!Remainder->isZero())
@@ -456,43 +437,25 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
bool FoundFieldNo = false;
// An empty struct has no fields.
if (STy->getNumElements() == 0) break;
- if (SE.DL) {
- // With DataLayout, field offsets are known. See if a constant offset
- // falls within any of the struct fields.
- if (Ops.empty()) break;
- if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[0]))
- if (SE.getTypeSizeInBits(C->getType()) <= 64) {
- const StructLayout &SL = *SE.DL->getStructLayout(STy);
- uint64_t FullOffset = C->getValue()->getZExtValue();
- if (FullOffset < SL.getSizeInBytes()) {
- unsigned ElIdx = SL.getElementContainingOffset(FullOffset);
- GepIndices.push_back(
- ConstantInt::get(Type::getInt32Ty(Ty->getContext()), ElIdx));
- ElTy = STy->getTypeAtIndex(ElIdx);
- Ops[0] =
+ // Field offsets are known. See if a constant offset falls within any of
+ // the struct fields.
+ if (Ops.empty())
+ break;
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[0]))
+ if (SE.getTypeSizeInBits(C->getType()) <= 64) {
+ const StructLayout &SL = *DL.getStructLayout(STy);
+ uint64_t FullOffset = C->getValue()->getZExtValue();
+ if (FullOffset < SL.getSizeInBytes()) {
+ unsigned ElIdx = SL.getElementContainingOffset(FullOffset);
+ GepIndices.push_back(
+ ConstantInt::get(Type::getInt32Ty(Ty->getContext()), ElIdx));
+ ElTy = STy->getTypeAtIndex(ElIdx);
+ Ops[0] =
SE.getConstant(Ty, FullOffset - SL.getElementOffset(ElIdx));
- AnyNonZeroIndices = true;
- FoundFieldNo = true;
- }
- }
- } else {
- // Without DataLayout, just check for an offsetof expression of the
- // appropriate struct type.
- for (unsigned i = 0, e = Ops.size(); i != e; ++i)
- if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Ops[i])) {
- Type *CTy;
- Constant *FieldNo;
- if (U->isOffsetOf(CTy, FieldNo) && CTy == STy) {
- GepIndices.push_back(FieldNo);
- ElTy =
- STy->getTypeAtIndex(cast<ConstantInt>(FieldNo)->getZExtValue());
- Ops[i] = SE.getConstant(Ty, 0);
- AnyNonZeroIndices = true;
- FoundFieldNo = true;
- break;
- }
+ AnyNonZeroIndices = true;
+ FoundFieldNo = true;
}
- }
+ }
// If no struct field offsets were found, tentatively assume that
// field zero was selected (since the zero offset would obviously
// be folded away).
@@ -526,7 +489,8 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
// Fold a GEP with constant operands.
if (Constant *CLHS = dyn_cast<Constant>(V))
if (Constant *CRHS = dyn_cast<Constant>(Idx))
- return ConstantExpr::getGetElementPtr(CLHS, CRHS);
+ return ConstantExpr::getGetElementPtr(Type::getInt8Ty(Ty->getContext()),
+ CLHS, CRHS);
// Do a quick scan to see if we have this GEP nearby. If so, reuse it.
unsigned ScanLimit = 6;
@@ -561,7 +525,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
}
// Emit a GEP.
- Value *GEP = Builder.CreateGEP(V, Idx, "uglygep");
+ Value *GEP = Builder.CreateGEP(Builder.getInt8Ty(), V, Idx, "uglygep");
rememberInstruction(GEP);
return GEP;
@@ -597,7 +561,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
Value *Casted = V;
if (V->getType() != PTy)
Casted = InsertNoopCastOfTo(Casted, PTy);
- Value *GEP = Builder.CreateGEP(Casted,
+ Value *GEP = Builder.CreateGEP(OriginalElTy, Casted,
GepIndices,
"scevgep");
Ops.push_back(SE.getUnknown(GEP));
@@ -1063,6 +1027,34 @@ static bool canBeCheaplyTransformed(ScalarEvolution &SE,
return false;
}
+static bool IsIncrementNSW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) {
+ if (!isa<IntegerType>(AR->getType()))
+ return false;
+
+ unsigned BitWidth = cast<IntegerType>(AR->getType())->getBitWidth();
+ Type *WideTy = IntegerType::get(AR->getType()->getContext(), BitWidth * 2);
+ const SCEV *Step = AR->getStepRecurrence(SE);
+ const SCEV *OpAfterExtend = SE.getAddExpr(SE.getSignExtendExpr(Step, WideTy),
+ SE.getSignExtendExpr(AR, WideTy));
+ const SCEV *ExtendAfterOp =
+ SE.getSignExtendExpr(SE.getAddExpr(AR, Step), WideTy);
+ return ExtendAfterOp == OpAfterExtend;
+}
+
+static bool IsIncrementNUW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) {
+ if (!isa<IntegerType>(AR->getType()))
+ return false;
+
+ unsigned BitWidth = cast<IntegerType>(AR->getType())->getBitWidth();
+ Type *WideTy = IntegerType::get(AR->getType()->getContext(), BitWidth * 2);
+ const SCEV *Step = AR->getStepRecurrence(SE);
+ const SCEV *OpAfterExtend = SE.getAddExpr(SE.getZeroExtendExpr(Step, WideTy),
+ SE.getZeroExtendExpr(AR, WideTy));
+ const SCEV *ExtendAfterOp =
+ SE.getZeroExtendExpr(SE.getAddExpr(AR, Step), WideTy);
+ return ExtendAfterOp == OpAfterExtend;
+}
+
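The widening trick in IsIncrementNSW/IsIncrementNUW above compares "extend, then add" against "add, then extend"; the two agree exactly when the narrow add cannot wrap. A tiny standalone illustration with plain integers on a two's-complement target (example values are assumptions, not LLVM code):

    #include <cstdint>
    #include <cstdio>

    int main() {
      int8_t A = 100, Step = 50;
      // "OpAfterExtend": sign-extend first, then add in the wide type.
      int16_t WideSum = int16_t(A) + int16_t(Step);       // 150
      // "ExtendAfterOp": add in the narrow type, then sign-extend the result.
      int16_t NarrowThenExt = int16_t(int8_t(A + Step));   // wraps to -106
      std::printf("%d vs %d -> %s\n", WideSum, NarrowThenExt,
                  WideSum == NarrowThenExt ? "no signed wrap" : "wrapped");
      return 0;
    }

When ScalarEvolution can prove the two widened expressions equal for every iteration, the no-wrap flag can safely be attached to the emitted add.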
/// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand
/// the base addrec, which is the addrec without any non-loop-dominating
/// values, and return the PHI.
@@ -1188,6 +1180,12 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
// Expand the step somewhere that dominates the loop header.
Value *StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin());
+ // The no-wrap behavior proved by IsIncrement(NUW|NSW) is only applicable if
+ // we actually do emit an addition. It does not apply if we emit a
+ // subtraction.
+ bool IncrementIsNUW = !useSubtract && IsIncrementNUW(SE, Normalized);
+ bool IncrementIsNSW = !useSubtract && IsIncrementNSW(SE, Normalized);
+
// Create the PHI.
BasicBlock *Header = L->getHeader();
Builder.SetInsertPoint(Header, Header->begin());
@@ -1213,10 +1211,11 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
IVIncInsertPos : Pred->getTerminator();
Builder.SetInsertPoint(InsertPos);
Value *IncV = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract);
+
if (isa<OverflowingBinaryOperator>(IncV)) {
- if (Normalized->getNoWrapFlags(SCEV::FlagNUW))
+ if (IncrementIsNUW)
cast<BinaryOperator>(IncV)->setHasNoUnsignedWrap();
- if (Normalized->getNoWrapFlags(SCEV::FlagNSW))
+ if (IncrementIsNSW)
cast<BinaryOperator>(IncV)->setHasNoSignedWrap();
}
PN->addIncoming(IncV, Pred);
@@ -1711,7 +1710,7 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
// Fold constant phis. They may be congruent to other constant phis and
// would confuse the logic below that expects proper IVs.
- if (Value *V = SimplifyInstruction(Phi, SE.DL, SE.TLI, SE.DT, SE.AC)) {
+ if (Value *V = SimplifyInstruction(Phi, DL, SE.TLI, SE.DT, SE.AC)) {
Phi->replaceAllUsesWith(V);
DeadInsts.push_back(Phi);
++NumElim;
@@ -1806,6 +1805,72 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
return NumElim;
}
+bool SCEVExpander::isHighCostExpansionHelper(
+ const SCEV *S, Loop *L, SmallPtrSetImpl<const SCEV *> &Processed) {
+ if (!Processed.insert(S).second)
+ return false;
+
+ if (auto *UDivExpr = dyn_cast<SCEVUDivExpr>(S)) {
+ // If the divisor is a power of two and the SCEV type fits in a native
+ // integer, consider the divison cheap irrespective of whether it occurs in
+ // the user code since it can be lowered into a right shift.
+ if (auto *SC = dyn_cast<SCEVConstant>(UDivExpr->getRHS()))
+ if (SC->getValue()->getValue().isPowerOf2()) {
+ const DataLayout &DL =
+ L->getHeader()->getParent()->getParent()->getDataLayout();
+ unsigned Width = cast<IntegerType>(UDivExpr->getType())->getBitWidth();
+ return DL.isIllegalInteger(Width);
+ }
+
+ // UDivExpr is very likely a UDiv that ScalarEvolution's HowFarToZero or
+ // HowManyLessThans produced to compute a precise expression, rather than a
+ // UDiv from the user's code. If we can't find a UDiv in the code with some
+ // simple searching, assume the former and consider UDivExpr expensive to
+ // compute.
+ BasicBlock *ExitingBB = L->getExitingBlock();
+ if (!ExitingBB)
+ return true;
+
+ BranchInst *ExitingBI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
+ if (!ExitingBI || !ExitingBI->isConditional())
+ return true;
+
+ ICmpInst *OrigCond = dyn_cast<ICmpInst>(ExitingBI->getCondition());
+ if (!OrigCond)
+ return true;
+
+ const SCEV *RHS = SE.getSCEV(OrigCond->getOperand(1));
+ RHS = SE.getMinusSCEV(RHS, SE.getConstant(RHS->getType(), 1));
+ if (RHS != S) {
+ const SCEV *LHS = SE.getSCEV(OrigCond->getOperand(0));
+ LHS = SE.getMinusSCEV(LHS, SE.getConstant(LHS->getType(), 1));
+ if (LHS != S)
+ return true;
+ }
+ }
+
+ // Recurse past add expressions, which commonly occur in the
+ // BackedgeTakenCount. They may already exist in program code, and if not,
+ // they are not too expensive to rematerialize.
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
+ I != E; ++I) {
+ if (isHighCostExpansionHelper(*I, L, Processed))
+ return true;
+ }
+ return false;
+ }
+
+ // HowManyLessThans uses a Max expression whenever the loop is not guarded by
+ // the exit condition.
+ if (isa<SCEVSMaxExpr>(S) || isa<SCEVUMaxExpr>(S))
+ return true;
+
+ // If we haven't recognized an expensive SCEV pattern, assume it's an
+ // expression produced by program code.
+ return false;
+}
+
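The power-of-two special case above comes down to this: an unsigned divide by 1 << k is a single shift, so the expansion is only worth calling expensive when the integer width itself is not natively supported. A hedged sketch of that heuristic outside of LLVM (the function name and the native-width parameter are illustrative):

    #include <cstdint>

    // Cheap if the divisor is a power of two and the value fits in a legal
    // (natively supported) integer width; X / (1u << K) lowers to X >> K.
    bool isCheapUDiv(uint64_t Divisor, unsigned BitWidth, unsigned NativeWidth) {
      bool PowerOfTwo = Divisor != 0 && (Divisor & (Divisor - 1)) == 0;
      return PowerOfTwo && BitWidth <= NativeWidth;
    }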
namespace {
// Search for a SCEV subexpression that is not safe to expand. Any expression
// that may expand to a !isSafeToSpeculativelyExecute value is unsafe, namely
diff --git a/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp b/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp
index c6ea3af..02f8b0b 100644
--- a/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp
+++ b/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp
@@ -80,7 +80,7 @@ public:
initializeScopedNoAliasAAPass(*PassRegistry::getPassRegistry());
}
- void initializePass() override { InitializeAliasAnalysis(this); }
+ bool doInitialization(Module &M) override;
/// getAdjustedAnalysisPointer - This method is used when a pass implements
/// an analysis interface through multiple inheritance. If needed, it
@@ -119,6 +119,11 @@ ImmutablePass *llvm::createScopedNoAliasAAPass() {
return new ScopedNoAliasAA();
}
+bool ScopedNoAliasAA::doInitialization(Module &M) {
+ InitializeAliasAnalysis(this, &M.getDataLayout());
+ return true;
+}
+
void
ScopedNoAliasAA::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
diff --git a/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp b/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp
new file mode 100644
index 0000000..635c50c
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp
@@ -0,0 +1,603 @@
+//===-- TargetLibraryInfo.cpp - Runtime library information ----------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TargetLibraryInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+static cl::opt<TargetLibraryInfoImpl::VectorLibrary> ClVectorLibrary(
+ "vector-library", cl::Hidden, cl::desc("Vector functions library"),
+ cl::init(TargetLibraryInfoImpl::NoLibrary),
+ cl::values(clEnumValN(TargetLibraryInfoImpl::NoLibrary, "none",
+ "No vector functions library"),
+ clEnumValN(TargetLibraryInfoImpl::Accelerate, "Accelerate",
+ "Accelerate framework"),
+ clEnumValEnd));
+
+const char *const TargetLibraryInfoImpl::StandardNames[LibFunc::NumLibFuncs] = {
+#define TLI_DEFINE_STRING
+#include "llvm/Analysis/TargetLibraryInfo.def"
+};
+
+static bool hasSinCosPiStret(const Triple &T) {
+ // Only Darwin variants have _stret versions of combined trig functions.
+ if (!T.isOSDarwin())
+ return false;
+
+ // The ABI is rather complicated on x86, so don't do anything special there.
+ if (T.getArch() == Triple::x86)
+ return false;
+
+ if (T.isMacOSX() && T.isMacOSXVersionLT(10, 9))
+ return false;
+
+ if (T.isiOS() && T.isOSVersionLT(7, 0))
+ return false;
+
+ return true;
+}
+
+/// initialize - Initialize the set of available library functions based on the
+/// specified target triple. This should be carefully written so that a missing
+/// target triple gets a sane set of defaults.
+static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
+ const char *const *StandardNames) {
+#ifndef NDEBUG
+ // Verify that the StandardNames array is in alphabetical order.
+ for (unsigned F = 1; F < LibFunc::NumLibFuncs; ++F) {
+ if (strcmp(StandardNames[F-1], StandardNames[F]) >= 0)
+ llvm_unreachable("TargetLibraryInfoImpl function names must be sorted");
+ }
+#endif // !NDEBUG
+
+ // There are no library implementations of memcpy and memset for AMD GPUs and
+ // these can be difficult to lower in the backend.
+ if (T.getArch() == Triple::r600 ||
+ T.getArch() == Triple::amdgcn) {
+ TLI.setUnavailable(LibFunc::memcpy);
+ TLI.setUnavailable(LibFunc::memset);
+ TLI.setUnavailable(LibFunc::memset_pattern16);
+ return;
+ }
+
+ // memset_pattern16 is only available on iOS 3.0 and Mac OS X 10.5 and later.
+ if (T.isMacOSX()) {
+ if (T.isMacOSXVersionLT(10, 5))
+ TLI.setUnavailable(LibFunc::memset_pattern16);
+ } else if (T.isiOS()) {
+ if (T.isOSVersionLT(3, 0))
+ TLI.setUnavailable(LibFunc::memset_pattern16);
+ } else {
+ TLI.setUnavailable(LibFunc::memset_pattern16);
+ }
+
+ if (!hasSinCosPiStret(T)) {
+ TLI.setUnavailable(LibFunc::sinpi);
+ TLI.setUnavailable(LibFunc::sinpif);
+ TLI.setUnavailable(LibFunc::cospi);
+ TLI.setUnavailable(LibFunc::cospif);
+ TLI.setUnavailable(LibFunc::sincospi_stret);
+ TLI.setUnavailable(LibFunc::sincospif_stret);
+ }
+
+ if (T.isMacOSX() && T.getArch() == Triple::x86 &&
+ !T.isMacOSXVersionLT(10, 7)) {
+ // x86-32 OSX has a scheme where fwrite and fputs (and some other functions
+ // we don't care about) have two versions; on recent OSX, the one we want
+ // has a $UNIX2003 suffix. The two implementations are identical except
+ // for the return value in some edge cases. However, we don't want to
+ // generate code that depends on the old symbols.
+ TLI.setAvailableWithName(LibFunc::fwrite, "fwrite$UNIX2003");
+ TLI.setAvailableWithName(LibFunc::fputs, "fputs$UNIX2003");
+ }
+
+ // iprintf and friends are only available on XCore and TCE.
+ if (T.getArch() != Triple::xcore && T.getArch() != Triple::tce) {
+ TLI.setUnavailable(LibFunc::iprintf);
+ TLI.setUnavailable(LibFunc::siprintf);
+ TLI.setUnavailable(LibFunc::fiprintf);
+ }
+
+ if (T.isOSWindows() && !T.isOSCygMing()) {
+ // Win32 does not support long double
+ TLI.setUnavailable(LibFunc::acosl);
+ TLI.setUnavailable(LibFunc::asinl);
+ TLI.setUnavailable(LibFunc::atanl);
+ TLI.setUnavailable(LibFunc::atan2l);
+ TLI.setUnavailable(LibFunc::ceill);
+ TLI.setUnavailable(LibFunc::copysignl);
+ TLI.setUnavailable(LibFunc::cosl);
+ TLI.setUnavailable(LibFunc::coshl);
+ TLI.setUnavailable(LibFunc::expl);
+ TLI.setUnavailable(LibFunc::fabsf); // Win32 and Win64 both lack fabsf
+ TLI.setUnavailable(LibFunc::fabsl);
+ TLI.setUnavailable(LibFunc::floorl);
+ TLI.setUnavailable(LibFunc::fmaxl);
+ TLI.setUnavailable(LibFunc::fminl);
+ TLI.setUnavailable(LibFunc::fmodl);
+ TLI.setUnavailable(LibFunc::frexpl);
+ TLI.setUnavailable(LibFunc::ldexpf);
+ TLI.setUnavailable(LibFunc::ldexpl);
+ TLI.setUnavailable(LibFunc::logl);
+ TLI.setUnavailable(LibFunc::modfl);
+ TLI.setUnavailable(LibFunc::powl);
+ TLI.setUnavailable(LibFunc::sinl);
+ TLI.setUnavailable(LibFunc::sinhl);
+ TLI.setUnavailable(LibFunc::sqrtl);
+ TLI.setUnavailable(LibFunc::tanl);
+ TLI.setUnavailable(LibFunc::tanhl);
+
+ // Win32 only has C89 math
+ TLI.setUnavailable(LibFunc::acosh);
+ TLI.setUnavailable(LibFunc::acoshf);
+ TLI.setUnavailable(LibFunc::acoshl);
+ TLI.setUnavailable(LibFunc::asinh);
+ TLI.setUnavailable(LibFunc::asinhf);
+ TLI.setUnavailable(LibFunc::asinhl);
+ TLI.setUnavailable(LibFunc::atanh);
+ TLI.setUnavailable(LibFunc::atanhf);
+ TLI.setUnavailable(LibFunc::atanhl);
+ TLI.setUnavailable(LibFunc::cbrt);
+ TLI.setUnavailable(LibFunc::cbrtf);
+ TLI.setUnavailable(LibFunc::cbrtl);
+ TLI.setUnavailable(LibFunc::exp2);
+ TLI.setUnavailable(LibFunc::exp2f);
+ TLI.setUnavailable(LibFunc::exp2l);
+ TLI.setUnavailable(LibFunc::expm1);
+ TLI.setUnavailable(LibFunc::expm1f);
+ TLI.setUnavailable(LibFunc::expm1l);
+ TLI.setUnavailable(LibFunc::log2);
+ TLI.setUnavailable(LibFunc::log2f);
+ TLI.setUnavailable(LibFunc::log2l);
+ TLI.setUnavailable(LibFunc::log1p);
+ TLI.setUnavailable(LibFunc::log1pf);
+ TLI.setUnavailable(LibFunc::log1pl);
+ TLI.setUnavailable(LibFunc::logb);
+ TLI.setUnavailable(LibFunc::logbf);
+ TLI.setUnavailable(LibFunc::logbl);
+ TLI.setUnavailable(LibFunc::nearbyint);
+ TLI.setUnavailable(LibFunc::nearbyintf);
+ TLI.setUnavailable(LibFunc::nearbyintl);
+ TLI.setUnavailable(LibFunc::rint);
+ TLI.setUnavailable(LibFunc::rintf);
+ TLI.setUnavailable(LibFunc::rintl);
+ TLI.setUnavailable(LibFunc::round);
+ TLI.setUnavailable(LibFunc::roundf);
+ TLI.setUnavailable(LibFunc::roundl);
+ TLI.setUnavailable(LibFunc::trunc);
+ TLI.setUnavailable(LibFunc::truncf);
+ TLI.setUnavailable(LibFunc::truncl);
+
+ // Win32 provides some C99 math with mangled names
+ TLI.setAvailableWithName(LibFunc::copysign, "_copysign");
+
+ if (T.getArch() == Triple::x86) {
+ // Win32 on x86 implements single-precision math functions as macros
+ TLI.setUnavailable(LibFunc::acosf);
+ TLI.setUnavailable(LibFunc::asinf);
+ TLI.setUnavailable(LibFunc::atanf);
+ TLI.setUnavailable(LibFunc::atan2f);
+ TLI.setUnavailable(LibFunc::ceilf);
+ TLI.setUnavailable(LibFunc::copysignf);
+ TLI.setUnavailable(LibFunc::cosf);
+ TLI.setUnavailable(LibFunc::coshf);
+ TLI.setUnavailable(LibFunc::expf);
+ TLI.setUnavailable(LibFunc::floorf);
+ TLI.setUnavailable(LibFunc::fminf);
+ TLI.setUnavailable(LibFunc::fmaxf);
+ TLI.setUnavailable(LibFunc::fmodf);
+ TLI.setUnavailable(LibFunc::logf);
+ TLI.setUnavailable(LibFunc::powf);
+ TLI.setUnavailable(LibFunc::sinf);
+ TLI.setUnavailable(LibFunc::sinhf);
+ TLI.setUnavailable(LibFunc::sqrtf);
+ TLI.setUnavailable(LibFunc::tanf);
+ TLI.setUnavailable(LibFunc::tanhf);
+ }
+
+ // Win32 does *not* provide these functions, but they are
+ // generally available on POSIX-compliant systems:
+ TLI.setUnavailable(LibFunc::access);
+ TLI.setUnavailable(LibFunc::bcmp);
+ TLI.setUnavailable(LibFunc::bcopy);
+ TLI.setUnavailable(LibFunc::bzero);
+ TLI.setUnavailable(LibFunc::chmod);
+ TLI.setUnavailable(LibFunc::chown);
+ TLI.setUnavailable(LibFunc::closedir);
+ TLI.setUnavailable(LibFunc::ctermid);
+ TLI.setUnavailable(LibFunc::fdopen);
+ TLI.setUnavailable(LibFunc::ffs);
+ TLI.setUnavailable(LibFunc::fileno);
+ TLI.setUnavailable(LibFunc::flockfile);
+ TLI.setUnavailable(LibFunc::fseeko);
+ TLI.setUnavailable(LibFunc::fstat);
+ TLI.setUnavailable(LibFunc::fstatvfs);
+ TLI.setUnavailable(LibFunc::ftello);
+ TLI.setUnavailable(LibFunc::ftrylockfile);
+ TLI.setUnavailable(LibFunc::funlockfile);
+ TLI.setUnavailable(LibFunc::getc_unlocked);
+ TLI.setUnavailable(LibFunc::getitimer);
+ TLI.setUnavailable(LibFunc::getlogin_r);
+ TLI.setUnavailable(LibFunc::getpwnam);
+ TLI.setUnavailable(LibFunc::gettimeofday);
+ TLI.setUnavailable(LibFunc::htonl);
+ TLI.setUnavailable(LibFunc::htons);
+ TLI.setUnavailable(LibFunc::lchown);
+ TLI.setUnavailable(LibFunc::lstat);
+ TLI.setUnavailable(LibFunc::memccpy);
+ TLI.setUnavailable(LibFunc::mkdir);
+ TLI.setUnavailable(LibFunc::ntohl);
+ TLI.setUnavailable(LibFunc::ntohs);
+ TLI.setUnavailable(LibFunc::open);
+ TLI.setUnavailable(LibFunc::opendir);
+ TLI.setUnavailable(LibFunc::pclose);
+ TLI.setUnavailable(LibFunc::popen);
+ TLI.setUnavailable(LibFunc::pread);
+ TLI.setUnavailable(LibFunc::pwrite);
+ TLI.setUnavailable(LibFunc::read);
+ TLI.setUnavailable(LibFunc::readlink);
+ TLI.setUnavailable(LibFunc::realpath);
+ TLI.setUnavailable(LibFunc::rmdir);
+ TLI.setUnavailable(LibFunc::setitimer);
+ TLI.setUnavailable(LibFunc::stat);
+ TLI.setUnavailable(LibFunc::statvfs);
+ TLI.setUnavailable(LibFunc::stpcpy);
+ TLI.setUnavailable(LibFunc::stpncpy);
+ TLI.setUnavailable(LibFunc::strcasecmp);
+ TLI.setUnavailable(LibFunc::strncasecmp);
+ TLI.setUnavailable(LibFunc::times);
+ TLI.setUnavailable(LibFunc::uname);
+ TLI.setUnavailable(LibFunc::unlink);
+ TLI.setUnavailable(LibFunc::unsetenv);
+ TLI.setUnavailable(LibFunc::utime);
+ TLI.setUnavailable(LibFunc::utimes);
+ TLI.setUnavailable(LibFunc::write);
+
+ // Win32 does *not* provide these functions, but they are
+ // specified by C99:
+ TLI.setUnavailable(LibFunc::atoll);
+ TLI.setUnavailable(LibFunc::frexpf);
+ TLI.setUnavailable(LibFunc::llabs);
+ }
+
+ switch (T.getOS()) {
+ case Triple::MacOSX:
+ // exp10 and exp10f are not available on OS X until 10.9 and iOS until 7.0
+ // and their names are __exp10 and __exp10f. exp10l is not available on
+ // OS X or iOS.
+ TLI.setUnavailable(LibFunc::exp10l);
+ if (T.isMacOSXVersionLT(10, 9)) {
+ TLI.setUnavailable(LibFunc::exp10);
+ TLI.setUnavailable(LibFunc::exp10f);
+ } else {
+ TLI.setAvailableWithName(LibFunc::exp10, "__exp10");
+ TLI.setAvailableWithName(LibFunc::exp10f, "__exp10f");
+ }
+ break;
+ case Triple::IOS:
+ TLI.setUnavailable(LibFunc::exp10l);
+ if (T.isOSVersionLT(7, 0)) {
+ TLI.setUnavailable(LibFunc::exp10);
+ TLI.setUnavailable(LibFunc::exp10f);
+ } else {
+ TLI.setAvailableWithName(LibFunc::exp10, "__exp10");
+ TLI.setAvailableWithName(LibFunc::exp10f, "__exp10f");
+ }
+ break;
+ case Triple::Linux:
+ // exp10, exp10f, and exp10l are available on Linux (GLIBC) but are extremely
+ // buggy prior to glibc version 2.18. Until this version is widely deployed
+ // or we have a reasonable detection strategy, we cannot use exp10 reliably
+ // on Linux.
+ //
+ // Fall through to disable all of them.
+ default:
+ TLI.setUnavailable(LibFunc::exp10);
+ TLI.setUnavailable(LibFunc::exp10f);
+ TLI.setUnavailable(LibFunc::exp10l);
+ }
+
+ // ffsl is available on at least Darwin, Mac OS X, iOS, FreeBSD, and
+ // Linux (GLIBC):
+ // http://developer.apple.com/library/mac/#documentation/Darwin/Reference/ManPages/man3/ffsl.3.html
+ // http://svn.freebsd.org/base/user/eri/pf45/head/lib/libc/string/ffsl.c
+ // http://www.gnu.org/software/gnulib/manual/html_node/ffsl.html
+ switch (T.getOS()) {
+ case Triple::Darwin:
+ case Triple::MacOSX:
+ case Triple::IOS:
+ case Triple::FreeBSD:
+ case Triple::Linux:
+ break;
+ default:
+ TLI.setUnavailable(LibFunc::ffsl);
+ }
+
+ // ffsll is available on at least FreeBSD and Linux (GLIBC):
+ // http://svn.freebsd.org/base/user/eri/pf45/head/lib/libc/string/ffsll.c
+ // http://www.gnu.org/software/gnulib/manual/html_node/ffsll.html
+ switch (T.getOS()) {
+ case Triple::FreeBSD:
+ case Triple::Linux:
+ break;
+ default:
+ TLI.setUnavailable(LibFunc::ffsll);
+ }
+
+ // The following functions are available on at least Linux:
+ if (!T.isOSLinux()) {
+ TLI.setUnavailable(LibFunc::dunder_strdup);
+ TLI.setUnavailable(LibFunc::dunder_strtok_r);
+ TLI.setUnavailable(LibFunc::dunder_isoc99_scanf);
+ TLI.setUnavailable(LibFunc::dunder_isoc99_sscanf);
+ TLI.setUnavailable(LibFunc::under_IO_getc);
+ TLI.setUnavailable(LibFunc::under_IO_putc);
+ TLI.setUnavailable(LibFunc::memalign);
+ TLI.setUnavailable(LibFunc::fopen64);
+ TLI.setUnavailable(LibFunc::fseeko64);
+ TLI.setUnavailable(LibFunc::fstat64);
+ TLI.setUnavailable(LibFunc::fstatvfs64);
+ TLI.setUnavailable(LibFunc::ftello64);
+ TLI.setUnavailable(LibFunc::lstat64);
+ TLI.setUnavailable(LibFunc::open64);
+ TLI.setUnavailable(LibFunc::stat64);
+ TLI.setUnavailable(LibFunc::statvfs64);
+ TLI.setUnavailable(LibFunc::tmpfile64);
+ }
+
+ TLI.addVectorizableFunctionsFromVecLib(ClVectorLibrary);
+}
+
+TargetLibraryInfoImpl::TargetLibraryInfoImpl() {
+ // Default to everything being available.
+ memset(AvailableArray, -1, sizeof(AvailableArray));
+
+ initialize(*this, Triple(), StandardNames);
+}
+
+TargetLibraryInfoImpl::TargetLibraryInfoImpl(const Triple &T) {
+ // Default to everything being available.
+ memset(AvailableArray, -1, sizeof(AvailableArray));
+
+ initialize(*this, T, StandardNames);
+}
+
+TargetLibraryInfoImpl::TargetLibraryInfoImpl(const TargetLibraryInfoImpl &TLI)
+ : CustomNames(TLI.CustomNames) {
+ memcpy(AvailableArray, TLI.AvailableArray, sizeof(AvailableArray));
+ VectorDescs = TLI.VectorDescs;
+ ScalarDescs = TLI.ScalarDescs;
+}
+
+TargetLibraryInfoImpl::TargetLibraryInfoImpl(TargetLibraryInfoImpl &&TLI)
+ : CustomNames(std::move(TLI.CustomNames)) {
+ std::move(std::begin(TLI.AvailableArray), std::end(TLI.AvailableArray),
+ AvailableArray);
+ VectorDescs = TLI.VectorDescs;
+ ScalarDescs = TLI.ScalarDescs;
+}
+
+TargetLibraryInfoImpl &TargetLibraryInfoImpl::operator=(const TargetLibraryInfoImpl &TLI) {
+ CustomNames = TLI.CustomNames;
+ memcpy(AvailableArray, TLI.AvailableArray, sizeof(AvailableArray));
+ return *this;
+}
+
+TargetLibraryInfoImpl &TargetLibraryInfoImpl::operator=(TargetLibraryInfoImpl &&TLI) {
+ CustomNames = std::move(TLI.CustomNames);
+ std::move(std::begin(TLI.AvailableArray), std::end(TLI.AvailableArray),
+ AvailableArray);
+ return *this;
+}
+
+static StringRef sanitizeFunctionName(StringRef funcName) {
+ // Filter out empty names and names containing null bytes, those can't be in
+ // our table.
+ if (funcName.empty() || funcName.find('\0') != StringRef::npos)
+ return StringRef();
+
+ // Check for \01 prefix that is used to mangle __asm declarations and
+ // strip it if present.
+ return GlobalValue::getRealLinkageName(funcName);
+}
+
+bool TargetLibraryInfoImpl::getLibFunc(StringRef funcName,
+ LibFunc::Func &F) const {
+ const char *const *Start = &StandardNames[0];
+ const char *const *End = &StandardNames[LibFunc::NumLibFuncs];
+
+ funcName = sanitizeFunctionName(funcName);
+ if (funcName.empty())
+ return false;
+
+ const char *const *I = std::lower_bound(
+ Start, End, funcName, [](const char *LHS, StringRef RHS) {
+ return std::strncmp(LHS, RHS.data(), RHS.size()) < 0;
+ });
+ if (I != End && *I == funcName) {
+ F = (LibFunc::Func)(I - Start);
+ return true;
+ }
+ return false;
+}
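The lookup above binary-searches a sorted table of C-string names with a comparator that inspects only the query's length, then confirms an exact match. A minimal standalone version of the same scheme (a sketch only; the table contents are made up):

    #include <algorithm>
    #include <cstring>
    #include <iterator>
    #include <string>

    static const char *const Names[] = {"cos", "exp", "memcpy", "sin", "sqrt"};

    // Returns the index of Fn in Names (standing in for LibFunc::Func), or -1.
    int lookup(const std::string &Fn) {
      const char *const *Start = std::begin(Names);
      const char *const *End = std::end(Names);
      const char *const *I = std::lower_bound(
          Start, End, Fn, [](const char *LHS, const std::string &RHS) {
            return std::strncmp(LHS, RHS.data(), RHS.size()) < 0;
          });
      if (I != End && Fn == *I)   // strncmp compared only a prefix; confirm here
        return int(I - Start);
      return -1;
    }

The prefix-only comparison is why the final equality check is required: a query such as "mem" lands on "memcpy" but fails the exact-match test.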
+
+void TargetLibraryInfoImpl::disableAllFunctions() {
+ memset(AvailableArray, 0, sizeof(AvailableArray));
+}
+
+static bool compareByScalarFnName(const VecDesc &LHS, const VecDesc &RHS) {
+ return std::strncmp(LHS.ScalarFnName, RHS.ScalarFnName,
+ std::strlen(RHS.ScalarFnName)) < 0;
+}
+
+static bool compareByVectorFnName(const VecDesc &LHS, const VecDesc &RHS) {
+ return std::strncmp(LHS.VectorFnName, RHS.VectorFnName,
+ std::strlen(RHS.VectorFnName)) < 0;
+}
+
+static bool compareWithScalarFnName(const VecDesc &LHS, StringRef S) {
+ return std::strncmp(LHS.ScalarFnName, S.data(), S.size()) < 0;
+}
+
+static bool compareWithVectorFnName(const VecDesc &LHS, StringRef S) {
+ return std::strncmp(LHS.VectorFnName, S.data(), S.size()) < 0;
+}
+
+void TargetLibraryInfoImpl::addVectorizableFunctions(ArrayRef<VecDesc> Fns) {
+ VectorDescs.insert(VectorDescs.end(), Fns.begin(), Fns.end());
+ std::sort(VectorDescs.begin(), VectorDescs.end(), compareByScalarFnName);
+
+ ScalarDescs.insert(ScalarDescs.end(), Fns.begin(), Fns.end());
+ std::sort(ScalarDescs.begin(), ScalarDescs.end(), compareByVectorFnName);
+}
+
+void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib(
+ enum VectorLibrary VecLib) {
+ switch (VecLib) {
+ case Accelerate: {
+ const VecDesc VecFuncs[] = {
+ // Floating-Point Arithmetic and Auxiliary Functions
+ {"ceilf", "vceilf", 4},
+ {"fabsf", "vfabsf", 4},
+ {"llvm.fabs.f32", "vfabsf", 4},
+ {"floorf", "vfloorf", 4},
+ {"sqrtf", "vsqrtf", 4},
+ {"llvm.sqrt.f32", "vsqrtf", 4},
+
+ // Exponential and Logarithmic Functions
+ {"expf", "vexpf", 4},
+ {"llvm.exp.f32", "vexpf", 4},
+ {"expm1f", "vexpm1f", 4},
+ {"logf", "vlogf", 4},
+ {"llvm.log.f32", "vlogf", 4},
+ {"log1pf", "vlog1pf", 4},
+ {"log10f", "vlog10f", 4},
+ {"llvm.log10.f32", "vlog10f", 4},
+ {"logbf", "vlogbf", 4},
+
+ // Trigonometric Functions
+ {"sinf", "vsinf", 4},
+ {"llvm.sin.f32", "vsinf", 4},
+ {"cosf", "vcosf", 4},
+ {"llvm.cos.f32", "vcosf", 4},
+ {"tanf", "vtanf", 4},
+ {"asinf", "vasinf", 4},
+ {"acosf", "vacosf", 4},
+ {"atanf", "vatanf", 4},
+
+ // Hyperbolic Functions
+ {"sinhf", "vsinhf", 4},
+ {"coshf", "vcoshf", 4},
+ {"tanhf", "vtanhf", 4},
+ {"asinhf", "vasinhf", 4},
+ {"acoshf", "vacoshf", 4},
+ {"atanhf", "vatanhf", 4},
+ };
+ addVectorizableFunctions(VecFuncs);
+ break;
+ }
+ case NoLibrary:
+ break;
+ }
+}
+
+bool TargetLibraryInfoImpl::isFunctionVectorizable(StringRef funcName) const {
+ funcName = sanitizeFunctionName(funcName);
+ if (funcName.empty())
+ return false;
+
+ std::vector<VecDesc>::const_iterator I = std::lower_bound(
+ VectorDescs.begin(), VectorDescs.end(), funcName,
+ compareWithScalarFnName);
+ return I != VectorDescs.end() && StringRef(I->ScalarFnName) == funcName;
+}
+
+StringRef TargetLibraryInfoImpl::getVectorizedFunction(StringRef F,
+ unsigned VF) const {
+ F = sanitizeFunctionName(F);
+ if (F.empty())
+ return F;
+ std::vector<VecDesc>::const_iterator I = std::lower_bound(
+ VectorDescs.begin(), VectorDescs.end(), F, compareWithScalarFnName);
+ while (I != VectorDescs.end() && StringRef(I->ScalarFnName) == F) {
+ if (I->VectorizationFactor == VF)
+ return I->VectorFnName;
+ ++I;
+ }
+ return StringRef();
+}
+
+StringRef TargetLibraryInfoImpl::getScalarizedFunction(StringRef F,
+ unsigned &VF) const {
+ F = sanitizeFunctionName(F);
+ if (F.empty())
+ return F;
+
+ std::vector<VecDesc>::const_iterator I = std::lower_bound(
+ ScalarDescs.begin(), ScalarDescs.end(), F, compareWithVectorFnName);
+ if (I == ScalarDescs.end() || StringRef(I->VectorFnName) != F)
+ return StringRef();
+ VF = I->VectorizationFactor;
+ return I->ScalarFnName;
+}
+
+TargetLibraryInfo TargetLibraryAnalysis::run(Module &M) {
+ if (PresetInfoImpl)
+ return TargetLibraryInfo(*PresetInfoImpl);
+
+ return TargetLibraryInfo(lookupInfoImpl(Triple(M.getTargetTriple())));
+}
+
+TargetLibraryInfo TargetLibraryAnalysis::run(Function &F) {
+ if (PresetInfoImpl)
+ return TargetLibraryInfo(*PresetInfoImpl);
+
+ return TargetLibraryInfo(
+ lookupInfoImpl(Triple(F.getParent()->getTargetTriple())));
+}
+
+TargetLibraryInfoImpl &TargetLibraryAnalysis::lookupInfoImpl(Triple T) {
+ std::unique_ptr<TargetLibraryInfoImpl> &Impl =
+ Impls[T.normalize()];
+ if (!Impl)
+ Impl.reset(new TargetLibraryInfoImpl(T));
+
+ return *Impl;
+}
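lookupInfoImpl above is a lazy per-key cache: the map slot is default-constructed empty on first access and filled exactly once. The same pattern in isolation, using only standard-library types (names are illustrative, not the patch's API):

    #include <map>
    #include <memory>
    #include <string>

    struct Impl {
      explicit Impl(const std::string &Key) : Key(Key) {}
      std::string Key;
    };

    Impl &lookup(std::map<std::string, std::unique_ptr<Impl>> &Impls,
                 const std::string &NormalizedKey) {
      std::unique_ptr<Impl> &Slot = Impls[NormalizedKey]; // empty slot on first use
      if (!Slot)
        Slot.reset(new Impl(NormalizedKey));              // construct lazily, once
      return *Slot;
    }

Normalizing the key (Triple::normalize in the patch) keeps equivalent spellings of the same triple from producing duplicate cache entries.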
+
+
+TargetLibraryInfoWrapperPass::TargetLibraryInfoWrapperPass()
+ : ImmutablePass(ID), TLIImpl(), TLI(TLIImpl) {
+ initializeTargetLibraryInfoWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+TargetLibraryInfoWrapperPass::TargetLibraryInfoWrapperPass(const Triple &T)
+ : ImmutablePass(ID), TLIImpl(T), TLI(TLIImpl) {
+ initializeTargetLibraryInfoWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+TargetLibraryInfoWrapperPass::TargetLibraryInfoWrapperPass(
+ const TargetLibraryInfoImpl &TLIImpl)
+ : ImmutablePass(ID), TLIImpl(TLIImpl), TLI(this->TLIImpl) {
+ initializeTargetLibraryInfoWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+char TargetLibraryAnalysis::PassID;
+
+// Register the basic pass.
+INITIALIZE_PASS(TargetLibraryInfoWrapperPass, "targetlibinfo",
+ "Target Library Information", false, true)
+char TargetLibraryInfoWrapperPass::ID = 0;
+
+void TargetLibraryInfoWrapperPass::anchor() {}
diff --git a/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp b/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp
index ef3909b..e1744d1 100644
--- a/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -8,11 +8,13 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/TargetTransformInfoImpl.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/ErrorHandling.h"
@@ -20,637 +22,303 @@ using namespace llvm;
#define DEBUG_TYPE "tti"
-// Setup the analysis group to manage the TargetTransformInfo passes.
-INITIALIZE_ANALYSIS_GROUP(TargetTransformInfo, "Target Information", NoTTI)
-char TargetTransformInfo::ID = 0;
-
-TargetTransformInfo::~TargetTransformInfo() {
+namespace {
+/// \brief No-op implementation of the TTI interface using the utility base
+/// classes.
+///
+/// This is used when no target specific information is available.
+struct NoTTIImpl : TargetTransformInfoImplCRTPBase<NoTTIImpl> {
+ explicit NoTTIImpl(const DataLayout *DL)
+ : TargetTransformInfoImplCRTPBase<NoTTIImpl>(DL) {}
+};
}
-void TargetTransformInfo::pushTTIStack(Pass *P) {
- TopTTI = this;
- PrevTTI = &P->getAnalysis<TargetTransformInfo>();
+TargetTransformInfo::TargetTransformInfo(const DataLayout *DL)
+ : TTIImpl(new Model<NoTTIImpl>(NoTTIImpl(DL))) {}
- // Walk up the chain and update the top TTI pointer.
- for (TargetTransformInfo *PTTI = PrevTTI; PTTI; PTTI = PTTI->PrevTTI)
- PTTI->TopTTI = this;
-}
+TargetTransformInfo::~TargetTransformInfo() {}
-void TargetTransformInfo::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<TargetTransformInfo>();
+TargetTransformInfo::TargetTransformInfo(TargetTransformInfo &&Arg)
+ : TTIImpl(std::move(Arg.TTIImpl)) {}
+
+TargetTransformInfo &TargetTransformInfo::operator=(TargetTransformInfo &&RHS) {
+ TTIImpl = std::move(RHS.TTIImpl);
+ return *this;
}
unsigned TargetTransformInfo::getOperationCost(unsigned Opcode, Type *Ty,
Type *OpTy) const {
- return PrevTTI->getOperationCost(Opcode, Ty, OpTy);
-}
-
-unsigned TargetTransformInfo::getGEPCost(
- const Value *Ptr, ArrayRef<const Value *> Operands) const {
- return PrevTTI->getGEPCost(Ptr, Operands);
+ return TTIImpl->getOperationCost(Opcode, Ty, OpTy);
}
unsigned TargetTransformInfo::getCallCost(FunctionType *FTy,
int NumArgs) const {
- return PrevTTI->getCallCost(FTy, NumArgs);
+ return TTIImpl->getCallCost(FTy, NumArgs);
}
-unsigned TargetTransformInfo::getCallCost(const Function *F,
- int NumArgs) const {
- return PrevTTI->getCallCost(F, NumArgs);
-}
-
-unsigned TargetTransformInfo::getCallCost(
- const Function *F, ArrayRef<const Value *> Arguments) const {
- return PrevTTI->getCallCost(F, Arguments);
-}
-
-unsigned TargetTransformInfo::getIntrinsicCost(
- Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> ParamTys) const {
- return PrevTTI->getIntrinsicCost(IID, RetTy, ParamTys);
+unsigned
+TargetTransformInfo::getCallCost(const Function *F,
+ ArrayRef<const Value *> Arguments) const {
+ return TTIImpl->getCallCost(F, Arguments);
}
-unsigned TargetTransformInfo::getIntrinsicCost(
- Intrinsic::ID IID, Type *RetTy, ArrayRef<const Value *> Arguments) const {
- return PrevTTI->getIntrinsicCost(IID, RetTy, Arguments);
+unsigned
+TargetTransformInfo::getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
+ ArrayRef<const Value *> Arguments) const {
+ return TTIImpl->getIntrinsicCost(IID, RetTy, Arguments);
}
unsigned TargetTransformInfo::getUserCost(const User *U) const {
- return PrevTTI->getUserCost(U);
+ return TTIImpl->getUserCost(U);
}
bool TargetTransformInfo::hasBranchDivergence() const {
- return PrevTTI->hasBranchDivergence();
+ return TTIImpl->hasBranchDivergence();
+}
+
+bool TargetTransformInfo::isSourceOfDivergence(const Value *V) const {
+ return TTIImpl->isSourceOfDivergence(V);
}
bool TargetTransformInfo::isLoweredToCall(const Function *F) const {
- return PrevTTI->isLoweredToCall(F);
+ return TTIImpl->isLoweredToCall(F);
}
-void
-TargetTransformInfo::getUnrollingPreferences(const Function *F, Loop *L,
- UnrollingPreferences &UP) const {
- PrevTTI->getUnrollingPreferences(F, L, UP);
+void TargetTransformInfo::getUnrollingPreferences(
+ Loop *L, UnrollingPreferences &UP) const {
+ return TTIImpl->getUnrollingPreferences(L, UP);
}
bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const {
- return PrevTTI->isLegalAddImmediate(Imm);
+ return TTIImpl->isLegalAddImmediate(Imm);
}
bool TargetTransformInfo::isLegalICmpImmediate(int64_t Imm) const {
- return PrevTTI->isLegalICmpImmediate(Imm);
+ return TTIImpl->isLegalICmpImmediate(Imm);
}
-bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType,
- int Consecutive) const {
- return false;
+bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
+ int64_t BaseOffset,
+ bool HasBaseReg,
+ int64_t Scale) const {
+ return TTIImpl->isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
+ Scale);
}
bool TargetTransformInfo::isLegalMaskedStore(Type *DataType,
int Consecutive) const {
- return false;
+ return TTIImpl->isLegalMaskedStore(DataType, Consecutive);
}
-
-bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
- int64_t BaseOffset,
- bool HasBaseReg,
- int64_t Scale) const {
- return PrevTTI->isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
- Scale);
+bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType,
+ int Consecutive) const {
+ return TTIImpl->isLegalMaskedLoad(DataType, Consecutive);
}
int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
int64_t BaseOffset,
bool HasBaseReg,
int64_t Scale) const {
- return PrevTTI->getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
+ return TTIImpl->getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
Scale);
}
bool TargetTransformInfo::isTruncateFree(Type *Ty1, Type *Ty2) const {
- return PrevTTI->isTruncateFree(Ty1, Ty2);
+ return TTIImpl->isTruncateFree(Ty1, Ty2);
+}
+
+bool TargetTransformInfo::isProfitableToHoist(Instruction *I) const {
+ return TTIImpl->isProfitableToHoist(I);
}
bool TargetTransformInfo::isTypeLegal(Type *Ty) const {
- return PrevTTI->isTypeLegal(Ty);
+ return TTIImpl->isTypeLegal(Ty);
}
unsigned TargetTransformInfo::getJumpBufAlignment() const {
- return PrevTTI->getJumpBufAlignment();
+ return TTIImpl->getJumpBufAlignment();
}
unsigned TargetTransformInfo::getJumpBufSize() const {
- return PrevTTI->getJumpBufSize();
+ return TTIImpl->getJumpBufSize();
}
bool TargetTransformInfo::shouldBuildLookupTables() const {
- return PrevTTI->shouldBuildLookupTables();
+ return TTIImpl->shouldBuildLookupTables();
+}
+
+bool TargetTransformInfo::enableAggressiveInterleaving(bool LoopHasReductions) const {
+ return TTIImpl->enableAggressiveInterleaving(LoopHasReductions);
}
TargetTransformInfo::PopcntSupportKind
TargetTransformInfo::getPopcntSupport(unsigned IntTyWidthInBit) const {
- return PrevTTI->getPopcntSupport(IntTyWidthInBit);
+ return TTIImpl->getPopcntSupport(IntTyWidthInBit);
}
bool TargetTransformInfo::haveFastSqrt(Type *Ty) const {
- return PrevTTI->haveFastSqrt(Ty);
+ return TTIImpl->haveFastSqrt(Ty);
+}
+
+unsigned TargetTransformInfo::getFPOpCost(Type *Ty) const {
+ return TTIImpl->getFPOpCost(Ty);
}
unsigned TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const {
- return PrevTTI->getIntImmCost(Imm, Ty);
+ return TTIImpl->getIntImmCost(Imm, Ty);
}
-unsigned TargetTransformInfo::getIntImmCost(unsigned Opc, unsigned Idx,
+unsigned TargetTransformInfo::getIntImmCost(unsigned Opcode, unsigned Idx,
const APInt &Imm, Type *Ty) const {
- return PrevTTI->getIntImmCost(Opc, Idx, Imm, Ty);
+ return TTIImpl->getIntImmCost(Opcode, Idx, Imm, Ty);
}
unsigned TargetTransformInfo::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
const APInt &Imm, Type *Ty) const {
- return PrevTTI->getIntImmCost(IID, Idx, Imm, Ty);
+ return TTIImpl->getIntImmCost(IID, Idx, Imm, Ty);
}
unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const {
- return PrevTTI->getNumberOfRegisters(Vector);
+ return TTIImpl->getNumberOfRegisters(Vector);
}
unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const {
- return PrevTTI->getRegisterBitWidth(Vector);
+ return TTIImpl->getRegisterBitWidth(Vector);
}
-unsigned TargetTransformInfo::getMaxInterleaveFactor() const {
- return PrevTTI->getMaxInterleaveFactor();
+unsigned TargetTransformInfo::getMaxInterleaveFactor(unsigned VF) const {
+ return TTIImpl->getMaxInterleaveFactor(VF);
}
unsigned TargetTransformInfo::getArithmeticInstrCost(
- unsigned Opcode, Type *Ty, OperandValueKind Op1Info,
- OperandValueKind Op2Info, OperandValueProperties Opd1PropInfo,
+ unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
+ OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
OperandValueProperties Opd2PropInfo) const {
- return PrevTTI->getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
+ return TTIImpl->getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
Opd1PropInfo, Opd2PropInfo);
}
-unsigned TargetTransformInfo::getShuffleCost(ShuffleKind Kind, Type *Tp,
+unsigned TargetTransformInfo::getShuffleCost(ShuffleKind Kind, Type *Ty,
int Index, Type *SubTp) const {
- return PrevTTI->getShuffleCost(Kind, Tp, Index, SubTp);
+ return TTIImpl->getShuffleCost(Kind, Ty, Index, SubTp);
}
unsigned TargetTransformInfo::getCastInstrCost(unsigned Opcode, Type *Dst,
Type *Src) const {
- return PrevTTI->getCastInstrCost(Opcode, Dst, Src);
+ return TTIImpl->getCastInstrCost(Opcode, Dst, Src);
}
unsigned TargetTransformInfo::getCFInstrCost(unsigned Opcode) const {
- return PrevTTI->getCFInstrCost(Opcode);
+ return TTIImpl->getCFInstrCost(Opcode);
}
unsigned TargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
Type *CondTy) const {
- return PrevTTI->getCmpSelInstrCost(Opcode, ValTy, CondTy);
+ return TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy);
}
unsigned TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val,
unsigned Index) const {
- return PrevTTI->getVectorInstrCost(Opcode, Val, Index);
+ return TTIImpl->getVectorInstrCost(Opcode, Val, Index);
}
unsigned TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src,
unsigned Alignment,
unsigned AddressSpace) const {
- return PrevTTI->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
+ return TTIImpl->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
}
unsigned
-TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID,
- Type *RetTy,
+TargetTransformInfo::getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
+ unsigned Alignment,
+ unsigned AddressSpace) const {
+ return TTIImpl->getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
+}
+
+unsigned
+TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
ArrayRef<Type *> Tys) const {
- return PrevTTI->getIntrinsicInstrCost(ID, RetTy, Tys);
+ return TTIImpl->getIntrinsicInstrCost(ID, RetTy, Tys);
+}
+
+unsigned TargetTransformInfo::getCallInstrCost(Function *F, Type *RetTy,
+ ArrayRef<Type *> Tys) const {
+ return TTIImpl->getCallInstrCost(F, RetTy, Tys);
}
unsigned TargetTransformInfo::getNumberOfParts(Type *Tp) const {
- return PrevTTI->getNumberOfParts(Tp);
+ return TTIImpl->getNumberOfParts(Tp);
}
unsigned TargetTransformInfo::getAddressComputationCost(Type *Tp,
bool IsComplex) const {
- return PrevTTI->getAddressComputationCost(Tp, IsComplex);
+ return TTIImpl->getAddressComputationCost(Tp, IsComplex);
}
unsigned TargetTransformInfo::getReductionCost(unsigned Opcode, Type *Ty,
- bool IsPairwise) const {
- return PrevTTI->getReductionCost(Opcode, Ty, IsPairwise);
+ bool IsPairwiseForm) const {
+ return TTIImpl->getReductionCost(Opcode, Ty, IsPairwiseForm);
}
-unsigned TargetTransformInfo::getCostOfKeepingLiveOverCall(ArrayRef<Type*> Tys)
- const {
- return PrevTTI->getCostOfKeepingLiveOverCall(Tys);
+unsigned
+TargetTransformInfo::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const {
+ return TTIImpl->getCostOfKeepingLiveOverCall(Tys);
}
-namespace {
+bool TargetTransformInfo::getTgtMemIntrinsic(IntrinsicInst *Inst,
+ MemIntrinsicInfo &Info) const {
+ return TTIImpl->getTgtMemIntrinsic(Inst, Info);
+}
-struct NoTTI final : ImmutablePass, TargetTransformInfo {
- const DataLayout *DL;
-
- NoTTI() : ImmutablePass(ID), DL(nullptr) {
- initializeNoTTIPass(*PassRegistry::getPassRegistry());
- }
-
- void initializePass() override {
- // Note that this subclass is special, and must *not* call initializeTTI as
- // it does not chain.
- TopTTI = this;
- PrevTTI = nullptr;
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- // Note that this subclass is special, and must *not* call
- // TTI::getAnalysisUsage as it breaks the recursion.
- }
-
- /// Pass identification.
- static char ID;
-
- /// Provide necessary pointer adjustments for the two base classes.
- void *getAdjustedAnalysisPointer(const void *ID) override {
- if (ID == &TargetTransformInfo::ID)
- return (TargetTransformInfo*)this;
- return this;
- }
-
- unsigned getOperationCost(unsigned Opcode, Type *Ty,
- Type *OpTy) const override {
- switch (Opcode) {
- default:
- // By default, just classify everything as 'basic'.
- return TCC_Basic;
-
- case Instruction::GetElementPtr:
- llvm_unreachable("Use getGEPCost for GEP operations!");
-
- case Instruction::BitCast:
- assert(OpTy && "Cast instructions must provide the operand type");
- if (Ty == OpTy || (Ty->isPointerTy() && OpTy->isPointerTy()))
- // Identity and pointer-to-pointer casts are free.
- return TCC_Free;
-
- // Otherwise, the default basic cost is used.
- return TCC_Basic;
-
- case Instruction::IntToPtr: {
- if (!DL)
- return TCC_Basic;
-
- // An inttoptr cast is free so long as the input is a legal integer type
- // which doesn't contain values outside the range of a pointer.
- unsigned OpSize = OpTy->getScalarSizeInBits();
- if (DL->isLegalInteger(OpSize) &&
- OpSize <= DL->getPointerTypeSizeInBits(Ty))
- return TCC_Free;
-
- // Otherwise it's not a no-op.
- return TCC_Basic;
- }
- case Instruction::PtrToInt: {
- if (!DL)
- return TCC_Basic;
-
- // A ptrtoint cast is free so long as the result is large enough to store
- // the pointer, and a legal integer type.
- unsigned DestSize = Ty->getScalarSizeInBits();
- if (DL->isLegalInteger(DestSize) &&
- DestSize >= DL->getPointerTypeSizeInBits(OpTy))
- return TCC_Free;
-
- // Otherwise it's not a no-op.
- return TCC_Basic;
- }
- case Instruction::Trunc:
- // trunc to a native type is free (assuming the target has compare and
- // shift-right of the same width).
- if (DL && DL->isLegalInteger(DL->getTypeSizeInBits(Ty)))
- return TCC_Free;
-
- return TCC_Basic;
- }
- }
-
- unsigned getGEPCost(const Value *Ptr,
- ArrayRef<const Value *> Operands) const override {
- // In the basic model, we just assume that all-constant GEPs will be folded
- // into their uses via addressing modes.
- for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx)
- if (!isa<Constant>(Operands[Idx]))
- return TCC_Basic;
-
- return TCC_Free;
- }
-
- unsigned getCallCost(FunctionType *FTy, int NumArgs = -1) const override
- {
- assert(FTy && "FunctionType must be provided to this routine.");
-
- // The target-independent implementation just measures the size of the
- // function by approximating that each argument will take on average one
- // instruction to prepare.
-
- if (NumArgs < 0)
- // Set the argument number to the number of explicit arguments in the
- // function.
- NumArgs = FTy->getNumParams();
-
- return TCC_Basic * (NumArgs + 1);
- }
-
- unsigned getCallCost(const Function *F, int NumArgs = -1) const override
- {
- assert(F && "A concrete function must be provided to this routine.");
-
- if (NumArgs < 0)
- // Set the argument number to the number of explicit arguments in the
- // function.
- NumArgs = F->arg_size();
-
- if (Intrinsic::ID IID = (Intrinsic::ID)F->getIntrinsicID()) {
- FunctionType *FTy = F->getFunctionType();
- SmallVector<Type *, 8> ParamTys(FTy->param_begin(), FTy->param_end());
- return TopTTI->getIntrinsicCost(IID, FTy->getReturnType(), ParamTys);
- }
-
- if (!TopTTI->isLoweredToCall(F))
- return TCC_Basic; // Give a basic cost if it will be lowered directly.
-
- return TopTTI->getCallCost(F->getFunctionType(), NumArgs);
- }
-
- unsigned getCallCost(const Function *F,
- ArrayRef<const Value *> Arguments) const override {
- // Simply delegate to generic handling of the call.
- // FIXME: We should use instsimplify or something else to catch calls which
- // will constant fold with these arguments.
- return TopTTI->getCallCost(F, Arguments.size());
- }
-
- unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<Type *> ParamTys) const override {
- switch (IID) {
- default:
- // Intrinsics rarely (if ever) have normal argument setup constraints.
- // Model them as having a basic instruction cost.
- // FIXME: This is wrong for libc intrinsics.
- return TCC_Basic;
-
- case Intrinsic::annotation:
- case Intrinsic::assume:
- case Intrinsic::dbg_declare:
- case Intrinsic::dbg_value:
- case Intrinsic::invariant_start:
- case Intrinsic::invariant_end:
- case Intrinsic::lifetime_start:
- case Intrinsic::lifetime_end:
- case Intrinsic::objectsize:
- case Intrinsic::ptr_annotation:
- case Intrinsic::var_annotation:
- case Intrinsic::experimental_gc_result_int:
- case Intrinsic::experimental_gc_result_float:
- case Intrinsic::experimental_gc_result_ptr:
- case Intrinsic::experimental_gc_relocate:
- // These intrinsics don't actually represent code after lowering.
- return TCC_Free;
- }
- }
-
- unsigned
- getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<const Value *> Arguments) const override {
- // Delegate to the generic intrinsic handling code. This mostly provides an
- // opportunity for targets to (for example) special case the cost of
- // certain intrinsics based on constants used as arguments.
- SmallVector<Type *, 8> ParamTys;
- ParamTys.reserve(Arguments.size());
- for (unsigned Idx = 0, Size = Arguments.size(); Idx != Size; ++Idx)
- ParamTys.push_back(Arguments[Idx]->getType());
- return TopTTI->getIntrinsicCost(IID, RetTy, ParamTys);
- }
-
- unsigned getUserCost(const User *U) const override {
- if (isa<PHINode>(U))
- return TCC_Free; // Model all PHI nodes as free.
-
- if (const GEPOperator *GEP = dyn_cast<GEPOperator>(U)) {
- SmallVector<const Value *, 4> Indices(GEP->idx_begin(), GEP->idx_end());
- return TopTTI->getGEPCost(GEP->getPointerOperand(), Indices);
- }
-
- if (ImmutableCallSite CS = U) {
- const Function *F = CS.getCalledFunction();
- if (!F) {
- // Just use the called value type.
- Type *FTy = CS.getCalledValue()->getType()->getPointerElementType();
- return TopTTI->getCallCost(cast<FunctionType>(FTy), CS.arg_size());
- }
-
- SmallVector<const Value *, 8> Arguments(CS.arg_begin(), CS.arg_end());
- return TopTTI->getCallCost(F, Arguments);
- }
-
- if (const CastInst *CI = dyn_cast<CastInst>(U)) {
- // Result of a cmp instruction is often extended (to be used by other
- // cmp instructions, logical or return instructions). These are usually
- // nop on most sane targets.
- if (isa<CmpInst>(CI->getOperand(0)))
- return TCC_Free;
- }
-
- // Otherwise delegate to the fully generic implementations.
- return getOperationCost(Operator::getOpcode(U), U->getType(),
- U->getNumOperands() == 1 ?
- U->getOperand(0)->getType() : nullptr);
- }
-
- bool hasBranchDivergence() const override { return false; }
-
- bool isLoweredToCall(const Function *F) const override {
- // FIXME: These should almost certainly not be handled here, and instead
- // handled with the help of TLI or the target itself. This was largely
- // ported from existing analysis heuristics here so that such refactorings
- // can take place in the future.
-
- if (F->isIntrinsic())
- return false;
-
- if (F->hasLocalLinkage() || !F->hasName())
- return true;
-
- StringRef Name = F->getName();
-
- // These will all likely lower to a single selection DAG node.
- if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
- Name == "fabs" || Name == "fabsf" || Name == "fabsl" || Name == "sin" ||
- Name == "fmin" || Name == "fminf" || Name == "fminl" ||
- Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" ||
- Name == "sinf" || Name == "sinl" || Name == "cos" || Name == "cosf" ||
- Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl")
- return false;
-
- // These are all likely to be optimized into something smaller.
- if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" ||
- Name == "exp2l" || Name == "exp2f" || Name == "floor" || Name ==
- "floorf" || Name == "ceil" || Name == "round" || Name == "ffs" ||
- Name == "ffsl" || Name == "abs" || Name == "labs" || Name == "llabs")
- return false;
-
- return true;
- }
-
- void getUnrollingPreferences(const Function *, Loop *,
- UnrollingPreferences &) const override {}
-
- bool isLegalAddImmediate(int64_t Imm) const override {
- return false;
- }
-
- bool isLegalICmpImmediate(int64_t Imm) const override {
- return false;
- }
-
- bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
- bool HasBaseReg, int64_t Scale) const override
- {
- // Guess that reg+reg addressing is allowed. This heuristic is taken from
- // the implementation of LSR.
- return !BaseGV && BaseOffset == 0 && Scale <= 1;
- }
-
- int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
- bool HasBaseReg, int64_t Scale) const override {
- // Guess that all legal addressing mode are free.
- if(isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale))
- return 0;
- return -1;
- }
-
- bool isTruncateFree(Type *Ty1, Type *Ty2) const override {
- return false;
- }
-
- bool isTypeLegal(Type *Ty) const override {
- return false;
- }
-
- unsigned getJumpBufAlignment() const override {
- return 0;
- }
-
- unsigned getJumpBufSize() const override {
- return 0;
- }
-
- bool shouldBuildLookupTables() const override {
- return true;
- }
-
- PopcntSupportKind
- getPopcntSupport(unsigned IntTyWidthInBit) const override {
- return PSK_Software;
- }
-
- bool haveFastSqrt(Type *Ty) const override {
- return false;
- }
-
- unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override {
- return TCC_Basic;
- }
-
- unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
- Type *Ty) const override {
- return TCC_Free;
- }
-
- unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
- Type *Ty) const override {
- return TCC_Free;
- }
-
- unsigned getNumberOfRegisters(bool Vector) const override {
- return 8;
- }
-
- unsigned getRegisterBitWidth(bool Vector) const override {
- return 32;
- }
-
- unsigned getMaxInterleaveFactor() const override {
- return 1;
- }
-
- unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind,
- OperandValueKind, OperandValueProperties,
- OperandValueProperties) const override {
- return 1;
- }
-
- unsigned getShuffleCost(ShuffleKind Kind, Type *Ty,
- int Index = 0, Type *SubTp = nullptr) const override {
- return 1;
- }
-
- unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
- Type *Src) const override {
- return 1;
- }
-
- unsigned getCFInstrCost(unsigned Opcode) const override {
- return 1;
- }
-
- unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy = nullptr) const override {
- return 1;
- }
-
- unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
- unsigned Index = -1) const override {
- return 1;
- }
-
- unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace) const override {
- return 1;
- }
-
- unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Type*> Tys) const override {
- return 1;
- }
-
- unsigned getNumberOfParts(Type *Tp) const override {
- return 0;
- }
-
- unsigned getAddressComputationCost(Type *Tp, bool) const override {
- return 0;
- }
-
- unsigned getReductionCost(unsigned, Type *, bool) const override {
- return 1;
- }
-
- unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type*> Tys) const override {
- return 0;
- }
+Value *TargetTransformInfo::getOrCreateResultFromMemIntrinsic(
+ IntrinsicInst *Inst, Type *ExpectedType) const {
+ return TTIImpl->getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
+}
-};
+TargetTransformInfo::Concept::~Concept() {}
+
+TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {}
+
+TargetIRAnalysis::TargetIRAnalysis(
+ std::function<Result(Function &)> TTICallback)
+ : TTICallback(TTICallback) {}
-} // end anonymous namespace
+TargetIRAnalysis::Result TargetIRAnalysis::run(Function &F) {
+ return TTICallback(F);
+}
+
+char TargetIRAnalysis::PassID;
+
+TargetIRAnalysis::Result TargetIRAnalysis::getDefaultTTI(Function &F) {
+ return Result(&F.getParent()->getDataLayout());
+}
-INITIALIZE_AG_PASS(NoTTI, TargetTransformInfo, "notti",
- "No target information", true, true, true)
-char NoTTI::ID = 0;
+// Register the basic pass.
+INITIALIZE_PASS(TargetTransformInfoWrapperPass, "tti",
+ "Target Transform Information", false, true)
+char TargetTransformInfoWrapperPass::ID = 0;
+
+void TargetTransformInfoWrapperPass::anchor() {}
+
+TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass()
+ : ImmutablePass(ID) {
+ initializeTargetTransformInfoWrapperPassPass(
+ *PassRegistry::getPassRegistry());
+}
+
+TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass(
+ TargetIRAnalysis TIRA)
+ : ImmutablePass(ID), TIRA(std::move(TIRA)) {
+ initializeTargetTransformInfoWrapperPassPass(
+ *PassRegistry::getPassRegistry());
+}
+
+TargetTransformInfo &TargetTransformInfoWrapperPass::getTTI(Function &F) {
+ TTI = TIRA.run(F);
+ return *TTI;
+}
-ImmutablePass *llvm::createNoTargetTransformInfoPass() {
- return new NoTTI();
+ImmutablePass *
+llvm::createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA) {
+ return new TargetTransformInfoWrapperPass(std::move(TIRA));
}
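
A minimal, illustrative sketch (plain C++; the class and function names below are invented for the example and are not LLVM's actual headers) of the type-erasure pattern the rewritten TargetTransformInfo above relies on: the public facade owns a pointer to an abstract Concept, a templated Model forwards each query to whatever concrete implementation the target supplies (NoTTIImpl by default), and every public method becomes the kind of one-line forwarder seen throughout the hunk.

#include <memory>
#include <utility>

struct DefaultImpl {              // stands in for NoTTIImpl
  unsigned getCallCost(int NumArgs) const { return NumArgs + 1; }
};

class CostInfo {                  // stands in for TargetTransformInfo
  struct Concept {
    virtual ~Concept() = default;
    virtual unsigned getCallCost(int NumArgs) const = 0;
  };
  template <typename T> struct Model final : Concept {
    T Impl;
    explicit Model(T I) : Impl(std::move(I)) {}
    unsigned getCallCost(int NumArgs) const override {
      return Impl.getCallCost(NumArgs);
    }
  };
  std::unique_ptr<Concept> Impl;

public:
  template <typename T>
  explicit CostInfo(T I) : Impl(new Model<T>(std::move(I))) {}
  CostInfo(CostInfo &&) = default;
  CostInfo &operator=(CostInfo &&) = default;

  // The facade simply forwards, like the one-line bodies in the diff above.
  unsigned getCallCost(int NumArgs) const { return Impl->getCallCost(NumArgs); }
};

int main() {
  CostInfo TTI{DefaultImpl{}};    // a target would pass its own impl instead
  return TTI.getCallCost(3) == 4 ? 0 : 1;
}

The same shape is why the wrapper pass only stores a TargetIRAnalysis callback and constructs the per-function result on demand in getTTI(), instead of chaining analysis-group passes.
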
diff --git a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
index ff89558..1158725 100644
--- a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -129,6 +129,7 @@
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/SetVector.h"
using namespace llvm;
// A handy option for disabling TBAA functionality. The same effect can also be
@@ -282,9 +283,7 @@ namespace {
initializeTypeBasedAliasAnalysisPass(*PassRegistry::getPassRegistry());
}
- void initializePass() override {
- InitializeAliasAnalysis(this);
- }
+ bool doInitialization(Module &M) override;
/// getAdjustedAnalysisPointer - This method is used when a pass implements
/// an analysis interface through multiple inheritance. If needed, it
@@ -321,6 +320,11 @@ ImmutablePass *llvm::createTypeBasedAliasAnalysisPass() {
return new TypeBasedAliasAnalysis();
}
+bool TypeBasedAliasAnalysis::doInitialization(Module &M) {
+ InitializeAliasAnalysis(this, &M.getDataLayout());
+ return true;
+}
+
void
TypeBasedAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
@@ -575,18 +579,22 @@ MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) {
if (!B) return nullptr;
}
- SmallVector<MDNode *, 4> PathA;
+ SmallSetVector<MDNode *, 4> PathA;
MDNode *T = A;
while (T) {
- PathA.push_back(T);
+ if (PathA.count(T))
+ report_fatal_error("Cycle found in TBAA metadata.");
+ PathA.insert(T);
T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1))
: nullptr;
}
- SmallVector<MDNode *, 4> PathB;
+ SmallSetVector<MDNode *, 4> PathB;
T = B;
while (T) {
- PathB.push_back(T);
+ if (PathB.count(T))
+ report_fatal_error("Cycle found in TBAA metadata.");
+ PathB.insert(T);
T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1))
: nullptr;
}
diff --git a/contrib/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm/lib/Analysis/ValueTracking.cpp
index 5d90917..a55712c 100644
--- a/contrib/llvm/lib/Analysis/ValueTracking.cpp
+++ b/contrib/llvm/lib/Analysis/ValueTracking.cpp
@@ -17,6 +17,7 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
@@ -31,6 +32,7 @@
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/Statepoint.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include <cstring>
@@ -39,13 +41,41 @@ using namespace llvm::PatternMatch;
const unsigned MaxDepth = 6;
+/// Enable an experimental feature to leverage information about dominating
+/// conditions to compute known bits. The individual options below control how
+/// hard we search. The defaults are chosen to be fairly aggressive. If you
+/// run into compile time problems when testing, scale them back and report
+/// your findings.
+static cl::opt<bool> EnableDomConditions("value-tracking-dom-conditions",
+ cl::Hidden, cl::init(false));
+
+// This is expensive, so we only do it for the top level query value.
+// (TODO: evaluate cost vs profit, consider higher thresholds)
+static cl::opt<unsigned> DomConditionsMaxDepth("dom-conditions-max-depth",
+ cl::Hidden, cl::init(1));
+
+/// How many dominating blocks should be scanned looking for dominating
+/// conditions?
+static cl::opt<unsigned> DomConditionsMaxDomBlocks("dom-conditions-dom-blocks",
+ cl::Hidden,
+ cl::init(20000));
+
+// Controls the number of uses of the value searched for possible
+// dominating comparisons.
+static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses",
+ cl::Hidden, cl::init(2000));
+
+// If true, only consider compares whose sole use is a branch.
+static cl::opt<bool> DomConditionsSingleCmpUse("dom-conditions-single-cmp-use",
+ cl::Hidden, cl::init(false));
+
/// Returns the bitwidth of the given scalar or pointer type (if unknown returns
/// 0). For vector types, returns the element type's bitwidth.
-static unsigned getBitWidth(Type *Ty, const DataLayout *TD) {
+static unsigned getBitWidth(Type *Ty, const DataLayout &DL) {
if (unsigned BitWidth = Ty->getScalarSizeInBits())
return BitWidth;
- return TD ? TD->getPointerTypeSizeInBits(Ty) : 0;
+ return DL.getPointerTypeSizeInBits(Ty);
}
// Many of these functions have internal versions that take an assumption
@@ -97,73 +127,88 @@ static const Instruction *safeCxtI(const Value *V, const Instruction *CxtI) {
}
static void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
- const DataLayout *TD, unsigned Depth,
- const Query &Q);
+ const DataLayout &DL, unsigned Depth,
+ const Query &Q);
void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
- const DataLayout *TD, unsigned Depth,
+ const DataLayout &DL, unsigned Depth,
AssumptionCache *AC, const Instruction *CxtI,
const DominatorTree *DT) {
- ::computeKnownBits(V, KnownZero, KnownOne, TD, Depth,
+ ::computeKnownBits(V, KnownZero, KnownOne, DL, Depth,
Query(AC, safeCxtI(V, CxtI), DT));
}
+bool llvm::haveNoCommonBitsSet(Value *LHS, Value *RHS, const DataLayout &DL,
+ AssumptionCache *AC, const Instruction *CxtI,
+ const DominatorTree *DT) {
+ assert(LHS->getType() == RHS->getType() &&
+ "LHS and RHS should have the same type");
+ assert(LHS->getType()->isIntOrIntVectorTy() &&
+ "LHS and RHS should be integers");
+ IntegerType *IT = cast<IntegerType>(LHS->getType()->getScalarType());
+ APInt LHSKnownZero(IT->getBitWidth(), 0), LHSKnownOne(IT->getBitWidth(), 0);
+ APInt RHSKnownZero(IT->getBitWidth(), 0), RHSKnownOne(IT->getBitWidth(), 0);
+ computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, DL, 0, AC, CxtI, DT);
+ computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, DL, 0, AC, CxtI, DT);
+ return (LHSKnownZero | RHSKnownZero).isAllOnesValue();
+}
+
static void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
- const DataLayout *TD, unsigned Depth,
- const Query &Q);
+ const DataLayout &DL, unsigned Depth,
+ const Query &Q);
void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
- const DataLayout *TD, unsigned Depth,
+ const DataLayout &DL, unsigned Depth,
AssumptionCache *AC, const Instruction *CxtI,
const DominatorTree *DT) {
- ::ComputeSignBit(V, KnownZero, KnownOne, TD, Depth,
+ ::ComputeSignBit(V, KnownZero, KnownOne, DL, Depth,
Query(AC, safeCxtI(V, CxtI), DT));
}
static bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth,
- const Query &Q);
+ const Query &Q, const DataLayout &DL);
-bool llvm::isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth,
- AssumptionCache *AC, const Instruction *CxtI,
+bool llvm::isKnownToBeAPowerOfTwo(Value *V, const DataLayout &DL, bool OrZero,
+ unsigned Depth, AssumptionCache *AC,
+ const Instruction *CxtI,
const DominatorTree *DT) {
return ::isKnownToBeAPowerOfTwo(V, OrZero, Depth,
- Query(AC, safeCxtI(V, CxtI), DT));
+ Query(AC, safeCxtI(V, CxtI), DT), DL);
}
-static bool isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth,
+static bool isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth,
const Query &Q);
-bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth,
+bool llvm::isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth,
AssumptionCache *AC, const Instruction *CxtI,
const DominatorTree *DT) {
- return ::isKnownNonZero(V, TD, Depth, Query(AC, safeCxtI(V, CxtI), DT));
+ return ::isKnownNonZero(V, DL, Depth, Query(AC, safeCxtI(V, CxtI), DT));
}
-static bool MaskedValueIsZero(Value *V, const APInt &Mask,
- const DataLayout *TD, unsigned Depth,
- const Query &Q);
+static bool MaskedValueIsZero(Value *V, const APInt &Mask, const DataLayout &DL,
+ unsigned Depth, const Query &Q);
-bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask, const DataLayout *TD,
+bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask, const DataLayout &DL,
unsigned Depth, AssumptionCache *AC,
const Instruction *CxtI, const DominatorTree *DT) {
- return ::MaskedValueIsZero(V, Mask, TD, Depth,
+ return ::MaskedValueIsZero(V, Mask, DL, Depth,
Query(AC, safeCxtI(V, CxtI), DT));
}
-static unsigned ComputeNumSignBits(Value *V, const DataLayout *TD,
+static unsigned ComputeNumSignBits(Value *V, const DataLayout &DL,
unsigned Depth, const Query &Q);
-unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD,
+unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout &DL,
unsigned Depth, AssumptionCache *AC,
const Instruction *CxtI,
const DominatorTree *DT) {
- return ::ComputeNumSignBits(V, TD, Depth, Query(AC, safeCxtI(V, CxtI), DT));
+ return ::ComputeNumSignBits(V, DL, Depth, Query(AC, safeCxtI(V, CxtI), DT));
}
static void computeKnownBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW,
APInt &KnownZero, APInt &KnownOne,
APInt &KnownZero2, APInt &KnownOne2,
- const DataLayout *TD, unsigned Depth,
+ const DataLayout &DL, unsigned Depth,
const Query &Q) {
if (!Add) {
if (ConstantInt *CLHS = dyn_cast<ConstantInt>(Op0)) {
@@ -175,7 +220,7 @@ static void computeKnownBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW,
unsigned NLZ = (CLHS->getValue()+1).countLeadingZeros();
// NLZ can't be BitWidth with no sign bit
APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1);
- computeKnownBits(Op1, KnownZero2, KnownOne2, TD, Depth+1, Q);
+ computeKnownBits(Op1, KnownZero2, KnownOne2, DL, Depth + 1, Q);
// If all of the MaskV bits are known to be zero, then we know the
// output top bits are zero, because we now know that the output is
@@ -194,8 +239,8 @@ static void computeKnownBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW,
// If an initial sequence of bits in the result is not needed, the
// corresponding bits in the operands are not needed.
APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
- computeKnownBits(Op0, LHSKnownZero, LHSKnownOne, TD, Depth+1, Q);
- computeKnownBits(Op1, KnownZero2, KnownOne2, TD, Depth+1, Q);
+ computeKnownBits(Op0, LHSKnownZero, LHSKnownOne, DL, Depth + 1, Q);
+ computeKnownBits(Op1, KnownZero2, KnownOne2, DL, Depth + 1, Q);
// Carry in a 1 for a subtract, rather than a 0.
APInt CarryIn(BitWidth, 0);
@@ -243,11 +288,11 @@ static void computeKnownBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW,
static void computeKnownBitsMul(Value *Op0, Value *Op1, bool NSW,
APInt &KnownZero, APInt &KnownOne,
APInt &KnownZero2, APInt &KnownOne2,
- const DataLayout *TD, unsigned Depth,
+ const DataLayout &DL, unsigned Depth,
const Query &Q) {
unsigned BitWidth = KnownZero.getBitWidth();
- computeKnownBits(Op1, KnownZero, KnownOne, TD, Depth+1, Q);
- computeKnownBits(Op0, KnownZero2, KnownOne2, TD, Depth+1, Q);
+ computeKnownBits(Op1, KnownZero, KnownOne, DL, Depth + 1, Q);
+ computeKnownBits(Op0, KnownZero2, KnownOne2, DL, Depth + 1, Q);
bool isKnownNegative = false;
bool isKnownNonNegative = false;
@@ -268,9 +313,9 @@ static void computeKnownBitsMul(Value *Op0, Value *Op1, bool NSW,
// negative or zero.
if (!isKnownNonNegative)
isKnownNegative = (isKnownNegativeOp1 && isKnownNonNegativeOp0 &&
- isKnownNonZero(Op0, TD, Depth, Q)) ||
+ isKnownNonZero(Op0, DL, Depth, Q)) ||
(isKnownNegativeOp0 && isKnownNonNegativeOp1 &&
- isKnownNonZero(Op1, TD, Depth, Q));
+ isKnownNonZero(Op1, DL, Depth, Q));
}
}
@@ -382,8 +427,7 @@ static bool isAssumeLikeIntrinsic(const Instruction *I) {
return false;
}
-static bool isValidAssumeForContext(Value *V, const Query &Q,
- const DataLayout *DL) {
+static bool isValidAssumeForContext(Value *V, const Query &Q) {
Instruction *Inv = cast<Instruction>(V);
// There are two restrictions on the use of an assume:
@@ -403,8 +447,7 @@ static bool isValidAssumeForContext(Value *V, const Query &Q,
for (BasicBlock::const_iterator I =
std::next(BasicBlock::const_iterator(Q.CxtI)),
IE(Inv); I != IE; ++I)
- if (!isSafeToSpeculativelyExecute(I, DL) &&
- !isAssumeLikeIntrinsic(I))
+ if (!isSafeToSpeculativelyExecute(I) && !isAssumeLikeIntrinsic(I))
return false;
return !isEphemeralValueOf(Inv, Q.CxtI);
@@ -428,8 +471,7 @@ static bool isValidAssumeForContext(Value *V, const Query &Q,
for (BasicBlock::const_iterator I =
std::next(BasicBlock::const_iterator(Q.CxtI)),
IE(Inv); I != IE; ++I)
- if (!isSafeToSpeculativelyExecute(I, DL) &&
- !isAssumeLikeIntrinsic(I))
+ if (!isSafeToSpeculativelyExecute(I) && !isAssumeLikeIntrinsic(I))
return false;
return !isEphemeralValueOf(Inv, Q.CxtI);
@@ -440,10 +482,9 @@ static bool isValidAssumeForContext(Value *V, const Query &Q,
bool llvm::isValidAssumeForContext(const Instruction *I,
const Instruction *CxtI,
- const DataLayout *DL,
const DominatorTree *DT) {
- return ::isValidAssumeForContext(const_cast<Instruction*>(I),
- Query(nullptr, CxtI, DT), DL);
+ return ::isValidAssumeForContext(const_cast<Instruction *>(I),
+ Query(nullptr, CxtI, DT));
}
template<typename LHS, typename RHS>
@@ -474,9 +515,181 @@ m_c_Xor(const LHS &L, const RHS &R) {
return m_CombineOr(m_Xor(L, R), m_Xor(R, L));
}
+/// Compute known bits in 'V' under the assumption that the condition 'Cmp' is
+/// true (at the context instruction). This is mostly a utility function for
+/// the prototype dominating conditions reasoning below.
+static void computeKnownBitsFromTrueCondition(Value *V, ICmpInst *Cmp,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const DataLayout &DL,
+ unsigned Depth, const Query &Q) {
+ Value *LHS = Cmp->getOperand(0);
+ Value *RHS = Cmp->getOperand(1);
+ // TODO: We could potentially be more aggressive here. This would be worth
+ // evaluating. If we can, explore commoning this code with the assume
+ // handling logic.
+ if (LHS != V && RHS != V)
+ return;
+
+ const unsigned BitWidth = KnownZero.getBitWidth();
+
+ switch (Cmp->getPredicate()) {
+ default:
+ // We know nothing from this condition
+ break;
+ // TODO: implement unsigned bound from below (known one bits)
+ // TODO: common condition check implementations with assumes
+ // TODO: implement other patterns from assume (e.g. V & B == A)
+ case ICmpInst::ICMP_SGT:
+ if (LHS == V) {
+ APInt KnownZeroTemp(BitWidth, 0), KnownOneTemp(BitWidth, 0);
+ computeKnownBits(RHS, KnownZeroTemp, KnownOneTemp, DL, Depth + 1, Q);
+ if (KnownOneTemp.isAllOnesValue() || KnownZeroTemp.isNegative()) {
+ // We know that the sign bit is zero.
+ KnownZero |= APInt::getSignBit(BitWidth);
+ }
+ }
+ break;
+ case ICmpInst::ICMP_EQ:
+ if (LHS == V)
+ computeKnownBits(RHS, KnownZero, KnownOne, DL, Depth + 1, Q);
+ else if (RHS == V)
+ computeKnownBits(LHS, KnownZero, KnownOne, DL, Depth + 1, Q);
+ else
+ llvm_unreachable("missing use?");
+ break;
+ case ICmpInst::ICMP_ULE:
+ if (LHS == V) {
+ APInt KnownZeroTemp(BitWidth, 0), KnownOneTemp(BitWidth, 0);
+ computeKnownBits(RHS, KnownZeroTemp, KnownOneTemp, DL, Depth + 1, Q);
+ // The known zero bits carry over
+ unsigned SignBits = KnownZeroTemp.countLeadingOnes();
+ KnownZero |= APInt::getHighBitsSet(BitWidth, SignBits);
+ }
+ break;
+ case ICmpInst::ICMP_ULT:
+ if (LHS == V) {
+ APInt KnownZeroTemp(BitWidth, 0), KnownOneTemp(BitWidth, 0);
+ computeKnownBits(RHS, KnownZeroTemp, KnownOneTemp, DL, Depth + 1, Q);
+ // Whatever high bits in rhs are zero are known to be zero (if rhs is a
+ // power of 2, then one more).
+ unsigned SignBits = KnownZeroTemp.countLeadingOnes();
+ if (isKnownToBeAPowerOfTwo(RHS, false, Depth + 1, Query(Q, Cmp), DL))
+ SignBits++;
+ KnownZero |= APInt::getHighBitsSet(BitWidth, SignBits);
+ }
+ break;
+ };
+}
+
+/// Compute known bits in 'V' from conditions which are known to be true along
+/// all paths leading to the context instruction. In particular, look for
+/// cases where one branch of an interesting condition dominates the context
+/// instruction. This does not do general dataflow.
+/// NOTE: This code is EXPERIMENTAL and currently off by default.
+static void computeKnownBitsFromDominatingCondition(Value *V, APInt &KnownZero,
+ APInt &KnownOne,
+ const DataLayout &DL,
+ unsigned Depth,
+ const Query &Q) {
+ // Need both the dominator tree and the query location to do anything useful
+ if (!Q.DT || !Q.CxtI)
+ return;
+ Instruction *Cxt = const_cast<Instruction *>(Q.CxtI);
+
+ // Avoid useless work
+ if (auto VI = dyn_cast<Instruction>(V))
+ if (VI->getParent() == Cxt->getParent())
+ return;
+
+  // Note: We currently implement two options.  It's not clear which of these
+  // will survive long term; we need data for that.
+  // Option 1 - Try walking the dominator tree looking for conditions which
+  // might apply.  This works well for local conditions (loop guards, etc.),
+  // but not as well for things far from the context instruction (presuming a
+  // low limit on blocks explored).  If we can set a high enough limit, this
+  // would be all we need.
+  // Option 2 - We restrict our search to those conditions which are uses of
+  // the value we're interested in.  This is independent of dom structure,
+  // but is slightly less powerful without looking through lots of use chains.
+  // It does handle conditions far from the context instruction (e.g. early
+  // function exits on entry) really well though.
+
+ // Option 1 - Search the dom tree
+ unsigned NumBlocksExplored = 0;
+ BasicBlock *Current = Cxt->getParent();
+ while (true) {
+ // Stop searching if we've gone too far up the chain
+ if (NumBlocksExplored >= DomConditionsMaxDomBlocks)
+ break;
+ NumBlocksExplored++;
+
+ if (!Q.DT->getNode(Current)->getIDom())
+ break;
+ Current = Q.DT->getNode(Current)->getIDom()->getBlock();
+ if (!Current)
+ // found function entry
+ break;
+
+ BranchInst *BI = dyn_cast<BranchInst>(Current->getTerminator());
+ if (!BI || BI->isUnconditional())
+ continue;
+ ICmpInst *Cmp = dyn_cast<ICmpInst>(BI->getCondition());
+ if (!Cmp)
+ continue;
+
+ // We're looking for conditions that are guaranteed to hold at the context
+ // instruction. Finding a condition where one path dominates the context
+ // isn't enough because both the true and false cases could merge before
+ // the context instruction we're actually interested in. Instead, we need
+ // to ensure that the taken *edge* dominates the context instruction.
+ BasicBlock *BB0 = BI->getSuccessor(0);
+ BasicBlockEdge Edge(BI->getParent(), BB0);
+ if (!Edge.isSingleEdge() || !Q.DT->dominates(Edge, Q.CxtI->getParent()))
+ continue;
+
+ computeKnownBitsFromTrueCondition(V, Cmp, KnownZero, KnownOne, DL, Depth,
+ Q);
+ }
+
+ // Option 2 - Search the other uses of V
+ unsigned NumUsesExplored = 0;
+ for (auto U : V->users()) {
+ // Avoid massive lists
+ if (NumUsesExplored >= DomConditionsMaxUses)
+ break;
+ NumUsesExplored++;
+ // Consider only compare instructions uniquely controlling a branch
+ ICmpInst *Cmp = dyn_cast<ICmpInst>(U);
+ if (!Cmp)
+ continue;
+
+ if (DomConditionsSingleCmpUse && !Cmp->hasOneUse())
+ continue;
+
+ for (auto *CmpU : Cmp->users()) {
+ BranchInst *BI = dyn_cast<BranchInst>(CmpU);
+ if (!BI || BI->isUnconditional())
+ continue;
+ // We're looking for conditions that are guaranteed to hold at the
+ // context instruction. Finding a condition where one path dominates
+ // the context isn't enough because both the true and false cases could
+ // merge before the context instruction we're actually interested in.
+ // Instead, we need to ensure that the taken *edge* dominates the context
+ // instruction.
+ BasicBlock *BB0 = BI->getSuccessor(0);
+ BasicBlockEdge Edge(BI->getParent(), BB0);
+ if (!Edge.isSingleEdge() || !Q.DT->dominates(Edge, Q.CxtI->getParent()))
+ continue;
+
+ computeKnownBitsFromTrueCondition(V, Cmp, KnownZero, KnownOne, DL, Depth,
+ Q);
+ }
+ }
+}
+
static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
- APInt &KnownOne,
- const DataLayout *DL,
+ APInt &KnownOne, const DataLayout &DL,
unsigned Depth, const Query &Q) {
// Use of assumptions is context-sensitive. If we don't have a context, we
// cannot use them!
@@ -498,14 +711,12 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
  // We're running this loop once for each value queried, resulting in a
  // runtime of ~O(#assumes * #values).
- assert(isa<IntrinsicInst>(I) &&
- dyn_cast<IntrinsicInst>(I)->getIntrinsicID() == Intrinsic::assume &&
+ assert(I->getCalledFunction()->getIntrinsicID() == Intrinsic::assume &&
"must be an assume intrinsic");
-
+
Value *Arg = I->getArgOperand(0);
- if (Arg == V &&
- isValidAssumeForContext(I, Q, DL)) {
+ if (Arg == V && isValidAssumeForContext(I, Q)) {
assert(BitWidth == 1 && "assume operand is not i1?");
KnownZero.clearAllBits();
KnownOne.setAllBits();
@@ -525,15 +736,15 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
ConstantInt *C;
// assume(v = a)
if (match(Arg, m_c_ICmp(Pred, m_V, m_Value(A))) &&
- Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
KnownZero |= RHSKnownZero;
KnownOne |= RHSKnownOne;
// assume(v & b = a)
- } else if (match(Arg, m_c_ICmp(Pred, m_c_And(m_V, m_Value(B)),
- m_Value(A))) &&
- Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
+ } else if (match(Arg,
+ m_c_ICmp(Pred, m_c_And(m_V, m_Value(B)), m_Value(A))) &&
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
APInt MaskKnownZero(BitWidth, 0), MaskKnownOne(BitWidth, 0);
@@ -546,7 +757,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
// assume(~(v & b) = a)
} else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_And(m_V, m_Value(B))),
m_Value(A))) &&
- Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
APInt MaskKnownZero(BitWidth, 0), MaskKnownOne(BitWidth, 0);
@@ -557,9 +768,9 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
KnownZero |= RHSKnownOne & MaskKnownOne;
KnownOne |= RHSKnownZero & MaskKnownOne;
// assume(v | b = a)
- } else if (match(Arg, m_c_ICmp(Pred, m_c_Or(m_V, m_Value(B)),
- m_Value(A))) &&
- Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
+ } else if (match(Arg,
+ m_c_ICmp(Pred, m_c_Or(m_V, m_Value(B)), m_Value(A))) &&
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0);
@@ -572,7 +783,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
// assume(~(v | b) = a)
} else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_Or(m_V, m_Value(B))),
m_Value(A))) &&
- Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0);
@@ -583,9 +794,9 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
KnownZero |= RHSKnownOne & BKnownZero;
KnownOne |= RHSKnownZero & BKnownZero;
// assume(v ^ b = a)
- } else if (match(Arg, m_c_ICmp(Pred, m_c_Xor(m_V, m_Value(B)),
- m_Value(A))) &&
- Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
+ } else if (match(Arg,
+ m_c_ICmp(Pred, m_c_Xor(m_V, m_Value(B)), m_Value(A))) &&
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0);
@@ -601,7 +812,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
// assume(~(v ^ b) = a)
} else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_Xor(m_V, m_Value(B))),
m_Value(A))) &&
- Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0);
@@ -617,7 +828,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
// assume(v << c = a)
} else if (match(Arg, m_c_ICmp(Pred, m_Shl(m_V, m_ConstantInt(C)),
m_Value(A))) &&
- Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
// For those bits in RHS that are known, we can propagate them to known
@@ -627,7 +838,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
// assume(~(v << c) = a)
} else if (match(Arg, m_c_ICmp(Pred, m_Not(m_Shl(m_V, m_ConstantInt(C))),
m_Value(A))) &&
- Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
// For those bits in RHS that are known, we can propagate them inverted
@@ -637,10 +848,9 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
// assume(v >> c = a)
} else if (match(Arg,
m_c_ICmp(Pred, m_CombineOr(m_LShr(m_V, m_ConstantInt(C)),
- m_AShr(m_V,
- m_ConstantInt(C))),
- m_Value(A))) &&
- Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
+ m_AShr(m_V, m_ConstantInt(C))),
+ m_Value(A))) &&
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
// For those bits in RHS that are known, we can propagate them to known
@@ -649,10 +859,10 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
KnownOne |= RHSKnownOne << C->getZExtValue();
// assume(~(v >> c) = a)
} else if (match(Arg, m_c_ICmp(Pred, m_Not(m_CombineOr(
- m_LShr(m_V, m_ConstantInt(C)),
- m_AShr(m_V, m_ConstantInt(C)))),
+ m_LShr(m_V, m_ConstantInt(C)),
+ m_AShr(m_V, m_ConstantInt(C)))),
m_Value(A))) &&
- Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q, DL)) {
+ Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
// For those bits in RHS that are known, we can propagate them inverted
@@ -661,8 +871,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
KnownOne |= RHSKnownZero << C->getZExtValue();
// assume(v >=_s c) where c is non-negative
} else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
- Pred == ICmpInst::ICMP_SGE &&
- isValidAssumeForContext(I, Q, DL)) {
+ Pred == ICmpInst::ICMP_SGE && isValidAssumeForContext(I, Q)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
@@ -672,8 +881,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
}
// assume(v >_s c) where c is at least -1.
} else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
- Pred == ICmpInst::ICMP_SGT &&
- isValidAssumeForContext(I, Q, DL)) {
+ Pred == ICmpInst::ICMP_SGT && isValidAssumeForContext(I, Q)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
@@ -683,8 +891,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
}
// assume(v <=_s c) where c is negative
} else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
- Pred == ICmpInst::ICMP_SLE &&
- isValidAssumeForContext(I, Q, DL)) {
+ Pred == ICmpInst::ICMP_SLE && isValidAssumeForContext(I, Q)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
@@ -694,8 +901,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
}
// assume(v <_s c) where c is non-positive
} else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
- Pred == ICmpInst::ICMP_SLT &&
- isValidAssumeForContext(I, Q, DL)) {
+ Pred == ICmpInst::ICMP_SLT && isValidAssumeForContext(I, Q)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
@@ -705,8 +911,7 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
}
// assume(v <=_u c)
} else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
- Pred == ICmpInst::ICMP_ULE &&
- isValidAssumeForContext(I, Q, DL)) {
+ Pred == ICmpInst::ICMP_ULE && isValidAssumeForContext(I, Q)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
@@ -715,14 +920,13 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
APInt::getHighBitsSet(BitWidth, RHSKnownZero.countLeadingOnes());
// assume(v <_u c)
} else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
- Pred == ICmpInst::ICMP_ULT &&
- isValidAssumeForContext(I, Q, DL)) {
+ Pred == ICmpInst::ICMP_ULT && isValidAssumeForContext(I, Q)) {
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
computeKnownBits(A, RHSKnownZero, RHSKnownOne, DL, Depth+1, Query(Q, I));
// Whatever high bits in c are zero are known to be zero (if c is a power
// of 2, then one more).
- if (isKnownToBeAPowerOfTwo(A, false, Depth+1, Query(Q, I)))
+ if (isKnownToBeAPowerOfTwo(A, false, Depth + 1, Query(Q, I), DL))
KnownZero |=
APInt::getHighBitsSet(BitWidth, RHSKnownZero.countLeadingOnes()+1);
else
@@ -743,13 +947,12 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
/// this won't lose us code quality.
///
/// This function is defined on values with integer type, values with pointer
-/// type (but only if TD is non-null), and vectors of integers. In the case
+/// type, and vectors of integers. In the case
/// where V is a vector, known zero, and known one values are the
/// same width as the vector element, and the bit is set only if it is true
/// for all of the elements in the vector.
void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
- const DataLayout *TD, unsigned Depth,
- const Query &Q) {
+ const DataLayout &DL, unsigned Depth, const Query &Q) {
assert(V && "No Value?");
assert(Depth <= MaxDepth && "Limit Search Depth");
unsigned BitWidth = KnownZero.getBitWidth();
@@ -757,8 +960,7 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
assert((V->getType()->isIntOrIntVectorTy() ||
V->getType()->getScalarType()->isPointerTy()) &&
"Not integer or pointer type!");
- assert((!TD ||
- TD->getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) &&
+ assert((DL.getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) &&
(!V->getType()->isIntOrIntVectorTy() ||
V->getType()->getScalarSizeInBits() == BitWidth) &&
KnownZero.getBitWidth() == BitWidth &&
@@ -797,7 +999,7 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
// The address of an aligned GlobalValue has trailing zeros.
if (auto *GO = dyn_cast<GlobalObject>(V)) {
unsigned Align = GO->getAlignment();
- if (Align == 0 && TD) {
+ if (Align == 0) {
if (auto *GVar = dyn_cast<GlobalVariable>(GO)) {
Type *ObjectType = GVar->getType()->getElementType();
if (ObjectType->isSized()) {
@@ -805,9 +1007,9 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
// it the preferred alignment. Otherwise, we have to assume that it
// may only have the minimum ABI alignment.
if (!GVar->isDeclaration() && !GVar->isWeakForLinker())
- Align = TD->getPreferredAlignment(GVar);
+ Align = DL.getPreferredAlignment(GVar);
else
- Align = TD->getABITypeAlignment(ObjectType);
+ Align = DL.getABITypeAlignment(ObjectType);
}
}
}
@@ -823,11 +1025,11 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
if (Argument *A = dyn_cast<Argument>(V)) {
unsigned Align = A->getType()->isPointerTy() ? A->getParamAlignment() : 0;
- if (!Align && TD && A->hasStructRetAttr()) {
+ if (!Align && A->hasStructRetAttr()) {
// An sret parameter has at least the ABI alignment of the return type.
Type *EltTy = cast<PointerType>(A->getType())->getElementType();
if (EltTy->isSized())
- Align = TD->getABITypeAlignment(EltTy);
+ Align = DL.getABITypeAlignment(EltTy);
}
if (Align)
@@ -838,7 +1040,12 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
// Don't give up yet... there might be an assumption that provides more
// information...
- computeKnownBitsFromAssume(V, KnownZero, KnownOne, TD, Depth, Q);
+ computeKnownBitsFromAssume(V, KnownZero, KnownOne, DL, Depth, Q);
+
+ // Or a dominating condition for that matter
+ if (EnableDomConditions && Depth <= DomConditionsMaxDepth)
+ computeKnownBitsFromDominatingCondition(V, KnownZero, KnownOne, DL,
+ Depth, Q);
return;
}
@@ -854,12 +1061,18 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
// the bits of its aliasee.
if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
if (!GA->mayBeOverridden())
- computeKnownBits(GA->getAliasee(), KnownZero, KnownOne, TD, Depth + 1, Q);
+ computeKnownBits(GA->getAliasee(), KnownZero, KnownOne, DL, Depth + 1, Q);
return;
}
// Check whether a nearby assume intrinsic can determine some known bits.
- computeKnownBitsFromAssume(V, KnownZero, KnownOne, TD, Depth, Q);
+ computeKnownBitsFromAssume(V, KnownZero, KnownOne, DL, Depth, Q);
+
+ // Check whether there's a dominating condition which implies something about
+ // this value at the given context.
+ if (EnableDomConditions && Depth <= DomConditionsMaxDepth)
+ computeKnownBitsFromDominatingCondition(V, KnownZero, KnownOne, DL, Depth,
+ Q);
Operator *I = dyn_cast<Operator>(V);
if (!I) return;
@@ -873,8 +1086,8 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
break;
case Instruction::And: {
// If either the LHS or the RHS are Zero, the result is zero.
- computeKnownBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1, Q);
- computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1, Q);
+ computeKnownBits(I->getOperand(1), KnownZero, KnownOne, DL, Depth + 1, Q);
+ computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, Depth + 1, Q);
// Output known-1 bits are only known if set in both the LHS & RHS.
KnownOne &= KnownOne2;
@@ -883,8 +1096,8 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
break;
}
case Instruction::Or: {
- computeKnownBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1, Q);
- computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1, Q);
+ computeKnownBits(I->getOperand(1), KnownZero, KnownOne, DL, Depth + 1, Q);
+ computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, Depth + 1, Q);
// Output known-0 bits are only known if clear in both the LHS & RHS.
KnownZero &= KnownZero2;
@@ -893,8 +1106,8 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
break;
}
case Instruction::Xor: {
- computeKnownBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1, Q);
- computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1, Q);
+ computeKnownBits(I->getOperand(1), KnownZero, KnownOne, DL, Depth + 1, Q);
+ computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, Depth + 1, Q);
// Output known-0 bits are known if clear or set in both the LHS & RHS.
APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
@@ -905,21 +1118,20 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
}
case Instruction::Mul: {
bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
- computeKnownBitsMul(I->getOperand(0), I->getOperand(1), NSW,
- KnownZero, KnownOne, KnownZero2, KnownOne2, TD,
- Depth, Q);
+ computeKnownBitsMul(I->getOperand(0), I->getOperand(1), NSW, KnownZero,
+ KnownOne, KnownZero2, KnownOne2, DL, Depth, Q);
break;
}
case Instruction::UDiv: {
// For the purposes of computing leading zeros we can conservatively
// treat a udiv as a logical right shift by the power of 2 known to
// be less than the denominator.
- computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1, Q);
+ computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, Depth + 1, Q);
unsigned LeadZ = KnownZero2.countLeadingOnes();
KnownOne2.clearAllBits();
KnownZero2.clearAllBits();
- computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1, Q);
+ computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, DL, Depth + 1, Q);
unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros();
if (RHSUnknownLeadingOnes != BitWidth)
LeadZ = std::min(BitWidth,
@@ -929,8 +1141,8 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
break;
}
case Instruction::Select:
- computeKnownBits(I->getOperand(2), KnownZero, KnownOne, TD, Depth+1, Q);
- computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1, Q);
+ computeKnownBits(I->getOperand(2), KnownZero, KnownOne, DL, Depth + 1, Q);
+ computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, DL, Depth + 1, Q);
// Only known if known in both the LHS and RHS.
KnownOne &= KnownOne2;
@@ -946,8 +1158,6 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
case Instruction::PtrToInt:
case Instruction::IntToPtr:
case Instruction::AddrSpaceCast: // Pointers could be different sizes.
- // We can't handle these if we don't know the pointer size.
- if (!TD) break;
// FALL THROUGH and handle them the same as zext/trunc.
case Instruction::ZExt:
case Instruction::Trunc: {
@@ -956,17 +1166,12 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
unsigned SrcBitWidth;
// Note that we handle pointer operands here because of inttoptr/ptrtoint
// which fall through here.
- if(TD) {
- SrcBitWidth = TD->getTypeSizeInBits(SrcTy->getScalarType());
- } else {
- SrcBitWidth = SrcTy->getScalarSizeInBits();
- if (!SrcBitWidth) break;
- }
+ SrcBitWidth = DL.getTypeSizeInBits(SrcTy->getScalarType());
assert(SrcBitWidth && "SrcBitWidth can't be zero");
KnownZero = KnownZero.zextOrTrunc(SrcBitWidth);
KnownOne = KnownOne.zextOrTrunc(SrcBitWidth);
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q);
+ computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q);
KnownZero = KnownZero.zextOrTrunc(BitWidth);
KnownOne = KnownOne.zextOrTrunc(BitWidth);
// Any top bits are known to be zero.
@@ -980,7 +1185,7 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
// TODO: For now, not handling conversions like:
// (bitcast i64 %x to <2 x i32>)
!I->getType()->isVectorTy()) {
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q);
+ computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q);
break;
}
break;
@@ -991,7 +1196,7 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
KnownZero = KnownZero.trunc(SrcBitWidth);
KnownOne = KnownOne.trunc(SrcBitWidth);
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q);
+ computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q);
KnownZero = KnownZero.zext(BitWidth);
KnownOne = KnownOne.zext(BitWidth);
@@ -1007,7 +1212,7 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
// (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0
if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q);
+ computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q);
KnownZero <<= ShiftAmt;
KnownOne <<= ShiftAmt;
KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt); // low bits known 0
@@ -1020,7 +1225,7 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
// Unsigned shift right.
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q);
+ computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q);
KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
KnownOne = APIntOps::lshr(KnownOne, ShiftAmt);
// high bits known zero.
@@ -1034,7 +1239,7 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
// Signed shift right.
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q);
+ computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q);
KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
KnownOne = APIntOps::lshr(KnownOne, ShiftAmt);
@@ -1048,15 +1253,15 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
case Instruction::Sub: {
bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
computeKnownBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW,
- KnownZero, KnownOne, KnownZero2, KnownOne2, TD,
- Depth, Q);
+ KnownZero, KnownOne, KnownZero2, KnownOne2, DL,
+ Depth, Q);
break;
}
case Instruction::Add: {
bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
computeKnownBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW,
- KnownZero, KnownOne, KnownZero2, KnownOne2, TD,
- Depth, Q);
+ KnownZero, KnownOne, KnownZero2, KnownOne2, DL,
+ Depth, Q);
break;
}
case Instruction::SRem:
@@ -1064,8 +1269,8 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
APInt RA = Rem->getValue().abs();
if (RA.isPowerOf2()) {
APInt LowBits = RA - 1;
- computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD,
- Depth+1, Q);
+ computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, Depth + 1,
+ Q);
// The low bits of the first operand are unchanged by the srem.
KnownZero = KnownZero2 & LowBits;
@@ -1089,8 +1294,8 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
// remainder is zero.
if (KnownZero.isNonNegative()) {
APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
- computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, TD,
- Depth+1, Q);
+ computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, DL,
+ Depth + 1, Q);
// If it's known zero, our sign bit is also zero.
if (LHSKnownZero.isNegative())
KnownZero.setBit(BitWidth - 1);
@@ -1102,8 +1307,8 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
APInt RA = Rem->getValue();
if (RA.isPowerOf2()) {
APInt LowBits = (RA - 1);
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD,
- Depth+1, Q);
+ computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1,
+ Q);
KnownZero |= ~LowBits;
KnownOne &= LowBits;
break;
@@ -1112,8 +1317,8 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
// Since the result is less than or equal to either operand, any leading
// zero bits in either operand must also exist in the result.
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q);
- computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1, Q);
+ computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q);
+ computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, DL, Depth + 1, Q);
unsigned Leaders = std::max(KnownZero.countLeadingOnes(),
KnownZero2.countLeadingOnes());
@@ -1125,8 +1330,8 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
case Instruction::Alloca: {
AllocaInst *AI = cast<AllocaInst>(V);
unsigned Align = AI->getAlignment();
- if (Align == 0 && TD)
- Align = TD->getABITypeAlignment(AI->getType()->getElementType());
+ if (Align == 0)
+ Align = DL.getABITypeAlignment(AI->getType()->getElementType());
if (Align > 0)
KnownZero = APInt::getLowBitsSet(BitWidth, countTrailingZeros(Align));
@@ -1136,8 +1341,8 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
// Analyze all of the subscripts of this getelementptr instruction
// to determine if we can prove known low zero bits.
APInt LocalKnownZero(BitWidth, 0), LocalKnownOne(BitWidth, 0);
- computeKnownBits(I->getOperand(0), LocalKnownZero, LocalKnownOne, TD,
- Depth+1, Q);
+ computeKnownBits(I->getOperand(0), LocalKnownZero, LocalKnownOne, DL,
+ Depth + 1, Q);
unsigned TrailZ = LocalKnownZero.countTrailingOnes();
gep_type_iterator GTI = gep_type_begin(I);
@@ -1145,10 +1350,6 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
Value *Index = I->getOperand(i);
if (StructType *STy = dyn_cast<StructType>(*GTI)) {
// Handle struct member offset arithmetic.
- if (!TD) {
- TrailZ = 0;
- break;
- }
// Handle case when index is vector zeroinitializer
Constant *CIndex = cast<Constant>(Index);
@@ -1159,7 +1360,7 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
Index = CIndex->getSplatValue();
unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
- const StructLayout *SL = TD->getStructLayout(STy);
+ const StructLayout *SL = DL.getStructLayout(STy);
uint64_t Offset = SL->getElementOffset(Idx);
TrailZ = std::min<unsigned>(TrailZ,
countTrailingZeros(Offset));
@@ -1171,9 +1372,10 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
break;
}
unsigned GEPOpiBits = Index->getType()->getScalarSizeInBits();
- uint64_t TypeSize = TD ? TD->getTypeAllocSize(IndexedTy) : 1;
+ uint64_t TypeSize = DL.getTypeAllocSize(IndexedTy);
LocalKnownZero = LocalKnownOne = APInt(GEPOpiBits, 0);
- computeKnownBits(Index, LocalKnownZero, LocalKnownOne, TD, Depth+1, Q);
+ computeKnownBits(Index, LocalKnownZero, LocalKnownOne, DL, Depth + 1,
+ Q);
TrailZ = std::min(TrailZ,
unsigned(countTrailingZeros(TypeSize) +
LocalKnownZero.countTrailingOnes()));
@@ -1215,11 +1417,11 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
break;
// Ok, we have a PHI of the form L op= R. Check for low
// zero bits.
- computeKnownBits(R, KnownZero2, KnownOne2, TD, Depth+1, Q);
+ computeKnownBits(R, KnownZero2, KnownOne2, DL, Depth + 1, Q);
// We need to take the minimum number of known bits
APInt KnownZero3(KnownZero), KnownOne3(KnownOne);
- computeKnownBits(L, KnownZero3, KnownOne3, TD, Depth+1, Q);
+ computeKnownBits(L, KnownZero3, KnownOne3, DL, Depth + 1, Q);
KnownZero = APInt::getLowBitsSet(BitWidth,
std::min(KnownZero2.countTrailingOnes(),
@@ -1242,16 +1444,16 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
KnownZero = APInt::getAllOnesValue(BitWidth);
KnownOne = APInt::getAllOnesValue(BitWidth);
- for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i) {
+ for (Value *IncValue : P->incoming_values()) {
// Skip direct self references.
- if (P->getIncomingValue(i) == P) continue;
+ if (IncValue == P) continue;
KnownZero2 = APInt(BitWidth, 0);
KnownOne2 = APInt(BitWidth, 0);
// Recurse, but cap the recursion to one level, because we don't
// want to waste time spinning around in loops.
- computeKnownBits(P->getIncomingValue(i), KnownZero2, KnownOne2, TD,
- MaxDepth-1, Q);
+ computeKnownBits(IncValue, KnownZero2, KnownOne2, DL,
+ MaxDepth - 1, Q);
KnownZero &= KnownZero2;
KnownOne &= KnownOne2;
// If all bits have been ruled out, there's no need to check
@@ -1303,19 +1505,19 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
case Intrinsic::sadd_with_overflow:
computeKnownBitsAddSub(true, II->getArgOperand(0),
II->getArgOperand(1), false, KnownZero,
- KnownOne, KnownZero2, KnownOne2, TD, Depth, Q);
+ KnownOne, KnownZero2, KnownOne2, DL, Depth, Q);
break;
case Intrinsic::usub_with_overflow:
case Intrinsic::ssub_with_overflow:
computeKnownBitsAddSub(false, II->getArgOperand(0),
II->getArgOperand(1), false, KnownZero,
- KnownOne, KnownZero2, KnownOne2, TD, Depth, Q);
+ KnownOne, KnownZero2, KnownOne2, DL, Depth, Q);
break;
case Intrinsic::umul_with_overflow:
case Intrinsic::smul_with_overflow:
- computeKnownBitsMul(II->getArgOperand(0), II->getArgOperand(1),
- false, KnownZero, KnownOne,
- KnownZero2, KnownOne2, TD, Depth, Q);
+ computeKnownBitsMul(II->getArgOperand(0), II->getArgOperand(1), false,
+ KnownZero, KnownOne, KnownZero2, KnownOne2, DL,
+ Depth, Q);
break;
}
}
@@ -1328,9 +1530,8 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
/// Determine whether the sign bit is known to be zero or one.
/// Convenience wrapper around computeKnownBits.
void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
- const DataLayout *TD, unsigned Depth,
- const Query &Q) {
- unsigned BitWidth = getBitWidth(V->getType(), TD);
+ const DataLayout &DL, unsigned Depth, const Query &Q) {
+ unsigned BitWidth = getBitWidth(V->getType(), DL);
if (!BitWidth) {
KnownZero = false;
KnownOne = false;
@@ -1338,7 +1539,7 @@ void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
}
APInt ZeroBits(BitWidth, 0);
APInt OneBits(BitWidth, 0);
- computeKnownBits(V, ZeroBits, OneBits, TD, Depth, Q);
+ computeKnownBits(V, ZeroBits, OneBits, DL, Depth, Q);
KnownOne = OneBits[BitWidth - 1];
KnownZero = ZeroBits[BitWidth - 1];
}
@@ -1348,7 +1549,7 @@ void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
/// be a power of two when defined. Supports values with integer or pointer
/// types and vectors of integers.
bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth,
- const Query &Q) {
+ const Query &Q, const DataLayout &DL) {
if (Constant *C = dyn_cast<Constant>(V)) {
if (C->isNullValue())
return OrZero;
@@ -1375,20 +1576,19 @@ bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth,
// A shift of a power of two is a power of two or zero.
if (OrZero && (match(V, m_Shl(m_Value(X), m_Value())) ||
match(V, m_Shr(m_Value(X), m_Value()))))
- return isKnownToBeAPowerOfTwo(X, /*OrZero*/true, Depth, Q);
+ return isKnownToBeAPowerOfTwo(X, /*OrZero*/ true, Depth, Q, DL);
if (ZExtInst *ZI = dyn_cast<ZExtInst>(V))
- return isKnownToBeAPowerOfTwo(ZI->getOperand(0), OrZero, Depth, Q);
+ return isKnownToBeAPowerOfTwo(ZI->getOperand(0), OrZero, Depth, Q, DL);
if (SelectInst *SI = dyn_cast<SelectInst>(V))
- return
- isKnownToBeAPowerOfTwo(SI->getTrueValue(), OrZero, Depth, Q) &&
- isKnownToBeAPowerOfTwo(SI->getFalseValue(), OrZero, Depth, Q);
+ return isKnownToBeAPowerOfTwo(SI->getTrueValue(), OrZero, Depth, Q, DL) &&
+ isKnownToBeAPowerOfTwo(SI->getFalseValue(), OrZero, Depth, Q, DL);
if (OrZero && match(V, m_And(m_Value(X), m_Value(Y)))) {
// A power of two and'd with anything is a power of two or zero.
- if (isKnownToBeAPowerOfTwo(X, /*OrZero*/true, Depth, Q) ||
- isKnownToBeAPowerOfTwo(Y, /*OrZero*/true, Depth, Q))
+ if (isKnownToBeAPowerOfTwo(X, /*OrZero*/ true, Depth, Q, DL) ||
+ isKnownToBeAPowerOfTwo(Y, /*OrZero*/ true, Depth, Q, DL))
return true;
// X & (-X) is always a power of two or zero.
if (match(X, m_Neg(m_Specific(Y))) || match(Y, m_Neg(m_Specific(X))))
@@ -1403,19 +1603,19 @@ bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth,
if (OrZero || VOBO->hasNoUnsignedWrap() || VOBO->hasNoSignedWrap()) {
if (match(X, m_And(m_Specific(Y), m_Value())) ||
match(X, m_And(m_Value(), m_Specific(Y))))
- if (isKnownToBeAPowerOfTwo(Y, OrZero, Depth, Q))
+ if (isKnownToBeAPowerOfTwo(Y, OrZero, Depth, Q, DL))
return true;
if (match(Y, m_And(m_Specific(X), m_Value())) ||
match(Y, m_And(m_Value(), m_Specific(X))))
- if (isKnownToBeAPowerOfTwo(X, OrZero, Depth, Q))
+ if (isKnownToBeAPowerOfTwo(X, OrZero, Depth, Q, DL))
return true;
unsigned BitWidth = V->getType()->getScalarSizeInBits();
APInt LHSZeroBits(BitWidth, 0), LHSOneBits(BitWidth, 0);
- computeKnownBits(X, LHSZeroBits, LHSOneBits, nullptr, Depth, Q);
+ computeKnownBits(X, LHSZeroBits, LHSOneBits, DL, Depth, Q);
APInt RHSZeroBits(BitWidth, 0), RHSOneBits(BitWidth, 0);
- computeKnownBits(Y, RHSZeroBits, RHSOneBits, nullptr, Depth, Q);
+ computeKnownBits(Y, RHSZeroBits, RHSOneBits, DL, Depth, Q);
// If i8 V is a power of two or zero:
// ZeroBits: 1 1 1 0 1 1 1 1
// ~ZeroBits: 0 0 0 1 0 0 0 0
@@ -1433,7 +1633,7 @@ bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth,
if (match(V, m_Exact(m_LShr(m_Value(), m_Value()))) ||
match(V, m_Exact(m_UDiv(m_Value(), m_Value())))) {
return isKnownToBeAPowerOfTwo(cast<Operator>(V)->getOperand(0), OrZero,
- Depth, Q);
+ Depth, Q, DL);
}
return false;
@@ -1445,7 +1645,7 @@ bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth,
/// to be non-null.
///
/// Currently this routine does not support vector GEPs.
-static bool isGEPKnownNonNull(GEPOperator *GEP, const DataLayout *DL,
+static bool isGEPKnownNonNull(GEPOperator *GEP, const DataLayout &DL,
unsigned Depth, const Query &Q) {
if (!GEP->isInBounds() || GEP->getPointerAddressSpace() != 0)
return false;
@@ -1458,10 +1658,6 @@ static bool isGEPKnownNonNull(GEPOperator *GEP, const DataLayout *DL,
if (isKnownNonZero(GEP->getPointerOperand(), DL, Depth, Q))
return true;
- // Past this, if we don't have DataLayout, we can't do much.
- if (!DL)
- return false;
-
// Walk the GEP operands and see if any operand introduces a non-zero offset.
// If so, then the GEP cannot produce a null pointer, as doing so would
// inherently violate the inbounds contract within address space zero.
@@ -1471,7 +1667,7 @@ static bool isGEPKnownNonNull(GEPOperator *GEP, const DataLayout *DL,
if (StructType *STy = dyn_cast<StructType>(*GTI)) {
ConstantInt *OpC = cast<ConstantInt>(GTI.getOperand());
unsigned ElementIdx = OpC->getZExtValue();
- const StructLayout *SL = DL->getStructLayout(STy);
+ const StructLayout *SL = DL.getStructLayout(STy);
uint64_t ElementOffset = SL->getElementOffset(ElementIdx);
if (ElementOffset > 0)
return true;
@@ -1479,7 +1675,7 @@ static bool isGEPKnownNonNull(GEPOperator *GEP, const DataLayout *DL,
}
// If we have a zero-sized type, the index doesn't matter. Keep looping.
- if (DL->getTypeAllocSize(GTI.getIndexedType()) == 0)
+ if (DL.getTypeAllocSize(GTI.getIndexedType()) == 0)
continue;
// Fast path the constant operand case both for efficiency and so we don't
@@ -1528,7 +1724,7 @@ static bool rangeMetadataExcludesValue(MDNode* Ranges,
/// For vectors return true if every element is known to be non-zero when
/// defined. Supports values with integer or pointer type and vectors of
/// integers.
-bool isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth,
+bool isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth,
const Query &Q) {
if (Constant *C = dyn_cast<Constant>(V)) {
if (C->isNullValue())
@@ -1561,21 +1757,20 @@ bool isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth,
if (isKnownNonNull(V))
return true;
if (GEPOperator *GEP = dyn_cast<GEPOperator>(V))
- if (isGEPKnownNonNull(GEP, TD, Depth, Q))
+ if (isGEPKnownNonNull(GEP, DL, Depth, Q))
return true;
}
- unsigned BitWidth = getBitWidth(V->getType()->getScalarType(), TD);
+ unsigned BitWidth = getBitWidth(V->getType()->getScalarType(), DL);
// X | Y != 0 if X != 0 or Y != 0.
Value *X = nullptr, *Y = nullptr;
if (match(V, m_Or(m_Value(X), m_Value(Y))))
- return isKnownNonZero(X, TD, Depth, Q) ||
- isKnownNonZero(Y, TD, Depth, Q);
+ return isKnownNonZero(X, DL, Depth, Q) || isKnownNonZero(Y, DL, Depth, Q);
// ext X != 0 if X != 0.
if (isa<SExtInst>(V) || isa<ZExtInst>(V))
- return isKnownNonZero(cast<Instruction>(V)->getOperand(0), TD, Depth, Q);
+ return isKnownNonZero(cast<Instruction>(V)->getOperand(0), DL, Depth, Q);
// shl X, Y != 0 if X is odd. Note that the value of the shift is undefined
// if the lowest bit is shifted off the end.
@@ -1583,11 +1778,11 @@ bool isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth,
// shl nuw can't remove any non-zero bits.
OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(V);
if (BO->hasNoUnsignedWrap())
- return isKnownNonZero(X, TD, Depth, Q);
+ return isKnownNonZero(X, DL, Depth, Q);
APInt KnownZero(BitWidth, 0);
APInt KnownOne(BitWidth, 0);
- computeKnownBits(X, KnownZero, KnownOne, TD, Depth, Q);
+ computeKnownBits(X, KnownZero, KnownOne, DL, Depth, Q);
if (KnownOne[0])
return true;
}
@@ -1597,29 +1792,28 @@ bool isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth,
// shr exact can only shift out zero bits.
PossiblyExactOperator *BO = cast<PossiblyExactOperator>(V);
if (BO->isExact())
- return isKnownNonZero(X, TD, Depth, Q);
+ return isKnownNonZero(X, DL, Depth, Q);
bool XKnownNonNegative, XKnownNegative;
- ComputeSignBit(X, XKnownNonNegative, XKnownNegative, TD, Depth, Q);
+ ComputeSignBit(X, XKnownNonNegative, XKnownNegative, DL, Depth, Q);
if (XKnownNegative)
return true;
}
// div exact can only produce a zero if the dividend is zero.
else if (match(V, m_Exact(m_IDiv(m_Value(X), m_Value())))) {
- return isKnownNonZero(X, TD, Depth, Q);
+ return isKnownNonZero(X, DL, Depth, Q);
}
// X + Y.
else if (match(V, m_Add(m_Value(X), m_Value(Y)))) {
bool XKnownNonNegative, XKnownNegative;
bool YKnownNonNegative, YKnownNegative;
- ComputeSignBit(X, XKnownNonNegative, XKnownNegative, TD, Depth, Q);
- ComputeSignBit(Y, YKnownNonNegative, YKnownNegative, TD, Depth, Q);
+ ComputeSignBit(X, XKnownNonNegative, XKnownNegative, DL, Depth, Q);
+ ComputeSignBit(Y, YKnownNonNegative, YKnownNegative, DL, Depth, Q);
// If X and Y are both non-negative (as signed values) then their sum is not
// zero unless both X and Y are zero.
if (XKnownNonNegative && YKnownNonNegative)
- if (isKnownNonZero(X, TD, Depth, Q) ||
- isKnownNonZero(Y, TD, Depth, Q))
+ if (isKnownNonZero(X, DL, Depth, Q) || isKnownNonZero(Y, DL, Depth, Q))
return true;
// If X and Y are both negative (as signed values) then their sum is not
@@ -1630,22 +1824,22 @@ bool isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth,
APInt Mask = APInt::getSignedMaxValue(BitWidth);
// The sign bit of X is set. If some other bit is set then X is not equal
// to INT_MIN.
- computeKnownBits(X, KnownZero, KnownOne, TD, Depth, Q);
+ computeKnownBits(X, KnownZero, KnownOne, DL, Depth, Q);
if ((KnownOne & Mask) != 0)
return true;
// The sign bit of Y is set. If some other bit is set then Y is not equal
// to INT_MIN.
- computeKnownBits(Y, KnownZero, KnownOne, TD, Depth, Q);
+ computeKnownBits(Y, KnownZero, KnownOne, DL, Depth, Q);
if ((KnownOne & Mask) != 0)
return true;
}
// The sum of a non-negative number and a power of two is not zero.
if (XKnownNonNegative &&
- isKnownToBeAPowerOfTwo(Y, /*OrZero*/false, Depth, Q))
+ isKnownToBeAPowerOfTwo(Y, /*OrZero*/ false, Depth, Q, DL))
return true;
if (YKnownNonNegative &&
- isKnownToBeAPowerOfTwo(X, /*OrZero*/false, Depth, Q))
+ isKnownToBeAPowerOfTwo(X, /*OrZero*/ false, Depth, Q, DL))
return true;
}
// X * Y.
@@ -1654,21 +1848,20 @@ bool isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth,
// If X and Y are non-zero then so is X * Y as long as the multiplication
// does not overflow.
if ((BO->hasNoSignedWrap() || BO->hasNoUnsignedWrap()) &&
- isKnownNonZero(X, TD, Depth, Q) &&
- isKnownNonZero(Y, TD, Depth, Q))
+ isKnownNonZero(X, DL, Depth, Q) && isKnownNonZero(Y, DL, Depth, Q))
return true;
}
// (C ? X : Y) != 0 if X != 0 and Y != 0.
else if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
- if (isKnownNonZero(SI->getTrueValue(), TD, Depth, Q) &&
- isKnownNonZero(SI->getFalseValue(), TD, Depth, Q))
+ if (isKnownNonZero(SI->getTrueValue(), DL, Depth, Q) &&
+ isKnownNonZero(SI->getFalseValue(), DL, Depth, Q))
return true;
}
if (!BitWidth) return false;
APInt KnownZero(BitWidth, 0);
APInt KnownOne(BitWidth, 0);
- computeKnownBits(V, KnownZero, KnownOne, TD, Depth, Q);
+ computeKnownBits(V, KnownZero, KnownOne, DL, Depth, Q);
return KnownOne != 0;
}
@@ -1677,15 +1870,14 @@ bool isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth,
/// cannot have.
///
/// This function is defined on values with integer type, values with pointer
-/// type (but only if TD is non-null), and vectors of integers. In the case
+/// type, and vectors of integers. In the case
/// where V is a vector, the mask, known zero, and known one values are the
/// same width as the vector element, and the bit is set only if it is true
/// for all of the elements in the vector.
-bool MaskedValueIsZero(Value *V, const APInt &Mask,
- const DataLayout *TD, unsigned Depth,
- const Query &Q) {
+bool MaskedValueIsZero(Value *V, const APInt &Mask, const DataLayout &DL,
+ unsigned Depth, const Query &Q) {
APInt KnownZero(Mask.getBitWidth(), 0), KnownOne(Mask.getBitWidth(), 0);
- computeKnownBits(V, KnownZero, KnownOne, TD, Depth, Q);
+ computeKnownBits(V, KnownZero, KnownOne, DL, Depth, Q);
return (KnownZero & Mask) == Mask;
}
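The wrapper above reduces the query to a single mask test over the computeKnownBits results. A minimal sketch of that final test on concrete bit patterns, using only APInt (illustrative, not part of the patch; the helper name is made up):

    #include "llvm/ADT/APInt.h"
    #include <cassert>

    // If the analysis proves the top byte of a 16-bit value is zero, any mask
    // confined to that byte satisfies MaskedValueIsZero.
    static void maskedValueIsZeroSketch() {
      llvm::APInt KnownZero(16, 0xFF00); // bits proven zero by computeKnownBits
      llvm::APInt Mask(16, 0x0F00);      // bits the caller asks about
      assert(((KnownZero & Mask) == Mask) && "every queried bit is known zero");
    }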
@@ -1699,14 +1891,9 @@ bool MaskedValueIsZero(Value *V, const APInt &Mask,
///
/// 'Op' must have a scalar integer type.
///
-unsigned ComputeNumSignBits(Value *V, const DataLayout *TD,
- unsigned Depth, const Query &Q) {
- assert((TD || V->getType()->isIntOrIntVectorTy()) &&
- "ComputeNumSignBits requires a DataLayout object to operate "
- "on non-integer values!");
- Type *Ty = V->getType();
- unsigned TyBits = TD ? TD->getTypeSizeInBits(V->getType()->getScalarType()) :
- Ty->getScalarSizeInBits();
+unsigned ComputeNumSignBits(Value *V, const DataLayout &DL, unsigned Depth,
+ const Query &Q) {
+ unsigned TyBits = DL.getTypeSizeInBits(V->getType()->getScalarType());
unsigned Tmp, Tmp2;
unsigned FirstAnswer = 1;
@@ -1721,10 +1908,63 @@ unsigned ComputeNumSignBits(Value *V, const DataLayout *TD,
default: break;
case Instruction::SExt:
Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits();
- return ComputeNumSignBits(U->getOperand(0), TD, Depth+1, Q) + Tmp;
+ return ComputeNumSignBits(U->getOperand(0), DL, Depth + 1, Q) + Tmp;
+
+ case Instruction::SDiv: {
+ const APInt *Denominator;
+ // sdiv X, C -> adds log(C) sign bits.
+ if (match(U->getOperand(1), m_APInt(Denominator))) {
+
+ // Ignore non-positive denominator.
+ if (!Denominator->isStrictlyPositive())
+ break;
+
+ // Calculate the incoming numerator bits.
+ unsigned NumBits = ComputeNumSignBits(U->getOperand(0), DL, Depth + 1, Q);
+
+ // Add floor(log(C)) bits to the numerator bits.
+ return std::min(TyBits, NumBits + Denominator->logBase2());
+ }
+ break;
+ }
+
+ case Instruction::SRem: {
+ const APInt *Denominator;
+ // srem X, C -> we know that the result is within [-C+1,C) when C is a
+  // positive constant. This lets us put a lower bound on the number of sign
+ // bits.
+ if (match(U->getOperand(1), m_APInt(Denominator))) {
+
+ // Ignore non-positive denominator.
+ if (!Denominator->isStrictlyPositive())
+ break;
+
+ // Calculate the incoming numerator bits. SRem by a positive constant
+ // can't lower the number of sign bits.
+ unsigned NumrBits =
+ ComputeNumSignBits(U->getOperand(0), DL, Depth + 1, Q);
+
+ // Calculate the leading sign bit constraints by examining the
+ // denominator. Given that the denominator is positive, there are two
+ // cases:
+ //
+ // 1. the numerator is positive. The result range is [0,C) and [0,C) u<
+ // (1 << ceilLogBase2(C)).
+ //
+ // 2. the numerator is negative. Then the result range is (-C,0] and
+ // integers in (-C,0] are either 0 or >u (-1 << ceilLogBase2(C)).
+ //
+ // Thus a lower bound on the number of sign bits is `TyBits -
+ // ceilLogBase2(C)`.
+
+ unsigned ResBits = TyBits - Denominator->ceilLogBase2();
+ return std::max(NumrBits, ResBits);
+ }
+ break;
+ }
case Instruction::AShr: {
- Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1, Q);
+ Tmp = ComputeNumSignBits(U->getOperand(0), DL, Depth + 1, Q);
// ashr X, C -> adds C sign bits. Vectors too.
const APInt *ShAmt;
if (match(U->getOperand(1), m_APInt(ShAmt))) {
@@ -1737,7 +1977,7 @@ unsigned ComputeNumSignBits(Value *V, const DataLayout *TD,
const APInt *ShAmt;
if (match(U->getOperand(1), m_APInt(ShAmt))) {
// shl destroys sign bits.
- Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1, Q);
+ Tmp = ComputeNumSignBits(U->getOperand(0), DL, Depth + 1, Q);
Tmp2 = ShAmt->getZExtValue();
if (Tmp2 >= TyBits || // Bad shift.
Tmp2 >= Tmp) break; // Shifted all sign bits out.
@@ -1749,9 +1989,9 @@ unsigned ComputeNumSignBits(Value *V, const DataLayout *TD,
case Instruction::Or:
case Instruction::Xor: // NOT is handled here.
// Logical binary ops preserve the number of sign bits at the worst.
- Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1, Q);
+ Tmp = ComputeNumSignBits(U->getOperand(0), DL, Depth + 1, Q);
if (Tmp != 1) {
- Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1, Q);
+ Tmp2 = ComputeNumSignBits(U->getOperand(1), DL, Depth + 1, Q);
FirstAnswer = std::min(Tmp, Tmp2);
// We computed what we know about the sign bits as our first
// answer. Now proceed to the generic code that uses
@@ -1760,22 +2000,23 @@ unsigned ComputeNumSignBits(Value *V, const DataLayout *TD,
break;
case Instruction::Select:
- Tmp = ComputeNumSignBits(U->getOperand(1), TD, Depth+1, Q);
+ Tmp = ComputeNumSignBits(U->getOperand(1), DL, Depth + 1, Q);
if (Tmp == 1) return 1; // Early out.
- Tmp2 = ComputeNumSignBits(U->getOperand(2), TD, Depth+1, Q);
+ Tmp2 = ComputeNumSignBits(U->getOperand(2), DL, Depth + 1, Q);
return std::min(Tmp, Tmp2);
case Instruction::Add:
// Add can have at most one carry bit. Thus we know that the output
// is, at worst, one more bit than the inputs.
- Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1, Q);
+ Tmp = ComputeNumSignBits(U->getOperand(0), DL, Depth + 1, Q);
if (Tmp == 1) return 1; // Early out.
// Special case decrementing a value (ADD X, -1):
if (const auto *CRHS = dyn_cast<Constant>(U->getOperand(1)))
if (CRHS->isAllOnesValue()) {
APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
- computeKnownBits(U->getOperand(0), KnownZero, KnownOne, TD, Depth+1, Q);
+ computeKnownBits(U->getOperand(0), KnownZero, KnownOne, DL, Depth + 1,
+ Q);
// If the input is known to be 0 or 1, the output is 0/-1, which is all
// sign bits set.
@@ -1788,19 +2029,20 @@ unsigned ComputeNumSignBits(Value *V, const DataLayout *TD,
return Tmp;
}
- Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1, Q);
+ Tmp2 = ComputeNumSignBits(U->getOperand(1), DL, Depth + 1, Q);
if (Tmp2 == 1) return 1;
return std::min(Tmp, Tmp2)-1;
case Instruction::Sub:
- Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1, Q);
+ Tmp2 = ComputeNumSignBits(U->getOperand(1), DL, Depth + 1, Q);
if (Tmp2 == 1) return 1;
// Handle NEG.
if (const auto *CLHS = dyn_cast<Constant>(U->getOperand(0)))
if (CLHS->isNullValue()) {
APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
- computeKnownBits(U->getOperand(1), KnownZero, KnownOne, TD, Depth+1, Q);
+ computeKnownBits(U->getOperand(1), KnownZero, KnownOne, DL, Depth + 1,
+ Q);
// If the input is known to be 0 or 1, the output is 0/-1, which is all
// sign bits set.
if ((KnownZero | APInt(TyBits, 1)).isAllOnesValue())
@@ -1816,7 +2058,7 @@ unsigned ComputeNumSignBits(Value *V, const DataLayout *TD,
// Sub can have at most one carry bit. Thus we know that the output
// is, at worst, one more bit than the inputs.
- Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1, Q);
+ Tmp = ComputeNumSignBits(U->getOperand(0), DL, Depth + 1, Q);
if (Tmp == 1) return 1; // Early out.
return std::min(Tmp, Tmp2)-1;
@@ -1830,12 +2072,11 @@ unsigned ComputeNumSignBits(Value *V, const DataLayout *TD,
// Take the minimum of all incoming values. This can't infinitely loop
// because of our depth threshold.
- Tmp = ComputeNumSignBits(PN->getIncomingValue(0), TD, Depth+1, Q);
+ Tmp = ComputeNumSignBits(PN->getIncomingValue(0), DL, Depth + 1, Q);
for (unsigned i = 1, e = NumIncomingValues; i != e; ++i) {
if (Tmp == 1) return Tmp;
- Tmp = std::min(Tmp,
- ComputeNumSignBits(PN->getIncomingValue(i), TD,
- Depth+1, Q));
+ Tmp = std::min(
+ Tmp, ComputeNumSignBits(PN->getIncomingValue(i), DL, Depth + 1, Q));
}
return Tmp;
}
@@ -1850,7 +2091,7 @@ unsigned ComputeNumSignBits(Value *V, const DataLayout *TD,
// use this information.
APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
APInt Mask;
- computeKnownBits(V, KnownZero, KnownOne, TD, Depth, Q);
+ computeKnownBits(V, KnownZero, KnownOne, DL, Depth, Q);
if (KnownZero.isNegative()) { // sign bit is 0
Mask = KnownZero;
@@ -2000,8 +2241,11 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) {
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V))
return !CFP->getValueAPF().isNegZero();
+ // FIXME: Magic number! At the least, this should be given a name because it's
+ // used similarly in CannotBeOrderedLessThanZero(). A better fix may be to
+ // expose it as a parameter, so it can be used for testing / experimenting.
if (Depth == 6)
- return 1; // Limit search depth.
+ return false; // Limit search depth.
const Operator *I = dyn_cast<Operator>(V);
if (!I) return false;
@@ -2044,6 +2288,62 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) {
return false;
}
+bool llvm::CannotBeOrderedLessThanZero(const Value *V, unsigned Depth) {
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V))
+ return !CFP->getValueAPF().isNegative() || CFP->getValueAPF().isZero();
+
+ // FIXME: Magic number! At the least, this should be given a name because it's
+ // used similarly in CannotBeNegativeZero(). A better fix may be to
+ // expose it as a parameter, so it can be used for testing / experimenting.
+ if (Depth == 6)
+ return false; // Limit search depth.
+
+ const Operator *I = dyn_cast<Operator>(V);
+ if (!I) return false;
+
+ switch (I->getOpcode()) {
+ default: break;
+ case Instruction::FMul:
+ // x*x is always non-negative or a NaN.
+ if (I->getOperand(0) == I->getOperand(1))
+ return true;
+ // Fall through
+ case Instruction::FAdd:
+ case Instruction::FDiv:
+ case Instruction::FRem:
+ return CannotBeOrderedLessThanZero(I->getOperand(0), Depth+1) &&
+ CannotBeOrderedLessThanZero(I->getOperand(1), Depth+1);
+ case Instruction::FPExt:
+ case Instruction::FPTrunc:
+ // Widening/narrowing never change sign.
+ return CannotBeOrderedLessThanZero(I->getOperand(0), Depth+1);
+ case Instruction::Call:
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::exp:
+ case Intrinsic::exp2:
+ case Intrinsic::fabs:
+ case Intrinsic::sqrt:
+ return true;
+ case Intrinsic::powi:
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ // powi(x,n) is non-negative if n is even.
+ if (CI->getBitWidth() <= 64 && CI->getSExtValue() % 2u == 0)
+ return true;
+ }
+ return CannotBeOrderedLessThanZero(I->getOperand(0), Depth+1);
+ case Intrinsic::fma:
+ case Intrinsic::fmuladd:
+ // x*x+y is non-negative if y is non-negative.
+ return I->getOperand(0) == I->getOperand(1) &&
+ CannotBeOrderedLessThanZero(I->getOperand(2), Depth+1);
+ }
+ break;
+ }
+ return false;
+}
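A small harness exercising the new entry point on the simplest case it handles, an fmul of a value with itself. This is an illustrative sketch only, not part of the patch; it assumes the matching declaration was added to ValueTracking.h, and the function and IR names below are invented:

    #include "llvm/Analysis/ValueTracking.h"
    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"

    static bool squareIsNeverOrderedLessThanZero() {
      llvm::LLVMContext Ctx;
      llvm::Module M("sketch", Ctx);
      llvm::Type *FloatTy = llvm::Type::getFloatTy(Ctx);
      auto *FTy = llvm::FunctionType::get(FloatTy, {FloatTy}, /*isVarArg=*/false);
      auto *F = llvm::cast<llvm::Function>(M.getOrInsertFunction("square", FTy));
      auto *BB = llvm::BasicBlock::Create(Ctx, "entry", F);
      llvm::IRBuilder<> B(BB);
      llvm::Value *X = &*F->arg_begin();
      llvm::Value *Y = B.CreateFMul(X, X); // x*x is non-negative or a NaN
      B.CreateRet(Y);
      // The FMul case above returns true when both operands are the same value.
      return llvm::CannotBeOrderedLessThanZero(Y, /*Depth=*/0);
    }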
+
/// If the specified value can be set by repeating the same byte in memory,
/// return the i8 value that it is represented with. This is
/// true for all i8 values obviously, but is also true for i32 0, i32 -1,
@@ -2068,26 +2368,14 @@ Value *llvm::isBytewiseValue(Value *V) {
// Don't handle long double formats, which have strange constraints.
}
- // We can handle constant integers that are power of two in size and a
- // multiple of 8 bits.
+ // We can handle constant integers that are multiple of 8 bits.
if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
- unsigned Width = CI->getBitWidth();
- if (isPowerOf2_32(Width) && Width > 8) {
- // We can handle this value if the recursive binary decomposition is the
- // same at all levels.
- APInt Val = CI->getValue();
- APInt Val2;
- while (Val.getBitWidth() != 8) {
- unsigned NextWidth = Val.getBitWidth()/2;
- Val2 = Val.lshr(NextWidth);
- Val2 = Val2.trunc(Val.getBitWidth()/2);
- Val = Val.trunc(Val.getBitWidth()/2);
-
- // If the top/bottom halves aren't the same, reject it.
- if (Val != Val2)
- return nullptr;
- }
- return ConstantInt::get(V->getContext(), Val);
+ if (CI->getBitWidth() % 8 == 0) {
+ assert(CI->getBitWidth() > 8 && "8 bits should be handled above!");
+
+ if (!CI->getValue().isSplat(8))
+ return nullptr;
+ return ConstantInt::get(V->getContext(), CI->getValue().trunc(8));
}
}
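The rewritten integer case delegates the "same byte repeated" test to APInt::isSplat. A tiny sketch of what that predicate accepts and rejects (illustrative only, not part of the patch; the helper name is invented):

    #include "llvm/ADT/APInt.h"
    #include <cassert>

    static void splatByteSketch() {
      llvm::APInt A(32, 0xAAAAAAAAULL); // 0xAA in every byte -> isBytewiseValue yields i8 0xAA
      llvm::APInt B(32, 0x01020304ULL); // four distinct bytes -> rejected
      assert(A.isSplat(8) && "same 8-bit pattern repeated across the value");
      assert(!B.isSplat(8) && "no single repeating byte");
    }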
@@ -2286,23 +2574,19 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
/// Analyze the specified pointer to see if it can be expressed as a base
/// pointer plus a constant offset. Return the base and offset to the caller.
Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
- const DataLayout *DL) {
- // Without DataLayout, conservatively assume 64-bit offsets, which is
- // the widest we support.
- unsigned BitWidth = DL ? DL->getPointerTypeSizeInBits(Ptr->getType()) : 64;
+ const DataLayout &DL) {
+ unsigned BitWidth = DL.getPointerTypeSizeInBits(Ptr->getType());
APInt ByteOffset(BitWidth, 0);
while (1) {
if (Ptr->getType()->isVectorTy())
break;
if (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) {
- if (DL) {
- APInt GEPOffset(BitWidth, 0);
- if (!GEP->accumulateConstantOffset(*DL, GEPOffset))
- break;
+ APInt GEPOffset(BitWidth, 0);
+ if (!GEP->accumulateConstantOffset(DL, GEPOffset))
+ break;
- ByteOffset += GEPOffset;
- }
+ ByteOffset += GEPOffset;
Ptr = GEP->getPointerOperand();
} else if (Operator::getOpcode(Ptr) == Instruction::BitCast ||
@@ -2331,7 +2615,7 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str,
// Look through bitcast instructions and geps.
V = V->stripPointerCasts();
- // If the value is a GEP instructionor constant expression, treat it as an
+ // If the value is a GEP instruction or constant expression, treat it as an
// offset.
if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
// Make sure the GEP has exactly three arguments.
@@ -2358,7 +2642,8 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str,
StartIdx = CI->getZExtValue();
else
return false;
- return getConstantStringInfo(GEP->getOperand(0), Str, StartIdx+Offset);
+ return getConstantStringInfo(GEP->getOperand(0), Str, StartIdx + Offset,
+ TrimAtNul);
}
// The GEP instruction, constant or instruction, must reference a global
@@ -2421,8 +2706,8 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSetImpl<PHINode*> &PHIs) {
// If it was new, see if all the input strings are the same length.
uint64_t LenSoFar = ~0ULL;
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- uint64_t Len = GetStringLengthH(PN->getIncomingValue(i), PHIs);
+ for (Value *IncValue : PN->incoming_values()) {
+ uint64_t Len = GetStringLengthH(IncValue, PHIs);
if (Len == 0) return 0; // Unknown length -> unknown.
if (Len == ~0ULL) continue;
@@ -2468,8 +2753,34 @@ uint64_t llvm::GetStringLength(Value *V) {
return Len == ~0ULL ? 1 : Len;
}
-Value *
-llvm::GetUnderlyingObject(Value *V, const DataLayout *TD, unsigned MaxLookup) {
+/// \brief \p PN defines a loop-variant pointer to an object. Check if the
+/// previous iteration of the loop was referring to the same object as \p PN.
+static bool isSameUnderlyingObjectInLoop(PHINode *PN, LoopInfo *LI) {
+ // Find the loop-defined value.
+ Loop *L = LI->getLoopFor(PN->getParent());
+ if (PN->getNumIncomingValues() != 2)
+ return true;
+
+ // Find the value from previous iteration.
+ auto *PrevValue = dyn_cast<Instruction>(PN->getIncomingValue(0));
+ if (!PrevValue || LI->getLoopFor(PrevValue->getParent()) != L)
+ PrevValue = dyn_cast<Instruction>(PN->getIncomingValue(1));
+ if (!PrevValue || LI->getLoopFor(PrevValue->getParent()) != L)
+ return true;
+
+ // If a new pointer is loaded in the loop, the pointer references a different
+ // object in every iteration. E.g.:
+ // for (i)
+ // int *p = a[i];
+ // ...
+ if (auto *Load = dyn_cast<LoadInst>(PrevValue))
+ if (!L->isLoopInvariant(Load->getPointerOperand()))
+ return false;
+ return true;
+}
+
+Value *llvm::GetUnderlyingObject(Value *V, const DataLayout &DL,
+ unsigned MaxLookup) {
if (!V->getType()->isPointerTy())
return V;
for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) {
@@ -2486,7 +2797,7 @@ llvm::GetUnderlyingObject(Value *V, const DataLayout *TD, unsigned MaxLookup) {
// See if InstructionSimplify knows any relevant tricks.
if (Instruction *I = dyn_cast<Instruction>(V))
// TODO: Acquire a DominatorTree and AssumptionCache and use them.
- if (Value *Simplified = SimplifyInstruction(I, TD, nullptr)) {
+ if (Value *Simplified = SimplifyInstruction(I, DL, nullptr)) {
V = Simplified;
continue;
}
@@ -2498,17 +2809,15 @@ llvm::GetUnderlyingObject(Value *V, const DataLayout *TD, unsigned MaxLookup) {
return V;
}
-void
-llvm::GetUnderlyingObjects(Value *V,
- SmallVectorImpl<Value *> &Objects,
- const DataLayout *TD,
- unsigned MaxLookup) {
+void llvm::GetUnderlyingObjects(Value *V, SmallVectorImpl<Value *> &Objects,
+ const DataLayout &DL, LoopInfo *LI,
+ unsigned MaxLookup) {
SmallPtrSet<Value *, 4> Visited;
SmallVector<Value *, 4> Worklist;
Worklist.push_back(V);
do {
Value *P = Worklist.pop_back_val();
- P = GetUnderlyingObject(P, TD, MaxLookup);
+ P = GetUnderlyingObject(P, DL, MaxLookup);
if (!Visited.insert(P).second)
continue;
@@ -2520,8 +2829,20 @@ llvm::GetUnderlyingObjects(Value *V,
}
if (PHINode *PN = dyn_cast<PHINode>(P)) {
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- Worklist.push_back(PN->getIncomingValue(i));
+ // If this PHI changes the underlying object in every iteration of the
+ // loop, don't look through it. Consider:
+ // int **A;
+ // for (i) {
+ // Prev = Curr; // Prev = PHI (Prev_0, Curr)
+ // Curr = A[i];
+ // *Prev, *Curr;
+ //
+ // Prev is tracking Curr one iteration behind so they refer to different
+ // underlying objects.
+ if (!LI || !LI->isLoopHeader(PN->getParent()) ||
+ isSameUnderlyingObjectInLoop(PN, LI))
+ for (Value *IncValue : PN->incoming_values())
+ Worklist.push_back(IncValue);
continue;
}
@@ -2542,8 +2863,188 @@ bool llvm::onlyUsedByLifetimeMarkers(const Value *V) {
return true;
}
+static bool isDereferenceableFromAttribute(const Value *BV, APInt Offset,
+ Type *Ty, const DataLayout &DL,
+ const Instruction *CtxI,
+ const DominatorTree *DT,
+ const TargetLibraryInfo *TLI) {
+ assert(Offset.isNonNegative() && "offset can't be negative");
+ assert(Ty->isSized() && "must be sized");
+
+ APInt DerefBytes(Offset.getBitWidth(), 0);
+ bool CheckForNonNull = false;
+ if (const Argument *A = dyn_cast<Argument>(BV)) {
+ DerefBytes = A->getDereferenceableBytes();
+ if (!DerefBytes.getBoolValue()) {
+ DerefBytes = A->getDereferenceableOrNullBytes();
+ CheckForNonNull = true;
+ }
+ } else if (auto CS = ImmutableCallSite(BV)) {
+ DerefBytes = CS.getDereferenceableBytes(0);
+ if (!DerefBytes.getBoolValue()) {
+ DerefBytes = CS.getDereferenceableOrNullBytes(0);
+ CheckForNonNull = true;
+ }
+ } else if (const LoadInst *LI = dyn_cast<LoadInst>(BV)) {
+ if (MDNode *MD = LI->getMetadata(LLVMContext::MD_dereferenceable)) {
+ ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(0));
+ DerefBytes = CI->getLimitedValue();
+ }
+ if (!DerefBytes.getBoolValue()) {
+ if (MDNode *MD =
+ LI->getMetadata(LLVMContext::MD_dereferenceable_or_null)) {
+ ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(0));
+ DerefBytes = CI->getLimitedValue();
+ }
+ CheckForNonNull = true;
+ }
+ }
+
+ if (DerefBytes.getBoolValue())
+ if (DerefBytes.uge(Offset + DL.getTypeStoreSize(Ty)))
+ if (!CheckForNonNull || isKnownNonNullAt(BV, CtxI, DT, TLI))
+ return true;
+
+ return false;
+}
+
+static bool isDereferenceableFromAttribute(const Value *V, const DataLayout &DL,
+ const Instruction *CtxI,
+ const DominatorTree *DT,
+ const TargetLibraryInfo *TLI) {
+ Type *VTy = V->getType();
+ Type *Ty = VTy->getPointerElementType();
+ if (!Ty->isSized())
+ return false;
+
+ APInt Offset(DL.getTypeStoreSizeInBits(VTy), 0);
+ return isDereferenceableFromAttribute(V, Offset, Ty, DL, CtxI, DT, TLI);
+}
+
+/// Return true if Value is always a dereferenceable pointer.
+///
+/// Test if V is always a pointer to allocated and suitably aligned memory for
+/// a simple load or store.
+static bool isDereferenceablePointer(const Value *V, const DataLayout &DL,
+ const Instruction *CtxI,
+ const DominatorTree *DT,
+ const TargetLibraryInfo *TLI,
+ SmallPtrSetImpl<const Value *> &Visited) {
+ // Note that it is not safe to speculate into a malloc'd region because
+ // malloc may return null.
+
+ // These are obviously ok.
+ if (isa<AllocaInst>(V)) return true;
+
+ // It's not always safe to follow a bitcast, for example:
+ // bitcast i8* (alloca i8) to i32*
+ // would result in a 4-byte load from a 1-byte alloca. However,
+ // if we're casting from a pointer from a type of larger size
+ // to a type of smaller size (or the same size), and the alignment
+ // is at least as large as for the resulting pointer type, then
+ // we can look through the bitcast.
+ if (const BitCastOperator *BC = dyn_cast<BitCastOperator>(V)) {
+ Type *STy = BC->getSrcTy()->getPointerElementType(),
+ *DTy = BC->getDestTy()->getPointerElementType();
+ if (STy->isSized() && DTy->isSized() &&
+ (DL.getTypeStoreSize(STy) >= DL.getTypeStoreSize(DTy)) &&
+ (DL.getABITypeAlignment(STy) >= DL.getABITypeAlignment(DTy)))
+ return isDereferenceablePointer(BC->getOperand(0), DL, CtxI,
+ DT, TLI, Visited);
+ }
+
+ // Global variables which can't collapse to null are ok.
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
+ return !GV->hasExternalWeakLinkage();
+
+ // byval arguments are okay.
+ if (const Argument *A = dyn_cast<Argument>(V))
+ if (A->hasByValAttr())
+ return true;
+
+ if (isDereferenceableFromAttribute(V, DL, CtxI, DT, TLI))
+ return true;
+
+ // For GEPs, determine if the indexing lands within the allocated object.
+ if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+ // Conservatively require that the base pointer be fully dereferenceable.
+ if (!Visited.insert(GEP->getOperand(0)).second)
+ return false;
+ if (!isDereferenceablePointer(GEP->getOperand(0), DL, CtxI,
+ DT, TLI, Visited))
+ return false;
+ // Check the indices.
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ for (User::const_op_iterator I = GEP->op_begin()+1,
+ E = GEP->op_end(); I != E; ++I) {
+ Value *Index = *I;
+ Type *Ty = *GTI++;
+ // Struct indices can't be out of bounds.
+ if (isa<StructType>(Ty))
+ continue;
+ ConstantInt *CI = dyn_cast<ConstantInt>(Index);
+ if (!CI)
+ return false;
+ // Zero is always ok.
+ if (CI->isZero())
+ continue;
+ // Check to see that it's within the bounds of an array.
+ ArrayType *ATy = dyn_cast<ArrayType>(Ty);
+ if (!ATy)
+ return false;
+ if (CI->getValue().getActiveBits() > 64)
+ return false;
+ if (CI->getZExtValue() >= ATy->getNumElements())
+ return false;
+ }
+ // Indices check out; this is dereferenceable.
+ return true;
+ }
+
+ // For gc.relocate, look through relocations
+ if (const IntrinsicInst *I = dyn_cast<IntrinsicInst>(V))
+ if (I->getIntrinsicID() == Intrinsic::experimental_gc_relocate) {
+ GCRelocateOperands RelocateInst(I);
+ return isDereferenceablePointer(RelocateInst.getDerivedPtr(), DL, CtxI,
+ DT, TLI, Visited);
+ }
+
+ if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(V))
+ return isDereferenceablePointer(ASC->getOperand(0), DL, CtxI,
+ DT, TLI, Visited);
+
+ // If we don't know, assume the worst.
+ return false;
+}
+
+bool llvm::isDereferenceablePointer(const Value *V, const DataLayout &DL,
+ const Instruction *CtxI,
+ const DominatorTree *DT,
+ const TargetLibraryInfo *TLI) {
+ // When dereferenceability information is provided by a dereferenceable
+ // attribute, we know exactly how many bytes are dereferenceable. If we can
+ // determine the exact offset to the attributed variable, we can use that
+ // information here.
+ Type *VTy = V->getType();
+ Type *Ty = VTy->getPointerElementType();
+ if (Ty->isSized()) {
+ APInt Offset(DL.getTypeStoreSizeInBits(VTy), 0);
+ const Value *BV = V->stripAndAccumulateInBoundsConstantOffsets(DL, Offset);
+
+ if (Offset.isNonNegative())
+ if (isDereferenceableFromAttribute(BV, Offset, Ty, DL,
+ CtxI, DT, TLI))
+ return true;
+ }
+
+ SmallPtrSet<const Value *, 32> Visited;
+ return ::isDereferenceablePointer(V, DL, CtxI, DT, TLI, Visited);
+}
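Roughly how the new context-sensitive entry point is called. An alloca is accepted immediately, so no context instruction, dominator tree, or TLI is needed. This is a sketch, not part of the patch, and it assumes the corresponding declaration in ValueTracking.h; the names below are made up:

    #include "llvm/Analysis/ValueTracking.h"
    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"

    static bool allocaIsDereferenceableSketch() {
      llvm::LLVMContext Ctx;
      llvm::Module M("sketch", Ctx);
      auto *FTy = llvm::FunctionType::get(llvm::Type::getVoidTy(Ctx), false);
      auto *F = llvm::cast<llvm::Function>(M.getOrInsertFunction("f", FTy));
      llvm::IRBuilder<> B(llvm::BasicBlock::Create(Ctx, "entry", F));
      llvm::Value *Slot = B.CreateAlloca(B.getInt32Ty()); // trivially dereferenceable
      B.CreateRetVoid();
      return llvm::isDereferenceablePointer(Slot, M.getDataLayout(),
                                            /*CtxI=*/nullptr, /*DT=*/nullptr,
                                            /*TLI=*/nullptr);
    }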
+
bool llvm::isSafeToSpeculativelyExecute(const Value *V,
- const DataLayout *TD) {
+ const Instruction *CtxI,
+ const DominatorTree *DT,
+ const TargetLibraryInfo *TLI) {
const Operator *Inst = dyn_cast<Operator>(V);
if (!Inst)
return false;
@@ -2567,20 +3068,20 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
case Instruction::SDiv:
case Instruction::SRem: {
// x / y is undefined if y == 0 or x == INT_MIN and y == -1
- const APInt *X, *Y;
- if (match(Inst->getOperand(1), m_APInt(Y))) {
- if (*Y != 0) {
- if (*Y == -1) {
- // The numerator can't be MinSignedValue if the denominator is -1.
- if (match(Inst->getOperand(0), m_APInt(X)))
- return !Y->isMinSignedValue();
- // The numerator *might* be MinSignedValue.
- return false;
- }
- // The denominator is not 0 or -1, it's safe to proceed.
- return true;
- }
- }
+ const APInt *Numerator, *Denominator;
+ if (!match(Inst->getOperand(1), m_APInt(Denominator)))
+ return false;
+ // We cannot hoist this division if the denominator is 0.
+ if (*Denominator == 0)
+ return false;
+ // It's safe to hoist if the denominator is not 0 or -1.
+ if (*Denominator != -1)
+ return true;
+ // At this point we know that the denominator is -1. It is safe to hoist as
+ // long we know that the numerator is not INT_MIN.
+ if (match(Inst->getOperand(0), m_APInt(Numerator)))
+ return !Numerator->isMinSignedValue();
+ // The numerator *might* be MinSignedValue.
return false;
}
case Instruction::Load: {
@@ -2589,7 +3090,8 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
// Speculative load may create a race that did not exist in the source.
LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeThread))
return false;
- return LI->getPointerOperand()->isDereferenceablePointer(TD);
+ const DataLayout &DL = LI->getModule()->getDataLayout();
+ return isDereferenceablePointer(LI->getPointerOperand(), DL, CtxI, DT, TLI);
}
case Instruction::Call: {
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
@@ -2669,7 +3171,7 @@ bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) {
if (const LoadInst *LI = dyn_cast<LoadInst>(V))
return LI->getMetadata(LLVMContext::MD_nonnull);
- if (ImmutableCallSite CS = V)
+ if (auto CS = ImmutableCallSite(V))
if (CS.isReturnNonNull())
return true;
@@ -2680,8 +3182,62 @@ bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) {
return false;
}
+static bool isKnownNonNullFromDominatingCondition(const Value *V,
+ const Instruction *CtxI,
+ const DominatorTree *DT) {
+ unsigned NumUsesExplored = 0;
+ for (auto U : V->users()) {
+ // Avoid massive lists
+ if (NumUsesExplored >= DomConditionsMaxUses)
+ break;
+ NumUsesExplored++;
+ // Consider only compare instructions uniquely controlling a branch
+ const ICmpInst *Cmp = dyn_cast<ICmpInst>(U);
+ if (!Cmp)
+ continue;
+
+ if (DomConditionsSingleCmpUse && !Cmp->hasOneUse())
+ continue;
+
+ for (auto *CmpU : Cmp->users()) {
+ const BranchInst *BI = dyn_cast<BranchInst>(CmpU);
+ if (!BI)
+ continue;
+
+ assert(BI->isConditional() && "uses a comparison!");
+
+ BasicBlock *NonNullSuccessor = nullptr;
+ CmpInst::Predicate Pred;
+
+ if (match(const_cast<ICmpInst*>(Cmp),
+ m_c_ICmp(Pred, m_Specific(V), m_Zero()))) {
+ if (Pred == ICmpInst::ICMP_EQ)
+ NonNullSuccessor = BI->getSuccessor(1);
+ else if (Pred == ICmpInst::ICMP_NE)
+ NonNullSuccessor = BI->getSuccessor(0);
+ }
+
+ if (NonNullSuccessor) {
+ BasicBlockEdge Edge(BI->getParent(), NonNullSuccessor);
+ if (Edge.isSingleEdge() && DT->dominates(Edge, CtxI->getParent()))
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+bool llvm::isKnownNonNullAt(const Value *V, const Instruction *CtxI,
+ const DominatorTree *DT, const TargetLibraryInfo *TLI) {
+ if (isKnownNonNull(V, TLI))
+ return true;
+
+ return CtxI ? ::isKnownNonNullFromDominatingCondition(V, CtxI, DT) : false;
+}
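The dominating-condition walk looks for a null comparison on V whose branch uniquely gates the block containing the context instruction. The shape it recognizes, sketched in IR (illustrative only, not taken from the patch):

      %c = icmp eq i32* %p, null
      br i1 %c, label %is_null, label %use

    use:                                ; reachable only via the "not null" edge
      %v = load i32, i32* %p            ; isKnownNonNullAt(%p, <this load>, DT) -> true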
+
OverflowResult llvm::computeOverflowForUnsignedMul(Value *LHS, Value *RHS,
- const DataLayout *DL,
+ const DataLayout &DL,
AssumptionCache *AC,
const Instruction *CxtI,
const DominatorTree *DT) {
@@ -2731,7 +3287,7 @@ OverflowResult llvm::computeOverflowForUnsignedMul(Value *LHS, Value *RHS,
}
OverflowResult llvm::computeOverflowForUnsignedAdd(Value *LHS, Value *RHS,
- const DataLayout *DL,
+ const DataLayout &DL,
AssumptionCache *AC,
const Instruction *CxtI,
const DominatorTree *DT) {
@@ -2758,3 +3314,133 @@ OverflowResult llvm::computeOverflowForUnsignedAdd(Value *LHS, Value *RHS,
return OverflowResult::MayOverflow;
}
+
+static SelectPatternFlavor matchSelectPattern(ICmpInst::Predicate Pred,
+ Value *CmpLHS, Value *CmpRHS,
+ Value *TrueVal, Value *FalseVal,
+ Value *&LHS, Value *&RHS) {
+ LHS = CmpLHS;
+ RHS = CmpRHS;
+
+ // (icmp X, Y) ? X : Y
+ if (TrueVal == CmpLHS && FalseVal == CmpRHS) {
+ switch (Pred) {
+ default: return SPF_UNKNOWN; // Equality.
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE: return SPF_UMAX;
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE: return SPF_SMAX;
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE: return SPF_UMIN;
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE: return SPF_SMIN;
+ }
+ }
+
+ // (icmp X, Y) ? Y : X
+ if (TrueVal == CmpRHS && FalseVal == CmpLHS) {
+ switch (Pred) {
+ default: return SPF_UNKNOWN; // Equality.
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE: return SPF_UMIN;
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE: return SPF_SMIN;
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE: return SPF_UMAX;
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE: return SPF_SMAX;
+ }
+ }
+
+ if (ConstantInt *C1 = dyn_cast<ConstantInt>(CmpRHS)) {
+ if ((CmpLHS == TrueVal && match(FalseVal, m_Neg(m_Specific(CmpLHS)))) ||
+ (CmpLHS == FalseVal && match(TrueVal, m_Neg(m_Specific(CmpLHS))))) {
+
+ // ABS(X) ==> (X >s 0) ? X : -X and (X >s -1) ? X : -X
+ // NABS(X) ==> (X >s 0) ? -X : X and (X >s -1) ? -X : X
+ if (Pred == ICmpInst::ICMP_SGT && (C1->isZero() || C1->isMinusOne())) {
+ return (CmpLHS == TrueVal) ? SPF_ABS : SPF_NABS;
+ }
+
+ // ABS(X) ==> (X <s 0) ? -X : X and (X <s 1) ? -X : X
+ // NABS(X) ==> (X <s 0) ? X : -X and (X <s 1) ? X : -X
+ if (Pred == ICmpInst::ICMP_SLT && (C1->isZero() || C1->isOne())) {
+ return (CmpLHS == FalseVal) ? SPF_ABS : SPF_NABS;
+ }
+ }
+
+ // Y >s C ? ~Y : ~C == ~Y <s ~C ? ~Y : ~C = SMIN(~Y, ~C)
+ if (const auto *C2 = dyn_cast<ConstantInt>(FalseVal)) {
+ if (C1->getType() == C2->getType() && ~C1->getValue() == C2->getValue() &&
+ (match(TrueVal, m_Not(m_Specific(CmpLHS))) ||
+ match(CmpLHS, m_Not(m_Specific(TrueVal))))) {
+ LHS = TrueVal;
+ RHS = FalseVal;
+ return SPF_SMIN;
+ }
+ }
+ }
+
+ // TODO: (X > 4) ? X : 5 --> (X >= 5) ? X : 5 --> MAX(X, 5)
+
+ return SPF_UNKNOWN;
+}
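The "Y >s C ? ~Y : ~C" case relies on bitwise-not being order-reversing for signed values, so the select is a signed minimum of the complemented operands. A quick check with concrete i8 values (illustrative, not part of the patch), with C = 5 and therefore ~C = -6:

      Y = 7:  7 >s 5 is true  -> select yields ~Y = -8;  smin(~7, ~5) = smin(-8, -6) = -8
      Y = 3:  3 >s 5 is false -> select yields ~C = -6;  smin(~3, ~5) = smin(-4, -6) = -6

In both rows the select equals smin(~Y, ~C), which is why the code reports SPF_SMIN with LHS = TrueVal (~Y) and RHS = FalseVal (~C).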
+
+static Constant *lookThroughCast(ICmpInst *CmpI, Value *V1, Value *V2,
+ Instruction::CastOps *CastOp) {
+ CastInst *CI = dyn_cast<CastInst>(V1);
+ Constant *C = dyn_cast<Constant>(V2);
+ if (!CI || !C)
+ return nullptr;
+ *CastOp = CI->getOpcode();
+
+ if (isa<SExtInst>(CI) && CmpI->isSigned()) {
+ Constant *T = ConstantExpr::getTrunc(C, CI->getSrcTy());
+ // This is only valid if the truncated value can be sign-extended
+ // back to the original value.
+ if (ConstantExpr::getSExt(T, C->getType()) == C)
+ return T;
+ return nullptr;
+ }
+ if (isa<ZExtInst>(CI) && CmpI->isUnsigned())
+ return ConstantExpr::getTrunc(C, CI->getSrcTy());
+
+ if (isa<TruncInst>(CI))
+ return ConstantExpr::getIntegerCast(C, CI->getSrcTy(), CmpI->isSigned());
+
+ return nullptr;
+}
+
+SelectPatternFlavor llvm::matchSelectPattern(Value *V,
+ Value *&LHS, Value *&RHS,
+ Instruction::CastOps *CastOp) {
+ SelectInst *SI = dyn_cast<SelectInst>(V);
+ if (!SI) return SPF_UNKNOWN;
+
+ ICmpInst *CmpI = dyn_cast<ICmpInst>(SI->getCondition());
+ if (!CmpI) return SPF_UNKNOWN;
+
+ ICmpInst::Predicate Pred = CmpI->getPredicate();
+ Value *CmpLHS = CmpI->getOperand(0);
+ Value *CmpRHS = CmpI->getOperand(1);
+ Value *TrueVal = SI->getTrueValue();
+ Value *FalseVal = SI->getFalseValue();
+
+ // Bail out early.
+ if (CmpI->isEquality())
+ return SPF_UNKNOWN;
+
+ // Deal with type mismatches.
+ if (CastOp && CmpLHS->getType() != TrueVal->getType()) {
+ if (Constant *C = lookThroughCast(CmpI, TrueVal, FalseVal, CastOp))
+ return ::matchSelectPattern(Pred, CmpLHS, CmpRHS,
+ cast<CastInst>(TrueVal)->getOperand(0), C,
+ LHS, RHS);
+ if (Constant *C = lookThroughCast(CmpI, FalseVal, TrueVal, CastOp))
+ return ::matchSelectPattern(Pred, CmpLHS, CmpRHS,
+ C, cast<CastInst>(FalseVal)->getOperand(0),
+ LHS, RHS);
+ }
+ return ::matchSelectPattern(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal,
+ LHS, RHS);
+}
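Two selects and the flavor the routine above would report for them, written as IR for illustration (not part of the patch; value names are invented):

      %c = icmp sgt i32 %a, %b
      %m = select i1 %c, i32 %a, i32 %b        ; SPF_SMAX, LHS = %a, RHS = %b

      %neg = sub i32 0, %x
      %d = icmp slt i32 %x, 0
      %abs = select i1 %d, i32 %neg, i32 %x    ; SPF_ABS (the "(X <s 0) ? -X : X" form)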