diff options
Diffstat (limited to 'lib')
178 files changed, 7255 insertions, 2888 deletions
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt index 0a83c3d..17c9b86 100644 --- a/lib/Analysis/CMakeLists.txt +++ b/lib/Analysis/CMakeLists.txt @@ -27,6 +27,7 @@ add_llvm_library(LLVMAnalysis LoopPass.cpp MemoryBuiltins.cpp MemoryDependenceAnalysis.cpp + PHITransAddr.cpp PointerTracking.cpp PostDominators.cpp ProfileEstimatorPass.cpp diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp index a276c64..10a8b11 100644 --- a/lib/Analysis/CaptureTracking.cpp +++ b/lib/Analysis/CaptureTracking.cpp @@ -25,6 +25,16 @@ #include "llvm/Support/CallSite.h" using namespace llvm; +/// As its comment mentions, PointerMayBeCaptured can be expensive. +/// However, it's not easy for BasicAA to cache the result, because +/// it's an ImmutablePass. To work around this, bound queries at a +/// fixed number of uses. +/// +/// TODO: Write a new FunctionPass AliasAnalysis so that it can keep +/// a cache. Then we can move the code from BasicAliasAnalysis into +/// that path, and remove this threshold. +static int const Threshold = 20; + /// PointerMayBeCaptured - Return true if this pointer value may be captured /// by the enclosing function (which is required to exist). This routine can /// be expensive, so consider caching the results. The boolean ReturnCaptures @@ -35,11 +45,17 @@ using namespace llvm; bool llvm::PointerMayBeCaptured(const Value *V, bool ReturnCaptures, bool StoreCaptures) { assert(isa<PointerType>(V->getType()) && "Capture is for pointers only!"); - SmallVector<Use*, 16> Worklist; - SmallSet<Use*, 16> Visited; + SmallVector<Use*, Threshold> Worklist; + SmallSet<Use*, Threshold> Visited; + int Count = 0; for (Value::use_const_iterator UI = V->use_begin(), UE = V->use_end(); UI != UE; ++UI) { + // If there are lots of uses, conservatively say that the value + // is captured to avoid taking too much compile time. + if (Count++ >= Threshold) + return true; + Use *U = &UI.getUse(); Visited.insert(U); Worklist.push_back(U); diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index 96f738e..eaf90d0 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -432,7 +432,7 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, // Instead of loading constant c string, use corresponding integer value // directly if string length is small enough. std::string Str; - if (TD && GetConstantStringInfo(CE->getOperand(0), Str) && !Str.empty()) { + if (TD && GetConstantStringInfo(CE, Str) && !Str.empty()) { unsigned StrLen = Str.length(); const Type *Ty = cast<PointerType>(CE->getType())->getElementType(); unsigned NumBits = Ty->getPrimitiveSizeInBits(); @@ -569,9 +569,16 @@ static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps, SmallVector<Constant*, 32> NewIdxs; do { if (const SequentialType *ATy = dyn_cast<SequentialType>(Ty)) { - // The only pointer indexing we'll do is on the first index of the GEP. - if (isa<PointerType>(ATy) && !NewIdxs.empty()) - break; + if (isa<PointerType>(ATy)) { + // The only pointer indexing we'll do is on the first index of the GEP. + if (!NewIdxs.empty()) + break; + + // Only handle pointers to sized types, not pointers to functions. + if (!ATy->getElementType()->isSized()) + return 0; + } + // Determine which element of the array the offset points into. APInt ElemSize(BitWidth, TD->getTypeAllocSize(ATy->getElementType())); if (ElemSize == 0) diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp index 41d803c..1c9f500 100644 --- a/lib/Analysis/DebugInfo.cpp +++ b/lib/Analysis/DebugInfo.cpp @@ -866,7 +866,9 @@ DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context, DICompileUnit CompileUnit, unsigned LineNo, DIType Type, bool isLocalToUnit, - bool isDefinition) { + bool isDefinition, + unsigned VK, unsigned VIndex, + DIType ContainingType) { Value *Elts[] = { GetTagConstant(dwarf::DW_TAG_subprogram), @@ -879,9 +881,38 @@ DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context, ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), Type.getNode(), ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit), - ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition) + ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition), + ConstantInt::get(Type::getInt32Ty(VMContext), (unsigned)VK), + ConstantInt::get(Type::getInt32Ty(VMContext), VIndex), + ContainingType.getNode() }; - return DISubprogram(MDNode::get(VMContext, &Elts[0], 11)); + return DISubprogram(MDNode::get(VMContext, &Elts[0], 14)); +} + +/// CreateSubprogramDefinition - Create new subprogram descriptor for the +/// given declaration. +DISubprogram DIFactory::CreateSubprogramDefinition(DISubprogram &SPDeclaration) { + if (SPDeclaration.isDefinition()) + return DISubprogram(SPDeclaration.getNode()); + + MDNode *DeclNode = SPDeclaration.getNode(); + Value *Elts[] = { + GetTagConstant(dwarf::DW_TAG_subprogram), + llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), + DeclNode->getElement(2), // Context + DeclNode->getElement(3), // Name + DeclNode->getElement(4), // DisplayName + DeclNode->getElement(5), // LinkageName + DeclNode->getElement(6), // CompileUnit + DeclNode->getElement(7), // LineNo + DeclNode->getElement(8), // Type + DeclNode->getElement(9), // isLocalToUnit + ConstantInt::get(Type::getInt1Ty(VMContext), true), + DeclNode->getElement(11), // Virtuality + DeclNode->getElement(12), // VIndex + DeclNode->getElement(13) // Containting Type + }; + return DISubprogram(MDNode::get(VMContext, &Elts[0], 14)); } /// CreateGlobalVariable - Create a new descriptor for the specified global. @@ -1019,6 +1050,37 @@ Instruction *DIFactory::InsertDeclare(Value *Storage, DIVariable D, return CallInst::Create(DeclareFn, Args, Args+2, "", InsertAtEnd); } +/// InsertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call. +Instruction *DIFactory::InsertDbgValueIntrinsic(Value *V, Value *Offset, + DIVariable D, + Instruction *InsertBefore) { + assert(V && "no value passed to dbg.value"); + assert(Offset->getType() == Type::getInt64Ty(V->getContext()) && + "offset must be i64"); + if (!ValueFn) + ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value); + + Value *Elts[] = { V }; + Value *Args[] = { MDNode::get(V->getContext(), Elts, 1), Offset, + D.getNode() }; + return CallInst::Create(ValueFn, Args, Args+3, "", InsertBefore); +} + +/// InsertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call. +Instruction *DIFactory::InsertDbgValueIntrinsic(Value *V, Value *Offset, + DIVariable D, + BasicBlock *InsertAtEnd) { + assert(V && "no value passed to dbg.value"); + assert(Offset->getType() == Type::getInt64Ty(V->getContext()) && + "offset must be i64"); + if (!ValueFn) + ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value); + + Value *Elts[] = { V }; + Value *Args[] = { MDNode::get(V->getContext(), Elts, 1), Offset, + D.getNode() }; + return CallInst::Create(ValueFn, Args, Args+3, "", InsertAtEnd); +} //===----------------------------------------------------------------------===// // DebugInfoFinder implementations. diff --git a/lib/Analysis/IPA/Andersens.cpp b/lib/Analysis/IPA/Andersens.cpp index e12db81..4d5b312 100644 --- a/lib/Analysis/IPA/Andersens.cpp +++ b/lib/Analysis/IPA/Andersens.cpp @@ -121,8 +121,6 @@ namespace { return *LHS == *RHS; } - - static bool isPod() { return true; } }; class Andersens : public ModulePass, public AliasAnalysis, diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp index 37747b6..627dbbb 100644 --- a/lib/Analysis/IVUsers.cpp +++ b/lib/Analysis/IVUsers.cpp @@ -53,7 +53,7 @@ static bool containsAddRecFromDifferentLoop(const SCEV *S, Loop *L) { if (newLoop == L) return false; // if newLoop is an outer loop of L, this is OK. - if (!LoopInfo::isNotAlreadyContainedIn(L, newLoop)) + if (newLoop->contains(L->getHeader())) return false; } return true; @@ -307,6 +307,7 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) { for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) AddUsersIfInteresting(I); + Processed.clear(); return false; } @@ -369,7 +370,7 @@ void IVUsers::dump() const { void IVUsers::releaseMemory() { IVUsesByStride.clear(); StrideOrder.clear(); - Processed.clear(); + IVUses.clear(); } void IVStrideUse::deleted() { diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp index 4de756c..34089ee 100644 --- a/lib/Analysis/LoopInfo.cpp +++ b/lib/Analysis/LoopInfo.cpp @@ -316,12 +316,12 @@ bool Loop::hasDedicatedExits() const { /// getUniqueExitBlocks - Return all unique successor blocks of this loop. /// These are the blocks _outside of the current loop_ which are branched to. -/// This assumes that loop is in canonical form. +/// This assumes that loop exits are in canonical form. /// void Loop::getUniqueExitBlocks(SmallVectorImpl<BasicBlock *> &ExitBlocks) const { - assert(isLoopSimplifyForm() && - "getUniqueExitBlocks assumes the loop is in canonical form!"); + assert(hasDedicatedExits() && + "getUniqueExitBlocks assumes the loop has canonical form exits!"); // Sort the blocks vector so that we can use binary search to do quick // lookups. diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index ae6f970..a0c7706 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -23,6 +23,7 @@ #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/PHITransAddr.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/PredIteratorCache.h" @@ -172,7 +173,7 @@ MemDepResult MemoryDependenceAnalysis:: getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad, BasicBlock::iterator ScanIt, BasicBlock *BB) { - Value *invariantTag = 0; + Value *InvariantTag = 0; // Walk backwards through the basic block, looking for dependencies. while (ScanIt != BB->begin()) { @@ -180,34 +181,36 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad, // If we're in an invariant region, no dependencies can be found before // we pass an invariant-begin marker. - if (invariantTag == Inst) { - invariantTag = 0; + if (InvariantTag == Inst) { + InvariantTag = 0; continue; - } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { + } + + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { + // Debug intrinsics don't cause dependences. + if (isa<DbgInfoIntrinsic>(Inst)) continue; + // If we pass an invariant-end marker, then we've just entered an // invariant region and can start ignoring dependencies. if (II->getIntrinsicID() == Intrinsic::invariant_end) { - uint64_t invariantSize = ~0ULL; - if (ConstantInt *CI = dyn_cast<ConstantInt>(II->getOperand(2))) - invariantSize = CI->getZExtValue(); - - AliasAnalysis::AliasResult R = - AA->alias(II->getOperand(3), invariantSize, MemPtr, MemSize); + // FIXME: This only considers queries directly on the invariant-tagged + // pointer, not on query pointers that are indexed off of them. It'd + // be nice to handle that at some point. + AliasAnalysis::AliasResult R = + AA->alias(II->getOperand(3), ~0U, MemPtr, ~0U); if (R == AliasAnalysis::MustAlias) { - invariantTag = II->getOperand(1); + InvariantTag = II->getOperand(1); continue; } // If we reach a lifetime begin or end marker, then the query ends here // because the value is undefined. - } else if (II->getIntrinsicID() == Intrinsic::lifetime_start || - II->getIntrinsicID() == Intrinsic::lifetime_end) { - uint64_t invariantSize = ~0ULL; - if (ConstantInt *CI = dyn_cast<ConstantInt>(II->getOperand(1))) - invariantSize = CI->getZExtValue(); - + } else if (II->getIntrinsicID() == Intrinsic::lifetime_start) { + // FIXME: This only considers queries directly on the invariant-tagged + // pointer, not on query pointers that are indexed off of them. It'd + // be nice to handle that at some point. AliasAnalysis::AliasResult R = - AA->alias(II->getOperand(2), invariantSize, MemPtr, MemSize); + AA->alias(II->getOperand(2), ~0U, MemPtr, ~0U); if (R == AliasAnalysis::MustAlias) return MemDepResult::getDef(II); } @@ -215,10 +218,7 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad, // If we're querying on a load and we're in an invariant region, we're done // at this point. Nothing a load depends on can live in an invariant region. - if (isLoad && invariantTag) continue; - - // Debug intrinsics don't cause dependences. - if (isa<DbgInfoIntrinsic>(Inst)) continue; + if (isLoad && InvariantTag) continue; // Values depend on loads if the pointers are must aliased. This means that // a load depends on another must aliased load from the same value. @@ -243,7 +243,7 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad, if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { // There can't be stores to the value we care about inside an // invariant region. - if (invariantTag) continue; + if (InvariantTag) continue; // If alias analysis can tell that this store is guaranteed to not modify // the query pointer, ignore it. Use getModRefInfo to handle cases where @@ -292,7 +292,7 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad, case AliasAnalysis::Mod: // If we're in an invariant region, we can ignore calls that ONLY // modify the pointer. - if (invariantTag) continue; + if (InvariantTag) continue; return MemDepResult::getClobber(Inst); case AliasAnalysis::Ref: // If the call is known to never store to the pointer, and if this is a @@ -374,21 +374,22 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) { IntrinsicID = II->getIntrinsicID(); switch (IntrinsicID) { - case Intrinsic::lifetime_start: - case Intrinsic::lifetime_end: - case Intrinsic::invariant_start: - MemPtr = QueryInst->getOperand(2); - MemSize = cast<ConstantInt>(QueryInst->getOperand(1))->getZExtValue(); - break; - case Intrinsic::invariant_end: - MemPtr = QueryInst->getOperand(3); - MemSize = cast<ConstantInt>(QueryInst->getOperand(2))->getZExtValue(); - break; - default: - CallSite QueryCS = CallSite::get(QueryInst); - bool isReadOnly = AA->onlyReadsMemory(QueryCS); - LocalCache = getCallSiteDependencyFrom(QueryCS, isReadOnly, ScanPos, - QueryParent); + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + case Intrinsic::invariant_start: + MemPtr = QueryInst->getOperand(2); + MemSize = cast<ConstantInt>(QueryInst->getOperand(1))->getZExtValue(); + break; + case Intrinsic::invariant_end: + MemPtr = QueryInst->getOperand(3); + MemSize = cast<ConstantInt>(QueryInst->getOperand(2))->getZExtValue(); + break; + default: + CallSite QueryCS = CallSite::get(QueryInst); + bool isReadOnly = AA->onlyReadsMemory(QueryCS); + LocalCache = getCallSiteDependencyFrom(QueryCS, isReadOnly, ScanPos, + QueryParent); + break; } } else { // Non-memory instruction. @@ -421,7 +422,7 @@ static void AssertSorted(MemoryDependenceAnalysis::NonLocalDepInfo &Cache, if (Count == 0) return; for (unsigned i = 1; i != unsigned(Count); ++i) - assert(Cache[i-1] <= Cache[i] && "Cache isn't sorted!"); + assert(!(Cache[i] < Cache[i-1]) && "Cache isn't sorted!"); } #endif @@ -462,8 +463,8 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { // determine what is dirty, seeding our initial DirtyBlocks worklist. for (NonLocalDepInfo::iterator I = Cache.begin(), E = Cache.end(); I != E; ++I) - if (I->second.isDirty()) - DirtyBlocks.push_back(I->first); + if (I->getResult().isDirty()) + DirtyBlocks.push_back(I->getBB()); // Sort the cache so that we can do fast binary search lookups below. std::sort(Cache.begin(), Cache.end()); @@ -501,27 +502,27 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { DEBUG(AssertSorted(Cache, NumSortedEntries)); NonLocalDepInfo::iterator Entry = std::upper_bound(Cache.begin(), Cache.begin()+NumSortedEntries, - std::make_pair(DirtyBB, MemDepResult())); - if (Entry != Cache.begin() && prior(Entry)->first == DirtyBB) + NonLocalDepEntry(DirtyBB)); + if (Entry != Cache.begin() && prior(Entry)->getBB() == DirtyBB) --Entry; - MemDepResult *ExistingResult = 0; + NonLocalDepEntry *ExistingResult = 0; if (Entry != Cache.begin()+NumSortedEntries && - Entry->first == DirtyBB) { + Entry->getBB() == DirtyBB) { // If we already have an entry, and if it isn't already dirty, the block // is done. - if (!Entry->second.isDirty()) + if (!Entry->getResult().isDirty()) continue; // Otherwise, remember this slot so we can update the value. - ExistingResult = &Entry->second; + ExistingResult = &*Entry; } // If the dirty entry has a pointer, start scanning from it so we don't have // to rescan the entire block. BasicBlock::iterator ScanPos = DirtyBB->end(); if (ExistingResult) { - if (Instruction *Inst = ExistingResult->getInst()) { + if (Instruction *Inst = ExistingResult->getResult().getInst()) { ScanPos = Inst; // We're removing QueryInst's use of Inst. RemoveFromReverseMap(ReverseNonLocalDeps, Inst, @@ -545,9 +546,9 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { // If we had a dirty entry for the block, update it. Otherwise, just add // a new entry. if (ExistingResult) - *ExistingResult = Dep; + ExistingResult->setResult(Dep, 0); else - Cache.push_back(std::make_pair(DirtyBB, Dep)); + Cache.push_back(NonLocalDepEntry(DirtyBB, Dep, 0)); // If the block has a dependency (i.e. it isn't completely transparent to // the value), remember the association! @@ -587,17 +588,20 @@ getNonLocalPointerDependency(Value *Pointer, bool isLoad, BasicBlock *FromBB, const Type *EltTy = cast<PointerType>(Pointer->getType())->getElementType(); uint64_t PointeeSize = AA->getTypeStoreSize(EltTy); + PHITransAddr Address(Pointer, TD); + // This is the set of blocks we've inspected, and the pointer we consider in // each block. Because of critical edges, we currently bail out if querying // a block with multiple different pointers. This can happen during PHI // translation. DenseMap<BasicBlock*, Value*> Visited; - if (!getNonLocalPointerDepFromBB(Pointer, PointeeSize, isLoad, FromBB, + if (!getNonLocalPointerDepFromBB(Address, PointeeSize, isLoad, FromBB, Result, Visited, true)) return; Result.clear(); - Result.push_back(std::make_pair(FromBB, - MemDepResult::getClobber(FromBB->begin()))); + Result.push_back(NonLocalDepEntry(FromBB, + MemDepResult::getClobber(FromBB->begin()), + Pointer)); } /// GetNonLocalInfoForBlock - Compute the memdep value for BB with @@ -613,30 +617,30 @@ GetNonLocalInfoForBlock(Value *Pointer, uint64_t PointeeSize, // the cache set. If so, find it. NonLocalDepInfo::iterator Entry = std::upper_bound(Cache->begin(), Cache->begin()+NumSortedEntries, - std::make_pair(BB, MemDepResult())); - if (Entry != Cache->begin() && prior(Entry)->first == BB) + NonLocalDepEntry(BB)); + if (Entry != Cache->begin() && (Entry-1)->getBB() == BB) --Entry; - MemDepResult *ExistingResult = 0; - if (Entry != Cache->begin()+NumSortedEntries && Entry->first == BB) - ExistingResult = &Entry->second; + NonLocalDepEntry *ExistingResult = 0; + if (Entry != Cache->begin()+NumSortedEntries && Entry->getBB() == BB) + ExistingResult = &*Entry; // If we have a cached entry, and it is non-dirty, use it as the value for // this dependency. - if (ExistingResult && !ExistingResult->isDirty()) { + if (ExistingResult && !ExistingResult->getResult().isDirty()) { ++NumCacheNonLocalPtr; - return *ExistingResult; + return ExistingResult->getResult(); } // Otherwise, we have to scan for the value. If we have a dirty cache // entry, start scanning from its position, otherwise we scan from the end // of the block. BasicBlock::iterator ScanPos = BB->end(); - if (ExistingResult && ExistingResult->getInst()) { - assert(ExistingResult->getInst()->getParent() == BB && + if (ExistingResult && ExistingResult->getResult().getInst()) { + assert(ExistingResult->getResult().getInst()->getParent() == BB && "Instruction invalidated?"); ++NumCacheDirtyNonLocalPtr; - ScanPos = ExistingResult->getInst(); + ScanPos = ExistingResult->getResult().getInst(); // Eliminating the dirty entry from 'Cache', so update the reverse info. ValueIsLoadPair CacheKey(Pointer, isLoad); @@ -652,9 +656,9 @@ GetNonLocalInfoForBlock(Value *Pointer, uint64_t PointeeSize, // If we had a dirty entry for the block, update it. Otherwise, just add // a new entry. if (ExistingResult) - *ExistingResult = Dep; + ExistingResult->setResult(Dep, Pointer); else - Cache->push_back(std::make_pair(BB, Dep)); + Cache->push_back(NonLocalDepEntry(BB, Dep, Pointer)); // If the block has a dependency (i.e. it isn't completely transparent to // the value), remember the reverse association because we just added it @@ -683,7 +687,7 @@ SortNonLocalDepInfoCache(MemoryDependenceAnalysis::NonLocalDepInfo &Cache, break; case 2: { // Two new entries, insert the last one into place. - MemoryDependenceAnalysis::NonLocalDepEntry Val = Cache.back(); + NonLocalDepEntry Val = Cache.back(); Cache.pop_back(); MemoryDependenceAnalysis::NonLocalDepInfo::iterator Entry = std::upper_bound(Cache.begin(), Cache.end()-1, Val); @@ -693,7 +697,7 @@ SortNonLocalDepInfoCache(MemoryDependenceAnalysis::NonLocalDepInfo &Cache, case 1: // One new entry, Just insert the new value at the appropriate position. if (Cache.size() != 1) { - MemoryDependenceAnalysis::NonLocalDepEntry Val = Cache.back(); + NonLocalDepEntry Val = Cache.back(); Cache.pop_back(); MemoryDependenceAnalysis::NonLocalDepInfo::iterator Entry = std::upper_bound(Cache.begin(), Cache.end(), Val); @@ -707,275 +711,6 @@ SortNonLocalDepInfoCache(MemoryDependenceAnalysis::NonLocalDepInfo &Cache, } } -/// isPHITranslatable - Return true if the specified computation is derived from -/// a PHI node in the current block and if it is simple enough for us to handle. -static bool isPHITranslatable(Instruction *Inst) { - if (isa<PHINode>(Inst)) - return true; - - // We can handle bitcast of a PHI, but the PHI needs to be in the same block - // as the bitcast. - if (BitCastInst *BC = dyn_cast<BitCastInst>(Inst)) { - Instruction *OpI = dyn_cast<Instruction>(BC->getOperand(0)); - if (OpI == 0 || OpI->getParent() != Inst->getParent()) - return true; - return isPHITranslatable(OpI); - } - - // We can translate a GEP if all of its operands defined in this block are phi - // translatable. - if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) { - for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) { - Instruction *OpI = dyn_cast<Instruction>(GEP->getOperand(i)); - if (OpI == 0 || OpI->getParent() != Inst->getParent()) - continue; - - if (!isPHITranslatable(OpI)) - return false; - } - return true; - } - - if (Inst->getOpcode() == Instruction::Add && - isa<ConstantInt>(Inst->getOperand(1))) { - Instruction *OpI = dyn_cast<Instruction>(Inst->getOperand(0)); - if (OpI == 0 || OpI->getParent() != Inst->getParent()) - return true; - return isPHITranslatable(OpI); - } - - // cerr << "MEMDEP: Could not PHI translate: " << *Pointer; - // if (isa<BitCastInst>(PtrInst) || isa<GetElementPtrInst>(PtrInst)) - // cerr << "OP:\t\t\t\t" << *PtrInst->getOperand(0); - - return false; -} - -/// GetPHITranslatedValue - Given a computation that satisfied the -/// isPHITranslatable predicate, see if we can translate the computation into -/// the specified predecessor block. If so, return that value. -Value *MemoryDependenceAnalysis:: -GetPHITranslatedValue(Value *InVal, BasicBlock *CurBB, BasicBlock *Pred, - const TargetData *TD) const { - // If the input value is not an instruction, or if it is not defined in CurBB, - // then we don't need to phi translate it. - Instruction *Inst = dyn_cast<Instruction>(InVal); - if (Inst == 0 || Inst->getParent() != CurBB) - return InVal; - - if (PHINode *PN = dyn_cast<PHINode>(Inst)) - return PN->getIncomingValueForBlock(Pred); - - // Handle bitcast of PHI. - if (BitCastInst *BC = dyn_cast<BitCastInst>(Inst)) { - // PHI translate the input operand. - Value *PHIIn = GetPHITranslatedValue(BC->getOperand(0), CurBB, Pred, TD); - if (PHIIn == 0) return 0; - - // Constants are trivial to phi translate. - if (Constant *C = dyn_cast<Constant>(PHIIn)) - return ConstantExpr::getBitCast(C, BC->getType()); - - // Otherwise we have to see if a bitcasted version of the incoming pointer - // is available. If so, we can use it, otherwise we have to fail. - for (Value::use_iterator UI = PHIIn->use_begin(), E = PHIIn->use_end(); - UI != E; ++UI) { - if (BitCastInst *BCI = dyn_cast<BitCastInst>(*UI)) - if (BCI->getType() == BC->getType()) - return BCI; - } - return 0; - } - - // Handle getelementptr with at least one PHI translatable operand. - if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) { - SmallVector<Value*, 8> GEPOps; - BasicBlock *CurBB = GEP->getParent(); - for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) { - Value *GEPOp = GEP->getOperand(i); - // No PHI translation is needed of operands whose values are live in to - // the predecessor block. - if (!isa<Instruction>(GEPOp) || - cast<Instruction>(GEPOp)->getParent() != CurBB) { - GEPOps.push_back(GEPOp); - continue; - } - - // If the operand is a phi node, do phi translation. - Value *InOp = GetPHITranslatedValue(GEPOp, CurBB, Pred, TD); - if (InOp == 0) return 0; - - GEPOps.push_back(InOp); - } - - // Simplify the GEP to handle 'gep x, 0' -> x etc. - if (Value *V = SimplifyGEPInst(&GEPOps[0], GEPOps.size(), TD)) - return V; - - // Scan to see if we have this GEP available. - Value *APHIOp = GEPOps[0]; - for (Value::use_iterator UI = APHIOp->use_begin(), E = APHIOp->use_end(); - UI != E; ++UI) { - if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(*UI)) - if (GEPI->getType() == GEP->getType() && - GEPI->getNumOperands() == GEPOps.size() && - GEPI->getParent()->getParent() == CurBB->getParent()) { - bool Mismatch = false; - for (unsigned i = 0, e = GEPOps.size(); i != e; ++i) - if (GEPI->getOperand(i) != GEPOps[i]) { - Mismatch = true; - break; - } - if (!Mismatch) - return GEPI; - } - } - return 0; - } - - // Handle add with a constant RHS. - if (Inst->getOpcode() == Instruction::Add && - isa<ConstantInt>(Inst->getOperand(1))) { - // PHI translate the LHS. - Value *LHS; - Constant *RHS = cast<ConstantInt>(Inst->getOperand(1)); - Instruction *OpI = dyn_cast<Instruction>(Inst->getOperand(0)); - bool isNSW = cast<BinaryOperator>(Inst)->hasNoSignedWrap(); - bool isNUW = cast<BinaryOperator>(Inst)->hasNoUnsignedWrap(); - - if (OpI == 0 || OpI->getParent() != Inst->getParent()) - LHS = Inst->getOperand(0); - else { - LHS = GetPHITranslatedValue(Inst->getOperand(0), CurBB, Pred, TD); - if (LHS == 0) - return 0; - } - - // If the PHI translated LHS is an add of a constant, fold the immediates. - if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(LHS)) - if (BOp->getOpcode() == Instruction::Add) - if (ConstantInt *CI = dyn_cast<ConstantInt>(BOp->getOperand(1))) { - LHS = BOp->getOperand(0); - RHS = ConstantExpr::getAdd(RHS, CI); - isNSW = isNUW = false; - } - - // See if the add simplifies away. - if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, TD)) - return Res; - - // Otherwise, see if we have this add available somewhere. - for (Value::use_iterator UI = LHS->use_begin(), E = LHS->use_end(); - UI != E; ++UI) { - if (BinaryOperator *BO = dyn_cast<BinaryOperator>(*UI)) - if (BO->getOperand(0) == LHS && BO->getOperand(1) == RHS && - BO->getParent()->getParent() == CurBB->getParent()) - return BO; - } - - return 0; - } - - return 0; -} - -/// GetAvailablePHITranslatePointer - Return the value computed by -/// PHITranslatePointer if it dominates PredBB, otherwise return null. -Value *MemoryDependenceAnalysis:: -GetAvailablePHITranslatedValue(Value *V, - BasicBlock *CurBB, BasicBlock *PredBB, - const TargetData *TD, - const DominatorTree &DT) const { - // See if PHI translation succeeds. - V = GetPHITranslatedValue(V, CurBB, PredBB, TD); - if (V == 0) return 0; - - // Make sure the value is live in the predecessor. - if (Instruction *Inst = dyn_cast_or_null<Instruction>(V)) - if (!DT.dominates(Inst->getParent(), PredBB)) - return 0; - return V; -} - - -/// InsertPHITranslatedPointer - Insert a computation of the PHI translated -/// version of 'V' for the edge PredBB->CurBB into the end of the PredBB -/// block. All newly created instructions are added to the NewInsts list. -/// -Value *MemoryDependenceAnalysis:: -InsertPHITranslatedPointer(Value *InVal, BasicBlock *CurBB, - BasicBlock *PredBB, const TargetData *TD, - const DominatorTree &DT, - SmallVectorImpl<Instruction*> &NewInsts) const { - // See if we have a version of this value already available and dominating - // PredBB. If so, there is no need to insert a new copy. - if (Value *Res = GetAvailablePHITranslatedValue(InVal, CurBB, PredBB, TD, DT)) - return Res; - - // If we don't have an available version of this value, it must be an - // instruction. - Instruction *Inst = cast<Instruction>(InVal); - - // Handle bitcast of PHI translatable value. - if (BitCastInst *BC = dyn_cast<BitCastInst>(Inst)) { - Value *OpVal = InsertPHITranslatedPointer(BC->getOperand(0), - CurBB, PredBB, TD, DT, NewInsts); - if (OpVal == 0) return 0; - - // Otherwise insert a bitcast at the end of PredBB. - BitCastInst *New = new BitCastInst(OpVal, InVal->getType(), - InVal->getName()+".phi.trans.insert", - PredBB->getTerminator()); - NewInsts.push_back(New); - return New; - } - - // Handle getelementptr with at least one PHI operand. - if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) { - SmallVector<Value*, 8> GEPOps; - BasicBlock *CurBB = GEP->getParent(); - for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) { - Value *OpVal = InsertPHITranslatedPointer(GEP->getOperand(i), - CurBB, PredBB, TD, DT, NewInsts); - if (OpVal == 0) return 0; - GEPOps.push_back(OpVal); - } - - GetElementPtrInst *Result = - GetElementPtrInst::Create(GEPOps[0], GEPOps.begin()+1, GEPOps.end(), - InVal->getName()+".phi.trans.insert", - PredBB->getTerminator()); - Result->setIsInBounds(GEP->isInBounds()); - NewInsts.push_back(Result); - return Result; - } - -#if 0 - // FIXME: This code works, but it is unclear that we actually want to insert - // a big chain of computation in order to make a value available in a block. - // This needs to be evaluated carefully to consider its cost trade offs. - - // Handle add with a constant RHS. - if (Inst->getOpcode() == Instruction::Add && - isa<ConstantInt>(Inst->getOperand(1))) { - // PHI translate the LHS. - Value *OpVal = InsertPHITranslatedPointer(Inst->getOperand(0), - CurBB, PredBB, TD, DT, NewInsts); - if (OpVal == 0) return 0; - - BinaryOperator *Res = BinaryOperator::CreateAdd(OpVal, Inst->getOperand(1), - InVal->getName()+".phi.trans.insert", - PredBB->getTerminator()); - Res->setHasNoSignedWrap(cast<BinaryOperator>(Inst)->hasNoSignedWrap()); - Res->setHasNoUnsignedWrap(cast<BinaryOperator>(Inst)->hasNoUnsignedWrap()); - NewInsts.push_back(Res); - return Res; - } -#endif - - return 0; -} - /// getNonLocalPointerDepFromBB - Perform a dependency query based on /// pointer/pointeesize starting at the end of StartBB. Add any clobber/def /// results to the results vector and keep track of which blocks are visited in @@ -989,14 +724,14 @@ InsertPHITranslatedPointer(Value *InVal, BasicBlock *CurBB, /// not compute dependence information for some reason. This should be treated /// as a clobber dependence on the first instruction in the predecessor block. bool MemoryDependenceAnalysis:: -getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize, +getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, uint64_t PointeeSize, bool isLoad, BasicBlock *StartBB, SmallVectorImpl<NonLocalDepEntry> &Result, DenseMap<BasicBlock*, Value*> &Visited, bool SkipFirstBlock) { // Look up the cached info for Pointer. - ValueIsLoadPair CacheKey(Pointer, isLoad); + ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad); std::pair<BBSkipFirstBlockPair, NonLocalDepInfo> *CacheInfo = &NonLocalPointerDeps[CacheKey]; @@ -1013,8 +748,9 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize, if (!Visited.empty()) { for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end(); I != E; ++I) { - DenseMap<BasicBlock*, Value*>::iterator VI = Visited.find(I->first); - if (VI == Visited.end() || VI->second == Pointer) continue; + DenseMap<BasicBlock*, Value*>::iterator VI = Visited.find(I->getBB()); + if (VI == Visited.end() || VI->second == Pointer.getAddr()) + continue; // We have a pointer mismatch in a block. Just return clobber, saying // that something was clobbered in this result. We could also do a @@ -1025,8 +761,8 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize, for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end(); I != E; ++I) { - Visited.insert(std::make_pair(I->first, Pointer)); - if (!I->second.isNonLocal()) + Visited.insert(std::make_pair(I->getBB(), Pointer.getAddr())); + if (!I->getResult().isNonLocal()) Result.push_back(*I); } ++NumCacheCompleteNonLocalPtr; @@ -1065,30 +801,27 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize, // Get the dependency info for Pointer in BB. If we have cached // information, we will use it, otherwise we compute it. DEBUG(AssertSorted(*Cache, NumSortedEntries)); - MemDepResult Dep = GetNonLocalInfoForBlock(Pointer, PointeeSize, isLoad, - BB, Cache, NumSortedEntries); + MemDepResult Dep = GetNonLocalInfoForBlock(Pointer.getAddr(), PointeeSize, + isLoad, BB, Cache, + NumSortedEntries); // If we got a Def or Clobber, add this to the list of results. if (!Dep.isNonLocal()) { - Result.push_back(NonLocalDepEntry(BB, Dep)); + Result.push_back(NonLocalDepEntry(BB, Dep, Pointer.getAddr())); continue; } } // If 'Pointer' is an instruction defined in this block, then we need to do // phi translation to change it into a value live in the predecessor block. - // If phi translation fails, then we can't continue dependence analysis. - Instruction *PtrInst = dyn_cast<Instruction>(Pointer); - bool NeedsPHITranslation = PtrInst && PtrInst->getParent() == BB; - - // If no PHI translation is needed, just add all the predecessors of this - // block to scan them as well. - if (!NeedsPHITranslation) { + // If not, we just add the predecessors to the worklist and scan them with + // the same Pointer. + if (!Pointer.NeedsPHITranslationFromBlock(BB)) { SkipFirstBlock = false; for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) { // Verify that we haven't looked at this block yet. std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool> - InsertRes = Visited.insert(std::make_pair(*PI, Pointer)); + InsertRes = Visited.insert(std::make_pair(*PI, Pointer.getAddr())); if (InsertRes.second) { // First time we've looked at *PI. Worklist.push_back(*PI); @@ -1098,16 +831,17 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize, // If we have seen this block before, but it was with a different // pointer then we have a phi translation failure and we have to treat // this as a clobber. - if (InsertRes.first->second != Pointer) + if (InsertRes.first->second != Pointer.getAddr()) goto PredTranslationFailure; } continue; } - // If we do need to do phi translation, then there are a bunch of different - // cases, because we have to find a Value* live in the predecessor block. We - // know that PtrInst is defined in this block at least. - + // We do need to do phi translation, if we know ahead of time we can't phi + // translate this value, don't even try. + if (!Pointer.IsPotentiallyPHITranslatable()) + goto PredTranslationFailure; + // We may have added values to the cache list before this PHI translation. // If so, we haven't done anything to ensure that the cache remains sorted. // Sort it now (if needed) so that recursive invocations of @@ -1117,19 +851,17 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize, SortNonLocalDepInfoCache(*Cache, NumSortedEntries); NumSortedEntries = Cache->size(); } - - // If this is a computation derived from a PHI node, use the suitably - // translated incoming values for each pred as the phi translated version. - if (!isPHITranslatable(PtrInst)) - goto PredTranslationFailure; - Cache = 0; - + for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) { BasicBlock *Pred = *PI; - // Get the PHI translated pointer in this predecessor. This can fail and - // return null if not translatable. - Value *PredPtr = GetPHITranslatedValue(PtrInst, BB, Pred, TD); + + // Get the PHI translated pointer in this predecessor. This can fail if + // not translatable, in which case the getAddr() returns null. + PHITransAddr PredPointer(Pointer); + PredPointer.PHITranslateValue(BB, Pred); + + Value *PredPtrVal = PredPointer.getAddr(); // Check to see if we have already visited this pred block with another // pointer. If so, we can't do this lookup. This failure can occur @@ -1137,12 +869,12 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize, // the successor translates to a pointer value different than the // pointer the block was first analyzed with. std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool> - InsertRes = Visited.insert(std::make_pair(Pred, PredPtr)); + InsertRes = Visited.insert(std::make_pair(Pred, PredPtrVal)); if (!InsertRes.second) { // If the predecessor was visited with PredPtr, then we already did // the analysis and can ignore it. - if (InsertRes.first->second == PredPtr) + if (InsertRes.first->second == PredPtrVal) continue; // Otherwise, the block was previously analyzed with a different @@ -1155,10 +887,11 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize, // predecessor, then we have to assume that the pointer is clobbered in // that predecessor. We can still do PRE of the load, which would insert // a computation of the pointer in this predecessor. - if (PredPtr == 0) { + if (PredPtrVal == 0) { // Add the entry to the Result list. NonLocalDepEntry Entry(Pred, - MemDepResult::getClobber(Pred->getTerminator())); + MemDepResult::getClobber(Pred->getTerminator()), + PredPtrVal); Result.push_back(Entry); // Add it to the cache for this CacheKey so that subsequent queries get @@ -1167,27 +900,27 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize, MemoryDependenceAnalysis::NonLocalDepInfo::iterator It = std::upper_bound(Cache->begin(), Cache->end(), Entry); - if (It != Cache->begin() && prior(It)->first == Pred) + if (It != Cache->begin() && (It-1)->getBB() == Pred) --It; - if (It == Cache->end() || It->first != Pred) { + if (It == Cache->end() || It->getBB() != Pred) { Cache->insert(It, Entry); // Add it to the reverse map. ReverseNonLocalPtrDeps[Pred->getTerminator()].insert(CacheKey); - } else if (!It->second.isDirty()) { + } else if (!It->getResult().isDirty()) { // noop - } else if (It->second.getInst() == Pred->getTerminator()) { + } else if (It->getResult().getInst() == Pred->getTerminator()) { // Same instruction, clear the dirty marker. - It->second = Entry.second; - } else if (It->second.getInst() == 0) { + It->setResult(Entry.getResult(), PredPtrVal); + } else if (It->getResult().getInst() == 0) { // Dirty, with no instruction, just add this. - It->second = Entry.second; + It->setResult(Entry.getResult(), PredPtrVal); ReverseNonLocalPtrDeps[Pred->getTerminator()].insert(CacheKey); } else { // Otherwise, dirty with a different instruction. - RemoveFromReverseMap(ReverseNonLocalPtrDeps, It->second.getInst(), - CacheKey); - It->second = Entry.second; + RemoveFromReverseMap(ReverseNonLocalPtrDeps, + It->getResult().getInst(), CacheKey); + It->setResult(Entry.getResult(),PredPtrVal); ReverseNonLocalPtrDeps[Pred->getTerminator()].insert(CacheKey); } Cache = 0; @@ -1201,7 +934,7 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize, // If we have a problem phi translating, fall through to the code below // to handle the failure condition. - if (getNonLocalPointerDepFromBB(PredPtr, PointeeSize, isLoad, Pred, + if (getNonLocalPointerDepFromBB(PredPointer, PointeeSize, isLoad, Pred, Result, Visited)) goto PredTranslationFailure; } @@ -1245,12 +978,12 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize, for (NonLocalDepInfo::reverse_iterator I = Cache->rbegin(); ; ++I) { assert(I != Cache->rend() && "Didn't find current block??"); - if (I->first != BB) + if (I->getBB() != BB) continue; - assert(I->second.isNonLocal() && + assert(I->getResult().isNonLocal() && "Should only be here with transparent block"); - I->second = MemDepResult::getClobber(BB->begin()); + I->setResult(MemDepResult::getClobber(BB->begin()), Pointer.getAddr()); ReverseNonLocalPtrDeps[BB->begin()].insert(CacheKey); Result.push_back(*I); break; @@ -1276,9 +1009,9 @@ RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair P) { NonLocalDepInfo &PInfo = It->second.second; for (unsigned i = 0, e = PInfo.size(); i != e; ++i) { - Instruction *Target = PInfo[i].second.getInst(); + Instruction *Target = PInfo[i].getResult().getInst(); if (Target == 0) continue; // Ignore non-local dep results. - assert(Target->getParent() == PInfo[i].first); + assert(Target->getParent() == PInfo[i].getBB()); // Eliminating the dirty entry from 'Cache', so update the reverse info. RemoveFromReverseMap(ReverseNonLocalPtrDeps, Target, P); @@ -1315,7 +1048,7 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) { NonLocalDepInfo &BlockMap = NLDI->second.first; for (NonLocalDepInfo::iterator DI = BlockMap.begin(), DE = BlockMap.end(); DI != DE; ++DI) - if (Instruction *Inst = DI->second.getInst()) + if (Instruction *Inst = DI->getResult().getInst()) RemoveFromReverseMap(ReverseNonLocalDeps, Inst, RemInst); NonLocalDeps.erase(NLDI); } @@ -1403,10 +1136,10 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) { for (NonLocalDepInfo::iterator DI = INLD.first.begin(), DE = INLD.first.end(); DI != DE; ++DI) { - if (DI->second.getInst() != RemInst) continue; + if (DI->getResult().getInst() != RemInst) continue; // Convert to a dirty entry for the subsequent instruction. - DI->second = NewDirtyVal; + DI->setResult(NewDirtyVal, DI->getAddress()); if (Instruction *NextI = NewDirtyVal.getInst()) ReverseDepsToAdd.push_back(std::make_pair(NextI, *I)); @@ -1445,10 +1178,10 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) { // Update any entries for RemInst to use the instruction after it. for (NonLocalDepInfo::iterator DI = NLPDI.begin(), DE = NLPDI.end(); DI != DE; ++DI) { - if (DI->second.getInst() != RemInst) continue; + if (DI->getResult().getInst() != RemInst) continue; // Convert to a dirty entry for the subsequent instruction. - DI->second = NewDirtyVal; + DI->setResult(NewDirtyVal, DI->getAddress()); if (Instruction *NewDirtyInst = NewDirtyVal.getInst()) ReversePtrDepsToAdd.push_back(std::make_pair(NewDirtyInst, P)); @@ -1489,7 +1222,7 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const { const NonLocalDepInfo &Val = I->second.second; for (NonLocalDepInfo::const_iterator II = Val.begin(), E = Val.end(); II != E; ++II) - assert(II->second.getInst() != D && "Inst occurs as NLPD value"); + assert(II->getResult().getInst() != D && "Inst occurs as NLPD value"); } for (NonLocalDepMapType::const_iterator I = NonLocalDeps.begin(), @@ -1498,7 +1231,7 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const { const PerInstNLInfo &INLD = I->second; for (NonLocalDepInfo::const_iterator II = INLD.first.begin(), EE = INLD.first.end(); II != EE; ++II) - assert(II->second.getInst() != D && "Inst occurs in data structures"); + assert(II->getResult().getInst() != D && "Inst occurs in data structures"); } for (ReverseDepMapType::const_iterator I = ReverseLocalDeps.begin(), diff --git a/lib/Analysis/PHITransAddr.cpp b/lib/Analysis/PHITransAddr.cpp new file mode 100644 index 0000000..07e2919 --- /dev/null +++ b/lib/Analysis/PHITransAddr.cpp @@ -0,0 +1,432 @@ +//===- PHITransAddr.cpp - PHI Translation for Addresses -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the PHITransAddr class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/PHITransAddr.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +static bool CanPHITrans(Instruction *Inst) { + if (isa<PHINode>(Inst) || + isa<BitCastInst>(Inst) || + isa<GetElementPtrInst>(Inst)) + return true; + + if (Inst->getOpcode() == Instruction::Add && + isa<ConstantInt>(Inst->getOperand(1))) + return true; + + // cerr << "MEMDEP: Could not PHI translate: " << *Pointer; + // if (isa<BitCastInst>(PtrInst) || isa<GetElementPtrInst>(PtrInst)) + // cerr << "OP:\t\t\t\t" << *PtrInst->getOperand(0); + return false; +} + +void PHITransAddr::dump() const { + if (Addr == 0) { + errs() << "PHITransAddr: null\n"; + return; + } + errs() << "PHITransAddr: " << *Addr << "\n"; + for (unsigned i = 0, e = InstInputs.size(); i != e; ++i) + errs() << " Input #" << i << " is " << *InstInputs[i] << "\n"; +} + + +static bool VerifySubExpr(Value *Expr, + SmallVectorImpl<Instruction*> &InstInputs) { + // If this is a non-instruction value, there is nothing to do. + Instruction *I = dyn_cast<Instruction>(Expr); + if (I == 0) return true; + + // If it's an instruction, it is either in Tmp or its operands recursively + // are. + SmallVectorImpl<Instruction*>::iterator Entry = + std::find(InstInputs.begin(), InstInputs.end(), I); + if (Entry != InstInputs.end()) { + InstInputs.erase(Entry); + return true; + } + + // If it isn't in the InstInputs list it is a subexpr incorporated into the + // address. Sanity check that it is phi translatable. + if (!CanPHITrans(I)) { + errs() << "Non phi translatable instruction found in PHITransAddr, either " + "something is missing from InstInputs or CanPHITrans is wrong:\n"; + errs() << *I << '\n'; + return false; + } + + // Validate the operands of the instruction. + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) + if (!VerifySubExpr(I->getOperand(i), InstInputs)) + return false; + + return true; +} + +/// Verify - Check internal consistency of this data structure. If the +/// structure is valid, it returns true. If invalid, it prints errors and +/// returns false. +bool PHITransAddr::Verify() const { + if (Addr == 0) return true; + + SmallVector<Instruction*, 8> Tmp(InstInputs.begin(), InstInputs.end()); + + if (!VerifySubExpr(Addr, Tmp)) + return false; + + if (!Tmp.empty()) { + errs() << "PHITransAddr inconsistent, contains extra instructions:\n"; + for (unsigned i = 0, e = InstInputs.size(); i != e; ++i) + errs() << " InstInput #" << i << " is " << *InstInputs[i] << "\n"; + return false; + } + + // a-ok. + return true; +} + + +/// IsPotentiallyPHITranslatable - If this needs PHI translation, return true +/// if we have some hope of doing it. This should be used as a filter to +/// avoid calling PHITranslateValue in hopeless situations. +bool PHITransAddr::IsPotentiallyPHITranslatable() const { + // If the input value is not an instruction, or if it is not defined in CurBB, + // then we don't need to phi translate it. + Instruction *Inst = dyn_cast<Instruction>(Addr); + return Inst == 0 || CanPHITrans(Inst); +} + + +static void RemoveInstInputs(Value *V, + SmallVectorImpl<Instruction*> &InstInputs) { + Instruction *I = dyn_cast<Instruction>(V); + if (I == 0) return; + + // If the instruction is in the InstInputs list, remove it. + SmallVectorImpl<Instruction*>::iterator Entry = + std::find(InstInputs.begin(), InstInputs.end(), I); + if (Entry != InstInputs.end()) { + InstInputs.erase(Entry); + return; + } + + assert(!isa<PHINode>(I) && "Error, removing something that isn't an input"); + + // Otherwise, it must have instruction inputs itself. Zap them recursively. + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { + if (Instruction *Op = dyn_cast<Instruction>(I->getOperand(i))) + RemoveInstInputs(Op, InstInputs); + } +} + +Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, + BasicBlock *PredBB) { + // If this is a non-instruction value, it can't require PHI translation. + Instruction *Inst = dyn_cast<Instruction>(V); + if (Inst == 0) return V; + + // Determine whether 'Inst' is an input to our PHI translatable expression. + bool isInput = std::count(InstInputs.begin(), InstInputs.end(), Inst); + + // Handle inputs instructions if needed. + if (isInput) { + if (Inst->getParent() != CurBB) { + // If it is an input defined in a different block, then it remains an + // input. + return Inst; + } + + // If 'Inst' is defined in this block and is an input that needs to be phi + // translated, we need to incorporate the value into the expression or fail. + + // In either case, the instruction itself isn't an input any longer. + InstInputs.erase(std::find(InstInputs.begin(), InstInputs.end(), Inst)); + + // If this is a PHI, go ahead and translate it. + if (PHINode *PN = dyn_cast<PHINode>(Inst)) + return AddAsInput(PN->getIncomingValueForBlock(PredBB)); + + // If this is a non-phi value, and it is analyzable, we can incorporate it + // into the expression by making all instruction operands be inputs. + if (!CanPHITrans(Inst)) + return 0; + + // All instruction operands are now inputs (and of course, they may also be + // defined in this block, so they may need to be phi translated themselves. + for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i) + if (Instruction *Op = dyn_cast<Instruction>(Inst->getOperand(i))) + InstInputs.push_back(Op); + } + + // Ok, it must be an intermediate result (either because it started that way + // or because we just incorporated it into the expression). See if its + // operands need to be phi translated, and if so, reconstruct it. + + if (BitCastInst *BC = dyn_cast<BitCastInst>(Inst)) { + Value *PHIIn = PHITranslateSubExpr(BC->getOperand(0), CurBB, PredBB); + if (PHIIn == 0) return 0; + if (PHIIn == BC->getOperand(0)) + return BC; + + // Find an available version of this cast. + + // Constants are trivial to find. + if (Constant *C = dyn_cast<Constant>(PHIIn)) + return AddAsInput(ConstantExpr::getBitCast(C, BC->getType())); + + // Otherwise we have to see if a bitcasted version of the incoming pointer + // is available. If so, we can use it, otherwise we have to fail. + for (Value::use_iterator UI = PHIIn->use_begin(), E = PHIIn->use_end(); + UI != E; ++UI) { + if (BitCastInst *BCI = dyn_cast<BitCastInst>(*UI)) + if (BCI->getType() == BC->getType()) + return BCI; + } + return 0; + } + + // Handle getelementptr with at least one PHI translatable operand. + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) { + SmallVector<Value*, 8> GEPOps; + bool AnyChanged = false; + for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) { + Value *GEPOp = PHITranslateSubExpr(GEP->getOperand(i), CurBB, PredBB); + if (GEPOp == 0) return 0; + + AnyChanged |= GEPOp != GEP->getOperand(i); + GEPOps.push_back(GEPOp); + } + + if (!AnyChanged) + return GEP; + + // Simplify the GEP to handle 'gep x, 0' -> x etc. + if (Value *V = SimplifyGEPInst(&GEPOps[0], GEPOps.size(), TD)) { + for (unsigned i = 0, e = GEPOps.size(); i != e; ++i) + RemoveInstInputs(GEPOps[i], InstInputs); + + return AddAsInput(V); + } + + // Scan to see if we have this GEP available. + Value *APHIOp = GEPOps[0]; + for (Value::use_iterator UI = APHIOp->use_begin(), E = APHIOp->use_end(); + UI != E; ++UI) { + if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(*UI)) + if (GEPI->getType() == GEP->getType() && + GEPI->getNumOperands() == GEPOps.size() && + GEPI->getParent()->getParent() == CurBB->getParent()) { + bool Mismatch = false; + for (unsigned i = 0, e = GEPOps.size(); i != e; ++i) + if (GEPI->getOperand(i) != GEPOps[i]) { + Mismatch = true; + break; + } + if (!Mismatch) + return GEPI; + } + } + return 0; + } + + // Handle add with a constant RHS. + if (Inst->getOpcode() == Instruction::Add && + isa<ConstantInt>(Inst->getOperand(1))) { + // PHI translate the LHS. + Constant *RHS = cast<ConstantInt>(Inst->getOperand(1)); + bool isNSW = cast<BinaryOperator>(Inst)->hasNoSignedWrap(); + bool isNUW = cast<BinaryOperator>(Inst)->hasNoUnsignedWrap(); + + Value *LHS = PHITranslateSubExpr(Inst->getOperand(0), CurBB, PredBB); + if (LHS == 0) return 0; + + // If the PHI translated LHS is an add of a constant, fold the immediates. + if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(LHS)) + if (BOp->getOpcode() == Instruction::Add) + if (ConstantInt *CI = dyn_cast<ConstantInt>(BOp->getOperand(1))) { + LHS = BOp->getOperand(0); + RHS = ConstantExpr::getAdd(RHS, CI); + isNSW = isNUW = false; + + // If the old 'LHS' was an input, add the new 'LHS' as an input. + if (std::count(InstInputs.begin(), InstInputs.end(), BOp)) { + RemoveInstInputs(BOp, InstInputs); + AddAsInput(LHS); + } + } + + // See if the add simplifies away. + if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, TD)) { + // If we simplified the operands, the LHS is no longer an input, but Res + // is. + RemoveInstInputs(LHS, InstInputs); + return AddAsInput(Res); + } + + // If we didn't modify the add, just return it. + if (LHS == Inst->getOperand(0) && RHS == Inst->getOperand(1)) + return Inst; + + // Otherwise, see if we have this add available somewhere. + for (Value::use_iterator UI = LHS->use_begin(), E = LHS->use_end(); + UI != E; ++UI) { + if (BinaryOperator *BO = dyn_cast<BinaryOperator>(*UI)) + if (BO->getOpcode() == Instruction::Add && + BO->getOperand(0) == LHS && BO->getOperand(1) == RHS && + BO->getParent()->getParent() == CurBB->getParent()) + return BO; + } + + return 0; + } + + // Otherwise, we failed. + return 0; +} + + +/// PHITranslateValue - PHI translate the current address up the CFG from +/// CurBB to Pred, updating our state the reflect any needed changes. This +/// returns true on failure and sets Addr to null. +bool PHITransAddr::PHITranslateValue(BasicBlock *CurBB, BasicBlock *PredBB) { + assert(Verify() && "Invalid PHITransAddr!"); + Addr = PHITranslateSubExpr(Addr, CurBB, PredBB); + assert(Verify() && "Invalid PHITransAddr!"); + return Addr == 0; +} + +/// GetAvailablePHITranslatedSubExpr - Return the value computed by +/// PHITranslateSubExpr if it dominates PredBB, otherwise return null. +Value *PHITransAddr:: +GetAvailablePHITranslatedSubExpr(Value *V, BasicBlock *CurBB,BasicBlock *PredBB, + const DominatorTree &DT) const { + PHITransAddr Tmp(V, TD); + Tmp.PHITranslateValue(CurBB, PredBB); + + // See if PHI translation succeeds. + V = Tmp.getAddr(); + + // Make sure the value is live in the predecessor. + if (Instruction *Inst = dyn_cast_or_null<Instruction>(V)) + if (!DT.dominates(Inst->getParent(), PredBB)) + return 0; + return V; +} + + +/// PHITranslateWithInsertion - PHI translate this value into the specified +/// predecessor block, inserting a computation of the value if it is +/// unavailable. +/// +/// All newly created instructions are added to the NewInsts list. This +/// returns null on failure. +/// +Value *PHITransAddr:: +PHITranslateWithInsertion(BasicBlock *CurBB, BasicBlock *PredBB, + const DominatorTree &DT, + SmallVectorImpl<Instruction*> &NewInsts) { + unsigned NISize = NewInsts.size(); + + // Attempt to PHI translate with insertion. + Addr = InsertPHITranslatedSubExpr(Addr, CurBB, PredBB, DT, NewInsts); + + // If successful, return the new value. + if (Addr) return Addr; + + // If not, destroy any intermediate instructions inserted. + while (NewInsts.size() != NISize) + NewInsts.pop_back_val()->eraseFromParent(); + return 0; +} + + +/// InsertPHITranslatedPointer - Insert a computation of the PHI translated +/// version of 'V' for the edge PredBB->CurBB into the end of the PredBB +/// block. All newly created instructions are added to the NewInsts list. +/// This returns null on failure. +/// +Value *PHITransAddr:: +InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB, + BasicBlock *PredBB, const DominatorTree &DT, + SmallVectorImpl<Instruction*> &NewInsts) { + // See if we have a version of this value already available and dominating + // PredBB. If so, there is no need to insert a new instance of it. + if (Value *Res = GetAvailablePHITranslatedSubExpr(InVal, CurBB, PredBB, DT)) + return Res; + + // If we don't have an available version of this value, it must be an + // instruction. + Instruction *Inst = cast<Instruction>(InVal); + + // Handle bitcast of PHI translatable value. + if (BitCastInst *BC = dyn_cast<BitCastInst>(Inst)) { + Value *OpVal = InsertPHITranslatedSubExpr(BC->getOperand(0), + CurBB, PredBB, DT, NewInsts); + if (OpVal == 0) return 0; + + // Otherwise insert a bitcast at the end of PredBB. + BitCastInst *New = new BitCastInst(OpVal, InVal->getType(), + InVal->getName()+".phi.trans.insert", + PredBB->getTerminator()); + NewInsts.push_back(New); + return New; + } + + // Handle getelementptr with at least one PHI operand. + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) { + SmallVector<Value*, 8> GEPOps; + BasicBlock *CurBB = GEP->getParent(); + for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) { + Value *OpVal = InsertPHITranslatedSubExpr(GEP->getOperand(i), + CurBB, PredBB, DT, NewInsts); + if (OpVal == 0) return 0; + GEPOps.push_back(OpVal); + } + + GetElementPtrInst *Result = + GetElementPtrInst::Create(GEPOps[0], GEPOps.begin()+1, GEPOps.end(), + InVal->getName()+".phi.trans.insert", + PredBB->getTerminator()); + Result->setIsInBounds(GEP->isInBounds()); + NewInsts.push_back(Result); + return Result; + } + +#if 0 + // FIXME: This code works, but it is unclear that we actually want to insert + // a big chain of computation in order to make a value available in a block. + // This needs to be evaluated carefully to consider its cost trade offs. + + // Handle add with a constant RHS. + if (Inst->getOpcode() == Instruction::Add && + isa<ConstantInt>(Inst->getOperand(1))) { + // PHI translate the LHS. + Value *OpVal = InsertPHITranslatedSubExpr(Inst->getOperand(0), + CurBB, PredBB, DT, NewInsts); + if (OpVal == 0) return 0; + + BinaryOperator *Res = BinaryOperator::CreateAdd(OpVal, Inst->getOperand(1), + InVal->getName()+".phi.trans.insert", + PredBB->getTerminator()); + Res->setHasNoSignedWrap(cast<BinaryOperator>(Inst)->hasNoSignedWrap()); + Res->setHasNoUnsignedWrap(cast<BinaryOperator>(Inst)->hasNoUnsignedWrap()); + NewInsts.push_back(Res); + return Res; + } +#endif + + return 0; +} diff --git a/lib/Analysis/ProfileEstimatorPass.cpp b/lib/Analysis/ProfileEstimatorPass.cpp index e767891..8148429 100644 --- a/lib/Analysis/ProfileEstimatorPass.cpp +++ b/lib/Analysis/ProfileEstimatorPass.cpp @@ -35,6 +35,7 @@ namespace { LoopInfo *LI; std::set<BasicBlock*> BBToVisit; std::map<Loop*,double> LoopExitWeights; + std::map<Edge,double> MinimalWeight; public: static char ID; // Class identification, replacement for typeinfo explicit ProfileEstimatorPass(const double execcount = 0) @@ -91,7 +92,7 @@ static void inline printEdgeError(ProfileInfo::Edge e, const char *M) { void inline ProfileEstimatorPass::printEdgeWeight(Edge E) { DEBUG(errs() << "-- Weight of Edge " << E << ":" - << format("%g", getEdgeWeight(E)) << "\n"); + << format("%20.20g", getEdgeWeight(E)) << "\n"); } // recurseBasicBlock() - This calculates the ProfileInfo estimation for a @@ -174,6 +175,12 @@ void ProfileEstimatorPass::recurseBasicBlock(BasicBlock *BB) { double w = getEdgeWeight(*ei); if (w == MissingValue) { Edges.push_back(*ei); + // Check if there is a necessary minimal weight, if yes, subtract it + // from weight. + if (MinimalWeight.find(*ei) != MinimalWeight.end()) { + incoming -= MinimalWeight[*ei]; + DEBUG(errs() << "Reserving " << format("%.20g",MinimalWeight[*ei]) << " at " << (*ei) << "\n"); + } } else { incoming -= w; } @@ -191,11 +198,43 @@ void ProfileEstimatorPass::recurseBasicBlock(BasicBlock *BB) { printEdgeWeight(edge); } } - // Distribute remaining weight onto the exit edges. + + // Distribute remaining weight to the exting edges. To prevent fractions + // from building up and provoking precision problems the weight which is to + // be distributed is split and the rounded, the last edge gets a somewhat + // bigger value, but we are close enough for an estimation. + double fraction = floor(incoming/Edges.size()); for (SmallVector<Edge, 8>::iterator ei = Edges.begin(), ee = Edges.end(); ei != ee; ++ei) { - EdgeInformation[BB->getParent()][*ei] += incoming/Edges.size(); + double w = 0; + if (ei != (ee-1)) { + w = fraction; + incoming -= fraction; + } else { + w = incoming; + } + EdgeInformation[BB->getParent()][*ei] += w; + // Read necessary minimal weight. + if (MinimalWeight.find(*ei) != MinimalWeight.end()) { + EdgeInformation[BB->getParent()][*ei] += MinimalWeight[*ei]; + DEBUG(errs() << "Additionally " << format("%.20g",MinimalWeight[*ei]) << " at " << (*ei) << "\n"); + } printEdgeWeight(*ei); + + // Add minimal weight to paths to all exit edges, this is used to ensure + // that enough flow is reaching this edges. + Path p; + const BasicBlock *Dest = GetPath(BB, (*ei).first, p, GetPathToDest); + while (Dest != BB) { + const BasicBlock *Parent = p.find(Dest)->second; + Edge e = getEdge(Parent, Dest); + if (MinimalWeight.find(e) == MinimalWeight.end()) { + MinimalWeight[e] = 0; + } + MinimalWeight[e] += w; + DEBUG(errs() << "Minimal Weight for " << e << ": " << format("%.20g",MinimalWeight[e]) << "\n"); + Dest = Parent; + } } // Increase flow into the loop. BBWeight *= (ExecCount+1); @@ -203,7 +242,7 @@ void ProfileEstimatorPass::recurseBasicBlock(BasicBlock *BB) { BlockInformation[BB->getParent()][BB] = BBWeight; // Up until now we considered only the loop exiting edges, now we have a - // definite block weight and must ditribute this onto the outgoing edges. + // definite block weight and must distribute this onto the outgoing edges. // Since there may be already flow attached to some of the edges, read this // flow first and remember the edges that have still now flow attached. Edges.clear(); @@ -225,15 +264,32 @@ void ProfileEstimatorPass::recurseBasicBlock(BasicBlock *BB) { BBWeight -= getEdgeWeight(edge); } else { Edges.push_back(edge); + // If minimal weight is necessary, reserve weight by subtracting weight + // from block weight, this is readded later on. + if (MinimalWeight.find(edge) != MinimalWeight.end()) { + BBWeight -= MinimalWeight[edge]; + DEBUG(errs() << "Reserving " << format("%.20g",MinimalWeight[edge]) << " at " << edge << "\n"); + } } } } + double fraction = floor(BBWeight/Edges.size()); // Finally we know what flow is still not leaving the block, distribute this // flow onto the empty edges. for (SmallVector<Edge, 8>::iterator ei = Edges.begin(), ee = Edges.end(); ei != ee; ++ei) { - EdgeInformation[BB->getParent()][*ei] += BBWeight/Edges.size(); + if (ei != (ee-1)) { + EdgeInformation[BB->getParent()][*ei] += fraction; + BBWeight -= fraction; + } else { + EdgeInformation[BB->getParent()][*ei] += BBWeight; + } + // Readd minial necessary weight. + if (MinimalWeight.find(*ei) != MinimalWeight.end()) { + EdgeInformation[BB->getParent()][*ei] += MinimalWeight[*ei]; + DEBUG(errs() << "Additionally " << format("%.20g",MinimalWeight[*ei]) << " at " << (*ei) << "\n"); + } printEdgeWeight(*ei); } @@ -260,20 +316,24 @@ bool ProfileEstimatorPass::runOnFunction(Function &F) { for (Function::iterator bi = F.begin(), be = F.end(); bi != be; ++bi) BBToVisit.insert(bi); + // Clear Minimal Edges. + MinimalWeight.clear(); + DEBUG(errs() << "Working on function " << F.getNameStr() << "\n"); // Since the entry block is the first one and has no predecessors, the edge // (0,entry) is inserted with the starting weight of 1. BasicBlock *entry = &F.getEntryBlock(); - BlockInformation[&F][entry] = 1; + BlockInformation[&F][entry] = pow(2.0, 32.0); Edge edge = getEdge(0,entry); - EdgeInformation[&F][edge] = 1; + EdgeInformation[&F][edge] = BlockInformation[&F][entry]; printEdgeWeight(edge); // Since recurseBasicBlock() maybe returns with a block which was not fully - // estimated, use recurseBasicBlock() until everything is calculated. + // estimated, use recurseBasicBlock() until everything is calculated. + bool cleanup = false; recurseBasicBlock(entry); - while (BBToVisit.size() > 0) { + while (BBToVisit.size() > 0 && !cleanup) { // Remember number of open blocks, this is later used to check if progress // was made. unsigned size = BBToVisit.size(); @@ -287,21 +347,65 @@ bool ProfileEstimatorPass::runOnFunction(Function &F) { if (BBToVisit.size() < size) break; } - // If there was not a single block resovled, make some assumptions. + // If there was not a single block resolved, make some assumptions. if (BBToVisit.size() == size) { - BasicBlock *BB = *(BBToVisit.begin()); - // Since this BB was not calculated because of missing incoming edges, - // set these edges to zero. - for (pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB); - bbi != bbe; ++bbi) { - Edge e = getEdge(*bbi,BB); - double w = getEdgeWeight(e); - if (w == MissingValue) { - EdgeInformation[&F][e] = 0; - DEBUG(errs() << "Assuming edge weight: "); - printEdgeWeight(e); + bool found = false; + for (std::set<BasicBlock*>::iterator BBI = BBToVisit.begin(), BBE = BBToVisit.end(); + (BBI != BBE) && (!found); ++BBI) { + BasicBlock *BB = *BBI; + // Try each predecessor if it can be assumend. + for (pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB); + (bbi != bbe) && (!found); ++bbi) { + Edge e = getEdge(*bbi,BB); + double w = getEdgeWeight(e); + // Check that edge from predecessor is still free. + if (w == MissingValue) { + // Check if there is a circle from this block to predecessor. + Path P; + const BasicBlock *Dest = GetPath(BB, *bbi, P, GetPathToDest); + if (Dest != *bbi) { + // If there is no circle, just set edge weight to 0 + EdgeInformation[&F][e] = 0; + DEBUG(errs() << "Assuming edge weight: "); + printEdgeWeight(e); + found = true; + } + } } } + if (!found) { + cleanup = true; + DEBUG(errs() << "No assumption possible in Fuction "<<F.getName()<<", setting all to zero\n"); + } + } + } + // In case there was no safe way to assume edges, set as a last measure, + // set _everything_ to zero. + if (cleanup) { + FunctionInformation[&F] = 0; + BlockInformation[&F].clear(); + EdgeInformation[&F].clear(); + for (Function::const_iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { + const BasicBlock *BB = &(*FI); + BlockInformation[&F][BB] = 0; + pred_const_iterator predi = pred_begin(BB), prede = pred_end(BB); + if (predi == prede) { + Edge e = getEdge(0,BB); + setEdgeWeight(e,0); + } + for (;predi != prede; ++predi) { + Edge e = getEdge(*predi,BB); + setEdgeWeight(e,0); + } + succ_const_iterator succi = succ_begin(BB), succe = succ_end(BB); + if (succi == succe) { + Edge e = getEdge(BB,0); + setEdgeWeight(e,0); + } + for (;succi != succe; ++succi) { + Edge e = getEdge(*succi,BB); + setEdgeWeight(e,0); + } } } diff --git a/lib/Analysis/ProfileInfo.cpp b/lib/Analysis/ProfileInfo.cpp index 7f24f5a..c49c6e1 100644 --- a/lib/Analysis/ProfileInfo.cpp +++ b/lib/Analysis/ProfileInfo.cpp @@ -11,26 +11,52 @@ // "no profile" implementation. // //===----------------------------------------------------------------------===// - +#define DEBUG_TYPE "profile-info" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/ProfileInfo.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/Pass.h" #include "llvm/Support/CFG.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Support/Format.h" +#include "llvm/ADT/SmallSet.h" #include <set> +#include <queue> +#include <limits> using namespace llvm; // Register the ProfileInfo interface, providing a nice name to refer to. static RegisterAnalysisGroup<ProfileInfo> Z("Profile Information"); -char ProfileInfo::ID = 0; -ProfileInfo::~ProfileInfo() {} +namespace llvm { + +template <> +ProfileInfoT<MachineFunction, MachineBasicBlock>::ProfileInfoT() {} +template <> +ProfileInfoT<MachineFunction, MachineBasicBlock>::~ProfileInfoT() {} + +template <> +ProfileInfoT<Function, BasicBlock>::ProfileInfoT() { + MachineProfile = 0; +} +template <> +ProfileInfoT<Function, BasicBlock>::~ProfileInfoT() { + if (MachineProfile) delete MachineProfile; +} + +template<> +char ProfileInfoT<Function,BasicBlock>::ID = 0; + +template<> +char ProfileInfoT<MachineFunction, MachineBasicBlock>::ID = 0; + +template<> +const double ProfileInfoT<Function,BasicBlock>::MissingValue = -1; -const double ProfileInfo::MissingValue = -1; +template<> const +double ProfileInfoT<MachineFunction, MachineBasicBlock>::MissingValue = -1; -double ProfileInfo::getExecutionCount(const BasicBlock *BB) { +template<> double +ProfileInfoT<Function,BasicBlock>::getExecutionCount(const BasicBlock *BB) { std::map<const Function*, BlockCounts>::iterator J = BlockInformation.find(BB->getParent()); if (J != BlockInformation.end()) { @@ -39,36 +65,74 @@ double ProfileInfo::getExecutionCount(const BasicBlock *BB) { return I->second; } + double Count = MissingValue; + pred_const_iterator PI = pred_begin(BB), PE = pred_end(BB); // Are there zero predecessors of this block? if (PI == PE) { - // If this is the entry block, look for the Null -> Entry edge. - if (BB == &BB->getParent()->getEntryBlock()) - return getEdgeWeight(getEdge(0, BB)); - else - return 0; // Otherwise, this is a dead block. + Edge e = getEdge(0,BB); + Count = getEdgeWeight(e); + } else { + // Otherwise, if there are predecessors, the execution count of this block is + // the sum of the edge frequencies from the incoming edges. + std::set<const BasicBlock*> ProcessedPreds; + Count = 0; + for (; PI != PE; ++PI) + if (ProcessedPreds.insert(*PI).second) { + double w = getEdgeWeight(getEdge(*PI, BB)); + if (w == MissingValue) { + Count = MissingValue; + break; + } + Count += w; + } } - // Otherwise, if there are predecessors, the execution count of this block is - // the sum of the edge frequencies from the incoming edges. - std::set<const BasicBlock*> ProcessedPreds; - double Count = 0; - for (; PI != PE; ++PI) - if (ProcessedPreds.insert(*PI).second) { - double w = getEdgeWeight(getEdge(*PI, BB)); - if (w == MissingValue) { - Count = MissingValue; - break; - } - Count += w; + // If the predecessors did not suffice to get block weight, try successors. + if (Count == MissingValue) { + + succ_const_iterator SI = succ_begin(BB), SE = succ_end(BB); + + // Are there zero successors of this block? + if (SI == SE) { + Edge e = getEdge(BB,0); + Count = getEdgeWeight(e); + } else { + std::set<const BasicBlock*> ProcessedSuccs; + Count = 0; + for (; SI != SE; ++SI) + if (ProcessedSuccs.insert(*SI).second) { + double w = getEdgeWeight(getEdge(BB, *SI)); + if (w == MissingValue) { + Count = MissingValue; + break; + } + Count += w; + } } + } if (Count != MissingValue) BlockInformation[BB->getParent()][BB] = Count; return Count; } -double ProfileInfo::getExecutionCount(const Function *F) { +template<> +double ProfileInfoT<MachineFunction, MachineBasicBlock>:: + getExecutionCount(const MachineBasicBlock *MBB) { + std::map<const MachineFunction*, BlockCounts>::iterator J = + BlockInformation.find(MBB->getParent()); + if (J != BlockInformation.end()) { + BlockCounts::iterator I = J->second.find(MBB); + if (I != J->second.end()) + return I->second; + } + + return MissingValue; +} + +template<> +double ProfileInfoT<Function,BasicBlock>::getExecutionCount(const Function *F) { std::map<const Function*, double>::iterator J = FunctionInformation.find(F); if (J != FunctionInformation.end()) @@ -83,35 +147,211 @@ double ProfileInfo::getExecutionCount(const Function *F) { return Count; } +template<> +double ProfileInfoT<MachineFunction, MachineBasicBlock>:: + getExecutionCount(const MachineFunction *MF) { + std::map<const MachineFunction*, double>::iterator J = + FunctionInformation.find(MF); + if (J != FunctionInformation.end()) + return J->second; + + double Count = getExecutionCount(&MF->front()); + if (Count != MissingValue) FunctionInformation[MF] = Count; + return Count; +} + +template<> +void ProfileInfoT<Function,BasicBlock>:: + setExecutionCount(const BasicBlock *BB, double w) { + DEBUG(errs() << "Creating Block " << BB->getName() + << " (weight: " << format("%.20g",w) << ")\n"); + BlockInformation[BB->getParent()][BB] = w; +} + +template<> +void ProfileInfoT<MachineFunction, MachineBasicBlock>:: + setExecutionCount(const MachineBasicBlock *MBB, double w) { + DEBUG(errs() << "Creating Block " << MBB->getBasicBlock()->getName() + << " (weight: " << format("%.20g",w) << ")\n"); + BlockInformation[MBB->getParent()][MBB] = w; +} + +template<> +void ProfileInfoT<Function,BasicBlock>::addEdgeWeight(Edge e, double w) { + double oldw = getEdgeWeight(e); + assert (oldw != MissingValue && "Adding weight to Edge with no previous weight"); + DEBUG(errs() << "Adding to Edge " << e + << " (new weight: " << format("%.20g",oldw + w) << ")\n"); + EdgeInformation[getFunction(e)][e] = oldw + w; +} + +template<> +void ProfileInfoT<Function,BasicBlock>:: + addExecutionCount(const BasicBlock *BB, double w) { + double oldw = getExecutionCount(BB); + assert (oldw != MissingValue && "Adding weight to Block with no previous weight"); + DEBUG(errs() << "Adding to Block " << BB->getName() + << " (new weight: " << format("%.20g",oldw + w) << ")\n"); + BlockInformation[BB->getParent()][BB] = oldw + w; +} + +template<> +void ProfileInfoT<Function,BasicBlock>::removeBlock(const BasicBlock *BB) { + std::map<const Function*, BlockCounts>::iterator J = + BlockInformation.find(BB->getParent()); + if (J == BlockInformation.end()) return; + + DEBUG(errs() << "Deleting " << BB->getName() << "\n"); + J->second.erase(BB); +} + +template<> +void ProfileInfoT<Function,BasicBlock>::removeEdge(Edge e) { + std::map<const Function*, EdgeWeights>::iterator J = + EdgeInformation.find(getFunction(e)); + if (J == EdgeInformation.end()) return; + + DEBUG(errs() << "Deleting" << e << "\n"); + J->second.erase(e); +} + +template<> +void ProfileInfoT<Function,BasicBlock>:: + replaceEdge(const Edge &oldedge, const Edge &newedge) { + double w; + if ((w = getEdgeWeight(newedge)) == MissingValue) { + w = getEdgeWeight(oldedge); + DEBUG(errs() << "Replacing " << oldedge << " with " << newedge << "\n"); + } else { + w += getEdgeWeight(oldedge); + DEBUG(errs() << "Adding " << oldedge << " to " << newedge << "\n"); + } + setEdgeWeight(newedge,w); + removeEdge(oldedge); +} + +template<> +const BasicBlock *ProfileInfoT<Function,BasicBlock>:: + GetPath(const BasicBlock *Src, const BasicBlock *Dest, + Path &P, unsigned Mode) { + const BasicBlock *BB = 0; + bool hasFoundPath = false; + + std::queue<const BasicBlock *> BFS; + BFS.push(Src); + + while(BFS.size() && !hasFoundPath) { + BB = BFS.front(); + BFS.pop(); + + succ_const_iterator Succ = succ_begin(BB), End = succ_end(BB); + if (Succ == End) { + P[0] = BB; + if (Mode & GetPathToExit) { + hasFoundPath = true; + BB = 0; + } + } + for(;Succ != End; ++Succ) { + if (P.find(*Succ) != P.end()) continue; + Edge e = getEdge(BB,*Succ); + if ((Mode & GetPathWithNewEdges) && (getEdgeWeight(e) != MissingValue)) continue; + P[*Succ] = BB; + BFS.push(*Succ); + if ((Mode & GetPathToDest) && *Succ == Dest) { + hasFoundPath = true; + BB = *Succ; + break; + } + if ((Mode & GetPathToValue) && (getExecutionCount(*Succ) != MissingValue)) { + hasFoundPath = true; + BB = *Succ; + break; + } + } + } + + return BB; +} + +template<> +void ProfileInfoT<Function,BasicBlock>:: + divertFlow(const Edge &oldedge, const Edge &newedge) { + DEBUG(errs() << "Diverting " << oldedge << " via " << newedge ); + + // First check if the old edge was taken, if not, just delete it... + if (getEdgeWeight(oldedge) == 0) { + removeEdge(oldedge); + return; + } + + Path P; + P[newedge.first] = 0; + P[newedge.second] = newedge.first; + const BasicBlock *BB = GetPath(newedge.second,oldedge.second,P,GetPathToExit | GetPathToDest); + + double w = getEdgeWeight (oldedge); + DEBUG(errs() << ", Weight: " << format("%.20g",w) << "\n"); + do { + const BasicBlock *Parent = P.find(BB)->second; + Edge e = getEdge(Parent,BB); + double oldw = getEdgeWeight(e); + double oldc = getExecutionCount(e.first); + setEdgeWeight(e, w+oldw); + if (Parent != oldedge.first) { + setExecutionCount(e.first, w+oldc); + } + BB = Parent; + } while (BB != newedge.first); + removeEdge(oldedge); +} + /// Replaces all occurences of RmBB in the ProfilingInfo with DestBB. /// This checks all edges of the function the blocks reside in and replaces the /// occurences of RmBB with DestBB. -void ProfileInfo::replaceAllUses(const BasicBlock *RmBB, - const BasicBlock *DestBB) { - DEBUG(errs() << "Replacing " << RmBB->getNameStr() - << " with " << DestBB->getNameStr() << "\n"); +template<> +void ProfileInfoT<Function,BasicBlock>:: + replaceAllUses(const BasicBlock *RmBB, const BasicBlock *DestBB) { + DEBUG(errs() << "Replacing " << RmBB->getName() + << " with " << DestBB->getName() << "\n"); const Function *F = DestBB->getParent(); std::map<const Function*, EdgeWeights>::iterator J = EdgeInformation.find(F); if (J == EdgeInformation.end()) return; - for (EdgeWeights::iterator I = J->second.begin(), E = J->second.end(); - I != E; ++I) { - Edge e = I->first; - Edge newedge; bool foundedge = false; + Edge e, newedge; + bool erasededge = false; + EdgeWeights::iterator I = J->second.begin(), E = J->second.end(); + while(I != E) { + e = (I++)->first; + bool foundedge = false; bool eraseedge = false; if (e.first == RmBB) { - newedge = getEdge(DestBB, e.second); - foundedge = true; + if (e.second == DestBB) { + eraseedge = true; + } else { + newedge = getEdge(DestBB, e.second); + foundedge = true; + } } if (e.second == RmBB) { - newedge = getEdge(e.first, DestBB); - foundedge = true; + if (e.first == DestBB) { + eraseedge = true; + } else { + newedge = getEdge(e.first, DestBB); + foundedge = true; + } } if (foundedge) { - double w = getEdgeWeight(e); - EdgeInformation[F][newedge] = w; - DEBUG(errs() << "Replacing " << e << " with " << newedge << "\n"); - J->second.erase(e); + replaceEdge(e, newedge); + } + if (eraseedge) { + if (erasededge) { + Edge newedge = getEdge(DestBB, DestBB); + replaceEdge(e, newedge); + } else { + removeEdge(e); + erasededge = true; + } } } } @@ -119,10 +359,11 @@ void ProfileInfo::replaceAllUses(const BasicBlock *RmBB, /// Splits an edge in the ProfileInfo and redirects flow over NewBB. /// Since its possible that there is more than one edge in the CFG from FristBB /// to SecondBB its necessary to redirect the flow proporionally. -void ProfileInfo::splitEdge(const BasicBlock *FirstBB, - const BasicBlock *SecondBB, - const BasicBlock *NewBB, - bool MergeIdenticalEdges) { +template<> +void ProfileInfoT<Function,BasicBlock>::splitEdge(const BasicBlock *FirstBB, + const BasicBlock *SecondBB, + const BasicBlock *NewBB, + bool MergeIdenticalEdges) { const Function *F = FirstBB->getParent(); std::map<const Function*, EdgeWeights>::iterator J = EdgeInformation.find(F); @@ -153,7 +394,7 @@ void ProfileInfo::splitEdge(const BasicBlock *FirstBB, // We know now how many edges there are from FirstBB to SecondBB, reroute a // proportional part of the edge weight over NewBB. - double neww = w / succ_count; + double neww = floor(w / succ_count); ECs[n1] += neww; ECs[n2] += neww; BlockInformation[F][NewBB] += neww; @@ -164,14 +405,672 @@ void ProfileInfo::splitEdge(const BasicBlock *FirstBB, } } -raw_ostream& llvm::operator<<(raw_ostream &O, ProfileInfo::Edge E) { +template<> +void ProfileInfoT<Function,BasicBlock>::splitBlock(const BasicBlock *Old, + const BasicBlock* New) { + const Function *F = Old->getParent(); + std::map<const Function*, EdgeWeights>::iterator J = + EdgeInformation.find(F); + if (J == EdgeInformation.end()) return; + + DEBUG(errs() << "Splitting " << Old->getName() << " to " << New->getName() << "\n"); + + std::set<Edge> Edges; + for (EdgeWeights::iterator ewi = J->second.begin(), ewe = J->second.end(); + ewi != ewe; ++ewi) { + Edge old = ewi->first; + if (old.first == Old) { + Edges.insert(old); + } + } + for (std::set<Edge>::iterator EI = Edges.begin(), EE = Edges.end(); + EI != EE; ++EI) { + Edge newedge = getEdge(New, EI->second); + replaceEdge(*EI, newedge); + } + + double w = getExecutionCount(Old); + setEdgeWeight(getEdge(Old, New), w); + setExecutionCount(New, w); +} + +template<> +void ProfileInfoT<Function,BasicBlock>::splitBlock(const BasicBlock *BB, + const BasicBlock* NewBB, + BasicBlock *const *Preds, + unsigned NumPreds) { + const Function *F = BB->getParent(); + std::map<const Function*, EdgeWeights>::iterator J = + EdgeInformation.find(F); + if (J == EdgeInformation.end()) return; + + DEBUG(errs() << "Splitting " << NumPreds << " Edges from " << BB->getName() + << " to " << NewBB->getName() << "\n"); + + // Collect weight that was redirected over NewBB. + double newweight = 0; + + std::set<const BasicBlock *> ProcessedPreds; + // For all requestes Predecessors. + for (unsigned pred = 0; pred < NumPreds; ++pred) { + const BasicBlock * Pred = Preds[pred]; + if (ProcessedPreds.insert(Pred).second) { + // Create edges and read old weight. + Edge oldedge = getEdge(Pred, BB); + Edge newedge = getEdge(Pred, NewBB); + + // Remember how much weight was redirected. + newweight += getEdgeWeight(oldedge); + + replaceEdge(oldedge,newedge); + } + } + + Edge newedge = getEdge(NewBB,BB); + setEdgeWeight(newedge, newweight); + setExecutionCount(NewBB, newweight); +} + +template<> +void ProfileInfoT<Function,BasicBlock>::transfer(const Function *Old, + const Function *New) { + DEBUG(errs() << "Replacing Function " << Old->getName() << " with " + << New->getName() << "\n"); + std::map<const Function*, EdgeWeights>::iterator J = + EdgeInformation.find(Old); + if(J != EdgeInformation.end()) { + EdgeInformation[New] = J->second; + } + EdgeInformation.erase(Old); + BlockInformation.erase(Old); + FunctionInformation.erase(Old); +} + +static double readEdgeOrRemember(ProfileInfo::Edge edge, double w, + ProfileInfo::Edge &tocalc, unsigned &uncalc) { + if (w == ProfileInfo::MissingValue) { + tocalc = edge; + uncalc++; + return 0; + } else { + return w; + } +} + +template<> +bool ProfileInfoT<Function,BasicBlock>:: + CalculateMissingEdge(const BasicBlock *BB, Edge &removed, + bool assumeEmptySelf) { + Edge edgetocalc; + unsigned uncalculated = 0; + + // collect weights of all incoming and outgoing edges, rememer edges that + // have no value + double incount = 0; + SmallSet<const BasicBlock*,8> pred_visited; + pred_const_iterator bbi = pred_begin(BB), bbe = pred_end(BB); + if (bbi==bbe) { + Edge e = getEdge(0,BB); + incount += readEdgeOrRemember(e, getEdgeWeight(e) ,edgetocalc,uncalculated); + } + for (;bbi != bbe; ++bbi) { + if (pred_visited.insert(*bbi)) { + Edge e = getEdge(*bbi,BB); + incount += readEdgeOrRemember(e, getEdgeWeight(e) ,edgetocalc,uncalculated); + } + } + + double outcount = 0; + SmallSet<const BasicBlock*,8> succ_visited; + succ_const_iterator sbbi = succ_begin(BB), sbbe = succ_end(BB); + if (sbbi==sbbe) { + Edge e = getEdge(BB,0); + if (getEdgeWeight(e) == MissingValue) { + double w = getExecutionCount(BB); + if (w != MissingValue) { + setEdgeWeight(e,w); + removed = e; + } + } + outcount += readEdgeOrRemember(e, getEdgeWeight(e), edgetocalc, uncalculated); + } + for (;sbbi != sbbe; ++sbbi) { + if (succ_visited.insert(*sbbi)) { + Edge e = getEdge(BB,*sbbi); + outcount += readEdgeOrRemember(e, getEdgeWeight(e), edgetocalc, uncalculated); + } + } + + // if exactly one edge weight was missing, calculate it and remove it from + // spanning tree + if (uncalculated == 0 ) { + return true; + } else + if (uncalculated == 1) { + if (incount < outcount) { + EdgeInformation[BB->getParent()][edgetocalc] = outcount-incount; + } else { + EdgeInformation[BB->getParent()][edgetocalc] = incount-outcount; + } + DEBUG(errs() << "--Calc Edge Counter for " << edgetocalc << ": " + << format("%.20g", getEdgeWeight(edgetocalc)) << "\n"); + removed = edgetocalc; + return true; + } else + if (uncalculated == 2 && assumeEmptySelf && edgetocalc.first == edgetocalc.second && incount == outcount) { + setEdgeWeight(edgetocalc, incount * 10); + removed = edgetocalc; + return true; + } else { + return false; + } +} + +static void readEdge(ProfileInfo *PI, ProfileInfo::Edge e, double &calcw, std::set<ProfileInfo::Edge> &misscount) { + double w = PI->getEdgeWeight(e); + if (w != ProfileInfo::MissingValue) { + calcw += w; + } else { + misscount.insert(e); + } +} + +template<> +bool ProfileInfoT<Function,BasicBlock>::EstimateMissingEdges(const BasicBlock *BB) { + bool hasNoSuccessors = false; + + double inWeight = 0; + std::set<Edge> inMissing; + std::set<const BasicBlock*> ProcessedPreds; + pred_const_iterator bbi = pred_begin(BB), bbe = pred_end(BB); + if (bbi == bbe) { + readEdge(this,getEdge(0,BB),inWeight,inMissing); + } + for( ; bbi != bbe; ++bbi ) { + if (ProcessedPreds.insert(*bbi).second) { + readEdge(this,getEdge(*bbi,BB),inWeight,inMissing); + } + } + + double outWeight = 0; + std::set<Edge> outMissing; + std::set<const BasicBlock*> ProcessedSuccs; + succ_const_iterator sbbi = succ_begin(BB), sbbe = succ_end(BB); + if (sbbi == sbbe) { + readEdge(this,getEdge(BB,0),outWeight,outMissing); + hasNoSuccessors = true; + } + for ( ; sbbi != sbbe; ++sbbi ) { + if (ProcessedSuccs.insert(*sbbi).second) { + readEdge(this,getEdge(BB,*sbbi),outWeight,outMissing); + } + } + + double share; + std::set<Edge>::iterator ei,ee; + if (inMissing.size() == 0 && outMissing.size() > 0) { + ei = outMissing.begin(); + ee = outMissing.end(); + share = inWeight/outMissing.size(); + setExecutionCount(BB,inWeight); + } else + if (inMissing.size() > 0 && outMissing.size() == 0 && outWeight == 0) { + ei = inMissing.begin(); + ee = inMissing.end(); + share = 0; + setExecutionCount(BB,0); + } else + if (inMissing.size() == 0 && outMissing.size() == 0) { + setExecutionCount(BB,outWeight); + return true; + } else { + return false; + } + for ( ; ei != ee; ++ei ) { + setEdgeWeight(*ei,share); + } + return true; +} + +template<> +void ProfileInfoT<Function,BasicBlock>::repair(const Function *F) { +// if (getExecutionCount(&(F->getEntryBlock())) == 0) { +// for (Function::const_iterator FI = F->begin(), FE = F->end(); +// FI != FE; ++FI) { +// const BasicBlock* BB = &(*FI); +// { +// pred_const_iterator NBB = pred_begin(BB), End = pred_end(BB); +// if (NBB == End) { +// setEdgeWeight(getEdge(0,BB),0); +// } +// for(;NBB != End; ++NBB) { +// setEdgeWeight(getEdge(*NBB,BB),0); +// } +// } +// { +// succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB); +// if (NBB == End) { +// setEdgeWeight(getEdge(0,BB),0); +// } +// for(;NBB != End; ++NBB) { +// setEdgeWeight(getEdge(*NBB,BB),0); +// } +// } +// } +// return; +// } + // The set of BasicBlocks that are still unvisited. + std::set<const BasicBlock*> Unvisited; + + // The set of return edges (Edges with no successors). + std::set<Edge> ReturnEdges; + double ReturnWeight = 0; + + // First iterate over the whole function and collect: + // 1) The blocks in this function in the Unvisited set. + // 2) The return edges in the ReturnEdges set. + // 3) The flow that is leaving the function already via return edges. + + // Data structure for searching the function. + std::queue<const BasicBlock *> BFS; + const BasicBlock *BB = &(F->getEntryBlock()); + BFS.push(BB); + Unvisited.insert(BB); + + while (BFS.size()) { + BB = BFS.front(); BFS.pop(); + succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB); + if (NBB == End) { + Edge e = getEdge(BB,0); + double w = getEdgeWeight(e); + if (w == MissingValue) { + // If the return edge has no value, try to read value from block. + double bw = getExecutionCount(BB); + if (bw != MissingValue) { + setEdgeWeight(e,bw); + ReturnWeight += bw; + } else { + // If both return edge and block provide no value, collect edge. + ReturnEdges.insert(e); + } + } else { + // If the return edge has a proper value, collect it. + ReturnWeight += w; + } + } + for (;NBB != End; ++NBB) { + if (Unvisited.insert(*NBB).second) { + BFS.push(*NBB); + } + } + } + + while (Unvisited.size() > 0) { + unsigned oldUnvisitedCount = Unvisited.size(); + bool FoundPath = false; + + // If there is only one edge left, calculate it. + if (ReturnEdges.size() == 1) { + ReturnWeight = getExecutionCount(&(F->getEntryBlock())) - ReturnWeight; + + Edge e = *ReturnEdges.begin(); + setEdgeWeight(e,ReturnWeight); + setExecutionCount(e.first,ReturnWeight); + + Unvisited.erase(e.first); + ReturnEdges.erase(e); + continue; + } + + // Calculate all blocks where only one edge is missing, this may also + // resolve furhter return edges. + std::set<const BasicBlock *>::iterator FI = Unvisited.begin(), FE = Unvisited.end(); + while(FI != FE) { + const BasicBlock *BB = *FI; ++FI; + Edge e; + if(CalculateMissingEdge(BB,e,true)) { + if (BlockInformation[F].find(BB) == BlockInformation[F].end()) { + setExecutionCount(BB,getExecutionCount(BB)); + } + Unvisited.erase(BB); + if (e.first != 0 && e.second == 0) { + ReturnEdges.erase(e); + ReturnWeight += getEdgeWeight(e); + } + } + } + if (oldUnvisitedCount > Unvisited.size()) continue; + + // Estimate edge weights by dividing the flow proportionally. + FI = Unvisited.begin(), FE = Unvisited.end(); + while(FI != FE) { + const BasicBlock *BB = *FI; ++FI; + const BasicBlock *Dest = 0; + bool AllEdgesHaveSameReturn = true; + // Check each Successor, these must all end up in the same or an empty + // return block otherwise its dangerous to do an estimation on them. + for (succ_const_iterator Succ = succ_begin(BB), End = succ_end(BB); + Succ != End; ++Succ) { + Path P; + GetPath(*Succ, 0, P, GetPathToExit); + if (Dest && Dest != P[0]) { + AllEdgesHaveSameReturn = false; + } + Dest = P[0]; + } + if (AllEdgesHaveSameReturn) { + if(EstimateMissingEdges(BB)) { + Unvisited.erase(BB); + break; + } + } + } + if (oldUnvisitedCount > Unvisited.size()) continue; + + // Check if there is a path to an block that has a known value and redirect + // flow accordingly. + FI = Unvisited.begin(), FE = Unvisited.end(); + while(FI != FE && !FoundPath) { + // Fetch path. + const BasicBlock *BB = *FI; ++FI; + Path P; + const BasicBlock *Dest = GetPath(BB, 0, P, GetPathToValue); + + // Calculate incoming flow. + double iw = 0; unsigned inmissing = 0; unsigned incount = 0; unsigned invalid = 0; + std::set<const BasicBlock *> Processed; + for (pred_const_iterator NBB = pred_begin(BB), End = pred_end(BB); + NBB != End; ++NBB) { + if (Processed.insert(*NBB).second) { + Edge e = getEdge(*NBB, BB); + double ew = getEdgeWeight(e); + if (ew != MissingValue) { + iw += ew; + invalid++; + } else { + // If the path contains the successor, this means its a backedge, + // do not count as missing. + if (P.find(*NBB) == P.end()) + inmissing++; + } + incount++; + } + } + if (inmissing == incount) continue; + if (invalid == 0) continue; + + // Subtract (already) outgoing flow. + Processed.clear(); + for (succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB); + NBB != End; ++NBB) { + if (Processed.insert(*NBB).second) { + Edge e = getEdge(BB, *NBB); + double ew = getEdgeWeight(e); + if (ew != MissingValue) { + iw -= ew; + } + } + } + if (iw < 0) continue; + + // Check the recieving end of the path if it can handle the flow. + double ow = getExecutionCount(Dest); + Processed.clear(); + for (succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB); + NBB != End; ++NBB) { + if (Processed.insert(*NBB).second) { + Edge e = getEdge(BB, *NBB); + double ew = getEdgeWeight(e); + if (ew != MissingValue) { + ow -= ew; + } + } + } + if (ow < 0) continue; + + // Determine how much flow shall be used. + double ew = getEdgeWeight(getEdge(P[Dest],Dest)); + if (ew != MissingValue) { + ew = ew<ow?ew:ow; + ew = ew<iw?ew:iw; + } else { + if (inmissing == 0) + ew = iw; + } + + // Create flow. + if (ew != MissingValue) { + do { + Edge e = getEdge(P[Dest],Dest); + if (getEdgeWeight(e) == MissingValue) { + setEdgeWeight(e,ew); + FoundPath = true; + } + Dest = P[Dest]; + } while (Dest != BB); + } + } + if (FoundPath) continue; + + // Calculate a block with self loop. + FI = Unvisited.begin(), FE = Unvisited.end(); + while(FI != FE && !FoundPath) { + const BasicBlock *BB = *FI; ++FI; + bool SelfEdgeFound = false; + for (succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB); + NBB != End; ++NBB) { + if (*NBB == BB) { + SelfEdgeFound = true; + break; + } + } + if (SelfEdgeFound) { + Edge e = getEdge(BB,BB); + if (getEdgeWeight(e) == MissingValue) { + double iw = 0; + std::set<const BasicBlock *> Processed; + for (pred_const_iterator NBB = pred_begin(BB), End = pred_end(BB); + NBB != End; ++NBB) { + if (Processed.insert(*NBB).second) { + Edge e = getEdge(*NBB, BB); + double ew = getEdgeWeight(e); + if (ew != MissingValue) { + iw += ew; + } + } + } + setEdgeWeight(e,iw * 10); + FoundPath = true; + } + } + } + if (FoundPath) continue; + + // Determine backedges, set them to zero. + FI = Unvisited.begin(), FE = Unvisited.end(); + while(FI != FE && !FoundPath) { + const BasicBlock *BB = *FI; ++FI; + const BasicBlock *Dest; + Path P; + bool BackEdgeFound = false; + for (pred_const_iterator NBB = pred_begin(BB), End = pred_end(BB); + NBB != End; ++NBB) { + Dest = GetPath(BB, *NBB, P, GetPathToDest | GetPathWithNewEdges); + if (Dest == *NBB) { + BackEdgeFound = true; + break; + } + } + if (BackEdgeFound) { + Edge e = getEdge(Dest,BB); + double w = getEdgeWeight(e); + if (w == MissingValue) { + setEdgeWeight(e,0); + FoundPath = true; + } + do { + Edge e = getEdge(P[Dest], Dest); + double w = getEdgeWeight(e); + if (w == MissingValue) { + setEdgeWeight(e,0); + FoundPath = true; + } + Dest = P[Dest]; + } while (Dest != BB); + } + } + if (FoundPath) continue; + + // Channel flow to return block. + FI = Unvisited.begin(), FE = Unvisited.end(); + while(FI != FE && !FoundPath) { + const BasicBlock *BB = *FI; ++FI; + + Path P; + const BasicBlock *Dest = GetPath(BB, 0, P, GetPathToExit | GetPathWithNewEdges); + Dest = P[0]; + if (!Dest) continue; + + if (getEdgeWeight(getEdge(Dest,0)) == MissingValue) { + // Calculate incoming flow. + double iw = 0; + std::set<const BasicBlock *> Processed; + for (pred_const_iterator NBB = pred_begin(BB), End = pred_end(BB); + NBB != End; ++NBB) { + if (Processed.insert(*NBB).second) { + Edge e = getEdge(*NBB, BB); + double ew = getEdgeWeight(e); + if (ew != MissingValue) { + iw += ew; + } + } + } + do { + Edge e = getEdge(P[Dest], Dest); + double w = getEdgeWeight(e); + if (w == MissingValue) { + setEdgeWeight(e,iw); + FoundPath = true; + } else { + assert(0 && "Edge should not have value already!"); + } + Dest = P[Dest]; + } while (Dest != BB); + } + } + if (FoundPath) continue; + + // Speculatively set edges to zero. + FI = Unvisited.begin(), FE = Unvisited.end(); + while(FI != FE && !FoundPath) { + const BasicBlock *BB = *FI; ++FI; + + for (pred_const_iterator NBB = pred_begin(BB), End = pred_end(BB); + NBB != End; ++NBB) { + Edge e = getEdge(*NBB,BB); + double w = getEdgeWeight(e); + if (w == MissingValue) { + setEdgeWeight(e,0); + FoundPath = true; + break; + } + } + } + if (FoundPath) continue; + + errs() << "{"; + FI = Unvisited.begin(), FE = Unvisited.end(); + while(FI != FE) { + const BasicBlock *BB = *FI; ++FI; + errs() << BB->getName(); + if (FI != FE) + errs() << ","; + } + errs() << "}"; + + errs() << "ASSERT: could not repair function"; + assert(0 && "could not repair function"); + } + + EdgeWeights J = EdgeInformation[F]; + for (EdgeWeights::iterator EI = J.begin(), EE = J.end(); EI != EE; ++EI) { + Edge e = EI->first; + + bool SuccFound = false; + if (e.first != 0) { + succ_const_iterator NBB = succ_begin(e.first), End = succ_end(e.first); + if (NBB == End) { + if (0 == e.second) { + SuccFound = true; + } + } + for (;NBB != End; ++NBB) { + if (*NBB == e.second) { + SuccFound = true; + break; + } + } + if (!SuccFound) { + removeEdge(e); + } + } + } +} + +raw_ostream& operator<<(raw_ostream &O, const Function *F) { + return O << F->getName(); +} + +raw_ostream& operator<<(raw_ostream &O, const MachineFunction *MF) { + return O << MF->getFunction()->getName() << "(MF)"; +} + +raw_ostream& operator<<(raw_ostream &O, const BasicBlock *BB) { + return O << BB->getName(); +} + +raw_ostream& operator<<(raw_ostream &O, const MachineBasicBlock *MBB) { + return O << MBB->getBasicBlock()->getName() << "(MB)"; +} + +raw_ostream& operator<<(raw_ostream &O, std::pair<const BasicBlock *, const BasicBlock *> E) { O << "("; - O << (E.first ? E.first->getNameStr() : "0"); + + if (E.first) + O << E.first; + else + O << "0"; + + O << ","; + + if (E.second) + O << E.second; + else + O << "0"; + + return O << ")"; +} + +raw_ostream& operator<<(raw_ostream &O, std::pair<const MachineBasicBlock *, const MachineBasicBlock *> E) { + O << "("; + + if (E.first) + O << E.first; + else + O << "0"; + O << ","; - O << (E.second ? E.second->getNameStr() : "0"); + + if (E.second) + O << E.second; + else + O << "0"; + return O << ")"; } +} // namespace llvm + //===----------------------------------------------------------------------===// // NoProfile ProfileInfo implementation // diff --git a/lib/Analysis/ProfileInfoLoaderPass.cpp b/lib/Analysis/ProfileInfoLoaderPass.cpp index 9e1dfb6..cbd0430 100644 --- a/lib/Analysis/ProfileInfoLoaderPass.cpp +++ b/lib/Analysis/ProfileInfoLoaderPass.cpp @@ -74,6 +74,8 @@ X("profile-loader", "Load profile information from llvmprof.out", false, true); static RegisterAnalysisGroup<ProfileInfo> Y(X); +const PassInfo *llvm::ProfileLoaderPassID = &X; + ModulePass *llvm::createProfileLoaderPass() { return new LoaderPass(); } /// createProfileLoaderPass - This function returns a Pass that loads the @@ -112,46 +114,9 @@ void LoaderPass::recurseBasicBlock(const BasicBlock *BB) { recurseBasicBlock(*bbi); } - Edge edgetocalc; - unsigned uncalculated = 0; - - // collect weights of all incoming and outgoing edges, rememer edges that - // have no value - double incount = 0; - SmallSet<const BasicBlock*,8> pred_visited; - pred_const_iterator bbi = pred_begin(BB), bbe = pred_end(BB); - if (bbi==bbe) { - readEdgeOrRemember(getEdge(0, BB),edgetocalc,uncalculated,incount); - } - for (;bbi != bbe; ++bbi) { - if (pred_visited.insert(*bbi)) { - readEdgeOrRemember(getEdge(*bbi, BB),edgetocalc,uncalculated,incount); - } - } - - double outcount = 0; - SmallSet<const BasicBlock*,8> succ_visited; - succ_const_iterator sbbi = succ_begin(BB), sbbe = succ_end(BB); - if (sbbi==sbbe) { - readEdgeOrRemember(getEdge(BB, 0),edgetocalc,uncalculated,outcount); - } - for (;sbbi != sbbe; ++sbbi) { - if (succ_visited.insert(*sbbi)) { - readEdgeOrRemember(getEdge(BB, *sbbi),edgetocalc,uncalculated,outcount); - } - } - - // if exactly one edge weight was missing, calculate it and remove it from - // spanning tree - if (uncalculated == 1) { - if (incount < outcount) { - EdgeInformation[BB->getParent()][edgetocalc] = outcount-incount; - } else { - EdgeInformation[BB->getParent()][edgetocalc] = incount-outcount; - } - DEBUG(errs() << "--Calc Edge Counter for " << edgetocalc << ": " - << format("%g", getEdgeWeight(edgetocalc)) << "\n"); - SpanningTree.erase(edgetocalc); + Edge tocalc; + if (CalculateMissingEdge(BB, tocalc)) { + SpanningTree.erase(tocalc); } } @@ -219,9 +184,9 @@ bool LoaderPass::runOnModule(Module &M) { } } while (SpanningTree.size() > 0) { -#if 0 + unsigned size = SpanningTree.size(); -#endif + BBisUnvisited.clear(); for (std::set<Edge>::iterator ei = SpanningTree.begin(), ee = SpanningTree.end(); ei != ee; ++ei) { @@ -231,17 +196,16 @@ bool LoaderPass::runOnModule(Module &M) { while (BBisUnvisited.size() > 0) { recurseBasicBlock(*BBisUnvisited.begin()); } -#if 0 + if (SpanningTree.size() == size) { DEBUG(errs()<<"{"); for (std::set<Edge>::iterator ei = SpanningTree.begin(), ee = SpanningTree.end(); ei != ee; ++ei) { - DEBUG(errs()<<"("<<(ei->first?ei->first->getName():"0")<<"," - <<(ei->second?ei->second->getName():"0")<<"),"); + DEBUG(errs()<< *ei <<","); } assert(0 && "No edge calculated!"); } -#endif + } } if (ReadCount != Counters.size()) { diff --git a/lib/Analysis/ProfileVerifierPass.cpp b/lib/Analysis/ProfileVerifierPass.cpp index 5f36294..36a80ba 100644 --- a/lib/Analysis/ProfileVerifierPass.cpp +++ b/lib/Analysis/ProfileVerifierPass.cpp @@ -21,6 +21,7 @@ #include "llvm/Support/CFG.h" #include "llvm/Support/InstIterator.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Format.h" #include "llvm/Support/Debug.h" #include <set> using namespace llvm; @@ -29,44 +30,45 @@ static cl::opt<bool,false> ProfileVerifierDisableAssertions("profile-verifier-noassert", cl::desc("Disable assertions")); -namespace { - class ProfileVerifierPass : public FunctionPass { +namespace llvm { + template<class FType, class BType> + class ProfileVerifierPassT : public FunctionPass { struct DetailedBlockInfo { - const BasicBlock *BB; - double BBWeight; - double inWeight; - int inCount; - double outWeight; - int outCount; + const BType *BB; + double BBWeight; + double inWeight; + int inCount; + double outWeight; + int outCount; }; - ProfileInfo *PI; - std::set<const BasicBlock*> BBisVisited; - std::set<const Function*> FisVisited; + ProfileInfoT<FType, BType> *PI; + std::set<const BType*> BBisVisited; + std::set<const FType*> FisVisited; bool DisableAssertions; // When debugging is enabled, the verifier prints a whole slew of debug // information, otherwise its just the assert. These are all the helper // functions. bool PrintedDebugTree; - std::set<const BasicBlock*> BBisPrinted; + std::set<const BType*> BBisPrinted; void debugEntry(DetailedBlockInfo*); - void printDebugInfo(const BasicBlock *BB); + void printDebugInfo(const BType *BB); public: static char ID; // Class identification, replacement for typeinfo - explicit ProfileVerifierPass () : FunctionPass(&ID) { + explicit ProfileVerifierPassT () : FunctionPass(&ID) { DisableAssertions = ProfileVerifierDisableAssertions; } - explicit ProfileVerifierPass (bool da) : FunctionPass(&ID), - DisableAssertions(da) { + explicit ProfileVerifierPassT (bool da) : FunctionPass(&ID), + DisableAssertions(da) { } void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); - AU.addRequired<ProfileInfo>(); + AU.addRequired<ProfileInfoT<FType, BType> >(); } const char *getPassName() const { @@ -74,271 +76,302 @@ namespace { } /// run - Verify the profile information. - bool runOnFunction(Function &F); - void recurseBasicBlock(const BasicBlock*); + bool runOnFunction(FType &F); + void recurseBasicBlock(const BType*); - bool exitReachable(const Function*); - double ReadOrAssert(ProfileInfo::Edge); + bool exitReachable(const FType*); + double ReadOrAssert(typename ProfileInfoT<FType, BType>::Edge); void CheckValue(bool, const char*, DetailedBlockInfo*); }; -} // End of anonymous namespace - -char ProfileVerifierPass::ID = 0; -static RegisterPass<ProfileVerifierPass> -X("profile-verifier", "Verify profiling information", false, true); -namespace llvm { - FunctionPass *createProfileVerifierPass() { - return new ProfileVerifierPass(ProfileVerifierDisableAssertions); - } -} - -void ProfileVerifierPass::printDebugInfo(const BasicBlock *BB) { - - if (BBisPrinted.find(BB) != BBisPrinted.end()) return; - - double BBWeight = PI->getExecutionCount(BB); - if (BBWeight == ProfileInfo::MissingValue) { BBWeight = 0; } - double inWeight = 0; - int inCount = 0; - std::set<const BasicBlock*> ProcessedPreds; - for ( pred_const_iterator bbi = pred_begin(BB), bbe = pred_end(BB); - bbi != bbe; ++bbi ) { - if (ProcessedPreds.insert(*bbi).second) { - ProfileInfo::Edge E = PI->getEdge(*bbi,BB); - double EdgeWeight = PI->getEdgeWeight(E); - if (EdgeWeight == ProfileInfo::MissingValue) { EdgeWeight = 0; } - errs() << "calculated in-edge " << E << ": " << EdgeWeight << "\n"; - inWeight += EdgeWeight; - inCount++; + typedef ProfileVerifierPassT<Function, BasicBlock> ProfileVerifierPass; + + template<class FType, class BType> + void ProfileVerifierPassT<FType, BType>::printDebugInfo(const BType *BB) { + + if (BBisPrinted.find(BB) != BBisPrinted.end()) return; + + double BBWeight = PI->getExecutionCount(BB); + if (BBWeight == ProfileInfoT<FType, BType>::MissingValue) { BBWeight = 0; } + double inWeight = 0; + int inCount = 0; + std::set<const BType*> ProcessedPreds; + for ( pred_const_iterator bbi = pred_begin(BB), bbe = pred_end(BB); + bbi != bbe; ++bbi ) { + if (ProcessedPreds.insert(*bbi).second) { + typename ProfileInfoT<FType, BType>::Edge E = PI->getEdge(*bbi,BB); + double EdgeWeight = PI->getEdgeWeight(E); + if (EdgeWeight == ProfileInfoT<FType, BType>::MissingValue) { EdgeWeight = 0; } + errs() << "calculated in-edge " << E << ": " + << format("%20.20g",EdgeWeight) << "\n"; + inWeight += EdgeWeight; + inCount++; + } } - } - double outWeight = 0; - int outCount = 0; - std::set<const BasicBlock*> ProcessedSuccs; - for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB); - bbi != bbe; ++bbi ) { - if (ProcessedSuccs.insert(*bbi).second) { - ProfileInfo::Edge E = PI->getEdge(BB,*bbi); - double EdgeWeight = PI->getEdgeWeight(E); - if (EdgeWeight == ProfileInfo::MissingValue) { EdgeWeight = 0; } - errs() << "calculated out-edge " << E << ": " << EdgeWeight << "\n"; - outWeight += EdgeWeight; - outCount++; + double outWeight = 0; + int outCount = 0; + std::set<const BType*> ProcessedSuccs; + for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB); + bbi != bbe; ++bbi ) { + if (ProcessedSuccs.insert(*bbi).second) { + typename ProfileInfoT<FType, BType>::Edge E = PI->getEdge(BB,*bbi); + double EdgeWeight = PI->getEdgeWeight(E); + if (EdgeWeight == ProfileInfoT<FType, BType>::MissingValue) { EdgeWeight = 0; } + errs() << "calculated out-edge " << E << ": " + << format("%20.20g",EdgeWeight) << "\n"; + outWeight += EdgeWeight; + outCount++; + } + } + errs() << "Block " << BB->getNameStr() << " in " + << BB->getParent()->getNameStr() << ":" + << "BBWeight=" << format("%20.20g",BBWeight) << "," + << "inWeight=" << format("%20.20g",inWeight) << "," + << "inCount=" << inCount << "," + << "outWeight=" << format("%20.20g",outWeight) << "," + << "outCount" << outCount << "\n"; + + // mark as visited and recurse into subnodes + BBisPrinted.insert(BB); + for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB); + bbi != bbe; ++bbi ) { + printDebugInfo(*bbi); } } - errs()<<"Block "<<BB->getNameStr()<<" in "<<BB->getParent()->getNameStr() - <<",BBWeight="<<BBWeight<<",inWeight="<<inWeight<<",inCount="<<inCount - <<",outWeight="<<outWeight<<",outCount"<<outCount<<"\n"; - - // mark as visited and recurse into subnodes - BBisPrinted.insert(BB); - for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB); - bbi != bbe; ++bbi ) { - printDebugInfo(*bbi); - } -} -void ProfileVerifierPass::debugEntry (DetailedBlockInfo *DI) { - errs() << "TROUBLE: Block " << DI->BB->getNameStr() << " in " - << DI->BB->getParent()->getNameStr() << ":"; - errs() << "BBWeight=" << DI->BBWeight << ","; - errs() << "inWeight=" << DI->inWeight << ","; - errs() << "inCount=" << DI->inCount << ","; - errs() << "outWeight=" << DI->outWeight << ","; - errs() << "outCount=" << DI->outCount << "\n"; - if (!PrintedDebugTree) { - PrintedDebugTree = true; - printDebugInfo(&(DI->BB->getParent()->getEntryBlock())); + template<class FType, class BType> + void ProfileVerifierPassT<FType, BType>::debugEntry (DetailedBlockInfo *DI) { + errs() << "TROUBLE: Block " << DI->BB->getNameStr() << " in " + << DI->BB->getParent()->getNameStr() << ":" + << "BBWeight=" << format("%20.20g",DI->BBWeight) << "," + << "inWeight=" << format("%20.20g",DI->inWeight) << "," + << "inCount=" << DI->inCount << "," + << "outWeight=" << format("%20.20g",DI->outWeight) << "," + << "outCount=" << DI->outCount << "\n"; + if (!PrintedDebugTree) { + PrintedDebugTree = true; + printDebugInfo(&(DI->BB->getParent()->getEntryBlock())); + } } -} -// This compares A and B but considering maybe small differences. -static bool Equals(double A, double B) { - double maxRelativeError = 0.0000001; - if (A == B) - return true; - double relativeError; - if (fabs(B) > fabs(A)) - relativeError = fabs((A - B) / B); - else - relativeError = fabs((A - B) / A); - if (relativeError <= maxRelativeError) return true; - return false; -} + // This compares A and B for equality. + static bool Equals(double A, double B) { + return A == B; + } -// This checks if the function "exit" is reachable from an given function -// via calls, this is necessary to check if a profile is valid despite the -// counts not fitting exactly. -bool ProfileVerifierPass::exitReachable(const Function *F) { - if (!F) return false; + // This checks if the function "exit" is reachable from an given function + // via calls, this is necessary to check if a profile is valid despite the + // counts not fitting exactly. + template<class FType, class BType> + bool ProfileVerifierPassT<FType, BType>::exitReachable(const FType *F) { + if (!F) return false; - if (FisVisited.count(F)) return false; + if (FisVisited.count(F)) return false; - Function *Exit = F->getParent()->getFunction("exit"); - if (Exit == F) { - return true; - } + FType *Exit = F->getParent()->getFunction("exit"); + if (Exit == F) { + return true; + } - FisVisited.insert(F); - bool exits = false; - for (const_inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) { - if (const CallInst *CI = dyn_cast<CallInst>(&*I)) { - exits |= exitReachable(CI->getCalledFunction()); - if (exits) break; + FisVisited.insert(F); + bool exits = false; + for (const_inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) { + if (const CallInst *CI = dyn_cast<CallInst>(&*I)) { + FType *F = CI->getCalledFunction(); + if (F) { + exits |= exitReachable(F); + } else { + // This is a call to a pointer, all bets are off... + exits = true; + } + if (exits) break; + } } + return exits; } - return exits; -} -#define ASSERTMESSAGE(M) \ - errs() << (M) << "\n"; \ - if (!DisableAssertions) assert(0 && (M)); - -double ProfileVerifierPass::ReadOrAssert(ProfileInfo::Edge E) { - double EdgeWeight = PI->getEdgeWeight(E); - if (EdgeWeight == ProfileInfo::MissingValue) { - errs() << "Edge " << E << " in Function " - << ProfileInfo::getFunction(E)->getNameStr() << ": "; - ASSERTMESSAGE("ASSERT:Edge has missing value"); - return 0; - } else { - return EdgeWeight; + #define ASSERTMESSAGE(M) \ + { errs() << "ASSERT:" << (M) << "\n"; \ + if (!DisableAssertions) assert(0 && (M)); } + + template<class FType, class BType> + double ProfileVerifierPassT<FType, BType>::ReadOrAssert(typename ProfileInfoT<FType, BType>::Edge E) { + double EdgeWeight = PI->getEdgeWeight(E); + if (EdgeWeight == ProfileInfoT<FType, BType>::MissingValue) { + errs() << "Edge " << E << " in Function " + << ProfileInfoT<FType, BType>::getFunction(E)->getNameStr() << ": "; + ASSERTMESSAGE("Edge has missing value"); + return 0; + } else { + if (EdgeWeight < 0) { + errs() << "Edge " << E << " in Function " + << ProfileInfoT<FType, BType>::getFunction(E)->getNameStr() << ": "; + ASSERTMESSAGE("Edge has negative value"); + } + return EdgeWeight; + } } -} -void ProfileVerifierPass::CheckValue(bool Error, const char *Message, - DetailedBlockInfo *DI) { - if (Error) { - DEBUG(debugEntry(DI)); - errs() << "Block " << DI->BB->getNameStr() << " in Function " - << DI->BB->getParent()->getNameStr() << ": "; - ASSERTMESSAGE(Message); + template<class FType, class BType> + void ProfileVerifierPassT<FType, BType>::CheckValue(bool Error, + const char *Message, + DetailedBlockInfo *DI) { + if (Error) { + DEBUG(debugEntry(DI)); + errs() << "Block " << DI->BB->getNameStr() << " in Function " + << DI->BB->getParent()->getNameStr() << ": "; + ASSERTMESSAGE(Message); + } + return; } - return; -} -// This calculates the Information for a block and then recurses into the -// successors. -void ProfileVerifierPass::recurseBasicBlock(const BasicBlock *BB) { - - // Break the recursion by remembering all visited blocks. - if (BBisVisited.find(BB) != BBisVisited.end()) return; - - // Use a data structure to store all the information, this can then be handed - // to debug printers. - DetailedBlockInfo DI; - DI.BB = BB; - DI.outCount = DI.inCount = 0; - DI.inWeight = DI.outWeight = 0.0; - - // Read predecessors. - std::set<const BasicBlock*> ProcessedPreds; - pred_const_iterator bpi = pred_begin(BB), bpe = pred_end(BB); - // If there are none, check for (0,BB) edge. - if (bpi == bpe) { - DI.inWeight += ReadOrAssert(PI->getEdge(0,BB)); - DI.inCount++; - } - for (;bpi != bpe; ++bpi) { - if (ProcessedPreds.insert(*bpi).second) { - DI.inWeight += ReadOrAssert(PI->getEdge(*bpi,BB)); + // This calculates the Information for a block and then recurses into the + // successors. + template<class FType, class BType> + void ProfileVerifierPassT<FType, BType>::recurseBasicBlock(const BType *BB) { + + // Break the recursion by remembering all visited blocks. + if (BBisVisited.find(BB) != BBisVisited.end()) return; + + // Use a data structure to store all the information, this can then be handed + // to debug printers. + DetailedBlockInfo DI; + DI.BB = BB; + DI.outCount = DI.inCount = 0; + DI.inWeight = DI.outWeight = 0; + + // Read predecessors. + std::set<const BType*> ProcessedPreds; + pred_const_iterator bpi = pred_begin(BB), bpe = pred_end(BB); + // If there are none, check for (0,BB) edge. + if (bpi == bpe) { + DI.inWeight += ReadOrAssert(PI->getEdge(0,BB)); DI.inCount++; } - } + for (;bpi != bpe; ++bpi) { + if (ProcessedPreds.insert(*bpi).second) { + DI.inWeight += ReadOrAssert(PI->getEdge(*bpi,BB)); + DI.inCount++; + } + } - // Read successors. - std::set<const BasicBlock*> ProcessedSuccs; - succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB); - // If there is an (0,BB) edge, consider it too. (This is done not only when - // there are no successors, but every time; not every function contains - // return blocks with no successors (think loop latch as return block)). - double w = PI->getEdgeWeight(PI->getEdge(BB,0)); - if (w != ProfileInfo::MissingValue) { - DI.outWeight += w; - DI.outCount++; - } - for (;bbi != bbe; ++bbi) { - if (ProcessedSuccs.insert(*bbi).second) { - DI.outWeight += ReadOrAssert(PI->getEdge(BB,*bbi)); + // Read successors. + std::set<const BType*> ProcessedSuccs; + succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB); + // If there is an (0,BB) edge, consider it too. (This is done not only when + // there are no successors, but every time; not every function contains + // return blocks with no successors (think loop latch as return block)). + double w = PI->getEdgeWeight(PI->getEdge(BB,0)); + if (w != ProfileInfoT<FType, BType>::MissingValue) { + DI.outWeight += w; DI.outCount++; } - } + for (;bbi != bbe; ++bbi) { + if (ProcessedSuccs.insert(*bbi).second) { + DI.outWeight += ReadOrAssert(PI->getEdge(BB,*bbi)); + DI.outCount++; + } + } - // Read block weight. - DI.BBWeight = PI->getExecutionCount(BB); - CheckValue(DI.BBWeight == ProfileInfo::MissingValue, - "ASSERT:BasicBlock has missing value", &DI); - - // Check if this block is a setjmp target. - bool isSetJmpTarget = false; - if (DI.outWeight > DI.inWeight) { - for (BasicBlock::const_iterator i = BB->begin(), ie = BB->end(); - i != ie; ++i) { - if (const CallInst *CI = dyn_cast<CallInst>(&*i)) { - Function *F = CI->getCalledFunction(); - if (F && (F->getNameStr() == "_setjmp")) { - isSetJmpTarget = true; break; + // Read block weight. + DI.BBWeight = PI->getExecutionCount(BB); + CheckValue(DI.BBWeight == ProfileInfoT<FType, BType>::MissingValue, + "BasicBlock has missing value", &DI); + CheckValue(DI.BBWeight < 0, + "BasicBlock has negative value", &DI); + + // Check if this block is a setjmp target. + bool isSetJmpTarget = false; + if (DI.outWeight > DI.inWeight) { + for (typename BType::const_iterator i = BB->begin(), ie = BB->end(); + i != ie; ++i) { + if (const CallInst *CI = dyn_cast<CallInst>(&*i)) { + FType *F = CI->getCalledFunction(); + if (F && (F->getNameStr() == "_setjmp")) { + isSetJmpTarget = true; break; + } } } } - } - // Check if this block is eventually reaching exit. - bool isExitReachable = false; - if (DI.inWeight > DI.outWeight) { - for (BasicBlock::const_iterator i = BB->begin(), ie = BB->end(); - i != ie; ++i) { - if (const CallInst *CI = dyn_cast<CallInst>(&*i)) { - FisVisited.clear(); - isExitReachable |= exitReachable(CI->getCalledFunction()); - if (isExitReachable) break; + // Check if this block is eventually reaching exit. + bool isExitReachable = false; + if (DI.inWeight > DI.outWeight) { + for (typename BType::const_iterator i = BB->begin(), ie = BB->end(); + i != ie; ++i) { + if (const CallInst *CI = dyn_cast<CallInst>(&*i)) { + FType *F = CI->getCalledFunction(); + if (F) { + FisVisited.clear(); + isExitReachable |= exitReachable(F); + } else { + // This is a call to a pointer, all bets are off... + isExitReachable = true; + } + if (isExitReachable) break; + } } } - } - if (DI.inCount > 0 && DI.outCount == 0) { - // If this is a block with no successors. - if (!isSetJmpTarget) { - CheckValue(!Equals(DI.inWeight,DI.BBWeight), - "ASSERT:inWeight and BBWeight do not match", &DI); + if (DI.inCount > 0 && DI.outCount == 0) { + // If this is a block with no successors. + if (!isSetJmpTarget) { + CheckValue(!Equals(DI.inWeight,DI.BBWeight), + "inWeight and BBWeight do not match", &DI); + } + } else if (DI.inCount == 0 && DI.outCount > 0) { + // If this is a block with no predecessors. + if (!isExitReachable) + CheckValue(!Equals(DI.BBWeight,DI.outWeight), + "BBWeight and outWeight do not match", &DI); + } else { + // If this block has successors and predecessors. + if (DI.inWeight > DI.outWeight && !isExitReachable) + CheckValue(!Equals(DI.inWeight,DI.outWeight), + "inWeight and outWeight do not match", &DI); + if (DI.inWeight < DI.outWeight && !isSetJmpTarget) + CheckValue(!Equals(DI.inWeight,DI.outWeight), + "inWeight and outWeight do not match", &DI); } - } else if (DI.inCount == 0 && DI.outCount > 0) { - // If this is a block with no predecessors. - if (!isExitReachable) - CheckValue(!Equals(DI.BBWeight,DI.outWeight), - "ASSERT:BBWeight and outWeight do not match", &DI); - } else { - // If this block has successors and predecessors. - if (DI.inWeight > DI.outWeight && !isExitReachable) - CheckValue(!Equals(DI.inWeight,DI.outWeight), - "ASSERT:inWeight and outWeight do not match", &DI); - if (DI.inWeight < DI.outWeight && !isSetJmpTarget) - CheckValue(!Equals(DI.inWeight,DI.outWeight), - "ASSERT:inWeight and outWeight do not match", &DI); - } - // Mark this block as visited, rescurse into successors. - BBisVisited.insert(BB); - for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB); - bbi != bbe; ++bbi ) { - recurseBasicBlock(*bbi); + // Mark this block as visited, rescurse into successors. + BBisVisited.insert(BB); + for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB); + bbi != bbe; ++bbi ) { + recurseBasicBlock(*bbi); + } } -} -bool ProfileVerifierPass::runOnFunction(Function &F) { - PI = &getAnalysis<ProfileInfo>(); + template<class FType, class BType> + bool ProfileVerifierPassT<FType, BType>::runOnFunction(FType &F) { + PI = getAnalysisIfAvailable<ProfileInfoT<FType, BType> >(); + if (!PI) + ASSERTMESSAGE("No ProfileInfo available"); + + // Prepare global variables. + PrintedDebugTree = false; + BBisVisited.clear(); + + // Fetch entry block and recurse into it. + const BType *entry = &F.getEntryBlock(); + recurseBasicBlock(entry); + + if (PI->getExecutionCount(&F) != PI->getExecutionCount(entry)) + ASSERTMESSAGE("Function count and entry block count do not match"); - // Prepare global variables. - PrintedDebugTree = false; - BBisVisited.clear(); + return false; + } + + template<class FType, class BType> + char ProfileVerifierPassT<FType, BType>::ID = 0; +} - // Fetch entry block and recurse into it. - const BasicBlock *entry = &F.getEntryBlock(); - recurseBasicBlock(entry); +static RegisterPass<ProfileVerifierPass> +X("profile-verifier", "Verify profiling information", false, true); - if (!DisableAssertions) - assert((PI->getExecutionCount(&F)==PI->getExecutionCount(entry)) && - "Function count and entry block count do not match"); - return false; +namespace llvm { + FunctionPass *createProfileVerifierPass() { + return new ProfileVerifierPass(ProfileVerifierDisableAssertions); + } } + diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index d674ee8..7157d47 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -357,7 +357,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, // without the other. SplitAddRecs(Ops, Ty, SE); - // Decend down the pointer's type and attempt to convert the other + // Descend down the pointer's type and attempt to convert the other // operands into GEP indices, at each level. The first index in a GEP // indexes into the array implied by the pointer operand; the rest of // the indices index into the element or field type selected by the @@ -628,7 +628,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); BasicBlock::iterator NewInsertPt = - next(BasicBlock::iterator(cast<Instruction>(V))); + llvm::next(BasicBlock::iterator(cast<Instruction>(V))); while (isa<PHINode>(NewInsertPt)) ++NewInsertPt; V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), 0, NewInsertPt); @@ -844,7 +844,7 @@ Value *SCEVExpander::expand(const SCEV *S) { if (L && S->hasComputableLoopEvolution(L)) InsertPt = L->getHeader()->getFirstNonPHI(); while (isInsertedInstruction(InsertPt)) - InsertPt = next(BasicBlock::iterator(InsertPt)); + InsertPt = llvm::next(BasicBlock::iterator(InsertPt)); break; } diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index 31d3ccc..22c6e3b 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -659,7 +659,7 @@ unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD, switch (Operator::getOpcode(V)) { default: break; case Instruction::SExt: - Tmp = TyBits-cast<IntegerType>(U->getOperand(0)->getType())->getBitWidth(); + Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits(); return ComputeNumSignBits(U->getOperand(0), TD, Depth+1) + Tmp; case Instruction::AShr: diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index 1b7c9c6..cad1d3b 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -540,6 +540,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(arm_apcscc); KEYWORD(arm_aapcscc); KEYWORD(arm_aapcs_vfpcc); + KEYWORD(msp430_intrcc); KEYWORD(cc); KEYWORD(c); diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index a92dbf8..0333eed 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -581,6 +581,37 @@ bool LLParser::ParseStandaloneMetadata() { return false; } +/// ParseInlineMetadata: +/// !{type %instr} +/// !{...} MDNode +/// !"foo" MDString +bool LLParser::ParseInlineMetadata(Value *&V, PerFunctionState &PFS) { + assert(Lex.getKind() == lltok::Metadata && "Only for Metadata"); + V = 0; + + Lex.Lex(); + if (Lex.getKind() == lltok::lbrace) { + Lex.Lex(); + if (ParseTypeAndValue(V, PFS) || + ParseToken(lltok::rbrace, "expected end of metadata node")) + return true; + + Value *Vals[] = { V }; + V = MDNode::get(Context, Vals, 1); + return false; + } + + // Standalone metadata reference + // !{ ..., !42, ... } + if (!ParseMDNode((MetadataBase *&)V)) + return false; + + // MDString: + // '!' STRINGCONSTANT + if (ParseMDString((MetadataBase *&)V)) return true; + return false; +} + /// ParseAlias: /// ::= GlobalVar '=' OptionalVisibility 'alias' OptionalLinkage Aliasee /// Aliasee @@ -1043,6 +1074,7 @@ bool LLParser::ParseOptionalVisibility(unsigned &Res) { /// ::= 'arm_apcscc' /// ::= 'arm_aapcscc' /// ::= 'arm_aapcs_vfpcc' +/// ::= 'msp430_intrcc' /// ::= 'cc' UINT /// bool LLParser::ParseOptionalCallingConv(CallingConv::ID &CC) { @@ -1056,6 +1088,7 @@ bool LLParser::ParseOptionalCallingConv(CallingConv::ID &CC) { case lltok::kw_arm_apcscc: CC = CallingConv::ARM_APCS; break; case lltok::kw_arm_aapcscc: CC = CallingConv::ARM_AAPCS; break; case lltok::kw_arm_aapcs_vfpcc:CC = CallingConv::ARM_AAPCS_VFP; break; + case lltok::kw_msp430_intrcc: CC = CallingConv::MSP430_INTR; break; case lltok::kw_cc: { unsigned ArbitraryCC; Lex.Lex(); @@ -1377,15 +1410,23 @@ bool LLParser::ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList, // Parse the argument. LocTy ArgLoc; PATypeHolder ArgTy(Type::getVoidTy(Context)); - unsigned ArgAttrs1, ArgAttrs2; + unsigned ArgAttrs1 = Attribute::None; + unsigned ArgAttrs2 = Attribute::None; Value *V; - if (ParseType(ArgTy, ArgLoc) || - ParseOptionalAttrs(ArgAttrs1, 0) || - ParseValue(ArgTy, V, PFS) || - // FIXME: Should not allow attributes after the argument, remove this in - // LLVM 3.0. - ParseOptionalAttrs(ArgAttrs2, 3)) + if (ParseType(ArgTy, ArgLoc)) return true; + + if (Lex.getKind() == lltok::Metadata) { + if (ParseInlineMetadata(V, PFS)) + return true; + } else { + if (ParseOptionalAttrs(ArgAttrs1, 0) || + ParseValue(ArgTy, V, PFS) || + // FIXME: Should not allow attributes after the argument, remove this + // in LLVM 3.0. + ParseOptionalAttrs(ArgAttrs2, 3)) + return true; + } ArgList.push_back(ParamInfo(ArgLoc, V, ArgAttrs1|ArgAttrs2)); } diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h index 1112dc4..d14b1cb 100644 --- a/lib/AsmParser/LLParser.h +++ b/lib/AsmParser/LLParser.h @@ -279,7 +279,9 @@ namespace llvm { LocTy Loc; return ParseTypeAndBasicBlock(BB, Loc, PFS); } - + + bool ParseInlineMetadata(Value *&V, PerFunctionState &PFS); + struct ParamInfo { LocTy Loc; Value *V; diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h index 797c32e..1165766 100644 --- a/lib/AsmParser/LLToken.h +++ b/lib/AsmParser/LLToken.h @@ -69,6 +69,7 @@ namespace lltok { kw_cc, kw_ccc, kw_fastcc, kw_coldcc, kw_x86_stdcallcc, kw_x86_fastcallcc, kw_arm_apcscc, kw_arm_aapcscc, kw_arm_aapcs_vfpcc, + kw_msp430_intrcc, kw_signext, kw_zeroext, diff --git a/lib/Bitcode/Reader/Deserialize.cpp b/lib/Bitcode/Reader/Deserialize.cpp index 67607ef..b8e720a 100644 --- a/lib/Bitcode/Reader/Deserialize.cpp +++ b/lib/Bitcode/Reader/Deserialize.cpp @@ -413,7 +413,7 @@ uintptr_t Deserializer::ReadInternalRefPtr() { return GetFinalPtr(E); } -void Deserializer::BPEntry::SetPtr(BPNode*& FreeList, void* P) { +void BPEntry::SetPtr(BPNode*& FreeList, void* P) { BPNode* Last = NULL; for (BPNode* N = Head; N != NULL; N=N->Next) { diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp index 8e3f8e7..bb61682 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -38,16 +38,19 @@ DebugMod("agg-antidep-debugmod", cl::desc("Debug control for aggressive anti-dep breaker"), cl::init(0), cl::Hidden); -AggressiveAntiDepState::AggressiveAntiDepState(MachineBasicBlock *BB) : - GroupNodes(TargetRegisterInfo::FirstVirtualRegister, 0) { - // Initialize all registers to be in their own group. Initially we - // assign the register to the same-indexed GroupNode. - for (unsigned i = 0; i < TargetRegisterInfo::FirstVirtualRegister; ++i) +AggressiveAntiDepState::AggressiveAntiDepState(const unsigned TargetRegs, + MachineBasicBlock *BB) : + NumTargetRegs(TargetRegs), GroupNodes(TargetRegs, 0) { + + const unsigned BBSize = BB->size(); + for (unsigned i = 0; i < NumTargetRegs; ++i) { + // Initialize all registers to be in their own group. Initially we + // assign the register to the same-indexed GroupNode. GroupNodeIndices[i] = i; - - // Initialize the indices to indicate that no registers are live. - std::fill(KillIndices, array_endof(KillIndices), ~0u); - std::fill(DefIndices, array_endof(DefIndices), BB->size()); + // Initialize the indices to indicate that no registers are live. + KillIndices[i] = ~0u; + DefIndices[i] = BBSize; + } } unsigned AggressiveAntiDepState::GetGroup(unsigned Reg) @@ -64,7 +67,7 @@ void AggressiveAntiDepState::GetGroupRegs( std::vector<unsigned> &Regs, std::multimap<unsigned, AggressiveAntiDepState::RegisterReference> *RegRefs) { - for (unsigned Reg = 0; Reg != TargetRegisterInfo::FirstVirtualRegister; ++Reg) { + for (unsigned Reg = 0; Reg != NumTargetRegs; ++Reg) { if ((GetGroup(Reg) == Group) && (RegRefs->count(Reg) > 0)) Regs.push_back(Reg); } @@ -137,7 +140,7 @@ AggressiveAntiDepBreaker::~AggressiveAntiDepBreaker() { void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { assert(State == NULL); - State = new AggressiveAntiDepState(BB); + State = new AggressiveAntiDepState(TRI->getNumRegs(), BB); bool IsReturnBlock = (!BB->empty() && BB->back().getDesc().isReturn()); unsigned *KillIndices = State->GetKillIndices(); @@ -220,7 +223,7 @@ void AggressiveAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count, DEBUG(errs() << "\tRegs:"); unsigned *DefIndices = State->GetDefIndices(); - for (unsigned Reg = 0; Reg != TargetRegisterInfo::FirstVirtualRegister; ++Reg) { + for (unsigned Reg = 0; Reg != TRI->getNumRegs(); ++Reg) { // If Reg is current live, then mark that it can't be renamed as // we don't know the extent of its live-range anymore (now that it // has been scheduled). If it is not live but was defined in the diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h index 8154d2d..d385a21 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.h +++ b/lib/CodeGen/AggressiveAntiDepBreaker.h @@ -44,6 +44,10 @@ namespace llvm { } RegisterReference; private: + /// NumTargetRegs - Number of non-virtual target registers + /// (i.e. TRI->getNumRegs()). + const unsigned NumTargetRegs; + /// GroupNodes - Implements a disjoint-union data structure to /// form register groups. A node is represented by an index into /// the vector. A node can "point to" itself to indicate that it @@ -69,7 +73,7 @@ namespace llvm { unsigned DefIndices[TargetRegisterInfo::FirstVirtualRegister]; public: - AggressiveAntiDepState(MachineBasicBlock *BB); + AggressiveAntiDepState(const unsigned TargetRegs, MachineBasicBlock *BB); /// GetKillIndices - Return the kill indices. unsigned *GetKillIndices() { return KillIndices; } diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 993cdbf..44fd176 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1374,6 +1374,7 @@ void AsmPrinter::processDebugLoc(const MachineInstr *MI, unsigned L = DW->RecordSourceLine(CurDLT.Line, CurDLT.Col, CurDLT.Scope); printLabel(L); + O << '\n'; DW->BeginScope(MI, L); PrevDLT = CurDLT; } @@ -1837,15 +1838,16 @@ void AsmPrinter::EmitComments(const MachineInstr &MI) const { // Print source line info. O.PadToColumn(MAI->getCommentColumn()); - O << MAI->getCommentString() << " SrcLine "; - if (DLT.Scope) { - DICompileUnit CU(DLT.Scope); - if (!CU.isNull()) - O << CU.getFilename() << " "; - } - O << DLT.Line; + O << MAI->getCommentString() << ' '; + DIScope Scope(DLT.Scope); + // Omit the directory, because it's likely to be long and uninteresting. + if (!Scope.isNull()) + O << Scope.getFilename(); + else + O << "<unknown>"; + O << ':' << DLT.Line; if (DLT.Col != 0) - O << ":" << DLT.Col; + O << ':' << DLT.Col; Newline = true; } @@ -1857,35 +1859,40 @@ void AsmPrinter::EmitComments(const MachineInstr &MI) const { // We assume a single instruction only has a spill or reload, not // both. + const MachineMemOperand *MMO; if (TM.getInstrInfo()->isLoadFromStackSlotPostFE(&MI, FI)) { if (FrameInfo->isSpillSlotObjectIndex(FI)) { + MMO = *MI.memoperands_begin(); if (Newline) O << '\n'; O.PadToColumn(MAI->getCommentColumn()); - O << MAI->getCommentString() << " Reload"; + O << MAI->getCommentString() << ' ' << MMO->getSize() << "-byte Reload"; Newline = true; } } - else if (TM.getInstrInfo()->hasLoadFromStackSlot(&MI, FI)) { + else if (TM.getInstrInfo()->hasLoadFromStackSlot(&MI, MMO, FI)) { if (FrameInfo->isSpillSlotObjectIndex(FI)) { if (Newline) O << '\n'; O.PadToColumn(MAI->getCommentColumn()); - O << MAI->getCommentString() << " Folded Reload"; + O << MAI->getCommentString() << ' ' + << MMO->getSize() << "-byte Folded Reload"; Newline = true; } } else if (TM.getInstrInfo()->isStoreToStackSlotPostFE(&MI, FI)) { if (FrameInfo->isSpillSlotObjectIndex(FI)) { + MMO = *MI.memoperands_begin(); if (Newline) O << '\n'; O.PadToColumn(MAI->getCommentColumn()); - O << MAI->getCommentString() << " Spill"; + O << MAI->getCommentString() << ' ' << MMO->getSize() << "-byte Spill"; Newline = true; } } - else if (TM.getInstrInfo()->hasStoreToStackSlot(&MI, FI)) { + else if (TM.getInstrInfo()->hasStoreToStackSlot(&MI, MMO, FI)) { if (FrameInfo->isSpillSlotObjectIndex(FI)) { if (Newline) O << '\n'; O.PadToColumn(MAI->getCommentColumn()); - O << MAI->getCommentString() << " Folded Spill"; + O << MAI->getCommentString() << ' ' + << MMO->getSize() << "-byte Folded Spill"; Newline = true; } } diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h index dc6a70a..cad8b89 100644 --- a/lib/CodeGen/AsmPrinter/DIE.h +++ b/lib/CodeGen/AsmPrinter/DIE.h @@ -274,7 +274,7 @@ namespace llvm { }; //===--------------------------------------------------------------------===// - /// DIEString - A string value DIE. + /// DIEString - A string value DIE. This DIE keeps string reference only. /// class DIEString : public DIEValue { const StringRef Str; diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index c2e1e05..c200a46 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -330,8 +330,8 @@ void DwarfDebug::addSInt(DIE *Die, unsigned Attribute, Die->addValue(Attribute, Form, Value); } -/// addString - Add a string attribute data and value. -/// +/// addString - Add a string attribute data and value. DIEString only +/// keeps string reference. void DwarfDebug::addString(DIE *Die, unsigned Attribute, unsigned Form, const StringRef String) { DIEValue *Value = new DIEString(String); @@ -393,7 +393,7 @@ void DwarfDebug::addSourceLine(DIE *Die, const DIVariable *V) { return; unsigned Line = V->getLineNumber(); - unsigned FileID = findCompileUnit(V->getCompileUnit()).getID(); + unsigned FileID = findCompileUnit(V->getCompileUnit())->getID(); assert(FileID && "Invalid file id"); addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); @@ -407,7 +407,7 @@ void DwarfDebug::addSourceLine(DIE *Die, const DIGlobal *G) { return; unsigned Line = G->getLineNumber(); - unsigned FileID = findCompileUnit(G->getCompileUnit()).getID(); + unsigned FileID = findCompileUnit(G->getCompileUnit())->getID(); assert(FileID && "Invalid file id"); addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); @@ -425,7 +425,7 @@ void DwarfDebug::addSourceLine(DIE *Die, const DISubprogram *SP) { unsigned Line = SP->getLineNumber(); - unsigned FileID = findCompileUnit(SP->getCompileUnit()).getID(); + unsigned FileID = findCompileUnit(SP->getCompileUnit())->getID(); assert(FileID && "Invalid file id"); addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); @@ -440,7 +440,7 @@ void DwarfDebug::addSourceLine(DIE *Die, const DIType *Ty) { return; unsigned Line = Ty->getLineNumber(); - unsigned FileID = findCompileUnit(CU).getID(); + unsigned FileID = findCompileUnit(CU)->getID(); assert(FileID && "Invalid file id"); addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); @@ -738,13 +738,49 @@ void DwarfDebug::addAddress(DIE *Die, unsigned Attribute, addBlock(Die, Attribute, 0, Block); } +/// addToContextOwner - Add Die into the list of its context owner's children. +void DwarfDebug::addToContextOwner(DIE *Die, DIDescriptor Context) { + if (Context.isNull()) + ModuleCU->addDie(Die); + else if (Context.isType()) { + DIE *ContextDIE = getOrCreateTypeDIE(DIType(Context.getNode())); + ContextDIE->addChild(Die); + } else if (DIE *ContextDIE = ModuleCU->getDIE(Context.getNode())) + ContextDIE->addChild(Die); + else + ModuleCU->addDie(Die); +} + +/// getOrCreateTypeDIE - Find existing DIE or create new DIE for the +/// given DIType. +DIE *DwarfDebug::getOrCreateTypeDIE(DIType Ty) { + DIE *TyDIE = ModuleCU->getDIE(Ty.getNode()); + if (TyDIE) + return TyDIE; + + // Create new type. + TyDIE = new DIE(dwarf::DW_TAG_base_type); + ModuleCU->insertDIE(Ty.getNode(), TyDIE); + if (Ty.isBasicType()) + constructTypeDIE(*TyDIE, DIBasicType(Ty.getNode())); + else if (Ty.isCompositeType()) + constructTypeDIE(*TyDIE, DICompositeType(Ty.getNode())); + else { + assert(Ty.isDerivedType() && "Unknown kind of DIType"); + constructTypeDIE(*TyDIE, DIDerivedType(Ty.getNode())); + } + + addToContextOwner(TyDIE, Ty.getContext()); + return TyDIE; +} + /// addType - Add a new type attribute to the specified entity. -void DwarfDebug::addType(CompileUnit *DW_Unit, DIE *Entity, DIType Ty) { +void DwarfDebug::addType(DIE *Entity, DIType Ty) { if (Ty.isNull()) return; // Check for pre-existence. - DIEEntry *Entry = DW_Unit->getDIEEntry(Ty.getNode()); + DIEEntry *Entry = ModuleCU->getDIEEntry(Ty.getNode()); // If it exists then use the existing value. if (Entry) { @@ -754,36 +790,17 @@ void DwarfDebug::addType(CompileUnit *DW_Unit, DIE *Entity, DIType Ty) { // Set up proxy. Entry = createDIEEntry(); - DW_Unit->insertDIEEntry(Ty.getNode(), Entry); + ModuleCU->insertDIEEntry(Ty.getNode(), Entry); // Construct type. - DIE *Buffer = new DIE(dwarf::DW_TAG_base_type); - if (Ty.isBasicType()) - constructTypeDIE(DW_Unit, *Buffer, DIBasicType(Ty.getNode())); - else if (Ty.isCompositeType()) - constructTypeDIE(DW_Unit, *Buffer, DICompositeType(Ty.getNode())); - else { - assert(Ty.isDerivedType() && "Unknown kind of DIType"); - constructTypeDIE(DW_Unit, *Buffer, DIDerivedType(Ty.getNode())); - } + DIE *Buffer = getOrCreateTypeDIE(Ty); - // Add debug information entry to entity and appropriate context. - DIE *Die = NULL; - DIDescriptor Context = Ty.getContext(); - if (!Context.isNull()) - Die = DW_Unit->getDIE(Context.getNode()); - - if (Die) - Die->addChild(Buffer); - else - DW_Unit->addDie(Buffer); Entry->setEntry(Buffer); Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry); } /// constructTypeDIE - Construct basic type die from DIBasicType. -void DwarfDebug::constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, - DIBasicType BTy) { +void DwarfDebug::constructTypeDIE(DIE &Buffer, DIBasicType BTy) { // Get core information. StringRef Name = BTy.getName(); Buffer.setTag(dwarf::DW_TAG_base_type); @@ -798,8 +815,7 @@ void DwarfDebug::constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, } /// constructTypeDIE - Construct derived type die from DIDerivedType. -void DwarfDebug::constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, - DIDerivedType DTy) { +void DwarfDebug::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) { // Get core information. StringRef Name = DTy.getName(); uint64_t Size = DTy.getSizeInBits() >> 3; @@ -812,7 +828,7 @@ void DwarfDebug::constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, // Map to main type, void will not have a type. DIType FromTy = DTy.getTypeDerivedFrom(); - addType(DW_Unit, &Buffer, FromTy); + addType(&Buffer, FromTy); // Add name if not anonymous or intermediate type. if (!Name.empty()) @@ -828,8 +844,7 @@ void DwarfDebug::constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, } /// constructTypeDIE - Construct type DIE from DICompositeType. -void DwarfDebug::constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, - DICompositeType CTy) { +void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { // Get core information. StringRef Name = CTy.getName(); @@ -840,7 +855,7 @@ void DwarfDebug::constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, switch (Tag) { case dwarf::DW_TAG_vector_type: case dwarf::DW_TAG_array_type: - constructArrayTypeDIE(DW_Unit, Buffer, &CTy); + constructArrayTypeDIE(Buffer, &CTy); break; case dwarf::DW_TAG_enumeration_type: { DIArray Elements = CTy.getTypeArray(); @@ -850,7 +865,7 @@ void DwarfDebug::constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, DIE *ElemDie = NULL; DIEnumerator Enum(Elements.getElement(i).getNode()); if (!Enum.isNull()) { - ElemDie = constructEnumTypeDIE(DW_Unit, &Enum); + ElemDie = constructEnumTypeDIE(&Enum); Buffer.addChild(ElemDie); } } @@ -860,7 +875,7 @@ void DwarfDebug::constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, // Add return type. DIArray Elements = CTy.getTypeArray(); DIDescriptor RTy = Elements.getElement(0); - addType(DW_Unit, &Buffer, DIType(RTy.getNode())); + addType(&Buffer, DIType(RTy.getNode())); // Add prototype flag. addUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1); @@ -869,7 +884,7 @@ void DwarfDebug::constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, for (unsigned i = 1, N = Elements.getNumElements(); i < N; ++i) { DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); DIDescriptor Ty = Elements.getElement(i); - addType(DW_Unit, Arg, DIType(Ty.getNode())); + addType(Arg, DIType(Ty.getNode())); Buffer.addChild(Arg); } } @@ -891,11 +906,9 @@ void DwarfDebug::constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, continue; DIE *ElemDie = NULL; if (Element.getTag() == dwarf::DW_TAG_subprogram) - ElemDie = createSubprogramDIE(DW_Unit, - DISubprogram(Element.getNode())); + ElemDie = createSubprogramDIE(DISubprogram(Element.getNode())); else - ElemDie = createMemberDIE(DW_Unit, - DIDerivedType(Element.getNode())); + ElemDie = createMemberDIE(DIDerivedType(Element.getNode())); Buffer.addChild(ElemDie); } @@ -944,33 +957,32 @@ void DwarfDebug::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy){ addDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy); if (L) addSInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, L); - if (H) - addSInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, H); + addSInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, H); Buffer.addChild(DW_Subrange); } /// constructArrayTypeDIE - Construct array type DIE from DICompositeType. -void DwarfDebug::constructArrayTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, +void DwarfDebug::constructArrayTypeDIE(DIE &Buffer, DICompositeType *CTy) { Buffer.setTag(dwarf::DW_TAG_array_type); if (CTy->getTag() == dwarf::DW_TAG_vector_type) addUInt(&Buffer, dwarf::DW_AT_GNU_vector, dwarf::DW_FORM_flag, 1); // Emit derived type. - addType(DW_Unit, &Buffer, CTy->getTypeDerivedFrom()); + addType(&Buffer, CTy->getTypeDerivedFrom()); DIArray Elements = CTy->getTypeArray(); // Get an anonymous type for index type. - DIE *IdxTy = DW_Unit->getIndexTyDie(); + DIE *IdxTy = ModuleCU->getIndexTyDie(); if (!IdxTy) { // Construct an anonymous type for index type. IdxTy = new DIE(dwarf::DW_TAG_base_type); addUInt(IdxTy, dwarf::DW_AT_byte_size, 0, sizeof(int32_t)); addUInt(IdxTy, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, dwarf::DW_ATE_signed); - DW_Unit->addDie(IdxTy); - DW_Unit->setIndexTyDie(IdxTy); + ModuleCU->addDie(IdxTy); + ModuleCU->setIndexTyDie(IdxTy); } // Add subranges to array type. @@ -982,7 +994,7 @@ void DwarfDebug::constructArrayTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, } /// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator. -DIE *DwarfDebug::constructEnumTypeDIE(CompileUnit *DW_Unit, DIEnumerator *ETy) { +DIE *DwarfDebug::constructEnumTypeDIE(DIEnumerator *ETy) { DIE *Enumerator = new DIE(dwarf::DW_TAG_enumerator); StringRef Name = ETy->getName(); addString(Enumerator, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); @@ -992,8 +1004,7 @@ DIE *DwarfDebug::constructEnumTypeDIE(CompileUnit *DW_Unit, DIEnumerator *ETy) { } /// createGlobalVariableDIE - Create new DIE using GV. -DIE *DwarfDebug::createGlobalVariableDIE(CompileUnit *DW_Unit, - const DIGlobalVariable &GV) { +DIE *DwarfDebug::createGlobalVariableDIE(const DIGlobalVariable &GV) { // If the global variable was optmized out then no need to create debug info // entry. if (!GV.getGlobal()) return NULL; @@ -1014,7 +1025,7 @@ DIE *DwarfDebug::createGlobalVariableDIE(CompileUnit *DW_Unit, addString(GVDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string, LinkageName); } - addType(DW_Unit, GVDie, GV.getType()); + addType(GVDie, GV.getType()); if (!GV.isLocalToUnit()) addUInt(GVDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); addSourceLine(GVDie, &GV); @@ -1030,13 +1041,13 @@ DIE *DwarfDebug::createGlobalVariableDIE(CompileUnit *DW_Unit, } /// createMemberDIE - Create new member DIE. -DIE *DwarfDebug::createMemberDIE(CompileUnit *DW_Unit, const DIDerivedType &DT){ +DIE *DwarfDebug::createMemberDIE(const DIDerivedType &DT) { DIE *MemberDie = new DIE(DT.getTag()); StringRef Name = DT.getName(); if (!Name.empty()) addString(MemberDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); - addType(DW_Unit, MemberDie, DT.getTypeDerivedFrom()); + addType(MemberDie, DT.getTypeDerivedFrom()); addSourceLine(MemberDie, &DT); @@ -1073,21 +1084,27 @@ DIE *DwarfDebug::createMemberDIE(CompileUnit *DW_Unit, const DIDerivedType &DT){ addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, MemLocationDie); if (DT.isProtected()) - addUInt(MemberDie, dwarf::DW_AT_accessibility, 0, + addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, dwarf::DW_ACCESS_protected); else if (DT.isPrivate()) - addUInt(MemberDie, dwarf::DW_AT_accessibility, 0, + addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, dwarf::DW_ACCESS_private); - + else if (DT.getTag() == dwarf::DW_TAG_inheritance) + addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, + dwarf::DW_ACCESS_public); + if (DT.isVirtual()) + addUInt(MemberDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag, + dwarf::DW_VIRTUALITY_virtual); return MemberDie; } /// createSubprogramDIE - Create new DIE using SP. -DIE *DwarfDebug::createSubprogramDIE(CompileUnit *DW_Unit, - const DISubprogram &SP, - bool IsConstructor, - bool IsInlined) { - DIE *SPDie = new DIE(dwarf::DW_TAG_subprogram); +DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP, bool MakeDecl) { + DIE *SPDie = ModuleCU->getDIE(SP.getNode()); + if (SPDie) + return SPDie; + + SPDie = new DIE(dwarf::DW_TAG_subprogram); addString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, SP.getName()); StringRef LinkageName = SP.getLinkageName(); @@ -1103,9 +1120,6 @@ DIE *DwarfDebug::createSubprogramDIE(CompileUnit *DW_Unit, } addSourceLine(SPDie, &SP); - DICompositeType SPTy = SP.getType(); - DIArray Args = SPTy.getTypeArray(); - // Add prototyped tag, if C or ObjC. unsigned Lang = SP.getCompileUnit().getLanguage(); if (Lang == dwarf::DW_LANG_C99 || Lang == dwarf::DW_LANG_C89 || @@ -1113,98 +1127,56 @@ DIE *DwarfDebug::createSubprogramDIE(CompileUnit *DW_Unit, addUInt(SPDie, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1); // Add Return Type. + DICompositeType SPTy = SP.getType(); + DIArray Args = SPTy.getTypeArray(); unsigned SPTag = SPTy.getTag(); - if (!IsConstructor) { - if (Args.isNull() || SPTag != dwarf::DW_TAG_subroutine_type) - addType(DW_Unit, SPDie, SPTy); - else - addType(DW_Unit, SPDie, DIType(Args.getElement(0).getNode())); + + if (Args.isNull() || SPTag != dwarf::DW_TAG_subroutine_type) + addType(SPDie, SPTy); + else + addType(SPDie, DIType(Args.getElement(0).getNode())); + + unsigned VK = SP.getVirtuality(); + if (VK) { + addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag, VK); + DIEBlock *Block = new DIEBlock(); + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); + addUInt(Block, 0, dwarf::DW_FORM_data1, SP.getVirtualIndex()); + addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, 0, Block); + ContainingTypeMap.insert(std::make_pair(SPDie, WeakVH(SP.getContainingType().getNode()))); } - if (!SP.isDefinition()) { + if (MakeDecl || !SP.isDefinition()) { addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); // Add arguments. Do not add arguments for subprogram definition. They will - // be handled through RecordVariable. + // be handled while processing variables. + DICompositeType SPTy = SP.getType(); + DIArray Args = SPTy.getTypeArray(); + unsigned SPTag = SPTy.getTag(); + if (SPTag == dwarf::DW_TAG_subroutine_type) for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); - addType(DW_Unit, Arg, DIType(Args.getElement(i).getNode())); + addType(Arg, DIType(Args.getElement(i).getNode())); addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); // ?? SPDie->addChild(Arg); } } // DW_TAG_inlined_subroutine may refer to this DIE. - DW_Unit->insertDIE(SP.getNode(), SPDie); + ModuleCU->insertDIE(SP.getNode(), SPDie); return SPDie; } /// findCompileUnit - Get the compile unit for the given descriptor. /// -CompileUnit &DwarfDebug::findCompileUnit(DICompileUnit Unit) const { +CompileUnit *DwarfDebug::findCompileUnit(DICompileUnit Unit) { DenseMap<Value *, CompileUnit *>::const_iterator I = CompileUnitMap.find(Unit.getNode()); - assert(I != CompileUnitMap.end() && "Missing compile unit."); - return *I->second; -} - -/// createDbgScopeVariable - Create a new scope variable. -/// -DIE *DwarfDebug::createDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit) { - // Get the descriptor. - const DIVariable &VD = DV->getVariable(); - StringRef Name = VD.getName(); - if (Name.empty()) - return NULL; - - // Translate tag to proper Dwarf tag. The result variable is dropped for - // now. - unsigned Tag; - switch (VD.getTag()) { - case dwarf::DW_TAG_return_variable: - return NULL; - case dwarf::DW_TAG_arg_variable: - Tag = dwarf::DW_TAG_formal_parameter; - break; - case dwarf::DW_TAG_auto_variable: // fall thru - default: - Tag = dwarf::DW_TAG_variable; - break; - } - - // Define variable debug information entry. - DIE *VariableDie = new DIE(Tag); - addString(VariableDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); - - // Add source line info if available. - addSourceLine(VariableDie, &VD); - - // Add variable type. - // FIXME: isBlockByrefVariable should be reformulated in terms of complex - // addresses instead. - if (VD.isBlockByrefVariable()) - addType(Unit, VariableDie, getBlockByrefType(VD.getType(), Name)); - else - addType(Unit, VariableDie, VD.getType()); - - // Add variable address. - // Variables for abstract instances of inlined functions don't get a - // location. - MachineLocation Location; - unsigned FrameReg; - int Offset = RI->getFrameIndexReference(*MF, DV->getFrameIndex(), FrameReg); - Location.set(FrameReg, Offset); - - - if (VD.hasComplexAddress()) - addComplexAddress(DV, VariableDie, dwarf::DW_AT_location, Location); - else if (VD.isBlockByrefVariable()) - addBlockByrefAddress(DV, VariableDie, dwarf::DW_AT_location, Location); - else - addAddress(VariableDie, dwarf::DW_AT_location, Location); - - return VariableDie; + if (I == CompileUnitMap.end()) + return constructCompileUnit(Unit.getNode()); + return I->second; } /// getUpdatedDbgScope - Find or create DbgScope assicated with the instruction. @@ -1295,6 +1267,28 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(MDNode *SPNode) { DIE *SPDie = ModuleCU->getDIE(SPNode); assert (SPDie && "Unable to find subprogram DIE!"); + DISubprogram SP(SPNode); + if (SP.isDefinition() && !SP.getContext().isCompileUnit()) { + addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); + // Add arguments. + DICompositeType SPTy = SP.getType(); + DIArray Args = SPTy.getTypeArray(); + unsigned SPTag = SPTy.getTag(); + if (SPTag == dwarf::DW_TAG_subroutine_type) + for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { + DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); + addType(Arg, DIType(Args.getElement(i).getNode())); + addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); // ?? + SPDie->addChild(Arg); + } + DIE *SPDeclDie = SPDie; + SPDie = new DIE(dwarf::DW_TAG_subprogram); + addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, + SPDeclDie); + + ModuleCU->addDie(SPDie); + } + addLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, DWLabel("func_begin", SubprogramCount)); addLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, @@ -1305,19 +1299,6 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(MDNode *SPNode) { if (!DISubprogram(SPNode).isLocalToUnit()) addUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); - // If there are global variables at this scope then add their dies. - for (SmallVector<WeakVH, 4>::iterator SGI = ScopedGVs.begin(), - SGE = ScopedGVs.end(); SGI != SGE; ++SGI) { - MDNode *N = dyn_cast_or_null<MDNode>(*SGI); - if (!N) continue; - DIGlobalVariable GV(N); - if (GV.getContext().getNode() == SPNode) { - DIE *ScopedGVDie = createGlobalVariableDIE(ModuleCU, GV); - if (ScopedGVDie) - SPDie->addChild(ScopedGVDie); - } - } - return SPDie; } @@ -1401,8 +1382,7 @@ DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) { /// constructVariableDIE - Construct a DIE for the given DbgVariable. -DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, - DbgScope *Scope, CompileUnit *Unit) { +DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) { // Get the descriptor. const DIVariable &VD = DV->getVariable(); StringRef Name = VD.getName(); @@ -1451,9 +1431,9 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, // FIXME: isBlockByrefVariable should be reformulated in terms of complex // addresses instead. if (VD.isBlockByrefVariable()) - addType(Unit, VariableDie, getBlockByrefType(VD.getType(), Name)); + addType(VariableDie, getBlockByrefType(VD.getType(), Name)); else - addType(Unit, VariableDie, VD.getType()); + addType(VariableDie, VD.getType()); } // Add variable address. @@ -1522,7 +1502,7 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) { // Add variables to scope. SmallVector<DbgVariable *, 8> &Variables = Scope->getVariables(); for (unsigned i = 0, N = Variables.size(); i < N; ++i) { - DIE *VariableDIE = constructVariableDIE(Variables[i], Scope, ModuleCU); + DIE *VariableDIE = constructVariableDIE(Variables[i], Scope); if (VariableDIE) ScopeDIE->addChild(VariableDIE); } @@ -1579,7 +1559,7 @@ unsigned DwarfDebug::GetOrCreateSourceID(StringRef DirName, StringRef FileName) return SrcId; } -void DwarfDebug::constructCompileUnit(MDNode *N) { +CompileUnit *DwarfDebug::constructCompileUnit(MDNode *N) { DICompileUnit DIUnit(N); StringRef FN = DIUnit.getFilename(); StringRef Dir = DIUnit.getDirectory(); @@ -1618,6 +1598,7 @@ void DwarfDebug::constructCompileUnit(MDNode *N) { CompileUnitMap[DIUnit.getNode()] = Unit; CompileUnits.push_back(Unit); + return Unit; } void DwarfDebug::constructGlobalVariableDIE(MDNode *N) { @@ -1631,14 +1612,16 @@ void DwarfDebug::constructGlobalVariableDIE(MDNode *N) { if (ModuleCU->getDIE(DI_GV.getNode())) return; - DIE *VariableDie = createGlobalVariableDIE(ModuleCU, DI_GV); + DIE *VariableDie = createGlobalVariableDIE(DI_GV); + if (!VariableDie) + return; // Add to map. ModuleCU->insertDIE(N, VariableDie); // Add to context owner. - ModuleCU->getCUDie()->addChild(VariableDie); - + addToContextOwner(VariableDie, DI_GV.getContext()); + // Expose as global. FIXME - need to check external flag. ModuleCU->addGlobal(DI_GV.getName(), VariableDie); @@ -1663,13 +1646,15 @@ void DwarfDebug::constructSubprogramDIE(MDNode *N) { // class type. return; - DIE *SubprogramDie = createSubprogramDIE(ModuleCU, SP); + DIE *SubprogramDie = createSubprogramDIE(SP); // Add to map. ModuleCU->insertDIE(N, SubprogramDie); // Add to context owner. - ModuleCU->getCUDie()->addChild(SubprogramDie); + if (SP.getContext().getNode() == SP.getCompileUnit().getNode()) + if (TopLevelDIEs.insert(SubprogramDie)) + TopLevelDIEsVector.push_back(SubprogramDie); // Expose as global. ModuleCU->addGlobal(SP.getName(), SubprogramDie); @@ -1709,21 +1694,16 @@ void DwarfDebug::beginModule(Module *M, MachineModuleInfo *mmi) { if (!ModuleCU) ModuleCU = CompileUnits[0]; - // Create DIEs for each of the externally visible global variables. - for (DebugInfoFinder::iterator I = DbgFinder.global_variable_begin(), - E = DbgFinder.global_variable_end(); I != E; ++I) { - DIGlobalVariable GV(*I); - if (GV.getContext().getNode() != GV.getCompileUnit().getNode()) - ScopedGVs.push_back(*I); - else - constructGlobalVariableDIE(*I); - } - // Create DIEs for each subprogram. for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(), E = DbgFinder.subprogram_end(); I != E; ++I) constructSubprogramDIE(*I); + // Create DIEs for each global variable. + for (DebugInfoFinder::iterator I = DbgFinder.global_variable_begin(), + E = DbgFinder.global_variable_end(); I != E; ++I) + constructGlobalVariableDIE(*I); + MMI = mmi; shouldEmit = true; MMI->setDebugInfoAvailability(true); @@ -1770,6 +1750,22 @@ void DwarfDebug::endModule() { addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined); } + // Insert top level DIEs. + for (SmallVector<DIE *, 4>::iterator TI = TopLevelDIEsVector.begin(), + TE = TopLevelDIEsVector.end(); TI != TE; ++TI) + ModuleCU->getCUDie()->addChild(*TI); + + for (DenseMap<DIE *, WeakVH>::iterator CI = ContainingTypeMap.begin(), + CE = ContainingTypeMap.end(); CI != CE; ++CI) { + DIE *SPDie = CI->first; + MDNode *N = dyn_cast_or_null<MDNode>(CI->second); + if (!N) continue; + DIE *NDie = ModuleCU->getDIE(N); + if (!NDie) continue; + addDIEEntry(SPDie, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, NDie); + addDIEEntry(NDie, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, NDie); + } + // Standard sections final addresses. Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering().getTextSection()); EmitLabel("text_end", 0); @@ -1898,6 +1894,7 @@ void DwarfDebug::endScope(const MachineInstr *MI) { unsigned Label = MMI->NextLabelID(); Asm->printLabel(Label); + O << '\n'; SmallVector<DbgScope *, 2> &SD = I->second; for (SmallVector<DbgScope *, 2>::iterator SDI = SD.begin(), SDE = SD.end(); @@ -2092,17 +2089,15 @@ void DwarfDebug::endFunction(MachineFunction *MF) { MMI->getFrameMoves())); // Clear debug info - if (CurrentFnDbgScope) { - CurrentFnDbgScope = NULL; - DbgScopeMap.clear(); - DbgScopeBeginMap.clear(); - DbgScopeEndMap.clear(); - ConcreteScopes.clear(); - AbstractScopesList.clear(); - } + CurrentFnDbgScope = NULL; + DbgScopeMap.clear(); + DbgScopeBeginMap.clear(); + DbgScopeEndMap.clear(); + ConcreteScopes.clear(); + AbstractScopesList.clear(); Lines.clear(); - + if (TimePassesIsEnabled) DebugTimer->stopTimer(); } @@ -2337,13 +2332,16 @@ void DwarfDebug::emitDIE(DIE *Die) { } } -/// emitDebugInfo / emitDebugInfoPerCU - Emit the debug info section. +/// emitDebugInfo - Emit the debug info section. /// -void DwarfDebug::emitDebugInfoPerCU(CompileUnit *Unit) { - DIE *Die = Unit->getCUDie(); +void DwarfDebug::emitDebugInfo() { + // Start debug info section. + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfInfoSection()); + DIE *Die = ModuleCU->getCUDie(); // Emit the compile units header. - EmitLabel("info_begin", Unit->getID()); + EmitLabel("info_begin", ModuleCU->getID()); // Emit size of content not including length itself unsigned ContentSize = Die->getSize() + @@ -2364,17 +2362,10 @@ void DwarfDebug::emitDebugInfoPerCU(CompileUnit *Unit) { Asm->EmitInt8(0); Asm->EOL("Extra Pad For GDB"); Asm->EmitInt8(0); Asm->EOL("Extra Pad For GDB"); Asm->EmitInt8(0); Asm->EOL("Extra Pad For GDB"); - EmitLabel("info_end", Unit->getID()); + EmitLabel("info_end", ModuleCU->getID()); Asm->EOL(); -} - -void DwarfDebug::emitDebugInfo() { - // Start debug info section. - Asm->OutStreamer.SwitchSection( - Asm->getObjFileLowering().getDwarfInfoSection()); - emitDebugInfoPerCU(ModuleCU); } /// emitAbbreviations - Emit the abbreviation section. @@ -2534,9 +2525,9 @@ void DwarfDebug::emitDebugLines() { std::pair<unsigned, unsigned> SourceID = getSourceDirectoryAndFileIds(LineInfo.getSourceID()); O << '\t' << MAI->getCommentString() << ' ' - << getSourceDirectoryName(SourceID.first) << ' ' + << getSourceDirectoryName(SourceID.first) << '/' << getSourceFileName(SourceID.second) - <<" :" << utostr_32(LineInfo.getLine()) << '\n'; + << ':' << utostr_32(LineInfo.getLine()) << '\n'; } // Define the line address. @@ -2672,24 +2663,30 @@ DwarfDebug::emitFunctionDebugFrame(const FunctionDebugFrameInfo&DebugFrameInfo){ Asm->EOL(); } -void DwarfDebug::emitDebugPubNamesPerCU(CompileUnit *Unit) { - EmitDifference("pubnames_end", Unit->getID(), - "pubnames_begin", Unit->getID(), true); +/// emitDebugPubNames - Emit visible names into a debug pubnames section. +/// +void DwarfDebug::emitDebugPubNames() { + // Start the dwarf pubnames section. + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfPubNamesSection()); + + EmitDifference("pubnames_end", ModuleCU->getID(), + "pubnames_begin", ModuleCU->getID(), true); Asm->EOL("Length of Public Names Info"); - EmitLabel("pubnames_begin", Unit->getID()); + EmitLabel("pubnames_begin", ModuleCU->getID()); Asm->EmitInt16(dwarf::DWARF_VERSION); Asm->EOL("DWARF Version"); EmitSectionOffset("info_begin", "section_info", - Unit->getID(), 0, true, false); + ModuleCU->getID(), 0, true, false); Asm->EOL("Offset of Compilation Unit Info"); - EmitDifference("info_end", Unit->getID(), "info_begin", Unit->getID(), + EmitDifference("info_end", ModuleCU->getID(), "info_begin", ModuleCU->getID(), true); Asm->EOL("Compilation Unit Length"); - const StringMap<DIE*> &Globals = Unit->getGlobals(); + const StringMap<DIE*> &Globals = ModuleCU->getGlobals(); for (StringMap<DIE*>::const_iterator GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) { const char *Name = GI->getKeyData(); @@ -2700,21 +2697,11 @@ void DwarfDebug::emitDebugPubNamesPerCU(CompileUnit *Unit) { } Asm->EmitInt32(0); Asm->EOL("End Mark"); - EmitLabel("pubnames_end", Unit->getID()); + EmitLabel("pubnames_end", ModuleCU->getID()); Asm->EOL(); } -/// emitDebugPubNames - Emit visible names into a debug pubnames section. -/// -void DwarfDebug::emitDebugPubNames() { - // Start the dwarf pubnames section. - Asm->OutStreamer.SwitchSection( - Asm->getObjFileLowering().getDwarfPubNamesSection()); - - emitDebugPubNamesPerCU(ModuleCU); -} - void DwarfDebug::emitDebugPubTypes() { // Start the dwarf pubnames section. Asm->OutStreamer.SwitchSection( diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index 679d9b9..12ad322 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -154,12 +154,14 @@ class DwarfDebug : public Dwarf { /// (at the end of the module) as DW_AT_inline. SmallPtrSet<DIE *, 4> InlinedSubprogramDIEs; + DenseMap<DIE *, WeakVH> ContainingTypeMap; + /// AbstractSubprogramDIEs - Collection of abstruct subprogram DIEs. SmallPtrSet<DIE *, 4> AbstractSubprogramDIEs; - /// ScopedGVs - Tracks global variables that are not at file scope. - /// For example void f() { static int b = 42; } - SmallVector<WeakVH, 4> ScopedGVs; + /// TopLevelDIEs - Collection of top level DIEs. + SmallPtrSet<DIE *, 4> TopLevelDIEs; + SmallVector<DIE *, 4> TopLevelDIEsVector; typedef SmallVector<DbgScope *, 2> ScopeVector; typedef DenseMap<const MachineInstr *, ScopeVector> @@ -307,53 +309,52 @@ class DwarfDebug : public Dwarf { void addBlockByrefAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute, const MachineLocation &Location); + /// addToContextOwner - Add Die into the list of its context owner's children. + void addToContextOwner(DIE *Die, DIDescriptor Context); + /// addType - Add a new type attribute to the specified entity. - void addType(CompileUnit *DW_Unit, DIE *Entity, DIType Ty); + void addType(DIE *Entity, DIType Ty); + + /// getOrCreateTypeDIE - Find existing DIE or create new DIE for the + /// given DIType. + DIE *getOrCreateTypeDIE(DIType Ty); void addPubTypes(DISubprogram SP); /// constructTypeDIE - Construct basic type die from DIBasicType. - void constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, + void constructTypeDIE(DIE &Buffer, DIBasicType BTy); /// constructTypeDIE - Construct derived type die from DIDerivedType. - void constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, + void constructTypeDIE(DIE &Buffer, DIDerivedType DTy); /// constructTypeDIE - Construct type DIE from DICompositeType. - void constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, + void constructTypeDIE(DIE &Buffer, DICompositeType CTy); /// constructSubrangeDIE - Construct subrange DIE from DISubrange. void constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy); /// constructArrayTypeDIE - Construct array type DIE from DICompositeType. - void constructArrayTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, + void constructArrayTypeDIE(DIE &Buffer, DICompositeType *CTy); /// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator. - DIE *constructEnumTypeDIE(CompileUnit *DW_Unit, DIEnumerator *ETy); + DIE *constructEnumTypeDIE(DIEnumerator *ETy); /// createGlobalVariableDIE - Create new DIE using GV. - DIE *createGlobalVariableDIE(CompileUnit *DW_Unit, - const DIGlobalVariable &GV); + DIE *createGlobalVariableDIE(const DIGlobalVariable &GV); /// createMemberDIE - Create new member DIE. - DIE *createMemberDIE(CompileUnit *DW_Unit, const DIDerivedType &DT); + DIE *createMemberDIE(const DIDerivedType &DT); /// createSubprogramDIE - Create new DIE using SP. - DIE *createSubprogramDIE(CompileUnit *DW_Unit, - const DISubprogram &SP, - bool IsConstructor = false, - bool IsInlined = false); + DIE *createSubprogramDIE(const DISubprogram &SP, bool MakeDecl = false); /// findCompileUnit - Get the compile unit for the given descriptor. /// - CompileUnit &findCompileUnit(DICompileUnit Unit) const; - - /// createDbgScopeVariable - Create a new scope variable. - /// - DIE *createDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit); + CompileUnit *findCompileUnit(DICompileUnit Unit); /// getUpdatedDbgScope - Find or create DbgScope assicated with /// the instruction. Initialize scope and update scope hierarchy. @@ -384,7 +385,7 @@ class DwarfDebug : public Dwarf { DIE *constructInlinedScopeDIE(DbgScope *Scope); /// constructVariableDIE - Construct a DIE for the given DbgVariable. - DIE *constructVariableDIE(DbgVariable *DV, DbgScope *S, CompileUnit *Unit); + DIE *constructVariableDIE(DbgVariable *DV, DbgScope *S); /// constructScopeDIE - Construct a DIE for this scope. DIE *constructScopeDIE(DbgScope *Scope); @@ -405,10 +406,8 @@ class DwarfDebug : public Dwarf { /// void computeSizeAndOffsets(); - /// EmitDebugInfo / emitDebugInfoPerCU - Emit the debug info section. + /// EmitDebugInfo - Emit the debug info section. /// - void emitDebugInfoPerCU(CompileUnit *Unit); - void emitDebugInfo(); /// emitAbbreviations - Emit the abbreviation section. @@ -432,8 +431,6 @@ class DwarfDebug : public Dwarf { /// section. void emitFunctionDebugFrame(const FunctionDebugFrameInfo &DebugFrameInfo); - void emitDebugPubNamesPerCU(CompileUnit *Unit); - /// emitDebugPubNames - Emit visible names into a debug pubnames section. /// void emitDebugPubNames(); @@ -488,7 +485,7 @@ class DwarfDebug : public Dwarf { /// as well. unsigned GetOrCreateSourceID(StringRef DirName, StringRef FileName); - void constructCompileUnit(MDNode *N); + CompileUnit *constructCompileUnit(MDNode *N); void constructGlobalVariableDIE(MDNode *N); diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp index 1c8b8f4..3fd077f 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp @@ -292,14 +292,13 @@ void DwarfException::EmitFDE(const FunctionEHFrameInfo &EHFrameInfo) { Asm->EmitULEB128Bytes(is4Byte ? 4 : 8); Asm->EOL("Augmentation size"); + // We force 32-bits here because we've encoded our LSDA in the CIE with + // `dwarf::DW_EH_PE_sdata4'. And the CIE and FDE should agree. if (EHFrameInfo.hasLandingPads) - EmitReference("exception", EHFrameInfo.Number, true, false); - else { - if (is4Byte) - Asm->EmitInt32((int)0); - else - Asm->EmitInt64((int)0); - } + EmitReference("exception", EHFrameInfo.Number, true, true); + else + Asm->EmitInt32((int)0); + Asm->EOL("Language Specific Data Area"); } else { Asm->EmitULEB128Bytes(0); diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h index aff1665..aa01c5b 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.h +++ b/lib/CodeGen/AsmPrinter/DwarfException.h @@ -119,7 +119,6 @@ class DwarfException : public Dwarf { static inline unsigned getTombstoneKey() { return -2U; } static unsigned getHashValue(const unsigned &Key) { return Key; } static bool isEqual(unsigned LHS, unsigned RHS) { return LHS == RHS; } - static bool isPod() { return true; } }; /// PadRange - Structure holding a try-range and the associated landing pad. diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index 8a62eb2..3887e6d 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -427,7 +427,7 @@ static unsigned EstimateRuntime(MachineBasicBlock::iterator I, static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB, const TargetInstrInfo *TII) { MachineFunction *MF = CurMBB->getParent(); - MachineFunction::iterator I = next(MachineFunction::iterator(CurMBB)); + MachineFunction::iterator I = llvm::next(MachineFunction::iterator(CurMBB)); MachineBasicBlock *TBB = 0, *FBB = 0; SmallVector<MachineOperand, 4> Cond; if (I != MF->end() && @@ -805,7 +805,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { // a compile-time infinite loop repeatedly doing and undoing the same // transformations.) - for (MachineFunction::iterator I = next(MF.begin()), E = MF.end(); + for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end(); I != E; ++I) { if (I->pred_size() >= 2 && I->pred_size() < TailMergeThreshold) { SmallPtrSet<MachineBasicBlock *, 8> UniquePreds; @@ -833,7 +833,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { continue; // This is the QBB case described above if (!FBB) - FBB = next(MachineFunction::iterator(PBB)); + FBB = llvm::next(MachineFunction::iterator(PBB)); } // Failing case: the only way IBB can be reached from PBB is via // exception handling. Happens for landing pads. Would be nice @@ -1140,7 +1140,7 @@ ReoptimizeBlock: // falls through into MBB and we can't understand the prior block's branch // condition. if (MBB->empty()) { - bool PredHasNoFallThrough = TII->BlockHasNoFallThrough(PrevBB); + bool PredHasNoFallThrough = !PrevBB.canFallThrough(); if (PredHasNoFallThrough || !PriorUnAnalyzable || !PrevBB.isSuccessor(MBB)) { // If the prior block falls through into us, turn it into an @@ -1239,7 +1239,7 @@ ReoptimizeBlock: // B elsewhere // next: if (CurFallsThru) { - MachineBasicBlock *NextBB = next(MachineFunction::iterator(MBB)); + MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB)); CurCond.clear(); TII->InsertBranch(*MBB, NextBB, 0, CurCond); } diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 6f86614..7a969f0 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -1,6 +1,7 @@ add_llvm_library(LLVMCodeGen AggressiveAntiDepBreaker.cpp BranchFolding.cpp + CalcSpillWeights.cpp CodePlacementOpt.cpp CriticalAntiDepBreaker.cpp DeadMachineInstructionElim.cpp @@ -35,7 +36,9 @@ add_llvm_library(LLVMCodeGen MachinePassRegistry.cpp MachineRegisterInfo.cpp MachineSink.cpp + MachineSSAUpdater.cpp MachineVerifier.cpp + MaxStackAlignment.cpp ObjectCodeEmitter.cpp OcamlGC.cpp PHIElimination.cpp diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp new file mode 100644 index 0000000..dcffb8a2 --- /dev/null +++ b/lib/CodeGen/CalcSpillWeights.cpp @@ -0,0 +1,154 @@ +//===------------------------ CalcSpillWeights.cpp ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "calcspillweights" + +#include "llvm/Function.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/CodeGen/CalcSpillWeights.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" + +using namespace llvm; + +char CalculateSpillWeights::ID = 0; +static RegisterPass<CalculateSpillWeights> X("calcspillweights", + "Calculate spill weights"); + +void CalculateSpillWeights::getAnalysisUsage(AnalysisUsage &au) const { + au.addRequired<LiveIntervals>(); + au.addRequired<MachineLoopInfo>(); + au.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(au); +} + +bool CalculateSpillWeights::runOnMachineFunction(MachineFunction &fn) { + + DEBUG(errs() << "********** Compute Spill Weights **********\n" + << "********** Function: " + << fn.getFunction()->getName() << '\n'); + + LiveIntervals *lis = &getAnalysis<LiveIntervals>(); + MachineLoopInfo *loopInfo = &getAnalysis<MachineLoopInfo>(); + const TargetInstrInfo *tii = fn.getTarget().getInstrInfo(); + MachineRegisterInfo *mri = &fn.getRegInfo(); + + SmallSet<unsigned, 4> processed; + for (MachineFunction::iterator mbbi = fn.begin(), mbbe = fn.end(); + mbbi != mbbe; ++mbbi) { + MachineBasicBlock* mbb = mbbi; + SlotIndex mbbEnd = lis->getMBBEndIdx(mbb); + MachineLoop* loop = loopInfo->getLoopFor(mbb); + unsigned loopDepth = loop ? loop->getLoopDepth() : 0; + bool isExiting = loop ? loop->isLoopExiting(mbb) : false; + + for (MachineBasicBlock::const_iterator mii = mbb->begin(), mie = mbb->end(); + mii != mie; ++mii) { + const MachineInstr *mi = mii; + if (tii->isIdentityCopy(*mi)) + continue; + + if (mi->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) + continue; + + for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) { + const MachineOperand &mopi = mi->getOperand(i); + if (!mopi.isReg() || mopi.getReg() == 0) + continue; + unsigned reg = mopi.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(mopi.getReg())) + continue; + // Multiple uses of reg by the same instruction. It should not + // contribute to spill weight again. + if (!processed.insert(reg)) + continue; + + bool hasDef = mopi.isDef(); + bool hasUse = !hasDef; + for (unsigned j = i+1; j != e; ++j) { + const MachineOperand &mopj = mi->getOperand(j); + if (!mopj.isReg() || mopj.getReg() != reg) + continue; + hasDef |= mopj.isDef(); + hasUse |= mopj.isUse(); + if (hasDef && hasUse) + break; + } + + LiveInterval ®Int = lis->getInterval(reg); + float weight = lis->getSpillWeight(hasDef, hasUse, loopDepth); + if (hasDef && isExiting) { + // Looks like this is a loop count variable update. + SlotIndex defIdx = lis->getInstructionIndex(mi).getDefIndex(); + const LiveRange *dlr = + lis->getInterval(reg).getLiveRangeContaining(defIdx); + if (dlr->end > mbbEnd) + weight *= 3.0F; + } + regInt.weight += weight; + } + processed.clear(); + } + } + + for (LiveIntervals::iterator I = lis->begin(), E = lis->end(); I != E; ++I) { + LiveInterval &li = *I->second; + if (TargetRegisterInfo::isVirtualRegister(li.reg)) { + // If the live interval length is essentially zero, i.e. in every live + // range the use follows def immediately, it doesn't make sense to spill + // it and hope it will be easier to allocate for this li. + if (isZeroLengthInterval(&li)) { + li.weight = HUGE_VALF; + continue; + } + + bool isLoad = false; + SmallVector<LiveInterval*, 4> spillIs; + if (lis->isReMaterializable(li, spillIs, isLoad)) { + // If all of the definitions of the interval are re-materializable, + // it is a preferred candidate for spilling. If non of the defs are + // loads, then it's potentially very cheap to re-materialize. + // FIXME: this gets much more complicated once we support non-trivial + // re-materialization. + if (isLoad) + li.weight *= 0.9F; + else + li.weight *= 0.5F; + } + + // Slightly prefer live interval that has been assigned a preferred reg. + std::pair<unsigned, unsigned> Hint = mri->getRegAllocationHint(li.reg); + if (Hint.first || Hint.second) + li.weight *= 1.01F; + + // Divide the weight of the interval by its size. This encourages + // spilling of intervals that are large and have few uses, and + // discourages spilling of small intervals with many uses. + li.weight /= lis->getApproximateInstructionCount(li) * SlotIndex::NUM; + } + } + + return false; +} + +/// Returns true if the given live interval is zero length. +bool CalculateSpillWeights::isZeroLengthInterval(LiveInterval *li) const { + for (LiveInterval::Ranges::const_iterator + i = li->ranges.begin(), e = li->ranges.end(); i != e; ++i) + if (i->end.getPrevIndex() > i->start) + return false; + return true; +} diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp index e9844d8..ff71f6b 100644 --- a/lib/CodeGen/CodePlacementOpt.cpp +++ b/lib/CodeGen/CodePlacementOpt.cpp @@ -182,7 +182,7 @@ bool CodePlacementOpt::EliminateUnconditionalJumpsToTop(MachineFunction &MF, // Move it and all the blocks that can reach it via fallthrough edges // exclusively, to keep existing fallthrough edges intact. MachineFunction::iterator Begin = Pred; - MachineFunction::iterator End = next(Begin); + MachineFunction::iterator End = llvm::next(Begin); while (Begin != MF.begin()) { MachineFunction::iterator Prior = prior(Begin); if (Prior == MF.begin()) @@ -255,7 +255,8 @@ bool CodePlacementOpt::MoveDiscontiguousLoopBlocks(MachineFunction &MF, // to the top of the loop to avoid loosing that fallthrough. Otherwise append // them to the bottom, even if it previously had a fallthrough, on the theory // that it's worth an extra branch to keep the loop contiguous. - MachineFunction::iterator InsertPt = next(MachineFunction::iterator(BotMBB)); + MachineFunction::iterator InsertPt = + llvm::next(MachineFunction::iterator(BotMBB)); bool InsertAtTop = false; if (TopMBB != MF.begin() && !HasFallthrough(prior(MachineFunction::iterator(TopMBB))) && @@ -268,7 +269,7 @@ bool CodePlacementOpt::MoveDiscontiguousLoopBlocks(MachineFunction &MF, // with the loop header. SmallPtrSet<MachineBasicBlock *, 8> ContiguousBlocks; for (MachineFunction::iterator I = TopMBB, - E = next(MachineFunction::iterator(BotMBB)); I != E; ++I) + E = llvm::next(MachineFunction::iterator(BotMBB)); I != E; ++I) ContiguousBlocks.insert(I); // Find non-contigous blocks and fix them. @@ -301,7 +302,7 @@ bool CodePlacementOpt::MoveDiscontiguousLoopBlocks(MachineFunction &MF, // Process this block and all loop blocks contiguous with it, to keep // them in their relative order. MachineFunction::iterator Begin = BB; - MachineFunction::iterator End = next(MachineFunction::iterator(BB)); + MachineFunction::iterator End = llvm::next(MachineFunction::iterator(BB)); for (; End != MF.end(); ++End) { if (!L->contains(End)) break; if (!HasAnalyzableTerminator(End)) break; diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp index 1b39fec..3c7961c2 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -43,8 +43,11 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { static_cast<const TargetRegisterClass *>(0)); // Initialize the indices to indicate that no registers are live. - std::fill(KillIndices, array_endof(KillIndices), ~0u); - std::fill(DefIndices, array_endof(DefIndices), BB->size()); + const unsigned BBSize = BB->size(); + for (unsigned i = 0; i < TRI->getNumRegs(); ++i) { + KillIndices[i] = ~0u; + DefIndices[i] = BBSize; + } // Clear "do not change" set. KeepRegs.clear(); @@ -122,7 +125,7 @@ void CriticalAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count, // may have been rescheduled and its lifetime may overlap with registers // in ways not reflected in our current liveness state. For each such // register, adjust the liveness state to be conservatively correct. - for (unsigned Reg = 0; Reg != TargetRegisterInfo::FirstVirtualRegister; ++Reg) + for (unsigned Reg = 0; Reg != TRI->getNumRegs(); ++Reg) if (DefIndices[Reg] < InsertPosIndex && DefIndices[Reg] >= Count) { assert(KillIndices[Reg] == ~0u && "Clobbered register is live!"); // Mark this register to be non-renamable. diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 242cba5..297dd31 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -74,6 +74,9 @@ EnableFastISelOption("fast-isel", cl::Hidden, static cl::opt<bool> EnableSplitGEPGVN("split-gep-gvn", cl::Hidden, cl::desc("Split GEPs and run no-load GVN")); +static cl::opt<bool> PreAllocTailDup("pre-regalloc-taildup", cl::Hidden, + cl::desc("Pre-register allocation tail duplication")); + LLVMTargetMachine::LLVMTargetMachine(const Target &T, const std::string &TargetTriple) : TargetMachine(T) { @@ -302,6 +305,13 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, /* allowDoubleDefs= */ true); } + // Pre-ra tail duplication. + if (OptLevel != CodeGenOpt::None && + !DisableTailDuplicate && PreAllocTailDup) { + PM.add(createTailDuplicatePass(true)); + printAndVerify(PM, "After Pre-RegAlloc TailDuplicate"); + } + // Run pre-ra passes. if (addPreRegAlloc(PM, OptLevel)) printAndVerify(PM, "After PreRegAlloc passes", @@ -348,7 +358,7 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, // Tail duplication. if (OptLevel != CodeGenOpt::None && !DisableTailDuplicate) { - PM.add(createTailDuplicatePass()); + PM.add(createTailDuplicatePass(false)); printAndVerify(PM, "After TailDuplicate"); } diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index 8d632cb..cc286aa 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -847,7 +847,7 @@ void LiveInterval::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const { if (vni->isUnused()) { OS << "x"; } else { - if (!vni->isDefAccurate()) + if (!vni->isDefAccurate() && !vni->isPHIDef()) OS << "?"; else OS << vni->def; diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 24adf36..8806439f 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -149,44 +149,69 @@ void LiveIntervals::dumpInstrs() const { printInstrs(errs()); } -/// conflictsWithPhysRegDef - Returns true if the specified register -/// is defined during the duration of the specified interval. -bool LiveIntervals::conflictsWithPhysRegDef(const LiveInterval &li, - VirtRegMap &vrm, unsigned reg) { - for (LiveInterval::Ranges::const_iterator - I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) { - for (SlotIndex index = I->start.getBaseIndex(), - end = I->end.getPrevSlot().getBaseIndex().getNextIndex(); - index != end; - index = index.getNextIndex()) { - // skip deleted instructions - while (index != end && !getInstructionFromIndex(index)) - index = index.getNextIndex(); - if (index == end) break; +bool LiveIntervals::conflictsWithPhysReg(const LiveInterval &li, + VirtRegMap &vrm, unsigned reg) { + // We don't handle fancy stuff crossing basic block boundaries + if (li.ranges.size() != 1) + return true; + const LiveRange &range = li.ranges.front(); + SlotIndex idx = range.start.getBaseIndex(); + SlotIndex end = range.end.getPrevSlot().getBaseIndex().getNextIndex(); + + // Skip deleted instructions + MachineInstr *firstMI = getInstructionFromIndex(idx); + while (!firstMI && idx != end) { + idx = idx.getNextIndex(); + firstMI = getInstructionFromIndex(idx); + } + if (!firstMI) + return false; - MachineInstr *MI = getInstructionFromIndex(index); - unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; - if (tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg)) - if (SrcReg == li.reg || DstReg == li.reg) - continue; - for (unsigned i = 0; i != MI->getNumOperands(); ++i) { - MachineOperand& mop = MI->getOperand(i); - if (!mop.isReg()) - continue; - unsigned PhysReg = mop.getReg(); - if (PhysReg == 0 || PhysReg == li.reg) + // Find last instruction in range + SlotIndex lastIdx = end.getPrevIndex(); + MachineInstr *lastMI = getInstructionFromIndex(lastIdx); + while (!lastMI && lastIdx != idx) { + lastIdx = lastIdx.getPrevIndex(); + lastMI = getInstructionFromIndex(lastIdx); + } + if (!lastMI) + return false; + + // Range cannot cross basic block boundaries or terminators + MachineBasicBlock *MBB = firstMI->getParent(); + if (MBB != lastMI->getParent() || lastMI->getDesc().isTerminator()) + return true; + + MachineBasicBlock::const_iterator E = lastMI; + ++E; + for (MachineBasicBlock::const_iterator I = firstMI; I != E; ++I) { + const MachineInstr &MI = *I; + + // Allow copies to and from li.reg + unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; + if (tii_->isMoveInstr(MI, SrcReg, DstReg, SrcSubReg, DstSubReg)) + if (SrcReg == li.reg || DstReg == li.reg) + continue; + + // Check for operands using reg + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + const MachineOperand& mop = MI.getOperand(i); + if (!mop.isReg()) + continue; + unsigned PhysReg = mop.getReg(); + if (PhysReg == 0 || PhysReg == li.reg) + continue; + if (TargetRegisterInfo::isVirtualRegister(PhysReg)) { + if (!vrm.hasPhys(PhysReg)) continue; - if (TargetRegisterInfo::isVirtualRegister(PhysReg)) { - if (!vrm.hasPhys(PhysReg)) - continue; - PhysReg = vrm.getPhys(PhysReg); - } - if (PhysReg && tri_->regsOverlap(PhysReg, reg)) - return true; + PhysReg = vrm.getPhys(PhysReg); } + if (PhysReg && tri_->regsOverlap(PhysReg, reg)) + return true; } } + // No conflicts found. return false; } @@ -201,15 +226,9 @@ bool LiveIntervals::conflictsWithPhysRegRef(LiveInterval &li, end = I->end.getPrevSlot().getBaseIndex().getNextIndex(); index != end; index = index.getNextIndex()) { - // Skip deleted instructions. - MachineInstr *MI = 0; - while (index != end) { - MI = getInstructionFromIndex(index); - if (MI) - break; - index = index.getNextIndex(); - } - if (index == end) break; + MachineInstr *MI = getInstructionFromIndex(index); + if (!MI) + continue; // skip deleted instructions if (JoinedCopies.count(MI)) continue; @@ -374,8 +393,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // Value#0 is now defined by the 2-addr instruction. OldValNo->def = RedefIndex; OldValNo->setCopy(0); - if (MO.isEarlyClobber()) - OldValNo->setHasRedefByEC(true); // Add the new live interval which replaces the range for the input copy. LiveRange LR(DefIndex, RedefIndex, ValNo); @@ -411,7 +428,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, interval.removeRange(Start, End); assert(interval.ranges.size() == 1 && "Newly discovered PHI interval has >1 ranges."); - MachineBasicBlock *killMBB = getMBBFromIndex(interval.endIndex()); + MachineBasicBlock *killMBB = getMBBFromIndex(VNI->def); VNI->addKill(indexes_->getTerminatorGap(killMBB)); VNI->setHasPHIKill(true); DEBUG({ @@ -422,7 +439,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // Replace the interval with one of a NEW value number. Note that this // value number isn't actually defined by an instruction, weird huh? :) LiveRange LR(Start, End, - interval.getNextValue(SlotIndex(getMBBStartIdx(mbb), true), + interval.getNextValue(SlotIndex(getMBBStartIdx(Killer->getParent()), true), 0, false, VNInfoAllocator)); LR.valno->setIsPHIDef(true); DEBUG(errs() << " replace range with " << LR); @@ -513,8 +530,6 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB, if (mi->isRegTiedToUseOperand(DefIdx)) { // Two-address instruction. end = baseIndex.getDefIndex(); - assert(!mi->getOperand(DefIdx).isEarlyClobber() && - "Two address instruction is an early clobber?"); } else { // Another instruction redefines the register before it is ever read. // Then the register is essentially dead at the instruction that defines @@ -730,8 +745,16 @@ unsigned LiveIntervals::getVNInfoSourceReg(const VNInfo *VNI) const { if (VNI->getCopy()->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) { // If it's extracting out of a physical register, return the sub-register. unsigned Reg = VNI->getCopy()->getOperand(1).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + unsigned SrcSubReg = VNI->getCopy()->getOperand(2).getImm(); + unsigned DstSubReg = VNI->getCopy()->getOperand(0).getSubReg(); + if (SrcSubReg == DstSubReg) + // %reg1034:3<def> = EXTRACT_SUBREG %EDX, 3 + // reg1034 can still be coalesced to EDX. + return Reg; + assert(DstSubReg == 0); Reg = tri_->getSubReg(Reg, VNI->getCopy()->getOperand(2).getImm()); + } return Reg; } else if (VNI->getCopy()->getOpcode() == TargetInstrInfo::INSERT_SUBREG || VNI->getCopy()->getOpcode() == TargetInstrInfo::SUBREG_TO_REG) diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp index 68f80ac..3c88e37 100644 --- a/lib/CodeGen/LiveVariables.cpp +++ b/lib/CodeGen/LiveVariables.cpp @@ -720,6 +720,51 @@ bool LiveVariables::VarInfo::isLiveIn(const MachineBasicBlock &MBB, return findKill(&MBB); } +bool LiveVariables::isLiveOut(unsigned Reg, const MachineBasicBlock &MBB) { + LiveVariables::VarInfo &VI = getVarInfo(Reg); + + // Loop over all of the successors of the basic block, checking to see if + // the value is either live in the block, or if it is killed in the block. + std::vector<MachineBasicBlock*> OpSuccBlocks; + for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(), + E = MBB.succ_end(); SI != E; ++SI) { + MachineBasicBlock *SuccMBB = *SI; + + // Is it alive in this successor? + unsigned SuccIdx = SuccMBB->getNumber(); + if (VI.AliveBlocks.test(SuccIdx)) + return true; + OpSuccBlocks.push_back(SuccMBB); + } + + // Check to see if this value is live because there is a use in a successor + // that kills it. + switch (OpSuccBlocks.size()) { + case 1: { + MachineBasicBlock *SuccMBB = OpSuccBlocks[0]; + for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i) + if (VI.Kills[i]->getParent() == SuccMBB) + return true; + break; + } + case 2: { + MachineBasicBlock *SuccMBB1 = OpSuccBlocks[0], *SuccMBB2 = OpSuccBlocks[1]; + for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i) + if (VI.Kills[i]->getParent() == SuccMBB1 || + VI.Kills[i]->getParent() == SuccMBB2) + return true; + break; + } + default: + std::sort(OpSuccBlocks.begin(), OpSuccBlocks.end()); + for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i) + if (std::binary_search(OpSuccBlocks.begin(), OpSuccBlocks.end(), + VI.Kills[i]->getParent())) + return true; + } + return false; +} + /// addNewBlock - Add a new basic block BB as an empty succcessor to DomBB. All /// variables that are live out of DomBB will be marked as passing live through /// BB. diff --git a/lib/CodeGen/LowerSubregs.cpp b/lib/CodeGen/LowerSubregs.cpp index 30636a8..80eb6cd 100644 --- a/lib/CodeGen/LowerSubregs.cpp +++ b/lib/CodeGen/LowerSubregs.cpp @@ -312,7 +312,7 @@ bool LowerSubregsInstructionPass::runOnMachineFunction(MachineFunction &MF) { mbbi != mbbe; ++mbbi) { for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end(); mi != me;) { - MachineBasicBlock::iterator nmi = next(mi); + MachineBasicBlock::iterator nmi = llvm::next(mi); MachineInstr *MI = mi; if (MI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) { MadeChange |= LowerExtract(MI); diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index e55e369..a58286d 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -290,7 +290,7 @@ void MachineBasicBlock::updateTerminator() { } else { // The block has a fallthrough conditional branch. MachineBasicBlock *MBBA = *succ_begin(); - MachineBasicBlock *MBBB = *next(succ_begin()); + MachineBasicBlock *MBBB = *llvm::next(succ_begin()); if (MBBA == TBB) std::swap(MBBB, MBBA); if (isLayoutSuccessor(TBB)) { if (TII->ReverseBranchCondition(Cond)) { @@ -359,15 +359,10 @@ bool MachineBasicBlock::isSuccessor(const MachineBasicBlock *MBB) const { bool MachineBasicBlock::isLayoutSuccessor(const MachineBasicBlock *MBB) const { MachineFunction::const_iterator I(this); - return next(I) == MachineFunction::const_iterator(MBB); + return llvm::next(I) == MachineFunction::const_iterator(MBB); } bool MachineBasicBlock::canFallThrough() { - MachineBasicBlock *TBB = 0, *FBB = 0; - SmallVector<MachineOperand, 4> Cond; - const TargetInstrInfo *TII = getParent()->getTarget().getInstrInfo(); - bool BranchUnAnalyzable = TII->AnalyzeBranch(*this, TBB, FBB, Cond, true); - MachineFunction::iterator Fallthrough = this; ++Fallthrough; // If FallthroughBlock is off the end of the function, it can't fall through. @@ -378,16 +373,21 @@ bool MachineBasicBlock::canFallThrough() { if (!isSuccessor(Fallthrough)) return false; - // If we couldn't analyze the branch, examine the last instruction. - // If the block doesn't end in a known control barrier, assume fallthrough - // is possible. The isPredicable check is needed because this code can be - // called during IfConversion, where an instruction which is normally a - // Barrier is predicated and thus no longer an actual control barrier. This - // is over-conservative though, because if an instruction isn't actually - // predicated we could still treat it like a barrier. - if (BranchUnAnalyzable) + // Analyze the branches, if any, at the end of the block. + MachineBasicBlock *TBB = 0, *FBB = 0; + SmallVector<MachineOperand, 4> Cond; + const TargetInstrInfo *TII = getParent()->getTarget().getInstrInfo(); + if (TII->AnalyzeBranch(*this, TBB, FBB, Cond, true)) { + // If we couldn't analyze the branch, examine the last instruction. + // If the block doesn't end in a known control barrier, assume fallthrough + // is possible. The isPredicable check is needed because this code can be + // called during IfConversion, where an instruction which is normally a + // Barrier is predicated and thus no longer an actual control barrier. This + // is over-conservative though, because if an instruction isn't actually + // predicated we could still treat it like a barrier. return empty() || !back().getDesc().isBarrier() || back().getDesc().isPredicable(); + } // If there is no branch, control always falls through. if (TBB == 0) return true; @@ -461,18 +461,19 @@ bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA, bool MadeChange = false; bool AddedFallThrough = false; - MachineFunction::iterator FallThru = next(MachineFunction::iterator(this)); + MachineFunction::iterator FallThru = + llvm::next(MachineFunction::iterator(this)); - // If this block ends with a conditional branch that falls through to its - // successor, set DestB as the successor. if (isCond) { + // If this block ends with a conditional branch that falls through to its + // successor, set DestB as the successor. if (DestB == 0 && FallThru != getParent()->end()) { DestB = FallThru; AddedFallThrough = true; } } else { // If this is an unconditional branch with no explicit dest, it must just be - // a fallthrough into DestB. + // a fallthrough into DestA. if (DestA == 0 && FallThru != getParent()->end()) { DestA = FallThru; AddedFallThrough = true; diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index d20f446..dd6fd7e 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -328,7 +328,7 @@ void MachineFunction::print(raw_ostream &OS) const { if (I->second) OS << " in reg%" << I->second; - if (next(I) != E) + if (llvm::next(I) != E) OS << ", "; } OS << '\n'; @@ -342,7 +342,7 @@ void MachineFunction::print(raw_ostream &OS) const { else OS << "%physreg" << *I; - if (next(I) != E) + if (llvm::next(I) != E) OS << " "; } OS << '\n'; diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index f73a5a3..12b974d 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -1058,6 +1058,22 @@ bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const { return true; } +/// isConstantValuePHI - If the specified instruction is a PHI that always +/// merges together the same virtual register, return the register, otherwise +/// return 0. +unsigned MachineInstr::isConstantValuePHI() const { + if (getOpcode() != TargetInstrInfo::PHI) + return 0; + assert(getNumOperands() >= 3 && + "It's illegal to have a PHI without source operands"); + + unsigned Reg = getOperand(1).getReg(); + for (unsigned i = 3, e = getNumOperands(); i < e; i += 2) + if (getOperand(i).getReg() != Reg) + return 0; + return Reg; +} + void MachineInstr::dump() const { errs() << " " << *this; } @@ -1150,9 +1166,14 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { DebugLocTuple DLT = MF->getDebugLocTuple(debugLoc); DIScope Scope(DLT.Scope); OS << " dbg:"; + // Omit the directory, since it's usually long and uninteresting. if (!Scope.isNull()) - OS << Scope.getDirectory() << ':' << Scope.getFilename() << ':'; - OS << DLT.Line << ":" << DLT.Col; + OS << Scope.getFilename(); + else + OS << "<unknown>"; + OS << ':' << DLT.Line; + if (DLT.Col != 0) + OS << ':' << DLT.Col; } OS << "\n"; diff --git a/lib/CodeGen/MachineLoopInfo.cpp b/lib/CodeGen/MachineLoopInfo.cpp index db77d19..63f4f18 100644 --- a/lib/CodeGen/MachineLoopInfo.cpp +++ b/lib/CodeGen/MachineLoopInfo.cpp @@ -62,11 +62,11 @@ MachineBasicBlock *MachineLoop::getBottomBlock() { MachineBasicBlock *BotMBB = getHeader(); MachineFunction::iterator End = BotMBB->getParent()->end(); if (BotMBB != prior(End)) { - MachineBasicBlock *NextMBB = next(MachineFunction::iterator(BotMBB)); + MachineBasicBlock *NextMBB = llvm::next(MachineFunction::iterator(BotMBB)); while (contains(NextMBB)) { BotMBB = NextMBB; - if (BotMBB == next(MachineFunction::iterator(BotMBB))) break; - NextMBB = next(MachineFunction::iterator(BotMBB)); + if (BotMBB == llvm::next(MachineFunction::iterator(BotMBB))) break; + NextMBB = llvm::next(MachineFunction::iterator(BotMBB)); } } return BotMBB; diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp new file mode 100644 index 0000000..292096f --- /dev/null +++ b/lib/CodeGen/MachineSSAUpdater.cpp @@ -0,0 +1,393 @@ +//===- MachineSSAUpdater.cpp - Unstructured SSA Update Tool ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the MachineSSAUpdater class. It's based on SSAUpdater +// class in lib/Transforms/Utils. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineSSAUpdater.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +typedef DenseMap<MachineBasicBlock*, unsigned> AvailableValsTy; +typedef std::vector<std::pair<MachineBasicBlock*, unsigned> > + IncomingPredInfoTy; + +static AvailableValsTy &getAvailableVals(void *AV) { + return *static_cast<AvailableValsTy*>(AV); +} + +static IncomingPredInfoTy &getIncomingPredInfo(void *IPI) { + return *static_cast<IncomingPredInfoTy*>(IPI); +} + + +MachineSSAUpdater::MachineSSAUpdater(MachineFunction &MF, + SmallVectorImpl<MachineInstr*> *NewPHI) + : AV(0), IPI(0), InsertedPHIs(NewPHI) { + TII = MF.getTarget().getInstrInfo(); + MRI = &MF.getRegInfo(); +} + +MachineSSAUpdater::~MachineSSAUpdater() { + delete &getAvailableVals(AV); + delete &getIncomingPredInfo(IPI); +} + +/// Initialize - Reset this object to get ready for a new set of SSA +/// updates. ProtoValue is the value used to name PHI nodes. +void MachineSSAUpdater::Initialize(unsigned V) { + if (AV == 0) + AV = new AvailableValsTy(); + else + getAvailableVals(AV).clear(); + + if (IPI == 0) + IPI = new IncomingPredInfoTy(); + else + getIncomingPredInfo(IPI).clear(); + + VR = V; + VRC = MRI->getRegClass(VR); +} + +/// HasValueForBlock - Return true if the MachineSSAUpdater already has a value for +/// the specified block. +bool MachineSSAUpdater::HasValueForBlock(MachineBasicBlock *BB) const { + return getAvailableVals(AV).count(BB); +} + +/// AddAvailableValue - Indicate that a rewritten value is available in the +/// specified block with the specified value. +void MachineSSAUpdater::AddAvailableValue(MachineBasicBlock *BB, unsigned V) { + getAvailableVals(AV)[BB] = V; +} + +/// GetValueAtEndOfBlock - Construct SSA form, materializing a value that is +/// live at the end of the specified block. +unsigned MachineSSAUpdater::GetValueAtEndOfBlock(MachineBasicBlock *BB) { + return GetValueAtEndOfBlockInternal(BB); +} + +static +unsigned LookForIdenticalPHI(MachineBasicBlock *BB, + SmallVector<std::pair<MachineBasicBlock*, unsigned>, 8> &PredValues) { + if (BB->empty()) + return 0; + + MachineBasicBlock::iterator I = BB->front(); + if (I->getOpcode() != TargetInstrInfo::PHI) + return 0; + + AvailableValsTy AVals; + for (unsigned i = 0, e = PredValues.size(); i != e; ++i) + AVals[PredValues[i].first] = PredValues[i].second; + while (I != BB->end() && I->getOpcode() == TargetInstrInfo::PHI) { + bool Same = true; + for (unsigned i = 1, e = I->getNumOperands(); i != e; i += 2) { + unsigned SrcReg = I->getOperand(i).getReg(); + MachineBasicBlock *SrcBB = I->getOperand(i+1).getMBB(); + if (AVals[SrcBB] != SrcReg) { + Same = false; + break; + } + } + if (Same) + return I->getOperand(0).getReg(); + ++I; + } + return 0; +} + +/// InsertNewDef - Insert an empty PHI or IMPLICIT_DEF instruction which define +/// a value of the given register class at the start of the specified basic +/// block. It returns the virtual register defined by the instruction. +static +MachineInstr *InsertNewDef(unsigned Opcode, + MachineBasicBlock *BB, MachineBasicBlock::iterator I, + const TargetRegisterClass *RC, + MachineRegisterInfo *MRI, const TargetInstrInfo *TII) { + unsigned NewVR = MRI->createVirtualRegister(RC); + return BuildMI(*BB, I, DebugLoc::getUnknownLoc(), TII->get(Opcode), NewVR); +} + +/// GetValueInMiddleOfBlock - Construct SSA form, materializing a value that +/// is live in the middle of the specified block. +/// +/// GetValueInMiddleOfBlock is the same as GetValueAtEndOfBlock except in one +/// important case: if there is a definition of the rewritten value after the +/// 'use' in BB. Consider code like this: +/// +/// X1 = ... +/// SomeBB: +/// use(X) +/// X2 = ... +/// br Cond, SomeBB, OutBB +/// +/// In this case, there are two values (X1 and X2) added to the AvailableVals +/// set by the client of the rewriter, and those values are both live out of +/// their respective blocks. However, the use of X happens in the *middle* of +/// a block. Because of this, we need to insert a new PHI node in SomeBB to +/// merge the appropriate values, and this value isn't live out of the block. +/// +unsigned MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) { + // If there is no definition of the renamed variable in this block, just use + // GetValueAtEndOfBlock to do our work. + if (!getAvailableVals(AV).count(BB)) + return GetValueAtEndOfBlockInternal(BB); + + // If there are no predecessors, just return undef. + if (BB->pred_empty()) { + // Insert an implicit_def to represent an undef value. + MachineInstr *NewDef = InsertNewDef(TargetInstrInfo::IMPLICIT_DEF, + BB, BB->getFirstTerminator(), + VRC, MRI, TII); + return NewDef->getOperand(0).getReg(); + } + + // Otherwise, we have the hard case. Get the live-in values for each + // predecessor. + SmallVector<std::pair<MachineBasicBlock*, unsigned>, 8> PredValues; + unsigned SingularValue = 0; + + bool isFirstPred = true; + for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(), + E = BB->pred_end(); PI != E; ++PI) { + MachineBasicBlock *PredBB = *PI; + unsigned PredVal = GetValueAtEndOfBlockInternal(PredBB); + PredValues.push_back(std::make_pair(PredBB, PredVal)); + + // Compute SingularValue. + if (isFirstPred) { + SingularValue = PredVal; + isFirstPred = false; + } else if (PredVal != SingularValue) + SingularValue = 0; + } + + // Otherwise, if all the merged values are the same, just use it. + if (SingularValue != 0) + return SingularValue; + + // If an identical PHI is already in BB, just reuse it. + unsigned DupPHI = LookForIdenticalPHI(BB, PredValues); + if (DupPHI) + return DupPHI; + + // Otherwise, we do need a PHI: insert one now. + MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->front(); + MachineInstr *InsertedPHI = InsertNewDef(TargetInstrInfo::PHI, BB, + Loc, VRC, MRI, TII); + + // Fill in all the predecessors of the PHI. + MachineInstrBuilder MIB(InsertedPHI); + for (unsigned i = 0, e = PredValues.size(); i != e; ++i) + MIB.addReg(PredValues[i].second).addMBB(PredValues[i].first); + + // See if the PHI node can be merged to a single value. This can happen in + // loop cases when we get a PHI of itself and one other value. + if (unsigned ConstVal = InsertedPHI->isConstantValuePHI()) { + InsertedPHI->eraseFromParent(); + return ConstVal; + } + + // If the client wants to know about all new instructions, tell it. + if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI); + + DEBUG(errs() << " Inserted PHI: " << *InsertedPHI << "\n"); + return InsertedPHI->getOperand(0).getReg(); +} + +static +MachineBasicBlock *findCorrespondingPred(const MachineInstr *MI, + MachineOperand *U) { + for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) { + if (&MI->getOperand(i) == U) + return MI->getOperand(i+1).getMBB(); + } + + llvm_unreachable("MachineOperand::getParent() failure?"); + return 0; +} + +/// RewriteUse - Rewrite a use of the symbolic value. This handles PHI nodes, +/// which use their value in the corresponding predecessor. +void MachineSSAUpdater::RewriteUse(MachineOperand &U) { + MachineInstr *UseMI = U.getParent(); + unsigned NewVR = 0; + if (UseMI->getOpcode() == TargetInstrInfo::PHI) { + MachineBasicBlock *SourceBB = findCorrespondingPred(UseMI, &U); + NewVR = GetValueAtEndOfBlockInternal(SourceBB); + } else { + NewVR = GetValueInMiddleOfBlock(UseMI->getParent()); + } + + U.setReg(NewVR); +} + +void MachineSSAUpdater::ReplaceRegWith(unsigned OldReg, unsigned NewReg) { + MRI->replaceRegWith(OldReg, NewReg); + + AvailableValsTy &AvailableVals = getAvailableVals(AV); + for (DenseMap<MachineBasicBlock*, unsigned>::iterator + I = AvailableVals.begin(), E = AvailableVals.end(); I != E; ++I) + if (I->second == OldReg) + I->second = NewReg; +} + +/// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry +/// for the specified BB and if so, return it. If not, construct SSA form by +/// walking predecessors inserting PHI nodes as needed until we get to a block +/// where the value is available. +/// +unsigned MachineSSAUpdater::GetValueAtEndOfBlockInternal(MachineBasicBlock *BB){ + AvailableValsTy &AvailableVals = getAvailableVals(AV); + + // Query AvailableVals by doing an insertion of null. + std::pair<AvailableValsTy::iterator, bool> InsertRes = + AvailableVals.insert(std::make_pair(BB, 0)); + + // Handle the case when the insertion fails because we have already seen BB. + if (!InsertRes.second) { + // If the insertion failed, there are two cases. The first case is that the + // value is already available for the specified block. If we get this, just + // return the value. + if (InsertRes.first->second != 0) + return InsertRes.first->second; + + // Otherwise, if the value we find is null, then this is the value is not + // known but it is being computed elsewhere in our recursion. This means + // that we have a cycle. Handle this by inserting a PHI node and returning + // it. When we get back to the first instance of the recursion we will fill + // in the PHI node. + MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->front(); + MachineInstr *NewPHI = InsertNewDef(TargetInstrInfo::PHI, BB, Loc, + VRC, MRI,TII); + unsigned NewVR = NewPHI->getOperand(0).getReg(); + InsertRes.first->second = NewVR; + return NewVR; + } + + // If there are no predecessors, then we must have found an unreachable block + // just return 'undef'. Since there are no predecessors, InsertRes must not + // be invalidated. + if (BB->pred_empty()) { + // Insert an implicit_def to represent an undef value. + MachineInstr *NewDef = InsertNewDef(TargetInstrInfo::IMPLICIT_DEF, + BB, BB->getFirstTerminator(), + VRC, MRI, TII); + return InsertRes.first->second = NewDef->getOperand(0).getReg(); + } + + // Okay, the value isn't in the map and we just inserted a null in the entry + // to indicate that we're processing the block. Since we have no idea what + // value is in this block, we have to recurse through our predecessors. + // + // While we're walking our predecessors, we keep track of them in a vector, + // then insert a PHI node in the end if we actually need one. We could use a + // smallvector here, but that would take a lot of stack space for every level + // of the recursion, just use IncomingPredInfo as an explicit stack. + IncomingPredInfoTy &IncomingPredInfo = getIncomingPredInfo(IPI); + unsigned FirstPredInfoEntry = IncomingPredInfo.size(); + + // As we're walking the predecessors, keep track of whether they are all + // producing the same value. If so, this value will capture it, if not, it + // will get reset to null. We distinguish the no-predecessor case explicitly + // below. + unsigned SingularValue = 0; + bool isFirstPred = true; + for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(), + E = BB->pred_end(); PI != E; ++PI) { + MachineBasicBlock *PredBB = *PI; + unsigned PredVal = GetValueAtEndOfBlockInternal(PredBB); + IncomingPredInfo.push_back(std::make_pair(PredBB, PredVal)); + + // Compute SingularValue. + if (isFirstPred) { + SingularValue = PredVal; + isFirstPred = false; + } else if (PredVal != SingularValue) + SingularValue = 0; + } + + /// Look up BB's entry in AvailableVals. 'InsertRes' may be invalidated. If + /// this block is involved in a loop, a no-entry PHI node will have been + /// inserted as InsertedVal. Otherwise, we'll still have the null we inserted + /// above. + unsigned &InsertedVal = AvailableVals[BB]; + + // If all the predecessor values are the same then we don't need to insert a + // PHI. This is the simple and common case. + if (SingularValue) { + // If a PHI node got inserted, replace it with the singlar value and delete + // it. + if (InsertedVal) { + MachineInstr *OldVal = MRI->getVRegDef(InsertedVal); + // Be careful about dead loops. These RAUW's also update InsertedVal. + assert(InsertedVal != SingularValue && "Dead loop?"); + ReplaceRegWith(InsertedVal, SingularValue); + OldVal->eraseFromParent(); + } + + InsertedVal = SingularValue; + + // Drop the entries we added in IncomingPredInfo to restore the stack. + IncomingPredInfo.erase(IncomingPredInfo.begin()+FirstPredInfoEntry, + IncomingPredInfo.end()); + return InsertedVal; + } + + + // Otherwise, we do need a PHI: insert one now if we don't already have one. + MachineInstr *InsertedPHI; + if (InsertedVal == 0) { + MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->front(); + InsertedPHI = InsertNewDef(TargetInstrInfo::PHI, BB, Loc, + VRC, MRI, TII); + InsertedVal = InsertedPHI->getOperand(0).getReg(); + } else { + InsertedPHI = MRI->getVRegDef(InsertedVal); + } + + // Fill in all the predecessors of the PHI. + MachineInstrBuilder MIB(InsertedPHI); + for (IncomingPredInfoTy::iterator I = + IncomingPredInfo.begin()+FirstPredInfoEntry, + E = IncomingPredInfo.end(); I != E; ++I) + MIB.addReg(I->second).addMBB(I->first); + + // Drop the entries we added in IncomingPredInfo to restore the stack. + IncomingPredInfo.erase(IncomingPredInfo.begin()+FirstPredInfoEntry, + IncomingPredInfo.end()); + + // See if the PHI node can be merged to a single value. This can happen in + // loop cases when we get a PHI of itself and one other value. + if (unsigned ConstVal = InsertedPHI->isConstantValuePHI()) { + MRI->replaceRegWith(InsertedVal, ConstVal); + InsertedPHI->eraseFromParent(); + InsertedVal = ConstVal; + } else { + DEBUG(errs() << " Inserted PHI: " << *InsertedPHI << "\n"); + + // If the client wants to know about all new instructions, tell it. + if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI); + } + + return InsertedVal; +} diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index d9f4c99..917d053 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -376,15 +376,6 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { report("MBB doesn't fall through but is empty!", MBB); } } - if (TII->BlockHasNoFallThrough(*MBB)) { - if (MBB->empty()) { - report("TargetInstrInfo says the block has no fall through, but the " - "block is empty!", MBB); - } else if (!MBB->back().getDesc().isBarrier()) { - report("TargetInstrInfo says the block has no fall through, but the " - "block does not end in a barrier!", MBB); - } - } } else { // Block is last in function. if (MBB->empty()) { diff --git a/lib/CodeGen/MaxStackAlignment.cpp b/lib/CodeGen/MaxStackAlignment.cpp new file mode 100644 index 0000000..d327cfa --- /dev/null +++ b/lib/CodeGen/MaxStackAlignment.cpp @@ -0,0 +1,70 @@ +//===-- MaxStackAlignment.cpp - Compute the required stack alignment -- ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass looks for vector register usage and aligned local objects to +// calculate the maximum required alignment for a function. This is used by +// targets which support it to determine if dynamic stack realignment is +// necessary. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" + +using namespace llvm; + +namespace { + struct MaximalStackAlignmentCalculator : public MachineFunctionPass { + static char ID; + MaximalStackAlignmentCalculator() : MachineFunctionPass(&ID) {} + + virtual bool runOnMachineFunction(MachineFunction &MF) { + MachineFrameInfo *FFI = MF.getFrameInfo(); + MachineRegisterInfo &RI = MF.getRegInfo(); + + // Calculate max stack alignment of all already allocated stack objects. + FFI->calculateMaxStackAlignment(); + unsigned MaxAlign = FFI->getMaxAlignment(); + + // Be over-conservative: scan over all vreg defs and find whether vector + // registers are used. If yes, there is probability that vector registers + // will be spilled and thus the stack needs to be aligned properly. + // FIXME: It would be better to only do this if a spill actually + // happens rather than conseratively aligning the stack regardless. + for (unsigned RegNum = TargetRegisterInfo::FirstVirtualRegister; + RegNum < RI.getLastVirtReg(); ++RegNum) + MaxAlign = std::max(MaxAlign, RI.getRegClass(RegNum)->getAlignment()); + + if (FFI->getMaxAlignment() == MaxAlign) + return false; + + FFI->setMaxAlignment(MaxAlign); + return true; + } + + virtual const char *getPassName() const { + return "Stack Alignment Requirements Auto-Detector"; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + }; + + char MaximalStackAlignmentCalculator::ID = 0; +} + +FunctionPass* +llvm::createMaxStackAlignmentCalculatorPass() { + return new MaximalStackAlignmentCalculator(); +} + diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index 2e30cc6..c62d179 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -287,7 +287,7 @@ void llvm::PHIElimination::LowerAtomicPHINode( // Okay, if we now know that the value is not live out of the block, we can // add a kill marker in this block saying that it kills the incoming value! - if (!ValueIsUsed && !isLiveOut(SrcReg, opBlock, *LV)) { + if (!ValueIsUsed && !LV->isLiveOut(SrcReg, opBlock)) { // In our final twist, we have to decide which instruction kills the // register. In most cases this is the copy, however, the first // terminator instruction at the end of the block may also use the value. @@ -301,8 +301,8 @@ void llvm::PHIElimination::LowerAtomicPHINode( // Check that no other terminators use values. #ifndef NDEBUG - for (MachineBasicBlock::iterator TI = next(Term); TI != opBlock.end(); - ++TI) { + for (MachineBasicBlock::iterator TI = llvm::next(Term); + TI != opBlock.end(); ++TI) { assert(!TI->readsRegister(SrcReg) && "Terminator instructions cannot use virtual registers unless" "they are the first terminator in a block!"); @@ -353,59 +353,13 @@ bool llvm::PHIElimination::SplitPHIEdges(MachineFunction &MF, // We break edges when registers are live out from the predecessor block // (not considering PHI nodes). If the register is live in to this block // anyway, we would gain nothing from splitting. - if (!LV.isLiveIn(Reg, MBB) && isLiveOut(Reg, *PreMBB, LV)) + if (!LV.isLiveIn(Reg, MBB) && LV.isLiveOut(Reg, *PreMBB)) SplitCriticalEdge(PreMBB, &MBB); } } return true; } -bool llvm::PHIElimination::isLiveOut(unsigned Reg, const MachineBasicBlock &MBB, - LiveVariables &LV) { - LiveVariables::VarInfo &VI = LV.getVarInfo(Reg); - - // Loop over all of the successors of the basic block, checking to see if - // the value is either live in the block, or if it is killed in the block. - std::vector<MachineBasicBlock*> OpSuccBlocks; - for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(), - E = MBB.succ_end(); SI != E; ++SI) { - MachineBasicBlock *SuccMBB = *SI; - - // Is it alive in this successor? - unsigned SuccIdx = SuccMBB->getNumber(); - if (VI.AliveBlocks.test(SuccIdx)) - return true; - OpSuccBlocks.push_back(SuccMBB); - } - - // Check to see if this value is live because there is a use in a successor - // that kills it. - switch (OpSuccBlocks.size()) { - case 1: { - MachineBasicBlock *SuccMBB = OpSuccBlocks[0]; - for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i) - if (VI.Kills[i]->getParent() == SuccMBB) - return true; - break; - } - case 2: { - MachineBasicBlock *SuccMBB1 = OpSuccBlocks[0], *SuccMBB2 = OpSuccBlocks[1]; - for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i) - if (VI.Kills[i]->getParent() == SuccMBB1 || - VI.Kills[i]->getParent() == SuccMBB2) - return true; - break; - } - default: - std::sort(OpSuccBlocks.begin(), OpSuccBlocks.end()); - for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i) - if (std::binary_search(OpSuccBlocks.begin(), OpSuccBlocks.end(), - VI.Kills[i]->getParent())) - return true; - } - return false; -} - MachineBasicBlock *PHIElimination::SplitCriticalEdge(MachineBasicBlock *A, MachineBasicBlock *B) { assert(A && B && "Missing MBB end point"); @@ -423,7 +377,7 @@ MachineBasicBlock *PHIElimination::SplitCriticalEdge(MachineBasicBlock *A, ++NumSplits; MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock(); - MF->insert(next(MachineFunction::iterator(A)), NMBB); + MF->insert(llvm::next(MachineFunction::iterator(A)), NMBB); DEBUG(errs() << "PHIElimination splitting critical edge:" " BB#" << A->getNumber() << " -- BB#" << NMBB->getNumber() diff --git a/lib/CodeGen/PHIElimination.h b/lib/CodeGen/PHIElimination.h index f5872cb..b0b71ce 100644 --- a/lib/CodeGen/PHIElimination.h +++ b/lib/CodeGen/PHIElimination.h @@ -93,12 +93,6 @@ namespace llvm { bool SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB, LiveVariables &LV); - /// isLiveOut - Determine if Reg is live out from MBB, when not - /// considering PHI nodes. This means that Reg is either killed by - /// a successor block or passed through one. - bool isLiveOut(unsigned Reg, const MachineBasicBlock &MBB, - LiveVariables &LV); - /// SplitCriticalEdge - Split a critical edge from A to B by /// inserting a new MBB. Update branches in A and PHI instructions /// in B. Return the new block. diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index 9101fce2..79be295 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -373,7 +373,8 @@ void SchedulePostRATDList::FinishBlock() { /// void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) { // Initialize the indices to indicate that no registers are live. - std::fill(KillIndices, array_endof(KillIndices), ~0u); + for (unsigned i = 0; i < TRI->getNumRegs(); ++i) + KillIndices[i] = ~0u; // Determine the live-out physregs for this block. if (!BB->empty() && BB->back().getDesc().isReturn()) { @@ -510,12 +511,9 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { } if (MO.isKill() != kill) { - bool removed = ToggleKillFlag(MI, MO); - if (removed) { - DEBUG(errs() << "Fixed <removed> in "); - } else { - DEBUG(errs() << "Fixed " << MO << " in "); - } + DEBUG(errs() << "Fixing " << MO << " in "); + // Warning: ToggleKillFlag may invalidate MO. + ToggleKillFlag(MI, MO); DEBUG(MI->dump()); } diff --git a/lib/CodeGen/PreAllocSplitting.cpp b/lib/CodeGen/PreAllocSplitting.cpp index 8f62345..b0d7a47 100644 --- a/lib/CodeGen/PreAllocSplitting.cpp +++ b/lib/CodeGen/PreAllocSplitting.cpp @@ -16,6 +16,7 @@ #define DEBUG_TYPE "pre-alloc-split" #include "VirtRegMap.h" +#include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/MachineDominators.h" @@ -104,6 +105,7 @@ namespace { AU.addRequired<LiveStacks>(); AU.addPreserved<LiveStacks>(); AU.addPreserved<RegisterCoalescer>(); + AU.addPreserved<CalculateSpillWeights>(); if (StrongPHIElim) AU.addPreservedID(StrongPHIEliminationID); else @@ -876,7 +878,7 @@ bool PreAllocSplitting::Rematerialize(unsigned VReg, VNInfo* ValNo, if (!ValNo->isDefAccurate() || DefMI->getParent() == BarrierMBB) KillPt = findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB); else - KillPt = next(MachineBasicBlock::iterator(DefMI)); + KillPt = llvm::next(MachineBasicBlock::iterator(DefMI)); if (KillPt == DefMI->getParent()->end()) return false; @@ -1118,7 +1120,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) { return false; // No gap to insert spill. } } else { - SpillPt = next(MachineBasicBlock::iterator(DefMI)); + SpillPt = llvm::next(MachineBasicBlock::iterator(DefMI)); if (SpillPt == DefMBB->end()) { DEBUG(errs() << "FAILED (could not find a suitable spill point).\n"); return false; // No gap to insert spill. diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index 8905f75..e94247f 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -136,9 +136,10 @@ void PEI::getAnalysisUsage(AnalysisUsage &AU) const { /// pseudo instructions. void PEI::calculateCallsInformation(MachineFunction &Fn) { const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); + MachineFrameInfo *FFI = Fn.getFrameInfo(); unsigned MaxCallFrameSize = 0; - bool HasCalls = false; + bool HasCalls = FFI->hasCalls(); // Get the function call frame set-up and tear-down instruction opcode int FrameSetupOpcode = RegInfo->getCallFrameSetupOpcode(); @@ -166,7 +167,6 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) { HasCalls = true; } - MachineFrameInfo *FFI = Fn.getFrameInfo(); FFI->setHasCalls(HasCalls); FFI->setMaxCallFrameSize(MaxCallFrameSize); @@ -674,7 +674,7 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { if (PrevI == BB->end()) I = BB->begin(); // The replaced instr was the first in the block. else - I = next(PrevI); + I = llvm::next(PrevI); continue; } diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp index 4ff5129..c02d47b 100644 --- a/lib/CodeGen/RegAllocLinearScan.cpp +++ b/lib/CodeGen/RegAllocLinearScan.cpp @@ -16,6 +16,7 @@ #include "VirtRegRewriter.h" #include "Spiller.h" #include "llvm/Function.h" +#include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -59,6 +60,11 @@ PreSplitIntervals("pre-alloc-split", cl::desc("Pre-register allocation live interval splitting"), cl::init(false), cl::Hidden); +static cl::opt<bool> +TrivCoalesceEnds("trivial-coalesce-ends", + cl::desc("Attempt trivial coalescing of interval ends"), + cl::init(false), cl::Hidden); + static RegisterRegAlloc linearscanRegAlloc("linearscan", "linear scan register allocator", createLinearScanRegisterAllocator); @@ -182,6 +188,7 @@ namespace { // Make sure PassManager knows which analyses to make available // to coalescing and which analyses coalescing invalidates. AU.addRequiredTransitive<RegisterCoalescer>(); + AU.addRequired<CalculateSpillWeights>(); if (PreSplitIntervals) AU.addRequiredID(PreAllocSplittingID); AU.addRequired<LiveStacks>(); @@ -390,66 +397,71 @@ void RALinScan::ComputeRelatedRegClasses() { RelatedRegClasses.unionSets(I->second, OneClassForEachPhysReg[*AS]); } -/// attemptTrivialCoalescing - If a simple interval is defined by a copy, -/// try allocate the definition the same register as the source register -/// if the register is not defined during live time of the interval. This -/// eliminate a copy. This is used to coalesce copies which were not -/// coalesced away before allocation either due to dest and src being in -/// different register classes or because the coalescer was overly -/// conservative. +/// attemptTrivialCoalescing - If a simple interval is defined by a copy, try +/// allocate the definition the same register as the source register if the +/// register is not defined during live time of the interval. If the interval is +/// killed by a copy, try to use the destination register. This eliminates a +/// copy. This is used to coalesce copies which were not coalesced away before +/// allocation either due to dest and src being in different register classes or +/// because the coalescer was overly conservative. unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) { unsigned Preference = vrm_->getRegAllocPref(cur.reg); if ((Preference && Preference == Reg) || !cur.containsOneValue()) return Reg; - VNInfo *vni = cur.begin()->valno; - if ((vni->def == SlotIndex()) || - vni->isUnused() || !vni->isDefAccurate()) + // We cannot handle complicated live ranges. Simple linear stuff only. + if (cur.ranges.size() != 1) return Reg; - MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def); - unsigned SrcReg, DstReg, SrcSubReg, DstSubReg, PhysReg; - if (!CopyMI || - !tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubReg, DstSubReg)) + + const LiveRange &range = cur.ranges.front(); + + VNInfo *vni = range.valno; + if (vni->isUnused()) return Reg; - PhysReg = SrcReg; - if (TargetRegisterInfo::isVirtualRegister(SrcReg)) { - if (!vrm_->isAssignedReg(SrcReg)) + + unsigned CandReg; + { + MachineInstr *CopyMI; + unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; + if (vni->def != SlotIndex() && vni->isDefAccurate() && + (CopyMI = li_->getInstructionFromIndex(vni->def)) && + tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubReg, DstSubReg)) + // Defined by a copy, try to extend SrcReg forward + CandReg = SrcReg; + else if (TrivCoalesceEnds && + (CopyMI = + li_->getInstructionFromIndex(range.end.getBaseIndex())) && + tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubReg, DstSubReg) && + cur.reg == SrcReg) + // Only used by a copy, try to extend DstReg backwards + CandReg = DstReg; + else + return Reg; + } + + if (TargetRegisterInfo::isVirtualRegister(CandReg)) { + if (!vrm_->isAssignedReg(CandReg)) return Reg; - PhysReg = vrm_->getPhys(SrcReg); + CandReg = vrm_->getPhys(CandReg); } - if (Reg == PhysReg) + if (Reg == CandReg) return Reg; const TargetRegisterClass *RC = mri_->getRegClass(cur.reg); - if (!RC->contains(PhysReg)) + if (!RC->contains(CandReg)) return Reg; - // Try to coalesce. - if (!li_->conflictsWithPhysRegDef(cur, *vrm_, PhysReg)) { - DEBUG(errs() << "Coalescing: " << cur << " -> " << tri_->getName(PhysReg) - << '\n'); - vrm_->clearVirt(cur.reg); - vrm_->assignVirt2Phys(cur.reg, PhysReg); - - // Remove unnecessary kills since a copy does not clobber the register. - if (li_->hasInterval(SrcReg)) { - LiveInterval &SrcLI = li_->getInterval(SrcReg); - for (MachineRegisterInfo::use_iterator I = mri_->use_begin(cur.reg), - E = mri_->use_end(); I != E; ++I) { - MachineOperand &O = I.getOperand(); - if (!O.isKill()) - continue; - MachineInstr *MI = &*I; - if (SrcLI.liveAt(li_->getInstructionIndex(MI).getDefIndex())) - O.setIsKill(false); - } - } + if (li_->conflictsWithPhysReg(cur, *vrm_, CandReg)) + return Reg; - ++NumCoalesce; - return PhysReg; - } + // Try to coalesce. + DEBUG(errs() << "Coalescing: " << cur << " -> " << tri_->getName(CandReg) + << '\n'); + vrm_->clearVirt(cur.reg); + vrm_->assignVirt2Phys(cur.reg, CandReg); - return Reg; + ++NumCoalesce; + return CandReg; } bool RALinScan::runOnMachineFunction(MachineFunction &fn) { @@ -1261,9 +1273,9 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { // The earliest start of a Spilled interval indicates up to where // in handled we need to roll back + assert(!spillIs.empty() && "No spill intervals?"); + SlotIndex earliestStart = spillIs[0]->beginIndex(); - LiveInterval *earliestStartInterval = cur; - // Spill live intervals of virtual regs mapped to the physical register we // want to clear (and its aliases). We only spill those that overlap with the // current interval as the rest do not affect its allocation. we also keep @@ -1274,19 +1286,16 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { LiveInterval *sli = spillIs.back(); spillIs.pop_back(); DEBUG(errs() << "\t\t\tspilling(a): " << *sli << '\n'); - earliestStartInterval = - (earliestStartInterval->beginIndex() < sli->beginIndex()) ? - earliestStartInterval : sli; + if (sli->beginIndex() < earliestStart) + earliestStart = sli->beginIndex(); std::vector<LiveInterval*> newIs; - newIs = spiller_->spill(sli, spillIs); + newIs = spiller_->spill(sli, spillIs, &earliestStart); addStackInterval(sli, ls_, li_, mri_, *vrm_); std::copy(newIs.begin(), newIs.end(), std::back_inserter(added)); spilled.insert(sli->reg); } - SlotIndex earliestStart = earliestStartInterval->beginIndex(); - DEBUG(errs() << "\t\trolling back to: " << earliestStart << '\n'); // Scan handled in reverse order up to the earliest start of a @@ -1295,7 +1304,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { while (!handled_.empty()) { LiveInterval* i = handled_.back(); // If this interval starts before t we are done. - if (i->beginIndex() < earliestStart) + if (!i->empty() && i->beginIndex() < earliestStart) break; DEBUG(errs() << "\t\t\tundo changes for: " << *i << '\n'); handled_.pop_back(); diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index c677d34..c2014a7 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -36,6 +36,7 @@ #include "PBQP/Heuristics/Briggs.h" #include "VirtRegMap.h" #include "VirtRegRewriter.h" +#include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -90,6 +91,7 @@ namespace { au.addRequired<LiveIntervals>(); //au.addRequiredID(SplitCriticalEdgesID); au.addRequired<RegisterCoalescer>(); + au.addRequired<CalculateSpillWeights>(); au.addRequired<LiveStacks>(); au.addPreserved<LiveStacks>(); au.addRequired<MachineLoopInfo>(); diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index 94680ed..67bf209 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -125,7 +125,7 @@ void RegScavenger::forward() { Tracking = true; } else { assert(MBBI != MBB->end() && "Already at the end of the basic block!"); - MBBI = next(MBBI); + MBBI = llvm::next(MBBI); } MachineInstr *MI = MBBI; diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 06ffdd6..2b52187 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -119,7 +119,8 @@ namespace { /// it can be simplified or if things it uses can be simplified by bit /// propagation. If so, return true. bool SimplifyDemandedBits(SDValue Op) { - APInt Demanded = APInt::getAllOnesValue(Op.getValueSizeInBits()); + unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits(); + APInt Demanded = APInt::getAllOnesValue(BitWidth); return SimplifyDemandedBits(Op, Demanded); } @@ -546,7 +547,8 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, To[0].getNode()->dump(&DAG); errs() << " and " << NumTo-1 << " other values\n"; for (unsigned i = 0, e = NumTo; i != e; ++i) - assert(N->getValueType(i) == To[i].getValueType() && + assert((!To[i].getNode() || + N->getValueType(i) == To[i].getValueType()) && "Cannot combine value to value of different type!")); WorkListRemover DeadNodes(*this); DAG.ReplaceAllUsesWith(N, To, &DeadNodes); @@ -1687,10 +1689,14 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { // fold (OP (sext x), (sext y)) -> (sext (OP x, y)) // fold (OP (aext x), (aext y)) -> (aext (OP x, y)) // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free) + // + // do not sink logical op inside of a vector extend, since it may combine + // into a vsetcc. if ((N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND|| N0.getOpcode() == ISD::SIGN_EXTEND || (N0.getOpcode() == ISD::TRUNCATE && !TLI.isTruncateFree(N0.getOperand(0).getValueType(), VT))) && + !VT.isVector() && N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() && (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), N0.getOperand(0).getValueType()))) { @@ -1943,8 +1949,10 @@ SDValue DAGCombiner::visitOR(SDNode *N) { } // fold (or x, undef) -> -1 - if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) - return DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT); + if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) { + EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT; + return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT); + } // fold (or c1, c2) -> c1|c2 if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C); @@ -2434,7 +2442,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N0.getValueType(); - unsigned OpSizeInBits = VT.getSizeInBits(); + unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); // fold (shl c1, c2) -> c1<<c2 if (N0C && N1C) @@ -2450,7 +2458,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { return N0; // if (shl x, c) is known to be zero, return 0 if (DAG.MaskedValueIsZero(SDValue(N, 0), - APInt::getAllOnesValue(VT.getSizeInBits()))) + APInt::getAllOnesValue(OpSizeInBits))) return DAG.getConstant(0, VT); // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))). if (N1.getOpcode() == ISD::TRUNCATE && @@ -2526,6 +2534,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N0.getValueType(); + unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); // fold (sra c1, c2) -> (sra c1, c2) if (N0C && N1C) @@ -2537,7 +2546,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { if (N0C && N0C->isAllOnesValue()) return N0; // fold (sra x, (setge c, size(x))) -> undef - if (N1C && N1C->getZExtValue() >= VT.getSizeInBits()) + if (N1C && N1C->getZExtValue() >= OpSizeInBits) return DAG.getUNDEF(VT); // fold (sra x, 0) -> x if (N1C && N1C->isNullValue()) @@ -2545,7 +2554,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports // sext_inreg. if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) { - unsigned LowBits = VT.getSizeInBits() - (unsigned)N1C->getZExtValue(); + unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue(); EVT EVT = EVT::getIntegerVT(*DAG.getContext(), LowBits); if ((!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, EVT))) return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, @@ -2556,7 +2565,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { if (N1C && N0.getOpcode() == ISD::SRA) { if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { unsigned Sum = N1C->getZExtValue() + C1->getZExtValue(); - if (Sum >= VT.getSizeInBits()) Sum = VT.getSizeInBits()-1; + if (Sum >= OpSizeInBits) Sum = OpSizeInBits-1; return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0.getOperand(0), DAG.getConstant(Sum, N1C->getValueType(0))); } @@ -2572,9 +2581,8 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { const ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); if (N01C && N1C) { // Determine what the truncate's result bitsize and type would be. - unsigned VTValSize = VT.getSizeInBits(); EVT TruncVT = - EVT::getIntegerVT(*DAG.getContext(), VTValSize - N1C->getZExtValue()); + EVT::getIntegerVT(*DAG.getContext(), OpSizeInBits - N1C->getZExtValue()); // Determine the residual right-shift amount. signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue(); @@ -2607,7 +2615,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { EVT TruncVT = N1.getValueType(); SDValue N100 = N1.getOperand(0).getOperand(0); APInt TruncC = N101C->getAPIntValue(); - TruncC.trunc(TruncVT.getSizeInBits()); + TruncC.trunc(TruncVT.getScalarType().getSizeInBits()); return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0, DAG.getNode(ISD::AND, N->getDebugLoc(), TruncVT, @@ -2636,7 +2644,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N0.getValueType(); - unsigned OpSizeInBits = VT.getSizeInBits(); + unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); // fold (srl c1, c2) -> c1 >>u c2 if (N0C && N1C) @@ -3029,7 +3037,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { else if (Op.getValueType().bitsGT(VT)) Op = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), VT, Op); return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, Op, - DAG.getValueType(N0.getValueType())); + DAG.getValueType(N0.getValueType().getScalarType())); } } @@ -3170,7 +3178,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { } else if (Op.getValueType().bitsGT(VT)) { Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op); } - return DAG.getZeroExtendInReg(Op, N->getDebugLoc(), N0.getValueType()); + return DAG.getZeroExtendInReg(Op, N->getDebugLoc(), + N0.getValueType().getScalarType()); } // Fold (zext (and (trunc x), cst)) -> (and x, cst), @@ -3193,6 +3202,19 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { X, DAG.getConstant(Mask, VT)); } + // Fold (zext (and x, cst)) -> (and (zext x), cst) + if (N0.getOpcode() == ISD::AND && + N0.getOperand(1).getOpcode() == ISD::Constant && + N0.getOperand(0).getOpcode() != ISD::TRUNCATE && + N0.getOperand(0).hasOneUse()) { + APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); + Mask.zext(VT.getSizeInBits()); + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, + DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, + N0.getOperand(0)), + DAG.getConstant(Mask, VT)); + } + // fold (zext (load x)) -> (zext (truncate (zextload x))) if (ISD::isNON_EXTLoad(N0.getNode()) && ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || @@ -3269,6 +3291,26 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { if (SCC.getNode()) return SCC; } + // (zext (shl (zext x), cst)) -> (shl (zext x), cst) + if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) && + isa<ConstantSDNode>(N0.getOperand(1)) && + N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND && + N0.hasOneUse()) { + if (N0.getOpcode() == ISD::SHL) { + // If the original shl may be shifting out bits, do not perform this + // transformation. + unsigned ShAmt = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); + unsigned KnownZeroBits = N0.getOperand(0).getValueType().getSizeInBits() - + N0.getOperand(0).getOperand(0).getValueType().getSizeInBits(); + if (ShAmt > KnownZeroBits) + return SDValue(); + } + DebugLoc dl = N->getDebugLoc(); + return DAG.getNode(N0.getOpcode(), dl, VT, + DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0.getOperand(0)), + DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0.getOperand(1))); + } + return SDValue(); } @@ -3529,7 +3571,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); EVT EVT = cast<VTSDNode>(N1)->getVT(); - unsigned VTBits = VT.getSizeInBits(); + unsigned VTBits = VT.getScalarType().getSizeInBits(); unsigned EVTBits = EVT.getSizeInBits(); // fold (sext_in_reg c1) -> c1 @@ -3537,7 +3579,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, N0, N1); // If the input is already sign extended, just drop the extension. - if (DAG.ComputeNumSignBits(N0) >= VT.getSizeInBits()-EVTBits+1) + if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1) return N0; // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2 @@ -3552,7 +3594,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { // if x is small enough. if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) { SDValue N00 = N0.getOperand(0); - if (N00.getValueType().getSizeInBits() < EVTBits) + if (N00.getValueType().getScalarType().getSizeInBits() < EVTBits) return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N00, N1); } @@ -3576,11 +3618,11 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above. if (N0.getOpcode() == ISD::SRL) { if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1))) - if (ShAmt->getZExtValue()+EVTBits <= VT.getSizeInBits()) { + if (ShAmt->getZExtValue()+EVTBits <= VTBits) { // We can turn this into an SRA iff the input to the SRL is already sign // extended enough. unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0)); - if (VT.getSizeInBits()-(ShAmt->getZExtValue()+EVTBits) < InSignBits) + if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits) return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0.getOperand(0), N0.getOperand(1)); } @@ -3681,7 +3723,6 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse()) return SDValue(); EVT LD1VT = LD1->getValueType(0); - const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() && @@ -3689,7 +3730,7 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { // If one is volatile it might be ok, but play conservative and bail out. !LD1->isVolatile() && !LD2->isVolatile() && - TLI.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1, MFI)) { + DAG.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1)) { unsigned Align = LD1->getAlignment(); unsigned NewAlign = TLI.getTargetData()-> getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext())); @@ -4804,49 +4845,6 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { return false; } -/// InferAlignment - If we can infer some alignment information from this -/// pointer, return it. -static unsigned InferAlignment(SDValue Ptr, SelectionDAG &DAG) { - // If this is a direct reference to a stack slot, use information about the - // stack slot's alignment. - int FrameIdx = 1 << 31; - int64_t FrameOffset = 0; - if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) { - FrameIdx = FI->getIndex(); - } else if (Ptr.getOpcode() == ISD::ADD && - isa<ConstantSDNode>(Ptr.getOperand(1)) && - isa<FrameIndexSDNode>(Ptr.getOperand(0))) { - FrameIdx = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex(); - FrameOffset = Ptr.getConstantOperandVal(1); - } - - if (FrameIdx != (1 << 31)) { - // FIXME: Handle FI+CST. - const MachineFrameInfo &MFI = *DAG.getMachineFunction().getFrameInfo(); - if (MFI.isFixedObjectIndex(FrameIdx)) { - int64_t ObjectOffset = MFI.getObjectOffset(FrameIdx) + FrameOffset; - - // The alignment of the frame index can be determined from its offset from - // the incoming frame position. If the frame object is at offset 32 and - // the stack is guaranteed to be 16-byte aligned, then we know that the - // object is 16-byte aligned. - unsigned StackAlign = DAG.getTarget().getFrameInfo()->getStackAlignment(); - unsigned Align = MinAlign(ObjectOffset, StackAlign); - - // Finally, the frame object itself may have a known alignment. Factor - // the alignment + offset into a new alignment. For example, if we know - // the FI is 8 byte aligned, but the pointer is 4 off, we really have a - // 4-byte alignment of the resultant pointer. Likewise align 4 + 4-byte - // offset = 4-byte alignment, align 4 + 1-byte offset = align 1, etc. - unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx), - FrameOffset); - return std::max(Align, FIInfoAlign); - } - } - - return 0; -} - SDValue DAGCombiner::visitLOAD(SDNode *N) { LoadSDNode *LD = cast<LoadSDNode>(N); SDValue Chain = LD->getChain(); @@ -4854,7 +4852,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { // Try to infer better alignment information than the load already has. if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) { - if (unsigned Align = InferAlignment(Ptr, DAG)) { + if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { if (Align > LD->getAlignment()) return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(), LD->getValueType(0), @@ -5079,7 +5077,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // Try to infer better alignment information than the store already has. if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) { - if (unsigned Align = InferAlignment(Ptr, DAG)) { + if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { if (Align > ST->getAlignment()) return DAG.getTruncStore(Chain, N->getDebugLoc(), Value, Ptr, ST->getSrcValue(), @@ -5231,7 +5229,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // SimplifyDemandedBits, which only works if the value has a single use. if (SimplifyDemandedBits(Value, APInt::getLowBitsSet( - Value.getValueSizeInBits(), + Value.getValueType().getScalarType().getSizeInBits(), ST->getMemoryVT().getSizeInBits()))) return SDValue(N, 0); } diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 5eb9ca1..4ead9c9 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -532,7 +532,15 @@ bool FastISel::SelectBitCast(User *I) { bool FastISel::SelectInstruction(Instruction *I) { - return SelectOperator(I, I->getOpcode()); + // First, try doing target-independent selection. + if (SelectOperator(I, I->getOpcode())) + return true; + + // Next, try calling the target to attempt to handle the instruction. + if (TargetSelectInstruction(I)) + return true; + + return false; } /// FastEmitBranch - Emit an unconditional branch to the given block, @@ -541,7 +549,7 @@ FastISel::SelectInstruction(Instruction *I) { void FastISel::FastEmitBranch(MachineBasicBlock *MSucc) { MachineFunction::iterator NextMBB = - next(MachineFunction::iterator(MBB)); + llvm::next(MachineFunction::iterator(MBB)); if (MBB->isLayoutSuccessor(MSucc)) { // The unconditional fall-through case, which needs no instructions. diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 273dbf0..f9c05d0 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -232,7 +232,7 @@ void SelectionDAGLegalize::LegalizeDAG() { // node is only legalized after all of its operands are legalized. DAG.AssignTopologicalOrder(); for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), - E = prior(DAG.allnodes_end()); I != next(E); ++I) + E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I) LegalizeOp(SDValue(I, 0)); // Finally, it's possible the root changed. Get the new root. @@ -2294,9 +2294,15 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, // NOTE: we could fall back on load/store here too for targets without // SAR. However, it is doubtful that any exist. EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT(); - unsigned BitsDiff = Node->getValueType(0).getSizeInBits() - + EVT VT = Node->getValueType(0); + EVT ShiftAmountTy = TLI.getShiftAmountTy(); + if (VT.isVector()) { + ShiftAmountTy = VT; + VT = VT.getVectorElementType(); + } + unsigned BitsDiff = VT.getSizeInBits() - ExtraVT.getSizeInBits(); - SDValue ShiftCst = DAG.getConstant(BitsDiff, TLI.getShiftAmountTy()); + SDValue ShiftCst = DAG.getConstant(BitsDiff, ShiftAmountTy); Tmp1 = DAG.getNode(ISD::SHL, dl, Node->getValueType(0), Node->getOperand(0), ShiftCst); Tmp1 = DAG.getNode(ISD::SRA, dl, Node->getValueType(0), Tmp1, ShiftCst); @@ -3059,8 +3065,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node, // SelectionDAG::Legalize - This is the entry point for the file. // -void SelectionDAG::Legalize(bool TypesNeedLegalizing, - CodeGenOpt::Level OptLevel) { +void SelectionDAG::Legalize(CodeGenOpt::Level OptLevel) { /// run - This is the main entry point to this class. /// SelectionDAGLegalize(*this, OptLevel).LegalizeDAG(); diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 8ac8063..2f4457e 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -1167,55 +1167,62 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { GetExpandedInteger(N->getOperand(0), InL, InH); SDValue NVBitsNode = DAG.getConstant(NVTBits, ShTy); - SDValue Amt2 = DAG.getNode(ISD::SUB, dl, ShTy, NVBitsNode, Amt); - SDValue Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(ShTy), - Amt, NVBitsNode, ISD::SETULT); + SDValue AmtExcess = DAG.getNode(ISD::SUB, dl, ShTy, Amt, NVBitsNode); + SDValue AmtLack = DAG.getNode(ISD::SUB, dl, ShTy, NVBitsNode, Amt); + SDValue isShort = DAG.getSetCC(dl, TLI.getSetCCResultType(ShTy), + Amt, NVBitsNode, ISD::SETULT); - SDValue Lo1, Hi1, Lo2, Hi2; + SDValue LoS, HiS, LoL, HiL; switch (N->getOpcode()) { default: llvm_unreachable("Unknown shift"); case ISD::SHL: - // ShAmt < NVTBits - Lo1 = DAG.getConstant(0, NVT); // Low part is zero. - Hi1 = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt); // High part from Lo part. - - // ShAmt >= NVTBits - Lo2 = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt); - Hi2 = DAG.getNode(ISD::OR, dl, NVT, + // Short: ShAmt < NVTBits + LoS = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt); + HiS = DAG.getNode(ISD::OR, dl, NVT, DAG.getNode(ISD::SHL, dl, NVT, InH, Amt), - DAG.getNode(ISD::SRL, dl, NVT, InL, Amt2)); + // FIXME: If Amt is zero, the following shift generates an undefined result + // on some architectures. + DAG.getNode(ISD::SRL, dl, NVT, InL, AmtLack)); + + // Long: ShAmt >= NVTBits + LoL = DAG.getConstant(0, NVT); // Lo part is zero. + HiL = DAG.getNode(ISD::SHL, dl, NVT, InL, AmtExcess); // Hi from Lo part. - Lo = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, Lo1, Lo2); - Hi = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, Hi1, Hi2); + Lo = DAG.getNode(ISD::SELECT, dl, NVT, isShort, LoS, LoL); + Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL); return true; case ISD::SRL: - // ShAmt < NVTBits - Hi1 = DAG.getConstant(0, NVT); // Hi part is zero. - Lo1 = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt); // Lo part from Hi part. - - // ShAmt >= NVTBits - Hi2 = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt); - Lo2 = DAG.getNode(ISD::OR, dl, NVT, - DAG.getNode(ISD::SRL, dl, NVT, InL, Amt), - DAG.getNode(ISD::SHL, dl, NVT, InH, Amt2)); - - Lo = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, Lo1, Lo2); - Hi = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, Hi1, Hi2); + // Short: ShAmt < NVTBits + HiS = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt); + LoS = DAG.getNode(ISD::OR, dl, NVT, + DAG.getNode(ISD::SRL, dl, NVT, InL, Amt), + // FIXME: If Amt is zero, the following shift generates an undefined result + // on some architectures. + DAG.getNode(ISD::SHL, dl, NVT, InH, AmtLack)); + + // Long: ShAmt >= NVTBits + HiL = DAG.getConstant(0, NVT); // Hi part is zero. + LoL = DAG.getNode(ISD::SRL, dl, NVT, InH, AmtExcess); // Lo from Hi part. + + Lo = DAG.getNode(ISD::SELECT, dl, NVT, isShort, LoS, LoL); + Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL); return true; case ISD::SRA: - // ShAmt < NVTBits - Hi1 = DAG.getNode(ISD::SRA, dl, NVT, InH, // Sign extend high part. - DAG.getConstant(NVTBits-1, ShTy)); - Lo1 = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt); // Lo part from Hi part. - - // ShAmt >= NVTBits - Hi2 = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt); - Lo2 = DAG.getNode(ISD::OR, dl, NVT, + // Short: ShAmt < NVTBits + HiS = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt); + LoS = DAG.getNode(ISD::OR, dl, NVT, DAG.getNode(ISD::SRL, dl, NVT, InL, Amt), - DAG.getNode(ISD::SHL, dl, NVT, InH, Amt2)); + // FIXME: If Amt is zero, the following shift generates an undefined result + // on some architectures. + DAG.getNode(ISD::SHL, dl, NVT, InH, AmtLack)); + + // Long: ShAmt >= NVTBits + HiL = DAG.getNode(ISD::SRA, dl, NVT, InH, // Sign of Hi part. + DAG.getConstant(NVTBits-1, ShTy)); + LoL = DAG.getNode(ISD::SRA, dl, NVT, InH, AmtExcess); // Lo from Hi part. - Lo = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, Lo1, Lo2); - Hi = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, Hi1, Hi2); + Lo = DAG.getNode(ISD::SELECT, dl, NVT, isShort, LoS, LoL); + Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL); return true; } @@ -1989,7 +1996,9 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::SRA: case ISD::SRL: case ISD::ROTL: - case ISD::ROTR: Res = ExpandIntOp_Shift(N); break; + case ISD::ROTR: Res = ExpandIntOp_Shift(N); break; + case ISD::RETURNADDR: + case ISD::FRAMEADDR: Res = ExpandIntOp_RETURNADDR(N); break; } // If the result is null, the sub-method took care of registering results etc. @@ -2173,6 +2182,15 @@ SDValue DAGTypeLegalizer::ExpandIntOp_Shift(SDNode *N) { return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), Lo); } +SDValue DAGTypeLegalizer::ExpandIntOp_RETURNADDR(SDNode *N) { + // The argument of RETURNADDR / FRAMEADDR builtin is 32 bit contant. This + // surely makes pretty nice problems on 8/16 bit targets. Just truncate this + // constant to valid type. + SDValue Lo, Hi; + GetExpandedInteger(N->getOperand(0), Lo, Hi); + return DAG.UpdateNodeOperands(SDValue(N, 0), Lo); +} + SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) { SDValue Op = N->getOperand(0); EVT DstVT = N->getValueType(0); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 2ee9f8a..c35f7ad 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -362,6 +362,7 @@ private: SDValue ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo); SDValue ExpandIntOp_TRUNCATE(SDNode *N); SDValue ExpandIntOp_UINT_TO_FP(SDNode *N); + SDValue ExpandIntOp_RETURNADDR(SDNode *N); void IntegerExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, DebugLoc dl); @@ -516,6 +517,7 @@ private: SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N); SDValue ScalarizeVecRes_LOAD(LoadSDNode *N); SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N); + SDValue ScalarizeVecRes_SIGN_EXTEND_INREG(SDNode *N); SDValue ScalarizeVecRes_SELECT(SDNode *N); SDValue ScalarizeVecRes_SELECT_CC(SDNode *N); SDValue ScalarizeVecRes_SETCC(SDNode *N); @@ -559,6 +561,7 @@ private: void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo, @@ -601,6 +604,7 @@ private: SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N); SDValue WidenVecRes_LOAD(SDNode* N); SDValue WidenVecRes_SCALAR_TO_VECTOR(SDNode* N); + SDValue WidenVecRes_SIGN_EXTEND_INREG(SDNode* N); SDValue WidenVecRes_SELECT(SDNode* N); SDValue WidenVecRes_SELECT_CC(SDNode* N); SDValue WidenVecRes_UNDEF(SDNode *N); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 785c2ad..2625245 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -79,7 +79,7 @@ bool VectorLegalizer::Run() { // node is only legalized after all of its operands are legalized. DAG.AssignTopologicalOrder(); for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), - E = prior(DAG.allnodes_end()); I != next(E); ++I) + E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I) LegalizeOp(SDValue(I, 0)); // Finally, it's possible the root changed. Get the new root. @@ -179,6 +179,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::FRINT: case ISD::FNEARBYINT: case ISD::FFLOOR: + case ISD::SIGN_EXTEND_INREG: QueryType = Node->getValueType(0); break; case ISD::SINT_TO_FP: diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 023324b..cf67ab9 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -54,6 +54,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break; case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break; case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break; + case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_SIGN_EXTEND_INREG(N); break; case ISD::SELECT: R = ScalarizeVecRes_SELECT(N); break; case ISD::SELECT_CC: R = ScalarizeVecRes_SELECT_CC(N); break; case ISD::SETCC: R = ScalarizeVecRes_SETCC(N); break; @@ -195,6 +196,13 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) { return InOp; } +SDValue DAGTypeLegalizer::ScalarizeVecRes_SIGN_EXTEND_INREG(SDNode *N) { + EVT EltVT = N->getValueType(0).getVectorElementType(); + SDValue LHS = GetScalarizedVector(N->getOperand(0)); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), EltVT, + LHS, N->getOperand(1)); +} + SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT(SDNode *N) { SDValue LHS = GetScalarizedVector(N->getOperand(1)); return DAG.getNode(ISD::SELECT, N->getDebugLoc(), @@ -401,6 +409,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break; case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break; case ISD::SCALAR_TO_VECTOR: SplitVecRes_SCALAR_TO_VECTOR(N, Lo, Hi); break; + case ISD::SIGN_EXTEND_INREG: SplitVecRes_SIGN_EXTEND_INREG(N, Lo, Hi); break; case ISD::LOAD: SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break; @@ -700,6 +709,18 @@ void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, Hi = DAG.getUNDEF(HiVT); } +void DAGTypeLegalizer::SplitVecRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue LHSLo, LHSHi; + GetSplitVector(N->getOperand(0), LHSLo, LHSHi); + DebugLoc dl = N->getDebugLoc(); + + Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo, + N->getOperand(1)); + Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi, + N->getOperand(1)); +} + void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi) { assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!"); @@ -1141,6 +1162,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break; case ISD::LOAD: Res = WidenVecRes_LOAD(N); break; case ISD::SCALAR_TO_VECTOR: Res = WidenVecRes_SCALAR_TO_VECTOR(N); break; + case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_SIGN_EXTEND_INREG(N); break; case ISD::SELECT: Res = WidenVecRes_SELECT(N); break; case ISD::SELECT_CC: Res = WidenVecRes_SELECT_CC(N); break; case ISD::UNDEF: Res = WidenVecRes_UNDEF(N); break; @@ -1691,6 +1713,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_SCALAR_TO_VECTOR(SDNode *N) { WidenVT, N->getOperand(0)); } +SDValue DAGTypeLegalizer::WidenVecRes_SIGN_EXTEND_INREG(SDNode *N) { + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue WidenLHS = GetWidenedVector(N->getOperand(0)); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), + WidenVT, WidenLHS, N->getOperand(1)); +} + SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned WidenNumElts = WidenVT.getVectorNumElements(); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index d53de34..b2ee8bb 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -20,10 +20,16 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtarget.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; +cl::opt<bool> +DisableInstScheduling("disable-inst-scheduling", + cl::init(false), + cl::desc("Disable instruction scheduling")); + ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf) : ScheduleDAG(mf) { } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index c38c79b..da55e6b 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -27,6 +27,7 @@ #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetFrameInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetInstrInfo.h" @@ -47,6 +48,8 @@ #include <cmath> using namespace llvm; +extern cl::opt<bool> DisableInstScheduling; + /// makeVTList - Return an instance of the SDVTList struct initialized with the /// specified members. static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) { @@ -551,6 +554,9 @@ void SelectionDAG::RemoveDeadNodes(SmallVectorImpl<SDNode *> &DeadNodes, } DeallocateNode(N); + + // Remove the ordering of this node. + if (Ordering) Ordering->remove(N); } } @@ -576,6 +582,9 @@ void SelectionDAG::DeleteNodeNotInCSEMaps(SDNode *N) { N->DropOperands(); DeallocateNode(N); + + // Remove the ordering of this node. + if (Ordering) Ordering->remove(N); } void SelectionDAG::DeallocateNode(SDNode *N) { @@ -587,6 +596,9 @@ void SelectionDAG::DeallocateNode(SDNode *N) { N->NodeType = ISD::DELETED_NODE; NodeAllocator.Deallocate(AllNodes.remove(N)); + + // Remove the ordering of this node. + if (Ordering) Ordering->remove(N); } /// RemoveNodeFromCSEMaps - Take the specified node out of the CSE map that @@ -690,7 +702,9 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, SDValue Op, FoldingSetNodeID ID; AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 1); AddNodeIDCustom(ID, N); - return CSEMap.FindNodeOrInsertPos(ID, InsertPos); + SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos); + if (Ordering) Ordering->remove(Node); + return Node; } /// FindModifiedNodeSlot - Find a slot for the specified node if its operands @@ -707,7 +721,9 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, FoldingSetNodeID ID; AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 2); AddNodeIDCustom(ID, N); - return CSEMap.FindNodeOrInsertPos(ID, InsertPos); + SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos); + if (Ordering) Ordering->remove(Node); + return Node; } @@ -724,7 +740,9 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, FoldingSetNodeID ID; AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, NumOps); AddNodeIDCustom(ID, N); - return CSEMap.FindNodeOrInsertPos(ID, InsertPos); + SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos); + if (Ordering) Ordering->remove(Node); + return Node; } /// VerifyNode - Sanity check the given node. Aborts if it is invalid. @@ -777,8 +795,13 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const { SelectionDAG::SelectionDAG(TargetLowering &tli, FunctionLoweringInfo &fli) : TLI(tli), FLI(fli), DW(0), EntryNode(ISD::EntryToken, DebugLoc::getUnknownLoc(), - getVTList(MVT::Other)), Root(getEntryNode()) { + getVTList(MVT::Other)), + Root(getEntryNode()), Ordering(0) { AllNodes.push_back(&EntryNode); + if (DisableInstScheduling) { + Ordering = new NodeOrdering(); + Ordering->add(&EntryNode); + } } void SelectionDAG::init(MachineFunction &mf, MachineModuleInfo *mmi, @@ -791,6 +814,7 @@ void SelectionDAG::init(MachineFunction &mf, MachineModuleInfo *mmi, SelectionDAG::~SelectionDAG() { allnodes_clear(); + delete Ordering; } void SelectionDAG::allnodes_clear() { @@ -816,6 +840,10 @@ void SelectionDAG::clear() { EntryNode.UseList = 0; AllNodes.push_back(&EntryNode); Root = getEntryNode(); + if (DisableInstScheduling) { + Ordering = new NodeOrdering(); + Ordering->add(&EntryNode); + } } SDValue SelectionDAG::getSExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) { @@ -831,8 +859,12 @@ SDValue SelectionDAG::getZExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) { } SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, DebugLoc DL, EVT VT) { + assert(!VT.isVector() && + "getZeroExtendInReg should use the vector element type instead of " + "the vector type!"); if (Op.getValueType() == VT) return Op; - APInt Imm = APInt::getLowBitsSet(Op.getValueSizeInBits(), + unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits(); + APInt Imm = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits()); return getNode(ISD::AND, DL, Op.getValueType(), Op, getConstant(Imm, Op.getValueType())); @@ -872,14 +904,17 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) { ID.AddPointer(&Val); void *IP = 0; SDNode *N = NULL; - if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) + if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) { + if (Ordering) Ordering->add(N); if (!VT.isVector()) return SDValue(N, 0); + } if (!N) { N = NodeAllocator.Allocate<ConstantSDNode>(); new (N) ConstantSDNode(isT, &Val, EltVT); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); } SDValue Result(N, 0); @@ -916,14 +951,17 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP& V, EVT VT, bool isTarget){ ID.AddPointer(&V); void *IP = 0; SDNode *N = NULL; - if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) + if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) { + if (Ordering) Ordering->add(N); if (!VT.isVector()) return SDValue(N, 0); + } if (!N) { N = NodeAllocator.Allocate<ConstantFPSDNode>(); new (N) ConstantFPSDNode(isTarget, &V, EltVT); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); } SDValue Result(N, 0); @@ -978,12 +1016,15 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, ID.AddInteger(Offset); ID.AddInteger(TargetFlags); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } SDNode *N = NodeAllocator.Allocate<GlobalAddressSDNode>(); new (N) GlobalAddressSDNode(Opc, GV, VT, Offset, TargetFlags); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -993,12 +1034,15 @@ SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) { AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); ID.AddInteger(FI); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } SDNode *N = NodeAllocator.Allocate<FrameIndexSDNode>(); new (N) FrameIndexSDNode(FI, VT, isTarget); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1012,12 +1056,15 @@ SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget, ID.AddInteger(JTI); ID.AddInteger(TargetFlags); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } SDNode *N = NodeAllocator.Allocate<JumpTableSDNode>(); new (N) JumpTableSDNode(JTI, VT, isTarget, TargetFlags); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1037,12 +1084,15 @@ SDValue SelectionDAG::getConstantPool(Constant *C, EVT VT, ID.AddPointer(C); ID.AddInteger(TargetFlags); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } SDNode *N = NodeAllocator.Allocate<ConstantPoolSDNode>(); new (N) ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment, TargetFlags); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1063,12 +1113,15 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, C->AddSelectionDAGCSEId(ID); ID.AddInteger(TargetFlags); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } SDNode *N = NodeAllocator.Allocate<ConstantPoolSDNode>(); new (N) ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment, TargetFlags); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1077,12 +1130,15 @@ SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) { AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), 0, 0); ID.AddPointer(MBB); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } SDNode *N = NodeAllocator.Allocate<BasicBlockSDNode>(); new (N) BasicBlockSDNode(MBB); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1098,6 +1154,7 @@ SDValue SelectionDAG::getValueType(EVT VT) { N = NodeAllocator.Allocate<VTSDNode>(); new (N) VTSDNode(VT); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1107,6 +1164,7 @@ SDValue SelectionDAG::getExternalSymbol(const char *Sym, EVT VT) { N = NodeAllocator.Allocate<ExternalSymbolSDNode>(); new (N) ExternalSymbolSDNode(false, Sym, 0, VT); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1119,6 +1177,7 @@ SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, EVT VT, N = NodeAllocator.Allocate<ExternalSymbolSDNode>(); new (N) ExternalSymbolSDNode(true, Sym, TargetFlags, VT); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1131,6 +1190,7 @@ SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) { new (N) CondCodeSDNode(Cond); CondCodeNodes[Cond] = N; AllNodes.push_back(N); + if (Ordering) Ordering->add(N); } return SDValue(CondCodeNodes[Cond], 0); } @@ -1223,8 +1283,10 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, DebugLoc dl, SDValue N1, ID.AddInteger(MaskVec[i]); void* IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } // Allocate the mask array for the node out of the BumpPtrAllocator, since // SDNode doesn't have access to it. This memory will be "leaked" when @@ -1236,6 +1298,7 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, DebugLoc dl, SDValue N1, new (N) ShuffleVectorSDNode(VT, dl, N1, N2, MaskAlloc); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1253,12 +1316,15 @@ SDValue SelectionDAG::getConvertRndSat(EVT VT, DebugLoc dl, SDValue Ops[] = { Val, DTy, STy, Rnd, Sat }; AddNodeIDNode(ID, ISD::CONVERT_RNDSAT, getVTList(VT), &Ops[0], 5); void* IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } CvtRndSatSDNode *N = NodeAllocator.Allocate<CvtRndSatSDNode>(); new (N) CvtRndSatSDNode(VT, dl, Ops, 5, Code); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1267,12 +1333,15 @@ SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) { AddNodeIDNode(ID, ISD::Register, getVTList(VT), 0, 0); ID.AddInteger(RegNo); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } SDNode *N = NodeAllocator.Allocate<RegisterSDNode>(); new (N) RegisterSDNode(RegNo, VT); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1284,12 +1353,15 @@ SDValue SelectionDAG::getLabel(unsigned Opcode, DebugLoc dl, AddNodeIDNode(ID, Opcode, getVTList(MVT::Other), &Ops[0], 1); ID.AddInteger(LabelID); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } SDNode *N = NodeAllocator.Allocate<LabelSDNode>(); new (N) LabelSDNode(Opcode, dl, Root, LabelID); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1303,12 +1375,15 @@ SDValue SelectionDAG::getBlockAddress(BlockAddress *BA, EVT VT, ID.AddPointer(BA); ID.AddInteger(TargetFlags); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } SDNode *N = NodeAllocator.Allocate<BlockAddressSDNode>(); new (N) BlockAddressSDNode(Opc, VT, BA, TargetFlags); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1321,13 +1396,16 @@ SDValue SelectionDAG::getSrcValue(const Value *V) { ID.AddPointer(V); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } SDNode *N = NodeAllocator.Allocate<SrcValueSDNode>(); new (N) SrcValueSDNode(V); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -1480,7 +1558,7 @@ bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const { if (Op.getValueType().isVector()) return false; - unsigned BitWidth = Op.getValueSizeInBits(); + unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits(); return MaskedValueIsZero(Op, APInt::getSignBit(BitWidth), Depth); } @@ -1503,7 +1581,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, APInt &KnownZero, APInt &KnownOne, unsigned Depth) const { unsigned BitWidth = Mask.getBitWidth(); - assert(BitWidth == Op.getValueType().getSizeInBits() && + assert(BitWidth == Op.getValueType().getScalarType().getSizeInBits() && "Mask size mismatches value type size!"); KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything. @@ -1760,7 +1838,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, } case ISD::ZERO_EXTEND: { EVT InVT = Op.getOperand(0).getValueType(); - unsigned InBits = InVT.getSizeInBits(); + unsigned InBits = InVT.getScalarType().getSizeInBits(); APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask; APInt InMask = Mask; InMask.trunc(InBits); @@ -1774,7 +1852,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, } case ISD::SIGN_EXTEND: { EVT InVT = Op.getOperand(0).getValueType(); - unsigned InBits = InVT.getSizeInBits(); + unsigned InBits = InVT.getScalarType().getSizeInBits(); APInt InSignBit = APInt::getSignBit(InBits); APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask; APInt InMask = Mask; @@ -1815,7 +1893,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, } case ISD::ANY_EXTEND: { EVT InVT = Op.getOperand(0).getValueType(); - unsigned InBits = InVT.getSizeInBits(); + unsigned InBits = InVT.getScalarType().getSizeInBits(); APInt InMask = Mask; InMask.trunc(InBits); KnownZero.trunc(InBits); @@ -1827,7 +1905,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, } case ISD::TRUNCATE: { EVT InVT = Op.getOperand(0).getValueType(); - unsigned InBits = InVT.getSizeInBits(); + unsigned InBits = InVT.getScalarType().getSizeInBits(); APInt InMask = Mask; InMask.zext(InBits); KnownZero.zext(InBits); @@ -1960,7 +2038,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ EVT VT = Op.getValueType(); assert(VT.isInteger() && "Invalid VT!"); - unsigned VTBits = VT.getSizeInBits(); + unsigned VTBits = VT.getScalarType().getSizeInBits(); unsigned Tmp, Tmp2; unsigned FirstAnswer = 1; @@ -1987,7 +2065,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ } case ISD::SIGN_EXTEND: - Tmp = VTBits-Op.getOperand(0).getValueType().getSizeInBits(); + Tmp = VTBits-Op.getOperand(0).getValueType().getScalarType().getSizeInBits(); return ComputeNumSignBits(Op.getOperand(0), Depth+1) + Tmp; case ISD::SIGN_EXTEND_INREG: @@ -2238,13 +2316,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, getVTList(VT), 0, 0); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } SDNode *N = NodeAllocator.Allocate<SDNode>(); new (N) SDNode(Opcode, DL, getVTList(VT)); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); #ifndef NDEBUG VerifyNode(N); #endif @@ -2349,6 +2430,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, assert(VT.isFloatingPoint() && Operand.getValueType().isFloatingPoint() && "Invalid FP cast!"); if (Operand.getValueType() == VT) return Operand; // noop conversion. + assert((!VT.isVector() || + VT.getVectorNumElements() == + Operand.getValueType().getVectorNumElements()) && + "Vector element count mismatch!"); if (Operand.getOpcode() == ISD::UNDEF) return getUNDEF(VT); break; @@ -2356,8 +2441,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, assert(VT.isInteger() && Operand.getValueType().isInteger() && "Invalid SIGN_EXTEND!"); if (Operand.getValueType() == VT) return Operand; // noop extension - assert(Operand.getValueType().bitsLT(VT) - && "Invalid sext node, dst < src!"); + assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) && + "Invalid sext node, dst < src!"); + assert((!VT.isVector() || + VT.getVectorNumElements() == + Operand.getValueType().getVectorNumElements()) && + "Vector element count mismatch!"); if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND) return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0)); break; @@ -2365,8 +2454,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, assert(VT.isInteger() && Operand.getValueType().isInteger() && "Invalid ZERO_EXTEND!"); if (Operand.getValueType() == VT) return Operand; // noop extension - assert(Operand.getValueType().bitsLT(VT) - && "Invalid zext node, dst < src!"); + assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) && + "Invalid zext node, dst < src!"); + assert((!VT.isVector() || + VT.getVectorNumElements() == + Operand.getValueType().getVectorNumElements()) && + "Vector element count mismatch!"); if (OpOpcode == ISD::ZERO_EXTEND) // (zext (zext x)) -> (zext x) return getNode(ISD::ZERO_EXTEND, DL, VT, Operand.getNode()->getOperand(0)); @@ -2375,8 +2468,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, assert(VT.isInteger() && Operand.getValueType().isInteger() && "Invalid ANY_EXTEND!"); if (Operand.getValueType() == VT) return Operand; // noop extension - assert(Operand.getValueType().bitsLT(VT) - && "Invalid anyext node, dst < src!"); + assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) && + "Invalid anyext node, dst < src!"); + assert((!VT.isVector() || + VT.getVectorNumElements() == + Operand.getValueType().getVectorNumElements()) && + "Vector element count mismatch!"); if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND) // (ext (zext x)) -> (zext x) and (ext (sext x)) -> (sext x) return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0)); @@ -2385,14 +2482,19 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, assert(VT.isInteger() && Operand.getValueType().isInteger() && "Invalid TRUNCATE!"); if (Operand.getValueType() == VT) return Operand; // noop truncate - assert(Operand.getValueType().bitsGT(VT) - && "Invalid truncate node, src < dst!"); + assert(Operand.getValueType().getScalarType().bitsGT(VT.getScalarType()) && + "Invalid truncate node, src < dst!"); + assert((!VT.isVector() || + VT.getVectorNumElements() == + Operand.getValueType().getVectorNumElements()) && + "Vector element count mismatch!"); if (OpOpcode == ISD::TRUNCATE) return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0)); else if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ANY_EXTEND) { // If the source is smaller than the dest, we still need an extend. - if (Operand.getNode()->getOperand(0).getValueType().bitsLT(VT)) + if (Operand.getNode()->getOperand(0).getValueType().getScalarType() + .bitsLT(VT.getScalarType())) return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0)); else if (Operand.getNode()->getOperand(0).getValueType().bitsGT(VT)) return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0)); @@ -2447,8 +2549,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDValue Ops[1] = { Operand }; AddNodeIDNode(ID, Opcode, VTs, Ops, 1); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } N = NodeAllocator.Allocate<UnarySDNode>(); new (N) UnarySDNode(Opcode, DL, VTs, Operand); CSEMap.InsertNode(N, IP); @@ -2458,6 +2562,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, } AllNodes.push_back(N); + if (Ordering) Ordering->add(N); #ifndef NDEBUG VerifyNode(N); #endif @@ -2623,6 +2728,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, assert(VT == N1.getValueType() && "Not an inreg extend!"); assert(VT.isInteger() && EVT.isInteger() && "Cannot *_EXTEND_INREG FP types"); + assert(!EVT.isVector() && + "AssertSExt/AssertZExt type should be the vector element type " + "rather than the vector type!"); assert(EVT.bitsLE(VT) && "Not extending!"); if (VT == EVT) return N1; // noop assertion. break; @@ -2632,12 +2740,15 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, assert(VT == N1.getValueType() && "Not an inreg extend!"); assert(VT.isInteger() && EVT.isInteger() && "Cannot *_EXTEND_INREG FP types"); - assert(EVT.bitsLE(VT) && "Not extending!"); + assert(!EVT.isVector() && + "SIGN_EXTEND_INREG type should be the vector element type rather " + "than the vector type!"); + assert(EVT.bitsLE(VT.getScalarType()) && "Not extending!"); if (EVT == VT) return N1; // Not actually extending if (N1C) { APInt Val = N1C->getAPIntValue(); - unsigned FromBits = cast<VTSDNode>(N2)->getVT().getSizeInBits(); + unsigned FromBits = EVT.getSizeInBits(); Val <<= Val.getBitWidth()-FromBits; Val = Val.ashr(Val.getBitWidth()-FromBits); return getConstant(Val, VT); @@ -2859,8 +2970,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTs, Ops, 2); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } N = NodeAllocator.Allocate<BinarySDNode>(); new (N) BinarySDNode(Opcode, DL, VTs, N1, N2); CSEMap.InsertNode(N, IP); @@ -2870,6 +2983,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, } AllNodes.push_back(N); + if (Ordering) Ordering->add(N); #ifndef NDEBUG VerifyNode(N); #endif @@ -2936,8 +3050,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTs, Ops, 3); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } N = NodeAllocator.Allocate<TernarySDNode>(); new (N) TernarySDNode(Opcode, DL, VTs, N1, N2, N3); CSEMap.InsertNode(N, IP); @@ -2945,7 +3061,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, N = NodeAllocator.Allocate<TernarySDNode>(); new (N) TernarySDNode(Opcode, DL, VTs, N1, N2, N3); } + AllNodes.push_back(N); + if (Ordering) Ordering->add(N); #ifndef NDEBUG VerifyNode(N); #endif @@ -3541,12 +3659,14 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, void* IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<AtomicSDNode>(E)->refineAlignment(MMO); + if (Ordering) Ordering->add(E); return SDValue(E, 0); } SDNode* N = NodeAllocator.Allocate<AtomicSDNode>(); new (N) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, Ptr, Cmp, Swp, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -3604,12 +3724,14 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, void* IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<AtomicSDNode>(E)->refineAlignment(MMO); + if (Ordering) Ordering->add(E); return SDValue(E, 0); } SDNode* N = NodeAllocator.Allocate<AtomicSDNode>(); new (N) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, Ptr, Val, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -3682,6 +3804,7 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<MemIntrinsicSDNode>(E)->refineAlignment(MMO); + if (Ordering) Ordering->add(E); return SDValue(E, 0); } @@ -3693,6 +3816,7 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, new (N) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, MemVT, MMO); } AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -3732,16 +3856,15 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl, assert(VT == MemVT && "Non-extending load from different memory type!"); } else { // Extending load. - if (VT.isVector()) - assert(MemVT.getVectorNumElements() == VT.getVectorNumElements() && - "Invalid vector extload!"); - else - assert(MemVT.bitsLT(VT) && - "Should only be an extending load, not truncating!"); - assert((ExtType == ISD::EXTLOAD || VT.isInteger()) && - "Cannot sign/zero extend a FP/Vector load!"); + assert(MemVT.getScalarType().bitsLT(VT.getScalarType()) && + "Should only be an extending load, not truncating!"); assert(VT.isInteger() == MemVT.isInteger() && "Cannot convert from FP to Int or Int -> FP!"); + assert(VT.isVector() == MemVT.isVector() && + "Cannot use trunc store to convert to or from a vector!"); + assert((!VT.isVector() || + VT.getVectorNumElements() == MemVT.getVectorNumElements()) && + "Cannot use trunc store to change the number of vector elements!"); } bool Indexed = AM != ISD::UNINDEXED; @@ -3758,12 +3881,14 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl, void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<LoadSDNode>(E)->refineAlignment(MMO); + if (Ordering) Ordering->add(E); return SDValue(E, 0); } SDNode *N = NodeAllocator.Allocate<LoadSDNode>(); new (N) LoadSDNode(Ops, dl, VTs, AM, ExtType, MemVT, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -3834,12 +3959,14 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<StoreSDNode>(E)->refineAlignment(MMO); + if (Ordering) Ordering->add(E); return SDValue(E, 0); } SDNode *N = NodeAllocator.Allocate<StoreSDNode>(); new (N) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, false, VT, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -3874,10 +4001,15 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, if (VT == SVT) return getStore(Chain, dl, Val, Ptr, MMO); - assert(VT.bitsGT(SVT) && "Not a truncation?"); + assert(SVT.getScalarType().bitsLT(VT.getScalarType()) && + "Should only be a truncating store, not extending!"); assert(VT.isInteger() == SVT.isInteger() && "Can't do FP-INT conversion!"); - + assert(VT.isVector() == SVT.isVector() && + "Cannot use trunc store to convert to or from a vector!"); + assert((!VT.isVector() || + VT.getVectorNumElements() == SVT.getVectorNumElements()) && + "Cannot use trunc store to change the number of vector elements!"); SDVTList VTs = getVTList(MVT::Other); SDValue Undef = getUNDEF(Ptr.getValueType()); @@ -3889,12 +4021,14 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<StoreSDNode>(E)->refineAlignment(MMO); + if (Ordering) Ordering->add(E); return SDValue(E, 0); } SDNode *N = NodeAllocator.Allocate<StoreSDNode>(); new (N) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, true, SVT, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -3911,14 +4045,17 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, DebugLoc dl, SDValue Base, ID.AddInteger(ST->getMemoryVT().getRawBits()); ID.AddInteger(ST->getRawSubclassData()); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } SDNode *N = NodeAllocator.Allocate<StoreSDNode>(); new (N) StoreSDNode(Ops, dl, VTs, AM, ST->isTruncatingStore(), ST->getMemoryVT(), ST->getMemOperand()); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); return SDValue(N, 0); } @@ -3984,8 +4121,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, AddNodeIDNode(ID, Opcode, VTs, Ops, NumOps); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } N = NodeAllocator.Allocate<SDNode>(); new (N) SDNode(Opcode, DL, VTs, Ops, NumOps); @@ -3996,6 +4135,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, } AllNodes.push_back(N); + if (Ordering) Ordering->add(N); #ifndef NDEBUG VerifyNode(N); #endif @@ -4051,8 +4191,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return SDValue(E, 0); + } if (NumOps == 1) { N = NodeAllocator.Allocate<UnarySDNode>(); new (N) UnarySDNode(Opcode, DL, VTList, Ops[0]); @@ -4083,6 +4225,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, } } AllNodes.push_back(N); + if (Ordering) Ordering->add(N); #ifndef NDEBUG VerifyNode(N); #endif @@ -4166,7 +4309,7 @@ SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3, EVT VT4) { I->VTs[2] == VT3 && I->VTs[3] == VT4) return *I; - EVT *Array = Allocator.Allocate<EVT>(3); + EVT *Array = Allocator.Allocate<EVT>(4); Array[0] = VT1; Array[1] = VT2; Array[2] = VT3; @@ -4545,8 +4688,10 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, if (VTs.VTs[VTs.NumVTs-1] != MVT::Flag) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, VTs, Ops, NumOps); - if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(ON); return ON; + } } if (!RemoveNodeFromCSEMaps(N)) @@ -4610,6 +4755,7 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, if (IP) CSEMap.InsertNode(N, IP); // Memoize the new node. + if (Ordering) Ordering->add(N); return N; } @@ -4748,8 +4894,10 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs, FoldingSetNodeID ID; AddNodeIDNode(ID, ~Opcode, VTs, Ops, NumOps); IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return cast<MachineSDNode>(E); + } } // Allocate a new MachineSDNode. @@ -4771,6 +4919,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs, CSEMap.InsertNode(N, IP); AllNodes.push_back(N); + if (Ordering) Ordering->add(N); #ifndef NDEBUG VerifyNode(N); #endif @@ -4807,8 +4956,10 @@ SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + if (Ordering) Ordering->add(E); return E; + } } return NULL; } @@ -5867,6 +6018,99 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { &Scalars[0], Scalars.size()); } + +/// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a +/// location that is 'Dist' units away from the location that the 'Base' load +/// is loading from. +bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base, + unsigned Bytes, int Dist) const { + if (LD->getChain() != Base->getChain()) + return false; + EVT VT = LD->getValueType(0); + if (VT.getSizeInBits() / 8 != Bytes) + return false; + + SDValue Loc = LD->getOperand(1); + SDValue BaseLoc = Base->getOperand(1); + if (Loc.getOpcode() == ISD::FrameIndex) { + if (BaseLoc.getOpcode() != ISD::FrameIndex) + return false; + const MachineFrameInfo *MFI = getMachineFunction().getFrameInfo(); + int FI = cast<FrameIndexSDNode>(Loc)->getIndex(); + int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex(); + int FS = MFI->getObjectSize(FI); + int BFS = MFI->getObjectSize(BFI); + if (FS != BFS || FS != (int)Bytes) return false; + return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes); + } + if (Loc.getOpcode() == ISD::ADD && Loc.getOperand(0) == BaseLoc) { + ConstantSDNode *V = dyn_cast<ConstantSDNode>(Loc.getOperand(1)); + if (V && (V->getSExtValue() == Dist*Bytes)) + return true; + } + + GlobalValue *GV1 = NULL; + GlobalValue *GV2 = NULL; + int64_t Offset1 = 0; + int64_t Offset2 = 0; + bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1); + bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2); + if (isGA1 && isGA2 && GV1 == GV2) + return Offset1 == (Offset2 + Dist*Bytes); + return false; +} + + +/// InferPtrAlignment - Infer alignment of a load / store address. Return 0 if +/// it cannot be inferred. +unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { + // If this is a GlobalAddress + cst, return the alignment. + GlobalValue *GV; + int64_t GVOffset = 0; + if (TLI.isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) + return MinAlign(GV->getAlignment(), GVOffset); + + // If this is a direct reference to a stack slot, use information about the + // stack slot's alignment. + int FrameIdx = 1 << 31; + int64_t FrameOffset = 0; + if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) { + FrameIdx = FI->getIndex(); + } else if (Ptr.getOpcode() == ISD::ADD && + isa<ConstantSDNode>(Ptr.getOperand(1)) && + isa<FrameIndexSDNode>(Ptr.getOperand(0))) { + FrameIdx = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex(); + FrameOffset = Ptr.getConstantOperandVal(1); + } + + if (FrameIdx != (1 << 31)) { + // FIXME: Handle FI+CST. + const MachineFrameInfo &MFI = *getMachineFunction().getFrameInfo(); + unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx), + FrameOffset); + if (MFI.isFixedObjectIndex(FrameIdx)) { + int64_t ObjectOffset = MFI.getObjectOffset(FrameIdx) + FrameOffset; + + // The alignment of the frame index can be determined from its offset from + // the incoming frame position. If the frame object is at offset 32 and + // the stack is guaranteed to be 16-byte aligned, then we know that the + // object is 16-byte aligned. + unsigned StackAlign = getTarget().getFrameInfo()->getStackAlignment(); + unsigned Align = MinAlign(ObjectOffset, StackAlign); + + // Finally, the frame object itself may have a known alignment. Factor + // the alignment + offset into a new alignment. For example, if we know + // the FI is 8 byte aligned, but the pointer is 4 off, we really have a + // 4-byte alignment of the resultant pointer. Likewise align 4 + 4-byte + // offset = 4-byte alignment, align 4 + 1-byte offset = align 1, etc. + return std::max(Align, FIInfoAlign); + } + return FIInfoAlign; + } + + return 0; +} + void SelectionDAG::dump() const { errs() << "SelectionDAG has " << AllNodes.size() << " nodes:"; @@ -5882,6 +6126,9 @@ void SelectionDAG::dump() const { errs() << "\n\n"; } +void SelectionDAG::NodeOrdering::dump() const { +} + void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const { print_types(OS, G); print_details(OS, G); @@ -6022,4 +6269,3 @@ bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) { return false; return true; } - diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 57d8903..7568384 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -583,6 +583,9 @@ void SelectionDAGBuilder::visit(Instruction &I) { } void SelectionDAGBuilder::visit(unsigned Opcode, User &I) { + // Tell the DAG that we're processing a new instruction. + DAG.NewInst(); + // Note: this doesn't use InstVisitor, because it has to work with // ConstantExpr's in addition to instructions. switch (Opcode) { @@ -2108,7 +2111,7 @@ void SelectionDAGBuilder::visitSelect(User &I) { for (unsigned i = 0; i != NumValues; ++i) Values[i] = DAG.getNode(ISD::SELECT, getCurDebugLoc(), - TrueVal.getValueType(), Cond, + TrueVal.getNode()->getValueType(i), Cond, SDValue(TrueVal.getNode(), TrueVal.getResNo() + i), SDValue(FalseVal.getNode(), FalseVal.getResNo() + i)); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index c39437f..a640c7d 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -60,8 +60,6 @@ using namespace llvm; static cl::opt<bool> -DisableLegalizeTypes("disable-legalize-types", cl::Hidden); -static cl::opt<bool> EnableFastISelVerbose("fast-isel-verbose", cl::Hidden, cl::desc("Enable verbose messages in the \"fast\" " "instruction selector")); @@ -362,6 +360,39 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { return true; } +/// SetDebugLoc - Update MF's and SDB's DebugLocs if debug information is +/// attached with this instruction. +static void SetDebugLoc(unsigned MDDbgKind, + MetadataContext &TheMetadata, + Instruction *I, + SelectionDAGBuilder *SDB, + FastISel *FastIS, + MachineFunction *MF) { + if (!isa<DbgInfoIntrinsic>(I)) + if (MDNode *Dbg = TheMetadata.getMD(MDDbgKind, I)) { + DILocation DILoc(Dbg); + DebugLoc Loc = ExtractDebugLocation(DILoc, MF->getDebugLocInfo()); + + SDB->setCurDebugLoc(Loc); + + if (FastIS) + FastIS->setCurDebugLoc(Loc); + + // If the function doesn't have a default debug location yet, set + // it. This is kind of a hack. + if (MF->getDefaultDebugLoc().isUnknown()) + MF->setDefaultDebugLoc(Loc); + } +} + +/// ResetDebugLoc - Set MF's and SDB's DebugLocs to Unknown. +static void ResetDebugLoc(SelectionDAGBuilder *SDB, + FastISel *FastIS) { + SDB->setCurDebugLoc(DebugLoc::getUnknownLoc()); + if (FastIS) + FastIS->setCurDebugLoc(DebugLoc::getUnknownLoc()); +} + void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB, BasicBlock::iterator Begin, BasicBlock::iterator End, @@ -373,20 +404,16 @@ void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB, // Lower all of the non-terminator instructions. If a call is emitted // as a tail call, cease emitting nodes for this block. for (BasicBlock::iterator I = Begin; I != End && !SDB->HasTailCall; ++I) { - if (MDDbgKind) { - // Update DebugLoc if debug information is attached with this - // instruction. - if (!isa<DbgInfoIntrinsic>(I)) - if (MDNode *Dbg = TheMetadata.getMD(MDDbgKind, I)) { - DILocation DILoc(Dbg); - DebugLoc Loc = ExtractDebugLocation(DILoc, MF->getDebugLocInfo()); - SDB->setCurDebugLoc(Loc); - if (MF->getDefaultDebugLoc().isUnknown()) - MF->setDefaultDebugLoc(Loc); - } - } - if (!isa<TerminatorInst>(I)) + if (MDDbgKind) + SetDebugLoc(MDDbgKind, TheMetadata, I, SDB, 0, MF); + + if (!isa<TerminatorInst>(I)) { SDB->visit(*I); + + // Set the current debug location back to "unknown" so that it doesn't + // spuriously apply to subsequent instructions. + ResetDebugLoc(SDB, 0); + } } if (!SDB->HasTailCall) { @@ -401,7 +428,9 @@ void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB, HandlePHINodesInSuccessorBlocks(LLVMBB); // Lower the terminator after the copies are emitted. + SetDebugLoc(MDDbgKind, TheMetadata, LLVMBB->getTerminator(), SDB, 0, MF); SDB->visit(*LLVMBB->getTerminator()); + ResetDebugLoc(SDB, 0); } } @@ -498,75 +527,73 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { // Second step, hack on the DAG until it only uses operations and types that // the target supports. - if (!DisableLegalizeTypes) { - if (ViewLegalizeTypesDAGs) CurDAG->viewGraph("legalize-types input for " + - BlockName); + if (ViewLegalizeTypesDAGs) CurDAG->viewGraph("legalize-types input for " + + BlockName); + + bool Changed; + if (TimePassesIsEnabled) { + NamedRegionTimer T("Type Legalization", GroupName); + Changed = CurDAG->LegalizeTypes(); + } else { + Changed = CurDAG->LegalizeTypes(); + } + + DEBUG(errs() << "Type-legalized selection DAG:\n"); + DEBUG(CurDAG->dump()); - bool Changed; + if (Changed) { + if (ViewDAGCombineLT) + CurDAG->viewGraph("dag-combine-lt input for " + BlockName); + + // Run the DAG combiner in post-type-legalize mode. if (TimePassesIsEnabled) { - NamedRegionTimer T("Type Legalization", GroupName); - Changed = CurDAG->LegalizeTypes(); + NamedRegionTimer T("DAG Combining after legalize types", GroupName); + CurDAG->Combine(NoIllegalTypes, *AA, OptLevel); } else { - Changed = CurDAG->LegalizeTypes(); + CurDAG->Combine(NoIllegalTypes, *AA, OptLevel); } - DEBUG(errs() << "Type-legalized selection DAG:\n"); + DEBUG(errs() << "Optimized type-legalized selection DAG:\n"); DEBUG(CurDAG->dump()); + } - if (Changed) { - if (ViewDAGCombineLT) - CurDAG->viewGraph("dag-combine-lt input for " + BlockName); - - // Run the DAG combiner in post-type-legalize mode. - if (TimePassesIsEnabled) { - NamedRegionTimer T("DAG Combining after legalize types", GroupName); - CurDAG->Combine(NoIllegalTypes, *AA, OptLevel); - } else { - CurDAG->Combine(NoIllegalTypes, *AA, OptLevel); - } - - DEBUG(errs() << "Optimized type-legalized selection DAG:\n"); - DEBUG(CurDAG->dump()); - } + if (TimePassesIsEnabled) { + NamedRegionTimer T("Vector Legalization", GroupName); + Changed = CurDAG->LegalizeVectors(); + } else { + Changed = CurDAG->LegalizeVectors(); + } + if (Changed) { if (TimePassesIsEnabled) { - NamedRegionTimer T("Vector Legalization", GroupName); - Changed = CurDAG->LegalizeVectors(); + NamedRegionTimer T("Type Legalization 2", GroupName); + Changed = CurDAG->LegalizeTypes(); } else { - Changed = CurDAG->LegalizeVectors(); + Changed = CurDAG->LegalizeTypes(); } - if (Changed) { - if (TimePassesIsEnabled) { - NamedRegionTimer T("Type Legalization 2", GroupName); - Changed = CurDAG->LegalizeTypes(); - } else { - Changed = CurDAG->LegalizeTypes(); - } - - if (ViewDAGCombineLT) - CurDAG->viewGraph("dag-combine-lv input for " + BlockName); + if (ViewDAGCombineLT) + CurDAG->viewGraph("dag-combine-lv input for " + BlockName); - // Run the DAG combiner in post-type-legalize mode. - if (TimePassesIsEnabled) { - NamedRegionTimer T("DAG Combining after legalize vectors", GroupName); - CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); - } else { - CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); - } - - DEBUG(errs() << "Optimized vector-legalized selection DAG:\n"); - DEBUG(CurDAG->dump()); + // Run the DAG combiner in post-type-legalize mode. + if (TimePassesIsEnabled) { + NamedRegionTimer T("DAG Combining after legalize vectors", GroupName); + CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); + } else { + CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); } + + DEBUG(errs() << "Optimized vector-legalized selection DAG:\n"); + DEBUG(CurDAG->dump()); } if (ViewLegalizeDAGs) CurDAG->viewGraph("legalize input for " + BlockName); if (TimePassesIsEnabled) { NamedRegionTimer T("DAG Legalization", GroupName); - CurDAG->Legalize(DisableLegalizeTypes, OptLevel); + CurDAG->Legalize(OptLevel); } else { - CurDAG->Legalize(DisableLegalizeTypes, OptLevel); + CurDAG->Legalize(OptLevel); } DEBUG(errs() << "Legalized selection DAG:\n"); @@ -738,24 +765,11 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, FastIS->startNewBlock(BB); // Do FastISel on as many instructions as possible. for (; BI != End; ++BI) { - if (MDDbgKind) { - // Update DebugLoc if debug information is attached with this - // instruction. - if (!isa<DbgInfoIntrinsic>(BI)) - if (MDNode *Dbg = TheMetadata.getMD(MDDbgKind, BI)) { - DILocation DILoc(Dbg); - DebugLoc Loc = ExtractDebugLocation(DILoc, - MF.getDebugLocInfo()); - FastIS->setCurDebugLoc(Loc); - if (MF.getDefaultDebugLoc().isUnknown()) - MF.setDefaultDebugLoc(Loc); - } - } - // Just before the terminator instruction, insert instructions to // feed PHI nodes in successor blocks. if (isa<TerminatorInst>(BI)) if (!HandlePHINodesInSuccessorBlocksFast(LLVMBB, FastIS)) { + ResetDebugLoc(SDB, FastIS); if (EnableFastISelVerbose || EnableFastISelAbort) { errs() << "FastISel miss: "; BI->dump(); @@ -765,13 +779,18 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, break; } + if (MDDbgKind) + SetDebugLoc(MDDbgKind, TheMetadata, BI, SDB, FastIS, &MF); + // First try normal tablegen-generated "fast" selection. - if (FastIS->SelectInstruction(BI)) + if (FastIS->SelectInstruction(BI)) { + ResetDebugLoc(SDB, FastIS); continue; + } - // Next, try calling the target to attempt to handle the instruction. - if (FastIS->TargetSelectInstruction(BI)) - continue; + // Clear out the debug location so that it doesn't carry over to + // unrelated instructions. + ResetDebugLoc(SDB, FastIS); // Then handle certain instructions as single-LLVM-Instruction blocks. if (isa<CallInst>(BI)) { @@ -786,10 +805,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, R = FuncInfo->CreateRegForValue(BI); } - SDB->setCurDebugLoc(FastIS->getCurDebugLoc()); - bool HadTailCall = false; - SelectBasicBlock(LLVMBB, BI, next(BI), HadTailCall); + SelectBasicBlock(LLVMBB, BI, llvm::next(BI), HadTailCall); // If the call was emitted as a tail call, we're done with the block. if (HadTailCall) { @@ -823,9 +840,6 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, // not handled by FastISel. If FastISel is not run, this is the entire // block. if (BI != End) { - // If FastISel is run and it has known DebugLoc then use it. - if (FastIS && !FastIS->getCurDebugLoc().isUnknown()) - SDB->setCurDebugLoc(FastIS->getCurDebugLoc()); bool HadTailCall; SelectBasicBlock(LLVMBB, BI, End, HadTailCall); } @@ -1313,14 +1327,6 @@ SDNode *SelectionDAGISel::Select_UNDEF(const SDValue &N) { N.getValueType()); } -SDNode *SelectionDAGISel::Select_DBG_LABEL(const SDValue &N) { - SDValue Chain = N.getOperand(0); - unsigned C = cast<LabelSDNode>(N)->getLabelID(); - SDValue Tmp = CurDAG->getTargetConstant(C, MVT::i32); - return CurDAG->SelectNodeTo(N.getNode(), TargetInstrInfo::DBG_LABEL, - MVT::Other, Tmp, Chain); -} - SDNode *SelectionDAGISel::Select_EH_LABEL(const SDValue &N) { SDValue Chain = N.getOperand(0); unsigned C = cast<LabelSDNode>(N)->getLabelID(); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index c5adc50..83fa5a8 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -29,14 +29,14 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Config/config.h" -#include <fstream> using namespace llvm; namespace llvm { template<> struct DOTGraphTraits<SelectionDAG*> : public DefaultDOTGraphTraits { - DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {} + explicit DOTGraphTraits(bool isSimple=false) : + DefaultDOTGraphTraits(isSimple) {} static bool hasEdgeDestLabels() { return true; @@ -50,6 +50,11 @@ namespace llvm { return ((const SDNode *) Node)->getValueType(i).getEVTString(); } + template<typename EdgeIter> + static std::string getEdgeSourceLabel(const void *Node, EdgeIter I) { + return itostr(I - SDNodeIterator::begin((SDNode *) Node)); + } + /// edgeTargetsEdgeSource - This method returns true if this outgoing edge /// should actually target another edge source, not a node. If this method /// is implemented, getEdgeTarget should be implemented. @@ -76,12 +81,12 @@ namespace llvm { static bool renderGraphFromBottomUp() { return true; } - + static bool hasNodeAddressLabel(const SDNode *Node, const SelectionDAG *Graph) { return true; } - + /// If you want to override the dot attributes printed for a particular /// edge, override this method. template<typename EdgeIter> @@ -94,7 +99,7 @@ namespace llvm { return "color=blue,style=dashed"; return ""; } - + static std::string getSimpleNodeLabel(const SDNode *Node, const SelectionDAG *G) { @@ -132,7 +137,7 @@ namespace llvm { std::string DOTGraphTraits<SelectionDAG*>::getNodeLabel(const SDNode *Node, const SelectionDAG *G) { - return DOTGraphTraits<SelectionDAG*>::getSimpleNodeLabel (Node, G); + return DOTGraphTraits<SelectionDAG*>::getSimpleNodeLabel(Node, G); } @@ -142,7 +147,7 @@ std::string DOTGraphTraits<SelectionDAG*>::getNodeLabel(const SDNode *Node, void SelectionDAG::viewGraph(const std::string &Title) { // This code is only for debugging! #ifndef NDEBUG - ViewGraph(this, "dag." + getMachineFunction().getFunction()->getNameStr(), + ViewGraph(this, "dag." + getMachineFunction().getFunction()->getNameStr(), false, Title); #else errs() << "SelectionDAG::viewGraph is only available in debug builds on " @@ -186,7 +191,7 @@ const std::string SelectionDAG::getGraphAttrs(const SDNode *N) const { #ifndef NDEBUG std::map<const SDNode *, std::string>::const_iterator I = NodeGraphAttrs.find(N); - + if (I != NodeGraphAttrs.end()) return I->second; else @@ -252,8 +257,7 @@ void SelectionDAG::setSubgraphColor(SDNode *N, const char *Color) { // Visually mark that we hit the limit if (strcmp(Color, "red") == 0) { setSubgraphColorHelper(N, "blue", visited, 0, printed); - } - else if (strcmp(Color, "yellow") == 0) { + } else if (strcmp(Color, "yellow") == 0) { setSubgraphColorHelper(N, "green", visited, 0, printed); } } diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 68bc2d6..1026169 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -911,7 +911,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, TargetLoweringOpt &TLO, unsigned Depth) const { unsigned BitWidth = DemandedMask.getBitWidth(); - assert(Op.getValueSizeInBits() == BitWidth && + assert(Op.getValueType().getScalarType().getSizeInBits() == BitWidth && "Mask size mismatches value type size!"); APInt NewMask = DemandedMask; DebugLoc dl = Op.getDebugLoc(); @@ -1240,7 +1240,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // demand the input sign bit. APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt); if (HighBits.intersects(NewMask)) - InDemandedMask |= APInt::getSignBit(VT.getSizeInBits()); + InDemandedMask |= APInt::getSignBit(VT.getScalarType().getSizeInBits()); if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask, KnownZero, KnownOne, TLO, Depth+1)) @@ -2184,48 +2184,6 @@ bool TargetLowering::isGAPlusOffset(SDNode *N, GlobalValue* &GA, } -/// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a -/// location that is 'Dist' units away from the location that the 'Base' load -/// is loading from. -bool TargetLowering::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base, - unsigned Bytes, int Dist, - const MachineFrameInfo *MFI) const { - if (LD->getChain() != Base->getChain()) - return false; - EVT VT = LD->getValueType(0); - if (VT.getSizeInBits() / 8 != Bytes) - return false; - - SDValue Loc = LD->getOperand(1); - SDValue BaseLoc = Base->getOperand(1); - if (Loc.getOpcode() == ISD::FrameIndex) { - if (BaseLoc.getOpcode() != ISD::FrameIndex) - return false; - int FI = cast<FrameIndexSDNode>(Loc)->getIndex(); - int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex(); - int FS = MFI->getObjectSize(FI); - int BFS = MFI->getObjectSize(BFI); - if (FS != BFS || FS != (int)Bytes) return false; - return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes); - } - if (Loc.getOpcode() == ISD::ADD && Loc.getOperand(0) == BaseLoc) { - ConstantSDNode *V = dyn_cast<ConstantSDNode>(Loc.getOperand(1)); - if (V && (V->getSExtValue() == Dist*Bytes)) - return true; - } - - GlobalValue *GV1 = NULL; - GlobalValue *GV2 = NULL; - int64_t Offset1 = 0; - int64_t Offset2 = 0; - bool isGA1 = isGAPlusOffset(Loc.getNode(), GV1, Offset1); - bool isGA2 = isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2); - if (isGA1 && isGA2 && GV1 == GV2) - return Offset1 == (Offset2 + Dist*Bytes); - return false; -} - - SDValue TargetLowering:: PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { // Default implementation: no optimization. diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp index 5876371..ed407eb 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.cpp +++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp @@ -130,7 +130,8 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA, // See PR3149: // 172 %ECX<def> = MOV32rr %reg1039<kill> // 180 INLINEASM <es:subl $5,$1 - // sbbl $3,$0>, 10, %EAX<def>, 14, %ECX<earlyclobber,def>, 9, %EAX<kill>, + // sbbl $3,$0>, 10, %EAX<def>, 14, %ECX<earlyclobber,def>, 9, + // %EAX<kill>, // 36, <fi#0>, 1, %reg0, 0, 9, %ECX<kill>, 36, <fi#1>, 1, %reg0, 0 // 188 %EAX<def> = MOV32rr %EAX<kill> // 196 %ECX<def> = MOV32rr %ECX<kill> @@ -281,12 +282,12 @@ TransferImplicitOps(MachineInstr *MI, MachineInstr *NewMI) { } } -/// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy with IntA -/// being the source and IntB being the dest, thus this defines a value number -/// in IntB. If the source value number (in IntA) is defined by a commutable -/// instruction and its other operand is coalesced to the copy dest register, -/// see if we can transform the copy into a noop by commuting the definition. For -/// example, +/// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy with +/// IntA being the source and IntB being the dest, thus this defines a value +/// number in IntB. If the source value number (in IntA) is defined by a +/// commutable instruction and its other operand is coalesced to the copy dest +/// register, see if we can transform the copy into a noop by commuting the +/// definition. For example, /// /// A3 = op A2 B0<kill> /// ... @@ -508,7 +509,8 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, if (BHasSubRegs) { for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) { LiveInterval &SRLI = li_->getInterval(*SR); - SRLI.MergeInClobberRange(*li_, AI->start, End, li_->getVNInfoAllocator()); + SRLI.MergeInClobberRange(*li_, AI->start, End, + li_->getVNInfoAllocator()); } } } @@ -708,7 +710,8 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, checkForDeadDef = true; } - MachineBasicBlock::iterator MII = next(MachineBasicBlock::iterator(CopyMI)); + MachineBasicBlock::iterator MII = + llvm::next(MachineBasicBlock::iterator(CopyMI)); tii_->reMaterialize(*MBB, MII, DstReg, DstSubIdx, DefMI, tri_); MachineInstr *NewMI = prior(MII); @@ -1314,7 +1317,13 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { "coalesced to another register.\n"); return false; // Not coalescable. } - } else if (!tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)){ + } else if (tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) { + if (SrcSubIdx && DstSubIdx && SrcSubIdx != DstSubIdx) { + // e.g. %reg16404:1<def> = MOV8rr %reg16412:2<kill> + Again = true; + return false; // Not coalescable. + } + } else { llvm_unreachable("Unrecognized copy instruction!"); } @@ -1611,9 +1620,9 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { } } } else { - // If the virtual register live interval is long but it has low use desity, - // do not join them, instead mark the physical register as its allocation - // preference. + // If the virtual register live interval is long but it has low use + // density, do not join them, instead mark the physical register as its + // allocation preference. LiveInterval &JoinVInt = SrcIsPhys ? DstInt : SrcInt; unsigned JoinVReg = SrcIsPhys ? DstReg : SrcReg; unsigned JoinPReg = SrcIsPhys ? SrcReg : DstReg; @@ -1938,6 +1947,10 @@ bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){ if (Overlaps) { // If we haven't already recorded that this value # is safe, check it. if (!InVector(LHSIt->valno, EliminatedLHSVals)) { + // If it's re-defined by an early clobber somewhere in the live range, + // then conservatively abort coalescing. + if (LHSIt->valno->hasRedefByEC()) + return false; // Copy from the RHS? if (!RangeIsDefinedByCopyFromReg(LHS, LHSIt, RHS.reg)) return false; // Nope, bail out. @@ -1977,6 +1990,10 @@ bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){ // if coalescing succeeds. Just skip the liverange. if (++LHSIt == LHSEnd) break; } else { + // If it's re-defined by an early clobber somewhere in the live range, + // then conservatively abort coalescing. + if (LHSIt->valno->hasRedefByEC()) + return false; // Otherwise, if this is a copy from the RHS, mark it as being merged // in. if (RangeIsDefinedByCopyFromReg(LHS, LHSIt, RHS.reg)) { @@ -2316,6 +2333,10 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, if (LHSValNoAssignments[I->valno->id] != RHSValNoAssignments[J->valno->id]) return false; + // If it's re-defined by an early clobber somewhere in the live range, + // then conservatively abort coalescing. + if (NewVNInfo[LHSValNoAssignments[I->valno->id]]->hasRedefByEC()) + return false; } if (I->end < J->end) { @@ -2401,9 +2422,15 @@ void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB, // If this isn't a copy nor a extract_subreg, we can't join intervals. unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; + bool isInsUndef = false; if (Inst->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) { DstReg = Inst->getOperand(0).getReg(); SrcReg = Inst->getOperand(1).getReg(); + } else if (Inst->getOpcode() == TargetInstrInfo::INSERT_SUBREG) { + DstReg = Inst->getOperand(0).getReg(); + SrcReg = Inst->getOperand(2).getReg(); + if (Inst->getOperand(1).isUndef()) + isInsUndef = true; } else if (Inst->getOpcode() == TargetInstrInfo::INSERT_SUBREG || Inst->getOpcode() == TargetInstrInfo::SUBREG_TO_REG) { DstReg = Inst->getOperand(0).getReg(); @@ -2413,7 +2440,8 @@ void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB, bool SrcIsPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg); bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); - if (li_->hasInterval(SrcReg) && li_->getInterval(SrcReg).empty()) + if (isInsUndef || + (li_->hasInterval(SrcReg) && li_->getInterval(SrcReg).empty())) ImpDefCopies.push_back(CopyRec(Inst, 0)); else if (SrcIsPhys || DstIsPhys) PhysCopies.push_back(CopyRec(Inst, 0)); @@ -2421,9 +2449,9 @@ void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB, VirtCopies.push_back(CopyRec(Inst, 0)); } - // Try coalescing implicit copies first, followed by copies to / from - // physical registers, then finally copies from virtual registers to - // virtual registers. + // Try coalescing implicit copies and insert_subreg <undef> first, + // followed by copies to / from physical registers, then finally copies + // from virtual registers to virtual registers. for (unsigned i = 0, e = ImpDefCopies.size(); i != e; ++i) { CopyRec &TheCopy = ImpDefCopies[i]; bool Again = false; @@ -2594,114 +2622,6 @@ void SimpleRegisterCoalescing::releaseMemory() { ReMatDefs.clear(); } -/// Returns true if the given live interval is zero length. -static bool isZeroLengthInterval(LiveInterval *li, LiveIntervals *li_) { - for (LiveInterval::Ranges::const_iterator - i = li->ranges.begin(), e = li->ranges.end(); i != e; ++i) - if (i->end.getPrevIndex() > i->start) - return false; - return true; -} - - -void SimpleRegisterCoalescing::CalculateSpillWeights() { - SmallSet<unsigned, 4> Processed; - for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end(); - mbbi != mbbe; ++mbbi) { - MachineBasicBlock* MBB = mbbi; - SlotIndex MBBEnd = li_->getMBBEndIdx(MBB); - MachineLoop* loop = loopInfo->getLoopFor(MBB); - unsigned loopDepth = loop ? loop->getLoopDepth() : 0; - bool isExiting = loop ? loop->isLoopExiting(MBB) : false; - - for (MachineBasicBlock::const_iterator mii = MBB->begin(), mie = MBB->end(); - mii != mie; ++mii) { - const MachineInstr *MI = mii; - if (tii_->isIdentityCopy(*MI)) - continue; - - if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) - continue; - - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &mopi = MI->getOperand(i); - if (!mopi.isReg() || mopi.getReg() == 0) - continue; - unsigned Reg = mopi.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(mopi.getReg())) - continue; - // Multiple uses of reg by the same instruction. It should not - // contribute to spill weight again. - if (!Processed.insert(Reg)) - continue; - - bool HasDef = mopi.isDef(); - bool HasUse = !HasDef; - for (unsigned j = i+1; j != e; ++j) { - const MachineOperand &mopj = MI->getOperand(j); - if (!mopj.isReg() || mopj.getReg() != Reg) - continue; - HasDef |= mopj.isDef(); - HasUse |= mopj.isUse(); - if (HasDef && HasUse) - break; - } - - LiveInterval &RegInt = li_->getInterval(Reg); - float Weight = li_->getSpillWeight(HasDef, HasUse, loopDepth); - if (HasDef && isExiting) { - // Looks like this is a loop count variable update. - SlotIndex DefIdx = li_->getInstructionIndex(MI).getDefIndex(); - const LiveRange *DLR = - li_->getInterval(Reg).getLiveRangeContaining(DefIdx); - if (DLR->end > MBBEnd) - Weight *= 3.0F; - } - RegInt.weight += Weight; - } - Processed.clear(); - } - } - - for (LiveIntervals::iterator I = li_->begin(), E = li_->end(); I != E; ++I) { - LiveInterval &LI = *I->second; - if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { - // If the live interval length is essentially zero, i.e. in every live - // range the use follows def immediately, it doesn't make sense to spill - // it and hope it will be easier to allocate for this li. - if (isZeroLengthInterval(&LI, li_)) { - LI.weight = HUGE_VALF; - continue; - } - - bool isLoad = false; - SmallVector<LiveInterval*, 4> SpillIs; - if (li_->isReMaterializable(LI, SpillIs, isLoad)) { - // If all of the definitions of the interval are re-materializable, - // it is a preferred candidate for spilling. If non of the defs are - // loads, then it's potentially very cheap to re-materialize. - // FIXME: this gets much more complicated once we support non-trivial - // re-materialization. - if (isLoad) - LI.weight *= 0.9F; - else - LI.weight *= 0.5F; - } - - // Slightly prefer live interval that has been assigned a preferred reg. - std::pair<unsigned, unsigned> Hint = mri_->getRegAllocationHint(LI.reg); - if (Hint.first || Hint.second) - LI.weight *= 1.01F; - - // Divide the weight of the interval by its size. This encourages - // spilling of intervals that are large and have few uses, and - // discourages spilling of small intervals with many uses. - LI.weight /= li_->getApproximateInstructionCount(LI) * InstrSlots::NUM; - } - } -} - - bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { mf_ = &fn; mri_ = &fn.getRegInfo(); @@ -2727,7 +2647,8 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { joinIntervals(); DEBUG({ errs() << "********** INTERVALS POST JOINING **********\n"; - for (LiveIntervals::iterator I = li_->begin(), E = li_->end(); I != E; ++I){ + for (LiveIntervals::iterator I = li_->begin(), E = li_->end(); + I != E; ++I){ I->second->print(errs(), tri_); errs() << "\n"; } @@ -2768,7 +2689,7 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { DoDelete = true; } if (!DoDelete) - mii = next(mii); + mii = llvm::next(mii); else { li_->RemoveMachineInstrFromMaps(MI); mii = mbbi->erase(mii); @@ -2831,8 +2752,6 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { } } - CalculateSpillWeights(); - DEBUG(dump()); return true; } diff --git a/lib/CodeGen/SimpleRegisterCoalescing.h b/lib/CodeGen/SimpleRegisterCoalescing.h index 78f8a9a..605a740 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.h +++ b/lib/CodeGen/SimpleRegisterCoalescing.h @@ -244,10 +244,6 @@ namespace llvm { MachineOperand *lastRegisterUse(SlotIndex Start, SlotIndex End, unsigned Reg, SlotIndex &LastUseIdx) const; - /// CalculateSpillWeights - Compute spill weights for all virtual register - /// live intervals. - void CalculateSpillWeights(); - void printRegName(unsigned reg) const; }; diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp index 237d0b5..bc246c1 100644 --- a/lib/CodeGen/Spiller.cpp +++ b/lib/CodeGen/Spiller.cpp @@ -20,22 +20,25 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include <set> using namespace llvm; namespace { - enum SpillerName { trivial, standard }; + enum SpillerName { trivial, standard, splitting }; } static cl::opt<SpillerName> spillerOpt("spiller", cl::desc("Spiller to use: (default: standard)"), cl::Prefix, - cl::values(clEnumVal(trivial, "trivial spiller"), - clEnumVal(standard, "default spiller"), + cl::values(clEnumVal(trivial, "trivial spiller"), + clEnumVal(standard, "default spiller"), + clEnumVal(splitting, "splitting spiller"), clEnumValEnd), cl::init(standard)); +// Spiller virtual destructor implementation. Spiller::~Spiller() {} namespace { @@ -140,9 +143,9 @@ protected: // Insert store if necessary. if (hasDef) { - tii->storeRegToStackSlot(*mi->getParent(), next(miItr), newVReg, true, + tii->storeRegToStackSlot(*mi->getParent(), llvm::next(miItr), newVReg, true, ss, trc); - MachineInstr *storeInstr(next(miItr)); + MachineInstr *storeInstr(llvm::next(miItr)); SlotIndex storeIndex = lis->InsertMachineInstrInMaps(storeInstr).getDefIndex(); SlotIndex beginIndex = storeIndex.getPrevIndex(); @@ -170,7 +173,8 @@ public: : SpillerBase(mf, lis, vrm) {} std::vector<LiveInterval*> spill(LiveInterval *li, - SmallVectorImpl<LiveInterval*> &spillIs) { + SmallVectorImpl<LiveInterval*> &spillIs, + SlotIndex*) { // Ignore spillIs - we don't use it. return trivialSpillEverywhere(li); } @@ -179,23 +183,336 @@ public: /// Falls back on LiveIntervals::addIntervalsForSpills. class StandardSpiller : public Spiller { -private: +protected: LiveIntervals *lis; const MachineLoopInfo *loopInfo; VirtRegMap *vrm; public: - StandardSpiller(MachineFunction *mf, LiveIntervals *lis, - const MachineLoopInfo *loopInfo, VirtRegMap *vrm) + StandardSpiller(LiveIntervals *lis, const MachineLoopInfo *loopInfo, + VirtRegMap *vrm) : lis(lis), loopInfo(loopInfo), vrm(vrm) {} /// Falls back on LiveIntervals::addIntervalsForSpills. std::vector<LiveInterval*> spill(LiveInterval *li, - SmallVectorImpl<LiveInterval*> &spillIs) { + SmallVectorImpl<LiveInterval*> &spillIs, + SlotIndex*) { return lis->addIntervalsForSpills(*li, spillIs, loopInfo, *vrm); } }; +/// When a call to spill is placed this spiller will first try to break the +/// interval up into its component values (one new interval per value). +/// If this fails, or if a call is placed to spill a previously split interval +/// then the spiller falls back on the standard spilling mechanism. +class SplittingSpiller : public StandardSpiller { +public: + SplittingSpiller(MachineFunction *mf, LiveIntervals *lis, + const MachineLoopInfo *loopInfo, VirtRegMap *vrm) + : StandardSpiller(lis, loopInfo, vrm) { + + mri = &mf->getRegInfo(); + tii = mf->getTarget().getInstrInfo(); + tri = mf->getTarget().getRegisterInfo(); + } + + std::vector<LiveInterval*> spill(LiveInterval *li, + SmallVectorImpl<LiveInterval*> &spillIs, + SlotIndex *earliestStart) { + + if (worthTryingToSplit(li)) { + return tryVNISplit(li, earliestStart); + } + // else + return StandardSpiller::spill(li, spillIs, earliestStart); + } + +private: + + MachineRegisterInfo *mri; + const TargetInstrInfo *tii; + const TargetRegisterInfo *tri; + DenseSet<LiveInterval*> alreadySplit; + + bool worthTryingToSplit(LiveInterval *li) const { + return (!alreadySplit.count(li) && li->getNumValNums() > 1); + } + + /// Try to break a LiveInterval into its component values. + std::vector<LiveInterval*> tryVNISplit(LiveInterval *li, + SlotIndex *earliestStart) { + + DEBUG(errs() << "Trying VNI split of %reg" << *li << "\n"); + + std::vector<LiveInterval*> added; + SmallVector<VNInfo*, 4> vnis; + + std::copy(li->vni_begin(), li->vni_end(), std::back_inserter(vnis)); + + for (SmallVectorImpl<VNInfo*>::iterator vniItr = vnis.begin(), + vniEnd = vnis.end(); vniItr != vniEnd; ++vniItr) { + VNInfo *vni = *vniItr; + + // Skip unused VNIs, or VNIs with no kills. + if (vni->isUnused() || vni->kills.empty()) + continue; + + DEBUG(errs() << " Extracted Val #" << vni->id << " as "); + LiveInterval *splitInterval = extractVNI(li, vni); + + if (splitInterval != 0) { + DEBUG(errs() << *splitInterval << "\n"); + added.push_back(splitInterval); + alreadySplit.insert(splitInterval); + if (earliestStart != 0) { + if (splitInterval->beginIndex() < *earliestStart) + *earliestStart = splitInterval->beginIndex(); + } + } else { + DEBUG(errs() << "0\n"); + } + } + + DEBUG(errs() << "Original LI: " << *li << "\n"); + + // If there original interval still contains some live ranges + // add it to added and alreadySplit. + if (!li->empty()) { + added.push_back(li); + alreadySplit.insert(li); + if (earliestStart != 0) { + if (li->beginIndex() < *earliestStart) + *earliestStart = li->beginIndex(); + } + } + + return added; + } + + /// Extract the given value number from the interval. + LiveInterval* extractVNI(LiveInterval *li, VNInfo *vni) const { + assert(vni->isDefAccurate() || vni->isPHIDef()); + assert(!vni->kills.empty()); + + // Create a new vreg and live interval, copy VNI kills & ranges over. + const TargetRegisterClass *trc = mri->getRegClass(li->reg); + unsigned newVReg = mri->createVirtualRegister(trc); + vrm->grow(); + LiveInterval *newLI = &lis->getOrCreateInterval(newVReg); + VNInfo *newVNI = newLI->createValueCopy(vni, lis->getVNInfoAllocator()); + + // Start by copying all live ranges in the VN to the new interval. + for (LiveInterval::iterator rItr = li->begin(), rEnd = li->end(); + rItr != rEnd; ++rItr) { + if (rItr->valno == vni) { + newLI->addRange(LiveRange(rItr->start, rItr->end, newVNI)); + } + } + + // Erase the old VNI & ranges. + li->removeValNo(vni); + + // Collect all current uses of the register belonging to the given VNI. + // We'll use this to rename the register after we've dealt with the def. + std::set<MachineInstr*> uses; + for (MachineRegisterInfo::use_iterator + useItr = mri->use_begin(li->reg), useEnd = mri->use_end(); + useItr != useEnd; ++useItr) { + uses.insert(&*useItr); + } + + // Process the def instruction for this VNI. + if (newVNI->isPHIDef()) { + // Insert a copy at the start of the MBB. The range proceeding the + // copy will be attached to the original LiveInterval. + MachineBasicBlock *defMBB = lis->getMBBFromIndex(newVNI->def); + tii->copyRegToReg(*defMBB, defMBB->begin(), newVReg, li->reg, trc, trc); + MachineInstr *copyMI = defMBB->begin(); + copyMI->addRegisterKilled(li->reg, tri); + SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI); + VNInfo *phiDefVNI = li->getNextValue(lis->getMBBStartIdx(defMBB), + 0, false, lis->getVNInfoAllocator()); + phiDefVNI->setIsPHIDef(true); + phiDefVNI->addKill(copyIdx.getDefIndex()); + li->addRange(LiveRange(phiDefVNI->def, copyIdx.getDefIndex(), phiDefVNI)); + LiveRange *oldPHIDefRange = + newLI->getLiveRangeContaining(lis->getMBBStartIdx(defMBB)); + + // If the old phi def starts in the middle of the range chop it up. + if (oldPHIDefRange->start < lis->getMBBStartIdx(defMBB)) { + LiveRange oldPHIDefRange2(copyIdx.getDefIndex(), oldPHIDefRange->end, + oldPHIDefRange->valno); + oldPHIDefRange->end = lis->getMBBStartIdx(defMBB); + newLI->addRange(oldPHIDefRange2); + } else if (oldPHIDefRange->start == lis->getMBBStartIdx(defMBB)) { + // Otherwise if it's at the start of the range just trim it. + oldPHIDefRange->start = copyIdx.getDefIndex(); + } else { + assert(false && "PHI def range doesn't cover PHI def?"); + } + + newVNI->def = copyIdx.getDefIndex(); + newVNI->setCopy(copyMI); + newVNI->setIsPHIDef(false); // not a PHI def anymore. + newVNI->setIsDefAccurate(true); + } else { + // non-PHI def. Rename the def. If it's two-addr that means renaming the use + // and inserting a new copy too. + MachineInstr *defInst = lis->getInstructionFromIndex(newVNI->def); + // We'll rename this now, so we can remove it from uses. + uses.erase(defInst); + unsigned defOpIdx = defInst->findRegisterDefOperandIdx(li->reg); + bool isTwoAddr = defInst->isRegTiedToUseOperand(defOpIdx), + twoAddrUseIsUndef = false; + + for (unsigned i = 0; i < defInst->getNumOperands(); ++i) { + MachineOperand &mo = defInst->getOperand(i); + if (mo.isReg() && (mo.isDef() || isTwoAddr) && (mo.getReg()==li->reg)) { + mo.setReg(newVReg); + if (isTwoAddr && mo.isUse() && mo.isUndef()) + twoAddrUseIsUndef = true; + } + } + + SlotIndex defIdx = lis->getInstructionIndex(defInst); + newVNI->def = defIdx.getDefIndex(); + + if (isTwoAddr && !twoAddrUseIsUndef) { + MachineBasicBlock *defMBB = defInst->getParent(); + tii->copyRegToReg(*defMBB, defInst, newVReg, li->reg, trc, trc); + MachineInstr *copyMI = prior(MachineBasicBlock::iterator(defInst)); + SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI); + copyMI->addRegisterKilled(li->reg, tri); + LiveRange *origUseRange = + li->getLiveRangeContaining(newVNI->def.getUseIndex()); + VNInfo *origUseVNI = origUseRange->valno; + origUseRange->end = copyIdx.getDefIndex(); + bool updatedKills = false; + for (unsigned k = 0; k < origUseVNI->kills.size(); ++k) { + if (origUseVNI->kills[k] == defIdx.getDefIndex()) { + origUseVNI->kills[k] = copyIdx.getDefIndex(); + updatedKills = true; + break; + } + } + assert(updatedKills && "Failed to update VNI kill list."); + VNInfo *copyVNI = newLI->getNextValue(copyIdx.getDefIndex(), copyMI, + true, lis->getVNInfoAllocator()); + copyVNI->addKill(defIdx.getDefIndex()); + LiveRange copyRange(copyIdx.getDefIndex(),defIdx.getDefIndex(),copyVNI); + newLI->addRange(copyRange); + } + } + + for (std::set<MachineInstr*>::iterator + usesItr = uses.begin(), usesEnd = uses.end(); + usesItr != usesEnd; ++usesItr) { + MachineInstr *useInst = *usesItr; + SlotIndex useIdx = lis->getInstructionIndex(useInst); + LiveRange *useRange = + newLI->getLiveRangeContaining(useIdx.getUseIndex()); + + // If this use doesn't belong to the new interval skip it. + if (useRange == 0) + continue; + + // This use doesn't belong to the VNI, skip it. + if (useRange->valno != newVNI) + continue; + + // Check if this instr is two address. + unsigned useOpIdx = useInst->findRegisterUseOperandIdx(li->reg); + bool isTwoAddress = useInst->isRegTiedToDefOperand(useOpIdx); + + // Rename uses (and defs for two-address instrs). + for (unsigned i = 0; i < useInst->getNumOperands(); ++i) { + MachineOperand &mo = useInst->getOperand(i); + if (mo.isReg() && (mo.isUse() || isTwoAddress) && + (mo.getReg() == li->reg)) { + mo.setReg(newVReg); + } + } + + // If this is a two address instruction we've got some extra work to do. + if (isTwoAddress) { + // We modified the def operand, so we need to copy back to the original + // reg. + MachineBasicBlock *useMBB = useInst->getParent(); + MachineBasicBlock::iterator useItr(useInst); + tii->copyRegToReg(*useMBB, next(useItr), li->reg, newVReg, trc, trc); + MachineInstr *copyMI = next(useItr); + copyMI->addRegisterKilled(newVReg, tri); + SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI); + + // Change the old two-address defined range & vni to start at + // (and be defined by) the copy. + LiveRange *origDefRange = + li->getLiveRangeContaining(useIdx.getDefIndex()); + origDefRange->start = copyIdx.getDefIndex(); + origDefRange->valno->def = copyIdx.getDefIndex(); + origDefRange->valno->setCopy(copyMI); + + // Insert a new range & vni for the two-address-to-copy value. This + // will be attached to the new live interval. + VNInfo *copyVNI = + newLI->getNextValue(useIdx.getDefIndex(), 0, true, + lis->getVNInfoAllocator()); + copyVNI->addKill(copyIdx.getDefIndex()); + LiveRange copyRange(useIdx.getDefIndex(),copyIdx.getDefIndex(),copyVNI); + newLI->addRange(copyRange); + } + } + + // Iterate over any PHI kills - we'll need to insert new copies for them. + for (VNInfo::KillSet::iterator + killItr = newVNI->kills.begin(), killEnd = newVNI->kills.end(); + killItr != killEnd; ++killItr) { + SlotIndex killIdx(*killItr); + if (killItr->isPHI()) { + MachineBasicBlock *killMBB = lis->getMBBFromIndex(killIdx); + LiveRange *oldKillRange = + newLI->getLiveRangeContaining(killIdx); + + assert(oldKillRange != 0 && "No kill range?"); + + tii->copyRegToReg(*killMBB, killMBB->getFirstTerminator(), + li->reg, newVReg, trc, trc); + MachineInstr *copyMI = prior(killMBB->getFirstTerminator()); + copyMI->addRegisterKilled(newVReg, tri); + SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI); + + // Save the current end. We may need it to add a new range if the + // current range runs of the end of the MBB. + SlotIndex newKillRangeEnd = oldKillRange->end; + oldKillRange->end = copyIdx.getDefIndex(); + + if (newKillRangeEnd != lis->getMBBEndIdx(killMBB).getNextSlot()) { + assert(newKillRangeEnd > lis->getMBBEndIdx(killMBB).getNextSlot() && + "PHI kill range doesn't reach kill-block end. Not sane."); + newLI->addRange(LiveRange(lis->getMBBEndIdx(killMBB).getNextSlot(), + newKillRangeEnd, newVNI)); + } + + *killItr = oldKillRange->end; + VNInfo *newKillVNI = li->getNextValue(copyIdx.getDefIndex(), + copyMI, true, + lis->getVNInfoAllocator()); + newKillVNI->addKill(lis->getMBBTerminatorGap(killMBB)); + newKillVNI->setHasPHIKill(true); + li->addRange(LiveRange(copyIdx.getDefIndex(), + lis->getMBBEndIdx(killMBB).getNextSlot(), + newKillVNI)); + } + + } + + newVNI->setHasPHIKill(false); + + return newLI; + } + +}; + } llvm::Spiller* llvm::createSpiller(MachineFunction *mf, LiveIntervals *lis, @@ -203,7 +520,8 @@ llvm::Spiller* llvm::createSpiller(MachineFunction *mf, LiveIntervals *lis, VirtRegMap *vrm) { switch (spillerOpt) { case trivial: return new TrivialSpiller(mf, lis, vrm); break; - case standard: return new StandardSpiller(mf, lis, loopInfo, vrm); break; + case standard: return new StandardSpiller(lis, loopInfo, vrm); break; + case splitting: return new SplittingSpiller(mf, lis, loopInfo, vrm); break; default: llvm_unreachable("Unreachable!"); break; } } diff --git a/lib/CodeGen/Spiller.h b/lib/CodeGen/Spiller.h index c6bd985..dda52e8 100644 --- a/lib/CodeGen/Spiller.h +++ b/lib/CodeGen/Spiller.h @@ -21,6 +21,7 @@ namespace llvm { class MachineFunction; class MachineInstr; class MachineLoopInfo; + class SlotIndex; class VirtRegMap; class VNInfo; @@ -35,7 +36,8 @@ namespace llvm { /// Spill the given live range. The method used will depend on the Spiller /// implementation selected. virtual std::vector<LiveInterval*> spill(LiveInterval *li, - SmallVectorImpl<LiveInterval*> &spillIs) = 0; + SmallVectorImpl<LiveInterval*> &spillIs, + SlotIndex *earliestIndex = 0) = 0; }; diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp index c299192..fd25a37 100644 --- a/lib/CodeGen/StackSlotColoring.cpp +++ b/lib/CodeGen/StackSlotColoring.cpp @@ -668,7 +668,7 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) { if (DCELimit != -1 && (int)NumDead >= DCELimit) break; - MachineBasicBlock::iterator NextMI = next(I); + MachineBasicBlock::iterator NextMI = llvm::next(I); if (NextMI == MBB->end()) continue; int FirstSS, SecondSS; diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp index 9c0b596..bf58902 100644 --- a/lib/CodeGen/TailDuplication.cpp +++ b/lib/CodeGen/TailDuplication.cpp @@ -17,15 +17,19 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineSSAUpdater.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" using namespace llvm; +STATISTIC(NumTails , "Number of tails duplicated"); STATISTIC(NumTailDups , "Number of tail duplicated blocks"); STATISTIC(NumInstrDups , "Additional instructions due to tail duplication"); STATISTIC(NumDeadBlocks, "Number of dead blocks removed"); @@ -36,34 +40,71 @@ TailDuplicateSize("tail-dup-size", cl::desc("Maximum instructions to consider tail duplicating"), cl::init(2), cl::Hidden); +static cl::opt<bool> +TailDupVerify("tail-dup-verify", + cl::desc("Verify sanity of PHI instructions during taildup"), + cl::init(false), cl::Hidden); + +static cl::opt<unsigned> +TailDupLimit("tail-dup-limit", cl::init(~0U), cl::Hidden); + +typedef std::vector<std::pair<MachineBasicBlock*,unsigned> > AvailableValsTy; + namespace { /// TailDuplicatePass - Perform tail duplication. class TailDuplicatePass : public MachineFunctionPass { + bool PreRegAlloc; const TargetInstrInfo *TII; MachineModuleInfo *MMI; + MachineRegisterInfo *MRI; + + // SSAUpdateVRs - A list of virtual registers for which to update SSA form. + SmallVector<unsigned, 16> SSAUpdateVRs; + + // SSAUpdateVals - For each virtual register in SSAUpdateVals keep a list of + // source virtual registers. + DenseMap<unsigned, AvailableValsTy> SSAUpdateVals; public: static char ID; - explicit TailDuplicatePass() : MachineFunctionPass(&ID) {} + explicit TailDuplicatePass(bool PreRA) : + MachineFunctionPass(&ID), PreRegAlloc(PreRA) {} virtual bool runOnMachineFunction(MachineFunction &MF); virtual const char *getPassName() const { return "Tail Duplication"; } private: + void AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg, + MachineBasicBlock *BB); + void ProcessPHI(MachineInstr *MI, MachineBasicBlock *TailBB, + MachineBasicBlock *PredBB, + DenseMap<unsigned, unsigned> &LocalVRMap, + SmallVector<std::pair<unsigned,unsigned>, 4> &Copies); + void DuplicateInstruction(MachineInstr *MI, + MachineBasicBlock *TailBB, + MachineBasicBlock *PredBB, + MachineFunction &MF, + DenseMap<unsigned, unsigned> &LocalVRMap); + void UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead, + SmallVector<MachineBasicBlock*, 8> &TDBBs, + SmallSetVector<MachineBasicBlock*, 8> &Succs); bool TailDuplicateBlocks(MachineFunction &MF); - bool TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF); + bool TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, + SmallVector<MachineBasicBlock*, 8> &TDBBs, + SmallVector<MachineInstr*, 16> &Copies); void RemoveDeadBlock(MachineBasicBlock *MBB); }; char TailDuplicatePass::ID = 0; } -FunctionPass *llvm::createTailDuplicatePass() { - return new TailDuplicatePass(); +FunctionPass *llvm::createTailDuplicatePass(bool PreRegAlloc) { + return new TailDuplicatePass(PreRegAlloc); } bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) { TII = MF.getTarget().getInstrInfo(); + MRI = &MF.getRegInfo(); MMI = getAnalysisIfAvailable<MachineModuleInfo>(); bool MadeChange = false; @@ -77,36 +118,325 @@ bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) { return MadeChange; } +static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) { + for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ++I) { + MachineBasicBlock *MBB = I; + SmallSetVector<MachineBasicBlock*, 8> Preds(MBB->pred_begin(), + MBB->pred_end()); + MachineBasicBlock::iterator MI = MBB->begin(); + while (MI != MBB->end()) { + if (MI->getOpcode() != TargetInstrInfo::PHI) + break; + for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), + PE = Preds.end(); PI != PE; ++PI) { + MachineBasicBlock *PredBB = *PI; + bool Found = false; + for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) { + MachineBasicBlock *PHIBB = MI->getOperand(i+1).getMBB(); + if (PHIBB == PredBB) { + Found = true; + break; + } + } + if (!Found) { + errs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI; + errs() << " missing input from predecessor BB#" + << PredBB->getNumber() << '\n'; + llvm_unreachable(0); + } + } + + for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) { + MachineBasicBlock *PHIBB = MI->getOperand(i+1).getMBB(); + if (CheckExtra && !Preds.count(PHIBB)) { + // This is not a hard error. + errs() << "Warning: malformed PHI in BB#" << MBB->getNumber() + << ": " << *MI; + errs() << " extra input from predecessor BB#" + << PHIBB->getNumber() << '\n'; + } + if (PHIBB->getNumber() < 0) { + errs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI; + errs() << " non-existing BB#" << PHIBB->getNumber() << '\n'; + llvm_unreachable(0); + } + } + ++MI; + } + } +} + /// TailDuplicateBlocks - Look for small blocks that are unconditionally /// branched to and do not fall through. Tail-duplicate their instructions /// into their predecessors to eliminate (dynamic) branches. bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) { bool MadeChange = false; + if (PreRegAlloc && TailDupVerify) { + DEBUG(errs() << "\n*** Before tail-duplicating\n"); + VerifyPHIs(MF, true); + } + + SmallVector<MachineInstr*, 8> NewPHIs; + MachineSSAUpdater SSAUpdate(MF, &NewPHIs); + for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) { MachineBasicBlock *MBB = I++; + if (NumTails == TailDupLimit) + break; + // Only duplicate blocks that end with unconditional branches. if (MBB->canFallThrough()) continue; - MadeChange |= TailDuplicate(MBB, MF); - - // If it is dead, remove it. - if (MBB->pred_empty()) { - NumInstrDups -= MBB->size(); - RemoveDeadBlock(MBB); + // Save the successors list. + SmallSetVector<MachineBasicBlock*, 8> Succs(MBB->succ_begin(), + MBB->succ_end()); + + SmallVector<MachineBasicBlock*, 8> TDBBs; + SmallVector<MachineInstr*, 16> Copies; + if (TailDuplicate(MBB, MF, TDBBs, Copies)) { + ++NumTails; + + // TailBB's immediate successors are now successors of those predecessors + // which duplicated TailBB. Add the predecessors as sources to the PHI + // instructions. + bool isDead = MBB->pred_empty(); + if (PreRegAlloc) + UpdateSuccessorsPHIs(MBB, isDead, TDBBs, Succs); + + // If it is dead, remove it. + if (isDead) { + NumInstrDups -= MBB->size(); + RemoveDeadBlock(MBB); + ++NumDeadBlocks; + } + + // Update SSA form. + if (!SSAUpdateVRs.empty()) { + for (unsigned i = 0, e = SSAUpdateVRs.size(); i != e; ++i) { + unsigned VReg = SSAUpdateVRs[i]; + SSAUpdate.Initialize(VReg); + + // If the original definition is still around, add it as an available + // value. + MachineInstr *DefMI = MRI->getVRegDef(VReg); + MachineBasicBlock *DefBB = 0; + if (DefMI) { + DefBB = DefMI->getParent(); + SSAUpdate.AddAvailableValue(DefBB, VReg); + } + + // Add the new vregs as available values. + DenseMap<unsigned, AvailableValsTy>::iterator LI = + SSAUpdateVals.find(VReg); + for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) { + MachineBasicBlock *SrcBB = LI->second[j].first; + unsigned SrcReg = LI->second[j].second; + SSAUpdate.AddAvailableValue(SrcBB, SrcReg); + } + + // Rewrite uses that are outside of the original def's block. + MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg); + while (UI != MRI->use_end()) { + MachineOperand &UseMO = UI.getOperand(); + MachineInstr *UseMI = &*UI; + ++UI; + if (UseMI->getParent() == DefBB) + continue; + SSAUpdate.RewriteUse(UseMO); + } + } + + SSAUpdateVRs.clear(); + SSAUpdateVals.clear(); + } + + // Eliminate some of the copies inserted tail duplication to maintain + // SSA form. + for (unsigned i = 0, e = Copies.size(); i != e; ++i) { + MachineInstr *Copy = Copies[i]; + unsigned Src, Dst, SrcSR, DstSR; + if (TII->isMoveInstr(*Copy, Src, Dst, SrcSR, DstSR)) { + MachineRegisterInfo::use_iterator UI = MRI->use_begin(Src); + if (++UI == MRI->use_end()) { + // Copy is the only use. Do trivial copy propagation here. + MRI->replaceRegWith(Dst, Src); + Copy->eraseFromParent(); + } + } + } + + if (PreRegAlloc && TailDupVerify) + VerifyPHIs(MF, false); MadeChange = true; - ++NumDeadBlocks; } } + return MadeChange; } +static bool isDefLiveOut(unsigned Reg, MachineBasicBlock *BB, + const MachineRegisterInfo *MRI) { + for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg), + UE = MRI->use_end(); UI != UE; ++UI) { + MachineInstr *UseMI = &*UI; + if (UseMI->getParent() != BB) + return true; + } + return false; +} + +static unsigned getPHISrcRegOpIdx(MachineInstr *MI, MachineBasicBlock *SrcBB) { + for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) + if (MI->getOperand(i+1).getMBB() == SrcBB) + return i; + return 0; +} + +/// AddSSAUpdateEntry - Add a definition and source virtual registers pair for +/// SSA update. +void TailDuplicatePass::AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg, + MachineBasicBlock *BB) { + DenseMap<unsigned, AvailableValsTy>::iterator LI= SSAUpdateVals.find(OrigReg); + if (LI != SSAUpdateVals.end()) + LI->second.push_back(std::make_pair(BB, NewReg)); + else { + AvailableValsTy Vals; + Vals.push_back(std::make_pair(BB, NewReg)); + SSAUpdateVals.insert(std::make_pair(OrigReg, Vals)); + SSAUpdateVRs.push_back(OrigReg); + } +} + +/// ProcessPHI - Process PHI node in TailBB by turning it into a copy in PredBB. +/// Remember the source register that's contributed by PredBB and update SSA +/// update map. +void TailDuplicatePass::ProcessPHI(MachineInstr *MI, + MachineBasicBlock *TailBB, + MachineBasicBlock *PredBB, + DenseMap<unsigned, unsigned> &LocalVRMap, + SmallVector<std::pair<unsigned,unsigned>, 4> &Copies) { + unsigned DefReg = MI->getOperand(0).getReg(); + unsigned SrcOpIdx = getPHISrcRegOpIdx(MI, PredBB); + assert(SrcOpIdx && "Unable to find matching PHI source?"); + unsigned SrcReg = MI->getOperand(SrcOpIdx).getReg(); + const TargetRegisterClass *RC = MRI->getRegClass(DefReg); + LocalVRMap.insert(std::make_pair(DefReg, SrcReg)); + + // Insert a copy from source to the end of the block. The def register is the + // available value liveout of the block. + unsigned NewDef = MRI->createVirtualRegister(RC); + Copies.push_back(std::make_pair(NewDef, SrcReg)); + if (isDefLiveOut(DefReg, TailBB, MRI)) + AddSSAUpdateEntry(DefReg, NewDef, PredBB); + + // Remove PredBB from the PHI node. + MI->RemoveOperand(SrcOpIdx+1); + MI->RemoveOperand(SrcOpIdx); + if (MI->getNumOperands() == 1) + MI->eraseFromParent(); +} + +/// DuplicateInstruction - Duplicate a TailBB instruction to PredBB and update +/// the source operands due to earlier PHI translation. +void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI, + MachineBasicBlock *TailBB, + MachineBasicBlock *PredBB, + MachineFunction &MF, + DenseMap<unsigned, unsigned> &LocalVRMap) { + MachineInstr *NewMI = MF.CloneMachineInstr(MI); + for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = NewMI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) + continue; + if (MO.isDef()) { + const TargetRegisterClass *RC = MRI->getRegClass(Reg); + unsigned NewReg = MRI->createVirtualRegister(RC); + MO.setReg(NewReg); + LocalVRMap.insert(std::make_pair(Reg, NewReg)); + if (isDefLiveOut(Reg, TailBB, MRI)) + AddSSAUpdateEntry(Reg, NewReg, PredBB); + } else { + DenseMap<unsigned, unsigned>::iterator VI = LocalVRMap.find(Reg); + if (VI != LocalVRMap.end()) + MO.setReg(VI->second); + } + } + PredBB->insert(PredBB->end(), NewMI); +} + +/// UpdateSuccessorsPHIs - After FromBB is tail duplicated into its predecessor +/// blocks, the successors have gained new predecessors. Update the PHI +/// instructions in them accordingly. +void +TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead, + SmallVector<MachineBasicBlock*, 8> &TDBBs, + SmallSetVector<MachineBasicBlock*,8> &Succs) { + for (SmallSetVector<MachineBasicBlock*, 8>::iterator SI = Succs.begin(), + SE = Succs.end(); SI != SE; ++SI) { + MachineBasicBlock *SuccBB = *SI; + for (MachineBasicBlock::iterator II = SuccBB->begin(), EE = SuccBB->end(); + II != EE; ++II) { + if (II->getOpcode() != TargetInstrInfo::PHI) + break; + unsigned Idx = 0; + for (unsigned i = 1, e = II->getNumOperands(); i != e; i += 2) { + MachineOperand &MO = II->getOperand(i+1); + if (MO.getMBB() == FromBB) { + Idx = i; + break; + } + } + + assert(Idx != 0); + MachineOperand &MO0 = II->getOperand(Idx); + unsigned Reg = MO0.getReg(); + if (isDead) { + // Folded into the previous BB. + // There could be duplicate phi source entries. FIXME: Should sdisel + // or earlier pass fixed this? + for (unsigned i = II->getNumOperands()-2; i != Idx; i -= 2) { + MachineOperand &MO = II->getOperand(i+1); + if (MO.getMBB() == FromBB) { + II->RemoveOperand(i+1); + II->RemoveOperand(i); + } + } + II->RemoveOperand(Idx+1); + II->RemoveOperand(Idx); + } + DenseMap<unsigned,AvailableValsTy>::iterator LI=SSAUpdateVals.find(Reg); + if (LI != SSAUpdateVals.end()) { + // This register is defined in the tail block. + for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) { + MachineBasicBlock *SrcBB = LI->second[j].first; + unsigned SrcReg = LI->second[j].second; + II->addOperand(MachineOperand::CreateReg(SrcReg, false)); + II->addOperand(MachineOperand::CreateMBB(SrcBB)); + } + } else { + // Live in tail block, must also be live in predecessors. + for (unsigned j = 0, ee = TDBBs.size(); j != ee; ++j) { + MachineBasicBlock *SrcBB = TDBBs[j]; + II->addOperand(MachineOperand::CreateReg(Reg, false)); + II->addOperand(MachineOperand::CreateMBB(SrcBB)); + } + } + } + } +} + /// TailDuplicate - If it is profitable, duplicate TailBB's contents in each /// of its predecessors. -bool TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, - MachineFunction &MF) { +bool +TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, + SmallVector<MachineBasicBlock*, 8> &TDBBs, + SmallVector<MachineInstr*, 16> &Copies) { // Don't try to tail-duplicate single-block loops. if (TailBB->isSuccessor(TailBB)) return false; @@ -129,28 +459,36 @@ bool TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, // Check the instructions in the block to determine whether tail-duplication // is invalid or unlikely to be profitable. - unsigned i = 0; + unsigned InstrCount = 0; bool HasCall = false; for (MachineBasicBlock::iterator I = TailBB->begin(); - I != TailBB->end(); ++I, ++i) { + I != TailBB->end(); ++I) { // Non-duplicable things shouldn't be tail-duplicated. if (I->getDesc().isNotDuplicable()) return false; + // Do not duplicate 'return' instructions if this is a pre-regalloc run. + // A return may expand into a lot more instructions (e.g. reload of callee + // saved registers) after PEI. + if (PreRegAlloc && I->getDesc().isReturn()) return false; // Don't duplicate more than the threshold. - if (i == MaxDuplicateCount) return false; + if (InstrCount == MaxDuplicateCount) return false; // Remember if we saw a call. if (I->getDesc().isCall()) HasCall = true; + if (I->getOpcode() != TargetInstrInfo::PHI) + InstrCount += 1; } // Heuristically, don't tail-duplicate calls if it would expand code size, // as it's less likely to be worth the extra cost. - if (i > 1 && HasCall) + if (InstrCount > 1 && HasCall) return false; + DEBUG(errs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n'); + // Iterate through all the unique predecessors and tail-duplicate this // block into them, if possible. Copying the list ahead of time also // avoids trouble with the predecessor list reallocating. bool Changed = false; - SmallSetVector<MachineBasicBlock *, 8> Preds(TailBB->pred_begin(), - TailBB->pred_end()); + SmallSetVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(), + TailBB->pred_end()); for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), PE = Preds.end(); PI != PE; ++PI) { MachineBasicBlock *PredBB = *PI; @@ -175,13 +513,35 @@ bool TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, DEBUG(errs() << "\nTail-duplicating into PredBB: " << *PredBB << "From Succ: " << *TailBB); + TDBBs.push_back(PredBB); + // Remove PredBB's unconditional branch. TII->RemoveBranch(*PredBB); + // Clone the contents of TailBB into PredBB. - for (MachineBasicBlock::iterator I = TailBB->begin(), E = TailBB->end(); - I != E; ++I) { - MachineInstr *NewMI = MF.CloneMachineInstr(I); - PredBB->insert(PredBB->end(), NewMI); + DenseMap<unsigned, unsigned> LocalVRMap; + SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos; + MachineBasicBlock::iterator I = TailBB->begin(); + while (I != TailBB->end()) { + MachineInstr *MI = &*I; + ++I; + if (MI->getOpcode() == TargetInstrInfo::PHI) { + // Replace the uses of the def of the PHI with the register coming + // from PredBB. + ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos); + } else { + // Replace def of virtual registers with new registers, and update + // uses with PHI source register or the new registers. + DuplicateInstruction(MI, TailBB, PredBB, MF, LocalVRMap); + } + } + MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator(); + for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { + const TargetRegisterClass *RC = MRI->getRegClass(CopyInfos[i].first); + TII->copyRegToReg(*PredBB, Loc, CopyInfos[i].first, + CopyInfos[i].second, RC,RC); + MachineInstr *CopyMI = prior(Loc); + Copies.push_back(CopyMI); } NumInstrDups += TailBB->size() - 1; // subtract one for removed branch @@ -190,8 +550,8 @@ bool TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, assert(PredBB->succ_empty() && "TailDuplicate called on block with multiple successors!"); for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(), - E = TailBB->succ_end(); I != E; ++I) - PredBB->addSuccessor(*I); + E = TailBB->succ_end(); I != E; ++I) + PredBB->addSuccessor(*I); Changed = true; ++NumTailDups; @@ -200,22 +560,56 @@ bool TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, // If TailBB was duplicated into all its predecessors except for the prior // block, which falls through unconditionally, move the contents of this // block into the prior block. - MachineBasicBlock &PrevBB = *prior(MachineFunction::iterator(TailBB)); + MachineBasicBlock *PrevBB = prior(MachineFunction::iterator(TailBB)); MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0; SmallVector<MachineOperand, 4> PriorCond; bool PriorUnAnalyzable = - TII->AnalyzeBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, true); + TII->AnalyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true); // This has to check PrevBB->succ_size() because EH edges are ignored by // AnalyzeBranch. if (!PriorUnAnalyzable && PriorCond.empty() && !PriorTBB && - TailBB->pred_size() == 1 && PrevBB.succ_size() == 1 && + TailBB->pred_size() == 1 && PrevBB->succ_size() == 1 && !TailBB->hasAddressTaken()) { - DEBUG(errs() << "\nMerging into block: " << PrevBB + DEBUG(errs() << "\nMerging into block: " << *PrevBB << "From MBB: " << *TailBB); - PrevBB.splice(PrevBB.end(), TailBB, TailBB->begin(), TailBB->end()); - PrevBB.removeSuccessor(PrevBB.succ_begin());; - assert(PrevBB.succ_empty()); - PrevBB.transferSuccessors(TailBB); + if (PreRegAlloc) { + DenseMap<unsigned, unsigned> LocalVRMap; + SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos; + MachineBasicBlock::iterator I = TailBB->begin(); + // Process PHI instructions first. + while (I != TailBB->end() && I->getOpcode() == TargetInstrInfo::PHI) { + // Replace the uses of the def of the PHI with the register coming + // from PredBB. + MachineInstr *MI = &*I++; + ProcessPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos); + if (MI->getParent()) + MI->eraseFromParent(); + } + + // Now copy the non-PHI instructions. + while (I != TailBB->end()) { + // Replace def of virtual registers with new registers, and update + // uses with PHI source register or the new registers. + MachineInstr *MI = &*I++; + DuplicateInstruction(MI, TailBB, PrevBB, MF, LocalVRMap); + MI->eraseFromParent(); + } + MachineBasicBlock::iterator Loc = PrevBB->getFirstTerminator(); + for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { + const TargetRegisterClass *RC = MRI->getRegClass(CopyInfos[i].first); + TII->copyRegToReg(*PrevBB, Loc, CopyInfos[i].first, + CopyInfos[i].second, RC, RC); + MachineInstr *CopyMI = prior(Loc); + Copies.push_back(CopyMI); + } + } else { + // No PHIs to worry about, just splice the instructions over. + PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end()); + } + PrevBB->removeSuccessor(PrevBB->succ_begin()); + assert(PrevBB->succ_empty()); + PrevBB->transferSuccessors(TailBB); + TDBBs.push_back(PrevBB); Changed = true; } diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp index 102e2a3..393e315 100644 --- a/lib/CodeGen/TargetInstrInfoImpl.cpp +++ b/lib/CodeGen/TargetInstrInfoImpl.cpp @@ -329,7 +329,7 @@ TargetInstrInfo::isReallyTriviallyReMaterializableGeneric(const MachineInstr * return false; // For the def, it should be the only def of that register. - if (MO.isDef() && (next(MRI.def_begin(Reg)) != MRI.def_end() || + if (MO.isDef() && (llvm::next(MRI.def_begin(Reg)) != MRI.def_end() || MRI.isLiveIn(Reg))) return false; diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index 5fa690b..98b95ac 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -211,7 +211,7 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB, ++KillPos; unsigned NumVisited = 0; - for (MachineBasicBlock::iterator I = next(OldPos); I != KillPos; ++I) { + for (MachineBasicBlock::iterator I = llvm::next(OldPos); I != KillPos; ++I) { MachineInstr *OtherMI = I; if (NumVisited > 30) // FIXME: Arbitrary limit to reduce compile time cost. return false; @@ -412,7 +412,7 @@ static bool isKilled(MachineInstr &MI, unsigned Reg, MachineRegisterInfo::def_iterator Begin = MRI->def_begin(Reg); // If there are multiple defs, we can't do a simple analysis, so just // go with what the kill flag says. - if (next(Begin) != MRI->def_end()) + if (llvm::next(Begin) != MRI->def_end()) return true; DefMI = &*Begin; bool IsSrcPhys, IsDstPhys; @@ -643,7 +643,7 @@ TwoAddressInstructionPass::ConvertInstTo3Addr(MachineBasicBlock::iterator &mi, if (!Sunk) { DistanceMap.insert(std::make_pair(NewMI, Dist)); mi = NewMI; - nmi = next(mi); + nmi = llvm::next(mi); } return true; } @@ -923,7 +923,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { Processed.clear(); for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end(); mi != me; ) { - MachineBasicBlock::iterator nmi = next(mi); + MachineBasicBlock::iterator nmi = llvm::next(mi); const TargetInstrDesc &TID = mi->getDesc(); bool FirstTied = true; diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp index 10c8066..054c3b6 100644 --- a/lib/CodeGen/VirtRegRewriter.cpp +++ b/lib/CodeGen/VirtRegRewriter.cpp @@ -754,7 +754,7 @@ void AvailableSpills::AddAvailableRegsToLiveIn(MachineBasicBlock &MBB, } // Skip over the same register. - std::multimap<unsigned, int>::iterator NI = next(I); + std::multimap<unsigned, int>::iterator NI = llvm::next(I); while (NI != E && NI->first == Reg) { ++I; ++NI; @@ -1133,7 +1133,7 @@ private: std::vector<MachineOperand*> &KillOps, VirtRegMap &VRM) { - MachineBasicBlock::iterator NextMII = next(MII); + MachineBasicBlock::iterator NextMII = llvm::next(MII); if (NextMII == MBB.end()) return false; @@ -1186,7 +1186,7 @@ private: // Unfold next instructions that fold the same SS. do { MachineInstr &NextMI = *NextMII; - NextMII = next(NextMII); + NextMII = llvm::next(NextMII); NewMIs.clear(); if (!TII->unfoldMemoryOperand(MF, &NextMI, VirtReg, false, false, NewMIs)) llvm_unreachable("Unable unfold the load / store folding instruction!"); @@ -1463,8 +1463,8 @@ private: std::vector<MachineOperand*> &KillOps, VirtRegMap &VRM) { - MachineBasicBlock::iterator oldNextMII = next(MII); - TII->storeRegToStackSlot(MBB, next(MII), PhysReg, true, StackSlot, RC); + MachineBasicBlock::iterator oldNextMII = llvm::next(MII); + TII->storeRegToStackSlot(MBB, llvm::next(MII), PhysReg, true, StackSlot, RC); MachineInstr *StoreMI = prior(oldNextMII); VRM.addSpillSlotUse(StackSlot, StoreMI); DEBUG(errs() << "Store:\t" << *StoreMI); @@ -1626,14 +1626,14 @@ private: DistanceMap.clear(); for (MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end(); MII != E; ) { - MachineBasicBlock::iterator NextMII = next(MII); + MachineBasicBlock::iterator NextMII = llvm::next(MII); VirtRegMap::MI2VirtMapTy::const_iterator I, End; bool Erased = false; bool BackTracked = false; if (OptimizeByUnfold(MBB, MII, MaybeDeadStores, Spills, RegKills, KillOps, VRM)) - NextMII = next(MII); + NextMII = llvm::next(MII); MachineInstr &MI = *MII; @@ -1657,7 +1657,7 @@ private: // Back-schedule reloads and remats. MachineBasicBlock::iterator InsertLoc = - ComputeReloadLoc(next(MII), MBB.begin(), PhysReg, TRI, false, + ComputeReloadLoc(llvm::next(MII), MBB.begin(), PhysReg, TRI, false, SS, TII, MF); TII->loadRegFromStackSlot(MBB, InsertLoc, PhysReg, SS, RC); @@ -1667,7 +1667,7 @@ private: ++NumPSpills; DistanceMap.insert(std::make_pair(LoadMI, Dist++)); } - NextMII = next(MII); + NextMII = llvm::next(MII); } // Insert restores here if asked to. @@ -1785,14 +1785,14 @@ private: const TargetRegisterClass *RC = RegInfo->getRegClass(VirtReg); unsigned Phys = VRM.getPhys(VirtReg); int StackSlot = VRM.getStackSlot(VirtReg); - MachineBasicBlock::iterator oldNextMII = next(MII); - TII->storeRegToStackSlot(MBB, next(MII), Phys, isKill, StackSlot, RC); + MachineBasicBlock::iterator oldNextMII = llvm::next(MII); + TII->storeRegToStackSlot(MBB, llvm::next(MII), Phys, isKill, StackSlot, RC); MachineInstr *StoreMI = prior(oldNextMII); VRM.addSpillSlotUse(StackSlot, StoreMI); DEBUG(errs() << "Store:\t" << *StoreMI); VRM.virtFolded(VirtReg, StoreMI, VirtRegMap::isMod); } - NextMII = next(MII); + NextMII = llvm::next(MII); } /// ReusedOperands - Keep track of operand reuse in case we need to undo @@ -2265,7 +2265,7 @@ private: if (CommuteToFoldReload(MBB, MII, VirtReg, SrcReg, StackSlot, Spills, RegKills, KillOps, TRI, VRM)) { - NextMII = next(MII); + NextMII = llvm::next(MII); BackTracked = true; goto ProcessNextInst; } @@ -2381,7 +2381,7 @@ private: MachineInstr *&LastStore = MaybeDeadStores[StackSlot]; SpillRegToStackSlot(MBB, MII, -1, PhysReg, StackSlot, RC, true, LastStore, Spills, ReMatDefs, RegKills, KillOps, VRM); - NextMII = next(MII); + NextMII = llvm::next(MII); // Check to see if this is a noop copy. If so, eliminate the // instruction before considering the dest reg to be changed. diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp index 6d781c7..26afa54 100644 --- a/lib/ExecutionEngine/JIT/JIT.cpp +++ b/lib/ExecutionEngine/JIT/JIT.cpp @@ -208,7 +208,7 @@ ExecutionEngine *JIT::createJIT(ModuleProvider *MP, JITMemoryManager *JMM, CodeGenOpt::Level OptLevel, bool GVsWithCode, - CodeModel::Model CMM) { + CodeModel::Model CMM) { // Make sure we can resolve symbols in the program as well. The zero arg // to the function tells DynamicLibrary to load the program, not a library. if (sys::DynamicLibrary::LoadLibraryPermanently(0, ErrorStr)) @@ -681,7 +681,7 @@ void *JIT::getOrEmitGlobalVariable(const GlobalVariable *GV) { if (Ptr) return Ptr; // If the global is external, just remember the address. - if (GV->isDeclaration()) { + if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage()) { #if HAVE___DSO_HANDLE if (GV->getName() == "__dso_handle") return (void*)&__dso_handle; diff --git a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp index f2b28ad..0193486 100644 --- a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp +++ b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp @@ -175,7 +175,6 @@ struct KeyInfo { static inline unsigned getTombstoneKey() { return -2U; } static unsigned getHashValue(const unsigned &Key) { return Key; } static bool isEqual(unsigned LHS, unsigned RHS) { return LHS == RHS; } - static bool isPod() { return true; } }; /// ActionEntry - Structure describing an entry in the actions table. diff --git a/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp b/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp index b45c71f..52a8f71 100644 --- a/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp +++ b/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp @@ -69,24 +69,18 @@ OProfileJITEventListener::~OProfileJITEventListener() { } class FilenameCache { - // Holds the filename of each Scope, so that we can pass the - // pointer into oprofile. These char*s are freed in the destructor. - DenseMap<MDNode*, char*> Filenames; + // Holds the filename of each Scope, so that we can pass a null-terminated + // string into oprofile. + DenseMap<MDNode*, std::string> Filenames; public: const char *getFilename(MDNode *Scope) { - char *&Filename = Filenames[Scope]; - if (Filename == NULL) { + std::string &Filename = Filenames[Scope]; + if (Filename.empty()) { DIScope S(Scope); - Filename = strdup(S.getFilename()); - } - return Filename; - } - ~FilenameCache() { - for (DenseMap<MDNode*, char*>::iterator - I = Filenames.begin(), E = Filenames.end(); I != E; ++I) { - free(I->second); + Filename = S.getFilename(); } + return Filename.c_str(); } }; diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt index cd355ff..ac736dc 100644 --- a/lib/Support/CMakeLists.txt +++ b/lib/Support/CMakeLists.txt @@ -6,6 +6,7 @@ add_llvm_library(LLVMSupport CommandLine.cpp ConstantRange.cpp Debug.cpp + DeltaAlgorithm.cpp Dwarf.cpp ErrorHandling.cpp FileUtilities.cpp diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp index 9cf9c89..b6c0e08 100644 --- a/lib/Support/CommandLine.cpp +++ b/lib/Support/CommandLine.cpp @@ -778,9 +778,10 @@ void cl::ParseCommandLineOptions(int argc, char **argv, free(*i); } - DEBUG(errs() << "\nArgs: "; + DEBUG(errs() << "Args: "; for (int i = 0; i < argc; ++i) errs() << argv[i] << ' '; + errs() << '\n'; ); // If we had an error processing our arguments, don't let the program execute diff --git a/lib/Support/DeltaAlgorithm.cpp b/lib/Support/DeltaAlgorithm.cpp new file mode 100644 index 0000000..d176548 --- /dev/null +++ b/lib/Support/DeltaAlgorithm.cpp @@ -0,0 +1,114 @@ +//===--- DeltaAlgorithm.cpp - A Set Minimization Algorithm -----*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/DeltaAlgorithm.h" +#include <algorithm> +#include <iterator> +using namespace llvm; + +DeltaAlgorithm::~DeltaAlgorithm() { +} + +bool DeltaAlgorithm::GetTestResult(const changeset_ty &Changes) { + if (FailedTestsCache.count(Changes)) + return false; + + bool Result = ExecuteOneTest(Changes); + if (!Result) + FailedTestsCache.insert(Changes); + + return Result; +} + +void DeltaAlgorithm::Split(const changeset_ty &S, changesetlist_ty &Res) { + // FIXME: Allow clients to provide heuristics for improved splitting. + + // FIXME: This is really slow. + changeset_ty LHS, RHS; + unsigned idx = 0; + for (changeset_ty::const_iterator it = S.begin(), + ie = S.end(); it != ie; ++it, ++idx) + ((idx & 1) ? LHS : RHS).insert(*it); + if (!LHS.empty()) + Res.push_back(LHS); + if (!RHS.empty()) + Res.push_back(RHS); +} + +DeltaAlgorithm::changeset_ty +DeltaAlgorithm::Delta(const changeset_ty &Changes, + const changesetlist_ty &Sets) { + // Invariant: union(Res) == Changes + UpdatedSearchState(Changes, Sets); + + // If there is nothing left we can remove, we are done. + if (Sets.size() <= 1) + return Changes; + + // Look for a passing subset. + changeset_ty Res; + if (Search(Changes, Sets, Res)) + return Res; + + // Otherwise, partition the sets if possible; if not we are done. + changesetlist_ty SplitSets; + for (changesetlist_ty::const_iterator it = Sets.begin(), + ie = Sets.end(); it != ie; ++it) + Split(*it, SplitSets); + if (SplitSets.size() == Sets.size()) + return Changes; + + return Delta(Changes, SplitSets); +} + +bool DeltaAlgorithm::Search(const changeset_ty &Changes, + const changesetlist_ty &Sets, + changeset_ty &Res) { + // FIXME: Parallelize. + for (changesetlist_ty::const_iterator it = Sets.begin(), + ie = Sets.end(); it != ie; ++it) { + // If the test passes on this subset alone, recurse. + if (GetTestResult(*it)) { + changesetlist_ty Sets; + Split(*it, Sets); + Res = Delta(*it, Sets); + return true; + } + + // Otherwise, if we have more than two sets, see if test passes on the + // complement. + if (Sets.size() > 2) { + // FIXME: This is really slow. + changeset_ty Complement; + std::set_difference( + Changes.begin(), Changes.end(), it->begin(), it->end(), + std::insert_iterator<changeset_ty>(Complement, Complement.begin())); + if (GetTestResult(Complement)) { + changesetlist_ty ComplementSets; + ComplementSets.insert(ComplementSets.end(), Sets.begin(), it); + ComplementSets.insert(ComplementSets.end(), it + 1, Sets.end()); + Res = Delta(Complement, ComplementSets); + return true; + } + } + } + + return false; +} + +DeltaAlgorithm::changeset_ty DeltaAlgorithm::Run(const changeset_ty &Changes) { + // Check empty set first to quickly find poor test functions. + if (GetTestResult(changeset_ty())) + return changeset_ty(); + + // Otherwise run the real delta algorithm. + changesetlist_ty Sets; + Split(Changes, Sets); + + return Delta(Changes, Sets); +} diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp index b04864a..df1aa6a 100644 --- a/lib/Support/MemoryBuffer.cpp +++ b/lib/Support/MemoryBuffer.cpp @@ -176,7 +176,7 @@ MemoryBuffer *MemoryBuffer::getFile(StringRef Filename, std::string *ErrStr, #endif int FD = ::open(Filename.str().c_str(), O_RDONLY|OpenFlags); if (FD == -1) { - if (ErrStr) *ErrStr = "could not open file"; + if (ErrStr) *ErrStr = strerror(errno); return 0; } @@ -186,7 +186,7 @@ MemoryBuffer *MemoryBuffer::getFile(StringRef Filename, std::string *ErrStr, struct stat FileInfo; // TODO: This should use fstat64 when available. if (fstat(FD, &FileInfo) == -1) { - if (ErrStr) *ErrStr = "could not get file length"; + if (ErrStr) *ErrStr = strerror(errno); ::close(FD); return 0; } @@ -230,8 +230,8 @@ MemoryBuffer *MemoryBuffer::getFile(StringRef Filename, std::string *ErrStr, // try again } else { // error reading. + if (ErrStr) *ErrStr = strerror(errno); close(FD); - if (ErrStr) *ErrStr = "error reading file data"; return 0; } } diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp index 31451cc..0c90e77 100644 --- a/lib/Support/raw_ostream.cpp +++ b/lib/Support/raw_ostream.cpp @@ -209,8 +209,7 @@ raw_ostream &raw_ostream::operator<<(const void *P) { } raw_ostream &raw_ostream::operator<<(double N) { - this->operator<<(ftostr(N)); - return *this; + return this->operator<<(ftostr(N)); } diff --git a/lib/System/Atomic.cpp b/lib/System/Atomic.cpp index f9b55a1..7ba8b77 100644 --- a/lib/System/Atomic.cpp +++ b/lib/System/Atomic.cpp @@ -85,7 +85,7 @@ sys::cas_flag sys::AtomicAdd(volatile sys::cas_flag* ptr, sys::cas_flag val) { #elif defined(__GNUC__) return __sync_add_and_fetch(ptr, val); #elif defined(_MSC_VER) - return InterlockedAdd(ptr, val); + return InterlockedExchangeAdd(ptr, val) + val; #else # error No atomic add implementation for your platform! #endif diff --git a/lib/System/Host.cpp b/lib/System/Host.cpp index e112698..79897e4 100644 --- a/lib/System/Host.cpp +++ b/lib/System/Host.cpp @@ -103,11 +103,8 @@ static void DetectX86FamilyModel(unsigned EAX, unsigned &Family, unsigned &Model Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19 } } -#endif - std::string sys::getHostCPUName() { -#if defined(__x86_64__) || defined(__i386__) unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; if (GetX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX)) return "generic"; @@ -295,7 +292,10 @@ std::string sys::getHostCPUName() { return "generic"; } } -#endif - return "generic"; } +#else +std::string sys::getHostCPUName() { + return "generic"; +} +#endif diff --git a/lib/System/Unix/Path.inc b/lib/System/Unix/Path.inc index ff1497a..33b26f7 100644 --- a/lib/System/Unix/Path.inc +++ b/lib/System/Unix/Path.inc @@ -77,7 +77,7 @@ inline bool lastIsSlash(const std::string& path) { namespace llvm { using namespace sys; -extern const char sys::PathSeparator = ':'; +const char sys::PathSeparator = ':'; Path::Path(const std::string& p) : path(p) {} diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h index ff1980d..21445ad 100644 --- a/lib/Target/ARM/ARM.h +++ b/lib/Target/ARM/ARM.h @@ -109,7 +109,6 @@ FunctionPass *createNEONPreAllocPass(); FunctionPass *createNEONMoveFixPass(); FunctionPass *createThumb2ITBlockPass(); FunctionPass *createThumb2SizeReductionPass(); -FunctionPass *createARMMaxStackAlignmentCalculatorPass(); extern Target TheARMTarget, TheThumbTarget; diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index c95d4c8..1aae369 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -418,11 +418,13 @@ bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const { return true; } -/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing +/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing. +DISABLE_INLINE static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT, - unsigned JTI) DISABLE_INLINE; + unsigned JTI); static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT, unsigned JTI) { + assert(JTI < JT.size()); return JT[JTI].MBBs.size(); } @@ -467,6 +469,8 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { return MI->getOperand(2).getImm(); case ARM::Int_eh_sjlj_setjmp: return 24; + case ARM::tInt_eh_sjlj_setjmp: + return 22; case ARM::t2Int_eh_sjlj_setjmp: return 22; case ARM::BR_JTr: @@ -755,7 +759,6 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, assert((RC == ARM::QPRRegisterClass || RC == ARM::QPR_VFP2RegisterClass || RC == ARM::QPR_8RegisterClass) && "Unknown regclass!"); - // FIXME: Neon instructions should support predicates if (Align >= 16 && (getRegisterInfo().needsStackRealignment(MF))) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg) diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 282e30c..78d9135 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -92,6 +92,8 @@ namespace ARMII { StMiscFrm = 9 << FormShift, LdStMulFrm = 10 << FormShift, + LdStExFrm = 28 << FormShift, + // Miscellaneous arithmetic instructions ArithMiscFrm = 11 << FormShift, @@ -190,9 +192,6 @@ public: // if there is not such an opcode. virtual unsigned getUnindexedOpcode(unsigned Opc) const =0; - // Return true if the block does not fall through. - virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const =0; - virtual MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI, MachineBasicBlock::iterator &MBBI, LiveVariables *LV) const; diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 653328d..9b5f79f 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -471,21 +471,6 @@ ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg, } } -static unsigned calculateMaxStackAlignment(const MachineFrameInfo *FFI) { - unsigned MaxAlign = 0; - - for (int i = FFI->getObjectIndexBegin(), - e = FFI->getObjectIndexEnd(); i != e; ++i) { - if (FFI->isDeadObjectIndex(i)) - continue; - - unsigned Align = FFI->getObjectAlignment(i); - MaxAlign = std::max(MaxAlign, Align); - } - - return MaxAlign; -} - /// hasFP - Return true if the specified function should have a dedicated frame /// pointer register. This is true if the function has variable sized allocas /// or if frame pointer elimination is disabled. @@ -585,16 +570,21 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, SmallVector<unsigned, 4> UnspilledCS2GPRs; ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - MachineFrameInfo *MFI = MF.getFrameInfo(); // Calculate and set max stack object alignment early, so we can decide // whether we will need stack realignment (and thus FP). if (RealignStack) { - unsigned MaxAlign = std::max(MFI->getMaxAlignment(), - calculateMaxStackAlignment(MFI)); - MFI->setMaxAlignment(MaxAlign); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MFI->calculateMaxStackAlignment(); } + // Spill R4 if Thumb2 function requires stack realignment - it will be used as + // scratch register. + // FIXME: It will be better just to find spare register here. + if (needsStackRealignment(MF) && + AFI->isThumb2Function()) + MF.getRegInfo().setPhysRegUsed(ARM::R4); + // Don't spill FP if the frame can be eliminated. This is determined // by scanning the callee-save registers to see if any is used. const unsigned *CSRegs = getCalleeSavedRegs(); @@ -1368,14 +1358,30 @@ emitPrologue(MachineFunction &MF) const { // If we need dynamic stack realignment, do it here. if (needsStackRealignment(MF)) { - unsigned Opc; unsigned MaxAlign = MFI->getMaxAlignment(); assert (!AFI->isThumb1OnlyFunction()); - Opc = AFI->isThumbFunction() ? ARM::t2BICri : ARM::BICri; - - AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(Opc), ARM::SP) + if (!AFI->isThumbFunction()) { + // Emit bic sp, sp, MaxAlign + AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, + TII.get(ARM::BICri), ARM::SP) .addReg(ARM::SP, RegState::Kill) .addImm(MaxAlign-1))); + } else { + // We cannot use sp as source/dest register here, thus we're emitting the + // following sequence: + // mov r4, sp + // bic r4, r4, MaxAlign + // mov sp, r4 + // FIXME: It will be better just to find spare register here. + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::R4) + .addReg(ARM::SP, RegState::Kill); + AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, + TII.get(ARM::t2BICri), ARM::R4) + .addReg(ARM::R4, RegState::Kill) + .addImm(MaxAlign-1))); + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP) + .addReg(ARM::R4, RegState::Kill); + } } } @@ -1479,48 +1485,4 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { emitSPUpdate(isARM, MBB, MBBI, dl, TII, VARegSaveSize); } -namespace { - struct MaximalStackAlignmentCalculator : public MachineFunctionPass { - static char ID; - MaximalStackAlignmentCalculator() : MachineFunctionPass(&ID) {} - - virtual bool runOnMachineFunction(MachineFunction &MF) { - MachineFrameInfo *FFI = MF.getFrameInfo(); - MachineRegisterInfo &RI = MF.getRegInfo(); - - // Calculate max stack alignment of all already allocated stack objects. - unsigned MaxAlign = calculateMaxStackAlignment(FFI); - - // Be over-conservative: scan over all vreg defs and find, whether vector - // registers are used. If yes - there is probability, that vector register - // will be spilled and thus stack needs to be aligned properly. - for (unsigned RegNum = TargetRegisterInfo::FirstVirtualRegister; - RegNum < RI.getLastVirtReg(); ++RegNum) - MaxAlign = std::max(MaxAlign, RI.getRegClass(RegNum)->getAlignment()); - - if (FFI->getMaxAlignment() == MaxAlign) - return false; - - FFI->setMaxAlignment(MaxAlign); - return true; - } - - virtual const char *getPassName() const { - return "ARM Stack Required Alignment Auto-Detector"; - } - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - MachineFunctionPass::getAnalysisUsage(AU); - } - }; - - char MaximalStackAlignmentCalculator::ID = 0; -} - -FunctionPass* -llvm::createARMMaxStackAlignmentCalculatorPass() { - return new MaximalStackAlignmentCalculator(); -} - #include "ARMGenRegisterInfo.inc" diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index e59a315..acd30d2 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -418,10 +418,10 @@ void ARMConstantIslands::DoInitialPlacement(MachineFunction &MF, static bool BBHasFallthrough(MachineBasicBlock *MBB) { // Get the next machine basic block in the function. MachineFunction::iterator MBBI = MBB; - if (next(MBBI) == MBB->getParent()->end()) // Can't fall off end of function. + if (llvm::next(MBBI) == MBB->getParent()->end()) // Can't fall off end of function. return false; - MachineBasicBlock *NextBB = next(MBBI); + MachineBasicBlock *NextBB = llvm::next(MBBI); for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) if (*I == NextBB) @@ -760,7 +760,7 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) { CompareMBBNumbers); MachineBasicBlock* WaterBB = *IP; if (WaterBB == OrigBB) - WaterList.insert(next(IP), NewBB); + WaterList.insert(llvm::next(IP), NewBB); else WaterList.insert(IP, OrigBB); NewWaterList.insert(OrigBB); @@ -887,7 +887,7 @@ static bool BBIsJumpedOver(MachineBasicBlock *MBB) { void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB, int delta) { - MachineFunction::iterator MBBI = BB; MBBI = next(MBBI); + MachineFunction::iterator MBBI = BB; MBBI = llvm::next(MBBI); for(unsigned i = BB->getNumber()+1, e = BB->getParent()->getNumBlockIDs(); i < e; ++i) { BBOffsets[i] += delta; @@ -929,7 +929,7 @@ void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB, if (delta==0) return; } - MBBI = next(MBBI); + MBBI = llvm::next(MBBI); } } @@ -1096,7 +1096,7 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex, DEBUG(errs() << "Split at end of block\n"); if (&UserMBB->back() == UserMI) assert(BBHasFallthrough(UserMBB) && "Expected a fallthrough BB!"); - NewMBB = next(MachineFunction::iterator(UserMBB)); + NewMBB = llvm::next(MachineFunction::iterator(UserMBB)); // Add an unconditional branch from UserMBB to fallthrough block. // Record it for branch lengthening; this new branch will not get out of // range, but if the preceding conditional branch is out of range, the @@ -1144,7 +1144,7 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex, for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI); Offset < BaseInsertOffset; Offset += TII->GetInstSizeInBytes(MI), - MI = next(MI)) { + MI = llvm::next(MI)) { if (CPUIndex < CPUsers.size() && CPUsers[CPUIndex].MI == MI) { CPUser &U = CPUsers[CPUIndex]; if (!OffsetIsInRange(Offset, EndInsertOffset, @@ -1204,7 +1204,7 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF, NewWaterList.insert(NewIsland); } // The new CPE goes before the following block (NewMBB). - NewMBB = next(MachineFunction::iterator(WaterBB)); + NewMBB = llvm::next(MachineFunction::iterator(WaterBB)); } else { // No water found. @@ -1406,7 +1406,7 @@ ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) { NumCBrFixed++; if (BMI != MI) { - if (next(MachineBasicBlock::iterator(MI)) == prior(MBB->end()) && + if (llvm::next(MachineBasicBlock::iterator(MI)) == prior(MBB->end()) && BMI->getOpcode() == Br.UncondBr) { // Last MI in the BB is an unconditional branch. Can we simply invert the // condition and swap destinations: @@ -1433,12 +1433,12 @@ ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) { // branch to the destination. int delta = TII->GetInstSizeInBytes(&MBB->back()); BBSizes[MBB->getNumber()] -= delta; - MachineBasicBlock* SplitBB = next(MachineFunction::iterator(MBB)); + MachineBasicBlock* SplitBB = llvm::next(MachineFunction::iterator(MBB)); AdjustBBOffsetsAfter(SplitBB, -delta); MBB->back().eraseFromParent(); // BBOffsets[SplitBB] is wrong temporarily, fixed below } - MachineBasicBlock *NextBB = next(MachineFunction::iterator(MBB)); + MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB)); DEBUG(errs() << " Insert B to BB#" << DestBB->getNumber() << " also invert condition and change dest. to BB#" diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index c929c54..1b8727d 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -48,7 +48,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); while (MBBI != E) { MachineInstr &MI = *MBBI; - MachineBasicBlock::iterator NMBBI = next(MBBI); + MachineBasicBlock::iterator NMBBI = llvm::next(MBBI); unsigned Opcode = MI.getOpcode(); switch (Opcode) { diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index c839fc6..655c762 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -42,6 +42,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" #include <sstream> using namespace llvm; @@ -377,7 +378,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); else setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); - setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); + setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom); if (!Subtarget->hasV6Ops() && !Subtarget->isThumb2()) { setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); @@ -500,6 +501,9 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC"; + case ARMISD::MEMBARRIER: return "ARMISD::MEMBARRIER"; + case ARMISD::SYNCBARRIER: return "ARMISD::SYNCBARRIER"; + case ARMISD::VCEQ: return "ARMISD::VCEQ"; case ARMISD::VCGE: return "ARMISD::VCGE"; case ARMISD::VCGEU: return "ARMISD::VCGEU"; @@ -1470,6 +1474,28 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { } } +static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *Subtarget) { + DebugLoc dl = Op.getDebugLoc(); + SDValue Op5 = Op.getOperand(5); + SDValue Res; + unsigned isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue(); + if (isDeviceBarrier) { + if (Subtarget->hasV7Ops()) + Res = DAG.getNode(ARMISD::SYNCBARRIER, dl, MVT::Other, Op.getOperand(0)); + else + Res = DAG.getNode(ARMISD::SYNCBARRIER, dl, MVT::Other, Op.getOperand(0), + DAG.getConstant(0, MVT::i32)); + } else { + if (Subtarget->hasV7Ops()) + Res = DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0)); + else + Res = DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0), + DAG.getConstant(0, MVT::i32)); + } + return Res; +} + static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG, unsigned VarArgsFrameIndex) { // vastart just stores the address of the VarArgsFrameIndex slot into the @@ -2528,6 +2554,25 @@ static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT, return true; } +/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of +/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". +/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>. +static bool isVTRN_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, + unsigned &WhichResult) { + unsigned EltSz = VT.getVectorElementType().getSizeInBits(); + if (EltSz == 64) + return false; + + unsigned NumElts = VT.getVectorNumElements(); + WhichResult = (M[0] == 0 ? 0 : 1); + for (unsigned i = 0; i < NumElts; i += 2) { + if ((unsigned) M[i] != i + WhichResult || + (unsigned) M[i+1] != i + WhichResult) + return false; + } + return true; +} + static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT, unsigned &WhichResult) { unsigned EltSz = VT.getVectorElementType().getSizeInBits(); @@ -2548,6 +2593,33 @@ static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT, return true; } +/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of +/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". +/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>, +static bool isVUZP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, + unsigned &WhichResult) { + unsigned EltSz = VT.getVectorElementType().getSizeInBits(); + if (EltSz == 64) + return false; + + unsigned Half = VT.getVectorNumElements() / 2; + WhichResult = (M[0] == 0 ? 0 : 1); + for (unsigned j = 0; j != 2; ++j) { + unsigned Idx = WhichResult; + for (unsigned i = 0; i != Half; ++i) { + if ((unsigned) M[i + j * Half] != Idx) + return false; + Idx += 2; + } + } + + // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. + if (VT.is64BitVector() && EltSz == 32) + return false; + + return true; +} + static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT, unsigned &WhichResult) { unsigned EltSz = VT.getVectorElementType().getSizeInBits(); @@ -2571,6 +2643,33 @@ static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT, return true; } +/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of +/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". +/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>. +static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, + unsigned &WhichResult) { + unsigned EltSz = VT.getVectorElementType().getSizeInBits(); + if (EltSz == 64) + return false; + + unsigned NumElts = VT.getVectorNumElements(); + WhichResult = (M[0] == 0 ? 0 : 1); + unsigned Idx = WhichResult * NumElts / 2; + for (unsigned i = 0; i != NumElts; i += 2) { + if ((unsigned) M[i] != Idx || + (unsigned) M[i+1] != Idx) + return false; + Idx += 1; + } + + // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. + if (VT.is64BitVector() && EltSz == 32) + return false; + + return true; +} + + static SDValue BuildSplat(SDValue Val, EVT VT, SelectionDAG &DAG, DebugLoc dl) { // Canonicalize all-zeros and all-ones vectors. ConstantSDNode *ConstVal = cast<ConstantSDNode>(Val.getNode()); @@ -2683,7 +2782,10 @@ ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, isVEXTMask(M, VT, ReverseVEXT, Imm) || isVTRNMask(M, VT, WhichResult) || isVUZPMask(M, VT, WhichResult) || - isVZIPMask(M, VT, WhichResult)); + isVZIPMask(M, VT, WhichResult) || + isVTRN_v_undef_Mask(M, VT, WhichResult) || + isVUZP_v_undef_Mask(M, VT, WhichResult) || + isVZIP_v_undef_Mask(M, VT, WhichResult)); } /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit @@ -2815,6 +2917,16 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), V1, V2).getValue(WhichResult); + if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult)) + return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), + V1, V1).getValue(WhichResult); + if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult)) + return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), + V1, V1).getValue(WhichResult); + if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult)) + return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), + V1, V1).getValue(WhichResult); + // If the shuffle is not directly supported and it has 4 elements, use // the PerfectShuffle-generated table to synthesize it from other shuffles. if (VT.getVectorNumElements() == 4 && @@ -2886,6 +2998,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { case ISD::BR_JT: return LowerBR_JT(Op, DAG); case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG, VarArgsFrameIndex); + case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget); case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG); case ISD::FP_TO_SINT: @@ -2938,14 +3051,233 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, //===----------------------------------------------------------------------===// MachineBasicBlock * +ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, + MachineBasicBlock *BB, + unsigned Size) const { + unsigned dest = MI->getOperand(0).getReg(); + unsigned ptr = MI->getOperand(1).getReg(); + unsigned oldval = MI->getOperand(2).getReg(); + unsigned newval = MI->getOperand(3).getReg(); + unsigned scratch = BB->getParent()->getRegInfo() + .createVirtualRegister(ARM::GPRRegisterClass); + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + DebugLoc dl = MI->getDebugLoc(); + bool isThumb2 = Subtarget->isThumb2(); + + unsigned ldrOpc, strOpc; + switch (Size) { + default: llvm_unreachable("unsupported size for AtomicCmpSwap!"); + case 1: + ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB; + strOpc = isThumb2 ? ARM::t2LDREXB : ARM::STREXB; + break; + case 2: + ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH; + strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH; + break; + case 4: + ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX; + strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX; + break; + } + + MachineFunction *MF = BB->getParent(); + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator It = BB; + ++It; // insert the new blocks after the current block + + MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MF->insert(It, loop1MBB); + MF->insert(It, loop2MBB); + MF->insert(It, exitMBB); + exitMBB->transferSuccessors(BB); + + // thisMBB: + // ... + // fallthrough --> loop1MBB + BB->addSuccessor(loop1MBB); + + // loop1MBB: + // ldrex dest, [ptr] + // cmp dest, oldval + // bne exitMBB + BB = loop1MBB; + AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr)); + AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) + .addReg(dest).addReg(oldval)); + BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) + .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); + BB->addSuccessor(loop2MBB); + BB->addSuccessor(exitMBB); + + // loop2MBB: + // strex scratch, newval, [ptr] + // cmp scratch, #0 + // bne loop1MBB + BB = loop2MBB; + AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval) + .addReg(ptr)); + AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) + .addReg(scratch).addImm(0)); + BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) + .addMBB(loop1MBB).addImm(ARMCC::NE).addReg(ARM::CPSR); + BB->addSuccessor(loop1MBB); + BB->addSuccessor(exitMBB); + + // exitMBB: + // ... + BB = exitMBB; + return BB; +} + +MachineBasicBlock * +ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, + unsigned Size, unsigned BinOpcode) const { + // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction *F = BB->getParent(); + MachineFunction::iterator It = BB; + ++It; + + unsigned dest = MI->getOperand(0).getReg(); + unsigned ptr = MI->getOperand(1).getReg(); + unsigned incr = MI->getOperand(2).getReg(); + DebugLoc dl = MI->getDebugLoc(); + bool isThumb2 = Subtarget->isThumb2(); + unsigned ldrOpc, strOpc; + switch (Size) { + default: llvm_unreachable("unsupported size for AtomicCmpSwap!"); + case 1: + ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB; + strOpc = isThumb2 ? ARM::t2LDREXB : ARM::STREXB; + break; + case 2: + ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH; + strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH; + break; + case 4: + ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX; + strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX; + break; + } + + MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(It, loopMBB); + F->insert(It, exitMBB); + exitMBB->transferSuccessors(BB); + + MachineRegisterInfo &RegInfo = F->getRegInfo(); + unsigned scratch = RegInfo.createVirtualRegister(ARM::GPRRegisterClass); + unsigned scratch2 = (!BinOpcode) ? incr : + RegInfo.createVirtualRegister(ARM::GPRRegisterClass); + + // thisMBB: + // ... + // fallthrough --> loopMBB + BB->addSuccessor(loopMBB); + + // loopMBB: + // ldrex dest, ptr + // <binop> scratch2, dest, incr + // strex scratch, scratch2, ptr + // cmp scratch, #0 + // bne- loopMBB + // fallthrough --> exitMBB + BB = loopMBB; + AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr)); + if (BinOpcode) { + // operand order needs to go the other way for NAND + if (BinOpcode == ARM::BICrr || BinOpcode == ARM::t2BICrr) + AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2). + addReg(incr).addReg(dest)).addReg(0); + else + AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2). + addReg(dest).addReg(incr)).addReg(0); + } + + AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2) + .addReg(ptr)); + AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) + .addReg(scratch).addImm(0)); + BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) + .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); + + BB->addSuccessor(loopMBB); + BB->addSuccessor(exitMBB); + + // exitMBB: + // ... + BB = exitMBB; + return BB; +} + +MachineBasicBlock * ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB, DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); + bool isThumb2 = Subtarget->isThumb2(); switch (MI->getOpcode()) { default: + MI->dump(); llvm_unreachable("Unexpected instr type to insert"); + + case ARM::ATOMIC_LOAD_ADD_I8: + return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); + case ARM::ATOMIC_LOAD_ADD_I16: + return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); + case ARM::ATOMIC_LOAD_ADD_I32: + return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); + + case ARM::ATOMIC_LOAD_AND_I8: + return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); + case ARM::ATOMIC_LOAD_AND_I16: + return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); + case ARM::ATOMIC_LOAD_AND_I32: + return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); + + case ARM::ATOMIC_LOAD_OR_I8: + return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); + case ARM::ATOMIC_LOAD_OR_I16: + return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); + case ARM::ATOMIC_LOAD_OR_I32: + return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); + + case ARM::ATOMIC_LOAD_XOR_I8: + return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2EORrr : ARM::EORrr); + case ARM::ATOMIC_LOAD_XOR_I16: + return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2EORrr : ARM::EORrr); + case ARM::ATOMIC_LOAD_XOR_I32: + return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2EORrr : ARM::EORrr); + + case ARM::ATOMIC_LOAD_NAND_I8: + return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2BICrr : ARM::BICrr); + case ARM::ATOMIC_LOAD_NAND_I16: + return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2BICrr : ARM::BICrr); + case ARM::ATOMIC_LOAD_NAND_I32: + return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2BICrr : ARM::BICrr); + + case ARM::ATOMIC_LOAD_SUB_I8: + return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr); + case ARM::ATOMIC_LOAD_SUB_I16: + return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr); + case ARM::ATOMIC_LOAD_SUB_I32: + return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr); + + case ARM::ATOMIC_SWAP_I8: return EmitAtomicBinary(MI, BB, 1, 0); + case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0); + case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0); + + case ARM::ATOMIC_CMP_SWAP_I8: return EmitAtomicCmpSwap(MI, BB, 1); + case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2); + case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4); + case ARM::tMOVCCr_pseudo: { // To "insert" a SELECT_CC instruction, we actually have to insert the // diamond control-flow pattern. The incoming instruction knows the @@ -3935,6 +4267,8 @@ ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, return std::make_pair(0U, ARM::SPRRegisterClass); if (VT == MVT::f64) return std::make_pair(0U, ARM::DPRRegisterClass); + if (VT.getSizeInBits() == 128) + return std::make_pair(0U, ARM::QPRRegisterClass); break; } } @@ -3973,6 +4307,9 @@ getRegClassForInlineAsmConstraint(const std::string &Constraint, ARM::D4, ARM::D5, ARM::D6, ARM::D7, ARM::D8, ARM::D9, ARM::D10,ARM::D11, ARM::D12,ARM::D13,ARM::D14,ARM::D15, 0); + if (VT.getSizeInBits() == 128) + return make_vector<unsigned>(ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3, + ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7, 0); break; } diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 4f31f8a..e1b3348 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -72,6 +72,9 @@ namespace llvm { DYN_ALLOC, // Dynamic allocation on the stack. + MEMBARRIER, // Memory barrier + SYNCBARRIER, // Memory sync barrier + VCEQ, // Vector compare equal. VCGE, // Vector compare greater than or equal. VCGEU, // Vector compare unsigned greater than or equal. @@ -328,6 +331,15 @@ namespace llvm { SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl); + + MachineBasicBlock *EmitAtomicCmpSwap(MachineInstr *MI, + MachineBasicBlock *BB, + unsigned Size) const; + MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI, + MachineBasicBlock *BB, + unsigned Size, + unsigned BinOpcode) const; + }; } diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index e76e93c..cf0edff 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -33,6 +33,8 @@ def LdMiscFrm : Format<8>; def StMiscFrm : Format<9>; def LdStMulFrm : Format<10>; +def LdStExFrm : Format<28>; + def ArithMiscFrm : Format<11>; def ExtFrm : Format<12>; @@ -199,6 +201,19 @@ class I<dag oops, dag iops, AddrMode am, SizeFlagVal sz, let Pattern = pattern; list<Predicate> Predicates = [IsARM]; } +// A few are not predicable +class InoP<dag oops, dag iops, AddrMode am, SizeFlagVal sz, + IndexMode im, Format f, InstrItinClass itin, + string opc, string asm, string cstr, + list<dag> pattern> + : InstARM<am, sz, im, f, GenericDomain, cstr, itin> { + let OutOperandList = oops; + let InOperandList = iops; + let AsmString = !strconcat(opc, asm); + let Pattern = pattern; + let isPredicable = 0; + list<Predicate> Predicates = [IsARM]; +} // Same as I except it can optionally modify CPSR. Note it's modeled as // an input operand since by default it's a zero register. It will @@ -239,6 +254,10 @@ class AXI<dag oops, dag iops, Format f, InstrItinClass itin, string asm, list<dag> pattern> : XI<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, itin, asm, "", pattern>; +class AInoP<dag oops, dag iops, Format f, InstrItinClass itin, + string opc, string asm, list<dag> pattern> + : InoP<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, itin, + opc, asm, "", pattern>; // Ctrl flow instructions class ABI<bits<4> opcod, dag oops, dag iops, InstrItinClass itin, @@ -264,6 +283,28 @@ class JTI<dag oops, dag iops, InstrItinClass itin, : XI<oops, iops, AddrModeNone, SizeSpecial, IndexModeNone, BrMiscFrm, itin, asm, "", pattern>; + +// Atomic load/store instructions + +class AIldrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list<dag> pattern> + : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, LdStExFrm, itin, + opc, asm, "", pattern> { + let Inst{27-23} = 0b00011; + let Inst{22-21} = opcod; + let Inst{20} = 1; + let Inst{11-0} = 0b111110011111; +} +class AIstrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list<dag> pattern> + : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, LdStExFrm, itin, + opc, asm, "", pattern> { + let Inst{27-23} = 0b00011; + let Inst{22-21} = opcod; + let Inst{20} = 0; + let Inst{11-4} = 0b11111001; +} + // addrmode1 instructions class AI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin, string opc, string asm, list<dag> pattern> @@ -967,6 +1008,17 @@ class Thumb2XI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, list<Predicate> Predicates = [IsThumb2]; } +class ThumbXI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, + InstrItinClass itin, + string asm, string cstr, list<dag> pattern> + : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> { + let OutOperandList = oops; + let InOperandList = iops; + let AsmString = asm; + let Pattern = pattern; + list<Predicate> Predicates = [IsThumb1Only]; +} + class T2I<dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> : Thumb2I<oops, iops, AddrModeNone, Size4Bytes, itin, opc, asm, "", pattern>; diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp index 87bb12b..85f6b40 100644 --- a/lib/Target/ARM/ARMInstrInfo.cpp +++ b/lib/Target/ARM/ARMInstrInfo.cpp @@ -60,25 +60,6 @@ unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const { return 0; } -bool ARMInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const { - if (MBB.empty()) return false; - - switch (MBB.back().getOpcode()) { - case ARM::BX_RET: // Return. - case ARM::LDM_RET: - case ARM::B: - case ARM::BRIND: - case ARM::BR_JTr: // Jumptable branch. - case ARM::BR_JTm: // Jumptable branch through mem. - case ARM::BR_JTadd: // Jumptable branch add to pc. - return true; - default: - break; - } - - return false; -} - void ARMInstrInfo:: reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, unsigned SubIdx, const MachineInstr *Orig, diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h index 4319577..d4199d1 100644 --- a/lib/Target/ARM/ARMInstrInfo.h +++ b/lib/Target/ARM/ARMInstrInfo.h @@ -32,9 +32,6 @@ public: // if there is not such an opcode. unsigned getUnindexedOpcode(unsigned Opc) const; - // Return true if the block does not fall through. - bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const; - void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, unsigned SubIdx, const MachineInstr *Orig, diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 7516d3c..e14696a 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -46,6 +46,11 @@ def SDT_ARMPICAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, def SDT_ARMThreadPointer : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>; def SDT_ARMEH_SJLJ_Setjmp : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisPtrTy<1>]>; +def SDT_ARMMEMBARRIERV7 : SDTypeProfile<0, 0, []>; +def SDT_ARMSYNCBARRIERV7 : SDTypeProfile<0, 0, []>; +def SDT_ARMMEMBARRIERV6 : SDTypeProfile<0, 1, [SDTCisInt<0>]>; +def SDT_ARMSYNCBARRIERV6 : SDTypeProfile<0, 1, [SDTCisInt<0>]>; + // Node definitions. def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>; def ARMWrapperJT : SDNode<"ARMISD::WrapperJT", SDTIntBinOp>; @@ -93,6 +98,15 @@ def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOp, [SDNPInFlag ]>; def ARMthread_pointer: SDNode<"ARMISD::THREAD_POINTER", SDT_ARMThreadPointer>; def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP", SDT_ARMEH_SJLJ_Setjmp>; +def ARMMemBarrierV7 : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIERV7, + [SDNPHasChain]>; +def ARMSyncBarrierV7 : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIERV7, + [SDNPHasChain]>; +def ARMMemBarrierV6 : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIERV6, + [SDNPHasChain]>; +def ARMSyncBarrierV6 : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIERV6, + [SDNPHasChain]>; + //===----------------------------------------------------------------------===// // ARM Instruction Predicate Definitions. // @@ -772,6 +786,7 @@ let isBranch = 1, isTerminator = 1 in { def BR_JTr : JTI<(outs), (ins GPR:$target, jtblock_operand:$jt, i32imm:$id), IIC_Br, "mov\tpc, $target \n$jt", [(ARMbrjt GPR:$target, tjumptable:$jt, imm:$id)]> { + let Inst{11-4} = 0b00000000; let Inst{15-12} = 0b1111; let Inst{20} = 0; // S Bit let Inst{24-21} = 0b1101; @@ -1561,6 +1576,189 @@ def MOVCCi : AI1<0b1101, (outs GPR:$dst), let Inst{25} = 1; } +//===----------------------------------------------------------------------===// +// Atomic operations intrinsics +// + +// memory barriers protect the atomic sequences +let hasSideEffects = 1 in { +def Int_MemBarrierV7 : AInoP<(outs), (ins), + Pseudo, NoItinerary, + "dmb", "", + [(ARMMemBarrierV7)]>, + Requires<[IsARM, HasV7]> { + let Inst{31-4} = 0xf57ff05; + // FIXME: add support for options other than a full system DMB + let Inst{3-0} = 0b1111; +} + +def Int_SyncBarrierV7 : AInoP<(outs), (ins), + Pseudo, NoItinerary, + "dsb", "", + [(ARMSyncBarrierV7)]>, + Requires<[IsARM, HasV7]> { + let Inst{31-4} = 0xf57ff04; + // FIXME: add support for options other than a full system DSB + let Inst{3-0} = 0b1111; +} + +def Int_MemBarrierV6 : AInoP<(outs), (ins GPR:$zero), + Pseudo, NoItinerary, + "mcr", "\tp15, 0, $zero, c7, c10, 5", + [(ARMMemBarrierV6 GPR:$zero)]>, + Requires<[IsARM, HasV6]> { + // FIXME: add support for options other than a full system DMB + // FIXME: add encoding +} + +def Int_SyncBarrierV6 : AInoP<(outs), (ins GPR:$zero), + Pseudo, NoItinerary, + "mcr", "\tp15, 0, $zero, c7, c10, 4", + [(ARMSyncBarrierV6 GPR:$zero)]>, + Requires<[IsARM, HasV6]> { + // FIXME: add support for options other than a full system DSB + // FIXME: add encoding +} +} + +let usesCustomInserter = 1 in { + let Uses = [CPSR] in { + def ATOMIC_LOAD_ADD_I8 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, + "${:comment} ATOMIC_LOAD_ADD_I8 PSEUDO!", + [(set GPR:$dst, (atomic_load_add_8 GPR:$ptr, GPR:$incr))]>; + def ATOMIC_LOAD_SUB_I8 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, + "${:comment} ATOMIC_LOAD_SUB_I8 PSEUDO!", + [(set GPR:$dst, (atomic_load_sub_8 GPR:$ptr, GPR:$incr))]>; + def ATOMIC_LOAD_AND_I8 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, + "${:comment} ATOMIC_LOAD_AND_I8 PSEUDO!", + [(set GPR:$dst, (atomic_load_and_8 GPR:$ptr, GPR:$incr))]>; + def ATOMIC_LOAD_OR_I8 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, + "${:comment} ATOMIC_LOAD_OR_I8 PSEUDO!", + [(set GPR:$dst, (atomic_load_or_8 GPR:$ptr, GPR:$incr))]>; + def ATOMIC_LOAD_XOR_I8 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, + "${:comment} ATOMIC_LOAD_XOR_I8 PSEUDO!", + [(set GPR:$dst, (atomic_load_xor_8 GPR:$ptr, GPR:$incr))]>; + def ATOMIC_LOAD_NAND_I8 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, + "${:comment} ATOMIC_LOAD_NAND_I8 PSEUDO!", + [(set GPR:$dst, (atomic_load_nand_8 GPR:$ptr, GPR:$incr))]>; + def ATOMIC_LOAD_ADD_I16 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, + "${:comment} ATOMIC_LOAD_ADD_I16 PSEUDO!", + [(set GPR:$dst, (atomic_load_add_16 GPR:$ptr, GPR:$incr))]>; + def ATOMIC_LOAD_SUB_I16 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, + "${:comment} ATOMIC_LOAD_SUB_I16 PSEUDO!", + [(set GPR:$dst, (atomic_load_sub_16 GPR:$ptr, GPR:$incr))]>; + def ATOMIC_LOAD_AND_I16 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, + "${:comment} ATOMIC_LOAD_AND_I16 PSEUDO!", + [(set GPR:$dst, (atomic_load_and_16 GPR:$ptr, GPR:$incr))]>; + def ATOMIC_LOAD_OR_I16 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, + "${:comment} ATOMIC_LOAD_OR_I16 PSEUDO!", + [(set GPR:$dst, (atomic_load_or_16 GPR:$ptr, GPR:$incr))]>; + def ATOMIC_LOAD_XOR_I16 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, + "${:comment} ATOMIC_LOAD_XOR_I16 PSEUDO!", + [(set GPR:$dst, (atomic_load_xor_16 GPR:$ptr, GPR:$incr))]>; + def ATOMIC_LOAD_NAND_I16 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, + "${:comment} ATOMIC_LOAD_NAND_I16 PSEUDO!", + [(set GPR:$dst, (atomic_load_nand_16 GPR:$ptr, GPR:$incr))]>; + def ATOMIC_LOAD_ADD_I32 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, + "${:comment} ATOMIC_LOAD_ADD_I32 PSEUDO!", + [(set GPR:$dst, (atomic_load_add_32 GPR:$ptr, GPR:$incr))]>; + def ATOMIC_LOAD_SUB_I32 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, + "${:comment} ATOMIC_LOAD_SUB_I32 PSEUDO!", + [(set GPR:$dst, (atomic_load_sub_32 GPR:$ptr, GPR:$incr))]>; + def ATOMIC_LOAD_AND_I32 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, + "${:comment} ATOMIC_LOAD_AND_I32 PSEUDO!", + [(set GPR:$dst, (atomic_load_and_32 GPR:$ptr, GPR:$incr))]>; + def ATOMIC_LOAD_OR_I32 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, + "${:comment} ATOMIC_LOAD_OR_I32 PSEUDO!", + [(set GPR:$dst, (atomic_load_or_32 GPR:$ptr, GPR:$incr))]>; + def ATOMIC_LOAD_XOR_I32 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, + "${:comment} ATOMIC_LOAD_XOR_I32 PSEUDO!", + [(set GPR:$dst, (atomic_load_xor_32 GPR:$ptr, GPR:$incr))]>; + def ATOMIC_LOAD_NAND_I32 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, + "${:comment} ATOMIC_LOAD_NAND_I32 PSEUDO!", + [(set GPR:$dst, (atomic_load_nand_32 GPR:$ptr, GPR:$incr))]>; + + def ATOMIC_SWAP_I8 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary, + "${:comment} ATOMIC_SWAP_I8 PSEUDO!", + [(set GPR:$dst, (atomic_swap_8 GPR:$ptr, GPR:$new))]>; + def ATOMIC_SWAP_I16 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary, + "${:comment} ATOMIC_SWAP_I16 PSEUDO!", + [(set GPR:$dst, (atomic_swap_16 GPR:$ptr, GPR:$new))]>; + def ATOMIC_SWAP_I32 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary, + "${:comment} ATOMIC_SWAP_I32 PSEUDO!", + [(set GPR:$dst, (atomic_swap_32 GPR:$ptr, GPR:$new))]>; + + def ATOMIC_CMP_SWAP_I8 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary, + "${:comment} ATOMIC_CMP_SWAP_I8 PSEUDO!", + [(set GPR:$dst, (atomic_cmp_swap_8 GPR:$ptr, GPR:$old, GPR:$new))]>; + def ATOMIC_CMP_SWAP_I16 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary, + "${:comment} ATOMIC_CMP_SWAP_I16 PSEUDO!", + [(set GPR:$dst, (atomic_cmp_swap_16 GPR:$ptr, GPR:$old, GPR:$new))]>; + def ATOMIC_CMP_SWAP_I32 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary, + "${:comment} ATOMIC_CMP_SWAP_I32 PSEUDO!", + [(set GPR:$dst, (atomic_cmp_swap_32 GPR:$ptr, GPR:$old, GPR:$new))]>; +} +} + +let mayLoad = 1 in { +def LDREXB : AIldrex<0b10, (outs GPR:$dest), (ins GPR:$ptr), NoItinerary, + "ldrexb", "\t$dest, [$ptr]", + []>; +def LDREXH : AIldrex<0b11, (outs GPR:$dest), (ins GPR:$ptr), NoItinerary, + "ldrexh", "\t$dest, [$ptr]", + []>; +def LDREX : AIldrex<0b00, (outs GPR:$dest), (ins GPR:$ptr), NoItinerary, + "ldrex", "\t$dest, [$ptr]", + []>; +def LDREXD : AIldrex<0b01, (outs GPR:$dest, GPR:$dest2), (ins GPR:$ptr), + NoItinerary, + "ldrexd", "\t$dest, $dest2, [$ptr]", + []>; +} + +let mayStore = 1 in { +def STREXB : AIstrex<0b10, (outs GPR:$success), (ins GPR:$src, GPR:$ptr), + NoItinerary, + "strexb", "\t$success, $src, [$ptr]", + []>; +def STREXH : AIstrex<0b11, (outs GPR:$success), (ins GPR:$src, GPR:$ptr), + NoItinerary, + "strexh", "\t$success, $src, [$ptr]", + []>; +def STREX : AIstrex<0b00, (outs GPR:$success), (ins GPR:$src, GPR:$ptr), + NoItinerary, + "strex", "\t$success, $src, [$ptr]", + []>; +def STREXD : AIstrex<0b01, (outs GPR:$success), + (ins GPR:$src, GPR:$src2, GPR:$ptr), + NoItinerary, + "strexd", "\t$success, $src, $src2, [$ptr]", + []>; +} //===----------------------------------------------------------------------===// // TLS Instructions diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 3166931..61b7705 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -152,7 +152,7 @@ def VLDRQ : NI4<(outs QPR:$dst), (ins addrmode4:$addr), let Inst{24} = 0; // P bit let Inst{23} = 1; // U bit let Inst{20} = 1; - let Inst{11-9} = 0b101; + let Inst{11-8} = 0b1011; } // Use vstmia to store a Q register as a D register pair. @@ -164,7 +164,7 @@ def VSTRQ : NI4<(outs), (ins QPR:$src, addrmode4:$addr), let Inst{24} = 0; // P bit let Inst{23} = 1; // U bit let Inst{20} = 0; - let Inst{11-9} = 0b101; + let Inst{11-8} = 0b1011; } // VLD1 : Vector Load (multiple single elements) diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index b5956a3..9306bdb 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -669,6 +669,35 @@ let isCall = 1, [(set R0, ARMthread_pointer)]>; } +// SJLJ Exception handling intrinsics +// eh_sjlj_setjmp() is an instruction sequence to store the return +// address and save #0 in R0 for the non-longjmp case. +// Since by its nature we may be coming from some other function to get +// here, and we're using the stack frame for the containing function to +// save/restore registers, we can't keep anything live in regs across +// the eh_sjlj_setjmp(), else it will almost certainly have been tromped upon +// when we get here from a longjmp(). We force everthing out of registers +// except for our own input by listing the relevant registers in Defs. By +// doing so, we also cause the prologue/epilogue code to actively preserve +// all of the callee-saved resgisters, which is exactly what we want. +let Defs = + [ R0, R1, R2, R3, R4, R5, R6, R7, R12 ] in { + def tInt_eh_sjlj_setjmp : ThumbXI<(outs), (ins GPR:$src), + AddrModeNone, SizeSpecial, NoItinerary, + "mov\tr12, r1\t@ begin eh.setjmp\n" + "\tmov\tr1, sp\n" + "\tstr\tr1, [$src, #8]\n" + "\tadr\tr1, 0f\n" + "\tadds\tr1, #1\n" + "\tstr\tr1, [$src, #4]\n" + "\tmov\tr1, r12\n" + "\tmovs\tr0, #0\n" + "\tb\t1f\n" + ".align 2\n" + "0:\tmovs\tr0, #1\t@ end eh.setjmp\n" + "1:", "", + [(set R0, (ARMeh_sjlj_setjmp GPR:$src))]>; +} //===----------------------------------------------------------------------===// // Non-Instruction Patterns // diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 9489815..949ce73 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -1065,6 +1065,68 @@ def t2MOVCCror : T2I<(outs GPR:$dst), (ins GPR:$false, GPR:$true, i32imm:$rhs), RegConstraint<"$false = $dst">; //===----------------------------------------------------------------------===// +// Atomic operations intrinsics +// + +// memory barriers protect the atomic sequences +let hasSideEffects = 1 in { +def t2Int_MemBarrierV7 : AInoP<(outs), (ins), + Pseudo, NoItinerary, + "dmb", "", + [(ARMMemBarrierV7)]>, + Requires<[IsThumb2]> { + // FIXME: add support for options other than a full system DMB +} + +def t2Int_SyncBarrierV7 : AInoP<(outs), (ins), + Pseudo, NoItinerary, + "dsb", "", + [(ARMSyncBarrierV7)]>, + Requires<[IsThumb2]> { + // FIXME: add support for options other than a full system DSB +} +} + +let mayLoad = 1 in { +def t2LDREXB : Thumb2I<(outs GPR:$dest), (ins GPR:$ptr), AddrModeNone, + Size4Bytes, NoItinerary, + "ldrexb", "\t$dest, [$ptr]", "", + []>; +def t2LDREXH : Thumb2I<(outs GPR:$dest), (ins GPR:$ptr), AddrModeNone, + Size4Bytes, NoItinerary, + "ldrexh", "\t$dest, [$ptr]", "", + []>; +def t2LDREX : Thumb2I<(outs GPR:$dest), (ins GPR:$ptr), AddrModeNone, + Size4Bytes, NoItinerary, + "ldrex", "\t$dest, [$ptr]", "", + []>; +def t2LDREXD : Thumb2I<(outs GPR:$dest, GPR:$dest2), (ins GPR:$ptr), + AddrModeNone, Size4Bytes, NoItinerary, + "ldrexd", "\t$dest, $dest2, [$ptr]", "", + []>; +} + +let mayStore = 1 in { +def t2STREXB : Thumb2I<(outs GPR:$success), (ins GPR:$src, GPR:$ptr), + AddrModeNone, Size4Bytes, NoItinerary, + "strexb", "\t$success, $src, [$ptr]", "", + []>; +def t2STREXH : Thumb2I<(outs GPR:$success), (ins GPR:$src, GPR:$ptr), + AddrModeNone, Size4Bytes, NoItinerary, + "strexh", "\t$success, $src, [$ptr]", "", + []>; +def t2STREX : Thumb2I<(outs GPR:$success), (ins GPR:$src, GPR:$ptr), + AddrModeNone, Size4Bytes, NoItinerary, + "strex", "\t$success, $src, [$ptr]", "", + []>; +def t2STREXD : Thumb2I<(outs GPR:$success), + (ins GPR:$src, GPR:$src2, GPR:$ptr), + AddrModeNone, Size4Bytes, NoItinerary, + "strexd", "\t$success, $src, $src2, [$ptr]", "", + []>; +} + +//===----------------------------------------------------------------------===// // TLS Instructions // diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 304d0ef..22bd80e 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -449,7 +449,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, } if (MBBI != MBB.end()) { - MachineBasicBlock::iterator NextMBBI = next(MBBI); + MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI); if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) && isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) { MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true)); @@ -494,7 +494,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, } if (MBBI != MBB.end()) { - MachineBasicBlock::iterator NextMBBI = next(MBBI); + MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI); if (Mode == ARM_AM::ia && isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) { MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::ia, true, Offset)); @@ -604,7 +604,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, } if (!DoMerge && MBBI != MBB.end()) { - MachineBasicBlock::iterator NextMBBI = next(MBBI); + MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI); if (!isAM5 && isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) { DoMerge = true; diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 2564ed9..1c6fca7 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -95,7 +95,7 @@ bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM, // Calculate and set max stack object alignment early, so we can decide // whether we will need stack realignment (and thus FP). - PM.add(createARMMaxStackAlignmentCalculatorPass()); + PM.add(createMaxStackAlignmentCalculatorPass()); // FIXME: temporarily disabling load / store optimization pass for Thumb1. if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only()) diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp index 692bb19..362bbf1 100644 --- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp @@ -1045,6 +1045,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, printNoHashImmediate(MI, OpNum); return false; case 'P': // Print a VFP double precision register. + case 'q': // Print a NEON quad precision register. printOperand(MI, OpNum); return false; case 'Q': diff --git a/lib/Target/ARM/NEONMoveFix.cpp b/lib/Target/ARM/NEONMoveFix.cpp index 50abcf4..3c0414d 100644 --- a/lib/Target/ARM/NEONMoveFix.cpp +++ b/lib/Target/ARM/NEONMoveFix.cpp @@ -51,7 +51,7 @@ bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) { MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end(); MachineBasicBlock::iterator NextMII; for (; MII != E; MII = NextMII) { - NextMII = next(MII); + NextMII = llvm::next(MII); MachineInstr *MI = &*MII; if (MI->getOpcode() == ARM::VMOVD && diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp index 206677b..d9942c8 100644 --- a/lib/Target/ARM/NEONPreAllocPass.cpp +++ b/lib/Target/ARM/NEONPreAllocPass.cpp @@ -338,7 +338,7 @@ bool NEONPreAllocPass::PreAllocNEONRegisters(MachineBasicBlock &MBB) { if (!isNEONMultiRegOp(MI->getOpcode(), FirstOpnd, NumRegs, Offset, Stride)) continue; - MachineBasicBlock::iterator NextI = next(MBBI); + MachineBasicBlock::iterator NextI = llvm::next(MBBI); for (unsigned R = 0; R < NumRegs; ++R) { MachineOperand &MO = MI->getOperand(FirstOpnd + R); assert(MO.isReg() && MO.getSubReg() == 0 && "unexpected operand"); diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp index 7602b6d..66d3b83 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.cpp +++ b/lib/Target/ARM/Thumb1InstrInfo.cpp @@ -32,25 +32,6 @@ unsigned Thumb1InstrInfo::getUnindexedOpcode(unsigned Opc) const { return 0; } -bool -Thumb1InstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const { - if (MBB.empty()) return false; - - switch (MBB.back().getOpcode()) { - case ARM::tBX_RET: - case ARM::tBX_RET_vararg: - case ARM::tPOP_RET: - case ARM::tB: - case ARM::tBRIND: - case ARM::tBR_JTr: - return true; - default: - break; - } - - return false; -} - bool Thumb1InstrInfo::copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, unsigned SrcReg, diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h index b28229d..516ddf1 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.h +++ b/lib/Target/ARM/Thumb1InstrInfo.h @@ -31,9 +31,6 @@ public: // if there is not such an opcode. unsigned getUnindexedOpcode(unsigned Opc) const; - // Return true if the block does not fall through. - bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const; - /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As /// such, whenever a client has an instance of instruction info, it should /// always be able to get register info as well (through this method). diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp index 37adf37..9f3816a 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp @@ -528,7 +528,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MI.getOperand(i+1).ChangeToImmediate(Mask); } Offset = (Offset - Mask * Scale); - MachineBasicBlock::iterator NII = next(II); + MachineBasicBlock::iterator NII = llvm::next(II); emitThumbRegPlusImmediate(MBB, NII, DestReg, DestReg, Offset, TII, *this, dl); } else { diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index 16c1e6f..f4a8c27 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -36,30 +36,6 @@ unsigned Thumb2InstrInfo::getUnindexedOpcode(unsigned Opc) const { } bool -Thumb2InstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const { - if (MBB.empty()) return false; - - switch (MBB.back().getOpcode()) { - case ARM::t2LDM_RET: - case ARM::t2B: // Uncond branch. - case ARM::t2BR_JT: // Jumptable branch. - case ARM::t2TBB: // Table branch byte. - case ARM::t2TBH: // Table branch halfword. - case ARM::tBR_JTr: // Jumptable branch (16-bit version). - case ARM::tBX_RET: - case ARM::tBX_RET_vararg: - case ARM::tPOP_RET: - case ARM::tB: - case ARM::tBRIND: - return true; - default: - break; - } - - return false; -} - -bool Thumb2InstrInfo::copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, unsigned SrcReg, diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h index 663a60b..a0f89a6 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.h +++ b/lib/Target/ARM/Thumb2InstrInfo.h @@ -31,9 +31,6 @@ public: // if there is not such an opcode. unsigned getUnindexedOpcode(unsigned Opc) const; - // Return true if the block does not fall through. - bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const; - bool copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, unsigned SrcReg, diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp index b2fd7b3..35359aa 100644 --- a/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -649,7 +649,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end(); MachineBasicBlock::iterator NextMII; for (; MII != E; MII = NextMII) { - NextMII = next(MII); + NextMII = llvm::next(MII); MachineInstr *MI = &*MII; LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR); diff --git a/lib/Target/Alpha/AlphaInstrInfo.cpp b/lib/Target/Alpha/AlphaInstrInfo.cpp index 86173ff..39f0749 100644 --- a/lib/Target/Alpha/AlphaInstrInfo.cpp +++ b/lib/Target/Alpha/AlphaInstrInfo.cpp @@ -392,18 +392,6 @@ void AlphaInstrInfo::insertNoop(MachineBasicBlock &MBB, .addReg(Alpha::R31); } -bool AlphaInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const { - if (MBB.empty()) return false; - - switch (MBB.back().getOpcode()) { - case Alpha::RETDAG: // Return. - case Alpha::RETDAGp: - case Alpha::BR: // Uncond branch. - case Alpha::JMP: // Indirect branch. - return true; - default: return false; - } -} bool AlphaInstrInfo:: ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { assert(Cond.size() == 2 && "Invalid Alpha branch opcode!"); diff --git a/lib/Target/Alpha/AlphaInstrInfo.h b/lib/Target/Alpha/AlphaInstrInfo.h index 274f452..c3b6044 100644 --- a/lib/Target/Alpha/AlphaInstrInfo.h +++ b/lib/Target/Alpha/AlphaInstrInfo.h @@ -78,7 +78,6 @@ public: unsigned RemoveBranch(MachineBasicBlock &MBB) const; void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const; - bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const; bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; /// getGlobalBaseReg - Return a virtual register initialized with the diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp index 9e4fe27..a52ca79 100644 --- a/lib/Target/CBackend/CBackend.cpp +++ b/lib/Target/CBackend/CBackend.cpp @@ -2573,7 +2573,7 @@ void CWriter::visitSwitchInst(SwitchInst &SI) { BasicBlock *Succ = cast<BasicBlock>(SI.getOperand(i+1)); printPHICopiesForSuccessor (SI.getParent(), Succ, 2); printBranchToBlock(SI.getParent(), Succ, 2); - if (Function::iterator(Succ) == next(Function::iterator(SI.getParent()))) + if (Function::iterator(Succ) == llvm::next(Function::iterator(SI.getParent()))) Out << " break;\n"; } Out << " }\n"; @@ -2593,7 +2593,7 @@ bool CWriter::isGotoCodeNecessary(BasicBlock *From, BasicBlock *To) { /// FIXME: This should be reenabled, but loop reordering safe!! return true; - if (next(Function::iterator(From)) != Function::iterator(To)) + if (llvm::next(Function::iterator(From)) != Function::iterator(To)) return true; // Not the direct successor, we need a goto. //isa<SwitchInst>(From->getTerminator()) diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp index ecce8e3..2306665 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.cpp +++ b/lib/Target/CellSPU/SPUInstrInfo.cpp @@ -580,10 +580,6 @@ SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, } } -bool -SPUInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const { - return (!MBB.empty() && isUncondBranch(&MBB.back())); -} //! Reverses a branch's condition, returning false on success. bool SPUInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) diff --git a/lib/Target/CellSPU/SPUInstrInfo.h b/lib/Target/CellSPU/SPUInstrInfo.h index c644a11..42677fc 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.h +++ b/lib/Target/CellSPU/SPUInstrInfo.h @@ -79,9 +79,6 @@ namespace llvm { bool canFoldMemoryOperand(const MachineInstr *MI, const SmallVectorImpl<unsigned> &Ops) const; - //! Return true if the specified block does not fall through - virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const; - //! Reverses a branch's condition, returning false on success. virtual bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index 4bae6c7..a872fbd 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -976,21 +976,20 @@ namespace { nl(Out); printType(GV->getType()); if (GV->hasInitializer()) { - Constant* Init = GV->getInitializer(); + Constant *Init = GV->getInitializer(); printType(Init->getType()); - if (Function* F = dyn_cast<Function>(Init)) { + if (Function *F = dyn_cast<Function>(Init)) { nl(Out)<< "/ Function Declarations"; nl(Out); printFunctionHead(F); } else if (GlobalVariable* gv = dyn_cast<GlobalVariable>(Init)) { nl(Out) << "// Global Variable Declarations"; nl(Out); printVariableHead(gv); - } else { - nl(Out) << "// Constant Definitions"; nl(Out); - printConstant(gv); - } - if (GlobalVariable* gv = dyn_cast<GlobalVariable>(Init)) { + nl(Out) << "// Global Variable Definitions"; nl(Out); printVariableBody(gv); + } else { + nl(Out) << "// Constant Definitions"; nl(Out); + printConstant(Init); } } } diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp index beccb2c..4edf422 100644 --- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp +++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp @@ -86,10 +86,10 @@ namespace { void dump() { errs() << "MSP430ISelAddressMode " << this << '\n'; - if (Base.Reg.getNode() != 0) { + if (BaseType == RegBase && Base.Reg.getNode() != 0) { errs() << "Base.Reg "; Base.Reg.getNode()->dump(); - } else { + } else if (BaseType == FrameIndexBase) { errs() << " Base.FrameIndex " << Base.FrameIndex << '\n'; } errs() << " Disp " << Disp << '\n'; diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp index 29cc370..5fe9b20 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -15,6 +15,7 @@ #include "MSP430ISelLowering.h" #include "MSP430.h" +#include "MSP430MachineFunctionInfo.h" #include "MSP430TargetMachine.h" #include "MSP430Subtarget.h" #include "llvm/DerivedTypes.h" @@ -32,16 +33,38 @@ #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/VectorExtras.h" using namespace llvm; +typedef enum { + NoHWMult, + HWMultIntr, + HWMultNoIntr +} HWMultUseMode; + +static cl::opt<HWMultUseMode> +HWMultMode("msp430-hwmult-mode", + cl::desc("Hardware multiplier use mode"), + cl::init(HWMultNoIntr), + cl::values( + clEnumValN(NoHWMult, "no", + "Do not use hardware multiplier"), + clEnumValN(HWMultIntr, "interrupts", + "Assume hardware multiplier can be used inside interrupts"), + clEnumValN(HWMultNoIntr, "use", + "Assume hardware multiplier cannot be used inside interrupts"), + clEnumValEnd)); + MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) : TargetLowering(tm, new TargetLoweringObjectFileELF()), Subtarget(*tm.getSubtargetImpl()), TM(tm) { + TD = getTargetData(); + // Set up the register classes. addRegisterClass(MVT::i8, MSP430::GR8RegisterClass); addRegisterClass(MVT::i16, MSP430::GR16RegisterClass); @@ -92,8 +115,8 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) : setOperationAction(ISD::BR_CC, MVT::i8, Custom); setOperationAction(ISD::BR_CC, MVT::i16, Custom); setOperationAction(ISD::BRCOND, MVT::Other, Expand); - setOperationAction(ISD::SETCC, MVT::i8, Expand); - setOperationAction(ISD::SETCC, MVT::i16, Expand); + setOperationAction(ISD::SETCC, MVT::i8, Custom); + setOperationAction(ISD::SETCC, MVT::i16, Custom); setOperationAction(ISD::SELECT, MVT::i8, Expand); setOperationAction(ISD::SELECT, MVT::i16, Expand); setOperationAction(ISD::SELECT_CC, MVT::i8, Custom); @@ -142,6 +165,15 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) : setOperationAction(ISD::SDIV, MVT::i16, Expand); setOperationAction(ISD::SDIVREM, MVT::i16, Expand); setOperationAction(ISD::SREM, MVT::i16, Expand); + + // Libcalls names. + if (HWMultMode == HWMultIntr) { + setLibcallName(RTLIB::MUL_I8, "__mulqi3hw"); + setLibcallName(RTLIB::MUL_I16, "__mulhi3hw"); + } else if (HWMultMode == HWMultNoIntr) { + setLibcallName(RTLIB::MUL_I8, "__mulqi3hw_noint"); + setLibcallName(RTLIB::MUL_I16, "__mulhi3hw_noint"); + } } SDValue MSP430TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { @@ -151,9 +183,12 @@ SDValue MSP430TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { case ISD::SRA: return LowerShifts(Op, DAG); case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG); + case ISD::SETCC: return LowerSETCC(Op, DAG); case ISD::BR_CC: return LowerBR_CC(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, DAG); + case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); + case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); default: llvm_unreachable("unimplemented operand"); return SDValue(); @@ -225,6 +260,13 @@ MSP430TargetLowering::LowerFormalArguments(SDValue Chain, case CallingConv::C: case CallingConv::Fast: return LowerCCCArguments(Chain, CallConv, isVarArg, Ins, dl, DAG, InVals); + case CallingConv::MSP430_INTR: + if (Ins.empty()) + return Chain; + else { + llvm_report_error("ISRs cannot have arguments"); + return SDValue(); + } } } @@ -244,6 +286,9 @@ MSP430TargetLowering::LowerCall(SDValue Chain, SDValue Callee, case CallingConv::C: return LowerCCCCallTo(Chain, Callee, CallConv, isVarArg, isTailCall, Outs, Ins, dl, DAG, InVals); + case CallingConv::MSP430_INTR: + llvm_report_error("ISRs cannot be called directly"); + return SDValue(); } } @@ -340,6 +385,12 @@ MSP430TargetLowering::LowerReturn(SDValue Chain, // CCValAssign - represent the assignment of the return value to a location SmallVector<CCValAssign, 16> RVLocs; + // ISRs cannot return any value. + if (CallConv == CallingConv::MSP430_INTR && !Outs.empty()) { + llvm_report_error("ISRs cannot return any value"); + return SDValue(); + } + // CCState - Info about the registers and stack slot. CCState CCInfo(CallConv, isVarArg, getTargetMachine(), RVLocs, *DAG.getContext()); @@ -370,11 +421,14 @@ MSP430TargetLowering::LowerReturn(SDValue Chain, Flag = Chain.getValue(1); } + unsigned Opc = (CallConv == CallingConv::MSP430_INTR ? + MSP430ISD::RETI_FLAG : MSP430ISD::RET_FLAG); + if (Flag.getNode()) - return DAG.getNode(MSP430ISD::RET_FLAG, dl, MVT::Other, Chain, Flag); + return DAG.getNode(Opc, dl, MVT::Other, Chain, Flag); // Return Void - return DAG.getNode(MSP430ISD::RET_FLAG, dl, MVT::Other, Chain); + return DAG.getNode(Opc, dl, MVT::Other, Chain); } /// LowerCCCCallTo - functions arguments are copied from virtual regs to @@ -538,9 +592,21 @@ SDValue MSP430TargetLowering::LowerShifts(SDValue Op, EVT VT = Op.getValueType(); DebugLoc dl = N->getDebugLoc(); - // We currently only lower shifts of constant argument. + // Expand non-constant shifts to loops: if (!isa<ConstantSDNode>(N->getOperand(1))) - return SDValue(); + switch (Opc) { + default: + assert(0 && "Invalid shift opcode!"); + case ISD::SHL: + return DAG.getNode(MSP430ISD::SHL, dl, + VT, N->getOperand(0), N->getOperand(1)); + case ISD::SRA: + return DAG.getNode(MSP430ISD::SRA, dl, + VT, N->getOperand(0), N->getOperand(1)); + case ISD::SRL: + return DAG.getNode(MSP430ISD::SRL, dl, + VT, N->getOperand(0), N->getOperand(1)); + } uint64_t ShiftAmount = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); @@ -648,6 +714,88 @@ SDValue MSP430TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) { Chain, Dest, TargetCC, Flag); } + +SDValue MSP430TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + DebugLoc dl = Op.getDebugLoc(); + + // If we are doing an AND and testing against zero, then the CMP + // will not be generated. The AND (or BIT) will generate the condition codes, + // but they are different from CMP. + bool andCC = false; + if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) { + if (RHSC->isNullValue() && LHS.hasOneUse() && + (LHS.getOpcode() == ISD::AND || + (LHS.getOpcode() == ISD::TRUNCATE && + LHS.getOperand(0).getOpcode() == ISD::AND))) { + andCC = true; + } + } + ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); + SDValue TargetCC; + SDValue Flag = EmitCMP(LHS, RHS, TargetCC, CC, dl, DAG); + + // Get the condition codes directly from the status register, if its easy. + // Otherwise a branch will be generated. Note that the AND and BIT + // instructions generate different flags than CMP, the carry bit can be used + // for NE/EQ. + bool Invert = false; + bool Shift = false; + bool Convert = true; + switch (cast<ConstantSDNode>(TargetCC)->getZExtValue()) { + default: + Convert = false; + break; + case MSP430CC::COND_HS: + // Res = SRW & 1, no processing is required + break; + case MSP430CC::COND_LO: + // Res = ~(SRW & 1) + Invert = true; + break; + case MSP430CC::COND_NE: + if (andCC) { + // C = ~Z, thus Res = SRW & 1, no processing is required + } else { + // Res = (SRW >> 1) & 1 + Shift = true; + } + break; + case MSP430CC::COND_E: + if (andCC) { + // C = ~Z, thus Res = ~(SRW & 1) + } else { + // Res = ~((SRW >> 1) & 1) + Shift = true; + } + Invert = true; + break; + } + EVT VT = Op.getValueType(); + SDValue One = DAG.getConstant(1, VT); + if (Convert) { + SDValue SR = DAG.getCopyFromReg(DAG.getEntryNode(), dl, MSP430::SRW, + MVT::i16, Flag); + if (Shift) + // FIXME: somewhere this is turned into a SRL, lower it MSP specific? + SR = DAG.getNode(ISD::SRA, dl, MVT::i16, SR, One); + SR = DAG.getNode(ISD::AND, dl, MVT::i16, SR, One); + if (Invert) + SR = DAG.getNode(ISD::XOR, dl, MVT::i16, SR, One); + return SR; + } else { + SDValue Zero = DAG.getConstant(0, VT); + SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Flag); + SmallVector<SDValue, 4> Ops; + Ops.push_back(One); + Ops.push_back(Zero); + Ops.push_back(TargetCC); + Ops.push_back(Flag); + return DAG.getNode(MSP430ISD::SELECT_CC, dl, VTs, &Ops[0], Ops.size()); + } +} + SDValue MSP430TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); @@ -682,6 +830,55 @@ SDValue MSP430TargetLowering::LowerSIGN_EXTEND(SDValue Op, DAG.getValueType(Val.getValueType())); } +SDValue MSP430TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) { + MachineFunction &MF = DAG.getMachineFunction(); + MSP430MachineFunctionInfo *FuncInfo = MF.getInfo<MSP430MachineFunctionInfo>(); + int ReturnAddrIndex = FuncInfo->getRAIndex(); + + if (ReturnAddrIndex == 0) { + // Set up a frame object for the return address. + uint64_t SlotSize = TD->getPointerSize(); + ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize, + true, false); + FuncInfo->setRAIndex(ReturnAddrIndex); + } + + return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy()); +} + +SDValue MSP430TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) { + unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + DebugLoc dl = Op.getDebugLoc(); + + if (Depth > 0) { + SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); + SDValue Offset = + DAG.getConstant(TD->getPointerSize(), MVT::i16); + return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), + DAG.getNode(ISD::ADD, dl, getPointerTy(), + FrameAddr, Offset), + NULL, 0); + } + + // Just load the return address. + SDValue RetAddrFI = getReturnAddressFrameIndex(DAG); + return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), + RetAddrFI, NULL, 0); +} + +SDValue MSP430TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { + MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + MFI->setFrameAddressIsTaken(true); + EVT VT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful + unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, + MSP430::FPW, VT); + while (Depth--) + FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0); + return FrameAddr; +} + /// getPostIndexedAddressParts - returns true by value, base pointer and /// offset pointer and addressing mode by reference if this node can be /// combined with a load / store to form a post-indexed load / store. @@ -722,6 +919,7 @@ const char *MSP430TargetLowering::getTargetNodeName(unsigned Opcode) const { switch (Opcode) { default: return NULL; case MSP430ISD::RET_FLAG: return "MSP430ISD::RET_FLAG"; + case MSP430ISD::RETI_FLAG: return "MSP430ISD::RETI_FLAG"; case MSP430ISD::RRA: return "MSP430ISD::RRA"; case MSP430ISD::RLA: return "MSP430ISD::RLA"; case MSP430ISD::RRC: return "MSP430ISD::RRC"; @@ -730,6 +928,8 @@ const char *MSP430TargetLowering::getTargetNodeName(unsigned Opcode) const { case MSP430ISD::BR_CC: return "MSP430ISD::BR_CC"; case MSP430ISD::CMP: return "MSP430ISD::CMP"; case MSP430ISD::SELECT_CC: return "MSP430ISD::SELECT_CC"; + case MSP430ISD::SHL: return "MSP430ISD::SHL"; + case MSP430ISD::SRA: return "MSP430ISD::SRA"; } } @@ -738,13 +938,131 @@ const char *MSP430TargetLowering::getTargetNodeName(unsigned Opcode) const { //===----------------------------------------------------------------------===// MachineBasicBlock* +MSP430TargetLowering::EmitShiftInstr(MachineInstr *MI, + MachineBasicBlock *BB, + DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { + MachineFunction *F = BB->getParent(); + MachineRegisterInfo &RI = F->getRegInfo(); + DebugLoc dl = MI->getDebugLoc(); + const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo(); + + unsigned Opc; + const TargetRegisterClass * RC; + switch (MI->getOpcode()) { + default: + assert(0 && "Invalid shift opcode!"); + case MSP430::Shl8: + Opc = MSP430::SHL8r1; + RC = MSP430::GR8RegisterClass; + break; + case MSP430::Shl16: + Opc = MSP430::SHL16r1; + RC = MSP430::GR16RegisterClass; + break; + case MSP430::Sra8: + Opc = MSP430::SAR8r1; + RC = MSP430::GR8RegisterClass; + break; + case MSP430::Sra16: + Opc = MSP430::SAR16r1; + RC = MSP430::GR16RegisterClass; + break; + case MSP430::Srl8: + Opc = MSP430::SAR8r1c; + RC = MSP430::GR8RegisterClass; + break; + case MSP430::Srl16: + Opc = MSP430::SAR16r1c; + RC = MSP430::GR16RegisterClass; + break; + } + + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator I = BB; + ++I; + + // Create loop block + MachineBasicBlock *LoopBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *RemBB = F->CreateMachineBasicBlock(LLVM_BB); + + F->insert(I, LoopBB); + F->insert(I, RemBB); + + // Update machine-CFG edges by transferring all successors of the current + // block to the block containing instructions after shift. + RemBB->transferSuccessors(BB); + + // Inform sdisel of the edge changes. + for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + SE = BB->succ_end(); SI != SE; ++SI) + EM->insert(std::make_pair(*SI, RemBB)); + + // Add adges BB => LoopBB => RemBB, BB => RemBB, LoopBB => LoopBB + BB->addSuccessor(LoopBB); + BB->addSuccessor(RemBB); + LoopBB->addSuccessor(RemBB); + LoopBB->addSuccessor(LoopBB); + + unsigned ShiftAmtReg = RI.createVirtualRegister(MSP430::GR8RegisterClass); + unsigned ShiftAmtReg2 = RI.createVirtualRegister(MSP430::GR8RegisterClass); + unsigned ShiftReg = RI.createVirtualRegister(RC); + unsigned ShiftReg2 = RI.createVirtualRegister(RC); + unsigned ShiftAmtSrcReg = MI->getOperand(2).getReg(); + unsigned SrcReg = MI->getOperand(1).getReg(); + unsigned DstReg = MI->getOperand(0).getReg(); + + // BB: + // cmp 0, N + // je RemBB + BuildMI(BB, dl, TII.get(MSP430::CMP8ir)) + .addImm(0).addReg(ShiftAmtSrcReg); + BuildMI(BB, dl, TII.get(MSP430::JCC)) + .addMBB(RemBB) + .addImm(MSP430CC::COND_E); + + // LoopBB: + // ShiftReg = phi [%SrcReg, BB], [%ShiftReg2, LoopBB] + // ShiftAmt = phi [%N, BB], [%ShiftAmt2, LoopBB] + // ShiftReg2 = shift ShiftReg + // ShiftAmt2 = ShiftAmt - 1; + BuildMI(LoopBB, dl, TII.get(MSP430::PHI), ShiftReg) + .addReg(SrcReg).addMBB(BB) + .addReg(ShiftReg2).addMBB(LoopBB); + BuildMI(LoopBB, dl, TII.get(MSP430::PHI), ShiftAmtReg) + .addReg(ShiftAmtSrcReg).addMBB(BB) + .addReg(ShiftAmtReg2).addMBB(LoopBB); + BuildMI(LoopBB, dl, TII.get(Opc), ShiftReg2) + .addReg(ShiftReg); + BuildMI(LoopBB, dl, TII.get(MSP430::SUB8ri), ShiftAmtReg2) + .addReg(ShiftAmtReg).addImm(1); + BuildMI(LoopBB, dl, TII.get(MSP430::JCC)) + .addMBB(LoopBB) + .addImm(MSP430CC::COND_NE); + + // RemBB: + // DestReg = phi [%SrcReg, BB], [%ShiftReg, LoopBB] + BuildMI(RemBB, dl, TII.get(MSP430::PHI), DstReg) + .addReg(SrcReg).addMBB(BB) + .addReg(ShiftReg2).addMBB(LoopBB); + + return RemBB; +} + +MachineBasicBlock* MSP430TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB, DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { + unsigned Opc = MI->getOpcode(); + + if (Opc == MSP430::Shl8 || Opc == MSP430::Shl16 || + Opc == MSP430::Sra8 || Opc == MSP430::Sra16 || + Opc == MSP430::Srl8 || Opc == MSP430::Srl16) + return EmitShiftInstr(MI, BB, EM); + const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); - assert((MI->getOpcode() == MSP430::Select16 || - MI->getOpcode() == MSP430::Select8) && + + assert((Opc == MSP430::Select16 || Opc == MSP430::Select8) && "Unexpected instr type to insert"); // To "insert" a SELECT instruction, we actually have to insert the diamond diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h index d413ccb..4921500 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.h +++ b/lib/Target/MSP430/MSP430ISelLowering.h @@ -27,6 +27,9 @@ namespace llvm { /// Return with a flag operand. Operand 0 is the chain operand. RET_FLAG, + /// Same as RET_FLAG, but used for returning from ISRs. + RETI_FLAG, + /// Y = R{R,L}A X, rotate right (left) arithmetically RRA, RLA, @@ -44,7 +47,7 @@ namespace llvm { /// CMP - Compare instruction. CMP, - /// SetCC. Operand 0 is condition code, and operand 1 is the flag + /// SetCC - Operand 0 is condition code, and operand 1 is the flag /// operand produced by a CMP instruction. SETCC, @@ -54,9 +57,12 @@ namespace llvm { /// instruction. BR_CC, - /// SELECT_CC. Operand 0 and operand 1 are selection variable, operand 3 + /// SELECT_CC - Operand 0 and operand 1 are selection variable, operand 3 /// is condition code and operand 4 is flag operand. - SELECT_CC + SELECT_CC, + + /// SHL, SRA, SRL - Non-constant shifts. + SHL, SRA, SRL }; } @@ -81,8 +87,12 @@ namespace llvm { SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG); SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG); SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG); + SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG); SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG); SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG); + SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG); + SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG); + SDValue getReturnAddressFrameIndex(SelectionDAG &DAG); TargetLowering::ConstraintType getConstraintType(const std::string &Constraint) const; @@ -92,6 +102,9 @@ namespace llvm { MachineBasicBlock* EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB, DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const; + MachineBasicBlock* EmitShiftInstr(MachineInstr *MI, + MachineBasicBlock *BB, + DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const; private: SDValue LowerCCCCallTo(SDValue Chain, SDValue Callee, @@ -144,6 +157,7 @@ namespace llvm { const MSP430Subtarget &Subtarget; const MSP430TargetMachine &TM; + const TargetData *TD; }; } // namespace llvm diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp index b2f09c7..2ae6759 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.cpp +++ b/lib/Target/MSP430/MSP430InstrInfo.cpp @@ -219,17 +219,6 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { return false; } -bool MSP430InstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB)const{ - if (MBB.empty()) return false; - - switch (MBB.back().getOpcode()) { - case MSP430::RET: // Return. - case MSP430::JMP: // Uncond branch. - return true; - default: return false; - } -} - bool MSP430InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { const TargetInstrDesc &TID = MI->getDesc(); if (!TID.isTerminator()) return false; @@ -270,8 +259,8 @@ bool MSP430InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, } // If the block has any instructions after a JMP, delete them. - while (next(I) != MBB.end()) - next(I)->eraseFromParent(); + while (llvm::next(I) != MBB.end()) + llvm::next(I)->eraseFromParent(); Cond.clear(); FBB = 0; diff --git a/lib/Target/MSP430/MSP430InstrInfo.h b/lib/Target/MSP430/MSP430InstrInfo.h index 35e35db..e4ceeb9 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.h +++ b/lib/Target/MSP430/MSP430InstrInfo.h @@ -61,7 +61,6 @@ public: // Branch folding goodness bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; - bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const; bool isUnpredicatedTerminator(const MachineInstr *MI) const; bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, diff --git a/lib/Target/MSP430/MSP430InstrInfo.td b/lib/Target/MSP430/MSP430InstrInfo.td index 7a26f6c..d67ba90 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.td +++ b/lib/Target/MSP430/MSP430InstrInfo.td @@ -31,12 +31,15 @@ def SDT_MSP430BrCC : SDTypeProfile<0, 2, [SDTCisVT<0, OtherVT>, SDTCisVT<1, i8>]>; def SDT_MSP430SelectCC : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>; +def SDT_MSP430Shift : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisI8<2>]>; //===----------------------------------------------------------------------===// // MSP430 Specific Node Definitions. //===----------------------------------------------------------------------===// -def MSP430retflag : SDNode<"MSP430ISD::RET_FLAG", SDTNone, - [SDNPHasChain, SDNPOptInFlag]>; +def MSP430retflag : SDNode<"MSP430ISD::RET_FLAG", SDTNone, + [SDNPHasChain, SDNPOptInFlag]>; +def MSP430retiflag : SDNode<"MSP430ISD::RETI_FLAG", SDTNone, + [SDNPHasChain, SDNPOptInFlag]>; def MSP430rra : SDNode<"MSP430ISD::RRA", SDTIntUnaryOp, []>; def MSP430rla : SDNode<"MSP430ISD::RLA", SDTIntUnaryOp, []>; @@ -54,6 +57,9 @@ def MSP430Wrapper : SDNode<"MSP430ISD::Wrapper", SDT_MSP430Wrapper>; def MSP430cmp : SDNode<"MSP430ISD::CMP", SDT_MSP430Cmp, [SDNPOutFlag]>; def MSP430brcc : SDNode<"MSP430ISD::BR_CC", SDT_MSP430BrCC, [SDNPHasChain, SDNPInFlag]>; def MSP430selectcc: SDNode<"MSP430ISD::SELECT_CC", SDT_MSP430SelectCC, [SDNPInFlag]>; +def MSP430shl : SDNode<"MSP430ISD::SHL", SDT_MSP430Shift, []>; +def MSP430sra : SDNode<"MSP430ISD::SRA", SDT_MSP430Shift, []>; +def MSP430srl : SDNode<"MSP430ISD::SRL", SDT_MSP430Shift, []>; //===----------------------------------------------------------------------===// // MSP430 Operand Definitions. @@ -90,7 +96,9 @@ def addr : ComplexPattern<iPTR, 2, "SelectAddr", [], []>; // Pattern Fragments def zextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (zextloadi8 node:$ptr))>; def extloadi16i8 : PatFrag<(ops node:$ptr), (i16 ( extloadi8 node:$ptr))>; - +def and_su : PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs), [{ + return N->hasOneUse(); +}]>; //===----------------------------------------------------------------------===// // Instruction list.. @@ -117,6 +125,27 @@ let usesCustomInserter = 1 in { "# Select16 PSEUDO", [(set GR16:$dst, (MSP430selectcc GR16:$src1, GR16:$src2, imm:$cc))]>; + let Defs = [SRW] in { + def Shl8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$cnt), + "# Shl8 PSEUDO", + [(set GR8:$dst, (MSP430shl GR8:$src, GR8:$cnt))]>; + def Shl16 : Pseudo<(outs GR16:$dst), (ins GR16:$src, GR8:$cnt), + "# Shl16 PSEUDO", + [(set GR16:$dst, (MSP430shl GR16:$src, GR8:$cnt))]>; + def Sra8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$cnt), + "# Sra8 PSEUDO", + [(set GR8:$dst, (MSP430sra GR8:$src, GR8:$cnt))]>; + def Sra16 : Pseudo<(outs GR16:$dst), (ins GR16:$src, GR8:$cnt), + "# Sra16 PSEUDO", + [(set GR16:$dst, (MSP430sra GR16:$src, GR8:$cnt))]>; + def Srl8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$cnt), + "# Srl8 PSEUDO", + [(set GR8:$dst, (MSP430srl GR8:$src, GR8:$cnt))]>; + def Srl16 : Pseudo<(outs GR16:$dst), (ins GR16:$src, GR8:$cnt), + "# Srl16 PSEUDO", + [(set GR16:$dst, (MSP430srl GR16:$src, GR8:$cnt))]>; + + } } let neverHasSideEffects = 1 in @@ -128,7 +157,8 @@ def NOP : Pseudo<(outs), (ins), "nop", []>; // FIXME: Provide proper encoding! let isReturn = 1, isTerminator = 1, isBarrier = 1 in { - def RET : Pseudo<(outs), (ins), "ret", [(MSP430retflag)]>; + def RET : Pseudo<(outs), (ins), "ret", [(MSP430retflag)]>; + def RETI : Pseudo<(outs), (ins), "reti", [(MSP430retiflag)]>; } let isBranch = 1, isTerminator = 1 in { @@ -823,6 +853,65 @@ def CMP16mr : Pseudo<(outs), (ins memsrc:$src1, GR16:$src2), "cmp.w\t{$src1, $src2}", [(MSP430cmp (load addr:$src1), GR16:$src2), (implicit SRW)]>; + +// BIT TESTS, just sets condition codes +// Note that the C condition is set differently than when using CMP. +let isCommutable = 1 in { +def BIT8rr : Pseudo<(outs), (ins GR8:$src1, GR8:$src2), + "bit.b\t{$src2, $src1}", + [(MSP430cmp 0, (and_su GR8:$src1, GR8:$src2)), + (implicit SRW)]>; +def BIT16rr : Pseudo<(outs), (ins GR16:$src1, GR16:$src2), + "bit.w\t{$src2, $src1}", + [(MSP430cmp 0, (and_su GR16:$src1, GR16:$src2)), + (implicit SRW)]>; +} +def BIT8ri : Pseudo<(outs), (ins GR8:$src1, i8imm:$src2), + "bit.b\t{$src2, $src1}", + [(MSP430cmp 0, (and_su GR8:$src1, imm:$src2)), + (implicit SRW)]>; +def BIT16ri : Pseudo<(outs), (ins GR16:$src1, i16imm:$src2), + "bit.w\t{$src2, $src1}", + [(MSP430cmp 0, (and_su GR16:$src1, imm:$src2)), + (implicit SRW)]>; + +def BIT8rm : Pseudo<(outs), (ins GR8:$src1, memdst:$src2), + "bit.b\t{$src2, $src1}", + [(MSP430cmp 0, (and_su GR8:$src1, (load addr:$src2))), + (implicit SRW)]>; +def BIT16rm : Pseudo<(outs), (ins GR16:$src1, memdst:$src2), + "bit.w\t{$src2, $src1}", + [(MSP430cmp 0, (and_su GR16:$src1, (load addr:$src2))), + (implicit SRW)]>; + +def BIT8mr : Pseudo<(outs), (ins memsrc:$src1, GR8:$src2), + "bit.b\t{$src2, $src1}", + [(MSP430cmp 0, (and_su (load addr:$src1), GR8:$src2)), + (implicit SRW)]>; +def BIT16mr : Pseudo<(outs), (ins memsrc:$src1, GR16:$src2), + "bit.w\t{$src2, $src1}", + [(MSP430cmp 0, (and_su (load addr:$src1), GR16:$src2)), + (implicit SRW)]>; + +def BIT8mi : Pseudo<(outs), (ins memsrc:$src1, i8imm:$src2), + "bit.b\t{$src2, $src1}", + [(MSP430cmp 0, (and_su (load addr:$src1), (i8 imm:$src2))), + (implicit SRW)]>; +def BIT16mi : Pseudo<(outs), (ins memsrc:$src1, i16imm:$src2), + "bit.w\t{$src2, $src1}", + [(MSP430cmp 0, (and_su (load addr:$src1), (i16 imm:$src2))), + (implicit SRW)]>; + +def BIT8mm : Pseudo<(outs), (ins memsrc:$src1, memsrc:$src2), + "bit.b\t{$src2, $src1}", + [(MSP430cmp 0, (and_su (i8 (load addr:$src1)), + (load addr:$src2))), + (implicit SRW)]>; +def BIT16mm : Pseudo<(outs), (ins memsrc:$src1, memsrc:$src2), + "bit.w\t{$src2, $src1}", + [(MSP430cmp 0, (and_su (i16 (load addr:$src1)), + (load addr:$src2))), + (implicit SRW)]>; } // Defs = [SRW] //===----------------------------------------------------------------------===// @@ -905,3 +994,6 @@ def : Pat<(store (subc (load addr:$dst), (i8 (load addr:$src))), addr:$dst), // peephole patterns def : Pat<(and GR16:$src, 255), (ZEXT16r GR16:$src)>; +def : Pat<(MSP430cmp 0, (trunc (and_su GR16:$src1, GR16:$src2))), + (BIT8rr (EXTRACT_SUBREG GR16:$src1, subreg_8bit), + (EXTRACT_SUBREG GR16:$src2, subreg_8bit))>; diff --git a/lib/Target/MSP430/MSP430MachineFunctionInfo.h b/lib/Target/MSP430/MSP430MachineFunctionInfo.h index 1d26ae3..383fd2e 100644 --- a/lib/Target/MSP430/MSP430MachineFunctionInfo.h +++ b/lib/Target/MSP430/MSP430MachineFunctionInfo.h @@ -25,14 +25,20 @@ class MSP430MachineFunctionInfo : public MachineFunctionInfo { /// stack frame in bytes. unsigned CalleeSavedFrameSize; + /// ReturnAddrIndex - FrameIndex for return slot. + int ReturnAddrIndex; + public: MSP430MachineFunctionInfo() : CalleeSavedFrameSize(0) {} explicit MSP430MachineFunctionInfo(MachineFunction &MF) - : CalleeSavedFrameSize(0) {} + : CalleeSavedFrameSize(0), ReturnAddrIndex(0) {} unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; } void setCalleeSavedFrameSize(unsigned bytes) { CalleeSavedFrameSize = bytes; } + + int getRAIndex() const { return ReturnAddrIndex; } + void setRAIndex(int Index) { ReturnAddrIndex = Index; } }; } // End llvm namespace diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp index 92baad9..e85c7a2 100644 --- a/lib/Target/MSP430/MSP430RegisterInfo.cpp +++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp @@ -17,6 +17,7 @@ #include "MSP430MachineFunctionInfo.h" #include "MSP430RegisterInfo.h" #include "MSP430TargetMachine.h" +#include "llvm/Function.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -37,17 +38,26 @@ MSP430RegisterInfo::MSP430RegisterInfo(MSP430TargetMachine &tm, const unsigned* MSP430RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { + const Function* F = MF->getFunction(); static const unsigned CalleeSavedRegs[] = { MSP430::FPW, MSP430::R5W, MSP430::R6W, MSP430::R7W, MSP430::R8W, MSP430::R9W, MSP430::R10W, MSP430::R11W, 0 }; + static const unsigned CalleeSavedRegsIntr[] = { + MSP430::FPW, MSP430::R5W, MSP430::R6W, MSP430::R7W, + MSP430::R8W, MSP430::R9W, MSP430::R10W, MSP430::R11W, + MSP430::R12W, MSP430::R13W, MSP430::R14W, MSP430::R15W, + 0 + }; - return CalleeSavedRegs; + return (F->getCallingConv() == CallingConv::MSP430_INTR ? + CalleeSavedRegsIntr : CalleeSavedRegs); } const TargetRegisterClass *const * MSP430RegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { + const Function* F = MF->getFunction(); static const TargetRegisterClass * const CalleeSavedRegClasses[] = { &MSP430::GR16RegClass, &MSP430::GR16RegClass, &MSP430::GR16RegClass, &MSP430::GR16RegClass, @@ -55,8 +65,18 @@ MSP430RegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { &MSP430::GR16RegClass, &MSP430::GR16RegClass, 0 }; + static const TargetRegisterClass * const CalleeSavedRegClassesIntr[] = { + &MSP430::GR16RegClass, &MSP430::GR16RegClass, + &MSP430::GR16RegClass, &MSP430::GR16RegClass, + &MSP430::GR16RegClass, &MSP430::GR16RegClass, + &MSP430::GR16RegClass, &MSP430::GR16RegClass, + &MSP430::GR16RegClass, &MSP430::GR16RegClass, + &MSP430::GR16RegClass, &MSP430::GR16RegClass, + 0 + }; - return CalleeSavedRegClasses; + return (F->getCallingConv() == CallingConv::MSP430_INTR ? + CalleeSavedRegClassesIntr : CalleeSavedRegClasses); } BitVector MSP430RegisterInfo::getReservedRegs(const MachineFunction &MF) const { @@ -193,10 +213,10 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // We need to materialize the offset via add instruction. unsigned DstReg = MI.getOperand(0).getReg(); if (Offset < 0) - BuildMI(MBB, next(II), dl, TII.get(MSP430::SUB16ri), DstReg) + BuildMI(MBB, llvm::next(II), dl, TII.get(MSP430::SUB16ri), DstReg) .addReg(DstReg).addImm(-Offset); else - BuildMI(MBB, next(II), dl, TII.get(MSP430::ADD16ri), DstReg) + BuildMI(MBB, llvm::next(II), dl, TII.get(MSP430::ADD16ri), DstReg) .addReg(DstReg).addImm(Offset); return 0; @@ -251,7 +271,7 @@ void MSP430RegisterInfo::emitPrologue(MachineFunction &MF) const { .addReg(MSP430::SPW); // Mark the FramePtr as live-in in every block except the entry. - for (MachineFunction::iterator I = next(MF.begin()), E = MF.end(); + for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end(); I != E; ++I) I->addLiveIn(MSP430::FPW); @@ -292,7 +312,8 @@ void MSP430RegisterInfo::emitEpilogue(MachineFunction &MF, DebugLoc DL = MBBI->getDebugLoc(); switch (RetOpcode) { - case MSP430::RET: break; // These are ok + case MSP430::RET: + case MSP430::RETI: break; // These are ok default: llvm_unreachable("Can only insert epilog into returning blocks"); } diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp index 6d8e160..48b9bdf 100644 --- a/lib/Target/Mips/MipsInstrInfo.cpp +++ b/lib/Target/Mips/MipsInstrInfo.cpp @@ -590,22 +590,6 @@ RemoveBranch(MachineBasicBlock &MBB) const return 2; } -/// BlockHasNoFallThrough - Analyze if MachineBasicBlock does not -/// fall-through into its successor block. -bool MipsInstrInfo:: -BlockHasNoFallThrough(const MachineBasicBlock &MBB) const -{ - if (MBB.empty()) return false; - - switch (MBB.back().getOpcode()) { - case Mips::RET: // Return. - case Mips::JR: // Indirect branch. - case Mips::J: // Uncond branch. - return true; - default: return false; - } -} - /// ReverseBranchCondition - Return the inverse opcode of the /// specified Branch instruction. bool MipsInstrInfo:: diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h index 249d3de..ab8dc59 100644 --- a/lib/Target/Mips/MipsInstrInfo.h +++ b/lib/Target/Mips/MipsInstrInfo.h @@ -232,7 +232,6 @@ public: return 0; } - virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const; virtual bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index 0083598..af7d812 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -740,18 +740,6 @@ bool PPCInstrInfo::canFoldMemoryOperand(const MachineInstr *MI, } -bool PPCInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const { - if (MBB.empty()) return false; - - switch (MBB.back().getOpcode()) { - case PPC::BLR: // Return. - case PPC::B: // Uncond branch. - case PPC::BCTR: // Indirect branch. - return true; - default: return false; - } -} - bool PPCInstrInfo:: ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { assert(Cond.size() == 2 && "Invalid PPC branch opcode!"); diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h index bb0dc15a..57facac 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.h +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -143,7 +143,6 @@ public: virtual bool canFoldMemoryOperand(const MachineInstr *MI, const SmallVectorImpl<unsigned> &Ops) const; - virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const; virtual bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; diff --git a/lib/Target/README.txt b/lib/Target/README.txt index 2d8a687..e1772c2 100644 --- a/lib/Target/README.txt +++ b/lib/Target/README.txt @@ -801,8 +801,21 @@ void bar(unsigned n) { true(); } -I think this basically amounts to a dag combine to simplify comparisons against -multiply hi's into a comparison against the mullo. +This is equivalent to the following, where 2863311531 is the multiplicative +inverse of 3, and 1431655766 is ((2^32)-1)/3+1: +void bar(unsigned n) { + if (n * 2863311531U < 1431655766U) + true(); +} + +The same transformation can work with an even modulo with the addition of a +rotate: rotate the result of the multiply to the right by the number of bits +which need to be zero for the condition to be true, and shrink the compare RHS +by the same amount. Unless the target supports rotates, though, that +transformation probably isn't worthwhile. + +The transformation can also easily be made to work with non-zero equality +comparisons: just transform, for example, "n % 3 == 1" to "(n-1) % 3 == 0". //===---------------------------------------------------------------------===// @@ -823,20 +836,6 @@ int main() { //===---------------------------------------------------------------------===// -Instcombine will merge comparisons like (x >= 10) && (x < 20) by producing (x - -10) u< 10, but only when the comparisons have matching sign. - -This could be converted with a similiar technique. (PR1941) - -define i1 @test(i8 %x) { - %A = icmp uge i8 %x, 5 - %B = icmp slt i8 %x, 20 - %C = and i1 %A, %B - ret i1 %C -} - -//===---------------------------------------------------------------------===// - These functions perform the same computation, but produce different assembly. define i8 @select(i8 %x) readnone nounwind { @@ -884,18 +883,6 @@ The expression should optimize to something like //===---------------------------------------------------------------------===// -From GCC Bug 15241: -unsigned int -foo (unsigned int a, unsigned int b) -{ - if (a <= 7 && b <= 7) - baz (); -} -Should combine to "(a|b) <= 7". Currently not optimized with "clang --emit-llvm-bc | opt -std-compile-opts". - -//===---------------------------------------------------------------------===// - From GCC Bug 3756: int pn (int n) @@ -907,19 +894,6 @@ Should combine to (n >> 31) | 1. Currently not optimized with "clang //===---------------------------------------------------------------------===// -From GCC Bug 28685: -int test(int a, int b) -{ - int lt = a < b; - int eq = a == b; - - return (lt || eq); -} -Should combine to "a <= b". Currently not optimized with "clang --emit-llvm-bc | opt -std-compile-opts | llc". - -//===---------------------------------------------------------------------===// - void a(int variable) { if (variable == 4 || variable == 6) @@ -993,12 +967,6 @@ Should combine to 0. Currently not optimized with "clang //===---------------------------------------------------------------------===// -int a(unsigned char* b) {return *b > 99;} -There's an unnecessary zext in the generated code with "clang --emit-llvm-bc | opt -std-compile-opts". - -//===---------------------------------------------------------------------===// - int a(unsigned b) {return ((b << 31) | (b << 30)) >> 31;} Should be combined to "((b >> 1) | b) & 1". Currently not optimized with "clang -emit-llvm-bc | opt -std-compile-opts". @@ -1011,12 +979,6 @@ Should combine to "x | (y & 3)". Currently not optimized with "clang //===---------------------------------------------------------------------===// -unsigned a(unsigned a) {return ((a | 1) & 3) | (a & -4);} -Should combine to "a | 1". Currently not optimized with "clang --emit-llvm-bc | opt -std-compile-opts". - -//===---------------------------------------------------------------------===// - int a(int a, int b, int c) {return (~a & c) | ((c|a) & b);} Should fold to "(~a & c) | (a & b)". Currently not optimized with "clang -emit-llvm-bc | opt -std-compile-opts". @@ -1704,3 +1666,50 @@ need all but the bottom two bits from %A, and if we gave that mask to SDB it would delete the or instruction for us. //===---------------------------------------------------------------------===// + +FunctionAttrs is not marking this function as readnone (just readonly): +$ clang t.c -emit-llvm -S -o - -O0 | opt -mem2reg -S -functionattrs + +int t(int a, int b, int c) { + int *p; + if (a) + p = &a; + else + p = &c; + return *p; +} + +This is because we codegen this to: + +define i32 @t(i32 %a, i32 %b, i32 %c) nounwind readonly ssp { +entry: + %a.addr = alloca i32 ; <i32*> [#uses=3] + %c.addr = alloca i32 ; <i32*> [#uses=2] +... + +if.end: + %p.0 = phi i32* [ %a.addr, %if.then ], [ %c.addr, %if.else ] + %tmp2 = load i32* %p.0 ; <i32> [#uses=1] + ret i32 %tmp2 +} + +And functionattrs doesn't realize that the p.0 load points to function local +memory. + +Also, functionattrs doesn't know about memcpy/memset. This function should be +marked readnone, since it only twiddles local memory, but functionattrs doesn't +handle memset/memcpy/memmove aggressively: + +struct X { int *p; int *q; }; +int foo() { + int i = 0, j = 1; + struct X x, y; + int **p; + y.p = &i; + x.q = &j; + p = __builtin_memcpy (&x, &y, sizeof (int *)); + return **p; +} + +//===---------------------------------------------------------------------===// + diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp index d82d928..5fa7e8c 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -402,18 +402,6 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { return false; } -bool SystemZInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB)const{ - if (MBB.empty()) return false; - - switch (MBB.back().getOpcode()) { - case SystemZ::RET: // Return. - case SystemZ::JMP: // Uncond branch. - case SystemZ::JMPr: // Indirect branch. - return true; - default: return false; - } -} - bool SystemZInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { const TargetInstrDesc &TID = MI->getDesc(); if (!TID.isTerminator()) return false; @@ -454,8 +442,8 @@ bool SystemZInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, } // If the block has any instructions after a JMP, delete them. - while (next(I) != MBB.end()) - next(I)->eraseFromParent(); + while (llvm::next(I) != MBB.end()) + llvm::next(I)->eraseFromParent(); Cond.clear(); FBB = 0; diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h index e16d704..ef3b39e 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/lib/Target/SystemZ/SystemZInstrInfo.h @@ -89,7 +89,6 @@ public: const std::vector<CalleeSavedInfo> &CSI) const; bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; - virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const; virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const; virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp index 4d1c01f..1318195 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -247,7 +247,7 @@ void SystemZRegisterInfo::emitPrologue(MachineFunction &MF) const { .addReg(SystemZ::R15D); // Mark the FramePtr as live-in in every block except the entry. - for (MachineFunction::iterator I = next(MF.begin()), E = MF.end(); + for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end(); I != E; ++I) I->addLiveIn(SystemZ::R11D); diff --git a/lib/Target/TargetData.cpp b/lib/Target/TargetData.cpp index fc71bc3..9434a19 100644 --- a/lib/Target/TargetData.cpp +++ b/lib/Target/TargetData.cpp @@ -117,14 +117,6 @@ TargetAlignElem::operator==(const TargetAlignElem &rhs) const { && TypeBitWidth == rhs.TypeBitWidth); } -std::ostream & -TargetAlignElem::dump(std::ostream &os) const { - return os << AlignType - << TypeBitWidth - << ":" << (int) (ABIAlign * 8) - << ":" << (int) (PrefAlign * 8); -} - const TargetAlignElem TargetData::InvalidAlignmentElem = TargetAlignElem::get((AlignTypeEnum) -1, 0, 0, 0); @@ -323,11 +315,10 @@ unsigned TargetData::getAlignmentInfo(AlignTypeEnum AlignType, : Alignments[BestMatchIdx].PrefAlign; } -typedef DenseMap<const StructType*, StructLayout*> LayoutInfoTy; - -namespace llvm { +namespace { class StructLayoutMap : public AbstractTypeUser { + typedef DenseMap<const StructType*, StructLayout*> LayoutInfoTy; LayoutInfoTy LayoutInfo; /// refineAbstractType - The callback method invoked when an abstract type is @@ -336,19 +327,11 @@ class StructLayoutMap : public AbstractTypeUser { /// virtual void refineAbstractType(const DerivedType *OldTy, const Type *) { - const StructType *STy = dyn_cast<const StructType>(OldTy); - if (!STy) { - OldTy->removeAbstractTypeUser(this); - return; - } - - StructLayout *SL = LayoutInfo[STy]; - if (SL) { - SL->~StructLayout(); - free(SL); - LayoutInfo[STy] = NULL; - } - + const StructType *STy = cast<const StructType>(OldTy); + LayoutInfoTy::iterator Iter = LayoutInfo.find(STy); + Iter->second->~StructLayout(); + free(Iter->second); + LayoutInfo.erase(Iter); OldTy->removeAbstractTypeUser(this); } @@ -358,70 +341,43 @@ class StructLayoutMap : public AbstractTypeUser { /// This method notifies ATU's when this occurs for a type. /// virtual void typeBecameConcrete(const DerivedType *AbsTy) { - const StructType *STy = dyn_cast<const StructType>(AbsTy); - if (!STy) { - AbsTy->removeAbstractTypeUser(this); - return; - } - - StructLayout *SL = LayoutInfo[STy]; - if (SL) { - SL->~StructLayout(); - free(SL); - LayoutInfo[STy] = NULL; - } - + const StructType *STy = cast<const StructType>(AbsTy); + LayoutInfoTy::iterator Iter = LayoutInfo.find(STy); + Iter->second->~StructLayout(); + free(Iter->second); + LayoutInfo.erase(Iter); AbsTy->removeAbstractTypeUser(this); } - bool insert(const Type *Ty) { - if (Ty->isAbstract()) - Ty->addAbstractTypeUser(this); - return true; - } - public: virtual ~StructLayoutMap() { // Remove any layouts. for (LayoutInfoTy::iterator - I = LayoutInfo.begin(), E = LayoutInfo.end(); I != E; ++I) - if (StructLayout *SL = I->second) { - SL->~StructLayout(); - free(SL); - } - } + I = LayoutInfo.begin(), E = LayoutInfo.end(); I != E; ++I) { + const Type *Key = I->first; + StructLayout *Value = I->second; - inline LayoutInfoTy::iterator begin() { - return LayoutInfo.begin(); - } - inline LayoutInfoTy::iterator end() { - return LayoutInfo.end(); - } - inline LayoutInfoTy::const_iterator begin() const { - return LayoutInfo.begin(); - } - inline LayoutInfoTy::const_iterator end() const { - return LayoutInfo.end(); - } + if (Key->isAbstract()) + Key->removeAbstractTypeUser(this); - LayoutInfoTy::iterator find(const StructType *&Val) { - return LayoutInfo.find(Val); - } - LayoutInfoTy::const_iterator find(const StructType *&Val) const { - return LayoutInfo.find(Val); + Value->~StructLayout(); + free(Value); + } } - bool erase(const StructType *&Val) { - return LayoutInfo.erase(Val); - } - bool erase(LayoutInfoTy::iterator I) { - return LayoutInfo.erase(I); + void InvalidateEntry(const StructType *Ty) { + LayoutInfoTy::iterator I = LayoutInfo.find(Ty); + if (I == LayoutInfo.end()) return; + + I->second->~StructLayout(); + free(I->second); + LayoutInfo.erase(I); + + if (Ty->isAbstract()) + Ty->removeAbstractTypeUser(this); } - StructLayout *&operator[](const Type *Key) { - const StructType *STy = dyn_cast<const StructType>(Key); - assert(STy && "Trying to access the struct layout map with a non-struct!"); - insert(STy); + StructLayout *&operator[](const StructType *STy) { return LayoutInfo[STy]; } @@ -429,17 +385,18 @@ public: virtual void dump() const {} }; -} // end namespace llvm +} // end anonymous namespace TargetData::~TargetData() { - delete LayoutMap; + delete static_cast<StructLayoutMap*>(LayoutMap); } const StructLayout *TargetData::getStructLayout(const StructType *Ty) const { if (!LayoutMap) LayoutMap = new StructLayoutMap(); - StructLayout *&SL = (*LayoutMap)[Ty]; + StructLayoutMap *STM = static_cast<StructLayoutMap*>(LayoutMap); + StructLayout *&SL = (*STM)[Ty]; if (SL) return SL; // Otherwise, create the struct layout. Because it is variable length, we @@ -453,6 +410,10 @@ const StructLayout *TargetData::getStructLayout(const StructType *Ty) const { SL = L; new (L) StructLayout(Ty, *this); + + if (Ty->isAbstract()) + Ty->addAbstractTypeUser(STM); + return L; } @@ -463,15 +424,10 @@ const StructLayout *TargetData::getStructLayout(const StructType *Ty) const { void TargetData::InvalidateStructLayoutInfo(const StructType *Ty) const { if (!LayoutMap) return; // No cache. - DenseMap<const StructType*, StructLayout*>::iterator I = LayoutMap->find(Ty); - if (I == LayoutMap->end()) return; - - I->second->~StructLayout(); - free(I->second); - LayoutMap->erase(I); + StructLayoutMap *STM = static_cast<StructLayoutMap*>(LayoutMap); + STM->InvalidateEntry(Ty); } - std::string TargetData::getStringRepresentation() const { std::string Result; raw_string_ostream OS(Result); diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index a167118..684c61f 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -62,11 +62,6 @@ MCCodeEmitter *createX86MCCodeEmitter(const Target &, TargetMachine &TM); /// FunctionPass *createEmitX86CodeToMemory(); -/// createX86MaxStackAlignmentCalculatorPass - This function returns a pass -/// which calculates maximal stack alignment required for function -/// -FunctionPass *createX86MaxStackAlignmentCalculatorPass(); - extern Target TheX86_32Target, TheX86_64Target; } // End llvm namespace diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.h b/lib/Target/X86/X86COFFMachineModuleInfo.h index afd5525..5017af2 100644 --- a/lib/Target/X86/X86COFFMachineModuleInfo.h +++ b/lib/Target/X86/X86COFFMachineModuleInfo.h @@ -16,6 +16,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/ADT/StringSet.h" +#include "X86MachineFunctionInfo.h" namespace llvm { class X86MachineFunctionInfo; diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td index d77f039..12d3d04 100644 --- a/lib/Target/X86/X86CallingConv.td +++ b/lib/Target/X86/X86CallingConv.td @@ -64,11 +64,18 @@ def RetCC_X86_32_C : CallingConv<[ // X86-32 FastCC return-value convention. def RetCC_X86_32_Fast : CallingConv<[ // The X86-32 fastcc returns 1, 2, or 3 FP values in XMM0-2 if the target has - // SSE2, otherwise it is the the C calling conventions. + // SSE2. // This can happen when a float, 2 x float, or 3 x float vector is split by // target lowering, and is returned in 1-3 sse regs. CCIfType<[f32], CCIfSubtarget<"hasSSE2()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>, CCIfType<[f64], CCIfSubtarget<"hasSSE2()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>, + + // For integers, ECX can be used as an extra return register + CCIfType<[i8], CCAssignToReg<[AL, DL, CL]>>, + CCIfType<[i16], CCAssignToReg<[AX, DX, CX]>>, + CCIfType<[i32], CCAssignToReg<[EAX, EDX, ECX]>>, + + // Otherwise, it is the same as the common X86 calling convention. CCDelegateTo<RetCC_X86Common> ]>; diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index a2fe9b0..044bd4b 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -289,7 +289,7 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { while (Start != BB.begin() && prior(Start) != PrevI) --Start; errs() << "Inserted instructions:\n\t"; Start->print(errs(), &MF.getTarget()); - while (++Start != next(I)) {} + while (++Start != llvm::next(I)) {} } dumpStack(); ); diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index d80b8ec..0517b56 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -595,6 +595,18 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FP_TO_SINT, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::UINT_TO_FP, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::SINT_TO_FP, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, (MVT::SimpleValueType)VT,Expand); + setOperationAction(ISD::TRUNCATE, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::SIGN_EXTEND, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::ZERO_EXTEND, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::ANY_EXTEND, (MVT::SimpleValueType)VT, Expand); + for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; + InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT) + setTruncStoreAction((MVT::SimpleValueType)VT, + (MVT::SimpleValueType)InnerVT, Expand); + setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand); + setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand); + setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand); } // FIXME: In order to prevent SSE instructions being expanded to MMX ones @@ -671,8 +683,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i16, Custom); - setTruncStoreAction(MVT::v8i16, MVT::v8i8, Expand); - setOperationAction(ISD::TRUNCATE, MVT::v8i8, Expand); setOperationAction(ISD::SELECT, MVT::v8i8, Promote); setOperationAction(ISD::SELECT, MVT::v4i16, Promote); setOperationAction(ISD::SELECT, MVT::v2i32, Promote); @@ -3344,6 +3354,82 @@ static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp, } SDValue +X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl, + SelectionDAG &DAG) { + + // Check if the scalar load can be widened into a vector load. And if + // the address is "base + cst" see if the cst can be "absorbed" into + // the shuffle mask. + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(SrcOp)) { + SDValue Ptr = LD->getBasePtr(); + if (!ISD::isNormalLoad(LD) || LD->isVolatile()) + return SDValue(); + EVT PVT = LD->getValueType(0); + if (PVT != MVT::i32 && PVT != MVT::f32) + return SDValue(); + + int FI = -1; + int64_t Offset = 0; + if (FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr)) { + FI = FINode->getIndex(); + Offset = 0; + } else if (Ptr.getOpcode() == ISD::ADD && + isa<ConstantSDNode>(Ptr.getOperand(1)) && + isa<FrameIndexSDNode>(Ptr.getOperand(0))) { + FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex(); + Offset = Ptr.getConstantOperandVal(1); + Ptr = Ptr.getOperand(0); + } else { + return SDValue(); + } + + SDValue Chain = LD->getChain(); + // Make sure the stack object alignment is at least 16. + MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + if (DAG.InferPtrAlignment(Ptr) < 16) { + if (MFI->isFixedObjectIndex(FI)) { + // Can't change the alignment. Reference stack + offset explicitly + // if stack pointer is at least 16-byte aligned. + unsigned StackAlign = Subtarget->getStackAlignment(); + if (StackAlign < 16) + return SDValue(); + Offset = MFI->getObjectOffset(FI) + Offset; + SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr, + getPointerTy()); + Ptr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, + DAG.getConstant(Offset & ~15, getPointerTy())); + Offset %= 16; + } else { + MFI->setObjectAlignment(FI, 16); + } + } + + // (Offset % 16) must be multiple of 4. Then address is then + // Ptr + (Offset & ~15). + if (Offset < 0) + return SDValue(); + if ((Offset % 16) & 3) + return SDValue(); + int64_t StartOffset = Offset & ~15; + if (StartOffset) + Ptr = DAG.getNode(ISD::ADD, Ptr.getDebugLoc(), Ptr.getValueType(), + Ptr,DAG.getConstant(StartOffset, Ptr.getValueType())); + + int EltNo = (Offset - StartOffset) >> 2; + int Mask[4] = { EltNo, EltNo, EltNo, EltNo }; + EVT VT = (PVT == MVT::i32) ? MVT::v4i32 : MVT::v4f32; + SDValue V1 = DAG.getLoad(VT, dl, Chain, Ptr,LD->getSrcValue(),0); + // Canonicalize it to a v4i32 shuffle. + V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, V1); + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, + DAG.getVectorShuffle(MVT::v4i32, dl, V1, + DAG.getUNDEF(MVT::v4i32), &Mask[0])); + } + + return SDValue(); +} + +SDValue X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { DebugLoc dl = Op.getDebugLoc(); // All zero's are handled with pxor, all one's are handled with pcmpeqd. @@ -3486,8 +3572,19 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { } // Splat is obviously ok. Let legalizer expand it to a shuffle. - if (Values.size() == 1) + if (Values.size() == 1) { + if (EVTBits == 32) { + // Instead of a shuffle like this: + // shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0> + // Check if it's possible to issue this instead. + // shuffle (vload ptr)), undef, <1, 1, 1, 1> + unsigned Idx = CountTrailingZeros_32(NonZeros); + SDValue Item = Op.getOperand(Idx); + if (Op.getNode()->isOnlyUserOf(Item.getNode())) + return LowerAsSplatVectorLoad(Item, VT, dl, DAG); + } return SDValue(); + } // A vector full of immediates; various special cases are already // handled, so this is best done with a single constant-pool load. @@ -4278,7 +4375,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { unsigned ShAmt = 0; SDValue ShVal; bool isShift = getSubtarget()->hasSSE2() && - isVectorShift(SVOp, DAG, isLeft, ShVal, ShAmt); + isVectorShift(SVOp, DAG, isLeft, ShVal, ShAmt); if (isShift && ShVal.hasOneUse()) { // If the shifted value has multiple uses, it may be cheaper to use // v_set0 + movlhps or movhlps, etc. @@ -4815,6 +4912,7 @@ static SDValue GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA, SDValue *InFlag, const EVT PtrVT, unsigned ReturnReg, unsigned char OperandFlags) { + MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); DebugLoc dl = GA->getDebugLoc(); SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), @@ -4828,6 +4926,10 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA, SDValue Ops[] = { Chain, TGA }; Chain = DAG.getNode(X86ISD::TLSADDR, dl, NodeTys, Ops, 2); } + + // TLSADDR will be codegen'ed as call. Inform MFI that function has calls. + MFI->setHasCalls(true); + SDValue Flag = Chain.getValue(1); return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Flag); } @@ -5648,6 +5750,17 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { return SDValue(); SDValue Cond = EmitCmp(Op0, Op1, X86CC, DAG); + + // Use sbb x, x to materialize carry bit into a GPR. + // FIXME: Temporarily disabled since it breaks self-hosting. It's apparently + // miscompiling ARMISelDAGToDAG.cpp. + if (0 && !isFP && X86CC == X86::COND_B) { + return DAG.getNode(ISD::AND, dl, MVT::i8, + DAG.getNode(X86ISD::SETCC_CARRY, dl, MVT::i8, + DAG.getConstant(X86CC, MVT::i8), Cond), + DAG.getConstant(1, MVT::i8)); + } + return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, DAG.getConstant(X86CC, MVT::i8), Cond); } @@ -5800,9 +5913,18 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) { Cond = NewCond; } + // Look pass (and (setcc_carry (cmp ...)), 1). + if (Cond.getOpcode() == ISD::AND && + Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) { + ConstantSDNode *C = dyn_cast<ConstantSDNode>(Cond.getOperand(1)); + if (C && C->getAPIntValue() == 1) + Cond = Cond.getOperand(0); + } + // If condition flag is set by a X86ISD::CMP, then use it as the condition // setting operand in place of the X86ISD::SETCC. - if (Cond.getOpcode() == X86ISD::SETCC) { + if (Cond.getOpcode() == X86ISD::SETCC || + Cond.getOpcode() == X86ISD::SETCC_CARRY) { CC = Cond.getOperand(0); SDValue Cmp = Cond.getOperand(1); @@ -5885,9 +6007,18 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) { Cond = LowerXALUO(Cond, DAG); #endif + // Look pass (and (setcc_carry (cmp ...)), 1). + if (Cond.getOpcode() == ISD::AND && + Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) { + ConstantSDNode *C = dyn_cast<ConstantSDNode>(Cond.getOperand(1)); + if (C && C->getAPIntValue() == 1) + Cond = Cond.getOperand(0); + } + // If condition flag is set by a X86ISD::CMP, then use it as the condition // setting operand in place of the X86ISD::SETCC. - if (Cond.getOpcode() == X86ISD::SETCC) { + if (Cond.getOpcode() == X86ISD::SETCC || + Cond.getOpcode() == X86ISD::SETCC_CARRY) { CC = Cond.getOperand(0); SDValue Cmp = Cond.getOperand(1); @@ -7274,6 +7405,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::COMI: return "X86ISD::COMI"; case X86ISD::UCOMI: return "X86ISD::UCOMI"; case X86ISD::SETCC: return "X86ISD::SETCC"; + case X86ISD::SETCC_CARRY: return "X86ISD::SETCC_CARRY"; case X86ISD::CMOV: return "X86ISD::CMOV"; case X86ISD::BRCOND: return "X86ISD::BRCOND"; case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG"; @@ -8327,16 +8459,6 @@ bool X86TargetLowering::isGAPlusOffset(SDNode *N, return TargetLowering::isGAPlusOffset(N, GA, Offset); } -static bool isBaseAlignmentOfN(unsigned N, SDNode *Base, - const TargetLowering &TLI) { - GlobalValue *GV; - int64_t Offset = 0; - if (TLI.isGAPlusOffset(Base, GV, Offset)) - return (GV->getAlignment() >= N && (Offset % N) == 0); - // DAG combine handles the stack object case. - return false; -} - static bool EltsFromConsecutiveLoads(ShuffleVectorSDNode *N, unsigned NumElems, EVT EltVT, LoadSDNode *&LDBase, unsigned &LastLoadedElt, @@ -8366,7 +8488,7 @@ static bool EltsFromConsecutiveLoads(ShuffleVectorSDNode *N, unsigned NumElems, continue; LoadSDNode *LD = cast<LoadSDNode>(Elt); - if (!TLI.isConsecutiveLoad(LD, LDBase, EltVT.getSizeInBits()/8, i, MFI)) + if (!DAG.isConsecutiveLoad(LD, LDBase, EltVT.getSizeInBits()/8, i)) return false; LastLoadedElt = i; } @@ -8399,7 +8521,7 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); if (LastLoadedElt == NumElems - 1) { - if (isBaseAlignmentOfN(16, LD->getBasePtr().getNode(), TLI)) + if (DAG.InferPtrAlignment(LD->getBasePtr()) >= 16) return DAG.getLoad(VT, dl, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(), LD->getSrcValueOffset(), LD->isVolatile()); @@ -8858,11 +8980,42 @@ static SDValue PerformMulCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + EVT VT = N0.getValueType(); + + // fold (shl (and (setcc_c), c1), c2) -> (and setcc_c, (c1 << c2)) + // since the result of setcc_c is all zero's or all ones. + if (N1C && N0.getOpcode() == ISD::AND && + N0.getOperand(1).getOpcode() == ISD::Constant) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == X86ISD::SETCC_CARRY || + ((N00.getOpcode() == ISD::ANY_EXTEND || + N00.getOpcode() == ISD::ZERO_EXTEND) && + N00.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY)) { + APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); + APInt ShAmt = N1C->getAPIntValue(); + Mask = Mask.shl(ShAmt); + if (Mask != 0) + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, + N00, DAG.getConstant(Mask, VT)); + } + } + + return SDValue(); +} /// PerformShiftCombine - Transforms vector shift nodes to use vector shifts /// when possible. static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, const X86Subtarget *Subtarget) { + EVT VT = N->getValueType(0); + if (!VT.isVector() && VT.isInteger() && + N->getOpcode() == ISD::SHL) + return PerformSHLCombine(N, DAG); + // On X86 with SSE2 support, we can transform this to a vector shift if // all elements are shifted by the same amount. We can't do this in legalize // because the a constant vector is typically transformed to a constant pool @@ -8870,7 +9023,6 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, if (!Subtarget->hasSSE2()) return SDValue(); - EVT VT = N->getValueType(0); if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16) return SDValue(); diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 7b4ab62..64bc70c 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -118,6 +118,10 @@ namespace llvm { /// operand produced by a CMP instruction. SETCC, + // Same as SETCC except it's materialized with a sbb and the value is all + // one's or all zero's. + SETCC_CARRY, + /// X86 conditional moves. Operand 0 and operand 1 are the two values /// to select from. Operand 2 is the condition code, and operand 3 is the /// flag operand produced by a CMP or TEST instruction. It also writes a @@ -626,7 +630,9 @@ namespace llvm { std::pair<SDValue,SDValue> FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool isSigned); - + + SDValue LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl, + SelectionDAG &DAG); SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG); SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG); SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG); diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td index b5fa862..b6a2c05 100644 --- a/lib/Target/X86/X86Instr64bit.td +++ b/lib/Target/X86/X86Instr64bit.td @@ -1333,6 +1333,15 @@ def CMOVNO64rm : RI<0x41, MRMSrcMem, // if !overflow, GR64 = [mem64] X86_COND_NO, EFLAGS))]>, TB; } // isTwoAddress +// Use sbb to materialize carry flag into a GPR. +let Defs = [EFLAGS], Uses = [EFLAGS], isCodeGenOnly = 1 in +def SETB_C64r : RI<0x19, MRMInitReg, (outs GR64:$dst), (ins), + "sbb{q}\t$dst, $dst", + [(set GR64:$dst, (zext (X86setcc_c X86_COND_B, EFLAGS)))]>; + +def : Pat<(i64 (anyext (X86setcc_c X86_COND_B, EFLAGS))), + (SETB_C64r)>; + //===----------------------------------------------------------------------===// // Conversion Instructions... // diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index a37013d..1947d35 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -783,12 +783,14 @@ unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI, if ((Reg = isLoadFromStackSlot(MI, FrameIndex))) return Reg; // Check for post-frame index elimination operations - return hasLoadFromStackSlot(MI, FrameIndex); + const MachineMemOperand *Dummy; + return hasLoadFromStackSlot(MI, Dummy, FrameIndex); } return 0; } bool X86InstrInfo::hasLoadFromStackSlot(const MachineInstr *MI, + const MachineMemOperand *&MMO, int &FrameIndex) const { for (MachineInstr::mmo_iterator o = MI->memoperands_begin(), oe = MI->memoperands_end(); @@ -798,6 +800,7 @@ bool X86InstrInfo::hasLoadFromStackSlot(const MachineInstr *MI, if (const FixedStackPseudoSourceValue *Value = dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) { FrameIndex = Value->getFrameIndex(); + MMO = *o; return true; } } @@ -819,12 +822,14 @@ unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI, if ((Reg = isStoreToStackSlot(MI, FrameIndex))) return Reg; // Check for post-frame index elimination operations - return hasStoreToStackSlot(MI, FrameIndex); + const MachineMemOperand *Dummy; + return hasStoreToStackSlot(MI, Dummy, FrameIndex); } return 0; } bool X86InstrInfo::hasStoreToStackSlot(const MachineInstr *MI, + const MachineMemOperand *&MMO, int &FrameIndex) const { for (MachineInstr::mmo_iterator o = MI->memoperands_begin(), oe = MI->memoperands_end(); @@ -834,6 +839,7 @@ bool X86InstrInfo::hasStoreToStackSlot(const MachineInstr *MI, if (const FixedStackPseudoSourceValue *Value = dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) { FrameIndex = Value->getFrameIndex(); + MMO = *o; return true; } } @@ -1052,6 +1058,112 @@ static bool hasLiveCondCodeDef(MachineInstr *MI) { return false; } +/// convertToThreeAddressWithLEA - Helper for convertToThreeAddress when +/// 16-bit LEA is disabled, use 32-bit LEA to form 3-address code by promoting +/// to a 32-bit superregister and then truncating back down to a 16-bit +/// subregister. +MachineInstr * +X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc, + MachineFunction::iterator &MFI, + MachineBasicBlock::iterator &MBBI, + LiveVariables *LV) const { + MachineInstr *MI = MBBI; + unsigned Dest = MI->getOperand(0).getReg(); + unsigned Src = MI->getOperand(1).getReg(); + bool isDead = MI->getOperand(0).isDead(); + bool isKill = MI->getOperand(1).isKill(); + + unsigned Opc = TM.getSubtarget<X86Subtarget>().is64Bit() + ? X86::LEA64_32r : X86::LEA32r; + MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo(); + unsigned leaInReg = RegInfo.createVirtualRegister(&X86::GR32RegClass); + unsigned leaOutReg = RegInfo.createVirtualRegister(&X86::GR32RegClass); + + // Build and insert into an implicit UNDEF value. This is OK because + // well be shifting and then extracting the lower 16-bits. + // This has the potential to cause partial register stall. e.g. + // movw (%rbp,%rcx,2), %dx + // leal -65(%rdx), %esi + // But testing has shown this *does* help performance in 64-bit mode (at + // least on modern x86 machines). + BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg); + MachineInstr *InsMI = + BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg) + .addReg(leaInReg) + .addReg(Src, getKillRegState(isKill)) + .addImm(X86::SUBREG_16BIT); + + MachineInstrBuilder MIB = BuildMI(*MFI, MBBI, MI->getDebugLoc(), + get(Opc), leaOutReg); + switch (MIOpc) { + default: + llvm_unreachable(0); + break; + case X86::SHL16ri: { + unsigned ShAmt = MI->getOperand(2).getImm(); + MIB.addReg(0).addImm(1 << ShAmt) + .addReg(leaInReg, RegState::Kill).addImm(0); + break; + } + case X86::INC16r: + case X86::INC64_16r: + addLeaRegOffset(MIB, leaInReg, true, 1); + break; + case X86::DEC16r: + case X86::DEC64_16r: + addLeaRegOffset(MIB, leaInReg, true, -1); + break; + case X86::ADD16ri: + case X86::ADD16ri8: + addLeaRegOffset(MIB, leaInReg, true, MI->getOperand(2).getImm()); + break; + case X86::ADD16rr: { + unsigned Src2 = MI->getOperand(2).getReg(); + bool isKill2 = MI->getOperand(2).isKill(); + unsigned leaInReg2 = 0; + MachineInstr *InsMI2 = 0; + if (Src == Src2) { + // ADD16rr %reg1028<kill>, %reg1028 + // just a single insert_subreg. + addRegReg(MIB, leaInReg, true, leaInReg, false); + } else { + leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32RegClass); + // Build and insert into an implicit UNDEF value. This is OK because + // well be shifting and then extracting the lower 16-bits. + BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg2); + InsMI2 = + BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg2) + .addReg(leaInReg2) + .addReg(Src2, getKillRegState(isKill2)) + .addImm(X86::SUBREG_16BIT); + addRegReg(MIB, leaInReg, true, leaInReg2, true); + } + if (LV && isKill2 && InsMI2) + LV->replaceKillInstruction(Src2, MI, InsMI2); + break; + } + } + + MachineInstr *NewMI = MIB; + MachineInstr *ExtMI = + BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::EXTRACT_SUBREG)) + .addReg(Dest, RegState::Define | getDeadRegState(isDead)) + .addReg(leaOutReg, RegState::Kill) + .addImm(X86::SUBREG_16BIT); + + if (LV) { + // Update live variables + LV->getVarInfo(leaInReg).Kills.push_back(NewMI); + LV->getVarInfo(leaOutReg).Kills.push_back(ExtMI); + if (isKill) + LV->replaceKillInstruction(Src, MI, InsMI); + if (isDead) + LV->replaceKillInstruction(Dest, MI, ExtMI); + } + + return ExtMI; +} + /// convertToThreeAddress - This method must be implemented by targets that /// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target /// may be able to convert a two-address instruction into a true @@ -1077,7 +1189,9 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, MachineInstr *NewMI = NULL; // FIXME: 16-bit LEA's are really slow on Athlons, but not bad on P4's. When // we have better subtarget support, enable the 16-bit LEA generation here. + // 16-bit LEA is also slow on Core2. bool DisableLEA16 = true; + bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit(); unsigned MIOpc = MI->getOpcode(); switch (MIOpc) { @@ -1116,8 +1230,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, unsigned ShAmt = MI->getOperand(2).getImm(); if (ShAmt == 0 || ShAmt >= 4) return 0; - unsigned Opc = TM.getSubtarget<X86Subtarget>().is64Bit() ? - X86::LEA64_32r : X86::LEA32r; + unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r; NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc)) .addReg(Dest, RegState::Define | getDeadRegState(isDead)) .addReg(0).addImm(1 << ShAmt) @@ -1131,51 +1244,13 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, unsigned ShAmt = MI->getOperand(2).getImm(); if (ShAmt == 0 || ShAmt >= 4) return 0; - if (DisableLEA16) { - // If 16-bit LEA is disabled, use 32-bit LEA via subregisters. - MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo(); - unsigned Opc = TM.getSubtarget<X86Subtarget>().is64Bit() - ? X86::LEA64_32r : X86::LEA32r; - unsigned leaInReg = RegInfo.createVirtualRegister(&X86::GR32RegClass); - unsigned leaOutReg = RegInfo.createVirtualRegister(&X86::GR32RegClass); - - // Build and insert into an implicit UNDEF value. This is OK because - // well be shifting and then extracting the lower 16-bits. - BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg); - MachineInstr *InsMI = - BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg) - .addReg(leaInReg) - .addReg(Src, getKillRegState(isKill)) - .addImm(X86::SUBREG_16BIT); - - NewMI = BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(Opc), leaOutReg) - .addReg(0).addImm(1 << ShAmt) - .addReg(leaInReg, RegState::Kill) - .addImm(0); - - MachineInstr *ExtMI = - BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::EXTRACT_SUBREG)) - .addReg(Dest, RegState::Define | getDeadRegState(isDead)) - .addReg(leaOutReg, RegState::Kill) - .addImm(X86::SUBREG_16BIT); - - if (LV) { - // Update live variables - LV->getVarInfo(leaInReg).Kills.push_back(NewMI); - LV->getVarInfo(leaOutReg).Kills.push_back(ExtMI); - if (isKill) - LV->replaceKillInstruction(Src, MI, InsMI); - if (isDead) - LV->replaceKillInstruction(Dest, MI, ExtMI); - } - return ExtMI; - } else { - NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) - .addReg(Dest, RegState::Define | getDeadRegState(isDead)) - .addReg(0).addImm(1 << ShAmt) - .addReg(Src, getKillRegState(isKill)) - .addImm(0); - } + if (DisableLEA16) + return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; + NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) + .addReg(Dest, RegState::Define | getDeadRegState(isDead)) + .addReg(0).addImm(1 << ShAmt) + .addReg(Src, getKillRegState(isKill)) + .addImm(0); break; } default: { @@ -1185,7 +1260,6 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, if (hasLiveCondCodeDef(MI)) return 0; - bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit(); switch (MIOpc) { default: return 0; case X86::INC64r: @@ -1202,7 +1276,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, } case X86::INC16r: case X86::INC64_16r: - if (DisableLEA16) return 0; + if (DisableLEA16) + return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!"); NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) .addReg(Dest, RegState::Define | @@ -1223,7 +1298,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, } case X86::DEC16r: case X86::DEC64_16r: - if (DisableLEA16) return 0; + if (DisableLEA16) + return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!"); NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) .addReg(Dest, RegState::Define | @@ -1246,7 +1322,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, break; } case X86::ADD16rr: { - if (DisableLEA16) return 0; + if (DisableLEA16) + return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); unsigned Src2 = MI->getOperand(2).getReg(); bool isKill2 = MI->getOperand(2).isKill(); @@ -1261,56 +1338,32 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, case X86::ADD64ri32: case X86::ADD64ri8: assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); - if (MI->getOperand(2).isImm()) - NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r)) - .addReg(Dest, RegState::Define | - getDeadRegState(isDead)), - Src, isKill, MI->getOperand(2).getImm()); + NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r)) + .addReg(Dest, RegState::Define | + getDeadRegState(isDead)), + Src, isKill, MI->getOperand(2).getImm()); break; case X86::ADD32ri: - case X86::ADD32ri8: + case X86::ADD32ri8: { assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); - if (MI->getOperand(2).isImm()) { - unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r; - NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) - .addReg(Dest, RegState::Define | - getDeadRegState(isDead)), + unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r; + NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) + .addReg(Dest, RegState::Define | + getDeadRegState(isDead)), Src, isKill, MI->getOperand(2).getImm()); - } break; + } case X86::ADD16ri: case X86::ADD16ri8: - if (DisableLEA16) return 0; + if (DisableLEA16) + return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); - if (MI->getOperand(2).isImm()) - NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) - .addReg(Dest, RegState::Define | - getDeadRegState(isDead)), - Src, isKill, MI->getOperand(2).getImm()); - break; - case X86::SHL16ri: - if (DisableLEA16) return 0; - case X86::SHL32ri: - case X86::SHL64ri: { - assert(MI->getNumOperands() >= 3 && MI->getOperand(2).isImm() && - "Unknown shl instruction!"); - unsigned ShAmt = MI->getOperand(2).getImm(); - if (ShAmt == 1 || ShAmt == 2 || ShAmt == 3) { - X86AddressMode AM; - AM.Scale = 1 << ShAmt; - AM.IndexReg = Src; - unsigned Opc = MIOpc == X86::SHL64ri ? X86::LEA64r - : (MIOpc == X86::SHL32ri - ? (is64Bit ? X86::LEA64_32r : X86::LEA32r) : X86::LEA16r); - NewMI = addFullAddress(BuildMI(MF, MI->getDebugLoc(), get(Opc)) - .addReg(Dest, RegState::Define | - getDeadRegState(isDead)), AM); - if (isKill) - NewMI->getOperand(3).setIsKill(true); - } + NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) + .addReg(Dest, RegState::Define | + getDeadRegState(isDead)), + Src, isKill, MI->getOperand(2).getImm()); break; } - } } } @@ -1571,14 +1624,17 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock::iterator I = MBB.end(); while (I != MBB.begin()) { --I; - // Working from the bottom, when we see a non-terminator - // instruction, we're done. + + // Working from the bottom, when we see a non-terminator instruction, we're + // done. if (!isBrAnalysisUnpredicatedTerminator(I, *this)) break; - // A terminator that isn't a branch can't easily be handled - // by this analysis. + + // A terminator that isn't a branch can't easily be handled by this + // analysis. if (!I->getDesc().isBranch()) return true; + // Handle unconditional branches. if (I->getOpcode() == X86::JMP) { if (!AllowModify) { @@ -1587,10 +1643,12 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, } // If the block has any instructions after a JMP, delete them. - while (next(I) != MBB.end()) - next(I)->eraseFromParent(); + while (llvm::next(I) != MBB.end()) + llvm::next(I)->eraseFromParent(); + Cond.clear(); FBB = 0; + // Delete the JMP if it's equivalent to a fall-through. if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) { TBB = 0; @@ -1598,14 +1656,17 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, I = MBB.end(); continue; } + // TBB is used to indicate the unconditinal destination. TBB = I->getOperand(0).getMBB(); continue; } + // Handle conditional branches. X86::CondCode BranchCode = GetCondFromBranchOpc(I->getOpcode()); if (BranchCode == X86::COND_INVALID) return true; // Can't handle indirect branch. + // Working from the bottom, handle the first conditional branch. if (Cond.empty()) { FBB = TBB; @@ -1613,24 +1674,26 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, Cond.push_back(MachineOperand::CreateImm(BranchCode)); continue; } - // Handle subsequent conditional branches. Only handle the case - // where all conditional branches branch to the same destination - // and their condition opcodes fit one of the special - // multi-branch idioms. + + // Handle subsequent conditional branches. Only handle the case where all + // conditional branches branch to the same destination and their condition + // opcodes fit one of the special multi-branch idioms. assert(Cond.size() == 1); assert(TBB); - // Only handle the case where all conditional branches branch to - // the same destination. + + // Only handle the case where all conditional branches branch to the same + // destination. if (TBB != I->getOperand(0).getMBB()) return true; - X86::CondCode OldBranchCode = (X86::CondCode)Cond[0].getImm(); + // If the conditions are the same, we can leave them alone. + X86::CondCode OldBranchCode = (X86::CondCode)Cond[0].getImm(); if (OldBranchCode == BranchCode) continue; - // If they differ, see if they fit one of the known patterns. - // Theoretically we could handle more patterns here, but - // we shouldn't expect to see them if instruction selection - // has done a reasonable job. + + // If they differ, see if they fit one of the known patterns. Theoretically, + // we could handle more patterns here, but we shouldn't expect to see them + // if instruction selection has done a reasonable job. if ((OldBranchCode == X86::COND_NP && BranchCode == X86::COND_E) || (OldBranchCode == X86::COND_E && @@ -1643,6 +1706,7 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, BranchCode = X86::COND_NE_OR_P; else return true; + // Update the MachineOperand. Cond[0].setImm(BranchCode); } @@ -2713,27 +2777,6 @@ unsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc, return I->second.first; } -bool X86InstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const { - if (MBB.empty()) return false; - - switch (MBB.back().getOpcode()) { - case X86::TCRETURNri: - case X86::TCRETURNdi: - case X86::RET: // Return. - case X86::RETI: - case X86::TAILJMPd: - case X86::TAILJMPr: - case X86::TAILJMPm: - case X86::JMP: // Uncond branch. - case X86::JMP32r: // Indirect branch. - case X86::JMP64r: // Indirect branch (64-bit). - case X86::JMP32m: // Indirect branch through mem. - case X86::JMP64m: // Indirect branch through mem (64-bit). - return true; - default: return false; - } -} - bool X86InstrInfo:: ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { assert(Cond.size() == 1 && "Invalid X86 branch condition!"); diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index c6daa25..b83441d 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -457,11 +457,14 @@ public: /// hasLoadFromStackSlot - If the specified machine instruction has /// a load from a stack slot, return true along with the FrameIndex - /// of the loaded stack slot. If not, return false. Unlike + /// of the loaded stack slot and the machine mem operand containing + /// the reference. If not, return false. Unlike /// isLoadFromStackSlot, this returns true for any instructions that /// loads from the stack. This is a hint only and may not catch all /// cases. - bool hasLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const; + bool hasLoadFromStackSlot(const MachineInstr *MI, + const MachineMemOperand *&MMO, + int &FrameIndex) const; unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const; /// isStoreToStackSlotPostFE - Check for post-frame ptr elimination @@ -472,11 +475,13 @@ public: /// hasStoreToStackSlot - If the specified machine instruction has a /// store to a stack slot, return true along with the FrameIndex of - /// the loaded stack slot. If not, return false. Unlike - /// isStoreToStackSlot, this returns true for any instructions that - /// loads from the stack. This is a hint only and may not catch all - /// cases. - bool hasStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const; + /// the loaded stack slot and the machine mem operand containing the + /// reference. If not, return false. Unlike isStoreToStackSlot, + /// this returns true for any instructions that loads from the + /// stack. This is a hint only and may not catch all cases. + bool hasStoreToStackSlot(const MachineInstr *MI, + const MachineMemOperand *&MMO, + int &FrameIndex) const; bool isReallyTriviallyReMaterializable(const MachineInstr *MI, AliasAnalysis *AA) const; @@ -595,7 +600,6 @@ public: bool UnfoldLoad, bool UnfoldStore, unsigned *LoadRegIndex = 0) const; - virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const; virtual bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; @@ -633,6 +637,11 @@ public: unsigned getGlobalBaseReg(MachineFunction *MF) const; private: + MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc, + MachineFunction::iterator &MFI, + MachineBasicBlock::iterator &MBBI, + LiveVariables *LV) const; + MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, MachineInstr* MI, unsigned OpNum, diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 90ef1f4..3cc1853 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -87,6 +87,7 @@ def X86cmov : SDNode<"X86ISD::CMOV", SDTX86Cmov>; def X86brcond : SDNode<"X86ISD::BRCOND", SDTX86BrCond, [SDNPHasChain]>; def X86setcc : SDNode<"X86ISD::SETCC", SDTX86SetCC>; +def X86setcc_c : SDNode<"X86ISD::SETCC_CARRY", SDTX86SetCC>; def X86cas : SDNode<"X86ISD::LCMPXCHG_DAG", SDTX86cas, [SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore, @@ -816,7 +817,7 @@ def BSR32rm : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), let neverHasSideEffects = 1 in def LEA16r : I<0x8D, MRMSrcMem, - (outs GR16:$dst), (ins i32mem:$src), + (outs GR16:$dst), (ins lea32mem:$src), "lea{w}\t{$src|$dst}, {$dst|$src}", []>, OpSize; let isReMaterializable = 1 in def LEA32r : I<0x8D, MRMSrcMem, @@ -3059,6 +3060,21 @@ let Defs = [AH], Uses = [EFLAGS], neverHasSideEffects = 1 in def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", []>; // AH = flags let Uses = [EFLAGS] in { +// Use sbb to materialize carry bit. + +let Defs = [EFLAGS], isCodeGenOnly = 1 in { +def SETB_C8r : I<0x18, MRMInitReg, (outs GR8:$dst), (ins), + "sbb{b}\t$dst, $dst", + [(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>; +def SETB_C16r : I<0x19, MRMInitReg, (outs GR16:$dst), (ins), + "sbb{w}\t$dst, $dst", + [(set GR16:$dst, (zext (X86setcc_c X86_COND_B, EFLAGS)))]>, + OpSize; +def SETB_C32r : I<0x19, MRMInitReg, (outs GR32:$dst), (ins), + "sbb{l}\t$dst, $dst", + [(set GR32:$dst, (zext (X86setcc_c X86_COND_B, EFLAGS)))]>; +} // isCodeGenOnly + def SETEr : I<0x94, MRM0r, (outs GR8 :$dst), (ins), "sete\t$dst", @@ -4169,6 +4185,12 @@ def : Pat<(store (shld (loadi16 addr:$dst), (i8 imm:$amt1), GR16:$src2, (i8 imm:$amt2)), addr:$dst), (SHLD16mri8 addr:$dst, GR16:$src2, (i8 imm:$amt1))>; +// (anyext (setcc_carry)) -> (zext (setcc_carry)) +def : Pat<(i16 (anyext (X86setcc_c X86_COND_B, EFLAGS))), + (SETB_C16r)>; +def : Pat<(i32 (anyext (X86setcc_c X86_COND_B, EFLAGS))), + (SETB_C32r)>; + //===----------------------------------------------------------------------===// // EFLAGS-defining Patterns //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index dfdd4ce..62841f8 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -2083,7 +2083,7 @@ def PSHUFDmi : PDIi8<0x70, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), "pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (v4i32 (pshufd:$src2 - (bc_v4i32(memopv2i64 addr:$src1)), + (bc_v4i32 (memopv2i64 addr:$src1)), (undef))))]>; } diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 33852bd..d96aafd 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -423,21 +423,6 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { // Stack Frame Processing methods //===----------------------------------------------------------------------===// -static unsigned calculateMaxStackAlignment(const MachineFrameInfo *FFI) { - unsigned MaxAlign = 0; - - for (int i = FFI->getObjectIndexBegin(), - e = FFI->getObjectIndexEnd(); i != e; ++i) { - if (FFI->isDeadObjectIndex(i)) - continue; - - unsigned Align = FFI->getObjectAlignment(i); - MaxAlign = std::max(MaxAlign, Align); - } - - return MaxAlign; -} - /// hasFP - Return true if the specified function should have a dedicated frame /// pointer register. This is true if the function has variable sized allocas /// or if frame pointer elimination is disabled. @@ -638,10 +623,7 @@ X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // Calculate and set max stack object alignment early, so we can decide // whether we will need stack realignment (and thus FP). - unsigned MaxAlign = std::max(MFI->getMaxAlignment(), - calculateMaxStackAlignment(MFI)); - - MFI->setMaxAlignment(MaxAlign); + MFI->calculateMaxStackAlignment(); X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); int32_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); @@ -741,7 +723,7 @@ void mergeSPUpdatesDown(MachineBasicBlock &MBB, if (MBBI == MBB.end()) return; - MachineBasicBlock::iterator NI = next(MBBI); + MachineBasicBlock::iterator NI = llvm::next(MBBI); if (NI == MBB.end()) return; unsigned Opc = NI->getOpcode(); @@ -775,7 +757,7 @@ static int mergeSPUpdates(MachineBasicBlock &MBB, return 0; MachineBasicBlock::iterator PI = doMergeWithPrevious ? prior(MBBI) : MBBI; - MachineBasicBlock::iterator NI = doMergeWithPrevious ? 0 : next(MBBI); + MachineBasicBlock::iterator NI = doMergeWithPrevious ? 0 : llvm::next(MBBI); unsigned Opc = PI->getOpcode(); int Offset = 0; @@ -1001,7 +983,7 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { } // Mark the FramePtr as live-in in every block except the entry. - for (MachineFunction::iterator I = next(MF.begin()), E = MF.end(); + for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end(); I != E; ++I) I->addLiveIn(FramePtr); @@ -1482,45 +1464,3 @@ unsigned getX86SubSuperRegister(unsigned Reg, EVT VT, bool High) { } #include "X86GenRegisterInfo.inc" - -namespace { - struct MSAC : public MachineFunctionPass { - static char ID; - MSAC() : MachineFunctionPass(&ID) {} - - virtual bool runOnMachineFunction(MachineFunction &MF) { - MachineFrameInfo *FFI = MF.getFrameInfo(); - MachineRegisterInfo &RI = MF.getRegInfo(); - - // Calculate max stack alignment of all already allocated stack objects. - unsigned MaxAlign = calculateMaxStackAlignment(FFI); - - // Be over-conservative: scan over all vreg defs and find, whether vector - // registers are used. If yes - there is probability, that vector register - // will be spilled and thus stack needs to be aligned properly. - for (unsigned RegNum = TargetRegisterInfo::FirstVirtualRegister; - RegNum < RI.getLastVirtReg(); ++RegNum) - MaxAlign = std::max(MaxAlign, RI.getRegClass(RegNum)->getAlignment()); - - if (FFI->getMaxAlignment() == MaxAlign) - return false; - - FFI->setMaxAlignment(MaxAlign); - return true; - } - - virtual const char *getPassName() const { - return "X86 Maximal Stack Alignment Calculator"; - } - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - MachineFunctionPass::getAnalysisUsage(AU); - } - }; - - char MSAC::ID = 0; -} - -FunctionPass* -llvm::createX86MaxStackAlignmentCalculatorPass() { return new MSAC(); } diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 661f560..75cdbad 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -367,5 +367,5 @@ bool X86Subtarget::enablePostRAScheduler( RegClassVector& CriticalPathRCs) const { Mode = TargetSubtarget::ANTIDEP_CRITICAL; CriticalPathRCs.clear(); - return OptLevel >= CodeGenOpt::Default; + return OptLevel >= CodeGenOpt::Aggressive; } diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 0cda8bc..0152121 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -163,7 +163,7 @@ bool X86TargetMachine::addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { // Calculate and set max stack object alignment early, so we can decide // whether we will need stack realignment (and thus FP). - PM.add(createX86MaxStackAlignmentCalculatorPass()); + PM.add(createMaxStackAlignmentCalculatorPass()); return false; // -print-machineinstr shouldn't print after this. } diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp index e616fe6..5a54844 100644 --- a/lib/Target/XCore/XCoreInstrInfo.cpp +++ b/lib/Target/XCore/XCoreInstrInfo.cpp @@ -453,26 +453,6 @@ bool XCoreInstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, return true; } -/// BlockHasNoFallThrough - Analyse if MachineBasicBlock does not -/// fall-through into its successor block. -bool XCoreInstrInfo:: -BlockHasNoFallThrough(const MachineBasicBlock &MBB) const -{ - if (MBB.empty()) return false; - - switch (MBB.back().getOpcode()) { - case XCore::RETSP_u6: // Return. - case XCore::RETSP_lu6: - case XCore::BAU_1r: // Indirect branch. - case XCore::BRFU_u6: // Uncond branch. - case XCore::BRFU_lu6: - case XCore::BRBU_u6: - case XCore::BRBU_lu6: - return true; - default: return false; - } -} - /// ReverseBranchCondition - Return the inverse opcode of the /// specified Branch instruction. bool XCoreInstrInfo:: diff --git a/lib/Target/XCore/XCoreInstrInfo.h b/lib/Target/XCore/XCoreInstrInfo.h index 24230ac..3e0a765 100644 --- a/lib/Target/XCore/XCoreInstrInfo.h +++ b/lib/Target/XCore/XCoreInstrInfo.h @@ -87,8 +87,6 @@ public: MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI) const; - virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const; - virtual bool ReverseBranchCondition( SmallVectorImpl<MachineOperand> &Cond) const; }; diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 4635d0e..1793bbf 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -2493,29 +2493,28 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) { Changed = true; } - // If the aliasee has internal linkage, give it the name and linkage - // of the alias, and delete the alias. This turns: - // define internal ... @f(...) - // @a = alias ... @f - // into: - // define ... @a(...) - if (!Target->hasLocalLinkage()) - continue; - - // The transform is only useful if the alias does not have internal linkage. - if (J->hasLocalLinkage()) - continue; + // If the alias is externally visible, we may still be able to simplify it. + if (!J->hasLocalLinkage()) { + // If the aliasee has internal linkage, give it the name and linkage + // of the alias, and delete the alias. This turns: + // define internal ... @f(...) + // @a = alias ... @f + // into: + // define ... @a(...) + if (!Target->hasLocalLinkage()) + continue; - // Do not perform the transform if multiple aliases potentially target the - // aliasee. This check also ensures that it is safe to replace the section - // and other attributes of the aliasee with those of the alias. - if (!hasOneUse) - continue; + // Do not perform the transform if multiple aliases potentially target the + // aliasee. This check also ensures that it is safe to replace the section + // and other attributes of the aliasee with those of the alias. + if (!hasOneUse) + continue; - // Give the aliasee the name, linkage and other attributes of the alias. - Target->takeName(J); - Target->setLinkage(J->getLinkage()); - Target->GlobalValue::copyAttributesFrom(J); + // Give the aliasee the name, linkage and other attributes of the alias. + Target->takeName(J); + Target->setLinkage(J->getLinkage()); + Target->GlobalValue::copyAttributesFrom(J); + } // Delete the alias. M.getAliasList().erase(J); diff --git a/lib/Transforms/Instrumentation/MaximumSpanningTree.h b/lib/Transforms/Instrumentation/MaximumSpanningTree.h index 2951dbc..829da6b 100644 --- a/lib/Transforms/Instrumentation/MaximumSpanningTree.h +++ b/lib/Transforms/Instrumentation/MaximumSpanningTree.h @@ -15,6 +15,7 @@ #ifndef LLVM_ANALYSIS_MAXIMUMSPANNINGTREE_H #define LLVM_ANALYSIS_MAXIMUMSPANNINGTREE_H +#include "llvm/BasicBlock.h" #include "llvm/ADT/EquivalenceClasses.h" #include <vector> #include <algorithm> @@ -33,6 +34,18 @@ namespace llvm { typename MaximumSpanningTree<CT>::EdgeWeight Y) const { if (X.second > Y.second) return true; if (X.second < Y.second) return false; + if (const BasicBlock *BBX = dyn_cast<BasicBlock>(X.first.first)) { + if (const BasicBlock *BBY = dyn_cast<BasicBlock>(Y.first.first)) { + if (BBX->size() > BBY->size()) return true; + if (BBX->size() < BBY->size()) return false; + } + } + if (const BasicBlock *BBX = dyn_cast<BasicBlock>(X.first.second)) { + if (const BasicBlock *BBY = dyn_cast<BasicBlock>(Y.first.second)) { + if (BBX->size() > BBY->size()) return true; + if (BBX->size() < BBY->size()) return false; + } + } return false; } }; diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index 9ca90c3..e4c4ae5 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -21,7 +21,6 @@ #include "llvm/InlineAsm.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" #include "llvm/Pass.h" #include "llvm/Analysis/ProfileInfo.h" #include "llvm/Target/TargetData.h" @@ -563,7 +562,7 @@ static bool IsNonLocalValue(Value *V, BasicBlock *BB) { return false; } -/// OptimizeMemoryInst - Load and Store Instructions have often have +/// OptimizeMemoryInst - Load and Store Instructions often have /// addressing modes that can do significant amounts of computation. As such, /// instruction selection will try to get the load or store to do as much /// computation as possible for the program. The problem is that isel can only diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index b0988b5..1cfde8f 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -85,9 +85,14 @@ static bool doesClobberMemory(Instruction *I) { return true; if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { switch (II->getIntrinsicID()) { - default: return false; - case Intrinsic::memset: case Intrinsic::memmove: case Intrinsic::memcpy: - case Intrinsic::init_trampoline: case Intrinsic::lifetime_end: return true; + default: + return false; + case Intrinsic::memset: + case Intrinsic::memmove: + case Intrinsic::memcpy: + case Intrinsic::init_trampoline: + case Intrinsic::lifetime_end: + return true; } } return false; @@ -111,14 +116,13 @@ static Value *getPointerOperand(Instruction *I) { return SI->getPointerOperand(); if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) return MI->getOperand(1); - IntrinsicInst *II = cast<IntrinsicInst>(I); - switch (II->getIntrinsicID()) { - default: - assert(false && "Unexpected intrinsic!"); - case Intrinsic::init_trampoline: - return II->getOperand(1); - case Intrinsic::lifetime_end: - return II->getOperand(2); + + switch (cast<IntrinsicInst>(I)->getIntrinsicID()) { + default: assert(false && "Unexpected intrinsic!"); + case Intrinsic::init_trampoline: + return I->getOperand(1); + case Intrinsic::lifetime_end: + return I->getOperand(2); } } @@ -135,15 +139,13 @@ static unsigned getStoreSize(Instruction *I, const TargetData *TD) { if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) { Len = MI->getLength(); } else { - IntrinsicInst *II = cast<IntrinsicInst>(I); - switch (II->getIntrinsicID()) { - default: - assert(false && "Unexpected intrinsic!"); - case Intrinsic::init_trampoline: - return -1u; - case Intrinsic::lifetime_end: - Len = II->getOperand(1); - break; + switch (cast<IntrinsicInst>(I)->getIntrinsicID()) { + default: assert(false && "Unexpected intrinsic!"); + case Intrinsic::init_trampoline: + return -1u; + case Intrinsic::lifetime_end: + Len = I->getOperand(1); + break; } } if (ConstantInt *LenCI = dyn_cast<ConstantInt>(Len)) diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 6f1c32c..222792b 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -31,15 +31,18 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/Analysis/PHITransAddr.h" #include "llvm/Support/CFG.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/IRBuilder.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetData.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -187,8 +190,11 @@ template <> struct DenseMapInfo<Expression> { static bool isEqual(const Expression &LHS, const Expression &RHS) { return LHS == RHS; } - static bool isPod() { return true; } }; + +template <> +struct isPodLike<Expression> { static const bool value = true; }; + } //===----------------------------------------------------------------------===// @@ -488,21 +494,21 @@ uint32_t ValueTable::lookup_or_add_call(CallInst* C) { // Check to see if we have a single dominating call instruction that is // identical to C. for (unsigned i = 0, e = deps.size(); i != e; ++i) { - const MemoryDependenceAnalysis::NonLocalDepEntry *I = &deps[i]; + const NonLocalDepEntry *I = &deps[i]; // Ignore non-local dependencies. - if (I->second.isNonLocal()) + if (I->getResult().isNonLocal()) continue; // We don't handle non-depedencies. If we already have a call, reject // instruction dependencies. - if (I->second.isClobber() || cdep != 0) { + if (I->getResult().isClobber() || cdep != 0) { cdep = 0; break; } - CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->second.getInst()); + CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->getResult().getInst()); // FIXME: All duplicated with non-local case. - if (NonLocalDepCall && DT->properlyDominates(I->first, C->getParent())){ + if (NonLocalDepCall && DT->properlyDominates(I->getBB(), C->getParent())){ cdep = NonLocalDepCall; continue; } @@ -987,27 +993,27 @@ static Value *GetBaseWithConstantOffset(Value *Ptr, int64_t &Offset, } -/// AnalyzeLoadFromClobberingStore - This function is called when we have a -/// memdep query of a load that ends up being a clobbering store. This means -/// that the store *may* provide bits used by the load but we can't be sure -/// because the pointers don't mustalias. Check this case to see if there is -/// anything more we can do before we give up. This returns -1 if we have to -/// give up, or a byte number in the stored value of the piece that feeds the -/// load. -static int AnalyzeLoadFromClobberingStore(LoadInst *L, StoreInst *DepSI, +/// AnalyzeLoadFromClobberingWrite - This function is called when we have a +/// memdep query of a load that ends up being a clobbering memory write (store, +/// memset, memcpy, memmove). This means that the write *may* provide bits used +/// by the load but we can't be sure because the pointers don't mustalias. +/// +/// Check this case to see if there is anything more we can do before we give +/// up. This returns -1 if we have to give up, or a byte number in the stored +/// value of the piece that feeds the load. +static int AnalyzeLoadFromClobberingWrite(const Type *LoadTy, Value *LoadPtr, + Value *WritePtr, + uint64_t WriteSizeInBits, const TargetData &TD) { // If the loaded or stored value is an first class array or struct, don't try // to transform them. We need to be able to bitcast to integer. - if (isa<StructType>(L->getType()) || isa<ArrayType>(L->getType()) || - isa<StructType>(DepSI->getOperand(0)->getType()) || - isa<ArrayType>(DepSI->getOperand(0)->getType())) + if (isa<StructType>(LoadTy) || isa<ArrayType>(LoadTy)) return -1; int64_t StoreOffset = 0, LoadOffset = 0; - Value *StoreBase = - GetBaseWithConstantOffset(DepSI->getPointerOperand(), StoreOffset, TD); + Value *StoreBase = GetBaseWithConstantOffset(WritePtr, StoreOffset, TD); Value *LoadBase = - GetBaseWithConstantOffset(L->getPointerOperand(), LoadOffset, TD); + GetBaseWithConstantOffset(LoadPtr, LoadOffset, TD); if (StoreBase != LoadBase) return -1; @@ -1018,12 +1024,10 @@ static int AnalyzeLoadFromClobberingStore(LoadInst *L, StoreInst *DepSI, #if 0 errs() << "STORE/LOAD DEP WITH COMMON POINTER MISSED:\n" << "Base = " << *StoreBase << "\n" - << "Store Ptr = " << *DepSI->getPointerOperand() << "\n" - << "Store Offs = " << StoreOffset << " - " << *DepSI << "\n" - << "Load Ptr = " << *L->getPointerOperand() << "\n" - << "Load Offs = " << LoadOffset << " - " << *L << "\n\n"; - errs() << "'" << L->getParent()->getParent()->getName() << "'" - << *L->getParent(); + << "Store Ptr = " << *WritePtr << "\n" + << "Store Offs = " << StoreOffset << "\n" + << "Load Ptr = " << *LoadPtr << "\n"; + abort(); #endif return -1; } @@ -1033,12 +1037,11 @@ static int AnalyzeLoadFromClobberingStore(LoadInst *L, StoreInst *DepSI, // must have gotten confused. // FIXME: Investigate cases where this bails out, e.g. rdar://7238614. Then // remove this check, as it is duplicated with what we have below. - uint64_t StoreSize = TD.getTypeSizeInBits(DepSI->getOperand(0)->getType()); - uint64_t LoadSize = TD.getTypeSizeInBits(L->getType()); + uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy); - if ((StoreSize & 7) | (LoadSize & 7)) + if ((WriteSizeInBits & 7) | (LoadSize & 7)) return -1; - StoreSize >>= 3; // Convert to bytes. + uint64_t StoreSize = WriteSizeInBits >> 3; // Convert to bytes. LoadSize >>= 3; @@ -1052,12 +1055,10 @@ static int AnalyzeLoadFromClobberingStore(LoadInst *L, StoreInst *DepSI, #if 0 errs() << "STORE LOAD DEP WITH COMMON BASE:\n" << "Base = " << *StoreBase << "\n" - << "Store Ptr = " << *DepSI->getPointerOperand() << "\n" - << "Store Offs = " << StoreOffset << " - " << *DepSI << "\n" - << "Load Ptr = " << *L->getPointerOperand() << "\n" - << "Load Offs = " << LoadOffset << " - " << *L << "\n\n"; - errs() << "'" << L->getParent()->getParent()->getName() << "'" - << *L->getParent(); + << "Store Ptr = " << *WritePtr << "\n" + << "Store Offs = " << StoreOffset << "\n" + << "Load Ptr = " << *LoadPtr << "\n"; + abort(); #endif return -1; } @@ -1075,6 +1076,66 @@ static int AnalyzeLoadFromClobberingStore(LoadInst *L, StoreInst *DepSI, return LoadOffset-StoreOffset; } +/// AnalyzeLoadFromClobberingStore - This function is called when we have a +/// memdep query of a load that ends up being a clobbering store. +static int AnalyzeLoadFromClobberingStore(const Type *LoadTy, Value *LoadPtr, + StoreInst *DepSI, + const TargetData &TD) { + // Cannot handle reading from store of first-class aggregate yet. + if (isa<StructType>(DepSI->getOperand(0)->getType()) || + isa<ArrayType>(DepSI->getOperand(0)->getType())) + return -1; + + Value *StorePtr = DepSI->getPointerOperand(); + uint64_t StoreSize = TD.getTypeSizeInBits(DepSI->getOperand(0)->getType()); + return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, + StorePtr, StoreSize, TD); +} + +static int AnalyzeLoadFromClobberingMemInst(const Type *LoadTy, Value *LoadPtr, + MemIntrinsic *MI, + const TargetData &TD) { + // If the mem operation is a non-constant size, we can't handle it. + ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength()); + if (SizeCst == 0) return -1; + uint64_t MemSizeInBits = SizeCst->getZExtValue()*8; + + // If this is memset, we just need to see if the offset is valid in the size + // of the memset.. + if (MI->getIntrinsicID() == Intrinsic::memset) + return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(), + MemSizeInBits, TD); + + // If we have a memcpy/memmove, the only case we can handle is if this is a + // copy from constant memory. In that case, we can read directly from the + // constant memory. + MemTransferInst *MTI = cast<MemTransferInst>(MI); + + Constant *Src = dyn_cast<Constant>(MTI->getSource()); + if (Src == 0) return -1; + + GlobalVariable *GV = dyn_cast<GlobalVariable>(Src->getUnderlyingObject()); + if (GV == 0 || !GV->isConstant()) return -1; + + // See if the access is within the bounds of the transfer. + int Offset = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, + MI->getDest(), MemSizeInBits, TD); + if (Offset == -1) + return Offset; + + // Otherwise, see if we can constant fold a load from the constant with the + // offset applied as appropriate. + Src = ConstantExpr::getBitCast(Src, + llvm::Type::getInt8PtrTy(Src->getContext())); + Constant *OffsetCst = + ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset); + Src = ConstantExpr::getGetElementPtr(Src, &OffsetCst, 1); + Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy)); + if (ConstantFoldLoadFromConstPtr(Src, &TD)) + return Offset; + return -1; +} + /// GetStoreValueForLoad - This function is called when we have a /// memdep query of a load that ends up being a clobbering store. This means @@ -1089,50 +1150,134 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset, uint64_t StoreSize = TD.getTypeSizeInBits(SrcVal->getType())/8; uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8; + IRBuilder<> Builder(InsertPt->getParent(), InsertPt); // Compute which bits of the stored value are being used by the load. Convert // to an integer type to start with. if (isa<PointerType>(SrcVal->getType())) - SrcVal = new PtrToIntInst(SrcVal, TD.getIntPtrType(Ctx), "tmp", InsertPt); + SrcVal = Builder.CreatePtrToInt(SrcVal, TD.getIntPtrType(Ctx), "tmp"); if (!isa<IntegerType>(SrcVal->getType())) - SrcVal = new BitCastInst(SrcVal, IntegerType::get(Ctx, StoreSize*8), - "tmp", InsertPt); + SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8), + "tmp"); // Shift the bits to the least significant depending on endianness. unsigned ShiftAmt; - if (TD.isLittleEndian()) { + if (TD.isLittleEndian()) ShiftAmt = Offset*8; - } else { + else ShiftAmt = (StoreSize-LoadSize-Offset)*8; - } if (ShiftAmt) - SrcVal = BinaryOperator::CreateLShr(SrcVal, - ConstantInt::get(SrcVal->getType(), ShiftAmt), "tmp", InsertPt); + SrcVal = Builder.CreateLShr(SrcVal, ShiftAmt, "tmp"); if (LoadSize != StoreSize) - SrcVal = new TruncInst(SrcVal, IntegerType::get(Ctx, LoadSize*8), - "tmp", InsertPt); + SrcVal = Builder.CreateTrunc(SrcVal, IntegerType::get(Ctx, LoadSize*8), + "tmp"); return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD); } +/// GetMemInstValueForLoad - This function is called when we have a +/// memdep query of a load that ends up being a clobbering mem intrinsic. +static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset, + const Type *LoadTy, Instruction *InsertPt, + const TargetData &TD){ + LLVMContext &Ctx = LoadTy->getContext(); + uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8; + + IRBuilder<> Builder(InsertPt->getParent(), InsertPt); + + // We know that this method is only called when the mem transfer fully + // provides the bits for the load. + if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) { + // memset(P, 'x', 1234) -> splat('x'), even if x is a variable, and + // independently of what the offset is. + Value *Val = MSI->getValue(); + if (LoadSize != 1) + Val = Builder.CreateZExt(Val, IntegerType::get(Ctx, LoadSize*8)); + + Value *OneElt = Val; + + // Splat the value out to the right number of bits. + for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize; ) { + // If we can double the number of bytes set, do it. + if (NumBytesSet*2 <= LoadSize) { + Value *ShVal = Builder.CreateShl(Val, NumBytesSet*8); + Val = Builder.CreateOr(Val, ShVal); + NumBytesSet <<= 1; + continue; + } + + // Otherwise insert one byte at a time. + Value *ShVal = Builder.CreateShl(Val, 1*8); + Val = Builder.CreateOr(OneElt, ShVal); + ++NumBytesSet; + } + + return CoerceAvailableValueToLoadType(Val, LoadTy, InsertPt, TD); + } + + // Otherwise, this is a memcpy/memmove from a constant global. + MemTransferInst *MTI = cast<MemTransferInst>(SrcInst); + Constant *Src = cast<Constant>(MTI->getSource()); + + // Otherwise, see if we can constant fold a load from the constant with the + // offset applied as appropriate. + Src = ConstantExpr::getBitCast(Src, + llvm::Type::getInt8PtrTy(Src->getContext())); + Constant *OffsetCst = + ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset); + Src = ConstantExpr::getGetElementPtr(Src, &OffsetCst, 1); + Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy)); + return ConstantFoldLoadFromConstPtr(Src, &TD); +} + + + struct AvailableValueInBlock { /// BB - The basic block in question. BasicBlock *BB; + enum ValType { + SimpleVal, // A simple offsetted value that is accessed. + MemIntrin // A memory intrinsic which is loaded from. + }; + /// V - The value that is live out of the block. - Value *V; - /// Offset - The byte offset in V that is interesting for the load query. + PointerIntPair<Value *, 1, ValType> Val; + + /// Offset - The byte offset in Val that is interesting for the load query. unsigned Offset; static AvailableValueInBlock get(BasicBlock *BB, Value *V, unsigned Offset = 0) { AvailableValueInBlock Res; Res.BB = BB; - Res.V = V; + Res.Val.setPointer(V); + Res.Val.setInt(SimpleVal); + Res.Offset = Offset; + return Res; + } + + static AvailableValueInBlock getMI(BasicBlock *BB, MemIntrinsic *MI, + unsigned Offset = 0) { + AvailableValueInBlock Res; + Res.BB = BB; + Res.Val.setPointer(MI); + Res.Val.setInt(MemIntrin); Res.Offset = Offset; return Res; } + + bool isSimpleValue() const { return Val.getInt() == SimpleVal; } + Value *getSimpleValue() const { + assert(isSimpleValue() && "Wrong accessor"); + return Val.getPointer(); + } + + MemIntrinsic *getMemIntrinValue() const { + assert(!isSimpleValue() && "Wrong accessor"); + return cast<MemIntrinsic>(Val.getPointer()); + } }; /// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock, @@ -1149,30 +1294,33 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI, const Type *LoadTy = LI->getType(); for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) { - BasicBlock *BB = ValuesPerBlock[i].BB; - Value *AvailableVal = ValuesPerBlock[i].V; - unsigned Offset = ValuesPerBlock[i].Offset; + const AvailableValueInBlock &AV = ValuesPerBlock[i]; + BasicBlock *BB = AV.BB; if (SSAUpdate.HasValueForBlock(BB)) continue; - - if (AvailableVal->getType() != LoadTy) { - assert(TD && "Need target data to handle type mismatch case"); - AvailableVal = GetStoreValueForLoad(AvailableVal, Offset, LoadTy, - BB->getTerminator(), *TD); - - if (Offset) { - DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\n" - << *ValuesPerBlock[i].V << '\n' + + unsigned Offset = AV.Offset; + + Value *AvailableVal; + if (AV.isSimpleValue()) { + AvailableVal = AV.getSimpleValue(); + if (AvailableVal->getType() != LoadTy) { + assert(TD && "Need target data to handle type mismatch case"); + AvailableVal = GetStoreValueForLoad(AvailableVal, Offset, LoadTy, + BB->getTerminator(), *TD); + + DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " " + << *AV.getSimpleValue() << '\n' << *AvailableVal << '\n' << "\n\n\n"); } - - - DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\n" - << *ValuesPerBlock[i].V << '\n' + } else { + AvailableVal = GetMemInstValueForLoad(AV.getMemIntrinValue(), Offset, + LoadTy, BB->getTerminator(), *TD); + DEBUG(errs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset + << " " << *AV.getMemIntrinValue() << '\n' << *AvailableVal << '\n' << "\n\n\n"); } - SSAUpdate.AddAvailableValue(BB, AvailableVal); } @@ -1187,12 +1335,18 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI, return V; } +static bool isLifetimeStart(Instruction *Inst) { + if (IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst)) + return II->getIntrinsicID() == Intrinsic::lifetime_start; + return false; +} + /// processNonLocalLoad - Attempt to eliminate a load whose dependencies are /// non-local by performing PHI construction. bool GVN::processNonLocalLoad(LoadInst *LI, SmallVectorImpl<Instruction*> &toErase) { // Find the non-local dependencies of the load. - SmallVector<MemoryDependenceAnalysis::NonLocalDepEntry, 64> Deps; + SmallVector<NonLocalDepEntry, 64> Deps; MD->getNonLocalPointerDependency(LI->getOperand(0), true, LI->getParent(), Deps); //DEBUG(errs() << "INVESTIGATING NONLOCAL LOAD: " @@ -1206,11 +1360,11 @@ bool GVN::processNonLocalLoad(LoadInst *LI, // If we had a phi translation failure, we'll have a single entry which is a // clobber in the current block. Reject this early. - if (Deps.size() == 1 && Deps[0].second.isClobber()) { + if (Deps.size() == 1 && Deps[0].getResult().isClobber()) { DEBUG( errs() << "GVN: non-local load "; WriteAsOperand(errs(), LI); - errs() << " is clobbered by " << *Deps[0].second.getInst() << '\n'; + errs() << " is clobbered by " << *Deps[0].getResult().getInst() << '\n'; ); return false; } @@ -1225,18 +1379,24 @@ bool GVN::processNonLocalLoad(LoadInst *LI, const TargetData *TD = 0; for (unsigned i = 0, e = Deps.size(); i != e; ++i) { - BasicBlock *DepBB = Deps[i].first; - MemDepResult DepInfo = Deps[i].second; + BasicBlock *DepBB = Deps[i].getBB(); + MemDepResult DepInfo = Deps[i].getResult(); if (DepInfo.isClobber()) { + // The address being loaded in this non-local block may not be the same as + // the pointer operand of the load if PHI translation occurs. Make sure + // to consider the right address. + Value *Address = Deps[i].getAddress(); + // If the dependence is to a store that writes to a superset of the bits // read by the load, we can extract the bits we need for the load from the // stored value. if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) { if (TD == 0) TD = getAnalysisIfAvailable<TargetData>(); - if (TD) { - int Offset = AnalyzeLoadFromClobberingStore(LI, DepSI, *TD); + if (TD && Address) { + int Offset = AnalyzeLoadFromClobberingStore(LI->getType(), Address, + DepSI, *TD); if (Offset != -1) { ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, DepSI->getOperand(0), @@ -1245,8 +1405,23 @@ bool GVN::processNonLocalLoad(LoadInst *LI, } } } + + // If the clobbering value is a memset/memcpy/memmove, see if we can + // forward a value on from it. + if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) { + if (TD == 0) + TD = getAnalysisIfAvailable<TargetData>(); + if (TD && Address) { + int Offset = AnalyzeLoadFromClobberingMemInst(LI->getType(), Address, + DepMI, *TD); + if (Offset != -1) { + ValuesPerBlock.push_back(AvailableValueInBlock::getMI(DepBB, DepMI, + Offset)); + continue; + } + } + } - // FIXME: Handle memset/memcpy. UnavailableBlocks.push_back(DepBB); continue; } @@ -1254,21 +1429,14 @@ bool GVN::processNonLocalLoad(LoadInst *LI, Instruction *DepInst = DepInfo.getInst(); // Loading the allocation -> undef. - if (isa<AllocaInst>(DepInst) || isMalloc(DepInst)) { + if (isa<AllocaInst>(DepInst) || isMalloc(DepInst) || + // Loading immediately after lifetime begin -> undef. + isLifetimeStart(DepInst)) { ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, UndefValue::get(LI->getType()))); continue; } - // Loading immediately after lifetime begin or end -> undef. - if (IntrinsicInst* II = dyn_cast<IntrinsicInst>(DepInst)) { - if (II->getIntrinsicID() == Intrinsic::lifetime_start || - II->getIntrinsicID() == Intrinsic::lifetime_end) { - ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, - UndefValue::get(LI->getType()))); - } - } - if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) { // Reject loads and stores that are to the same address but are of // different types if we have to. @@ -1378,19 +1546,25 @@ bool GVN::processNonLocalLoad(LoadInst *LI, // to eliminate LI even if we insert uses in the other predecessors, we will // end up increasing code size. Reject this by scanning for LI. for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) - if (ValuesPerBlock[i].V == LI) + if (ValuesPerBlock[i].isSimpleValue() && + ValuesPerBlock[i].getSimpleValue() == LI) return false; + // FIXME: It is extremely unclear what this loop is doing, other than + // artificially restricting loadpre. if (isSinglePred) { bool isHot = false; - for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) - if (Instruction *I = dyn_cast<Instruction>(ValuesPerBlock[i].V)) + for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) { + const AvailableValueInBlock &AV = ValuesPerBlock[i]; + if (AV.isSimpleValue()) // "Hot" Instruction is in some loop (because it dominates its dep. // instruction). - if (DT->dominates(LI, I)) { - isHot = true; - break; - } + if (Instruction *I = dyn_cast<Instruction>(AV.getSimpleValue())) + if (DT->dominates(LI, I)) { + isHot = true; + break; + } + } // We are interested only in "hot" instructions. We don't want to do any // mis-optimizations here. @@ -1435,29 +1609,43 @@ bool GVN::processNonLocalLoad(LoadInst *LI, // Do PHI translation to get its value in the predecessor if necessary. The // returned pointer (if non-null) is guaranteed to dominate UnavailablePred. // - // FIXME: This may insert a computation, but we don't tell scalar GVN - // optimization stuff about it. How do we do this? SmallVector<Instruction*, 8> NewInsts; - Value *LoadPtr = 0; // If all preds have a single successor, then we know it is safe to insert the // load on the pred (?!?), so we can insert code to materialize the pointer if // it is not available. + PHITransAddr Address(LI->getOperand(0), TD); + Value *LoadPtr = 0; if (allSingleSucc) { - LoadPtr = MD->InsertPHITranslatedPointer(LI->getOperand(0), LoadBB, - UnavailablePred, TD, *DT,NewInsts); + LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred, + *DT, NewInsts); } else { - LoadPtr = MD->GetAvailablePHITranslatedValue(LI->getOperand(0), LoadBB, - UnavailablePred, TD, *DT); - } + Address.PHITranslateValue(LoadBB, UnavailablePred); + LoadPtr = Address.getAddr(); + // Make sure the value is live in the predecessor. + if (Instruction *Inst = dyn_cast_or_null<Instruction>(LoadPtr)) + if (!DT->dominates(Inst->getParent(), UnavailablePred)) + LoadPtr = 0; + } + // If we couldn't find or insert a computation of this phi translated value, // we fail PRE. if (LoadPtr == 0) { + assert(NewInsts.empty() && "Shouldn't insert insts on failure"); DEBUG(errs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: " << *LI->getOperand(0) << "\n"); return false; } + + // Assign value numbers to these new instructions. + for (unsigned i = 0, e = NewInsts.size(); i != e; ++i) { + // FIXME: We really _ought_ to insert these value numbers into their + // parent's availability map. However, in doing so, we risk getting into + // ordering issues. If a block hasn't been processed yet, we would be + // marking a value as AVAIL-IN, which isn't what we intend. + VN.lookup_or_add(NewInsts[i]); + } // Make sure it is valid to move this load here. We have to watch out for: // @1 = getelementptr (i8* p, ... @@ -1517,11 +1705,6 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { // If the value isn't available, don't do anything! if (Dep.isClobber()) { - // FIXME: We should handle memset/memcpy/memmove as dependent instructions - // to forward the value if available. - //if (isa<MemIntrinsic>(Dep.getInst())) - //errs() << "LOAD DEPENDS ON MEM: " << *L << "\n" << *Dep.getInst()<<"\n\n"; - // Check to see if we have something like this: // store i32 123, i32* %P // %A = bitcast i32* %P to i8* @@ -1532,25 +1715,42 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { // a common base + constant offset, and if the previous store (or memset) // completely covers this load. This sort of thing can happen in bitfield // access code. + Value *AvailVal = 0; if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst())) if (const TargetData *TD = getAnalysisIfAvailable<TargetData>()) { - int Offset = AnalyzeLoadFromClobberingStore(L, DepSI, *TD); - if (Offset != -1) { - Value *AvailVal = GetStoreValueForLoad(DepSI->getOperand(0), Offset, - L->getType(), L, *TD); - DEBUG(errs() << "GVN COERCED STORE BITS:\n" << *DepSI << '\n' - << *AvailVal << '\n' << *L << "\n\n\n"); - - // Replace the load! - L->replaceAllUsesWith(AvailVal); - if (isa<PointerType>(AvailVal->getType())) - MD->invalidateCachedPointerInfo(AvailVal); - toErase.push_back(L); - NumGVNLoad++; - return true; - } + int Offset = AnalyzeLoadFromClobberingStore(L->getType(), + L->getPointerOperand(), + DepSI, *TD); + if (Offset != -1) + AvailVal = GetStoreValueForLoad(DepSI->getOperand(0), Offset, + L->getType(), L, *TD); } + // If the clobbering value is a memset/memcpy/memmove, see if we can forward + // a value on from it. + if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) { + if (const TargetData *TD = getAnalysisIfAvailable<TargetData>()) { + int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(), + L->getPointerOperand(), + DepMI, *TD); + if (Offset != -1) + AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L,*TD); + } + } + + if (AvailVal) { + DEBUG(errs() << "GVN COERCED INST:\n" << *Dep.getInst() << '\n' + << *AvailVal << '\n' << *L << "\n\n\n"); + + // Replace the load! + L->replaceAllUsesWith(AvailVal); + if (isa<PointerType>(AvailVal->getType())) + MD->invalidateCachedPointerInfo(AvailVal); + toErase.push_back(L); + NumGVNLoad++; + return true; + } + DEBUG( // fast print dep, using operator<< on instruction would be too slow errs() << "GVN: load "; @@ -1635,11 +1835,10 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { return true; } - // If this load occurs either right after a lifetime begin or a lifetime end, + // If this load occurs either right after a lifetime begin, // then the loaded value is undefined. if (IntrinsicInst* II = dyn_cast<IntrinsicInst>(DepInst)) { - if (II->getIntrinsicID() == Intrinsic::lifetime_start || - II->getIntrinsicID() == Intrinsic::lifetime_end) { + if (II->getIntrinsicID() == Intrinsic::lifetime_start) { L->replaceAllUsesWith(UndefValue::get(L->getType())); toErase.push_back(L); NumGVNLoad++; diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp index d12ad81..b9c536f 100644 --- a/lib/Transforms/Scalar/InstructionCombining.cpp +++ b/lib/Transforms/Scalar/InstructionCombining.cpp @@ -8585,25 +8585,36 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, if (ICI->isEquality() && CI.getType() == ICI->getOperand(0)->getType()) { if (const IntegerType *ITy = dyn_cast<IntegerType>(CI.getType())) { uint32_t BitWidth = ITy->getBitWidth(); - if (BitWidth > 1) { - Value *LHS = ICI->getOperand(0); - Value *RHS = ICI->getOperand(1); - - APInt KnownZeroLHS(BitWidth, 0), KnownOneLHS(BitWidth, 0); - APInt KnownZeroRHS(BitWidth, 0), KnownOneRHS(BitWidth, 0); - APInt TypeMask(APInt::getHighBitsSet(BitWidth, BitWidth-1)); - ComputeMaskedBits(LHS, TypeMask, KnownZeroLHS, KnownOneLHS); - ComputeMaskedBits(RHS, TypeMask, KnownZeroRHS, KnownOneRHS); - - if (KnownZeroLHS.countLeadingOnes() == BitWidth-1 && - KnownZeroRHS.countLeadingOnes() == BitWidth-1) { + Value *LHS = ICI->getOperand(0); + Value *RHS = ICI->getOperand(1); + + APInt KnownZeroLHS(BitWidth, 0), KnownOneLHS(BitWidth, 0); + APInt KnownZeroRHS(BitWidth, 0), KnownOneRHS(BitWidth, 0); + APInt TypeMask(APInt::getAllOnesValue(BitWidth)); + ComputeMaskedBits(LHS, TypeMask, KnownZeroLHS, KnownOneLHS); + ComputeMaskedBits(RHS, TypeMask, KnownZeroRHS, KnownOneRHS); + + if (KnownZeroLHS == KnownZeroRHS && KnownOneLHS == KnownOneRHS) { + APInt KnownBits = KnownZeroLHS | KnownOneLHS; + APInt UnknownBit = ~KnownBits; + if (UnknownBit.countPopulation() == 1) { if (!DoXform) return ICI; - Value *Xor = Builder->CreateXor(LHS, RHS); + Value *Result = Builder->CreateXor(LHS, RHS); + + // Mask off any bits that are set and won't be shifted away. + if (KnownOneLHS.uge(UnknownBit)) + Result = Builder->CreateAnd(Result, + ConstantInt::get(ITy, UnknownBit)); + + // Shift the bit we're testing down to the lsb. + Result = Builder->CreateLShr( + Result, ConstantInt::get(ITy, UnknownBit.countTrailingZeros())); + if (ICI->getPredicate() == ICmpInst::ICMP_EQ) - Xor = Builder->CreateXor(Xor, ConstantInt::get(ITy, 1)); - Xor->takeName(ICI); - return ReplaceInstUsesWith(CI, Xor); + Result = Builder->CreateXor(Result, ConstantInt::get(ITy, 1)); + Result->takeName(ICI); + return ReplaceInstUsesWith(CI, Result); } } } @@ -11189,8 +11200,9 @@ namespace llvm { return LHS.PN == RHS.PN && LHS.Shift == RHS.Shift && LHS.Width == RHS.Width; } - static bool isPod() { return true; } }; + template <> + struct isPodLike<LoweredPHIRecord> { static const bool value = true; }; } diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index 1b93f34..d58b9c9 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -718,6 +718,11 @@ bool JumpThreading::ProcessSwitchOnDuplicateCond(BasicBlock *PredBB, if (PredSI->getSuccessor(PredCase) != DestBB && DestSI->getSuccessor(i) != DestBB) continue; + + // Do not forward this if it already goes to this destination, this would + // be an infinite loop. + if (PredSI->getSuccessor(PredCase) == DestSucc) + continue; // Otherwise, we're safe to make the change. Make sure that the edge from // DestSI to DestSucc is not critical and has no PHI nodes. diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index 5511387..42a8fdc 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -160,16 +160,17 @@ namespace { // Because the exit block is not in the loop, we know we have to get _at // least_ its immediate dominator. - do { - // Get next Immediate Dominator. - IDom = IDom->getIDom(); - + IDom = IDom->getIDom(); + + while (IDom && IDom != BlockInLoopNode) { // If we have got to the header of the loop, then the instructions block // did not dominate the exit node, so we can't hoist it. if (IDom->getBlock() == LoopHeader) return false; - } while (IDom != BlockInLoopNode); + // Get next Immediate Dominator. + IDom = IDom->getIDom(); + }; return true; } diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 564c7ac..85cc712 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -24,18 +24,14 @@ #include "llvm/Constants.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" -#include "llvm/Type.h" #include "llvm/DerivedTypes.h" -#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/IVUsers.h" -#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Transforms/Utils/AddrModeMatcher.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ValueHandle.h" @@ -85,8 +81,6 @@ namespace { class LoopStrengthReduce : public LoopPass { IVUsers *IU; - LoopInfo *LI; - DominatorTree *DT; ScalarEvolution *SE; bool Changed; @@ -94,10 +88,6 @@ namespace { /// particular stride. std::map<const SCEV *, IVsOfOneStride> IVsByStride; - /// StrideNoReuse - Keep track of all the strides whose ivs cannot be - /// reused (nor should they be rewritten to reuse other strides). - SmallSet<const SCEV *, 4> StrideNoReuse; - /// DeadInsts - Keep track of instructions we may have made dead, so that /// we can remove them after we are done working. SmallVector<WeakVH, 16> DeadInsts; @@ -109,8 +99,7 @@ namespace { public: static char ID; // Pass ID, replacement for typeid explicit LoopStrengthReduce(const TargetLowering *tli = NULL) : - LoopPass(&ID), TLI(tli) { - } + LoopPass(&ID), TLI(tli) {} bool runOnLoop(Loop *L, LPPassManager &LPM); @@ -118,13 +107,11 @@ namespace { // We split critical edges, so we change the CFG. However, we do update // many analyses if they are around. AU.addPreservedID(LoopSimplifyID); - AU.addPreserved<LoopInfo>(); - AU.addPreserved<DominanceFrontier>(); - AU.addPreserved<DominatorTree>(); + AU.addPreserved("loops"); + AU.addPreserved("domfrontier"); + AU.addPreserved("domtree"); AU.addRequiredID(LoopSimplifyID); - AU.addRequired<LoopInfo>(); - AU.addRequired<DominatorTree>(); AU.addRequired<ScalarEvolution>(); AU.addPreserved<ScalarEvolution>(); AU.addRequired<IVUsers>(); @@ -228,19 +215,17 @@ void LoopStrengthReduce::DeleteTriviallyDeadInstructions() { if (DeadInsts.empty()) return; while (!DeadInsts.empty()) { - Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.back()); - DeadInsts.pop_back(); + Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()); if (I == 0 || !isInstructionTriviallyDead(I)) continue; - for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) { + for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) if (Instruction *U = dyn_cast<Instruction>(*OI)) { *OI = 0; if (U->use_empty()) DeadInsts.push_back(U); } - } I->eraseFromParent(); Changed = true; @@ -265,7 +250,7 @@ static bool containsAddRecFromDifferentLoop(const SCEV *S, Loop *L) { if (newLoop == L) return false; // if newLoop is an outer loop of L, this is OK. - if (!LoopInfo::isNotAlreadyContainedIn(L, newLoop)) + if (newLoop->contains(L->getHeader())) return false; } return true; @@ -338,9 +323,6 @@ namespace { /// BasedUser - For a particular base value, keep information about how we've /// partitioned the expression so far. struct BasedUser { - /// SE - The current ScalarEvolution object. - ScalarEvolution *SE; - /// Base - The Base value for the PHI node that needs to be inserted for /// this use. As the use is processed, information gets moved from this /// field to the Imm field (below). BasedUser values are sorted by this @@ -372,9 +354,9 @@ namespace { bool isUseOfPostIncrementedValue; BasedUser(IVStrideUse &IVSU, ScalarEvolution *se) - : SE(se), Base(IVSU.getOffset()), Inst(IVSU.getUser()), + : Base(IVSU.getOffset()), Inst(IVSU.getUser()), OperandValToReplace(IVSU.getOperandValToReplace()), - Imm(SE->getIntegerSCEV(0, Base->getType())), + Imm(se->getIntegerSCEV(0, Base->getType())), isUseOfPostIncrementedValue(IVSU.isUseOfPostIncrementedValue()) {} // Once we rewrite the code to insert the new IVs we want, update the @@ -383,14 +365,14 @@ namespace { void RewriteInstructionToUseNewBase(const SCEV *const &NewBase, Instruction *InsertPt, SCEVExpander &Rewriter, Loop *L, Pass *P, - LoopInfo &LI, - SmallVectorImpl<WeakVH> &DeadInsts); + SmallVectorImpl<WeakVH> &DeadInsts, + ScalarEvolution *SE); Value *InsertCodeForBaseAtPosition(const SCEV *const &NewBase, const Type *Ty, SCEVExpander &Rewriter, - Instruction *IP, Loop *L, - LoopInfo &LI); + Instruction *IP, + ScalarEvolution *SE); void dump() const; }; } @@ -404,27 +386,12 @@ void BasedUser::dump() const { Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *const &NewBase, const Type *Ty, SCEVExpander &Rewriter, - Instruction *IP, Loop *L, - LoopInfo &LI) { - // Figure out where we *really* want to insert this code. In particular, if - // the user is inside of a loop that is nested inside of L, we really don't - // want to insert this expression before the user, we'd rather pull it out as - // many loops as possible. - Instruction *BaseInsertPt = IP; - - // Figure out the most-nested loop that IP is in. - Loop *InsertLoop = LI.getLoopFor(IP->getParent()); - - // If InsertLoop is not L, and InsertLoop is nested inside of L, figure out - // the preheader of the outer-most loop where NewBase is not loop invariant. - if (L->contains(IP->getParent())) - while (InsertLoop && NewBase->isLoopInvariant(InsertLoop)) { - BaseInsertPt = InsertLoop->getLoopPreheader()->getTerminator(); - InsertLoop = InsertLoop->getParentLoop(); - } - - Value *Base = Rewriter.expandCodeFor(NewBase, 0, BaseInsertPt); + Instruction *IP, + ScalarEvolution *SE) { + Value *Base = Rewriter.expandCodeFor(NewBase, 0, IP); + // Wrap the base in a SCEVUnknown so that ScalarEvolution doesn't try to + // re-analyze it. const SCEV *NewValSCEV = SE->getUnknown(Base); // Always emit the immediate into the same block as the user. @@ -443,8 +410,8 @@ Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *const &NewBase, void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase, Instruction *NewBasePt, SCEVExpander &Rewriter, Loop *L, Pass *P, - LoopInfo &LI, - SmallVectorImpl<WeakVH> &DeadInsts) { + SmallVectorImpl<WeakVH> &DeadInsts, + ScalarEvolution *SE) { if (!isa<PHINode>(Inst)) { // By default, insert code at the user instruction. BasicBlock::iterator InsertPt = Inst; @@ -473,7 +440,7 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase, } Value *NewVal = InsertCodeForBaseAtPosition(NewBase, OperandValToReplace->getType(), - Rewriter, InsertPt, L, LI); + Rewriter, InsertPt, SE); // Replace the use of the operand Value with the new Phi we just created. Inst->replaceUsesOfWith(OperandValToReplace, NewVal); @@ -535,7 +502,7 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase, PHIPred->getTerminator() : OldLoc->getParent()->getTerminator(); Code = InsertCodeForBaseAtPosition(NewBase, PN->getType(), - Rewriter, InsertPt, L, LI); + Rewriter, InsertPt, SE); DEBUG(errs() << " Changing PHI use to "); DEBUG(WriteAsOperand(errs(), Code, /*PrintType=*/false)); @@ -1011,17 +978,13 @@ const SCEV *LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg, const SCEV *const &Stride, IVExpr &IV, const Type *Ty, const std::vector<BasedUser>& UsersToProcess) { - if (StrideNoReuse.count(Stride)) - return SE->getIntegerSCEV(0, Stride->getType()); - if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Stride)) { int64_t SInt = SC->getValue()->getSExtValue(); for (unsigned NewStride = 0, e = IU->StrideOrder.size(); NewStride != e; ++NewStride) { std::map<const SCEV *, IVsOfOneStride>::iterator SI = IVsByStride.find(IU->StrideOrder[NewStride]); - if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first) || - StrideNoReuse.count(SI->first)) + if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first)) continue; // The other stride has no uses, don't reuse it. std::map<const SCEV *, IVUsersOfOneStride *>::iterator UI = @@ -1780,8 +1743,8 @@ LoopStrengthReduce::StrengthReduceIVUsersOfStride(const SCEV *const &Stride, RewriteExpr = SE->getAddExpr(RewriteExpr, SE->getUnknown(BaseV)); User.RewriteInstructionToUseNewBase(RewriteExpr, NewBasePt, - Rewriter, L, this, *LI, - DeadInsts); + Rewriter, L, this, + DeadInsts, SE); // Mark old value we replaced as possibly dead, so that it is eliminated // if we just replaced the last use of that value. @@ -2745,8 +2708,6 @@ bool LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) { bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) { IU = &getAnalysis<IVUsers>(); - LI = &getAnalysis<LoopInfo>(); - DT = &getAnalysis<DominatorTree>(); SE = &getAnalysis<ScalarEvolution>(); Changed = false; @@ -2792,15 +2753,14 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) { // After all sharing is done, see if we can adjust the loop to test against // zero instead of counting up to a maximum. This is usually faster. OptimizeLoopCountIV(L); - } - // We're done analyzing this loop; release all the state we built up for it. - IVsByStride.clear(); - StrideNoReuse.clear(); + // We're done analyzing this loop; release all the state we built up for it. + IVsByStride.clear(); - // Clean up after ourselves - if (!DeadInsts.empty()) - DeleteTriviallyDeadInstructions(); + // Clean up after ourselves + if (!DeadInsts.empty()) + DeleteTriviallyDeadInstructions(); + } // At this point, it is worth checking to see if any recurrence PHIs are also // dead, so that we can remove them as well. diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index 38d267a..b7adfdc 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -404,12 +404,13 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val, bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val){ initLoopData(); - Function *F = loopHeader->getParent(); // If LoopSimplify was unable to form a preheader, don't do any unswitching. if (!loopPreheader) return false; + Function *F = loopHeader->getParent(); + // If the condition is trivial, always unswitch. There is no code growth for // this case. if (!IsTrivialUnswitchCondition(LoopCond)) { diff --git a/lib/Transforms/Scalar/SCCVN.cpp b/lib/Transforms/Scalar/SCCVN.cpp index 001267a..dbc82e1 100644 --- a/lib/Transforms/Scalar/SCCVN.cpp +++ b/lib/Transforms/Scalar/SCCVN.cpp @@ -19,7 +19,6 @@ #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" -#include "llvm/LLVMContext.h" #include "llvm/Operator.h" #include "llvm/Value.h" #include "llvm/ADT/DenseMap.h" @@ -155,8 +154,10 @@ template <> struct DenseMapInfo<Expression> { static bool isEqual(const Expression &LHS, const Expression &RHS) { return LHS == RHS; } - static bool isPod() { return true; } }; +template <> +struct isPodLike<Expression> { static const bool value = true; }; + } //===----------------------------------------------------------------------===// diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index ae6ad74..b040a27 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -105,7 +105,7 @@ namespace { void isSafeUseOfAllocation(Instruction *User, AllocaInst *AI, AllocaInfo &Info); void isSafeElementUse(Value *Ptr, bool isFirstElt, AllocaInst *AI, - AllocaInfo &Info); + AllocaInfo &Info); void isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocaInst *AI, unsigned OpNo, AllocaInfo &Info); void isSafeUseOfBitCastedAllocation(BitCastInst *User, AllocaInst *AI, @@ -362,7 +362,6 @@ void SROA::DoScalarReplacement(AllocaInst *AI, // Now that we have created the alloca instructions that we want to use, // expand the getelementptr instructions to use them. - // while (!AI->use_empty()) { Instruction *User = cast<Instruction>(AI->use_back()); if (BitCastInst *BCInst = dyn_cast<BitCastInst>(User)) { @@ -450,11 +449,9 @@ void SROA::DoScalarReplacement(AllocaInst *AI, NumReplaced++; } - /// isSafeElementUse - Check to see if this use is an allowed use for a /// getelementptr instruction of an array aggregate allocation. isFirstElt /// indicates whether Ptr is known to the start of the aggregate. -/// void SROA::isSafeElementUse(Value *Ptr, bool isFirstElt, AllocaInst *AI, AllocaInfo &Info) { for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end(); @@ -503,7 +500,6 @@ void SROA::isSafeElementUse(Value *Ptr, bool isFirstElt, AllocaInst *AI, } } - isSafeElementUse(GEP, AreAllZeroIndices, AI, Info); if (Info.isUnsafe) return; break; @@ -543,9 +539,8 @@ static bool AllUsersAreLoads(Value *Ptr) { return true; } -/// isSafeUseOfAllocation - Check to see if this user is an allowed use for an +/// isSafeUseOfAllocation - Check if this user is an allowed use for an /// aggregate allocation. -/// void SROA::isSafeUseOfAllocation(Instruction *User, AllocaInst *AI, AllocaInfo &Info) { if (BitCastInst *C = dyn_cast<BitCastInst>(User)) @@ -614,7 +609,7 @@ void SROA::isSafeUseOfAllocation(Instruction *User, AllocaInst *AI, // integer. Specifically, consider A[0][i]. We cannot know that the user // isn't doing invalid things like allowing i to index an out-of-range // subscript that accesses A[1]. Because of this, we have to reject SROA - // of any accesses into structs where any of the components are variables. + // of any accesses into structs where any of the components are variables. if (IdxVal->getZExtValue() >= AT->getNumElements()) return MarkUnsafe(Info); } else if (const VectorType *VT = dyn_cast<VectorType>(*I)) { @@ -628,7 +623,7 @@ void SROA::isSafeUseOfAllocation(Instruction *User, AllocaInst *AI, return isSafeElementUse(GEPI, IsAllZeroIndices, AI, Info); } -/// isSafeMemIntrinsicOnAllocation - Return true if the specified memory +/// isSafeMemIntrinsicOnAllocation - Check if the specified memory /// intrinsic can be promoted by SROA. At this point, we know that the operand /// of the memintrinsic is a pointer to the beginning of the allocation. void SROA::isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocaInst *AI, @@ -656,8 +651,8 @@ void SROA::isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocaInst *AI, } } -/// isSafeUseOfBitCastedAllocation - Return true if all users of this bitcast -/// are +/// isSafeUseOfBitCastedAllocation - Check if all users of this bitcast +/// from an alloca are safe for SROA of that alloca. void SROA::isSafeUseOfBitCastedAllocation(BitCastInst *BC, AllocaInst *AI, AllocaInfo &Info) { for (Value::use_iterator UI = BC->use_begin(), E = BC->use_end(); @@ -773,6 +768,10 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, OtherPtr = MTI->getRawDest(); } } + + // Keep track of the other intrinsic argument, so it can be removed if it + // is dead when the intrinsic is replaced. + Value *PossiblyDead = OtherPtr; // If there is an other pointer, we want to convert it to the same pointer // type as AI has, so we can GEP through it safely. @@ -926,9 +925,11 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, } } MI->eraseFromParent(); + if (PossiblyDead) + RecursivelyDeleteTriviallyDeadInstructions(PossiblyDead); } -/// RewriteStoreUserOfWholeAlloca - We found an store of an integer that +/// RewriteStoreUserOfWholeAlloca - We found a store of an integer that /// overwrites the entire allocation. Extract out the pieces of the stored /// integer and store them individually. void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI, @@ -1052,7 +1053,7 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI, SI->eraseFromParent(); } -/// RewriteLoadUserOfWholeAlloca - We found an load of the entire allocation to +/// RewriteLoadUserOfWholeAlloca - We found a load of the entire allocation to /// an integer. Load the individual pieces to form the aggregate value. void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, SmallVector<AllocaInst*, 32> &NewElts) { @@ -1186,7 +1187,6 @@ static bool HasPadding(const Type *Ty, const TargetData &TD) { /// isSafeStructAllocaToScalarRepl - Check to see if the specified allocation of /// an aggregate can be broken down into elements. Return 0 if not, 3 if safe, /// or 1 if safe after canonicalization has been performed. -/// int SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) { // Loop over the use list of the alloca. We can only transform it if all of // the users are safe to transform. @@ -1215,7 +1215,7 @@ int SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) { return Info.needsCleanup ? 1 : 3; } -/// CleanupGEP - GEP is used by an Alloca, which can be prompted after the GEP +/// CleanupGEP - GEP is used by an Alloca, which can be promoted after the GEP /// is canonicalized here. void SROA::CleanupGEP(GetElementPtrInst *GEPI) { gep_type_iterator I = gep_type_begin(GEPI); @@ -1347,7 +1347,7 @@ static void MergeInType(const Type *In, uint64_t Offset, const Type *&VecTy, } /// CanConvertToScalar - V is a pointer. If we can convert the pointee and all -/// its accesses to use a to single vector type, return true, and set VecTy to +/// its accesses to a single vector type, return true and set VecTy to /// the new type. If we could convert the alloca into a single promotable /// integer, return true but set VecTy to VoidTy. Further, if the use is not a /// completely trivial use that mem2reg could promote, set IsNotTrivial. Offset @@ -1355,7 +1355,6 @@ static void MergeInType(const Type *In, uint64_t Offset, const Type *&VecTy, /// /// If we see at least one access to the value that is as a vector type, set the /// SawVec flag. -/// bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy, bool &SawVec, uint64_t Offset, unsigned AllocaSize) { @@ -1438,7 +1437,6 @@ bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy, return true; } - /// ConvertUsesToScalar - Convert all of the users of Ptr to use the new alloca /// directly. This happens when we are converting an "integer union" to a /// single integer scalar, or when we are converting a "vector union" to a @@ -1481,7 +1479,8 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset) { if (StoreInst *SI = dyn_cast<StoreInst>(User)) { assert(SI->getOperand(0) != Ptr && "Consistency error!"); // FIXME: Remove once builder has Twine API. - Value *Old = Builder.CreateLoad(NewAI, (NewAI->getName()+".in").str().c_str()); + Value *Old = Builder.CreateLoad(NewAI, + (NewAI->getName()+".in").str().c_str()); Value *New = ConvertScalar_InsertValue(SI->getOperand(0), Old, Offset, Builder); Builder.CreateStore(New, NewAI); @@ -1506,7 +1505,8 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset) { APVal |= APVal << 8; // FIXME: Remove once builder has Twine API. - Value *Old = Builder.CreateLoad(NewAI, (NewAI->getName()+".in").str().c_str()); + Value *Old = Builder.CreateLoad(NewAI, + (NewAI->getName()+".in").str().c_str()); Value *New = ConvertScalar_InsertValue( ConstantInt::get(User->getContext(), APVal), Old, Offset, Builder); @@ -1679,7 +1679,6 @@ Value *SROA::ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType, return FromVal; } - /// ConvertScalar_InsertValue - Insert the value "SV" into the existing integer /// or vector value "Old" at the offset specified by Offset. /// diff --git a/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp b/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp index 13077fe..5acd6aa 100644 --- a/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp @@ -88,7 +88,7 @@ InlineHalfPowrs(const std::vector<Instruction *> &HalfPowrs, if (!isa<ReturnInst>(Body->getTerminator())) break; - Instruction *NextInst = next(BasicBlock::iterator(Call)); + Instruction *NextInst = llvm::next(BasicBlock::iterator(Call)); // Inline the call, taking care of what code ends up where. NewBlock = SplitBlock(NextInst->getParent(), NextInst, this); diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index f9b929c..6fd884b 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -128,8 +128,7 @@ public: /// CastToCStr - Return V if it is an i8*, otherwise cast it to i8*. Value *LibCallOptimization::CastToCStr(Value *V, IRBuilder<> &B) { - return - B.CreateBitCast(V, Type::getInt8PtrTy(*Context), "cstr"); + return B.CreateBitCast(V, Type::getInt8PtrTy(*Context), "cstr"); } /// EmitStrLen - Emit a call to the strlen function to the builder, for the @@ -157,27 +156,25 @@ Value *LibCallOptimization::EmitStrLen(Value *Ptr, IRBuilder<> &B) { Value *LibCallOptimization::EmitMemCpy(Value *Dst, Value *Src, Value *Len, unsigned Align, IRBuilder<> &B) { Module *M = Caller->getParent(); - Intrinsic::ID IID = Intrinsic::memcpy; - const Type *Tys[1]; - Tys[0] = Len->getType(); - Value *MemCpy = Intrinsic::getDeclaration(M, IID, Tys, 1); - return B.CreateCall4(MemCpy, CastToCStr(Dst, B), CastToCStr(Src, B), Len, + const Type *Ty = Len->getType(); + Value *MemCpy = Intrinsic::getDeclaration(M, Intrinsic::memcpy, &Ty, 1); + Dst = CastToCStr(Dst, B); + Src = CastToCStr(Src, B); + return B.CreateCall4(MemCpy, Dst, Src, Len, ConstantInt::get(Type::getInt32Ty(*Context), Align)); } -/// EmitMemMOve - Emit a call to the memmove function to the builder. This +/// EmitMemMove - Emit a call to the memmove function to the builder. This /// always expects that the size has type 'intptr_t' and Dst/Src are pointers. Value *LibCallOptimization::EmitMemMove(Value *Dst, Value *Src, Value *Len, unsigned Align, IRBuilder<> &B) { Module *M = Caller->getParent(); - Intrinsic::ID IID = Intrinsic::memmove; - const Type *Tys[1]; - Tys[0] = TD->getIntPtrType(*Context); - Value *MemMove = Intrinsic::getDeclaration(M, IID, Tys, 1); - Value *D = CastToCStr(Dst, B); - Value *S = CastToCStr(Src, B); + const Type *Ty = TD->getIntPtrType(*Context); + Value *MemMove = Intrinsic::getDeclaration(M, Intrinsic::memmove, &Ty, 1); + Dst = CastToCStr(Dst, B); + Src = CastToCStr(Src, B); Value *A = ConstantInt::get(Type::getInt32Ty(*Context), Align); - return B.CreateCall4(MemMove, D, S, Len, A); + return B.CreateCall4(MemMove, Dst, Src, Len, A); } /// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is @@ -2647,10 +2644,11 @@ bool SimplifyLibCalls::doInitialization(Module &M) { // * strcspn("",a) -> 0 // * strcspn(s,"") -> strlen(a) // -// strstr: +// strstr: (PR5783) // * strstr(x,x) -> x -// * strstr(s1,s2) -> offset_of_s2_in(s1) -// (if s1 and s2 are constant strings) +// * strstr(x, "") -> x +// * strstr(x, "a") -> strchr(x, 'a') +// * strstr(s1,s2) -> result (if s1 and s2 are constant strings) // // tan, tanf, tanl: // * tan(atan(x)) -> x diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index 2974592..2962e84 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -16,7 +16,6 @@ #include "llvm/Function.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" #include "llvm/Constant.h" #include "llvm/Type.h" #include "llvm/Analysis/AliasAnalysis.h" diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index aef0f5f..7a37aa3 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -603,3 +603,65 @@ bool llvm::OnlyUsedByDbgInfoIntrinsics(Instruction *I, return true; } +/// EliminateDuplicatePHINodes - Check for and eliminate duplicate PHI +/// nodes in this block. This doesn't try to be clever about PHI nodes +/// which differ only in the order of the incoming values, but instcombine +/// orders them so it usually won't matter. +/// +bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) { + bool Changed = false; + + // This implementation doesn't currently consider undef operands + // specially. Theroetically, two phis which are identical except for + // one having an undef where the other doesn't could be collapsed. + + // Map from PHI hash values to PHI nodes. If multiple PHIs have + // the same hash value, the element is the first PHI in the + // linked list in CollisionMap. + DenseMap<uintptr_t, PHINode *> HashMap; + + // Maintain linked lists of PHI nodes with common hash values. + DenseMap<PHINode *, PHINode *> CollisionMap; + + // Examine each PHI. + for (BasicBlock::iterator I = BB->begin(); + PHINode *PN = dyn_cast<PHINode>(I++); ) { + // Compute a hash value on the operands. Instcombine will likely have sorted + // them, which helps expose duplicates, but we have to check all the + // operands to be safe in case instcombine hasn't run. + uintptr_t Hash = 0; + for (User::op_iterator I = PN->op_begin(), E = PN->op_end(); I != E; ++I) { + // This hash algorithm is quite weak as hash functions go, but it seems + // to do a good enough job for this particular purpose, and is very quick. + Hash ^= reinterpret_cast<uintptr_t>(static_cast<Value *>(*I)); + Hash = (Hash << 7) | (Hash >> (sizeof(uintptr_t) * CHAR_BIT - 7)); + } + // If we've never seen this hash value before, it's a unique PHI. + std::pair<DenseMap<uintptr_t, PHINode *>::iterator, bool> Pair = + HashMap.insert(std::make_pair(Hash, PN)); + if (Pair.second) continue; + // Otherwise it's either a duplicate or a hash collision. + for (PHINode *OtherPN = Pair.first->second; ; ) { + if (OtherPN->isIdenticalTo(PN)) { + // A duplicate. Replace this PHI with its duplicate. + PN->replaceAllUsesWith(OtherPN); + PN->eraseFromParent(); + Changed = true; + break; + } + // A non-duplicate hash collision. + DenseMap<PHINode *, PHINode *>::iterator I = CollisionMap.find(OtherPN); + if (I == CollisionMap.end()) { + // Set this PHI to be the head of the linked list of colliding PHIs. + PHINode *Old = Pair.first->second; + Pair.first->second = PN; + CollisionMap[PN] = Old; + break; + } + // Procede to the next PHI in the list. + OtherPN = I->second; + } + } + + return Changed; +} diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp index 8c18b59..743bb6e 100644 --- a/lib/Transforms/Utils/LowerSwitch.cpp +++ b/lib/Transforms/Utils/LowerSwitch.cpp @@ -244,7 +244,7 @@ unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) { // Merge case into clusters if (Cases.size()>=2) - for (CaseItr I=Cases.begin(), J=next(Cases.begin()); J!=Cases.end(); ) { + for (CaseItr I=Cases.begin(), J=llvm::next(Cases.begin()); J!=Cases.end(); ) { int64_t nextValue = cast<ConstantInt>(J->Low)->getSExtValue(); int64_t currentValue = cast<ConstantInt>(I->High)->getSExtValue(); BasicBlock* nextBB = J->BB; diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index e25f9e2..846e432 100644 --- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -55,7 +55,6 @@ struct DenseMapInfo<std::pair<BasicBlock*, unsigned> > { static bool isEqual(const EltTy &LHS, const EltTy &RHS) { return LHS == RHS; } - static bool isPod() { return true; } }; } @@ -102,7 +101,7 @@ namespace { public: typedef std::vector<Value *> ValVector; - RenamePassData() {} + RenamePassData() : BB(NULL), Pred(NULL), Values() {} RenamePassData(BasicBlock *B, BasicBlock *P, const ValVector &V) : BB(B), Pred(P), Values(V) {} BasicBlock *BB; diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp index 8a07c35..ba41bf9 100644 --- a/lib/Transforms/Utils/SSAUpdater.cpp +++ b/lib/Transforms/Utils/SSAUpdater.cpp @@ -295,10 +295,14 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) { InsertedVal = SingularValue; } + // Either path through the 'if' should have set insertedVal -> SingularVal. + assert((InsertedVal == SingularValue || isa<UndefValue>(InsertedVal)) && + "RAUW didn't change InsertedVal to be SingularVal"); + // Drop the entries we added in IncomingPredInfo to restore the stack. IncomingPredInfo.erase(IncomingPredInfo.begin()+FirstPredInfoEntry, IncomingPredInfo.end()); - return InsertedVal; + return SingularValue; } // Otherwise, we do need a PHI: insert one now if we don't already have one. diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 89b0bd9..d7ca45e 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1589,69 +1589,6 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { return true; } -/// EliminateDuplicatePHINodes - Check for and eliminate duplicate PHI -/// nodes in this block. This doesn't try to be clever about PHI nodes -/// which differ only in the order of the incoming values, but instcombine -/// orders them so it usually won't matter. -/// -bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) { - bool Changed = false; - - // This implementation doesn't currently consider undef operands - // specially. Theroetically, two phis which are identical except for - // one having an undef where the other doesn't could be collapsed. - - // Map from PHI hash values to PHI nodes. If multiple PHIs have - // the same hash value, the element is the first PHI in the - // linked list in CollisionMap. - DenseMap<uintptr_t, PHINode *> HashMap; - - // Maintain linked lists of PHI nodes with common hash values. - DenseMap<PHINode *, PHINode *> CollisionMap; - - // Examine each PHI. - for (BasicBlock::iterator I = BB->begin(); - PHINode *PN = dyn_cast<PHINode>(I++); ) { - // Compute a hash value on the operands. Instcombine will likely have sorted - // them, which helps expose duplicates, but we have to check all the - // operands to be safe in case instcombine hasn't run. - uintptr_t Hash = 0; - for (User::op_iterator I = PN->op_begin(), E = PN->op_end(); I != E; ++I) { - // This hash algorithm is quite weak as hash functions go, but it seems - // to do a good enough job for this particular purpose, and is very quick. - Hash ^= reinterpret_cast<uintptr_t>(static_cast<Value *>(*I)); - Hash = (Hash << 7) | (Hash >> (sizeof(uintptr_t) * CHAR_BIT - 7)); - } - // If we've never seen this hash value before, it's a unique PHI. - std::pair<DenseMap<uintptr_t, PHINode *>::iterator, bool> Pair = - HashMap.insert(std::make_pair(Hash, PN)); - if (Pair.second) continue; - // Otherwise it's either a duplicate or a hash collision. - for (PHINode *OtherPN = Pair.first->second; ; ) { - if (OtherPN->isIdenticalTo(PN)) { - // A duplicate. Replace this PHI with its duplicate. - PN->replaceAllUsesWith(OtherPN); - PN->eraseFromParent(); - Changed = true; - break; - } - // A non-duplicate hash collision. - DenseMap<PHINode *, PHINode *>::iterator I = CollisionMap.find(OtherPN); - if (I == CollisionMap.end()) { - // Set this PHI to be the head of the linked list of colliding PHIs. - PHINode *Old = Pair.first->second; - Pair.first->second = PN; - CollisionMap[PN] = Old; - break; - } - // Procede to the next PHI in the list. - OtherPN = I->second; - } - } - - return Changed; -} - /// SimplifyCFG - This function is used to do simplification of a CFG. For /// example, it adjusts branches to branches to eliminate the extra hop, it /// eliminates unreachable basic blocks, and does other "peephole" optimization diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp index 82d7914..c765d96 100644 --- a/lib/VMCore/AsmWriter.cpp +++ b/lib/VMCore/AsmWriter.cpp @@ -813,6 +813,11 @@ void SlotTracker::CreateFunctionSlot(const Value *V) { void SlotTracker::CreateMetadataSlot(const MDNode *N) { assert(N && "Can't insert a null Value into SlotTracker!"); + // Don't insert if N contains an instruction. + for (unsigned i = 0, e = N->getNumElements(); i != e; ++i) + if (N->getElement(i) && isa<Instruction>(N->getElement(i))) + return; + ValueMap::iterator I = mdnMap.find(N); if (I != mdnMap.end()) return; @@ -1227,6 +1232,25 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Value *V, } if (const MDNode *N = dyn_cast<MDNode>(V)) { + if (Machine->getMetadataSlot(N) == -1) { + // Print metadata inline, not via slot reference number. + Out << "!{"; + for (unsigned mi = 0, me = N->getNumElements(); mi != me; ++mi) { + const Value *Val = N->getElement(mi); + if (!Val) + Out << "null"; + else { + TypePrinter->print(N->getElement(0)->getType(), Out); + Out << ' '; + WriteAsOperandInternal(Out, N->getElement(0), TypePrinter, Machine); + } + if (mi + 1 != me) + Out << ", "; + } + Out << '}'; + return; + } + Out << '!' << Machine->getMetadataSlot(N); return; } @@ -1636,6 +1660,7 @@ void AssemblyWriter::printFunction(const Function *F) { case CallingConv::ARM_APCS: Out << "arm_apcscc "; break; case CallingConv::ARM_AAPCS: Out << "arm_aapcscc "; break; case CallingConv::ARM_AAPCS_VFP:Out << "arm_aapcs_vfpcc "; break; + case CallingConv::MSP430_INTR: Out << "msp430_intrcc "; break; default: Out << "cc" << F->getCallingConv() << " "; break; } @@ -1903,6 +1928,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) { case CallingConv::ARM_APCS: Out << " arm_apcscc "; break; case CallingConv::ARM_AAPCS: Out << " arm_aapcscc "; break; case CallingConv::ARM_AAPCS_VFP:Out << " arm_aapcs_vfpcc "; break; + case CallingConv::MSP430_INTR: Out << " msp430_intrcc "; break; default: Out << " cc" << CI->getCallingConv(); break; } @@ -1953,6 +1979,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) { case CallingConv::ARM_APCS: Out << " arm_apcscc "; break; case CallingConv::ARM_AAPCS: Out << " arm_aapcscc "; break; case CallingConv::ARM_AAPCS_VFP:Out << " arm_aapcs_vfpcc "; break; + case CallingConv::MSP430_INTR: Out << " msp430_intrcc "; break; default: Out << " cc" << II->getCallingConv(); break; } diff --git a/lib/VMCore/BasicBlock.cpp b/lib/VMCore/BasicBlock.cpp index 23d0557..c7f7f53 100644 --- a/lib/VMCore/BasicBlock.cpp +++ b/lib/VMCore/BasicBlock.cpp @@ -262,7 +262,7 @@ BasicBlock *BasicBlock::splitBasicBlock(iterator I, const Twine &BBName) { assert(I != InstList.end() && "Trying to get me to create degenerate basic block!"); - BasicBlock *InsertBefore = next(Function::iterator(this)) + BasicBlock *InsertBefore = llvm::next(Function::iterator(this)) .getNodePtrUnchecked(); BasicBlock *New = BasicBlock::Create(getContext(), BBName, getParent(), InsertBefore); diff --git a/lib/VMCore/Constants.cpp b/lib/VMCore/Constants.cpp index c622558..a62f75b 100644 --- a/lib/VMCore/Constants.cpp +++ b/lib/VMCore/Constants.cpp @@ -1560,7 +1560,7 @@ Constant *ConstantExpr::getGetElementPtrTy(const Type *ReqTy, Constant *C, Constant *ConstantExpr::getInBoundsGetElementPtrTy(const Type *ReqTy, Constant *C, - Value* const *Idxs, + Value *const *Idxs, unsigned NumIdx) { assert(GetElementPtrInst::getIndexedType(C->getType(), Idxs, Idxs+NumIdx) == diff --git a/lib/VMCore/Function.cpp b/lib/VMCore/Function.cpp index 6cf2c81..88e1fe8 100644 --- a/lib/VMCore/Function.cpp +++ b/lib/VMCore/Function.cpp @@ -77,6 +77,13 @@ bool Argument::hasByValAttr() const { return getParent()->paramHasAttr(getArgNo()+1, Attribute::ByVal); } +/// hasNestAttr - Return true if this argument has the nest attribute on +/// it in its containing function. +bool Argument::hasNestAttr() const { + if (!isa<PointerType>(getType())) return false; + return getParent()->paramHasAttr(getArgNo()+1, Attribute::Nest); +} + /// hasNoAliasAttr - Return true if this argument has the noalias attribute on /// it in its containing function. bool Argument::hasNoAliasAttr() const { diff --git a/lib/VMCore/LLVMContextImpl.h b/lib/VMCore/LLVMContextImpl.h index 1c3244b..8a2378e 100644 --- a/lib/VMCore/LLVMContextImpl.h +++ b/lib/VMCore/LLVMContextImpl.h @@ -62,7 +62,6 @@ struct DenseMapAPIntKeyInfo { static bool isEqual(const KeyTy &LHS, const KeyTy &RHS) { return LHS == RHS; } - static bool isPod() { return false; } }; struct DenseMapAPFloatKeyInfo { @@ -89,7 +88,6 @@ struct DenseMapAPFloatKeyInfo { static bool isEqual(const KeyTy &LHS, const KeyTy &RHS) { return LHS == RHS; } - static bool isPod() { return false; } }; class LLVMContextImpl { diff --git a/lib/VMCore/Pass.cpp b/lib/VMCore/Pass.cpp index 1232fe2..6bea7a8 100644 --- a/lib/VMCore/Pass.cpp +++ b/lib/VMCore/Pass.cpp @@ -41,6 +41,10 @@ Pass::~Pass() { // Force out-of-line virtual method. ModulePass::~ModulePass() { } +PassManagerType ModulePass::getPotentialPassManagerType() const { + return PMT_ModulePassManager; +} + bool Pass::mustPreserveAnalysisID(const PassInfo *AnalysisID) const { return Resolver->getAnalysisIfAvailable(AnalysisID, true) != 0; } @@ -60,6 +64,27 @@ const char *Pass::getPassName() const { return "Unnamed pass: implement Pass::getPassName()"; } +void Pass::preparePassManager(PMStack &) { + // By default, don't do anything. +} + +PassManagerType Pass::getPotentialPassManagerType() const { + // Default implementation. + return PMT_Unknown; +} + +void Pass::getAnalysisUsage(AnalysisUsage &) const { + // By default, no analysis results are used, all are invalidated. +} + +void Pass::releaseMemory() { + // By default, don't do anything. +} + +void Pass::verifyAnalysis() const { + // By default, don't do anything. +} + // print - Print out the internal state of the pass. This is called by Analyze // to print out the contents of an analysis. Otherwise it is not necessary to // implement this method. @@ -79,6 +104,10 @@ void Pass::dump() const { // Force out-of-line virtual method. ImmutablePass::~ImmutablePass() { } +void ImmutablePass::initializePass() { + // By default, don't do anything. +} + //===----------------------------------------------------------------------===// // FunctionPass Implementation // @@ -107,6 +136,20 @@ bool FunctionPass::run(Function &F) { return Changed | doFinalization(*F.getParent()); } +bool FunctionPass::doInitialization(Module &) { + // By default, don't do anything. + return false; +} + +bool FunctionPass::doFinalization(Module &) { + // By default, don't do anything. + return false; +} + +PassManagerType FunctionPass::getPotentialPassManagerType() const { + return PMT_FunctionPassManager; +} + //===----------------------------------------------------------------------===// // BasicBlockPass Implementation // @@ -121,6 +164,30 @@ bool BasicBlockPass::runOnFunction(Function &F) { return Changed | doFinalization(F); } +bool BasicBlockPass::doInitialization(Module &) { + // By default, don't do anything. + return false; +} + +bool BasicBlockPass::doInitialization(Function &) { + // By default, don't do anything. + return false; +} + +bool BasicBlockPass::doFinalization(Function &) { + // By default, don't do anything. + return false; +} + +bool BasicBlockPass::doFinalization(Module &) { + // By default, don't do anything. + return false; +} + +PassManagerType BasicBlockPass::getPotentialPassManagerType() const { + return PMT_BasicBlockPassManager; +} + //===----------------------------------------------------------------------===// // Pass Registration mechanism // diff --git a/lib/VMCore/PassManager.cpp b/lib/VMCore/PassManager.cpp index ae418a0..52e8a82 100644 --- a/lib/VMCore/PassManager.cpp +++ b/lib/VMCore/PassManager.cpp @@ -738,9 +738,15 @@ void PMDataManager::removeNotPreservedAnalysis(Pass *P) { std::map<AnalysisID, Pass *>::iterator Info = I++; if (!dynamic_cast<ImmutablePass*>(Info->second) && std::find(PreservedSet.begin(), PreservedSet.end(), Info->first) == - PreservedSet.end()) + PreservedSet.end()) { // Remove this analysis + if (PassDebugging >= Details) { + Pass *S = Info->second; + errs() << " -- '" << P->getPassName() << "' is not preserving '"; + errs() << S->getPassName() << "'\n"; + } InheritedAnalysis[Index]->erase(Info); + } } } } @@ -1391,8 +1397,7 @@ MPPassManager::runOnModule(Module &M) { for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { ModulePass *MP = getContainedPass(Index); - dumpPassInfo(MP, EXECUTION_MSG, ON_MODULE_MSG, - M.getModuleIdentifier().c_str()); + dumpPassInfo(MP, EXECUTION_MSG, ON_MODULE_MSG, M.getModuleIdentifier()); dumpRequiredSet(MP); initializeAnalysisImpl(MP); @@ -1406,13 +1411,13 @@ MPPassManager::runOnModule(Module &M) { if (Changed) dumpPassInfo(MP, MODIFICATION_MSG, ON_MODULE_MSG, - M.getModuleIdentifier().c_str()); + M.getModuleIdentifier()); dumpPreservedSet(MP); verifyPreservedAnalysis(MP); removeNotPreservedAnalysis(MP); recordAvailableAnalysis(MP); - removeDeadPasses(MP, M.getModuleIdentifier().c_str(), ON_MODULE_MSG); + removeDeadPasses(MP, M.getModuleIdentifier(), ON_MODULE_MSG); } // Finalize on-the-fly passes |