Diffstat (limited to 'contrib/llvm/lib/Transforms/Utils/Local.cpp')
-rw-r--r-- | contrib/llvm/lib/Transforms/Utils/Local.cpp | 579
1 file changed, 512 insertions(+), 67 deletions(-)
diff --git a/contrib/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm/lib/Transforms/Utils/Local.cpp
index ba8af47..abc9b65 100644
--- a/contrib/llvm/lib/Transforms/Utils/Local.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/Local.cpp
@@ -17,11 +17,13 @@
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/Hashing.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/EHPersonalities.h"
 #include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/LibCallSemantics.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/LazyValueInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/CFG.h"
 #include "llvm/IR/Constants.h"
@@ -188,9 +190,8 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
       BasicBlock *BB = SI->getParent();
 
       // Remove entries from PHI nodes which we no longer branch to...
-      for (unsigned i = 0, e = SI->getNumSuccessors(); i != e; ++i) {
+      for (BasicBlock *Succ : SI->successors()) {
         // Found case matching a constant operand?
-        BasicBlock *Succ = SI->getSuccessor(i);
         if (Succ == TheOnlyDest)
           TheOnlyDest = nullptr; // Don't modify the first branch to TheOnlyDest
         else
@@ -230,6 +231,11 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
                                                SIDef->getValue().getZExtValue()));
       }
 
+      // Update make.implicit metadata to the newly-created conditional branch.
+      MDNode *MakeImplicitMD = SI->getMetadata(LLVMContext::MD_make_implicit);
+      if (MakeImplicitMD)
+        NewBr->setMetadata(LLVMContext::MD_make_implicit, MakeImplicitMD);
+
       // Delete the old switch.
       SI->eraseFromParent();
       return true;
@@ -283,8 +289,9 @@ bool llvm::isInstructionTriviallyDead(Instruction *I,
                                       const TargetLibraryInfo *TLI) {
   if (!I->use_empty() || isa<TerminatorInst>(I)) return false;
 
-  // We don't want the landingpad instruction removed by anything this general.
-  if (isa<LandingPadInst>(I))
+  // We don't want the landingpad-like instructions removed by anything this
+  // general.
+  if (I->isEHPad())
     return false;
 
   // We don't want debug info removed by anything this general, unless
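Note on the hunks above: ConstantFoldTerminator is the utility that folds terminators over constant conditions, and the new lines carry !make.implicit across the switch-to-branch rewrite. A minimal, hedged usage sketch follows; only ConstantFoldTerminator itself comes from this file, the surrounding function and names are hypothetical.

// Hypothetical illustration: a switch on a constant condition collapses into
// an unconditional branch, with PHI entries and metadata updated as above.
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;

void foldConstantSwitchExample(LLVMContext &Ctx) {
  Module M("example", Ctx);
  auto *FTy = FunctionType::get(Type::getVoidTy(Ctx), false);
  auto *F = cast<Function>(M.getOrInsertFunction("f", FTy));
  auto *Entry = BasicBlock::Create(Ctx, "entry", F);
  auto *Case1 = BasicBlock::Create(Ctx, "case1", F);
  auto *Default = BasicBlock::Create(Ctx, "default", F);
  IRBuilder<> B(Entry);
  // Switch on the constant 1; only the matching successor survives.
  SwitchInst *SI = B.CreateSwitch(B.getInt32(1), Default, /*NumCases=*/1);
  SI->addCase(B.getInt32(1), Case1);
  IRBuilder<>(Case1).CreateRetVoid();
  IRBuilder<>(Default).CreateRetVoid();
  // Rewrites the terminator to "br label %case1" and fixes up PHI nodes.
  ConstantFoldTerminator(Entry);
}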
@@ -414,6 +421,49 @@ bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN,
   return false;
 }
 
+static bool
+simplifyAndDCEInstruction(Instruction *I,
+                          SmallSetVector<Instruction *, 16> &WorkList,
+                          const DataLayout &DL,
+                          const TargetLibraryInfo *TLI) {
+  if (isInstructionTriviallyDead(I, TLI)) {
+    // Null out all of the instruction's operands to see if any operand becomes
+    // dead as we go.
+    for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+      Value *OpV = I->getOperand(i);
+      I->setOperand(i, nullptr);
+
+      if (!OpV->use_empty() || I == OpV)
+        continue;
+
+      // If the operand is an instruction that became dead as we nulled out the
+      // operand, and if it is 'trivially' dead, delete it in a future loop
+      // iteration.
+      if (Instruction *OpI = dyn_cast<Instruction>(OpV))
+        if (isInstructionTriviallyDead(OpI, TLI))
+          WorkList.insert(OpI);
+    }
+
+    I->eraseFromParent();
+
+    return true;
+  }
+
+  if (Value *SimpleV = SimplifyInstruction(I, DL)) {
+    // Add the users to the worklist. CAREFUL: an instruction can use itself,
+    // in the case of a phi node.
+    for (User *U : I->users())
+      if (U != I)
+        WorkList.insert(cast<Instruction>(U));
+
+    // Replace the instruction with its simplified value.
+    I->replaceAllUsesWith(SimpleV);
+    I->eraseFromParent();
+    return true;
+  }
+  return false;
+}
+
 /// SimplifyInstructionsInBlock - Scan the specified basic block and try to
 /// simplify any instructions in it and recursively delete dead instructions.
 ///
@@ -422,30 +472,34 @@ bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN,
 bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB,
                                        const TargetLibraryInfo *TLI) {
   bool MadeChange = false;
+  const DataLayout &DL = BB->getModule()->getDataLayout();
 
 #ifndef NDEBUG
   // In debug builds, ensure that the terminator of the block is never replaced
   // or deleted by these simplifications. The idea of simplification is that it
   // cannot introduce new instructions, and there is no way to replace the
   // terminator of a block without introducing a new instruction.
-  AssertingVH<Instruction> TerminatorVH(--BB->end());
+  AssertingVH<Instruction> TerminatorVH(&BB->back());
 #endif
 
-  for (BasicBlock::iterator BI = BB->begin(), E = --BB->end(); BI != E; ) {
+  SmallSetVector<Instruction *, 16> WorkList;
+  // Iterate over the original function, only adding insts to the worklist
+  // if they actually need to be revisited. This avoids having to pre-init
+  // the worklist with the entire function's worth of instructions.
+  for (BasicBlock::iterator BI = BB->begin(), E = std::prev(BB->end());
+       BI != E;) {
     assert(!BI->isTerminator());
-    Instruction *Inst = BI++;
+    Instruction *I = &*BI;
+    ++BI;
 
-    WeakVH BIHandle(BI);
-    if (recursivelySimplifyInstruction(Inst, TLI)) {
-      MadeChange = true;
-      if (BIHandle != BI)
-        BI = BB->begin();
-      continue;
-    }
+    // We're visiting this instruction now, so make sure it's not in the
+    // worklist from an earlier visit.
+    if (!WorkList.count(I))
+      MadeChange |= simplifyAndDCEInstruction(I, WorkList, DL, TLI);
+  }
 
-    MadeChange |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);
-    if (BIHandle != BI)
-      BI = BB->begin();
+  while (!WorkList.empty()) {
+    Instruction *I = WorkList.pop_back_val();
+    MadeChange |= simplifyAndDCEInstruction(I, WorkList, DL, TLI);
   }
   return MadeChange;
 }
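A hedged usage sketch for the rewritten routine (the driver below is hypothetical): callers see the same interface, while internally each instruction is now revisited via the SmallSetVector worklist only when one of its operands or users actually changed, instead of restarting the block scan.

// Hypothetical driver: simplify every block of a function with the
// worklist-based routine introduced above.
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;

static bool simplifyFunctionBlocks(Function &F, const TargetLibraryInfo *TLI) {
  bool Changed = false;
  for (BasicBlock &BB : F)
    // Simplifies instructions and deletes trivially dead ones; the
    // SmallSetVector worklist queues each affected instruction only once.
    Changed |= SimplifyInstructionsInBlock(&BB, TLI);
  return Changed;
}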
@@ -808,7 +862,8 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) {
 
     // Copy over any phi, debug or lifetime instruction.
     BB->getTerminator()->eraseFromParent();
-    Succ->getInstList().splice(Succ->getFirstNonPHI(), BB->getInstList());
+    Succ->getInstList().splice(Succ->getFirstNonPHI()->getIterator(),
+                               BB->getInstList());
   } else {
     while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) {
       // We explicitly check for such uses in CanPropagatePredecessorsForPHIs.
@@ -889,37 +944,44 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
 static unsigned enforceKnownAlignment(Value *V, unsigned Align,
                                       unsigned PrefAlign,
                                       const DataLayout &DL) {
+  assert(PrefAlign > Align);
+
   V = V->stripPointerCasts();
 
   if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
+    // TODO: ideally, computeKnownBits ought to have used
+    // AllocaInst::getAlignment() in its computation already, making
+    // the below max redundant. But, as it turns out,
+    // stripPointerCasts recurses through infinite layers of bitcasts,
+    // while computeKnownBits is not allowed to traverse more than 6
+    // levels.
+    Align = std::max(AI->getAlignment(), Align);
+    if (PrefAlign <= Align)
+      return Align;
+
     // If the preferred alignment is greater than the natural stack alignment
     // then don't round up. This avoids dynamic stack realignment.
    if (DL.exceedsNaturalStackAlignment(PrefAlign))
       return Align;
-    // If there is a requested alignment and if this is an alloca, round up.
-    if (AI->getAlignment() >= PrefAlign)
-      return AI->getAlignment();
     AI->setAlignment(PrefAlign);
     return PrefAlign;
   }
 
   if (auto *GO = dyn_cast<GlobalObject>(V)) {
+    // TODO: as above, this shouldn't be necessary.
+    Align = std::max(GO->getAlignment(), Align);
+    if (PrefAlign <= Align)
+      return Align;
+
     // If there is a large requested alignment and we can, bump up the alignment
     // of the global. If the memory we set aside for the global may not be the
     // memory used by the final program then it is impossible for us to reliably
     // enforce the preferred alignment.
-    if (!GO->isStrongDefinitionForLinker())
+    if (!GO->canIncreaseAlignment())
       return Align;
 
-    if (GO->getAlignment() >= PrefAlign)
-      return GO->getAlignment();
-    // We can only increase the alignment of the global if it has no alignment
-    // specified or if it is not assigned a section. If it is assigned a
-    // section, the global could be densely packed with other objects in the
-    // section, increasing the alignment could cause padding issues.
-    if (!GO->hasSection() || GO->getAlignment() == 0)
-      GO->setAlignment(PrefAlign);
-    return GO->getAlignment();
+    GO->setAlignment(PrefAlign);
+    return PrefAlign;
   }
 
   return Align;
@@ -997,9 +1059,31 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
       ExtendedArg = dyn_cast<Argument>(ZExt->getOperand(0));
     if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
       ExtendedArg = dyn_cast<Argument>(SExt->getOperand(0));
-    if (ExtendedArg)
-      Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar, DIExpr,
+    if (ExtendedArg) {
+      // We're now only describing a subset of the variable. The piece we're
+      // describing will always be smaller than the variable size, because
+      // VariableSize == Size of Alloca described by DDI. Since SI stores
+      // to the alloca described by DDI, if its first operand is an extend,
+      // we're guaranteed that before extension, the value was narrower than
+      // the size of the alloca, hence the size of the described variable.
+      SmallVector<uint64_t, 3> NewDIExpr;
+      unsigned PieceOffset = 0;
+      // If this already is a bit piece, we drop the bit piece from the
+      // expression and record the offset.
+      if (DIExpr->isBitPiece()) {
+        NewDIExpr.append(DIExpr->elements_begin(), DIExpr->elements_end()-3);
+        PieceOffset = DIExpr->getBitPieceOffset();
+      } else {
+        NewDIExpr.append(DIExpr->elements_begin(), DIExpr->elements_end());
+      }
+      NewDIExpr.push_back(dwarf::DW_OP_bit_piece);
+      NewDIExpr.push_back(PieceOffset); // Offset
+      const DataLayout &DL = DDI->getModule()->getDataLayout();
+      NewDIExpr.push_back(DL.getTypeSizeInBits(ExtendedArg->getType())); // Size
+      Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar,
+                                      Builder.createExpression(NewDIExpr),
                                       DDI->getDebugLoc(), SI);
-    else
+    } else
       Builder.insertDbgValueIntrinsic(SI->getOperand(0), 0, DIVar, DIExpr,
                                       DDI->getDebugLoc(), SI);
@@ -1017,8 +1101,13 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
   if (LdStHasDebugValue(DIVar, LI))
     return true;
 
-  Builder.insertDbgValueIntrinsic(LI->getOperand(0), 0, DIVar, DIExpr,
-                                  DDI->getDebugLoc(), LI);
+  // We are now tracking the loaded value instead of the address. In the
+  // future if multi-location support is added to the IR, it might be
+  // preferable to keep tracking both the loaded value and the original
+  // address in case the alloca can not be elided.
+  Instruction *DbgValue = Builder.insertDbgValueIntrinsic(
+      LI, 0, DIVar, DIExpr, DDI->getDebugLoc(), (Instruction *)nullptr);
+  DbgValue->insertAfter(LI);
   return true;
 }
 
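To make the DW_OP_bit_piece encoding used above concrete: a bit-piece expression is the flat DWARF element list {DW_OP_bit_piece, offset-in-bits, size-in-bits}, appended after any existing elements. A small sketch under that assumption (the helper name is hypothetical):

// Hypothetical helper: build a DIExpression describing SizeInBits bits of a
// variable starting at OffsetInBits, as the hunk above does for extended
// function arguments.
#include "llvm/IR/DIBuilder.h"
#include "llvm/Support/Dwarf.h"
using namespace llvm;

static DIExpression *makeBitPiece(DIBuilder &DIB, uint64_t OffsetInBits,
                                  uint64_t SizeInBits) {
  // createExpression takes the raw DWARF element list.
  uint64_t Ops[] = {dwarf::DW_OP_bit_piece, OffsetInBits, SizeInBits};
  return DIB.createExpression(Ops);
}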
@@ -1034,8 +1123,8 @@ bool llvm::LowerDbgDeclare(Function &F) {
   DIBuilder DIB(*F.getParent(), /*AllowUnresolved*/ false);
   SmallVector<DbgDeclareInst *, 4> Dbgs;
   for (auto &FI : F)
-    for (BasicBlock::iterator BI : FI)
-      if (auto DDI = dyn_cast<DbgDeclareInst>(BI))
+    for (Instruction &BI : FI)
+      if (auto DDI = dyn_cast<DbgDeclareInst>(&BI))
         Dbgs.push_back(DDI);
 
   if (Dbgs.empty())
@@ -1060,9 +1149,13 @@ bool llvm::LowerDbgDeclare(Function &F) {
         // This is a call by-value or some other instruction that
         // takes a pointer to the variable. Insert a *value*
        // intrinsic that describes the alloca.
+        SmallVector<uint64_t, 1> NewDIExpr;
+        auto *DIExpr = DDI->getExpression();
+        NewDIExpr.push_back(dwarf::DW_OP_deref);
+        NewDIExpr.append(DIExpr->elements_begin(), DIExpr->elements_end());
         DIB.insertDbgValueIntrinsic(AI, 0, DDI->getVariable(),
-                                    DDI->getExpression(), DDI->getDebugLoc(),
-                                    CI);
+                                    DIB.createExpression(NewDIExpr),
+                                    DDI->getDebugLoc(), CI);
       }
       DDI->eraseFromParent();
     }
@@ -1082,9 +1175,10 @@ DbgDeclareInst *llvm::FindAllocaDbgDeclare(Value *V) {
   return nullptr;
 }
 
-bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
-                                      DIBuilder &Builder, bool Deref) {
-  DbgDeclareInst *DDI = FindAllocaDbgDeclare(AI);
+bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress,
+                             Instruction *InsertBefore, DIBuilder &Builder,
+                             bool Deref, int Offset) {
+  DbgDeclareInst *DDI = FindAllocaDbgDeclare(Address);
   if (!DDI)
     return false;
   DebugLoc Loc = DDI->getDebugLoc();
@@ -1092,29 +1186,40 @@ bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
   auto *DIExpr = DDI->getExpression();
   assert(DIVar && "Missing variable");
 
-  if (Deref) {
+  if (Deref || Offset) {
     // Create a copy of the original DIDescriptor for user variable, prepending
     // "deref" operation to a list of address elements, as new llvm.dbg.declare
     // will take a value storing address of the memory for variable, not
     // alloca itself.
     SmallVector<uint64_t, 4> NewDIExpr;
-    NewDIExpr.push_back(dwarf::DW_OP_deref);
+    if (Deref)
+      NewDIExpr.push_back(dwarf::DW_OP_deref);
+    if (Offset > 0) {
+      NewDIExpr.push_back(dwarf::DW_OP_plus);
+      NewDIExpr.push_back(Offset);
+    } else if (Offset < 0) {
+      NewDIExpr.push_back(dwarf::DW_OP_minus);
+      NewDIExpr.push_back(-Offset);
+    }
     if (DIExpr)
       NewDIExpr.append(DIExpr->elements_begin(), DIExpr->elements_end());
     DIExpr = Builder.createExpression(NewDIExpr);
   }
 
-  // Insert llvm.dbg.declare in the same basic block as the original alloca,
-  // and remove old llvm.dbg.declare.
-  BasicBlock *BB = AI->getParent();
-  Builder.insertDeclare(NewAllocaAddress, DIVar, DIExpr, Loc, BB);
+  // Insert llvm.dbg.declare immediately after the original alloca, and remove
+  // old llvm.dbg.declare.
+  Builder.insertDeclare(NewAddress, DIVar, DIExpr, Loc, InsertBefore);
   DDI->eraseFromParent();
   return true;
 }
 
-/// changeToUnreachable - Insert an unreachable instruction before the specified
-/// instruction, making it and the rest of the code in the block dead.
-static void changeToUnreachable(Instruction *I, bool UseLLVMTrap) {
+bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
+                                      DIBuilder &Builder, bool Deref, int Offset) {
+  return replaceDbgDeclare(AI, NewAllocaAddress, AI->getNextNode(), Builder,
+                           Deref, Offset);
+}
+
+void llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap) {
   BasicBlock *BB = I->getParent();
   // Loop over all of the successors, removing BB's entry from any PHI
   // nodes.
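A usage sketch for the generalized interface (the caller and its names are hypothetical): a pass that moves a variable's storage can retarget the llvm.dbg.declare in one call, with Deref and Offset describing how the old storage is reached from the new address.

// Hypothetical caller: a stack-reorganizing pass retargets the
// llvm.dbg.declare from an eliminated alloca to a slot inside a frame object.
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;

static void retargetDeclare(AllocaInst *Old, Value *FrameSlot, int ByteOffset,
                            DIBuilder &DIB) {
  // Deref=true because FrameSlot holds the address of the variable's storage;
  // a non-zero ByteOffset is emitted as DW_OP_plus or DW_OP_minus.
  replaceDbgDeclareForAlloca(Old, FrameSlot, DIB, /*Deref=*/true, ByteOffset);
}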
@@ -1132,7 +1237,7 @@ static void changeToUnreachable(Instruction *I, bool UseLLVMTrap) {
     new UnreachableInst(I->getContext(), I);
 
   // All instructions after this are dead.
-  BasicBlock::iterator BBI = I, BBE = BB->end();
+  BasicBlock::iterator BBI = I->getIterator(), BBE = BB->end();
   while (BBI != BBE) {
     if (!BBI->use_empty())
       BBI->replaceAllUsesWith(UndefValue::get(BBI->getType()));
@@ -1142,8 +1247,11 @@ static void changeToUnreachable(Instruction *I, bool UseLLVMTrap) {
 
 /// changeToCall - Convert the specified invoke into a normal call.
 static void changeToCall(InvokeInst *II) {
-  SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
-  CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args, "", II);
+  SmallVector<Value*, 8> Args(II->arg_begin(), II->arg_end());
+  SmallVector<OperandBundleDef, 1> OpBundles;
+  II->getOperandBundlesAsDefs(OpBundles);
+  CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args, OpBundles,
+                                       "", II);
   NewCall->takeName(II);
   NewCall->setCallingConv(II->getCallingConv());
   NewCall->setAttributes(II->getAttributes());
@@ -1162,7 +1270,7 @@ static bool markAliveBlocks(Function &F,
                             SmallPtrSetImpl<BasicBlock*> &Reachable) {
 
   SmallVector<BasicBlock*, 128> Worklist;
-  BasicBlock *BB = F.begin();
+  BasicBlock *BB = &F.front();
   Worklist.push_back(BB);
   Reachable.insert(BB);
   bool Changed = false;
@@ -1187,7 +1295,7 @@ static bool markAliveBlocks(Function &F,
 
         if (MakeUnreachable) {
           // Don't insert a call to llvm.trap right before the unreachable.
-          changeToUnreachable(BBI, false);
+          changeToUnreachable(&*BBI, false);
           Changed = true;
           break;
         }
@@ -1201,7 +1309,7 @@ static bool markAliveBlocks(Function &F,
         ++BBI;
         if (!isa<UnreachableInst>(BBI)) {
           // Don't insert a call to llvm.trap right before the unreachable.
-          changeToUnreachable(BBI, false);
+          changeToUnreachable(&*BBI, false);
           Changed = true;
         }
         break;
@@ -1227,8 +1335,9 @@ static bool markAliveBlocks(Function &F,
       }
     }
 
-    // Turn invokes that call 'nounwind' functions into ordinary calls.
-    if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
+    TerminatorInst *Terminator = BB->getTerminator();
+    if (auto *II = dyn_cast<InvokeInst>(Terminator)) {
+      // Turn invokes that call 'nounwind' functions into ordinary calls.
       Value *Callee = II->getCalledValue();
       if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
         changeToUnreachable(II, true);
@@ -1243,6 +1352,44 @@ static bool markAliveBlocks(Function &F,
         changeToCall(II);
         Changed = true;
       }
+    } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Terminator)) {
+      // Remove catchpads which cannot be reached.
+      struct CatchPadDenseMapInfo {
+        static CatchPadInst *getEmptyKey() {
+          return DenseMapInfo<CatchPadInst *>::getEmptyKey();
+        }
+        static CatchPadInst *getTombstoneKey() {
+          return DenseMapInfo<CatchPadInst *>::getTombstoneKey();
+        }
+        static unsigned getHashValue(CatchPadInst *CatchPad) {
+          return static_cast<unsigned>(hash_combine_range(
+              CatchPad->value_op_begin(), CatchPad->value_op_end()));
+        }
+        static bool isEqual(CatchPadInst *LHS, CatchPadInst *RHS) {
+          if (LHS == getEmptyKey() || LHS == getTombstoneKey() ||
+              RHS == getEmptyKey() || RHS == getTombstoneKey())
+            return LHS == RHS;
+          return LHS->isIdenticalTo(RHS);
+        }
+      };
+
+      // Set of unique CatchPads.
+      SmallDenseMap<CatchPadInst *, detail::DenseSetEmpty, 4,
+                    CatchPadDenseMapInfo, detail::DenseSetPair<CatchPadInst *>>
+          HandlerSet;
+      detail::DenseSetEmpty Empty;
+      for (CatchSwitchInst::handler_iterator I = CatchSwitch->handler_begin(),
+                                             E = CatchSwitch->handler_end();
+           I != E; ++I) {
+        BasicBlock *HandlerBB = *I;
+        auto *CatchPad = cast<CatchPadInst>(HandlerBB->getFirstNonPHI());
+        if (!HandlerSet.insert({CatchPad, Empty}).second) {
+          CatchSwitch->removeHandler(I);
+          --I;
+          --E;
+          Changed = true;
+        }
+      }
     }
 
     Changed |= ConstantFoldTerminator(BB, true);
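CatchPadDenseMapInfo above is an instance of the usual LLVM pattern for keying a DenseMap by structural identity instead of pointer identity. A standalone sketch of the same pattern (names hypothetical; the hash here is deliberately simplistic, where the real code hashes all operands with hash_combine_range):

// Hypothetical sketch of the same uniquing pattern: a map keyed by an
// instruction's structure (isIdenticalTo) rather than its address.
#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

struct StructurallyEqualInfo {
  static Instruction *getEmptyKey() {
    return DenseMapInfo<Instruction *>::getEmptyKey();
  }
  static Instruction *getTombstoneKey() {
    return DenseMapInfo<Instruction *>::getTombstoneKey();
  }
  static unsigned getHashValue(Instruction *I) {
    // Sketch only: a faithful version would also hash the operands, as
    // CatchPadDenseMapInfo does with hash_combine_range.
    return I->getOpcode();
  }
  static bool isEqual(Instruction *LHS, Instruction *RHS) {
    if (LHS == getEmptyKey() || LHS == getTombstoneKey() ||
        RHS == getEmptyKey() || RHS == getTombstoneKey())
      return LHS == RHS;
    return LHS->isIdenticalTo(RHS);
  }
};

// Usage: SmallDenseMap<Instruction *, char, 8, StructurallyEqualInfo> Seen;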
@@ -1253,10 +1400,44 @@ static bool markAliveBlocks(Function &F,
   return Changed;
 }
 
+void llvm::removeUnwindEdge(BasicBlock *BB) {
+  TerminatorInst *TI = BB->getTerminator();
+
+  if (auto *II = dyn_cast<InvokeInst>(TI)) {
+    changeToCall(II);
+    return;
+  }
+
+  TerminatorInst *NewTI;
+  BasicBlock *UnwindDest;
+
+  if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) {
+    NewTI = CleanupReturnInst::Create(CRI->getCleanupPad(), nullptr, CRI);
+    UnwindDest = CRI->getUnwindDest();
+  } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(TI)) {
+    auto *NewCatchSwitch = CatchSwitchInst::Create(
+        CatchSwitch->getParentPad(), nullptr, CatchSwitch->getNumHandlers(),
+        CatchSwitch->getName(), CatchSwitch);
+    for (BasicBlock *PadBB : CatchSwitch->handlers())
+      NewCatchSwitch->addHandler(PadBB);
+
+    NewTI = NewCatchSwitch;
+    UnwindDest = CatchSwitch->getUnwindDest();
+  } else {
+    llvm_unreachable("Could not find unwind successor");
+  }
+
+  NewTI->takeName(TI);
+  NewTI->setDebugLoc(TI->getDebugLoc());
+  UnwindDest->removePredecessor(BB);
+  TI->replaceAllUsesWith(NewTI);
+  TI->eraseFromParent();
+}
+
 /// removeUnreachableBlocksFromFn - Remove blocks that are not reachable, even
 /// if they are in a dead cycle. Return true if a change was made, false
 /// otherwise.
-bool llvm::removeUnreachableBlocks(Function &F) {
+bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI) {
   SmallPtrSet<BasicBlock*, 128> Reachable;
   bool Changed = markAliveBlocks(F, Reachable);
 
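A sketch of the new parameter in use (the pass body is hypothetical): callers that hold a LazyValueInfo can pass it through so cached per-block results are erased before the blocks are deleted; passing nullptr preserves the old behavior.

// Hypothetical pass body: prune dead blocks while keeping LVI's cache
// consistent via the eraseBlock call added above.
#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;

static bool pruneDeadBlocks(Function &F, LazyValueInfo *LVI) {
  // LVI may be null; a null pointer preserves the previous behavior.
  return removeUnreachableBlocks(F, LVI);
}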
@@ -1270,17 +1451,20 @@ bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI) {
   // Loop over all of the basic blocks that are not reachable, dropping all of
   // their internal references...
   for (Function::iterator BB = ++F.begin(), E = F.end(); BB != E; ++BB) {
-    if (Reachable.count(BB))
+    if (Reachable.count(&*BB))
       continue;
 
-    for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
+    for (succ_iterator SI = succ_begin(&*BB), SE = succ_end(&*BB); SI != SE;
+         ++SI)
       if (Reachable.count(*SI))
-        (*SI)->removePredecessor(BB);
+        (*SI)->removePredecessor(&*BB);
+    if (LVI)
+      LVI->eraseBlock(&*BB);
     BB->dropAllReferences();
   }
 
   for (Function::iterator I = ++F.begin(); I != F.end();)
-    if (!Reachable.count(I))
+    if (!Reachable.count(&*I))
       I = F.getBasicBlockList().erase(I);
     else
       ++I;
@@ -1288,9 +1472,10 @@ bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI) {
   return true;
 }
 
-void llvm::combineMetadata(Instruction *K, const Instruction *J, ArrayRef<unsigned> KnownIDs) {
+void llvm::combineMetadata(Instruction *K, const Instruction *J,
+                           ArrayRef<unsigned> KnownIDs) {
   SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata;
-  K->dropUnknownMetadata(KnownIDs);
+  K->dropUnknownNonDebugMetadata(KnownIDs);
   K->getAllMetadataOtherThanDebugLoc(Metadata);
   for (unsigned i = 0, n = Metadata.size(); i < n; ++i) {
     unsigned Kind = Metadata[i].first;
@@ -1326,8 +1511,29 @@ void llvm::combineMetadata(Instruction *K, const Instruction *J,
         // Only set the !nonnull if it is present in both instructions.
         K->setMetadata(Kind, JMD);
         break;
+      case LLVMContext::MD_invariant_group:
+        // Preserve !invariant.group in K.
+        break;
+      case LLVMContext::MD_align:
+        K->setMetadata(Kind,
+          MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD));
+        break;
+      case LLVMContext::MD_dereferenceable:
+      case LLVMContext::MD_dereferenceable_or_null:
+        K->setMetadata(Kind,
+          MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD));
+        break;
     }
   }
+  // Set !invariant.group from J if J has it. If both instructions have it
+  // then we will just pick it from J - even when they are different.
+  // Also make sure that K is load or store - f.e. combining bitcast with load
+  // could produce bitcast with invariant.group metadata, which is invalid.
+  // FIXME: we should try to preserve both invariant.group md if they are
+  // different, but right now instruction can only have one invariant.group.
+  if (auto *JMD = J->getMetadata(LLVMContext::MD_invariant_group))
+    if (isa<LoadInst>(K) || isa<StoreInst>(K))
+      K->setMetadata(LLVMContext::MD_invariant_group, JMD);
 }
 
 unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To,
@@ -1349,3 +1555,242 @@ unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To,
   }
   return Count;
 }
+
+unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To,
+                                        DominatorTree &DT,
+                                        const BasicBlock *BB) {
+  assert(From->getType() == To->getType());
+
+  unsigned Count = 0;
+  for (Value::use_iterator UI = From->use_begin(), UE = From->use_end();
+       UI != UE;) {
+    Use &U = *UI++;
+    auto *I = cast<Instruction>(U.getUser());
+    if (DT.dominates(BB, I->getParent())) {
+      U.set(To);
+      DEBUG(dbgs() << "Replace dominated use of '" << From->getName()
+                   << "' as " << *To << " in " << *U << "\n");
+      ++Count;
+    }
+  }
+  return Count;
+}
+
+bool llvm::callsGCLeafFunction(ImmutableCallSite CS) {
+  if (isa<IntrinsicInst>(CS.getInstruction()))
+    // Most LLVM intrinsics are things which can never take a safepoint.
+    // As a result, we don't need to have the stack parsable at the
+    // callsite. This is a highly useful optimization since intrinsic
+    // calls are fairly prevalent, particularly in debug builds.
+    return true;
+
+  // Check if the function is specifically marked as a gc leaf function.
+  if (CS.hasFnAttr("gc-leaf-function"))
+    return true;
+  if (const Function *F = CS.getCalledFunction())
+    return F->hasFnAttribute("gc-leaf-function");
+
+  return false;
+}
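For context, a sketch of how callers such as GVN typically drive the combineMetadata changes above when folding two equivalent loads into one (the wrapper is hypothetical; the kind IDs are real LLVMContext constants): K survives, J is about to be erased, and kinds missing from the list are dropped from K as unsafe to keep.

// Hypothetical wrapper mirroring how GVN merges metadata when it replaces
// one load with an equivalent one: K survives, J is about to be discarded.
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;

static void mergeLoadMetadata(LoadInst *K, const LoadInst *J) {
  static const unsigned KnownIDs[] = {
      LLVMContext::MD_tbaa,           LLVMContext::MD_alias_scope,
      LLVMContext::MD_noalias,        LLVMContext::MD_range,
      LLVMContext::MD_invariant_load, LLVMContext::MD_nonnull,
      LLVMContext::MD_invariant_group};
  // Kinds not listed here are dropped from K as unsafe to keep.
  combineMetadata(K, J, KnownIDs);
}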
+
+/// A potential constituent of a bitreverse or bswap expression. See
+/// collectBitParts for a fuller explanation.
+struct BitPart {
+  BitPart(Value *P, unsigned BW) : Provider(P) {
+    Provenance.resize(BW);
+  }
+
+  /// The Value that this is a bitreverse/bswap of.
+  Value *Provider;
+  /// The "provenance" of each bit. Provenance[A] = B means that bit A
+  /// in Provider becomes bit B in the result of this expression.
+  SmallVector<int8_t, 32> Provenance; // int8_t means max size is i128.
+
+  enum { Unset = -1 };
+};
+
+/// Analyze the specified subexpression and see if it is capable of providing
+/// pieces of a bswap or bitreverse. The subexpression provides a potential
+/// piece of a bswap or bitreverse if it can be proven that each non-zero bit
+/// in the output of the expression came from a corresponding bit in some
+/// other value. This function is recursive, and the end result is a mapping
+/// of bitnumber to bitnumber. It is the caller's responsibility to validate
+/// that the bitnumber to bitnumber mapping is correct for a bswap or
+/// bitreverse.
+///
+/// For example, if the current subexpression is "(shl i32 %X, 24)" then we
+/// know that the expression deposits the low byte of %X into the high byte
+/// of the result and that all other bits are zero. This expression is
+/// accepted and a BitPart is returned with Provider set to %X and
+/// Provenance[24-31] set to [0-7].
+///
+/// To avoid revisiting values, the BitPart results are memoized into the
+/// provided map. To avoid unnecessary copying of BitParts, BitParts are
+/// constructed in-place in the \c BPS map. Because of this \c BPS needs to
+/// store BitParts objects, not pointers. As we need the concept of a nullptr
+/// BitParts (Value has been analyzed and the analysis failed), we use an
+/// Optional type instead to provide the same functionality.
+///
+/// Because we pass around references into \c BPS, we must use a container
+/// that does not invalidate internal references (std::map instead of
+/// DenseMap).
+///
+static const Optional<BitPart> &
+collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals,
+                std::map<Value *, Optional<BitPart>> &BPS) {
+  auto I = BPS.find(V);
+  if (I != BPS.end())
+    return I->second;
+
+  auto &Result = BPS[V] = None;
+  auto BitWidth = cast<IntegerType>(V->getType())->getBitWidth();
+
+  if (Instruction *I = dyn_cast<Instruction>(V)) {
+    // If this is an or instruction, it may be an inner node of the bswap.
+    if (I->getOpcode() == Instruction::Or) {
+      auto &A = collectBitParts(I->getOperand(0), MatchBSwaps,
+                                MatchBitReversals, BPS);
+      auto &B = collectBitParts(I->getOperand(1), MatchBSwaps,
+                                MatchBitReversals, BPS);
+      if (!A || !B)
+        return Result;
+
+      // Try and merge the two together.
+      if (!A->Provider || A->Provider != B->Provider)
+        return Result;
+
+      Result = BitPart(A->Provider, BitWidth);
+      for (unsigned i = 0; i < A->Provenance.size(); ++i) {
+        if (A->Provenance[i] != BitPart::Unset &&
+            B->Provenance[i] != BitPart::Unset &&
+            A->Provenance[i] != B->Provenance[i])
+          return Result = None;
+
+        if (A->Provenance[i] == BitPart::Unset)
+          Result->Provenance[i] = B->Provenance[i];
+        else
+          Result->Provenance[i] = A->Provenance[i];
+      }
+
+      return Result;
+    }
+
+    // If this is a logical shift by a constant, recurse then shift the result.
+    if (I->isLogicalShift() && isa<ConstantInt>(I->getOperand(1))) {
+      unsigned BitShift =
+          cast<ConstantInt>(I->getOperand(1))->getLimitedValue(~0U);
+      // Ensure the shift amount is defined.
+      if (BitShift > BitWidth)
+        return Result;
+
+      auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps,
+                                  MatchBitReversals, BPS);
+      if (!Res)
+        return Result;
+      Result = Res;
+
+      // Perform the "shift" on BitProvenance.
+      auto &P = Result->Provenance;
+      if (I->getOpcode() == Instruction::Shl) {
+        P.erase(std::prev(P.end(), BitShift), P.end());
+        P.insert(P.begin(), BitShift, BitPart::Unset);
+      } else {
+        P.erase(P.begin(), std::next(P.begin(), BitShift));
+        P.insert(P.end(), BitShift, BitPart::Unset);
+      }
+
+      return Result;
+    }
+
+    // If this is a logical 'and' with a mask that clears bits, recurse then
+    // unset the appropriate bits.
+    if (I->getOpcode() == Instruction::And &&
+        isa<ConstantInt>(I->getOperand(1))) {
+      APInt Bit(I->getType()->getPrimitiveSizeInBits(), 1);
+      const APInt &AndMask = cast<ConstantInt>(I->getOperand(1))->getValue();
+
+      // Check that the mask allows a multiple of 8 bits for a bswap, for an
+      // early exit.
+      unsigned NumMaskedBits = AndMask.countPopulation();
+      if (!MatchBitReversals && NumMaskedBits % 8 != 0)
+        return Result;
+
+      auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps,
+                                  MatchBitReversals, BPS);
+      if (!Res)
+        return Result;
+      Result = Res;
+
+      for (unsigned i = 0; i < BitWidth; ++i, Bit <<= 1)
+        // If the AndMask is zero for this bit, clear the bit.
+        if ((AndMask & Bit) == 0)
+          Result->Provenance[i] = BitPart::Unset;
+
+      return Result;
+    }
+  }
+
+  // Okay, we got to something that isn't a shift, 'or' or 'and'. This must
+  // be the input value to the bswap/bitreverse.
+  Result = BitPart(V, BitWidth);
+  for (unsigned i = 0; i < BitWidth; ++i)
+    Result->Provenance[i] = i;
+  return Result;
+}
+
+static bool bitTransformIsCorrectForBSwap(unsigned From, unsigned To,
+                                          unsigned BitWidth) {
+  if (From % 8 != To % 8)
+    return false;
+  // Convert from bit indices to byte indices and check for a byte reversal.
+  From >>= 3;
+  To >>= 3;
+  BitWidth >>= 3;
+  return From == BitWidth - To - 1;
+}
+
+static bool bitTransformIsCorrectForBitReverse(unsigned From, unsigned To,
+                                               unsigned BitWidth) {
+  return From == BitWidth - To - 1;
+}
+
+/// Given an OR instruction, check to see if this is a bitreverse or bswap
+/// idiom. If so, insert the new intrinsic and return true.
+bool llvm::recognizeBitReverseOrBSwapIdiom(
+    Instruction *I, bool MatchBSwaps, bool MatchBitReversals,
+    SmallVectorImpl<Instruction *> &InsertedInsts) {
+  if (Operator::getOpcode(I) != Instruction::Or)
+    return false;
+  if (!MatchBSwaps && !MatchBitReversals)
+    return false;
+  IntegerType *ITy = dyn_cast<IntegerType>(I->getType());
+  if (!ITy || ITy->getBitWidth() > 128)
+    return false; // Can't do vectors or integers > 128 bits.
+  unsigned BW = ITy->getBitWidth();
+
+  // Try to find all the pieces corresponding to the bswap.
+  std::map<Value *, Optional<BitPart>> BPS;
+  auto Res = collectBitParts(I, MatchBSwaps, MatchBitReversals, BPS);
+  if (!Res)
+    return false;
+  auto &BitProvenance = Res->Provenance;
+
+  // Now, is the bit permutation correct for a bswap or a bitreverse? We can
+  // only byteswap values with an even number of bytes.
+  bool OKForBSwap = BW % 16 == 0, OKForBitReverse = true;
+  for (unsigned i = 0; i < BW; ++i) {
+    OKForBSwap &= bitTransformIsCorrectForBSwap(BitProvenance[i], i, BW);
+    OKForBitReverse &=
+        bitTransformIsCorrectForBitReverse(BitProvenance[i], i, BW);
+  }
+
+  Intrinsic::ID Intrin;
+  if (OKForBSwap && MatchBSwaps)
+    Intrin = Intrinsic::bswap;
+  else if (OKForBitReverse && MatchBitReversals)
+    Intrin = Intrinsic::bitreverse;
+  else
+    return false;
+
+  Function *F = Intrinsic::getDeclaration(I->getModule(), Intrin, ITy);
+  InsertedInsts.push_back(CallInst::Create(F, Res->Provider, "rev", I));
+  return true;
+}
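Finally, an end-to-end sketch of the new recognizer (the driver is hypothetical): for the classic i16 byteswap idiom (x << 8) | (x >> 8), collectBitParts yields the provenance map [8..15, 0..7], the bswap check passes, and a call to @llvm.bswap.i16 is created before the or; wiring the call in is left to the caller.

// Hypothetical driver: (x << 8) | (x >> 8) on i16 is recognized as bswap.
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;

static bool rewriteBSwapIdiom(LLVMContext &Ctx, Module &M) {
  auto *I16 = Type::getInt16Ty(Ctx);
  auto *FTy = FunctionType::get(I16, {I16}, false);
  auto *F = cast<Function>(M.getOrInsertFunction("swap16", FTy));
  auto *BB = BasicBlock::Create(Ctx, "entry", F);
  IRBuilder<> B(BB);
  Value *X = &*F->arg_begin();
  Value *Or = B.CreateOr(B.CreateShl(X, 8), B.CreateLShr(X, 8));
  B.CreateRet(Or);
  SmallVector<Instruction *, 4> Inserted;
  // On success this creates "call i16 @llvm.bswap.i16(i16 %x)" before Or;
  // the caller is expected to replace Or's uses and clean up.
  bool Matched = recognizeBitReverseOrBSwapIdiom(
      cast<Instruction>(Or), /*MatchBSwaps=*/true, /*MatchBitReversals=*/false,
      Inserted);
  if (Matched)
    Or->replaceAllUsesWith(Inserted.back());
  return Matched;
}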