diff options
Diffstat (limited to 'contrib/llvm/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp')
-rw-r--r-- | contrib/llvm/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp | 380 |
1 files changed, 380 insertions, 0 deletions
diff --git a/contrib/llvm/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp b/contrib/llvm/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp new file mode 100644 index 0000000..421946d --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp @@ -0,0 +1,380 @@ +//===-- AtomicExpandLoadLinkedPass.cpp - Expand atomic instructions -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a pass (at IR level) to replace atomic instructions with +// appropriate (intrinsic-based) ldrex/strex loops. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "arm-atomic-expand" + +namespace { + class AtomicExpandLoadLinked : public FunctionPass { + const TargetMachine *TM; + public: + static char ID; // Pass identification, replacement for typeid + explicit AtomicExpandLoadLinked(const TargetMachine *TM = nullptr) + : FunctionPass(ID), TM(TM) { + initializeAtomicExpandLoadLinkedPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override; + bool expandAtomicInsts(Function &F); + + bool expandAtomicLoad(LoadInst *LI); + bool expandAtomicStore(StoreInst *LI); + bool expandAtomicRMW(AtomicRMWInst *AI); + bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI); + + AtomicOrdering insertLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord); + void insertTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord); + }; +} + +char AtomicExpandLoadLinked::ID = 0; +char &llvm::AtomicExpandLoadLinkedID = AtomicExpandLoadLinked::ID; +INITIALIZE_TM_PASS(AtomicExpandLoadLinked, "atomic-ll-sc", + "Expand Atomic calls in terms of load-linked & store-conditional", + false, false) + +FunctionPass *llvm::createAtomicExpandLoadLinkedPass(const TargetMachine *TM) { + return new AtomicExpandLoadLinked(TM); +} + +bool AtomicExpandLoadLinked::runOnFunction(Function &F) { + if (!TM || !TM->getSubtargetImpl()->enableAtomicExpandLoadLinked()) + return false; + + SmallVector<Instruction *, 1> AtomicInsts; + + // Changing control-flow while iterating through it is a bad idea, so gather a + // list of all atomic instructions before we start. + for (BasicBlock &BB : F) + for (Instruction &Inst : BB) { + if (isa<AtomicRMWInst>(&Inst) || isa<AtomicCmpXchgInst>(&Inst) || + (isa<LoadInst>(&Inst) && cast<LoadInst>(&Inst)->isAtomic()) || + (isa<StoreInst>(&Inst) && cast<StoreInst>(&Inst)->isAtomic())) + AtomicInsts.push_back(&Inst); + } + + bool MadeChange = false; + for (Instruction *Inst : AtomicInsts) { + if (!TM->getTargetLowering()->shouldExpandAtomicInIR(Inst)) + continue; + + if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst)) + MadeChange |= expandAtomicRMW(AI); + else if (AtomicCmpXchgInst *CI = dyn_cast<AtomicCmpXchgInst>(Inst)) + MadeChange |= expandAtomicCmpXchg(CI); + else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) + MadeChange |= expandAtomicLoad(LI); + else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) + MadeChange |= expandAtomicStore(SI); + else + llvm_unreachable("Unknown atomic instruction"); + } + + return MadeChange; +} + +bool AtomicExpandLoadLinked::expandAtomicLoad(LoadInst *LI) { + // Load instructions don't actually need a leading fence, even in the + // SequentiallyConsistent case. + AtomicOrdering MemOpOrder = + TM->getTargetLowering()->getInsertFencesForAtomic() ? Monotonic + : LI->getOrdering(); + + // The only 64-bit load guaranteed to be single-copy atomic by the ARM ARM is + // an ldrexd (A3.5.3). + IRBuilder<> Builder(LI); + Value *Val = TM->getTargetLowering()->emitLoadLinked( + Builder, LI->getPointerOperand(), MemOpOrder); + + insertTrailingFence(Builder, LI->getOrdering()); + + LI->replaceAllUsesWith(Val); + LI->eraseFromParent(); + + return true; +} + +bool AtomicExpandLoadLinked::expandAtomicStore(StoreInst *SI) { + // The only atomic 64-bit store on ARM is an strexd that succeeds, which means + // we need a loop and the entire instruction is essentially an "atomicrmw + // xchg" that ignores the value loaded. + IRBuilder<> Builder(SI); + AtomicRMWInst *AI = + Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(), + SI->getValueOperand(), SI->getOrdering()); + SI->eraseFromParent(); + + // Now we have an appropriate swap instruction, lower it as usual. + return expandAtomicRMW(AI); +} + +bool AtomicExpandLoadLinked::expandAtomicRMW(AtomicRMWInst *AI) { + AtomicOrdering Order = AI->getOrdering(); + Value *Addr = AI->getPointerOperand(); + BasicBlock *BB = AI->getParent(); + Function *F = BB->getParent(); + LLVMContext &Ctx = F->getContext(); + + // Given: atomicrmw some_op iN* %addr, iN %incr ordering + // + // The standard expansion we produce is: + // [...] + // fence? + // atomicrmw.start: + // %loaded = @load.linked(%addr) + // %new = some_op iN %loaded, %incr + // %stored = @store_conditional(%new, %addr) + // %try_again = icmp i32 ne %stored, 0 + // br i1 %try_again, label %loop, label %atomicrmw.end + // atomicrmw.end: + // fence? + // [...] + BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end"); + BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB); + + // This grabs the DebugLoc from AI. + IRBuilder<> Builder(AI); + + // The split call above "helpfully" added a branch at the end of BB (to the + // wrong place), but we might want a fence too. It's easiest to just remove + // the branch entirely. + std::prev(BB->end())->eraseFromParent(); + Builder.SetInsertPoint(BB); + AtomicOrdering MemOpOrder = insertLeadingFence(Builder, Order); + Builder.CreateBr(LoopBB); + + // Start the main loop block now that we've taken care of the preliminaries. + Builder.SetInsertPoint(LoopBB); + Value *Loaded = + TM->getTargetLowering()->emitLoadLinked(Builder, Addr, MemOpOrder); + + Value *NewVal; + switch (AI->getOperation()) { + case AtomicRMWInst::Xchg: + NewVal = AI->getValOperand(); + break; + case AtomicRMWInst::Add: + NewVal = Builder.CreateAdd(Loaded, AI->getValOperand(), "new"); + break; + case AtomicRMWInst::Sub: + NewVal = Builder.CreateSub(Loaded, AI->getValOperand(), "new"); + break; + case AtomicRMWInst::And: + NewVal = Builder.CreateAnd(Loaded, AI->getValOperand(), "new"); + break; + case AtomicRMWInst::Nand: + NewVal = Builder.CreateNot(Builder.CreateAnd(Loaded, AI->getValOperand()), + "new"); + break; + case AtomicRMWInst::Or: + NewVal = Builder.CreateOr(Loaded, AI->getValOperand(), "new"); + break; + case AtomicRMWInst::Xor: + NewVal = Builder.CreateXor(Loaded, AI->getValOperand(), "new"); + break; + case AtomicRMWInst::Max: + NewVal = Builder.CreateICmpSGT(Loaded, AI->getValOperand()); + NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new"); + break; + case AtomicRMWInst::Min: + NewVal = Builder.CreateICmpSLE(Loaded, AI->getValOperand()); + NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new"); + break; + case AtomicRMWInst::UMax: + NewVal = Builder.CreateICmpUGT(Loaded, AI->getValOperand()); + NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new"); + break; + case AtomicRMWInst::UMin: + NewVal = Builder.CreateICmpULE(Loaded, AI->getValOperand()); + NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new"); + break; + default: + llvm_unreachable("Unknown atomic op"); + } + + Value *StoreSuccess = TM->getTargetLowering()->emitStoreConditional( + Builder, NewVal, Addr, MemOpOrder); + Value *TryAgain = Builder.CreateICmpNE( + StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain"); + Builder.CreateCondBr(TryAgain, LoopBB, ExitBB); + + Builder.SetInsertPoint(ExitBB, ExitBB->begin()); + insertTrailingFence(Builder, Order); + + AI->replaceAllUsesWith(Loaded); + AI->eraseFromParent(); + + return true; +} + +bool AtomicExpandLoadLinked::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { + AtomicOrdering SuccessOrder = CI->getSuccessOrdering(); + AtomicOrdering FailureOrder = CI->getFailureOrdering(); + Value *Addr = CI->getPointerOperand(); + BasicBlock *BB = CI->getParent(); + Function *F = BB->getParent(); + LLVMContext &Ctx = F->getContext(); + + // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord + // + // The full expansion we produce is: + // [...] + // fence? + // cmpxchg.start: + // %loaded = @load.linked(%addr) + // %should_store = icmp eq %loaded, %desired + // br i1 %should_store, label %cmpxchg.trystore, + // label %cmpxchg.failure + // cmpxchg.trystore: + // %stored = @store_conditional(%new, %addr) + // %success = icmp eq i32 %stored, 0 + // br i1 %success, label %cmpxchg.success, label %loop/%cmpxchg.failure + // cmpxchg.success: + // fence? + // br label %cmpxchg.end + // cmpxchg.failure: + // fence? + // br label %cmpxchg.end + // cmpxchg.end: + // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure] + // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0 + // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1 + // [...] + BasicBlock *ExitBB = BB->splitBasicBlock(CI, "cmpxchg.end"); + auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB); + auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, FailureBB); + auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, SuccessBB); + auto LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB); + + // This grabs the DebugLoc from CI + IRBuilder<> Builder(CI); + + // The split call above "helpfully" added a branch at the end of BB (to the + // wrong place), but we might want a fence too. It's easiest to just remove + // the branch entirely. + std::prev(BB->end())->eraseFromParent(); + Builder.SetInsertPoint(BB); + AtomicOrdering MemOpOrder = insertLeadingFence(Builder, SuccessOrder); + Builder.CreateBr(LoopBB); + + // Start the main loop block now that we've taken care of the preliminaries. + Builder.SetInsertPoint(LoopBB); + Value *Loaded = + TM->getTargetLowering()->emitLoadLinked(Builder, Addr, MemOpOrder); + Value *ShouldStore = + Builder.CreateICmpEQ(Loaded, CI->getCompareOperand(), "should_store"); + + // If the the cmpxchg doesn't actually need any ordering when it fails, we can + // jump straight past that fence instruction (if it exists). + Builder.CreateCondBr(ShouldStore, TryStoreBB, FailureBB); + + Builder.SetInsertPoint(TryStoreBB); + Value *StoreSuccess = TM->getTargetLowering()->emitStoreConditional( + Builder, CI->getNewValOperand(), Addr, MemOpOrder); + StoreSuccess = Builder.CreateICmpEQ( + StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success"); + Builder.CreateCondBr(StoreSuccess, SuccessBB, + CI->isWeak() ? FailureBB : LoopBB); + + // Make sure later instructions don't get reordered with a fence if necessary. + Builder.SetInsertPoint(SuccessBB); + insertTrailingFence(Builder, SuccessOrder); + Builder.CreateBr(ExitBB); + + Builder.SetInsertPoint(FailureBB); + insertTrailingFence(Builder, FailureOrder); + Builder.CreateBr(ExitBB); + + // Finally, we have control-flow based knowledge of whether the cmpxchg + // succeeded or not. We expose this to later passes by converting any + // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate PHI. + + // Setup the builder so we can create any PHIs we need. + Builder.SetInsertPoint(ExitBB, ExitBB->begin()); + PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2); + Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB); + Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB); + + // Look for any users of the cmpxchg that are just comparing the loaded value + // against the desired one, and replace them with the CFG-derived version. + SmallVector<ExtractValueInst *, 2> PrunedInsts; + for (auto User : CI->users()) { + ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User); + if (!EV) + continue; + + assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 && + "weird extraction from { iN, i1 }"); + + if (EV->getIndices()[0] == 0) + EV->replaceAllUsesWith(Loaded); + else + EV->replaceAllUsesWith(Success); + + PrunedInsts.push_back(EV); + } + + // We can remove the instructions now we're no longer iterating through them. + for (auto EV : PrunedInsts) + EV->eraseFromParent(); + + if (!CI->use_empty()) { + // Some use of the full struct return that we don't understand has happened, + // so we've got to reconstruct it properly. + Value *Res; + Res = Builder.CreateInsertValue(UndefValue::get(CI->getType()), Loaded, 0); + Res = Builder.CreateInsertValue(Res, Success, 1); + + CI->replaceAllUsesWith(Res); + } + + CI->eraseFromParent(); + return true; +} + +AtomicOrdering AtomicExpandLoadLinked::insertLeadingFence(IRBuilder<> &Builder, + AtomicOrdering Ord) { + if (!TM->getTargetLowering()->getInsertFencesForAtomic()) + return Ord; + + if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent) + Builder.CreateFence(Release); + + // The exclusive operations don't need any barrier if we're adding separate + // fences. + return Monotonic; +} + +void AtomicExpandLoadLinked::insertTrailingFence(IRBuilder<> &Builder, + AtomicOrdering Ord) { + if (!TM->getTargetLowering()->getInsertFencesForAtomic()) + return; + + if (Ord == Acquire || Ord == AcquireRelease) + Builder.CreateFence(Acquire); + else if (Ord == SequentiallyConsistent) + Builder.CreateFence(SequentiallyConsistent); +} |