Diffstat (limited to 'contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp')
-rw-r--r-- | contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp | 112
1 file changed, 78 insertions, 34 deletions
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 8e312dc..9a7f45b 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -18,17 +18,27 @@
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegisterInfo.h"
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <limits>
+
 using namespace llvm;
 
 #define DEBUG_TYPE "aarch64-ldst-opt"
@@ -58,15 +68,15 @@ typedef struct LdStPairFlags {
   // If a matching instruction is found, MergeForward is set to true if the
   // merge is to remove the first instruction and replace the second with
   // a pair-wise insn, and false if the reverse is true.
-  bool MergeForward;
+  bool MergeForward = false;
 
   // SExtIdx gives the index of the result of the load pair that must be
   // extended. The value of SExtIdx assumes that the paired load produces the
   // value in this order: (I, returned iterator), i.e., -1 means no value has
   // to be extended, 0 means I, and 1 means the returned iterator.
-  int SExtIdx;
+  int SExtIdx = -1;
 
-  LdStPairFlags() : MergeForward(false), SExtIdx(-1) {}
+  LdStPairFlags() = default;
 
   void setMergeForward(bool V = true) { MergeForward = V; }
   bool getMergeForward() const { return MergeForward; }
@@ -78,10 +88,12 @@ typedef struct LdStPairFlags {
 struct AArch64LoadStoreOpt : public MachineFunctionPass {
   static char ID;
+
   AArch64LoadStoreOpt() : MachineFunctionPass(ID) {
     initializeAArch64LoadStoreOptPass(*PassRegistry::getPassRegistry());
   }
 
+  AliasAnalysis *AA;
   const AArch64InstrInfo *TII;
   const TargetRegisterInfo *TRI;
   const AArch64Subtarget *Subtarget;
 
@@ -89,6 +101,11 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
   // Track which registers have been modified and used.
   BitVector ModifiedRegs, UsedRegs;
 
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<AAResultsWrapperPass>();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
   // Scan the instructions looking for a load/store that can be combined
   // with the current instruction into a load/store pair.
   // Return the matching instruction if one is found, else MBB->end().
@@ -162,8 +179,10 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
   StringRef getPassName() const override { return AARCH64_LOAD_STORE_OPT_NAME; }
 };
+
 char AArch64LoadStoreOpt::ID = 0;
-} // namespace
+
+} // end anonymous namespace
 
 INITIALIZE_PASS(AArch64LoadStoreOpt, "aarch64-ldst-opt",
                 AARCH64_LOAD_STORE_OPT_NAME, false, false)
 
@@ -246,7 +265,7 @@ static unsigned getMatchingNonSExtOpcode(unsigned Opc,
   default:
     if (IsValidLdStrOpc)
       *IsValidLdStrOpc = false;
-    return UINT_MAX;
+    return std::numeric_limits<unsigned>::max();
   case AArch64::STRDui:
   case AArch64::STURDi:
   case AArch64::STRQui:
@@ -369,6 +388,10 @@ static unsigned isMatchingStore(MachineInstr &LoadInst,
 }
 
 static unsigned getPreIndexedOpcode(unsigned Opc) {
+  // FIXME: We don't currently support creating pre-indexed loads/stores when
+  // the load or store is the unscaled version. If we decide to perform such an
+  // optimization in the future the cases for the unscaled loads/stores will
+  // need to be added here.
   switch (Opc) {
   default:
     llvm_unreachable("Opcode has no pre-indexed equivalent!");
@@ -432,32 +455,42 @@ static unsigned getPostIndexedOpcode(unsigned Opc) {
   default:
     llvm_unreachable("Opcode has no post-indexed wise equivalent!");
   case AArch64::STRSui:
+  case AArch64::STURSi:
     return AArch64::STRSpost;
   case AArch64::STRDui:
+  case AArch64::STURDi:
     return AArch64::STRDpost;
   case AArch64::STRQui:
+  case AArch64::STURQi:
     return AArch64::STRQpost;
   case AArch64::STRBBui:
     return AArch64::STRBBpost;
   case AArch64::STRHHui:
     return AArch64::STRHHpost;
   case AArch64::STRWui:
+  case AArch64::STURWi:
     return AArch64::STRWpost;
   case AArch64::STRXui:
+  case AArch64::STURXi:
    return AArch64::STRXpost;
   case AArch64::LDRSui:
+  case AArch64::LDURSi:
     return AArch64::LDRSpost;
   case AArch64::LDRDui:
+  case AArch64::LDURDi:
     return AArch64::LDRDpost;
   case AArch64::LDRQui:
+  case AArch64::LDURQi:
     return AArch64::LDRQpost;
   case AArch64::LDRBBui:
     return AArch64::LDRBBpost;
   case AArch64::LDRHHui:
     return AArch64::LDRHHpost;
   case AArch64::LDRWui:
+  case AArch64::LDURWi:
     return AArch64::LDRWpost;
   case AArch64::LDRXui:
+  case AArch64::LDURXi:
     return AArch64::LDRXpost;
   case AArch64::LDRSWui:
     return AArch64::LDRSWpost;
@@ -595,7 +628,7 @@ AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
   MachineInstrBuilder MIB;
   MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(getMatchingWideOpcode(Opc)))
             .addReg(isNarrowStore(Opc) ? AArch64::WZR : AArch64::XZR)
-            .addOperand(BaseRegOp)
+            .add(BaseRegOp)
             .addImm(OffsetImm)
             .setMemRefs(I->mergeMemRefsWith(*MergeMI));
   (void)MIB;
@@ -709,9 +742,9 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
     }
   }
   MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(getMatchingPairOpcode(Opc)))
-            .addOperand(RegOp0)
-            .addOperand(RegOp1)
-            .addOperand(BaseRegOp)
+            .add(RegOp0)
+            .add(RegOp1)
+            .add(BaseRegOp)
             .addImm(OffsetImm)
             .setMemRefs(I->mergeMemRefsWith(*Paired));
 
@@ -776,6 +809,7 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
   int LoadSize = getMemScale(*LoadI);
   int StoreSize = getMemScale(*StoreI);
   unsigned LdRt = getLdStRegOp(*LoadI).getReg();
+  const MachineOperand &StMO = getLdStRegOp(*StoreI);
   unsigned StRt = getLdStRegOp(*StoreI).getReg();
   bool IsStoreXReg =
       TRI->getRegClass(AArch64::GPR64RegClassID)->contains(StRt);
@@ -788,7 +822,13 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
     // Remove the load, if the destination register of the loads is the same
     // register for stored value.
     if (StRt == LdRt && LoadSize == 8) {
-      StoreI->clearRegisterKills(StRt, TRI);
+      for (MachineInstr &MI : make_range(StoreI->getIterator(),
+                                         LoadI->getIterator())) {
+        if (MI.killsRegister(StRt, TRI)) {
+          MI.clearRegisterKills(StRt, TRI);
+          break;
+        }
+      }
       DEBUG(dbgs() << "Remove load instruction:\n ");
       DEBUG(LoadI->print(dbgs()));
       DEBUG(dbgs() << "\n");
@@ -800,7 +840,7 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
         BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                 TII->get(IsStoreXReg ? AArch64::ORRXrs : AArch64::ORRWrs), LdRt)
            .addReg(IsStoreXReg ? AArch64::XZR : AArch64::WZR)
-           .addReg(StRt)
+           .add(StMO)
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
   } else {
     // FIXME: Currently we disable this transformation in big-endian targets as
@@ -841,14 +881,14 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
           BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                   TII->get(IsStoreXReg ? AArch64::ANDXri : AArch64::ANDWri),
                   DestReg)
-              .addReg(StRt)
+              .add(StMO)
               .addImm(AndMaskEncoded);
     } else {
       BitExtMI =
           BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
                   TII->get(IsStoreXReg ? AArch64::UBFMXri : AArch64::UBFMWri),
                   DestReg)
-              .addReg(StRt)
+              .add(StMO)
               .addImm(Immr)
               .addImm(Imms);
     }
@@ -857,7 +897,10 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
   // Clear kill flags between store and load.
   for (MachineInstr &MI : make_range(StoreI->getIterator(),
                                      BitExtMI->getIterator()))
-    MI.clearRegisterKills(StRt, TRI);
+    if (MI.killsRegister(StRt, TRI)) {
+      MI.clearRegisterKills(StRt, TRI);
+      break;
+    }
 
   DEBUG(dbgs() << "Promoting load by replacing :\n ");
   DEBUG(StoreI->print(dbgs()));
@@ -923,7 +966,7 @@ static int alignTo(int Num, int PowOf2) {
 }
 
 static bool mayAlias(MachineInstr &MIa, MachineInstr &MIb,
-                     const AArch64InstrInfo *TII) {
+                     AliasAnalysis *AA) {
   // One of the instructions must modify memory.
   if (!MIa.mayStore() && !MIb.mayStore())
     return false;
@@ -932,14 +975,14 @@ static bool mayAlias(MachineInstr &MIa, MachineInstr &MIb,
   if (!MIa.mayLoadOrStore() && !MIb.mayLoadOrStore())
     return false;
 
-  return !TII->areMemAccessesTriviallyDisjoint(MIa, MIb);
+  return MIa.mayAlias(AA, MIb, /*UseTBAA*/false);
 }
 
 static bool mayAlias(MachineInstr &MIa,
                      SmallVectorImpl<MachineInstr *> &MemInsns,
-                     const AArch64InstrInfo *TII) {
+                     AliasAnalysis *AA) {
   for (MachineInstr *MIb : MemInsns)
-    if (mayAlias(MIa, *MIb, TII))
+    if (mayAlias(MIa, *MIb, AA))
       return true;
 
   return false;
@@ -997,7 +1040,7 @@ bool AArch64LoadStoreOpt::findMatchingStore(
       return false;
 
     // If we encounter a store aliased with the load, return early.
-    if (MI.mayStore() && mayAlias(LoadMI, MI, TII))
+    if (MI.mayStore() && mayAlias(LoadMI, MI, AA))
       return false;
   } while (MBBI != B && Count < Limit);
   return false;
@@ -1167,7 +1210,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
         // first.
         if (!ModifiedRegs[getLdStRegOp(MI).getReg()] &&
             !(MI.mayLoad() && UsedRegs[getLdStRegOp(MI).getReg()]) &&
-            !mayAlias(MI, MemInsns, TII)) {
+            !mayAlias(MI, MemInsns, AA)) {
           Flags.setMergeForward(false);
           return MBBI;
         }
@@ -1178,7 +1221,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
         // into the second.
        if (!ModifiedRegs[getLdStRegOp(FirstMI).getReg()] &&
            !(MayLoad && UsedRegs[getLdStRegOp(FirstMI).getReg()]) &&
-           !mayAlias(FirstMI, MemInsns, TII)) {
+           !mayAlias(FirstMI, MemInsns, AA)) {
          Flags.setMergeForward(true);
          return MBBI;
        }
@@ -1233,19 +1276,19 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
   if (!isPairedLdSt(*I)) {
     // Non-paired instruction.
     MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
-              .addOperand(getLdStRegOp(*Update))
-              .addOperand(getLdStRegOp(*I))
-              .addOperand(getLdStBaseOp(*I))
+              .add(getLdStRegOp(*Update))
+              .add(getLdStRegOp(*I))
+              .add(getLdStBaseOp(*I))
               .addImm(Value)
               .setMemRefs(I->memoperands_begin(), I->memoperands_end());
   } else {
     // Paired instruction.
     int Scale = getMemScale(*I);
     MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
-              .addOperand(getLdStRegOp(*Update))
-              .addOperand(getLdStRegOp(*I, 0))
-              .addOperand(getLdStRegOp(*I, 1))
-              .addOperand(getLdStBaseOp(*I))
+              .add(getLdStRegOp(*Update))
+              .add(getLdStRegOp(*I, 0))
+              .add(getLdStRegOp(*I, 1))
+              .add(getLdStBaseOp(*I))
               .addImm(Value / Scale)
              .setMemRefs(I->memoperands_begin(), I->memoperands_end());
   }
@@ -1545,7 +1588,7 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
     case AArch64::LDURBBi:
     case AArch64::LDURHHi:
     case AArch64::LDURWi:
-    case AArch64::LDURXi: {
+    case AArch64::LDURXi:
      if (tryToPromoteLoadFromStore(MBBI)) {
        Modified = true;
        break;
@@ -1553,7 +1596,6 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
      ++MBBI;
      break;
    }
-    }
   }
   // 2) Merge adjacent zero stores into a wider store.
   // e.g.,
@@ -1666,8 +1708,9 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
      ++NumPostFolded;
      break;
    }
-    // Don't know how to handle pre/post-index versions, so move to the next
-    // instruction.
+
+    // Don't know how to handle unscaled pre/post-index versions below, so
+    // move to the next instruction.
    if (TII->isUnscaledLdSt(Opc)) {
      ++MBBI;
      break;
    }
@@ -1722,6 +1765,7 @@ bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
   Subtarget = &static_cast<const AArch64Subtarget &>(Fn.getSubtarget());
   TII = static_cast<const AArch64InstrInfo *>(Subtarget->getInstrInfo());
   TRI = Subtarget->getRegisterInfo();
+  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
 
   // Resize the modified and used register bitfield trackers. We do this once
   // per function and then clear the bitfield each time we optimize a load or
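For reference, the sketch below (not part of this commit; "ExamplePass" and its loop body are hypothetical) illustrates the alias-analysis plumbing the patch adopts: a MachineFunctionPass declares a dependency on AAResultsWrapperPass in getAnalysisUsage, caches the AAResults once per function in runOnMachineFunction, and then queries MachineInstr::mayAlias instead of a target hook such as areMemAccessesTriviallyDisjoint, using the LLVM 5.0-era signatures seen in the diff above.

// Sketch only (assumption: "ExamplePass" is a hypothetical pass; the
// AAResultsWrapperPass and MachineInstr::mayAlias calls are the APIs
// referenced in the diff above, LLVM 5.0-era signatures).
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"

using namespace llvm;

namespace {

struct ExamplePass : public MachineFunctionPass {
  static char ID;

  ExamplePass() : MachineFunctionPass(ID) {}

  AliasAnalysis *AA = nullptr;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // Declare the dependency so the pass manager schedules alias analysis.
    AU.addRequired<AAResultsWrapperPass>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  bool runOnMachineFunction(MachineFunction &MF) override {
    // Cache the analysis results once per function, mirroring the patch.
    AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();

    for (MachineBasicBlock &MBB : MF)
      for (MachineInstr &MIa : MBB)
        for (MachineInstr &MIb : MBB) {
          if (&MIa == &MIb || !MIa.mayStore() || !MIb.mayLoadOrStore())
            continue;
          // MachineInstr::mayAlias inspects the memory operands (optionally
          // consulting AA) instead of a target-specific disjointness hook.
          if (!MIa.mayAlias(AA, MIb, /*UseTBAA=*/false)) {
            // The two accesses are provably disjoint; a real pass could
            // safely reorder or merge them here.
          }
        }
    return false;
  }
};

} // end anonymous namespace

char ExamplePass::ID = 0;

Basing the query on MachineInstr::mayAlias keeps the disjointness decision with the generic MachineMemOperand machinery, which is why the patch can drop the TII-based checks from the AArch64-specific mayAlias helpers.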