diff options
Diffstat (limited to 'contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp')
-rw-r--r-- | contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp | 415 |
1 files changed, 390 insertions, 25 deletions
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp index fe7278f..d75d95a 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp @@ -7,14 +7,12 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "hexbit" - #include "HexagonBitTracker.h" #include "HexagonTargetMachine.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominators.h" @@ -42,10 +40,23 @@ #include <utility> #include <vector> +#define DEBUG_TYPE "hexbit" + using namespace llvm; static cl::opt<bool> PreserveTiedOps("hexbit-keep-tied", cl::Hidden, cl::init(true), cl::desc("Preserve subregisters in tied operands")); +static cl::opt<bool> GenExtract("hexbit-extract", cl::Hidden, + cl::init(true), cl::desc("Generate extract instructions")); +static cl::opt<bool> GenBitSplit("hexbit-bitsplit", cl::Hidden, + cl::init(true), cl::desc("Generate bitsplit instructions")); + +static cl::opt<unsigned> MaxExtract("hexbit-max-extract", cl::Hidden, + cl::init(UINT_MAX)); +static unsigned CountExtract = 0; +static cl::opt<unsigned> MaxBitSplit("hexbit-max-bitsplit", cl::Hidden, + cl::init(UINT_MAX)); +static unsigned CountBitSplit = 0; namespace llvm { @@ -249,8 +260,6 @@ INITIALIZE_PASS_END(HexagonBitSimplify, "hexbit", bool HexagonBitSimplify::visitBlock(MachineBasicBlock &B, Transformation &T, RegisterSet &AVs) { - MachineDomTreeNode *N = MDT->getNode(&B); - typedef GraphTraits<MachineDomTreeNode*> GTN; bool Changed = false; if (T.TopDown) @@ -262,10 +271,9 @@ bool HexagonBitSimplify::visitBlock(MachineBasicBlock &B, Transformation &T, RegisterSet NewAVs = AVs; NewAVs.insert(Defs); - for (auto I = GTN::child_begin(N), E = GTN::child_end(N); I != E; ++I) { - MachineBasicBlock *SB = (*I)->getBlock(); - Changed |= visitBlock(*SB, T, NewAVs); - } + for (auto *DTN : children<MachineDomTreeNode*>(MDT->getNode(&B))) + Changed |= visitBlock(*(DTN->getBlock()), T, NewAVs); + if (!T.TopDown) Changed |= T.processBlock(B, AVs); @@ -399,7 +407,7 @@ bool HexagonBitSimplify::getSubregMask(const BitTracker::RegisterRef &RR, const TargetRegisterClass *RC = MRI.getRegClass(RR.Reg); if (RR.Sub == 0) { Begin = 0; - Width = RC->getSize()*8; + Width = MRI.getTargetRegisterInfo()->getRegSizeInBits(*RC); return true; } @@ -409,7 +417,7 @@ bool HexagonBitSimplify::getSubregMask(const BitTracker::RegisterRef &RR, case Hexagon::DoubleRegsRegClassID: case Hexagon::VecDblRegsRegClassID: case Hexagon::VecDblRegs128BRegClassID: - Width = RC->getSize()*8 / 2; + Width = MRI.getTargetRegisterInfo()->getRegSizeInBits(*RC) / 2; if (RR.Sub == Hexagon::isub_hi || RR.Sub == Hexagon::vsub_hi) Begin = Width; break; @@ -896,6 +904,7 @@ const TargetRegisterClass *HexagonBitSimplify::getFinalVRegClass( *MRI.getTargetRegisterInfo()); auto VerifySR = [&HRI] (const TargetRegisterClass *RC, unsigned Sub) -> void { + (void)HRI; assert(Sub == HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_lo) || Sub == HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_hi)); }; @@ -983,9 +992,9 @@ bool DeadCodeElimination::isDead(unsigned R) const { bool DeadCodeElimination::runOnNode(MachineDomTreeNode *N) { bool Changed = false; - typedef GraphTraits<MachineDomTreeNode*> GTN; - for (auto I = GTN::child_begin(N), E = GTN::child_end(N); I != E; ++I) - Changed |= runOnNode(*I); + + for (auto *DTN : children<MachineDomTreeNode*>(N)) + Changed |= runOnNode(DTN); MachineBasicBlock *B = N->getBlock(); std::vector<MachineInstr*> Instrs; @@ -1045,8 +1054,8 @@ namespace { class RedundantInstrElimination : public Transformation { public: RedundantInstrElimination(BitTracker &bt, const HexagonInstrInfo &hii, - MachineRegisterInfo &mri) - : Transformation(true), HII(hii), MRI(mri), BT(bt) {} + const HexagonRegisterInfo &hri, MachineRegisterInfo &mri) + : Transformation(true), HII(hii), HRI(hri), MRI(mri), BT(bt) {} bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override; @@ -1061,6 +1070,7 @@ namespace { bool usedBitsEqual(BitTracker::RegisterRef RD, BitTracker::RegisterRef RS); const HexagonInstrInfo &HII; + const HexagonRegisterInfo &HRI; MachineRegisterInfo &MRI; BitTracker &BT; }; @@ -1253,7 +1263,7 @@ bool RedundantInstrElimination::computeUsedBits(const MachineInstr &MI, assert(MI.getOperand(OpN).isReg()); BitTracker::RegisterRef RR = MI.getOperand(OpN); const TargetRegisterClass *RC = HBS::getFinalVRegClass(RR, MRI); - uint16_t Width = RC->getSize()*8; + uint16_t Width = HRI.getRegSizeInBits(*RC); if (!GotBits) T.set(Begin, Begin+Width); @@ -1735,10 +1745,11 @@ namespace { // This is by no means complete class BitSimplification : public Transformation { public: - BitSimplification(BitTracker &bt, const HexagonInstrInfo &hii, - const HexagonRegisterInfo &hri, MachineRegisterInfo &mri, - MachineFunction &mf) - : Transformation(true), HII(hii), HRI(hri), MRI(mri), MF(mf), BT(bt) {} + BitSimplification(BitTracker &bt, const MachineDominatorTree &mdt, + const HexagonInstrInfo &hii, const HexagonRegisterInfo &hri, + MachineRegisterInfo &mri, MachineFunction &mf) + : Transformation(true), MDT(mdt), HII(hii), HRI(hri), MRI(mri), + MF(mf), BT(bt) {} bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override; @@ -1765,9 +1776,18 @@ namespace { const BitTracker::RegisterCell &RC); bool genExtractLow(MachineInstr *MI, BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC); + bool genBitSplit(MachineInstr *MI, BitTracker::RegisterRef RD, + const BitTracker::RegisterCell &RC, const RegisterSet &AVs); bool simplifyTstbit(MachineInstr *MI, BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC); + bool simplifyExtractLow(MachineInstr *MI, BitTracker::RegisterRef RD, + const BitTracker::RegisterCell &RC, const RegisterSet &AVs); + // Cache of created instructions to avoid creating duplicates. + // XXX Currently only used by genBitSplit. + std::vector<MachineInstr*> NewMIs; + + const MachineDominatorTree &MDT; const HexagonInstrInfo &HII; const HexagonRegisterInfo &HRI; MachineRegisterInfo &MRI; @@ -1927,8 +1947,10 @@ bool BitSimplification::genStoreImmediate(MachineInstr *MI) { switch (Opc) { case Hexagon::S2_storeri_io: Align++; + LLVM_FALLTHROUGH; case Hexagon::S2_storerh_io: Align++; + LLVM_FALLTHROUGH; case Hexagon::S2_storerb_io: break; default: @@ -2149,6 +2171,149 @@ bool BitSimplification::genExtractLow(MachineInstr *MI, return false; } +bool BitSimplification::genBitSplit(MachineInstr *MI, + BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC, + const RegisterSet &AVs) { + if (!GenBitSplit) + return false; + if (MaxBitSplit.getNumOccurrences()) { + if (CountBitSplit >= MaxBitSplit) + return false; + } + + unsigned Opc = MI->getOpcode(); + switch (Opc) { + case Hexagon::A4_bitsplit: + case Hexagon::A4_bitspliti: + return false; + } + + unsigned W = RC.width(); + if (W != 32) + return false; + + auto ctlz = [] (const BitTracker::RegisterCell &C) -> unsigned { + unsigned Z = C.width(); + while (Z > 0 && C[Z-1].is(0)) + --Z; + return C.width() - Z; + }; + + // Count the number of leading zeros in the target RC. + unsigned Z = ctlz(RC); + if (Z == 0 || Z == W) + return false; + + // A simplistic analysis: assume the source register (the one being split) + // is fully unknown, and that all its bits are self-references. + const BitTracker::BitValue &B0 = RC[0]; + if (B0.Type != BitTracker::BitValue::Ref) + return false; + + unsigned SrcR = B0.RefI.Reg; + unsigned SrcSR = 0; + unsigned Pos = B0.RefI.Pos; + + // All the non-zero bits should be consecutive bits from the same register. + for (unsigned i = 1; i < W-Z; ++i) { + const BitTracker::BitValue &V = RC[i]; + if (V.Type != BitTracker::BitValue::Ref) + return false; + if (V.RefI.Reg != SrcR || V.RefI.Pos != Pos+i) + return false; + } + + // Now, find the other bitfield among AVs. + for (unsigned S = AVs.find_first(); S; S = AVs.find_next(S)) { + // The number of leading zeros here should be the number of trailing + // non-zeros in RC. + if (!BT.has(S)) + continue; + const BitTracker::RegisterCell &SC = BT.lookup(S); + if (SC.width() != W || ctlz(SC) != W-Z) + continue; + // The Z lower bits should now match SrcR. + const BitTracker::BitValue &S0 = SC[0]; + if (S0.Type != BitTracker::BitValue::Ref || S0.RefI.Reg != SrcR) + continue; + unsigned P = S0.RefI.Pos; + + if (Pos <= P && (Pos + W-Z) != P) + continue; + if (P < Pos && (P + Z) != Pos) + continue; + // The starting bitfield position must be at a subregister boundary. + if (std::min(P, Pos) != 0 && std::min(P, Pos) != 32) + continue; + + unsigned I; + for (I = 1; I < Z; ++I) { + const BitTracker::BitValue &V = SC[I]; + if (V.Type != BitTracker::BitValue::Ref) + break; + if (V.RefI.Reg != SrcR || V.RefI.Pos != P+I) + break; + } + if (I != Z) + continue; + + // Generate bitsplit where S is defined. + if (MaxBitSplit.getNumOccurrences()) + CountBitSplit++; + MachineInstr *DefS = MRI.getVRegDef(S); + assert(DefS != nullptr); + DebugLoc DL = DefS->getDebugLoc(); + MachineBasicBlock &B = *DefS->getParent(); + auto At = DefS->isPHI() ? B.getFirstNonPHI() + : MachineBasicBlock::iterator(DefS); + if (MRI.getRegClass(SrcR)->getID() == Hexagon::DoubleRegsRegClassID) + SrcSR = (std::min(Pos, P) == 32) ? Hexagon::isub_hi : Hexagon::isub_lo; + if (!validateReg({SrcR,SrcSR}, Hexagon::A4_bitspliti, 1)) + continue; + unsigned ImmOp = Pos <= P ? W-Z : Z; + + // Find an existing bitsplit instruction if one already exists. + unsigned NewR = 0; + for (MachineInstr *In : NewMIs) { + if (In->getOpcode() != Hexagon::A4_bitspliti) + continue; + MachineOperand &Op1 = In->getOperand(1); + if (Op1.getReg() != SrcR || Op1.getSubReg() != SrcSR) + continue; + if (In->getOperand(2).getImm() != ImmOp) + continue; + // Check if the target register is available here. + MachineOperand &Op0 = In->getOperand(0); + MachineInstr *DefI = MRI.getVRegDef(Op0.getReg()); + assert(DefI != nullptr); + if (!MDT.dominates(DefI, &*At)) + continue; + + // Found one that can be reused. + assert(Op0.getSubReg() == 0); + NewR = Op0.getReg(); + break; + } + if (!NewR) { + NewR = MRI.createVirtualRegister(&Hexagon::DoubleRegsRegClass); + auto NewBS = BuildMI(B, At, DL, HII.get(Hexagon::A4_bitspliti), NewR) + .addReg(SrcR, 0, SrcSR) + .addImm(ImmOp); + NewMIs.push_back(NewBS); + } + if (Pos <= P) { + HBS::replaceRegWithSub(RD.Reg, NewR, Hexagon::isub_lo, MRI); + HBS::replaceRegWithSub(S, NewR, Hexagon::isub_hi, MRI); + } else { + HBS::replaceRegWithSub(S, NewR, Hexagon::isub_lo, MRI); + HBS::replaceRegWithSub(RD.Reg, NewR, Hexagon::isub_hi, MRI); + } + return true; + } + + return false; +} + // Check for tstbit simplification opportunity, where the bit being checked // can be tracked back to another register. For example: // vreg2 = S2_lsr_i_r vreg1, 5 @@ -2210,6 +2375,203 @@ bool BitSimplification::simplifyTstbit(MachineInstr *MI, return false; } +// Detect whether RD is a bitfield extract (sign- or zero-extended) of +// some register from the AVs set. Create a new corresponding instruction +// at the location of MI. The intent is to recognize situations where +// a sequence of instructions performs an operation that is equivalent to +// an extract operation, such as a shift left followed by a shift right. +bool BitSimplification::simplifyExtractLow(MachineInstr *MI, + BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC, + const RegisterSet &AVs) { + if (!GenExtract) + return false; + if (MaxExtract.getNumOccurrences()) { + if (CountExtract >= MaxExtract) + return false; + CountExtract++; + } + + unsigned W = RC.width(); + unsigned RW = W; + unsigned Len; + bool Signed; + + // The code is mostly class-independent, except for the part that generates + // the extract instruction, and establishes the source register (in case it + // needs to use a subregister). + const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI); + if (FRC != &Hexagon::IntRegsRegClass && FRC != &Hexagon::DoubleRegsRegClass) + return false; + assert(RD.Sub == 0); + + // Observation: + // If the cell has a form of 00..0xx..x with k zeros and n remaining + // bits, this could be an extractu of the n bits, but it could also be + // an extractu of a longer field which happens to have 0s in the top + // bit positions. + // The same logic applies to sign-extended fields. + // + // Do not check for the extended extracts, since it would expand the + // search space quite a bit. The search may be expensive as it is. + + const BitTracker::BitValue &TopV = RC[W-1]; + + // Eliminate candidates that have self-referential bits, since they + // cannot be extracts from other registers. Also, skip registers that + // have compile-time constant values. + bool IsConst = true; + for (unsigned I = 0; I != W; ++I) { + const BitTracker::BitValue &V = RC[I]; + if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg == RD.Reg) + return false; + IsConst = IsConst && (V.is(0) || V.is(1)); + } + if (IsConst) + return false; + + if (TopV.is(0) || TopV.is(1)) { + bool S = TopV.is(1); + for (--W; W > 0 && RC[W-1].is(S); --W) + ; + Len = W; + Signed = S; + // The sign bit must be a part of the field being extended. + if (Signed) + ++Len; + } else { + // This could still be a sign-extended extract. + assert(TopV.Type == BitTracker::BitValue::Ref); + if (TopV.RefI.Reg == RD.Reg || TopV.RefI.Pos == W-1) + return false; + for (--W; W > 0 && RC[W-1] == TopV; --W) + ; + // The top bits of RC are copies of TopV. One occurrence of TopV will + // be a part of the field. + Len = W + 1; + Signed = true; + } + + // This would be just a copy. It should be handled elsewhere. + if (Len == RW) + return false; + + DEBUG({ + dbgs() << __func__ << " on reg: " << PrintReg(RD.Reg, &HRI, RD.Sub) + << ", MI: " << *MI; + dbgs() << "Cell: " << RC << '\n'; + dbgs() << "Expected bitfield size: " << Len << " bits, " + << (Signed ? "sign" : "zero") << "-extended\n"; + }); + + bool Changed = false; + + for (unsigned R = AVs.find_first(); R != 0; R = AVs.find_next(R)) { + if (!BT.has(R)) + continue; + const BitTracker::RegisterCell &SC = BT.lookup(R); + unsigned SW = SC.width(); + + // The source can be longer than the destination, as long as its size is + // a multiple of the size of the destination. Also, we would need to be + // able to refer to the subregister in the source that would be of the + // same size as the destination, but only check the sizes here. + if (SW < RW || (SW % RW) != 0) + continue; + + // The field can start at any offset in SC as long as it contains Len + // bits and does not cross subregister boundary (if the source register + // is longer than the destination). + unsigned Off = 0; + while (Off <= SW-Len) { + unsigned OE = (Off+Len)/RW; + if (OE != Off/RW) { + // The assumption here is that if the source (R) is longer than the + // destination, then the destination is a sequence of words of + // size RW, and each such word in R can be accessed via a subregister. + // + // If the beginning and the end of the field cross the subregister + // boundary, advance to the next subregister. + Off = OE*RW; + continue; + } + if (HBS::isEqual(RC, 0, SC, Off, Len)) + break; + ++Off; + } + + if (Off > SW-Len) + continue; + + // Found match. + unsigned ExtOpc = 0; + if (Off == 0) { + if (Len == 8) + ExtOpc = Signed ? Hexagon::A2_sxtb : Hexagon::A2_zxtb; + else if (Len == 16) + ExtOpc = Signed ? Hexagon::A2_sxth : Hexagon::A2_zxth; + else if (Len < 10 && !Signed) + ExtOpc = Hexagon::A2_andir; + } + if (ExtOpc == 0) { + ExtOpc = + Signed ? (RW == 32 ? Hexagon::S4_extract : Hexagon::S4_extractp) + : (RW == 32 ? Hexagon::S2_extractu : Hexagon::S2_extractup); + } + unsigned SR = 0; + // This only recognizes isub_lo and isub_hi. + if (RW != SW && RW*2 != SW) + continue; + if (RW != SW) + SR = (Off/RW == 0) ? Hexagon::isub_lo : Hexagon::isub_hi; + Off = Off % RW; + + if (!validateReg({R,SR}, ExtOpc, 1)) + continue; + + // Don't generate the same instruction as the one being optimized. + if (MI->getOpcode() == ExtOpc) { + // All possible ExtOpc's have the source in operand(1). + const MachineOperand &SrcOp = MI->getOperand(1); + if (SrcOp.getReg() == R) + continue; + } + + DebugLoc DL = MI->getDebugLoc(); + MachineBasicBlock &B = *MI->getParent(); + unsigned NewR = MRI.createVirtualRegister(FRC); + auto At = MI->isPHI() ? B.getFirstNonPHI() + : MachineBasicBlock::iterator(MI); + auto MIB = BuildMI(B, At, DL, HII.get(ExtOpc), NewR) + .addReg(R, 0, SR); + switch (ExtOpc) { + case Hexagon::A2_sxtb: + case Hexagon::A2_zxtb: + case Hexagon::A2_sxth: + case Hexagon::A2_zxth: + break; + case Hexagon::A2_andir: + MIB.addImm((1u << Len) - 1); + break; + case Hexagon::S4_extract: + case Hexagon::S2_extractu: + case Hexagon::S4_extractp: + case Hexagon::S2_extractup: + MIB.addImm(Len) + .addImm(Off); + break; + default: + llvm_unreachable("Unexpected opcode"); + } + + HBS::replaceReg(RD.Reg, NewR, MRI); + BT.put(BitTracker::RegisterRef(NewR), RC); + Changed = true; + break; + } + + return Changed; +} + bool BitSimplification::processBlock(MachineBasicBlock &B, const RegisterSet &AVs) { if (!BT.reached(&B)) @@ -2247,12 +2609,15 @@ bool BitSimplification::processBlock(MachineBasicBlock &B, if (FRC->getID() == Hexagon::DoubleRegsRegClassID) { bool T = genPackhl(MI, RD, RC); + T = T || simplifyExtractLow(MI, RD, RC, AVB); Changed |= T; continue; } if (FRC->getID() == Hexagon::IntRegsRegClassID) { - bool T = genExtractHalf(MI, RD, RC); + bool T = genBitSplit(MI, RD, RC, AVB); + T = T || simplifyExtractLow(MI, RD, RC, AVB); + T = T || genExtractHalf(MI, RD, RC); T = T || genCombineHalf(MI, RD, RC); T = T || genExtractLow(MI, RD, RC); Changed |= T; @@ -2294,7 +2659,7 @@ bool HexagonBitSimplify::runOnMachineFunction(MachineFunction &MF) { Changed |= visitBlock(Entry, ImmG, AIG); RegisterSet ARE; // Available registers for RIE. - RedundantInstrElimination RIE(BT, HII, MRI); + RedundantInstrElimination RIE(BT, HII, HRI, MRI); bool Ried = visitBlock(Entry, RIE, ARE); if (Ried) { Changed = true; @@ -2313,7 +2678,7 @@ bool HexagonBitSimplify::runOnMachineFunction(MachineFunction &MF) { BT.run(); RegisterSet ABS; // Available registers for BS. - BitSimplification BitS(BT, HII, HRI, MRI, MF); + BitSimplification BitS(BT, *MDT, HII, HRI, MRI, MF); Changed |= visitBlock(Entry, BitS, ABS); Changed = DeadCodeElimination(MF, *MDT).run() || Changed; @@ -2599,7 +2964,7 @@ void HexagonLoopRescheduling::moveGroup(InstrGroup &G, MachineBasicBlock &LB, for (unsigned j = 0, m = SI->getNumOperands(); j < m; ++j) { const MachineOperand &Op = SI->getOperand(j); if (!Op.isReg()) { - MIB.addOperand(Op); + MIB.add(Op); continue; } if (!Op.isUse()) |