diff options
Diffstat (limited to 'contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp')
-rw-r--r-- | contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp | 191 |
1 files changed, 100 insertions, 91 deletions
diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp index 5d29531..6fa5ad7 100644 --- a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -53,11 +53,6 @@ static cl::opt<bool> AdjustJumpTableBlocks("arm-adjust-jump-tables", cl::Hidden, cl::init(true), cl::desc("Adjust basic block layout to better use TB[BH]")); -// FIXME: This option should be removed once it has received sufficient testing. -static cl::opt<bool> -AlignConstantIslands("arm-align-constant-islands", cl::Hidden, cl::init(true), - cl::desc("Align constant islands in code")); - /// UnknownPadding - Return the worst case padding that could result from /// unknown offset bits. This does not include alignment padding caused by /// known offset bits. @@ -235,8 +230,8 @@ namespace { MachineInstr *MI; unsigned MaxDisp : 31; bool isCond : 1; - int UncondBr; - ImmBranch(MachineInstr *mi, unsigned maxdisp, bool cond, int ubr) + unsigned UncondBr; + ImmBranch(MachineInstr *mi, unsigned maxdisp, bool cond, unsigned ubr) : MI(mi), MaxDisp(maxdisp), isCond(cond), UncondBr(ubr) {} }; @@ -306,6 +301,8 @@ namespace { bool optimizeThumb2Instructions(); bool optimizeThumb2Branches(); bool reorderThumb2JumpTables(); + unsigned removeDeadDefinitions(MachineInstr *MI, unsigned BaseReg, + unsigned IdxReg); bool optimizeThumb2JumpTables(); MachineBasicBlock *adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB); @@ -383,11 +380,9 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { << MCP->getConstants().size() << " CP entries, aligned to " << MCP->getConstantPoolAlignment() << " bytes *****\n"); - TII = (const ARMBaseInstrInfo *)MF->getTarget() - .getSubtargetImpl() - ->getInstrInfo(); + STI = &static_cast<const ARMSubtarget &>(MF->getSubtarget()); + TII = STI->getInstrInfo(); AFI = MF->getInfo<ARMFunctionInfo>(); - STI = &MF->getTarget().getSubtarget<ARMSubtarget>(); isThumb = AFI->isThumbFunction(); isThumb1 = AFI->isThumb1OnlyFunction(); @@ -414,13 +409,6 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { MF->RenumberBlocks(); } - // Thumb1 functions containing constant pools get 4-byte alignment. - // This is so we can keep exact track of where the alignment padding goes. - - // ARM and Thumb2 functions need to be 4-byte aligned. - if (!isThumb1) - MF->ensureAlignment(2); // 2 = log2(4) - // Perform the initial placement of the constant pool entries. To start with, // we put them all at the end of the function. std::vector<MachineInstr*> CPEMIs; @@ -437,6 +425,10 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { CPEMIs.clear(); DEBUG(dumpBBs()); + // Functions with jump tables need an alignment of 4 because they use the ADR + // instruction, which aligns the PC to 4 bytes before adding an offset. + if (!T2JumpTables.empty()) + MF->ensureAlignment(2); /// Remove dead constant pool entries. MadeChange |= removeUnusedCPEntries(); @@ -515,8 +507,7 @@ ARMConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) { unsigned MaxAlign = Log2_32(MCP->getConstantPoolAlignment()); // Mark the basic block as required by the const-pool. - // If AlignConstantIslands isn't set, use 4-byte alignment for everything. - BB->setAlignment(AlignConstantIslands ? MaxAlign : 2); + BB->setAlignment(MaxAlign); // The function needs to be as aligned as the basic blocks. The linker may // move functions around based on their alignment. @@ -532,7 +523,7 @@ ARMConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) { // identity mapping of CPI's to CPE's. const std::vector<MachineConstantPoolEntry> &CPs = MCP->getConstants(); - const DataLayout &TD = *MF->getSubtarget().getDataLayout(); + const DataLayout &TD = *MF->getTarget().getDataLayout(); for (unsigned i = 0, e = CPs.size(); i != e; ++i) { unsigned Size = TD.getTypeAllocSize(CPs[i].getType()); assert(Size >= 4 && "Too small constant pool entry"); @@ -606,10 +597,6 @@ ARMConstantIslands::CPEntry unsigned ARMConstantIslands::getCPELogAlign(const MachineInstr *CPEMI) { assert(CPEMI && CPEMI->getOpcode() == ARM::CONSTPOOL_ENTRY); - // Everything is 4-byte aligned unless AlignConstantIslands is set. - if (!AlignConstantIslands) - return 2; - unsigned CPI = CPEMI->getOperand(1).getIndex(); assert(CPI < MCP->getConstants().size() && "Invalid constant pool index."); unsigned Align = MCP->getConstants()[CPI].getAlignment(); @@ -669,7 +656,7 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) { if (I->isDebugValue()) continue; - int Opc = I->getOpcode(); + unsigned Opc = I->getOpcode(); if (I->isBranch()) { bool isCond = false; unsigned Bits = 0; @@ -1764,8 +1751,13 @@ bool ARMConstantIslands::optimizeThumb2Instructions() { bool ARMConstantIslands::optimizeThumb2Branches() { bool MadeChange = false; - for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i) { - ImmBranch &Br = ImmBranches[i]; + // The order in which branches appear in ImmBranches is approximately their + // order within the function body. By visiting later branches first, we reduce + // the distance between earlier forward branches and their targets, making it + // more likely that the cbn?z optimization, which can only apply to forward + // branches, will succeed. + for (unsigned i = ImmBranches.size(); i != 0; --i) { + ImmBranch &Br = ImmBranches[i-1]; unsigned Opcode = Br.MI->getOpcode(); unsigned NewOpc = 0; unsigned Scale = 1; @@ -1852,6 +1844,79 @@ bool ARMConstantIslands::optimizeThumb2Branches() { return MadeChange; } +/// If we've formed a TBB or TBH instruction, the base register is now +/// redundant. In most cases, the instructions defining it will now be dead and +/// can be tidied up. This function removes them if so, and returns the number +/// of bytes saved. +unsigned ARMConstantIslands::removeDeadDefinitions(MachineInstr *MI, + unsigned BaseReg, + unsigned IdxReg) { + unsigned BytesRemoved = 0; + MachineBasicBlock *MBB = MI->getParent(); + + // Scan backwards to find the instruction that defines the base + // register. Due to post-RA scheduling, we can't count on it + // immediately preceding the branch instruction. + MachineBasicBlock::iterator PrevI = MI; + MachineBasicBlock::iterator B = MBB->begin(); + while (PrevI != B && !PrevI->definesRegister(BaseReg)) + --PrevI; + + // If for some reason we didn't find it, we can't do anything, so + // just skip this one. + if (!PrevI->definesRegister(BaseReg) || PrevI->hasUnmodeledSideEffects() || + PrevI->mayStore()) + return BytesRemoved; + + MachineInstr *AddrMI = PrevI; + unsigned NewBaseReg = BytesRemoved; + + // Examine the instruction that calculates the jumptable entry address. Make + // sure it only defines the base register and kills any uses other than the + // index register. We also need precisely one use to trace backwards to + // (hopefully) the LEA. + for (unsigned k = 0, eee = AddrMI->getNumOperands(); k != eee; ++k) { + const MachineOperand &MO = AddrMI->getOperand(k); + if (!MO.isReg() || !MO.getReg()) + continue; + if (MO.isDef() && MO.getReg() != BaseReg) + return BytesRemoved; + + if (MO.isUse() && MO.getReg() != IdxReg) { + if (!MO.isKill() || (NewBaseReg != 0 && NewBaseReg != MO.getReg())) + return BytesRemoved; + NewBaseReg = MO.getReg(); + } + } + + // Want to continue searching for AddrMI, but there are 2 problems: AddrMI is + // going away soon, and even decrementing once may be invalid. + if (PrevI != B) + PrevI = std::prev(PrevI); + + DEBUG(dbgs() << "remove addr: " << *AddrMI); + BytesRemoved += TII->GetInstSizeInBytes(AddrMI); + AddrMI->eraseFromParent(); + + // Now scan back again to find the tLEApcrel or t2LEApcrelJT instruction + // that gave us the initial base register definition. + for (; PrevI != B && !PrevI->definesRegister(NewBaseReg); --PrevI) + ; + + // The instruction should be a tLEApcrel or t2LEApcrelJT; we want + // to delete it as well. + MachineInstr *LeaMI = PrevI; + if ((LeaMI->getOpcode() != ARM::tLEApcrelJT && + LeaMI->getOpcode() != ARM::t2LEApcrelJT) || + LeaMI->getOperand(0).getReg() != NewBaseReg) + return BytesRemoved; + + DEBUG(dbgs() << "remove lea: " << *LeaMI); + BytesRemoved += TII->GetInstSizeInBytes(LeaMI); + LeaMI->eraseFromParent(); + return BytesRemoved; +} + /// optimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller /// jumptables when it's possible. bool ARMConstantIslands::optimizeThumb2JumpTables() { @@ -1867,7 +1932,7 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() { MachineInstr *MI = T2JumpTables[i]; const MCInstrDesc &MCID = MI->getDesc(); unsigned NumOps = MCID.getNumOperands(); - unsigned JTOpIdx = NumOps - (MI->isPredicable() ? 3 : 2); + unsigned JTOpIdx = NumOps - (MI->isPredicable() ? 2 : 1); MachineOperand JTOP = MI->getOperand(JTOpIdx); unsigned JTI = JTOP.getIndex(); assert(JTI < JT.size()); @@ -1899,78 +1964,22 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() { unsigned IdxReg = MI->getOperand(1).getReg(); bool IdxRegKill = MI->getOperand(1).isKill(); - // Scan backwards to find the instruction that defines the base - // register. Due to post-RA scheduling, we can't count on it - // immediately preceding the branch instruction. - MachineBasicBlock::iterator PrevI = MI; - MachineBasicBlock::iterator B = MBB->begin(); - while (PrevI != B && !PrevI->definesRegister(BaseReg)) - --PrevI; - - // If for some reason we didn't find it, we can't do anything, so - // just skip this one. - if (!PrevI->definesRegister(BaseReg)) - continue; - - MachineInstr *AddrMI = PrevI; - bool OptOk = true; - // Examine the instruction that calculates the jumptable entry address. - // Make sure it only defines the base register and kills any uses - // other than the index register. - for (unsigned k = 0, eee = AddrMI->getNumOperands(); k != eee; ++k) { - const MachineOperand &MO = AddrMI->getOperand(k); - if (!MO.isReg() || !MO.getReg()) - continue; - if (MO.isDef() && MO.getReg() != BaseReg) { - OptOk = false; - break; - } - if (MO.isUse() && !MO.isKill() && MO.getReg() != IdxReg) { - OptOk = false; - break; - } - } - if (!OptOk) - continue; - - // Now scan back again to find the tLEApcrel or t2LEApcrelJT instruction - // that gave us the initial base register definition. - for (--PrevI; PrevI != B && !PrevI->definesRegister(BaseReg); --PrevI) - ; - - // The instruction should be a tLEApcrel or t2LEApcrelJT; we want - // to delete it as well. - MachineInstr *LeaMI = PrevI; - if ((LeaMI->getOpcode() != ARM::tLEApcrelJT && - LeaMI->getOpcode() != ARM::t2LEApcrelJT) || - LeaMI->getOperand(0).getReg() != BaseReg) - OptOk = false; - - if (!OptOk) - continue; - - DEBUG(dbgs() << "Shrink JT: " << *MI << " addr: " << *AddrMI - << " lea: " << *LeaMI); + DEBUG(dbgs() << "Shrink JT: " << *MI); unsigned Opc = ByteOk ? ARM::t2TBB_JT : ARM::t2TBH_JT; MachineBasicBlock::iterator MI_JT = MI; MachineInstr *NewJTMI = BuildMI(*MBB, MI_JT, MI->getDebugLoc(), TII->get(Opc)) .addReg(IdxReg, getKillRegState(IdxRegKill)) - .addJumpTableIndex(JTI, JTOP.getTargetFlags()) - .addImm(MI->getOperand(JTOpIdx+1).getImm()); + .addJumpTableIndex(JTI, JTOP.getTargetFlags()); DEBUG(dbgs() << "BB#" << MBB->getNumber() << ": " << *NewJTMI); // FIXME: Insert an "ALIGN" instruction to ensure the next instruction // is 2-byte aligned. For now, asm printer will fix it up. unsigned NewSize = TII->GetInstSizeInBytes(NewJTMI); - unsigned OrigSize = TII->GetInstSizeInBytes(AddrMI); - OrigSize += TII->GetInstSizeInBytes(LeaMI); - OrigSize += TII->GetInstSizeInBytes(MI); - - AddrMI->eraseFromParent(); - LeaMI->eraseFromParent(); + unsigned OrigSize = TII->GetInstSizeInBytes(MI); + unsigned DeadSize = removeDeadDefinitions(MI, BaseReg, IdxReg); MI->eraseFromParent(); - int delta = OrigSize - NewSize; + int delta = OrigSize - NewSize + DeadSize; BBInfo[MBB->getNumber()].Size -= delta; adjustBBOffsetsAfter(MBB); @@ -1995,7 +2004,7 @@ bool ARMConstantIslands::reorderThumb2JumpTables() { MachineInstr *MI = T2JumpTables[i]; const MCInstrDesc &MCID = MI->getDesc(); unsigned NumOps = MCID.getNumOperands(); - unsigned JTOpIdx = NumOps - (MI->isPredicable() ? 3 : 2); + unsigned JTOpIdx = NumOps - (MI->isPredicable() ? 2 : 1); MachineOperand JTOP = MI->getOperand(JTOpIdx); unsigned JTI = JTOP.getIndex(); assert(JTI < JT.size()); |