Diffstat (limited to 'contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp')
-rw-r--r-- | contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp | 138
1 file changed, 67 insertions(+), 71 deletions(-)
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index c1244225..9c4b496 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -37,6 +37,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
 
@@ -409,6 +410,8 @@ ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
   assert((Cond.size() == 2 || Cond.size() == 0) &&
          "ARM branch conditions have two components!");
 
+  // For conditional branches, we use addOperand to preserve CPSR flags.
+
   if (!FBB) {
     if (Cond.empty()) { // Unconditional branch?
       if (isThumb)
@@ -417,13 +420,13 @@ ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
         BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
     } else
       BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
-        .addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
+        .addImm(Cond[0].getImm()).addOperand(Cond[1]);
     return 1;
   }
 
   // Two-way conditional branch.
   BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
-    .addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
+    .addImm(Cond[0].getImm()).addOperand(Cond[1]);
   if (isThumb)
     BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).addImm(ARMCC::AL).addReg(0);
   else
@@ -624,6 +627,10 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
   case ARM::t2MOVi32imm:
     return 8;
   case ARM::CONSTPOOL_ENTRY:
+  case ARM::JUMPTABLE_INSTS:
+  case ARM::JUMPTABLE_ADDRS:
+  case ARM::JUMPTABLE_TBB:
+  case ARM::JUMPTABLE_TBH:
     // If this machine instr is a constant pool entry, its size is recorded as
     // operand #2.
     return MI->getOperand(2).getImm();
@@ -638,42 +645,6 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
   case ARM::t2Int_eh_sjlj_setjmp:
   case ARM::t2Int_eh_sjlj_setjmp_nofp:
     return 12;
-  case ARM::BR_JTr:
-  case ARM::BR_JTm:
-  case ARM::BR_JTadd:
-  case ARM::tBR_JTr:
-  case ARM::t2BR_JT:
-  case ARM::t2TBB_JT:
-  case ARM::t2TBH_JT: {
-    // These are jumptable branches, i.e. a branch followed by an inlined
-    // jumptable. The size is 4 + 4 * number of entries. For TBB, each
-    // entry is one byte; TBH two byte each.
-    unsigned EntrySize = (Opc == ARM::t2TBB_JT)
-      ? 1 : ((Opc == ARM::t2TBH_JT) ? 2 : 4);
-    unsigned NumOps = MCID.getNumOperands();
-    MachineOperand JTOP =
-      MI->getOperand(NumOps - (MI->isPredicable() ? 3 : 2));
-    unsigned JTI = JTOP.getIndex();
-    const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
-    assert(MJTI != nullptr);
-    const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
-    assert(JTI < JT.size());
-    // Thumb instructions are 2 byte aligned, but JT entries are 4 byte
-    // 4 aligned. The assembler / linker may add 2 byte padding just before
-    // the JT entries. The size does not include this padding; the
-    // constant islands pass does separate bookkeeping for it.
-    // FIXME: If we know the size of the function is less than (1 << 16) *2
-    // bytes, we can use 16-bit entries instead. Then there won't be an
-    // alignment issue.
-    unsigned InstSize = (Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT) ? 2 : 4;
-    unsigned NumEntries = JT[JTI].MBBs.size();
-    if (Opc == ARM::t2TBB_JT && (NumEntries & 1))
-      // Make sure the instruction that follows TBB is 2-byte aligned.
-      // FIXME: Constant island pass should insert an "ALIGN" instruction
-      // instead.
-      ++NumEntries;
-    return NumEntries * EntrySize + InstSize;
-  }
   case ARM::SPACE:
     return MI->getOperand(1).getImm();
   }
@@ -1431,7 +1402,7 @@ ARMBaseInstrInfo::duplicate(MachineInstr *Orig, MachineFunction &MF) const {
 bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0,
                                         const MachineInstr *MI1,
                                         const MachineRegisterInfo *MRI) const {
-  int Opcode = MI0->getOpcode();
+  unsigned Opcode = MI0->getOpcode();
   if (Opcode == ARM::t2LDRpci ||
       Opcode == ARM::t2LDRpci_pic ||
       Opcode == ARM::tLDRpci ||
@@ -1684,6 +1655,33 @@ isProfitableToIfCvt(MachineBasicBlock &MBB,
   if (!NumCycles)
     return false;
 
+  // If we are optimizing for size, see if the branch in the predecessor can be
+  // lowered to cbn?z by the constant island lowering pass, and return false if
+  // so. This results in a shorter instruction sequence.
+  const Function *F = MBB.getParent()->getFunction();
+  if (F->hasFnAttribute(Attribute::OptimizeForSize) ||
+      F->hasFnAttribute(Attribute::MinSize)) {
+    MachineBasicBlock *Pred = *MBB.pred_begin();
+    if (!Pred->empty()) {
+      MachineInstr *LastMI = &*Pred->rbegin();
+      if (LastMI->getOpcode() == ARM::t2Bcc) {
+        MachineBasicBlock::iterator CmpMI = LastMI;
+        if (CmpMI != Pred->begin()) {
+          --CmpMI;
+          if (CmpMI->getOpcode() == ARM::tCMPi8 ||
+              CmpMI->getOpcode() == ARM::t2CMPri) {
+            unsigned Reg = CmpMI->getOperand(0).getReg();
+            unsigned PredReg = 0;
+            ARMCC::CondCodes P = getInstrPredicate(CmpMI, PredReg);
+            if (P == ARMCC::AL && CmpMI->getOperand(1).getImm() == 0 &&
+                isARMLowRegister(Reg))
+              return false;
+          }
+        }
+      }
+    }
+  }
+
   // Attempt to estimate the relative costs of predication versus branching.
   unsigned UnpredCost = Probability.getNumerator() * NumCycles;
   UnpredCost /= Probability.getDenominator();
@@ -1741,7 +1739,7 @@ llvm::getInstrPredicate(const MachineInstr *MI, unsigned &PredReg) {
 }
 
-int llvm::getMatchingCondBranchOpcode(int Opc) {
+unsigned llvm::getMatchingCondBranchOpcode(unsigned Opc) {
   if (Opc == ARM::B)
     return ARM::Bcc;
   if (Opc == ARM::tB)
@@ -1809,8 +1807,7 @@ static MachineInstr *canFoldIntoMOVCC(unsigned Reg,
       return nullptr;
   }
   bool DontMoveAcrossStores = true;
-  if (!MI->isSafeToMove(TII, /* AliasAnalysis = */ nullptr,
-                        DontMoveAcrossStores))
+  if (!MI->isSafeToMove(/* AliasAnalysis = */ nullptr, DontMoveAcrossStores))
     return nullptr;
   return MI;
 }
@@ -1891,6 +1888,13 @@ ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI,
   SeenMIs.insert(NewMI);
   SeenMIs.erase(DefMI);
 
+  // If MI is inside a loop, and DefMI is outside the loop, then kill flags on
+  // DefMI would be invalid when tranferred inside the loop. Checking for a
+  // loop is expensive, but at least remove kill flags if they are in different
+  // BBs.
+  if (DefMI->getParent() != MI->getParent())
+    NewMI->clearKillInfo();
+
   // The caller will erase MI, but not DefMI.
   DefMI->eraseFromParent();
   return NewMI;
@@ -1991,8 +1995,7 @@ bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
                                       unsigned NumBytes) {
   // This optimisation potentially adds lots of load and store
   // micro-operations, it's only really a great benefit to code-size.
-  if (!MF.getFunction()->getAttributes().hasAttribute(
-          AttributeSet::FunctionIndex, Attribute::MinSize))
+  if (!MF.getFunction()->hasFnAttribute(Attribute::MinSize))
     return false;
 
   // If only one register is pushed/popped, LLVM can use an LDR/STR
@@ -2286,16 +2289,6 @@ static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg,
       if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg())
         return true;
       break;
-    case ARM::COPY: {
-      // Walk down one instruction which is potentially an 'and'.
-      const MachineInstr &Copy = *MI;
-      MachineBasicBlock::iterator AND(
-        std::next(MachineBasicBlock::iterator(MI)));
-      if (AND == MI->getParent()->end()) return false;
-      MI = AND;
-      return isSuitableForMask(MI, Copy.getOperand(0).getReg(),
-                               CmpMask, true);
-    }
   }
 
   return false;
@@ -3665,9 +3658,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
   // instructions).
   if (Latency > 0 && Subtarget.isThumb2()) {
     const MachineFunction *MF = DefMI->getParent()->getParent();
-    if (MF->getFunction()->getAttributes().
-          hasAttribute(AttributeSet::FunctionIndex,
-                       Attribute::OptimizeForSize))
+    if (MF->getFunction()->hasFnAttribute(Attribute::OptimizeForSize))
       --Latency;
   }
   return Latency;
@@ -4118,19 +4109,21 @@ enum ARMExeDomain {
 //
 std::pair<uint16_t, uint16_t>
 ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const {
-  // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON
-  // if they are not predicated.
-  if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI))
-    return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
-
-  // CortexA9 is particularly picky about mixing the two and wants these
-  // converted.
-  if (Subtarget.isCortexA9() && !isPredicated(MI) &&
-      (MI->getOpcode() == ARM::VMOVRS ||
-       MI->getOpcode() == ARM::VMOVSR ||
-       MI->getOpcode() == ARM::VMOVS))
-    return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
-
+  // If we don't have access to NEON instructions then we won't be able
+  // to swizzle anything to the NEON domain. Check to make sure.
+  if (Subtarget.hasNEON()) {
+    // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON
+    // if they are not predicated.
+    if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI))
+      return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
+
+    // CortexA9 is particularly picky about mixing the two and wants these
+    // converted.
+    if (Subtarget.isCortexA9() && !isPredicated(MI) &&
+        (MI->getOpcode() == ARM::VMOVRS || MI->getOpcode() == ARM::VMOVSR ||
+         MI->getOpcode() == ARM::VMOVS))
+      return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
+  }
   // No other instructions can be swizzled, so just determine their domain.
   unsigned Domain = MI->getDesc().TSFlags & ARMII::DomainMask;
 
@@ -4223,6 +4216,9 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
     // Zap the predicate operands.
     assert(!isPredicated(MI) && "Cannot predicate a VORRd");
 
+    // Make sure we've got NEON instructions.
+    assert(Subtarget.hasNEON() && "VORRd requires NEON");
+
     // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits)
     DstReg = MI->getOperand(0).getReg();
    SrcReg = MI->getOperand(1).getReg();
@@ -4510,7 +4506,7 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI,
 }
 
 bool ARMBaseInstrInfo::hasNOP() const {
-  return (Subtarget.getFeatureBits() & ARM::HasV6T2Ops) != 0;
+  return Subtarget.getFeatureBits()[ARM::HasV6KOps];
 }
 
 bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const {
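
Note on the InsertBranch hunk above: switching from addReg(Cond[1].getReg()) to addOperand(Cond[1]) copies the whole CPSR operand, so per-operand flags such as a kill marker survive when the conditional branch is rebuilt, whereas addReg() carries only the register number. The standalone C++ sketch below is not LLVM code; Operand and InstrBuilder are invented stand-ins that model only this one difference, under the assumption that the operand's extra state is what matters.

// Illustrative only: simplified stand-ins for MachineOperand and
// MachineInstrBuilder; the real LLVM types carry far more state.
#include <cstdio>
#include <vector>

struct Operand {
  unsigned Reg;
  bool IsKill; // liveness flag that addReg(Op.Reg) would not copy
};

struct InstrBuilder {
  std::vector<Operand> Ops;
  // Mimics addReg(unsigned): records only the register number,
  // so any flags on the source operand are lost.
  InstrBuilder &addReg(unsigned Reg) {
    Ops.push_back({Reg, false});
    return *this;
  }
  // Mimics addOperand(const MachineOperand &): copies the operand
  // wholesale, flags included.
  InstrBuilder &addOperand(const Operand &Op) {
    Ops.push_back(Op);
    return *this;
  }
};

int main() {
  Operand CPSR{/*Reg=*/3, /*IsKill=*/true};

  InstrBuilder ByReg, ByOperand;
  ByReg.addReg(CPSR.Reg);     // kill flag dropped
  ByOperand.addOperand(CPSR); // kill flag preserved

  std::printf("addReg kill=%d, addOperand kill=%d\n",
              ByReg.Ops[0].IsKill, ByOperand.Ops[0].IsKill);
  return 0;
}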