Diffstat (limited to 'lib/Target/ARM')
73 files changed, 5043 insertions, 2265 deletions
diff --git a/lib/Target/ARM/A15SDOptimizer.cpp b/lib/Target/ARM/A15SDOptimizer.cpp new file mode 100644 index 0000000..f0d4dbe --- /dev/null +++ b/lib/Target/ARM/A15SDOptimizer.cpp @@ -0,0 +1,704 @@ +//=== A15SDOptimizer.cpp - Optimize DPR and SPR register accesses on A15 ===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The Cortex-A15 processor employs a tracking scheme in its register renaming +// in order to process each instruction's micro-ops speculatively and +// out-of-order with appropriate forwarding. The ARM architecture allows VFP +// instructions to read and write 32-bit S-registers. Each S-register +// corresponds to one half (upper or lower) of an overlaid 64-bit D-register. +// +// Several instruction patterns can be used to provide this capability, and +// some of them offer higher performance than other, potentially more direct +// patterns, specifically when one micro-op reads a D-register operand that +// has recently been written as one or more S-register results. +// +// This file defines a pre-regalloc pass which looks for SPR producers that +// are going to be used by DPR (or QPR) consumers and creates the more +// optimized access pattern. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "a15-sd-optimizer" +#include "ARM.h" +#include "ARMBaseInstrInfo.h" +#include "ARMSubtarget.h" +#include "ARMISelLowering.h" +#include "ARMTargetMachine.h" + +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetRegisterInfo.h" + +#include <set> + +using namespace llvm; + +namespace { + struct A15SDOptimizer : public MachineFunctionPass { + static char ID; + A15SDOptimizer() : MachineFunctionPass(ID) {} + + virtual bool runOnMachineFunction(MachineFunction &Fn); + + virtual const char *getPassName() const { + return "ARM A15 S->D optimizer"; + } + + private: + const ARMBaseInstrInfo *TII; + const TargetRegisterInfo *TRI; + MachineRegisterInfo *MRI; + + bool runOnInstruction(MachineInstr *MI); + + // + // Instruction builder helpers + // + unsigned createDupLane(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL, + unsigned Reg, unsigned Lane, + bool QPR=false); + + unsigned createExtractSubreg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL, + unsigned DReg, unsigned Lane, + const TargetRegisterClass *TRC); + + unsigned createVExt(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL, + unsigned Ssub0, unsigned Ssub1); + + unsigned createRegSequence(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL, + unsigned Reg1, unsigned Reg2); + + unsigned createInsertSubreg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL, unsigned DReg, unsigned Lane, + unsigned ToInsert); + + unsigned createImplicitDef(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL); + + // + // Various property checkers
+ // + bool usesRegClass(MachineOperand &MO, const TargetRegisterClass *TRC); + bool hasPartialWrite(MachineInstr *MI); + SmallVector<unsigned, 8> getReadDPRs(MachineInstr *MI); + unsigned getDPRLaneFromSPR(unsigned SReg); + + // + // Methods used for getting the definitions of partial registers + // + + MachineInstr *elideCopies(MachineInstr *MI); + void elideCopiesAndPHIs(MachineInstr *MI, + SmallVectorImpl<MachineInstr*> &Outs); + + // + // Pattern optimization methods + // + unsigned optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg); + unsigned optimizeSDPattern(MachineInstr *MI); + unsigned getPrefSPRLane(unsigned SReg); + + // + // Sanitizing method - used to make sure we don't leave dead code around. + // + void eraseInstrWithNoUses(MachineInstr *MI); + + // + // A map used to track the changes done by this pass. + // + std::map<MachineInstr*, unsigned> Replacements; + std::set<MachineInstr *> DeadInstr; + }; + char A15SDOptimizer::ID = 0; +} // end anonymous namespace + +// Returns true if MO is a register operand in the register class TRC. +bool A15SDOptimizer::usesRegClass(MachineOperand &MO, + const TargetRegisterClass *TRC) { + if (!MO.isReg()) + return false; + unsigned Reg = MO.getReg(); + + if (TargetRegisterInfo::isVirtualRegister(Reg)) + return MRI->getRegClass(Reg)->hasSuperClassEq(TRC); + else + return TRC->contains(Reg); +} + +unsigned A15SDOptimizer::getDPRLaneFromSPR(unsigned SReg) { + unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, + &ARM::DPRRegClass); + if (DReg != ARM::NoRegister) return ARM::ssub_1; + return ARM::ssub_0; +} + +// Get the subreg type that is most likely to be coalesced +// for an SPR register that will be used in VDUP32d pseudo. +unsigned A15SDOptimizer::getPrefSPRLane(unsigned SReg) { + if (!TRI->isVirtualRegister(SReg)) + return getDPRLaneFromSPR(SReg); + + MachineInstr *MI = MRI->getVRegDef(SReg); + if (!MI) return ARM::ssub_0; + MachineOperand *MO = MI->findRegisterDefOperand(SReg); + if (!MO) return ARM::ssub_0; + assert(MO->isReg() && "Non register operand found!"); + + if (MI->isCopy() && usesRegClass(MI->getOperand(1), + &ARM::SPRRegClass)) { + SReg = MI->getOperand(1).getReg(); + } + + if (TargetRegisterInfo::isVirtualRegister(SReg)) { + if (MO->getSubReg() == ARM::ssub_1) return ARM::ssub_1; + return ARM::ssub_0; + } + return getDPRLaneFromSPR(SReg); +} + +// MI is known to be dead. Figure out what instructions +// are also made dead by this and mark them for removal. +void A15SDOptimizer::eraseInstrWithNoUses(MachineInstr *MI) { + SmallVector<MachineInstr *, 8> Front; + DeadInstr.insert(MI); + + DEBUG(dbgs() << "Deleting base instruction " << *MI << "\n"); + Front.push_back(MI); + + while (Front.size() != 0) { + MI = Front.back(); + Front.pop_back(); + + // MI is already known to be dead. We need to see + // if other instructions can also be removed. + for (unsigned int i = 0; i < MI->getNumOperands(); ++i) { + MachineOperand &MO = MI->getOperand(i); + if ((!MO.isReg()) || (!MO.isUse())) + continue; + unsigned Reg = MO.getReg(); + if (!TRI->isVirtualRegister(Reg)) + continue; + MachineInstr *Def = MRI->getVRegDef(Reg); + + if (!Def) + continue; + + // We don't need to do anything if we have already marked + // this instruction as being dead. + if (DeadInstr.find(Def) != DeadInstr.end()) + continue; + + // Check if all the uses of this instruction are marked as + // dead. If so, we can also mark this instruction as being + // dead.
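+ // (We scan every def of Def and require all uses of each def to be in + // DeadInstr; a single live use keeps Def alive.)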
+ bool IsDead = true; + for (unsigned int j = 0; j < Def->getNumOperands(); ++j) { + MachineOperand &MODef = Def->getOperand(j); + if ((!MODef.isReg()) || (!MODef.isDef())) + continue; + unsigned DefReg = MODef.getReg(); + if (!TRI->isVirtualRegister(DefReg)) { + IsDead = false; + break; + } + for (MachineRegisterInfo::use_iterator II = MRI->use_begin(DefReg), + EE = MRI->use_end(); + II != EE; ++II) { + // We don't care about self references. + if (&*II == Def) + continue; + if (DeadInstr.find(&*II) == DeadInstr.end()) { + IsDead = false; + break; + } + } + } + + if (!IsDead) continue; + + DEBUG(dbgs() << "Deleting instruction " << *Def << "\n"); + DeadInstr.insert(Def); + } + } +} + +// Creates the more optimized patterns and generally does all the code +// transformations in this pass. +unsigned A15SDOptimizer::optimizeSDPattern(MachineInstr *MI) { + if (MI->isCopy()) { + return optimizeAllLanesPattern(MI, MI->getOperand(1).getReg()); + } + + if (MI->isInsertSubreg()) { + unsigned DPRReg = MI->getOperand(1).getReg(); + unsigned SPRReg = MI->getOperand(2).getReg(); + + if (TRI->isVirtualRegister(DPRReg) && TRI->isVirtualRegister(SPRReg)) { + MachineInstr *DPRMI = MRI->getVRegDef(DPRReg); + MachineInstr *SPRMI = MRI->getVRegDef(SPRReg); + + if (DPRMI && SPRMI) { + // See if the first operand of this insert_subreg is IMPLICIT_DEF + MachineInstr *ECDef = elideCopies(DPRMI); + if (ECDef != 0 && ECDef->isImplicitDef()) { + // Another corner case - if we're inserting something that is purely + // a subreg copy of a DPR, just use that DPR. + + MachineInstr *EC = elideCopies(SPRMI); + // Is it a subreg copy of ssub_0? + if (EC && EC->isCopy() && + EC->getOperand(1).getSubReg() == ARM::ssub_0) { + DEBUG(dbgs() << "Found a subreg copy: " << *SPRMI); + + // Find the thing we're subreg copying out of - is it of the same + // regclass as DPRMI? (i.e. a DPR or QPR). + unsigned FullReg = SPRMI->getOperand(1).getReg(); + const TargetRegisterClass *TRC = + MRI->getRegClass(MI->getOperand(1).getReg()); + if (TRC->hasSuperClassEq(MRI->getRegClass(FullReg))) { + DEBUG(dbgs() << "Subreg copy is compatible - returning "); + DEBUG(dbgs() << PrintReg(FullReg) << "\n"); + eraseInstrWithNoUses(MI); + return FullReg; + } + } + + return optimizeAllLanesPattern(MI, MI->getOperand(2).getReg()); + } + } + } + return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg()); + } + + if (MI->isRegSequence() && usesRegClass(MI->getOperand(1), + &ARM::SPRRegClass)) { + // See if all bar one of the operands are IMPLICIT_DEF and insert the + // optimizer pattern accordingly. + unsigned NumImplicit = 0, NumTotal = 0; + unsigned NonImplicitReg = ~0U; + + for (unsigned I = 1; I < MI->getNumExplicitOperands(); ++I) { + if (!MI->getOperand(I).isReg()) + continue; + ++NumTotal; + unsigned OpReg = MI->getOperand(I).getReg(); + + if (!TRI->isVirtualRegister(OpReg)) + break; + + MachineInstr *Def = MRI->getVRegDef(OpReg); + if (!Def) + break; + if (Def->isImplicitDef()) + ++NumImplicit; + else + NonImplicitReg = OpReg; + } + + if (NumImplicit == NumTotal - 1) + return optimizeAllLanesPattern(MI, NonImplicitReg); + else + return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg()); + } + + llvm_unreachable("Unhandled update pattern!"); +} + +// Return true if this MachineInstr inserts a scalar (SPR) value into +// a D or Q register.
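+// For example (virtual register names here are purely illustrative): +//   %vreg2<def> = INSERT_SUBREG %vreg1, %vreg0, ssub_0 +// writes only one S-sized half of %vreg2, creating the S->D dependency this +// pass looks for.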
+bool A15SDOptimizer::hasPartialWrite(MachineInstr *MI) { + // The only way we can do a partial register update is through a COPY, + // INSERT_SUBREG or REG_SEQUENCE. + if (MI->isCopy() && usesRegClass(MI->getOperand(1), &ARM::SPRRegClass)) + return true; + + if (MI->isInsertSubreg() && usesRegClass(MI->getOperand(2), + &ARM::SPRRegClass)) + return true; + + if (MI->isRegSequence() && usesRegClass(MI->getOperand(1), &ARM::SPRRegClass)) + return true; + + return false; +} + +// Looks through full copies to get the instruction that defines the input +// operand for MI. +MachineInstr *A15SDOptimizer::elideCopies(MachineInstr *MI) { + if (!MI->isFullCopy()) + return MI; + if (!TRI->isVirtualRegister(MI->getOperand(1).getReg())) + return NULL; + MachineInstr *Def = MRI->getVRegDef(MI->getOperand(1).getReg()); + if (!Def) + return NULL; + return elideCopies(Def); +} + +// Look through full copies and PHIs to get the set of non-copy MachineInstrs +// that can produce MI. +void A15SDOptimizer::elideCopiesAndPHIs(MachineInstr *MI, + SmallVectorImpl<MachineInstr*> &Outs) { + // Looking through PHIs may create loops so we need to track what + // instructions we have visited before. + std::set<MachineInstr *> Reached; + SmallVector<MachineInstr *, 8> Front; + Front.push_back(MI); + while (Front.size() != 0) { + MI = Front.back(); + Front.pop_back(); + + // If we have already explored this MachineInstr, ignore it. + if (Reached.find(MI) != Reached.end()) + continue; + Reached.insert(MI); + if (MI->isPHI()) { + for (unsigned I = 1, E = MI->getNumOperands(); I != E; I += 2) { + unsigned Reg = MI->getOperand(I).getReg(); + if (!TRI->isVirtualRegister(Reg)) { + continue; + } + MachineInstr *NewMI = MRI->getVRegDef(Reg); + if (!NewMI) + continue; + Front.push_back(NewMI); + } + } else if (MI->isFullCopy()) { + if (!TRI->isVirtualRegister(MI->getOperand(1).getReg())) + continue; + MachineInstr *NewMI = MRI->getVRegDef(MI->getOperand(1).getReg()); + if (!NewMI) + continue; + Front.push_back(NewMI); + } else { + DEBUG(dbgs() << "Found partial copy " << *MI << "\n"); + Outs.push_back(MI); + } + } +} + +// Return the DPR virtual registers that are read by this machine instruction +// (if any). +SmallVector<unsigned, 8> A15SDOptimizer::getReadDPRs(MachineInstr *MI) { + if (MI->isCopyLike() || MI->isInsertSubreg() || MI->isRegSequence() || + MI->isKill()) + return SmallVector<unsigned, 8>(); + + SmallVector<unsigned, 8> Reads; + for (unsigned i = 0; i < MI->getNumOperands(); ++i) { + MachineOperand &MO = MI->getOperand(i); + + if (!MO.isReg() || !MO.isUse()) + continue; + if (!usesRegClass(MO, &ARM::DPRRegClass) && + !usesRegClass(MO, &ARM::QPRRegClass)) + continue; + + Reads.push_back(MO.getReg()); + } + return Reads; +} + +// Duplicates one lane of a D register into all lanes of a new D (or Q) +// register by using a VDUP. +unsigned +A15SDOptimizer::createDupLane(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL, + unsigned Reg, unsigned Lane, bool QPR) { + unsigned Out = MRI->createVirtualRegister(QPR ? &ARM::QPRRegClass : + &ARM::DPRRegClass); + AddDefaultPred(BuildMI(MBB, + InsertBefore, + DL, + TII->get(QPR ? ARM::VDUPLN32q : ARM::VDUPLN32d), + Out) + .addReg(Reg) + .addImm(Lane)); + + return Out; +} + +// Extracts the given lane of a D or Q register into a new register of class +// TRC by using a subregister COPY.
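+// e.g. (illustrative): %vreg1<def> = COPY %vreg0:dsub_0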
+unsigned +A15SDOptimizer::createExtractSubreg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL, + unsigned DReg, unsigned Lane, + const TargetRegisterClass *TRC) { + unsigned Out = MRI->createVirtualRegister(TRC); + BuildMI(MBB, + InsertBefore, + DL, + TII->get(TargetOpcode::COPY), Out) + .addReg(DReg, 0, Lane); + + return Out; +} + +// Takes two DPR registers and combines them into a QPR register by using a +// REG_SEQUENCE. +unsigned +A15SDOptimizer::createRegSequence(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL, + unsigned Reg1, unsigned Reg2) { + unsigned Out = MRI->createVirtualRegister(&ARM::QPRRegClass); + BuildMI(MBB, + InsertBefore, + DL, + TII->get(TargetOpcode::REG_SEQUENCE), Out) + .addReg(Reg1) + .addImm(ARM::dsub_0) + .addReg(Reg2) + .addImm(ARM::dsub_1); + return Out; +} + +// Takes two DPR registers that have previously been VDUPed (Ssub0 and Ssub1) +// and merges them into one DPR register. +unsigned +A15SDOptimizer::createVExt(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL, + unsigned Ssub0, unsigned Ssub1) { + unsigned Out = MRI->createVirtualRegister(&ARM::DPRRegClass); + AddDefaultPred(BuildMI(MBB, + InsertBefore, + DL, + TII->get(ARM::VEXTd32), Out) + .addReg(Ssub0) + .addReg(Ssub1) + .addImm(1)); + return Out; +} + +unsigned +A15SDOptimizer::createInsertSubreg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL, unsigned DReg, unsigned Lane, + unsigned ToInsert) { + unsigned Out = MRI->createVirtualRegister(&ARM::DPR_VFP2RegClass); + BuildMI(MBB, + InsertBefore, + DL, + TII->get(TargetOpcode::INSERT_SUBREG), Out) + .addReg(DReg) + .addReg(ToInsert) + .addImm(Lane); + + return Out; +} + +unsigned +A15SDOptimizer::createImplicitDef(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL) { + unsigned Out = MRI->createVirtualRegister(&ARM::DPRRegClass); + BuildMI(MBB, + InsertBefore, + DL, + TII->get(TargetOpcode::IMPLICIT_DEF), Out); + return Out; +} + +// This function inserts instructions in order to optimize interactions between +// SPR registers and DPR/QPR registers. It does so by performing VDUPs on all +// lanes, and then using VEXT instructions to recompose the result.
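+// For a DPR input Reg the sequence built below is, schematically (register +// names illustrative): +//   %out1 = VDUPLN32d %Reg, 0 +//   %out2 = VDUPLN32d %Reg, 1 +//   %out  = VEXTd32 %out1, %out2, 1 +// QPR inputs get the same treatment once per D-half, with the two halves +// rejoined by a REG_SEQUENCE.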
+unsigned +A15SDOptimizer::optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg) { + MachineBasicBlock::iterator InsertPt(MI); + DebugLoc DL = MI->getDebugLoc(); + MachineBasicBlock &MBB = *MI->getParent(); + InsertPt++; + unsigned Out; + + if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::QPRRegClass)) { + unsigned DSub0 = createExtractSubreg(MBB, InsertPt, DL, Reg, + ARM::dsub_0, &ARM::DPRRegClass); + unsigned DSub1 = createExtractSubreg(MBB, InsertPt, DL, Reg, + ARM::dsub_1, &ARM::DPRRegClass); + + unsigned Out1 = createDupLane(MBB, InsertPt, DL, DSub0, 0); + unsigned Out2 = createDupLane(MBB, InsertPt, DL, DSub0, 1); + Out = createVExt(MBB, InsertPt, DL, Out1, Out2); + + unsigned Out3 = createDupLane(MBB, InsertPt, DL, DSub1, 0); + unsigned Out4 = createDupLane(MBB, InsertPt, DL, DSub1, 1); + Out2 = createVExt(MBB, InsertPt, DL, Out3, Out4); + + Out = createRegSequence(MBB, InsertPt, DL, Out, Out2); + + } else if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::DPRRegClass)) { + unsigned Out1 = createDupLane(MBB, InsertPt, DL, Reg, 0); + unsigned Out2 = createDupLane(MBB, InsertPt, DL, Reg, 1); + Out = createVExt(MBB, InsertPt, DL, Out1, Out2); + + } else { + assert(MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::SPRRegClass) && + "Found unexpected regclass!"); + + unsigned PrefLane = getPrefSPRLane(Reg); + unsigned Lane; + switch (PrefLane) { + case ARM::ssub_0: Lane = 0; break; + case ARM::ssub_1: Lane = 1; break; + default: llvm_unreachable("Unknown preferred lane!"); + } + + bool UsesQPR = usesRegClass(MI->getOperand(0), &ARM::QPRRegClass); + + Out = createImplicitDef(MBB, InsertPt, DL); + Out = createInsertSubreg(MBB, InsertPt, DL, Out, PrefLane, Reg); + Out = createDupLane(MBB, InsertPt, DL, Out, Lane, UsesQPR); + eraseInstrWithNoUses(MI); + } + return Out; +} + +bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) { + // We look for instructions that write S registers that are then read as + // D/Q registers. These can only be caused by COPY, INSERT_SUBREG and + // REG_SEQUENCE pseudos that insert an SPR value into a DPR register or + // merge two SPR values to form a DPR register. In order to avoid false + // positives we make sure that there is an SPR producer so we look past + // COPY and PHI nodes to find it. + // + // The best code pattern for when an SPR producer is going to be used by a + // DPR or QPR consumer depends on whether the other lanes of the + // corresponding DPR/QPR are currently defined. + // + // We can handle these efficiently, depending on the type of + // pseudo-instruction that is producing the pattern + // + //   * COPY:          * VDUP all lanes and merge the results together + //                      using VEXTs. + // + //   * INSERT_SUBREG: * If the SPR value was originally in another DPR/QPR + //                      lane, and the other lane(s) of the DPR/QPR register + //                      that we are inserting in are undefined, use the + //                      original DPR/QPR value. + //                    * Otherwise, fall back on the same strategy as COPY. + // + //   * REG_SEQUENCE:  * If all except one of the input operands are + //                      IMPLICIT_DEFs, insert the VDUP pattern for just the + //                      defined input operand + //                    * Otherwise, fall back on the same strategy as COPY. + // + + // First, get all the reads of D-registers done by this instruction. + SmallVector<unsigned, 8> Reads = getReadDPRs(MI); + bool Modified = false; + + for (SmallVector<unsigned, 8>::iterator I = Reads.begin(), E = Reads.end(); + I != E; ++I) { + // Follow the def-use chain for this DPR through COPYs, and also through + // PHIs (which are essentially multi-way COPYs).
It is because of PHIs that + // we can end up with multiple defs of this DPR. + + SmallVector<MachineInstr *, 8> DefSrcs; + if (!TRI->isVirtualRegister(*I)) + continue; + MachineInstr *Def = MRI->getVRegDef(*I); + if (!Def) + continue; + + elideCopiesAndPHIs(Def, DefSrcs); + + for (SmallVector<MachineInstr*, 8>::iterator II = DefSrcs.begin(), + EE = DefSrcs.end(); II != EE; ++II) { + MachineInstr *MI = *II; + + // If we've already analyzed and replaced this operand, don't do + // anything. + if (Replacements.find(MI) != Replacements.end()) + continue; + + // Now, work out if the instruction causes a SPR->DPR dependency. + if (!hasPartialWrite(MI)) + continue; + + // Collect all the uses of this MI's DPR def for updating later. + SmallVector<MachineOperand*, 8> Uses; + unsigned DPRDefReg = MI->getOperand(0).getReg(); + for (MachineRegisterInfo::use_iterator I = MRI->use_begin(DPRDefReg), + E = MRI->use_end(); I != E; ++I) + Uses.push_back(&I.getOperand()); + + // We can optimize this. + unsigned NewReg = optimizeSDPattern(MI); + + if (NewReg != 0) { + Modified = true; + for (SmallVector<MachineOperand*, 8>::const_iterator I = Uses.begin(), + E = Uses.end(); I != E; ++I) { + DEBUG(dbgs() << "Replacing operand " + << **I << " with " + << PrintReg(NewReg) << "\n"); + (*I)->substVirtReg(NewReg, 0, *TRI); + } + } + Replacements[MI] = NewReg; + } + } + return Modified; +} + +bool A15SDOptimizer::runOnMachineFunction(MachineFunction &Fn) { + TII = static_cast<const ARMBaseInstrInfo*>(Fn.getTarget().getInstrInfo()); + TRI = Fn.getTarget().getRegisterInfo(); + MRI = &Fn.getRegInfo(); + bool Modified = false; + + DEBUG(dbgs() << "Running on function " << Fn.getName()<< "\n"); + + DeadInstr.clear(); + Replacements.clear(); + + for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; + ++MFI) { + + for (MachineBasicBlock::iterator MI = MFI->begin(), ME = MFI->end(); + MI != ME;) { + Modified |= runOnInstruction(MI++); + } + + } + + for (std::set<MachineInstr *>::iterator I = DeadInstr.begin(), + E = DeadInstr.end(); + I != E; ++I) { + (*I)->eraseFromParent(); + } + + return Modified; +} + +FunctionPass *llvm::createA15SDOptimizerPass() { + return new A15SDOptimizer(); +} diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h index 1446bbb..80e5f37 100644 --- a/lib/Target/ARM/ARM.h +++ b/lib/Target/ARM/ARM.h @@ -35,6 +35,7 @@ FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM, FunctionPass *createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM, JITCodeEmitter &JCE); +FunctionPass *createA15SDOptimizerPass(); FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false); FunctionPass *createARMExpandPseudoPass(); FunctionPass *createARMGlobalBaseRegPass(); @@ -44,6 +45,9 @@ FunctionPass *createMLxExpansionPass(); FunctionPass *createThumb2ITBlockPass(); FunctionPass *createThumb2SizeReductionPass(); +/// \brief Creates an ARM-specific Target Transformation Info pass. 
+ImmutablePass *createARMTargetTransformInfoPass(const ARMBaseTargetMachine *TM); + void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, ARMAsmPrinter &AP); diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index 23974ad..6838084 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -89,6 +89,10 @@ def FeatureAvoidPartialCPSR : SubtargetFeature<"avoid-partial-cpsr", "AvoidCPSRPartialUpdate", "true", "Avoid CPSR partial update for OOO execution">; +def FeatureAvoidMOVsShOp : SubtargetFeature<"avoid-movs-shop", + "AvoidMOVsShifterOperand", "true", + "Avoid movs instructions with shifter operand">; + // Some processors perform return stack prediction. CodeGen should avoid issuing // "normal" call instructions to callees which do not return. def FeatureHasRAS : SubtargetFeature<"ras", "HasRAS", "true", @@ -106,6 +110,11 @@ def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true", def FeatureMClass : SubtargetFeature<"mclass", "IsMClass", "true", "Is microcontroller profile ('M' series)">; +// Special TRAP encoding for NaCl, which looks like a TRAP in Thumb too. +// See ARMInstrInfo.td for details. +def FeatureNaClTrap : SubtargetFeature<"nacl-trap", "UseNaClTrap", "true", + "NaCl trap">; + // ARM ISAs. def HasV4TOps : SubtargetFeature<"v4t", "HasV4TOps", "true", "Support ARM v4T instructions">; @@ -132,11 +141,14 @@ def HasV7Ops : SubtargetFeature<"v7", "HasV7Ops", "true", include "ARMSchedule.td" // ARM processor families. +def ProcA5 : SubtargetFeature<"a5", "ARMProcFamily", "CortexA5", + "Cortex-A5 ARM processors", + [FeatureSlowFPBrcc, FeatureHasSlowFPVMLx, + FeatureVMLxForwarding, FeatureT2XtPk]>; def ProcA8 : SubtargetFeature<"a8", "ARMProcFamily", "CortexA8", "Cortex-A8 ARM processors", - [FeatureSlowFPBrcc, FeatureNEONForFP, - FeatureHasSlowFPVMLx, FeatureVMLxForwarding, - FeatureT2XtPk]>; + [FeatureSlowFPBrcc, FeatureHasSlowFPVMLx, + FeatureVMLxForwarding, FeatureT2XtPk]>; def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9", "Cortex-A9 ARM processors", [FeatureVMLxForwarding, @@ -147,6 +159,7 @@ def ProcSwift : SubtargetFeature<"swift", "ARMProcFamily", "Swift", [FeatureNEONForFP, FeatureT2XtPk, FeatureVFP4, FeatureMP, FeatureHWDiv, FeatureHWDivARM, FeatureAvoidPartialCPSR, + FeatureAvoidMOVsShOp, FeatureHasSlowFPVMLx]>; // FIXME: It has not been determined if A15 has these features. @@ -154,6 +167,12 @@ def ProcA15 : SubtargetFeature<"a15", "ARMProcFamily", "CortexA15", "Cortex-A15 ARM processors", [FeatureT2XtPk, FeatureFP16, FeatureAvoidPartialCPSR]>; +def ProcR5 : SubtargetFeature<"r5", "ARMProcFamily", "CortexR5", + "Cortex-R5 ARM processors", + [FeatureSlowFPBrcc, FeatureHWDivARM, + FeatureHasSlowFPVMLx, + FeatureAvoidPartialCPSR, + FeatureT2XtPk]>; class ProcNoItin<string Name, list<SubtargetFeature> Features> : Processor<Name, NoItineraries, Features>; @@ -219,6 +238,11 @@ def : Processor<"arm1156t2f-s", ARMV6Itineraries, [HasV6T2Ops, FeatureVFP2, FeatureDSPThumb2]>; // V7a Processors.
+// FIXME: A5 currently has the same Schedule model as A8 +def : ProcessorModel<"cortex-a5", CortexA8Model, + [ProcA5, HasV7Ops, FeatureNEON, FeatureDB, + FeatureVFP4, FeatureDSPThumb2, + FeatureHasRAS]>; def : ProcessorModel<"cortex-a8", CortexA8Model, [ProcA8, HasV7Ops, FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureHasRAS]>; @@ -233,6 +257,11 @@ def : ProcessorModel<"cortex-a9-mp", CortexA9Model, def : ProcessorModel<"cortex-a15", CortexA9Model, [ProcA15, HasV7Ops, FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureHasRAS]>; +// FIXME: R5 currently has the same ProcessorModel as A8. +def : ProcessorModel<"cortex-r5", CortexA8Model, + [ProcR5, HasV7Ops, FeatureDB, + FeatureVFP3, FeatureDSPThumb2, + FeatureHasRAS]>; // V7M Processors. def : ProcNoItin<"cortex-m3", [HasV7Ops, diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index d439d1d..13ec208 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -25,30 +25,33 @@ #include "MCTargetDesc/ARMMCExpr.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallString.h" -#include "llvm/Constants.h" -#include "llvm/DebugInfo.h" -#include "llvm/Module.h" -#include "llvm/Type.h" #include "llvm/Assembly/Writer.h" -#include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/DebugInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCELFStreamer.h" #include "llvm/MC/MCInst.h" -#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCInstBuilder.h" #include "llvm/MC/MCObjectStreamer.h" +#include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Target/Mangler.h" -#include "llvm/DataLayout.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetMachine.h" #include <cctype> using namespace llvm; @@ -181,7 +184,7 @@ namespace { const size_t TagHeaderSize = 1 + 4; Streamer.EmitIntValue(VendorHeaderSize + TagHeaderSize + ContentsSize, 4); - Streamer.EmitBytes(CurrentVendor, 0); + Streamer.EmitBytes(CurrentVendor); Streamer.EmitIntValue(0, 1); // '\0' Streamer.EmitIntValue(ARMBuildAttrs::File, 1); @@ -191,14 +194,14 @@ namespace { // emit each field as its type (ULEB or String) for (unsigned int i=0; i<Contents.size(); ++i) { AttributeItemType item = Contents[i]; - Streamer.EmitULEB128IntValue(item.Tag, 0); + Streamer.EmitULEB128IntValue(item.Tag); switch (item.Type) { default: llvm_unreachable("Invalid attribute type"); case AttributeItemType::NumericAttribute: - Streamer.EmitULEB128IntValue(item.IntValue, 0); + Streamer.EmitULEB128IntValue(item.IntValue); break; case AttributeItemType::TextAttribute: - Streamer.EmitBytes(item.StringValue.upper(), 0); + Streamer.EmitBytes(item.StringValue.upper()); Streamer.EmitIntValue(0, 1); // '\0' break; } @@ -339,6 +342,11 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, unsigned Reg = MO.getReg(); assert(TargetRegisterInfo::isPhysicalRegister(Reg)); assert(!MO.getSubReg() &&
"Subregs should be eliminated!"); + if(ARM::GPRPairRegClass.contains(Reg)) { + const MachineFunction &MF = *MI->getParent()->getParent(); + const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); + Reg = TRI->getSubReg(Reg, ARM::gsub_0); + } O << ARMInstPrinter::getRegisterName(Reg); break; } @@ -398,7 +406,7 @@ GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const { } -MCSymbol *ARMAsmPrinter::GetARMSJLJEHLabel(void) const { +MCSymbol *ARMAsmPrinter::GetARMSJLJEHLabel() const { SmallString<60> Name; raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() << "SJLJEH" << getFunctionNumber(); @@ -527,14 +535,12 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, const MachineOperand &MO = MI->getOperand(OpNum); if (!MO.isReg()) return true; - const TargetRegisterClass &RC = ARM::GPRRegClass; const MachineFunction &MF = *MI->getParent()->getParent(); const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); - - unsigned RegIdx = TRI->getEncodingValue(MO.getReg()); - RegIdx |= 1; //The odd register is also the higher-numbered one of a pair. - - unsigned Reg = RC.getRegister(RegIdx); + unsigned Reg = MO.getReg(); + if(!ARM::GPRPairRegClass.contains(Reg)) + return false; + Reg = TRI->getSubReg(Reg, ARM::gsub_1); O << ARMInstPrinter::getRegisterName(Reg); return false; } @@ -656,7 +662,7 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { if (MCSym.getInt()) // External to current translation unit. - OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/); + OutStreamer.EmitIntValue(0, 4/*size*/); else // Internal to current translation unit. // @@ -666,7 +672,7 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { // We need to fill in the value for the NLP in those cases. OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(), OutContext), - 4/*size*/, 0/*addrspace*/); + 4/*size*/); } Stubs.clear(); @@ -684,7 +690,7 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { OutStreamer.EmitValue(MCSymbolRefExpr:: Create(Stubs[i].second.getPointer(), OutContext), - 4/*size*/, 0/*addrspace*/); + 4/*size*/); } Stubs.clear(); @@ -698,6 +704,11 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { // generates code that does this, it is always safe to set. OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols); } + // FIXME: This should eventually end up somewhere else where more + // intelligent flag decisions can be made. For now we are just maintaining + // the status quo for ARM and setting EF_ARM_EABI_VER5 as the default. + if (MCELFStreamer *MES = dyn_cast<MCELFStreamer>(&OutStreamer)) + MES->getAssembler().setELFHeaderEFlags(ELF::EF_ARM_EABI_VER5); } //===----------------------------------------------------------------------===// @@ -1051,12 +1062,10 @@ void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) { OutContext); // If this isn't a TBB or TBH, the entries are direct branch instructions. if (OffsetWidth == 4) { - MCInst BrInst; - BrInst.setOpcode(ARM::t2B); - BrInst.addOperand(MCOperand::CreateExpr(MBBSymbolExpr)); - BrInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - BrInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(BrInst); + OutStreamer.EmitInstruction(MCInstBuilder(ARM::t2B) + .addExpr(MBBSymbolExpr) + .addImm(ARMCC::AL) + .addReg(0)); continue; } // Otherwise it's an offset from the dispatch instruction. 
Construct an @@ -1100,18 +1109,6 @@ void ARMAsmPrinter::PrintDebugValueComment(const MachineInstr *MI, printOperand(MI, NOps-2, OS); } -static void populateADROperands(MCInst &Inst, unsigned Dest, - const MCSymbol *Label, - unsigned pred, unsigned ccreg, - MCContext &Ctx) { - const MCExpr *SymbolExpr = MCSymbolRefExpr::Create(Label, Ctx); - Inst.addOperand(MCOperand::CreateReg(Dest)); - Inst.addOperand(MCOperand::CreateExpr(SymbolExpr)); - // Add predicate operands. - Inst.addOperand(MCOperand::CreateImm(pred)); - Inst.addOperand(MCOperand::CreateReg(ccreg)); -} - void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { assert(MI->getFlag(MachineInstr::FrameSetup) && "Only instruction which are involved into frame setup code are allowed"); @@ -1288,129 +1285,104 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { case ARM::tLEApcrel: case ARM::t2LEApcrel: { // FIXME: Need to also handle globals and externals - MCInst TmpInst; - TmpInst.setOpcode(MI->getOpcode() == ARM::t2LEApcrel ? ARM::t2ADR - : (MI->getOpcode() == ARM::tLEApcrel ? ARM::tADR - : ARM::ADR)); - populateADROperands(TmpInst, MI->getOperand(0).getReg(), - GetCPISymbol(MI->getOperand(1).getIndex()), - MI->getOperand(2).getImm(), MI->getOperand(3).getReg(), - OutContext); - OutStreamer.EmitInstruction(TmpInst); + MCSymbol *CPISymbol = GetCPISymbol(MI->getOperand(1).getIndex()); + OutStreamer.EmitInstruction(MCInstBuilder(MI->getOpcode() == + ARM::t2LEApcrel ? ARM::t2ADR + : (MI->getOpcode() == ARM::tLEApcrel ? ARM::tADR + : ARM::ADR)) + .addReg(MI->getOperand(0).getReg()) + .addExpr(MCSymbolRefExpr::Create(CPISymbol, OutContext)) + // Add predicate operands. + .addImm(MI->getOperand(2).getImm()) + .addReg(MI->getOperand(3).getReg())); return; } case ARM::LEApcrelJT: case ARM::tLEApcrelJT: case ARM::t2LEApcrelJT: { - MCInst TmpInst; - TmpInst.setOpcode(MI->getOpcode() == ARM::t2LEApcrelJT ? ARM::t2ADR - : (MI->getOpcode() == ARM::tLEApcrelJT ? ARM::tADR - : ARM::ADR)); - populateADROperands(TmpInst, MI->getOperand(0).getReg(), - GetARMJTIPICJumpTableLabel2(MI->getOperand(1).getIndex(), - MI->getOperand(2).getImm()), - MI->getOperand(3).getImm(), MI->getOperand(4).getReg(), - OutContext); - OutStreamer.EmitInstruction(TmpInst); + MCSymbol *JTIPICSymbol = + GetARMJTIPICJumpTableLabel2(MI->getOperand(1).getIndex(), + MI->getOperand(2).getImm()); + OutStreamer.EmitInstruction(MCInstBuilder(MI->getOpcode() == + ARM::t2LEApcrelJT ? ARM::t2ADR + : (MI->getOpcode() == ARM::tLEApcrelJT ? ARM::tADR + : ARM::ADR)) + .addReg(MI->getOperand(0).getReg()) + .addExpr(MCSymbolRefExpr::Create(JTIPICSymbol, OutContext)) + // Add predicate operands. + .addImm(MI->getOperand(3).getImm()) + .addReg(MI->getOperand(4).getReg())); return; } // Darwin call instructions are just normal call instructions with different // clobber semantics (they clobber R9). case ARM::BX_CALL: { - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::MOVr); - TmpInst.addOperand(MCOperand::CreateReg(ARM::LR)); - TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); + OutStreamer.EmitInstruction(MCInstBuilder(ARM::MOVr) + .addReg(ARM::LR) + .addReg(ARM::PC) // Add predicate operands. 
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); + .addImm(ARMCC::AL) + .addReg(0) // Add 's' bit operand (always reg0 for this) - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); - } - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::BX); - TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); - OutStreamer.EmitInstruction(TmpInst); - } + .addReg(0)); + + OutStreamer.EmitInstruction(MCInstBuilder(ARM::BX) + .addReg(MI->getOperand(0).getReg())); return; } case ARM::tBX_CALL: { - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::tMOVr); - TmpInst.addOperand(MCOperand::CreateReg(ARM::LR)); - TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); + OutStreamer.EmitInstruction(MCInstBuilder(ARM::tMOVr) + .addReg(ARM::LR) + .addReg(ARM::PC) // Add predicate operands. - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); - } - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::tBX); - TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); + .addImm(ARMCC::AL) + .addReg(0)); + + OutStreamer.EmitInstruction(MCInstBuilder(ARM::tBX) + .addReg(MI->getOperand(0).getReg()) // Add predicate operands. - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); - } + .addImm(ARMCC::AL) + .addReg(0)); return; } case ARM::BMOVPCRX_CALL: { - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::MOVr); - TmpInst.addOperand(MCOperand::CreateReg(ARM::LR)); - TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); + OutStreamer.EmitInstruction(MCInstBuilder(ARM::MOVr) + .addReg(ARM::LR) + .addReg(ARM::PC) // Add predicate operands. - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); + .addImm(ARMCC::AL) + .addReg(0) // Add 's' bit operand (always reg0 for this) - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); - } - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::MOVr); - TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); - TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); + .addReg(0)); + + OutStreamer.EmitInstruction(MCInstBuilder(ARM::MOVr) + .addReg(ARM::PC) + .addReg(MI->getOperand(0).getReg()) // Add predicate operands. - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); + .addImm(ARMCC::AL) + .addReg(0) // Add 's' bit operand (always reg0 for this) - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); - } + .addReg(0)); return; } case ARM::BMOVPCB_CALL: { - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::MOVr); - TmpInst.addOperand(MCOperand::CreateReg(ARM::LR)); - TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); + OutStreamer.EmitInstruction(MCInstBuilder(ARM::MOVr) + .addReg(ARM::LR) + .addReg(ARM::PC) // Add predicate operands. 
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); + .addImm(ARMCC::AL) + .addReg(0) // Add 's' bit operand (always reg0 for this) - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); - } - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::Bcc); - const GlobalValue *GV = MI->getOperand(0).getGlobal(); - MCSymbol *GVSym = Mang->getSymbol(GV); - const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext); - TmpInst.addOperand(MCOperand::CreateExpr(GVSymExpr)); + .addReg(0)); + + const GlobalValue *GV = MI->getOperand(0).getGlobal(); + MCSymbol *GVSym = Mang->getSymbol(GV); + const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext); + OutStreamer.EmitInstruction(MCInstBuilder(ARM::Bcc) + .addExpr(GVSymExpr) // Add predicate operands. - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); - } + .addImm(ARMCC::AL) + .addReg(0)); return; } case ARM::MOVi16_ga_pcrel: @@ -1498,15 +1470,13 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { OutContext)); // Form and emit the add. - MCInst AddInst; - AddInst.setOpcode(ARM::tADDhirr); - AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); - AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); - AddInst.addOperand(MCOperand::CreateReg(ARM::PC)); - // Add predicate operands. - AddInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - AddInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(AddInst); + OutStreamer.EmitInstruction(MCInstBuilder(ARM::tADDhirr) + .addReg(MI->getOperand(0).getReg()) + .addReg(MI->getOperand(0).getReg()) + .addReg(ARM::PC) + // Add predicate operands. + .addImm(ARMCC::AL) + .addReg(0)); return; } case ARM::PICADD: { @@ -1521,17 +1491,15 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { OutContext)); // Form and emit the add. - MCInst AddInst; - AddInst.setOpcode(ARM::ADDrr); - AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); - AddInst.addOperand(MCOperand::CreateReg(ARM::PC)); - AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg())); - // Add predicate operands. - AddInst.addOperand(MCOperand::CreateImm(MI->getOperand(3).getImm())); - AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(4).getReg())); - // Add 's' bit operand (always reg0 for this) - AddInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(AddInst); + OutStreamer.EmitInstruction(MCInstBuilder(ARM::ADDrr) + .addReg(MI->getOperand(0).getReg()) + .addReg(ARM::PC) + .addReg(MI->getOperand(1).getReg()) + // Add predicate operands. + .addImm(MI->getOperand(3).getImm()) + .addReg(MI->getOperand(4).getReg()) + // Add 's' bit operand (always reg0 for this) + .addReg(0)); return; } case ARM::PICSTR: @@ -1567,16 +1535,14 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { case ARM::PICLDRSB: Opcode = ARM::LDRSB; break; case ARM::PICLDRSH: Opcode = ARM::LDRSH; break; } - MCInst LdStInst; - LdStInst.setOpcode(Opcode); - LdStInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); - LdStInst.addOperand(MCOperand::CreateReg(ARM::PC)); - LdStInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg())); - LdStInst.addOperand(MCOperand::CreateImm(0)); - // Add predicate operands. 
- LdStInst.addOperand(MCOperand::CreateImm(MI->getOperand(3).getImm())); - LdStInst.addOperand(MCOperand::CreateReg(MI->getOperand(4).getReg())); - OutStreamer.EmitInstruction(LdStInst); + OutStreamer.EmitInstruction(MCInstBuilder(Opcode) + .addReg(MI->getOperand(0).getReg()) + .addReg(ARM::PC) + .addReg(MI->getOperand(1).getReg()) + .addImm(0) + // Add predicate operands. + .addImm(MI->getOperand(3).getImm()) + .addReg(MI->getOperand(4).getReg())); return; } @@ -1606,29 +1572,26 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { } case ARM::t2BR_JT: { // Lower and emit the instruction itself, then the jump table following it. - MCInst TmpInst; - TmpInst.setOpcode(ARM::tMOVr); - TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); - TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); - // Add predicate operands. - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); + OutStreamer.EmitInstruction(MCInstBuilder(ARM::tMOVr) + .addReg(ARM::PC) + .addReg(MI->getOperand(0).getReg()) + // Add predicate operands. + .addImm(ARMCC::AL) + .addReg(0)); + // Output the data for the jump table itself EmitJump2Table(MI); return; } case ARM::t2TBB_JT: { // Lower and emit the instruction itself, then the jump table following it. - MCInst TmpInst; + OutStreamer.EmitInstruction(MCInstBuilder(ARM::t2TBB) + .addReg(ARM::PC) + .addReg(MI->getOperand(0).getReg()) + // Add predicate operands. + .addImm(ARMCC::AL) + .addReg(0)); - TmpInst.setOpcode(ARM::t2TBB); - TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); - TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); - // Add predicate operands. - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); // Output the data for the jump table itself EmitJump2Table(MI); // Make sure the next instruction is 2-byte aligned. @@ -1637,15 +1600,13 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { } case ARM::t2TBH_JT: { // Lower and emit the instruction itself, then the jump table following it. - MCInst TmpInst; + OutStreamer.EmitInstruction(MCInstBuilder(ARM::t2TBH) + .addReg(ARM::PC) + .addReg(MI->getOperand(0).getReg()) + // Add predicate operands. + .addImm(ARMCC::AL) + .addReg(0)); - TmpInst.setOpcode(ARM::t2TBH); - TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); - TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); - // Add predicate operands. - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); // Output the data for the jump table itself EmitJump2Table(MI); return; @@ -1705,17 +1666,15 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { case ARM::BR_JTadd: { // Lower and emit the instruction itself, then the jump table following it. // add pc, target, idx - MCInst TmpInst; - TmpInst.setOpcode(ARM::ADDrr); - TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); - TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); - TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg())); - // Add predicate operands. 
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); - // Add 's' bit operand (always reg0 for this) - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); + OutStreamer.EmitInstruction(MCInstBuilder(ARM::ADDrr) + .addReg(ARM::PC) + .addReg(MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()) + // Add predicate operands. + .addImm(ARMCC::AL) + .addReg(0) + // Add 's' bit operand (always reg0 for this) + .addReg(0)); // Output the data for the jump table itself EmitJumpTable(MI); @@ -1733,6 +1692,13 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { } break; } + case ARM::TRAPNaCl: { + //.long 0xe7fedef0 @ trap + uint32_t Val = 0xe7fedef0UL; + OutStreamer.AddComment("trap"); + OutStreamer.EmitIntValue(Val, 4); + return; + } case ARM::tTRAP: { // Non-Darwin binutils don't yet support the "trap" mnemonic. // FIXME: Remove this special case when they do. @@ -1759,75 +1725,57 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { unsigned SrcReg = MI->getOperand(0).getReg(); unsigned ValReg = MI->getOperand(1).getReg(); MCSymbol *Label = GetARMSJLJEHLabel(); - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::tMOVr); - TmpInst.addOperand(MCOperand::CreateReg(ValReg)); - TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); + OutStreamer.AddComment("eh_setjmp begin"); + OutStreamer.EmitInstruction(MCInstBuilder(ARM::tMOVr) + .addReg(ValReg) + .addReg(ARM::PC) // Predicate. - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.AddComment("eh_setjmp begin"); - OutStreamer.EmitInstruction(TmpInst); - } - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::tADDi3); - TmpInst.addOperand(MCOperand::CreateReg(ValReg)); + .addImm(ARMCC::AL) + .addReg(0)); + + OutStreamer.EmitInstruction(MCInstBuilder(ARM::tADDi3) + .addReg(ValReg) // 's' bit operand - TmpInst.addOperand(MCOperand::CreateReg(ARM::CPSR)); - TmpInst.addOperand(MCOperand::CreateReg(ValReg)); - TmpInst.addOperand(MCOperand::CreateImm(7)); + .addReg(ARM::CPSR) + .addReg(ValReg) + .addImm(7) // Predicate. - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); - } - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::tSTRi); - TmpInst.addOperand(MCOperand::CreateReg(ValReg)); - TmpInst.addOperand(MCOperand::CreateReg(SrcReg)); + .addImm(ARMCC::AL) + .addReg(0)); + + OutStreamer.EmitInstruction(MCInstBuilder(ARM::tSTRi) + .addReg(ValReg) + .addReg(SrcReg) // The offset immediate is #4. The operand value is scaled by 4 for the // tSTR instruction. - TmpInst.addOperand(MCOperand::CreateImm(1)); + .addImm(1) // Predicate. - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); - } - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::tMOVi8); - TmpInst.addOperand(MCOperand::CreateReg(ARM::R0)); - TmpInst.addOperand(MCOperand::CreateReg(ARM::CPSR)); - TmpInst.addOperand(MCOperand::CreateImm(0)); + .addImm(ARMCC::AL) + .addReg(0)); + + OutStreamer.EmitInstruction(MCInstBuilder(ARM::tMOVi8) + .addReg(ARM::R0) + .addReg(ARM::CPSR) + .addImm(0) // Predicate. 
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); - } - { - const MCExpr *SymbolExpr = MCSymbolRefExpr::Create(Label, OutContext); - MCInst TmpInst; - TmpInst.setOpcode(ARM::tB); - TmpInst.addOperand(MCOperand::CreateExpr(SymbolExpr)); - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); - } - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::tMOVi8); - TmpInst.addOperand(MCOperand::CreateReg(ARM::R0)); - TmpInst.addOperand(MCOperand::CreateReg(ARM::CPSR)); - TmpInst.addOperand(MCOperand::CreateImm(1)); + .addImm(ARMCC::AL) + .addReg(0)); + + const MCExpr *SymbolExpr = MCSymbolRefExpr::Create(Label, OutContext); + OutStreamer.EmitInstruction(MCInstBuilder(ARM::tB) + .addExpr(SymbolExpr) + .addImm(ARMCC::AL) + .addReg(0)); + + OutStreamer.AddComment("eh_setjmp end"); + OutStreamer.EmitInstruction(MCInstBuilder(ARM::tMOVi8) + .addReg(ARM::R0) + .addReg(ARM::CPSR) + .addImm(1) // Predicate. - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.AddComment("eh_setjmp end"); - OutStreamer.EmitInstruction(TmpInst); - } + .addImm(ARMCC::AL) + .addReg(0)); + OutStreamer.EmitLabel(Label); return; } @@ -1843,69 +1791,53 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { unsigned SrcReg = MI->getOperand(0).getReg(); unsigned ValReg = MI->getOperand(1).getReg(); - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::ADDri); - TmpInst.addOperand(MCOperand::CreateReg(ValReg)); - TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); - TmpInst.addOperand(MCOperand::CreateImm(8)); + OutStreamer.AddComment("eh_setjmp begin"); + OutStreamer.EmitInstruction(MCInstBuilder(ARM::ADDri) + .addReg(ValReg) + .addReg(ARM::PC) + .addImm(8) // Predicate. - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); + .addImm(ARMCC::AL) + .addReg(0) // 's' bit operand (always reg0 for this). - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.AddComment("eh_setjmp begin"); - OutStreamer.EmitInstruction(TmpInst); - } - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::STRi12); - TmpInst.addOperand(MCOperand::CreateReg(ValReg)); - TmpInst.addOperand(MCOperand::CreateReg(SrcReg)); - TmpInst.addOperand(MCOperand::CreateImm(4)); + .addReg(0)); + + OutStreamer.EmitInstruction(MCInstBuilder(ARM::STRi12) + .addReg(ValReg) + .addReg(SrcReg) + .addImm(4) // Predicate. - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); - } - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::MOVi); - TmpInst.addOperand(MCOperand::CreateReg(ARM::R0)); - TmpInst.addOperand(MCOperand::CreateImm(0)); + .addImm(ARMCC::AL) + .addReg(0)); + + OutStreamer.EmitInstruction(MCInstBuilder(ARM::MOVi) + .addReg(ARM::R0) + .addImm(0) // Predicate. - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); + .addImm(ARMCC::AL) + .addReg(0) // 's' bit operand (always reg0 for this). 
- TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); - } - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::ADDri); - TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); - TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); - TmpInst.addOperand(MCOperand::CreateImm(0)); + .addReg(0)); + + OutStreamer.EmitInstruction(MCInstBuilder(ARM::ADDri) + .addReg(ARM::PC) + .addReg(ARM::PC) + .addImm(0) // Predicate. - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); + .addImm(ARMCC::AL) + .addReg(0) // 's' bit operand (always reg0 for this). - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); - } - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::MOVi); - TmpInst.addOperand(MCOperand::CreateReg(ARM::R0)); - TmpInst.addOperand(MCOperand::CreateImm(1)); + .addReg(0)); + + OutStreamer.AddComment("eh_setjmp end"); + OutStreamer.EmitInstruction(MCInstBuilder(ARM::MOVi) + .addReg(ARM::R0) + .addImm(1) // Predicate. - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); + .addImm(ARMCC::AL) + .addReg(0) // 's' bit operand (always reg0 for this). - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.AddComment("eh_setjmp end"); - OutStreamer.EmitInstruction(TmpInst); - } + .addReg(0)); return; } case ARM::Int_eh_sjlj_longjmp: { @@ -1915,48 +1847,35 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { // bx $scratch unsigned SrcReg = MI->getOperand(0).getReg(); unsigned ScratchReg = MI->getOperand(1).getReg(); - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::LDRi12); - TmpInst.addOperand(MCOperand::CreateReg(ARM::SP)); - TmpInst.addOperand(MCOperand::CreateReg(SrcReg)); - TmpInst.addOperand(MCOperand::CreateImm(8)); + OutStreamer.EmitInstruction(MCInstBuilder(ARM::LDRi12) + .addReg(ARM::SP) + .addReg(SrcReg) + .addImm(8) // Predicate. - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); - } - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::LDRi12); - TmpInst.addOperand(MCOperand::CreateReg(ScratchReg)); - TmpInst.addOperand(MCOperand::CreateReg(SrcReg)); - TmpInst.addOperand(MCOperand::CreateImm(4)); + .addImm(ARMCC::AL) + .addReg(0)); + + OutStreamer.EmitInstruction(MCInstBuilder(ARM::LDRi12) + .addReg(ScratchReg) + .addReg(SrcReg) + .addImm(4) // Predicate. - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); - } - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::LDRi12); - TmpInst.addOperand(MCOperand::CreateReg(ARM::R7)); - TmpInst.addOperand(MCOperand::CreateReg(SrcReg)); - TmpInst.addOperand(MCOperand::CreateImm(0)); + .addImm(ARMCC::AL) + .addReg(0)); + + OutStreamer.EmitInstruction(MCInstBuilder(ARM::LDRi12) + .addReg(ARM::R7) + .addReg(SrcReg) + .addImm(0) // Predicate. - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); - } - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::BX); - TmpInst.addOperand(MCOperand::CreateReg(ScratchReg)); + .addImm(ARMCC::AL) + .addReg(0)); + + OutStreamer.EmitInstruction(MCInstBuilder(ARM::BX) + .addReg(ScratchReg) // Predicate. 
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); - } + .addImm(ARMCC::AL) + .addReg(0)); return; } case ARM::tInt_eh_sjlj_longjmp: { @@ -1967,60 +1886,44 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { // bx $scratch unsigned SrcReg = MI->getOperand(0).getReg(); unsigned ScratchReg = MI->getOperand(1).getReg(); - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::tLDRi); - TmpInst.addOperand(MCOperand::CreateReg(ScratchReg)); - TmpInst.addOperand(MCOperand::CreateReg(SrcReg)); + OutStreamer.EmitInstruction(MCInstBuilder(ARM::tLDRi) + .addReg(ScratchReg) + .addReg(SrcReg) // The offset immediate is #8. The operand value is scaled by 4 for the // tLDR instruction. - TmpInst.addOperand(MCOperand::CreateImm(2)); + .addImm(2) // Predicate. - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); - } - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::tMOVr); - TmpInst.addOperand(MCOperand::CreateReg(ARM::SP)); - TmpInst.addOperand(MCOperand::CreateReg(ScratchReg)); + .addImm(ARMCC::AL) + .addReg(0)); + + OutStreamer.EmitInstruction(MCInstBuilder(ARM::tMOVr) + .addReg(ARM::SP) + .addReg(ScratchReg) // Predicate. - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); - } - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::tLDRi); - TmpInst.addOperand(MCOperand::CreateReg(ScratchReg)); - TmpInst.addOperand(MCOperand::CreateReg(SrcReg)); - TmpInst.addOperand(MCOperand::CreateImm(1)); + .addImm(ARMCC::AL) + .addReg(0)); + + OutStreamer.EmitInstruction(MCInstBuilder(ARM::tLDRi) + .addReg(ScratchReg) + .addReg(SrcReg) + .addImm(1) // Predicate. - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); - } - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::tLDRi); - TmpInst.addOperand(MCOperand::CreateReg(ARM::R7)); - TmpInst.addOperand(MCOperand::CreateReg(SrcReg)); - TmpInst.addOperand(MCOperand::CreateImm(0)); + .addImm(ARMCC::AL) + .addReg(0)); + + OutStreamer.EmitInstruction(MCInstBuilder(ARM::tLDRi) + .addReg(ARM::R7) + .addReg(SrcReg) + .addImm(0) // Predicate. - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); - } - { - MCInst TmpInst; - TmpInst.setOpcode(ARM::tBX); - TmpInst.addOperand(MCOperand::CreateReg(ScratchReg)); + .addImm(ARMCC::AL) + .addReg(0)); + + OutStreamer.EmitInstruction(MCInstBuilder(ARM::tBX) + .addReg(ScratchReg) // Predicate. - TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); - TmpInst.addOperand(MCOperand::CreateReg(0)); - OutStreamer.EmitInstruction(TmpInst); - } + .addImm(ARMCC::AL) + .addReg(0)); return; } } diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h index c875b2c..c945e4f 100644 --- a/lib/Target/ARM/ARMAsmPrinter.h +++ b/lib/Target/ARM/ARMAsmPrinter.h @@ -1,4 +1,4 @@ -//===-- ARMAsmPrinter.h - Print machine code to an ARM .s file --*- C++ -*-===// +//===-- ARMAsmPrinter.h - ARM implementation of AsmPrinter ------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -6,10 +6,6 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// -// ARM Assembly printer class. 
-// -//===----------------------------------------------------------------------===// #ifndef ARMASMPRINTER_H #define ARMASMPRINTER_H @@ -54,7 +50,7 @@ public: } virtual const char *getPassName() const LLVM_OVERRIDE { - return "ARM Assembly Printer"; + return "ARM Assembly / Object Emitter"; } void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O, @@ -121,7 +117,7 @@ private: MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol); MCSymbol *GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const; - MCSymbol *GetARMSJLJEHLabel(void) const; + MCSymbol *GetARMSJLJEHLabel() const; MCSymbol *GetARMGVSymbol(const GlobalValue *GV); diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 3c7bb24..9e68ff4 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -18,9 +18,7 @@ #include "ARMHazardRecognizer.h" #include "ARMMachineFunctionInfo.h" #include "MCTargetDesc/ARMAddressingModes.h" -#include "llvm/Constants.h" -#include "llvm/Function.h" -#include "llvm/GlobalValue.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -29,12 +27,14 @@ #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/BranchProbability.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/ADT/STLExtras.h" #define GET_INSTRINFO_CTOR #include "ARMGenInstrInfo.inc" @@ -106,7 +106,7 @@ CreateTargetHazardRecognizer(const TargetMachine *TM, const InstrItineraryData *II = TM->getInstrItineraryData(); return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched"); } - return TargetInstrInfoImpl::CreateTargetHazardRecognizer(TM, DAG); + return TargetInstrInfo::CreateTargetHazardRecognizer(TM, DAG); } ScheduleHazardRecognizer *ARMBaseInstrInfo:: @@ -115,7 +115,7 @@ CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, if (Subtarget.isThumb2() || Subtarget.hasVFP2()) return (ScheduleHazardRecognizer *) new ARMHazardRecognizer(II, *this, getRegisterInfo(), Subtarget, DAG); - return TargetInstrInfoImpl::CreateTargetPostRAHazardRecognizer(II, DAG); + return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG); } MachineInstr * @@ -464,8 +464,9 @@ PredicateInstruction(MachineInstr *MI, unsigned Opc = MI->getOpcode(); if (isUncondBranchOpcode(Opc)) { MI->setDesc(get(getMatchingCondBranchOpcode(Opc))); - MI->addOperand(MachineOperand::CreateImm(Pred[0].getImm())); - MI->addOperand(MachineOperand::CreateReg(Pred[1].getReg(), false)); + MachineInstrBuilder(*MI->getParent()->getParent(), MI) + .addImm(Pred[0].getImm()) + .addReg(Pred[1].getReg()); return true; } @@ -1124,7 +1125,7 @@ bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{ // copyPhysReg() calls. Look for VMOVS instructions that can legally be // widened to VMOVD. We prefer the VMOVD when possible because it may be // changed into a VORR that can go down the NEON pipeline. - if (!WidenVMOVS || !MI->isCopy()) + if (!WidenVMOVS || !MI->isCopy() || Subtarget.isCortexA15()) return false; // Look for a copy between even S-registers. 
That is where we keep floats @@ -1154,6 +1155,7 @@ bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{ // All clear, widen the COPY. DEBUG(dbgs() << "widening: " << *MI); + MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI); // Get rid of the old <imp-def> of DstRegD. Leave it if it defines a Q-reg // or some other super-register. @@ -1165,14 +1167,14 @@ bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{ MI->setDesc(get(ARM::VMOVD)); MI->getOperand(0).setReg(DstRegD); MI->getOperand(1).setReg(SrcRegD); - AddDefaultPred(MachineInstrBuilder(MI)); + AddDefaultPred(MIB); // We are now reading SrcRegD instead of SrcRegS. This may upset the // register scavenger and machine verifier, so we need to indicate that we // are reading an undefined value from SrcRegD, but a proper value from // SrcRegS. MI->getOperand(1).setIsUndef(); - MachineInstrBuilder(MI).addReg(SrcRegS, RegState::Implicit); + MIB.addReg(SrcRegS, RegState::Implicit); // SrcRegD may actually contain an unrelated value in the ssub_1 // sub-register. Don't kill it. Only kill the ssub_0 sub-register. @@ -1269,7 +1271,7 @@ reMaterialize(MachineBasicBlock &MBB, MachineInstr * ARMBaseInstrInfo::duplicate(MachineInstr *Orig, MachineFunction &MF) const { - MachineInstr *MI = TargetInstrInfoImpl::duplicate(Orig, MF); + MachineInstr *MI = TargetInstrInfo::duplicate(Orig, MF); switch(Orig->getOpcode()) { case ARM::tLDRpci_pic: case ARM::t2LDRpci_pic: { @@ -1373,6 +1375,9 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0, /// only return true if the base pointers are the same and the only differences /// between the two addresses is the offset. It also returns the offsets by /// reference. +/// +/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched +/// is permanently disabled. bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, int64_t &Offset1, int64_t &Offset2) const { @@ -1447,6 +1452,9 @@ bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, /// from the common base address. It returns true if it decides it's desirable /// to schedule the two loads together. "NumLoads" is the number of loads that /// have already been scheduled after Load1. +/// +/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched +/// is permanently disabled. bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, int64_t Offset1, int64_t Offset2, unsigned NumLoads) const { @@ -1598,7 +1606,7 @@ ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { // MOVCC AL can't be inverted. Shouldn't happen. if (CC == ARMCC::AL || PredReg != ARM::CPSR) return NULL; - MI = TargetInstrInfoImpl::commuteInstruction(MI, NewMI); + MI = TargetInstrInfo::commuteInstruction(MI, NewMI); if (!MI) return NULL; // After swapping the MOVCC operands, also invert the condition. @@ -1607,7 +1615,7 @@ ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { return MI; } } - return TargetInstrInfoImpl::commuteInstruction(MI, NewMI); + return TargetInstrInfo::commuteInstruction(MI, NewMI); } /// Identify instructions that can be folded into a MOVCC instruction, and @@ -1710,7 +1718,7 @@ MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI, // same register as operand 0. MachineOperand FalseReg = MI->getOperand(Invert ? 
2 : 1); FalseReg.setImplicit(); - NewMI->addOperand(FalseReg); + NewMI.addOperand(FalseReg); NewMI->tieOperands(0, NewMI->getNumOperands() - 1); // The caller will erase MI, but not DefMI. @@ -2711,7 +2719,6 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, case ARM::t2STMDB_UPD: { unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands() + 1; if (Subtarget.isSwift()) { - // rdar://8402126 int UOps = 1 + NumRegs; // One for address computation, one for each ld / st. switch (Opc) { default: break; @@ -3321,8 +3328,9 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, // instructions). if (Latency > 0 && Subtarget.isThumb2()) { const MachineFunction *MF = DefMI->getParent()->getParent(); - if (MF->getFunction()->getFnAttributes(). - hasAttribute(Attributes::OptimizeForSize)) + if (MF->getFunction()->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, + Attribute::OptimizeForSize)) --Latency; } return Latency; @@ -3726,9 +3734,9 @@ ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const { if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI)) return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON)); - // A9-like cores are particularly picky about mixing the two and want these + // CortexA9 is particularly picky about mixing the two and wants these // converted. - if (Subtarget.isLikeA9() && !isPredicated(MI) && + if (Subtarget.isCortexA9() && !isPredicated(MI) && (MI->getOpcode() == ARM::VMOVRS || MI->getOpcode() == ARM::VMOVSR || MI->getOpcode() == ARM::VMOVS)) @@ -3813,7 +3821,7 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const { unsigned DstReg, SrcReg, DReg; unsigned Lane; - MachineInstrBuilder MIB(MI); + MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI); const TargetRegisterInfo *TRI = &getRegisterInfo(); switch (MI->getOpcode()) { default: @@ -4015,14 +4023,12 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const { // VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops. // // FCONSTD can be used as a dependency-breaking instruction. - - unsigned ARMBaseInstrInfo:: getPartialRegUpdateClearance(const MachineInstr *MI, unsigned OpNum, const TargetRegisterInfo *TRI) const { - // Only Swift has partial register update problems. - if (!SwiftPartialUpdateClearance || !Subtarget.isSwift()) + if (!SwiftPartialUpdateClearance || + !(Subtarget.isSwift() || Subtarget.isCortexA15())) return 0; assert(TRI && "Need TRI instance"); @@ -4038,7 +4044,6 @@ getPartialRegUpdateClearance(const MachineInstr *MI, case ARM::VLDRS: case ARM::FCONSTS: case ARM::VMOVSR: - // rdar://problem/8791586 case ARM::VMOVv8i8: case ARM::VMOVv4i16: case ARM::VMOVv2i32: @@ -4049,7 +4054,7 @@ getPartialRegUpdateClearance(const MachineInstr *MI, // Explicitly reads the dependency. case ARM::VLD1LNd32: - UseOp = 1; + UseOp = 3; break; default: return 0; @@ -4118,3 +4123,15 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI, bool ARMBaseInstrInfo::hasNOP() const { return (Subtarget.getFeatureBits() & ARM::HasV6T2Ops) != 0; } + +bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const { + unsigned ShOpVal = MI->getOperand(3).getImm(); + unsigned ShImm = ARM_AM::getSORegOffset(ShOpVal); + // Swift supports faster shifts for: lsl 2, lsl 1, and lsr 1. 
+ if ((ShImm == 1 && ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsr) || + ((ShImm == 1 || ShImm == 2) && + ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsl)) + return true; + + return false; +} diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 6f38e35..7c107bb 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -15,10 +15,10 @@ #define ARMBASEINSTRUCTIONINFO_H #include "ARM.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Target/TargetInstrInfo.h" #define GET_INSTRINFO_HEADER #include "ARMGenInstrInfo.inc" @@ -314,6 +314,10 @@ public: bool canCauseFpMLxStall(unsigned Opcode) const { return MLxHazardOpcodes.count(Opcode); } + + /// Returns true if the instruction has a shift by immediate that can be + /// executed in one cycle less. + bool isSwiftFastImmShift(const MachineInstr *MI) const; }; static inline diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index e5b300f..b6b27f8 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -18,44 +18,34 @@ #include "ARMMachineFunctionInfo.h" #include "ARMSubtarget.h" #include "MCTargetDesc/ARMAddressingModes.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/LLVMContext.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Support/CommandLine.h" #define GET_REGINFO_TARGET_DESC #include "ARMGenRegisterInfo.inc" using namespace llvm; -static cl::opt<bool> -ForceAllBaseRegAlloc("arm-force-base-reg-alloc", cl::Hidden, cl::init(false), - cl::desc("Force use of virtual base registers for stack load/store")); -static cl::opt<bool> -EnableLocalStackAlloc("enable-local-stack-alloc", cl::init(true), cl::Hidden, - cl::desc("Enable pre-regalloc stack frame index allocation")); -static cl::opt<bool> -EnableBasePointer("arm-use-base-pointer", cl::Hidden, cl::init(true), - cl::desc("Enable use of a base pointer for complex stack frames")); - ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &sti) - : ARMGenRegisterInfo(ARM::LR), TII(tii), STI(sti), + : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), TII(tii), STI(sti), FramePtr((STI.isTargetDarwin() || STI.isThumb()) ? ARM::R7 : ARM::R11), BasePtr(ARM::R6) { } @@ -173,154 +163,63 @@ ARMBaseRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, } } -/// getRawAllocationOrder - Returns the register allocation order for a -/// specified register class with a target-dependent hint. 
-ArrayRef<uint16_t> -ARMBaseRegisterInfo::getRawAllocationOrder(const TargetRegisterClass *RC, - unsigned HintType, unsigned HintReg, - const MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - // Alternative register allocation orders when favoring even / odd registers - // of register pairs. - - // No FP, R9 is available. - static const uint16_t GPREven1[] = { - ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R8, ARM::R10, - ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R7, - ARM::R9, ARM::R11 - }; - static const uint16_t GPROdd1[] = { - ARM::R1, ARM::R3, ARM::R5, ARM::R7, ARM::R9, ARM::R11, - ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, - ARM::R8, ARM::R10 - }; - - // FP is R7, R9 is available. - static const uint16_t GPREven2[] = { - ARM::R0, ARM::R2, ARM::R4, ARM::R8, ARM::R10, - ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R6, - ARM::R9, ARM::R11 - }; - static const uint16_t GPROdd2[] = { - ARM::R1, ARM::R3, ARM::R5, ARM::R9, ARM::R11, - ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, - ARM::R8, ARM::R10 - }; - - // FP is R11, R9 is available. - static const uint16_t GPREven3[] = { - ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R8, - ARM::R1, ARM::R3, ARM::R10,ARM::R12,ARM::LR, ARM::R5, ARM::R7, - ARM::R9 - }; - static const uint16_t GPROdd3[] = { - ARM::R1, ARM::R3, ARM::R5, ARM::R6, ARM::R9, - ARM::R0, ARM::R2, ARM::R10,ARM::R12,ARM::LR, ARM::R4, ARM::R7, - ARM::R8 - }; - - // No FP, R9 is not available. - static const uint16_t GPREven4[] = { - ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R10, - ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R7, ARM::R8, - ARM::R11 - }; - static const uint16_t GPROdd4[] = { - ARM::R1, ARM::R3, ARM::R5, ARM::R7, ARM::R11, - ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8, - ARM::R10 - }; - - // FP is R7, R9 is not available. - static const uint16_t GPREven5[] = { - ARM::R0, ARM::R2, ARM::R4, ARM::R10, - ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R6, ARM::R8, - ARM::R11 - }; - static const uint16_t GPROdd5[] = { - ARM::R1, ARM::R3, ARM::R5, ARM::R11, - ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8, - ARM::R10 - }; - - // FP is R11, R9 is not available. - static const uint16_t GPREven6[] = { - ARM::R0, ARM::R2, ARM::R4, ARM::R6, - ARM::R1, ARM::R3, ARM::R10,ARM::R12,ARM::LR, ARM::R5, ARM::R7, ARM::R8 - }; - static const uint16_t GPROdd6[] = { - ARM::R1, ARM::R3, ARM::R5, ARM::R7, - ARM::R0, ARM::R2, ARM::R10,ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8 - }; - - // We only support even/odd hints for GPR and rGPR. - if (RC != &ARM::GPRRegClass && RC != &ARM::rGPRRegClass) - return RC->getRawAllocationOrder(MF); - - if (HintType == ARMRI::RegPairEven) { - if (isPhysicalRegister(HintReg) && getRegisterPairEven(HintReg, MF) == 0) - // It's no longer possible to fulfill this hint. Return the default - // allocation order. - return RC->getRawAllocationOrder(MF); - - if (!TFI->hasFP(MF)) { - if (!STI.isR9Reserved()) - return makeArrayRef(GPREven1); - else - return makeArrayRef(GPREven4); - } else if (FramePtr == ARM::R7) { - if (!STI.isR9Reserved()) - return makeArrayRef(GPREven2); - else - return makeArrayRef(GPREven5); - } else { // FramePtr == ARM::R11 - if (!STI.isR9Reserved()) - return makeArrayRef(GPREven3); - else - return makeArrayRef(GPREven6); - } - } else if (HintType == ARMRI::RegPairOdd) { - if (isPhysicalRegister(HintReg) && getRegisterPairOdd(HintReg, MF) == 0) - // It's no longer possible to fulfill this hint. 
Return the default - // allocation order. - return RC->getRawAllocationOrder(MF); - - if (!TFI->hasFP(MF)) { - if (!STI.isR9Reserved()) - return makeArrayRef(GPROdd1); - else - return makeArrayRef(GPROdd4); - } else if (FramePtr == ARM::R7) { - if (!STI.isR9Reserved()) - return makeArrayRef(GPROdd2); - else - return makeArrayRef(GPROdd5); - } else { // FramePtr == ARM::R11 - if (!STI.isR9Reserved()) - return makeArrayRef(GPROdd3); - else - return makeArrayRef(GPROdd6); - } - } - return RC->getRawAllocationOrder(MF); +// Get the other register in a GPRPair. +static unsigned getPairedGPR(unsigned Reg, bool Odd, const MCRegisterInfo *RI) { + for (MCSuperRegIterator Supers(Reg, RI); Supers.isValid(); ++Supers) + if (ARM::GPRPairRegClass.contains(*Supers)) + return RI->getSubReg(*Supers, Odd ? ARM::gsub_1 : ARM::gsub_0); + return 0; } -/// ResolveRegAllocHint - Resolves the specified register allocation hint -/// to a physical register. Returns the physical register if it is successful. -unsigned -ARMBaseRegisterInfo::ResolveRegAllocHint(unsigned Type, unsigned Reg, - const MachineFunction &MF) const { - if (Reg == 0 || !isPhysicalRegister(Reg)) - return 0; - if (Type == 0) - return Reg; - else if (Type == (unsigned)ARMRI::RegPairOdd) - // Odd register. - return getRegisterPairOdd(Reg, MF); - else if (Type == (unsigned)ARMRI::RegPairEven) - // Even register. - return getRegisterPairEven(Reg, MF); - return 0; +// Resolve the RegPairEven / RegPairOdd register allocator hints. +void +ARMBaseRegisterInfo::getRegAllocationHints(unsigned VirtReg, + ArrayRef<MCPhysReg> Order, + SmallVectorImpl<MCPhysReg> &Hints, + const MachineFunction &MF, + const VirtRegMap *VRM) const { + const MachineRegisterInfo &MRI = MF.getRegInfo(); + std::pair<unsigned, unsigned> Hint = MRI.getRegAllocationHint(VirtReg); + + unsigned Odd; + switch (Hint.first) { + case ARMRI::RegPairEven: + Odd = 0; + break; + case ARMRI::RegPairOdd: + Odd = 1; + break; + default: + TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints, MF, VRM); + return; + } + + // This register should preferably be even (Odd == 0) or odd (Odd == 1). + // Check if the other part of the pair has already been assigned, and provide + // the paired register as the first hint. + unsigned PairedPhys = 0; + if (VRM && VRM->hasPhys(Hint.second)) { + PairedPhys = getPairedGPR(VRM->getPhys(Hint.second), Odd, this); + if (PairedPhys && MRI.isReserved(PairedPhys)) + PairedPhys = 0; + } + + // First prefer the paired physreg. + if (PairedPhys && + std::find(Order.begin(), Order.end(), PairedPhys) != Order.end()) + Hints.push_back(PairedPhys); + + // Then prefer even or odd registers. + for (unsigned I = 0, E = Order.size(); I != E; ++I) { + unsigned Reg = Order[I]; + if (Reg == PairedPhys || (getEncodingValue(Reg) & 1) != Odd) + continue; + // Don't provide hints that are paired to a reserved register. + unsigned Paired = getPairedGPR(Reg, !Odd, this); + if (!Paired || MRI.isReserved(Paired)) + continue; + Hints.push_back(Reg); + } } void @@ -371,9 +270,6 @@ bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const { const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - if (!EnableBasePointer) - return false; - // When outgoing call frames are so large that we adjust the stack pointer // around the call, we can no longer use the stack pointer to reach the // emergency spill slot. 
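The register-info hunk above replaces six hand-maintained even/odd allocation-order tables with a computed answer: first hint the physreg paired with whatever the hint partner was already assigned, then hint any register of the requested parity whose pair partner is not reserved. The parity test works because the GPR encoding numbers R0-R12 consecutively, so bit 0 of getEncodingValue(Reg) distinguishes even from odd registers. A self-contained sketch of that final filtering loop, with hypothetical stand-ins for the encoding and reserved-register queries:

  #include <cstdio>
  #include <vector>

  // Stand-ins for getEncodingValue() and the MRI reserved-register check.
  struct Reg { const char *Name; unsigned Encoding; bool PairReserved; };

  // Keep registers of the requested parity whose pair partner is usable,
  // mirroring the loop at the end of getRegAllocationHints().
  static std::vector<Reg> filterHints(const std::vector<Reg> &Order,
                                      unsigned Odd) {
    std::vector<Reg> Hints;
    for (size_t I = 0, E = Order.size(); I != E; ++I) {
      if ((Order[I].Encoding & 1) != Odd)
        continue; // wrong parity for this hint
      if (Order[I].PairReserved)
        continue; // partner unusable, so pairing is impossible
      Hints.push_back(Order[I]);
    }
    return Hints;
  }

  int main() {
    std::vector<Reg> Order = {
      {"R0", 0, false}, {"R1", 1, false}, {"R2", 2, false},
      {"R3", 3, false}, {"R8", 8, true /* pair R9 reserved */},
    };
    // RegPairEven request: prints R0 and R2; odd regs and R8 are skipped.
    for (const Reg &R : filterHints(Order, /*Odd=*/0))
      std::printf("%s\n", R.Name);
    return 0;
  }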
@@ -419,8 +315,6 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const { // pointer adjustments around calls. if (MF.getTarget().getFrameLowering()->hasReservedCallFrame(MF)) return true; - if (!EnableBasePointer) - return false; // A base pointer is required and allowed. Check that it isn't too late to // reserve it. return MRI->canReserveReg(BasePtr); @@ -433,7 +327,8 @@ needsStackRealignment(const MachineFunction &MF) const { unsigned StackAlign = MF.getTarget().getFrameLowering()->getStackAlignment(); bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) || - F->getFnAttributes().hasAttribute(Attributes::StackAlignment)); + F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::StackAlignment)); return requiresRealignment && canRealignStack(MF); } @@ -464,114 +359,6 @@ unsigned ARMBaseRegisterInfo::getEHHandlerRegister() const { llvm_unreachable("What is the exception handler register"); } -unsigned ARMBaseRegisterInfo::getRegisterPairEven(unsigned Reg, - const MachineFunction &MF) const { - const MachineRegisterInfo &MRI = MF.getRegInfo(); - switch (Reg) { - default: break; - // Return 0 if either register of the pair is a special register. - // So no R12, etc. - case ARM::R1: return ARM::R0; - case ARM::R3: return ARM::R2; - case ARM::R5: return ARM::R4; - case ARM::R7: - return (MRI.isReserved(ARM::R7) || MRI.isReserved(ARM::R6)) - ? 0 : ARM::R6; - case ARM::R9: return MRI.isReserved(ARM::R9) ? 0 :ARM::R8; - case ARM::R11: return MRI.isReserved(ARM::R11) ? 0 : ARM::R10; - - case ARM::S1: return ARM::S0; - case ARM::S3: return ARM::S2; - case ARM::S5: return ARM::S4; - case ARM::S7: return ARM::S6; - case ARM::S9: return ARM::S8; - case ARM::S11: return ARM::S10; - case ARM::S13: return ARM::S12; - case ARM::S15: return ARM::S14; - case ARM::S17: return ARM::S16; - case ARM::S19: return ARM::S18; - case ARM::S21: return ARM::S20; - case ARM::S23: return ARM::S22; - case ARM::S25: return ARM::S24; - case ARM::S27: return ARM::S26; - case ARM::S29: return ARM::S28; - case ARM::S31: return ARM::S30; - - case ARM::D1: return ARM::D0; - case ARM::D3: return ARM::D2; - case ARM::D5: return ARM::D4; - case ARM::D7: return ARM::D6; - case ARM::D9: return ARM::D8; - case ARM::D11: return ARM::D10; - case ARM::D13: return ARM::D12; - case ARM::D15: return ARM::D14; - case ARM::D17: return ARM::D16; - case ARM::D19: return ARM::D18; - case ARM::D21: return ARM::D20; - case ARM::D23: return ARM::D22; - case ARM::D25: return ARM::D24; - case ARM::D27: return ARM::D26; - case ARM::D29: return ARM::D28; - case ARM::D31: return ARM::D30; - } - - return 0; -} - -unsigned ARMBaseRegisterInfo::getRegisterPairOdd(unsigned Reg, - const MachineFunction &MF) const { - const MachineRegisterInfo &MRI = MF.getRegInfo(); - switch (Reg) { - default: break; - // Return 0 if either register of the pair is a special register. - // So no R12, etc. - case ARM::R0: return ARM::R1; - case ARM::R2: return ARM::R3; - case ARM::R4: return ARM::R5; - case ARM::R6: - return (MRI.isReserved(ARM::R7) || MRI.isReserved(ARM::R6)) - ? 0 : ARM::R7; - case ARM::R8: return MRI.isReserved(ARM::R9) ? 0 :ARM::R9; - case ARM::R10: return MRI.isReserved(ARM::R11) ? 
0 : ARM::R11; - - case ARM::S0: return ARM::S1; - case ARM::S2: return ARM::S3; - case ARM::S4: return ARM::S5; - case ARM::S6: return ARM::S7; - case ARM::S8: return ARM::S9; - case ARM::S10: return ARM::S11; - case ARM::S12: return ARM::S13; - case ARM::S14: return ARM::S15; - case ARM::S16: return ARM::S17; - case ARM::S18: return ARM::S19; - case ARM::S20: return ARM::S21; - case ARM::S22: return ARM::S23; - case ARM::S24: return ARM::S25; - case ARM::S26: return ARM::S27; - case ARM::S28: return ARM::S29; - case ARM::S30: return ARM::S31; - - case ARM::D0: return ARM::D1; - case ARM::D2: return ARM::D3; - case ARM::D4: return ARM::D5; - case ARM::D6: return ARM::D7; - case ARM::D8: return ARM::D9; - case ARM::D10: return ARM::D11; - case ARM::D12: return ARM::D13; - case ARM::D14: return ARM::D15; - case ARM::D16: return ARM::D17; - case ARM::D18: return ARM::D19; - case ARM::D20: return ARM::D21; - case ARM::D22: return ARM::D23; - case ARM::D24: return ARM::D25; - case ARM::D26: return ARM::D27; - case ARM::D28: return ARM::D29; - case ARM::D30: return ARM::D31; - } - - return 0; -} - /// emitLoadConstPool - Emits a load from constpool to materialize the /// specified immediate. void ARMBaseRegisterInfo:: @@ -611,65 +398,7 @@ requiresFrameIndexScavenging(const MachineFunction &MF) const { bool ARMBaseRegisterInfo:: requiresVirtualBaseRegisters(const MachineFunction &MF) const { - return EnableLocalStackAlloc; -} - -static void -emitSPUpdate(bool isARM, - MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, - DebugLoc dl, const ARMBaseInstrInfo &TII, - int NumBytes, - ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) { - if (isARM) - emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, - Pred, PredReg, TII); - else - emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, - Pred, PredReg, TII); -} - - -void ARMBaseRegisterInfo:: -eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - if (!TFI->hasReservedCallFrame(MF)) { - // If we have alloca, convert as follows: - // ADJCALLSTACKDOWN -> sub, sp, sp, amount - // ADJCALLSTACKUP -> add, sp, sp, amount - MachineInstr *Old = I; - DebugLoc dl = Old->getDebugLoc(); - unsigned Amount = Old->getOperand(0).getImm(); - if (Amount != 0) { - // We need to keep the stack aligned properly. To do this, we round the - // amount of space needed for the outgoing arguments up to the next - // alignment boundary. - unsigned Align = TFI->getStackAlignment(); - Amount = (Amount+Align-1)/Align*Align; - - ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - assert(!AFI->isThumb1OnlyFunction() && - "This eliminateCallFramePseudoInstr does not support Thumb1!"); - bool isARM = !AFI->isThumbFunction(); - - // Replace the pseudo instruction with a new instruction... - unsigned Opc = Old->getOpcode(); - int PIdx = Old->findFirstPredOperandIdx(); - ARMCC::CondCodes Pred = (PIdx == -1) - ? ARMCC::AL : (ARMCC::CondCodes)Old->getOperand(PIdx).getImm(); - if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) { - // Note: PredReg is operand 2 for ADJCALLSTACKDOWN. - unsigned PredReg = Old->getOperand(2).getReg(); - emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, Pred, PredReg); - } else { - // Note: PredReg is operand 3 for ADJCALLSTACKUP. 
- unsigned PredReg = Old->getOperand(3).getReg(); - assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP); - emitSPUpdate(isARM, MBB, I, dl, TII, Amount, Pred, PredReg); - } - } - } - MBB.erase(I); + return true; } int64_t ARMBaseRegisterInfo:: @@ -750,8 +479,6 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const { case ARM::VLDRS: case ARM::VLDRD: case ARM::VSTRS: case ARM::VSTRD: case ARM::tSTRspi: case ARM::tLDRspi: - if (ForceAllBaseRegAlloc) - return true; break; default: return false; @@ -933,8 +660,8 @@ bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, void ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS) const { - unsigned i = 0; + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS) const { MachineInstr &MI = *II; MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); @@ -943,13 +670,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); assert(!AFI->isThumb1OnlyFunction() && "This eliminateFrameIndex does not support Thumb1!"); - - while (!MI.getOperand(i).isFI()) { - ++i; - assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); - } - - int FrameIndex = MI.getOperand(i).getIndex(); + int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); unsigned FrameReg; int Offset = TFI->ResolveFrameIndexReference(MF, FrameIndex, FrameReg, SPAdj); @@ -959,7 +680,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // means the stack pointer cannot be used to access the emergency spill slot // when !hasReservedCallFrame(). #ifndef NDEBUG - if (RS && FrameReg == ARM::SP && FrameIndex == RS->getScavengingFrameIndex()){ + if (RS && FrameReg == ARM::SP && RS->isScavengingFrameIndex(FrameIndex)){ assert(TFI->hasReservedCallFrame(MF) && "Cannot use SP to access the emergency spill slot in " "functions without a reserved call frame"); @@ -971,18 +692,18 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // Special handling of dbg_value instructions. if (MI.isDebugValue()) { - MI.getOperand(i). ChangeToRegister(FrameReg, false /*isDef*/); - MI.getOperand(i+1).ChangeToImmediate(Offset); + MI.getOperand(FIOperandNum). ChangeToRegister(FrameReg, false /*isDef*/); + MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset); return; } // Modify MI as necessary to handle as much of 'Offset' as possible bool Done = false; if (!AFI->isThumbFunction()) - Done = rewriteARMFrameIndex(MI, i, FrameReg, Offset, TII); + Done = rewriteARMFrameIndex(MI, FIOperandNum, FrameReg, Offset, TII); else { assert(AFI->isThumb2Function()); - Done = rewriteT2FrameIndex(MI, i, FrameReg, Offset, TII); + Done = rewriteT2FrameIndex(MI, FIOperandNum, FrameReg, Offset, TII); } if (Done) return; @@ -1002,7 +723,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, unsigned PredReg = (PIdx == -1) ? 0 : MI.getOperand(PIdx+1).getReg(); if (Offset == 0) // Must be addrmode4/6. - MI.getOperand(i).ChangeToRegister(FrameReg, false, false, false); + MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false, false, false); else { ScratchReg = MF.getRegInfo().createVirtualRegister(&ARM::GPRRegClass); if (!AFI->isThumbFunction()) @@ -1014,6 +735,6 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, Offset, Pred, PredReg, TII); } // Update the original instruction to use the scratch register. 
- MI.getOperand(i).ChangeToRegister(ScratchReg, false, false, true); + MI.getOperand(FIOperandNum).ChangeToRegister(ScratchReg, false, false,true); } } diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h index e2bdd04..725033b 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -111,12 +111,11 @@ public: unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const; - ArrayRef<uint16_t> getRawAllocationOrder(const TargetRegisterClass *RC, - unsigned HintType, unsigned HintReg, - const MachineFunction &MF) const; - - unsigned ResolveRegAllocHint(unsigned Type, unsigned Reg, - const MachineFunction &MF) const; + void getRegAllocationHints(unsigned VirtReg, + ArrayRef<MCPhysReg> Order, + SmallVectorImpl<MCPhysReg> &Hints, + const MachineFunction &MF, + const VirtRegMap *VRM) const; void UpdateRegAllocHint(unsigned Reg, unsigned NewReg, MachineFunction &MF) const; @@ -169,17 +168,9 @@ public: virtual bool requiresVirtualBaseRegisters(const MachineFunction &MF) const; - virtual void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; - virtual void eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS = NULL) const; - -private: - unsigned getRegisterPairEven(unsigned Reg, const MachineFunction &MF) const; - - unsigned getRegisterPairOdd(unsigned Reg, const MachineFunction &MF) const; + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS = NULL) const; }; } // end namespace llvm diff --git a/lib/Target/ARM/ARMCallingConv.h b/lib/Target/ARM/ARMCallingConv.h index 0bd1c3e..e6e8c3d 100644 --- a/lib/Target/ARM/ARMCallingConv.h +++ b/lib/Target/ARM/ARMCallingConv.h @@ -18,8 +18,8 @@ #include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMSubtarget.h" -#include "llvm/CallingConv.h" #include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/IR/CallingConv.h" #include "llvm/Target/TargetInstrInfo.h" namespace llvm { diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index 6adbf4f..5e8e173 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -14,16 +14,13 @@ #define DEBUG_TYPE "jit" #include "ARM.h" -#include "ARMConstantPoolValue.h" #include "ARMBaseInstrInfo.h" +#include "ARMConstantPoolValue.h" #include "ARMRelocations.h" #include "ARMSubtarget.h" #include "ARMTargetMachine.h" #include "MCTargetDesc/ARMAddressingModes.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/PassManager.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/JITCodeEmitter.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -31,7 +28,10 @@ #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/PassManager.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -371,12 +371,16 @@ FunctionPass *llvm::createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM, } bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) { - assert((MF.getTarget().getRelocationModel() != Reloc::Default || - MF.getTarget().getRelocationModel() != Reloc::Static) && + TargetMachine &Target = 
const_cast<TargetMachine&>(MF.getTarget()); + + assert((Target.getRelocationModel() == Reloc::Default || + Target.getRelocationModel() == Reloc::Static) && "JIT relocation model must be set to static or default!"); - JTI = ((ARMBaseTargetMachine &)MF.getTarget()).getJITInfo(); - II = (const ARMBaseInstrInfo *)MF.getTarget().getInstrInfo(); - TD = MF.getTarget().getDataLayout(); + + JTI = static_cast<ARMJITInfo*>(Target.getJITInfo()); + II = static_cast<const ARMBaseInstrInfo*>(Target.getInstrInfo()); + TD = Target.getDataLayout(); + Subtarget = &TM.getSubtarget<ARMSubtarget>(); MCPEs = &MF.getConstantPool()->getConstants(); MJTEs = 0; diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index a57368f..4891609 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -16,23 +16,23 @@ #define DEBUG_TYPE "arm-cp-islands" #include "ARM.h" #include "ARMMachineFunctionInfo.h" -#include "Thumb2InstrInfo.h" #include "MCTargetDesc/ARMAddressingModes.h" +#include "Thumb2InstrInfo.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/DataLayout.h" -#include "llvm/Target/TargetMachine.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetMachine.h" #include <algorithm> using namespace llvm; @@ -1468,7 +1468,7 @@ void ARMConstantIslands::removeDeadCPEMI(MachineInstr *CPEMI) { if (CPEBB->empty()) { BBInfo[CPEBB->getNumber()].Size = 0; - // This block no longer needs to be aligned. <rdar://problem/10534709>. + // This block no longer needs to be aligned. CPEBB->setAlignment(0); } else // Entries are sorted by descending alignment, so realign from the front.
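The relocation-model assert in runOnMachineFunction is a classic spot for a boolean slip: with two distinct constants A and B, the condition x != A || x != B holds for every x, so an assert written with != can never fire, while the == form is what "must be static or default" actually means. A tiny demonstration of the difference:

  #include <cassert>

  enum RelocModel { Default, Static, PIC_ };

  // Vacuous: any value differs from at least one of two distinct constants.
  static bool vacuousCheck(RelocModel RM) {
    return RM != Default || RM != Static;
  }

  // Intended: the model really is one of the two allowed values.
  static bool intendedCheck(RelocModel RM) {
    return RM == Default || RM == Static;
  }

  int main() {
    assert(vacuousCheck(PIC_)); // passes even for the disallowed model
    assert(intendedCheck(Default) && intendedCheck(Static));
    assert(!intendedCheck(PIC_));
    return 0;
  }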
diff --git a/lib/Target/ARM/ARMConstantPoolValue.cpp b/lib/Target/ARM/ARMConstantPoolValue.cpp index fa3226e..4e703ec 100644 --- a/lib/Target/ARM/ARMConstantPoolValue.cpp +++ b/lib/Target/ARM/ARMConstantPoolValue.cpp @@ -13,11 +13,11 @@ #include "ARMConstantPoolValue.h" #include "llvm/ADT/FoldingSet.h" -#include "llvm/Constant.h" -#include "llvm/Constants.h" -#include "llvm/GlobalValue.h" -#include "llvm/Type.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Type.h" #include "llvm/Support/raw_ostream.h" #include <cstdlib> using namespace llvm; @@ -206,11 +206,7 @@ ARMConstantPoolSymbol::ARMConstantPoolSymbol(LLVMContext &C, const char *s, bool AddCurrentAddress) : ARMConstantPoolValue(C, id, ARMCP::CPExtSymbol, PCAdj, Modifier, AddCurrentAddress), - S(strdup(s)) {} - -ARMConstantPoolSymbol::~ARMConstantPoolSymbol() { - free((void*)S); -} + S(s) {} ARMConstantPoolSymbol * ARMConstantPoolSymbol::Create(LLVMContext &C, const char *s, @@ -218,14 +214,6 @@ ARMConstantPoolSymbol::Create(LLVMContext &C, const char *s, return new ARMConstantPoolSymbol(C, s, ID, PCAdj, ARMCP::no_modifier, false); } -static bool CPV_streq(const char *S1, const char *S2) { - if (S1 == S2) - return true; - if (S1 && S2 && strcmp(S1, S2) == 0) - return true; - return false; -} - int ARMConstantPoolSymbol::getExistingMachineCPValue(MachineConstantPool *CP, unsigned Alignment) { unsigned AlignMask = Alignment - 1; @@ -238,7 +226,7 @@ int ARMConstantPoolSymbol::getExistingMachineCPValue(MachineConstantPool *CP, ARMConstantPoolSymbol *APS = dyn_cast<ARMConstantPoolSymbol>(CPV); if (!APS) continue; - if (CPV_streq(APS->S, S) && equals(APS)) + if (APS->S == S && equals(APS)) return i; } } @@ -248,12 +236,11 @@ int ARMConstantPoolSymbol::getExistingMachineCPValue(MachineConstantPool *CP, bool ARMConstantPoolSymbol::hasSameValue(ARMConstantPoolValue *ACPV) { const ARMConstantPoolSymbol *ACPS = dyn_cast<ARMConstantPoolSymbol>(ACPV); - return ACPS && CPV_streq(ACPS->S, S) && - ARMConstantPoolValue::hasSameValue(ACPV); + return ACPS && ACPS->S == S && ARMConstantPoolValue::hasSameValue(ACPV); } void ARMConstantPoolSymbol::addSelectionDAGCSEId(FoldingSetNodeID &ID) { - ID.AddPointer(S); + ID.AddString(S); ARMConstantPoolValue::addSelectionDAGCSEId(ID); } diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h index ae531c4..93812fe 100644 --- a/lib/Target/ARM/ARMConstantPoolValue.h +++ b/lib/Target/ARM/ARMConstantPoolValue.h @@ -161,19 +161,17 @@ public: /// ARMConstantPoolSymbol - ARM-specific constantpool values for external /// symbols. class ARMConstantPoolSymbol : public ARMConstantPoolValue { - const char *S; // ExtSymbol being loaded. + const std::string S; // ExtSymbol being loaded. 
ARMConstantPoolSymbol(LLVMContext &C, const char *s, unsigned id, unsigned char PCAdj, ARMCP::ARMCPModifier Modifier, bool AddCurrentAddress); public: - ~ARMConstantPoolSymbol(); - static ARMConstantPoolSymbol *Create(LLVMContext &C, const char *s, unsigned ID, unsigned char PCAdj); - const char *getSymbol() const { return S; } + const char *getSymbol() const { return S.c_str(); } virtual int getExistingMachineCPValue(MachineConstantPool *CP, unsigned Alignment); diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 8c45e0b..beb843c 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -23,10 +23,10 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" // FIXME: for debug only. remove! +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; static cl::opt<bool> diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 6611862..29fcd40 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -16,31 +16,31 @@ #include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMCallingConv.h" -#include "ARMTargetMachine.h" -#include "ARMSubtarget.h" #include "ARMConstantPoolValue.h" +#include "ARMSubtarget.h" +#include "ARMTargetMachine.h" #include "MCTargetDesc/ARMAddressingModes.h" -#include "llvm/CallingConv.h" -#include "llvm/DerivedTypes.h" -#include "llvm/GlobalVariable.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Module.h" -#include "llvm/Operator.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GetElementPtrTypeIterator.h" -#include "llvm/DataLayout.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" @@ -146,6 +146,7 @@ class ARMFastISel : public FastISel { virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI); virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo, const LoadInst *LI); + virtual bool FastLowerArguments(); private: #include "ARMGenFastISel.inc" @@ -178,23 +179,24 @@ class ARMFastISel : public FastISel { bool isLoadTypeLegal(Type *Ty, MVT &VT); bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, bool isZExt); - bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr, + bool ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, unsigned Alignment = 0, bool isZExt = true, 
bool allocReg = true); - bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr, + bool ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr, unsigned Alignment = 0); bool ARMComputeAddress(const Value *Obj, Address &Addr); - void ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3); + void ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3); bool ARMIsMemCpySmall(uint64_t Len); - bool ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len); - unsigned ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT, bool isZExt); - unsigned ARMMaterializeFP(const ConstantFP *CFP, EVT VT); - unsigned ARMMaterializeInt(const Constant *C, EVT VT); - unsigned ARMMaterializeGV(const GlobalValue *GV, EVT VT); - unsigned ARMMoveToFPReg(EVT VT, unsigned SrcReg); - unsigned ARMMoveToIntReg(EVT VT, unsigned SrcReg); + bool ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len, + unsigned Alignment); + unsigned ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt); + unsigned ARMMaterializeFP(const ConstantFP *CFP, MVT VT); + unsigned ARMMaterializeInt(const Constant *C, MVT VT); + unsigned ARMMaterializeGV(const GlobalValue *GV, MVT VT); + unsigned ARMMoveToFPReg(MVT VT, unsigned SrcReg); + unsigned ARMMoveToIntReg(MVT VT, unsigned SrcReg); unsigned ARMSelectCallOp(bool UseReg); - unsigned ARMLowerPICELF(const GlobalValue *GV, unsigned Align, EVT VT); + unsigned ARMLowerPICELF(const GlobalValue *GV, unsigned Align, MVT VT); // Call handling routines. private: @@ -220,7 +222,7 @@ class ARMFastISel : public FastISel { bool isARMNEONPred(const MachineInstr *MI); bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR); const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB); - void AddLoadStoreOperands(EVT VT, Address &Addr, + void AddLoadStoreOperands(MVT VT, Address &Addr, const MachineInstrBuilder &MIB, unsigned Flags, bool useAM3); }; @@ -486,7 +488,7 @@ unsigned ARMFastISel::FastEmitInst_extractsubreg(MVT RetVT, // TODO: Don't worry about 64-bit now, but when this is fixed remove the // checks from the various callers. -unsigned ARMFastISel::ARMMoveToFPReg(EVT VT, unsigned SrcReg) { +unsigned ARMFastISel::ARMMoveToFPReg(MVT VT, unsigned SrcReg) { if (VT == MVT::f64) return 0; unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT)); @@ -496,7 +498,7 @@ unsigned ARMFastISel::ARMMoveToFPReg(EVT VT, unsigned SrcReg) { return MoveReg; } -unsigned ARMFastISel::ARMMoveToIntReg(EVT VT, unsigned SrcReg) { +unsigned ARMFastISel::ARMMoveToIntReg(MVT VT, unsigned SrcReg) { if (VT == MVT::i64) return 0; unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT)); @@ -509,7 +511,7 @@ unsigned ARMFastISel::ARMMoveToIntReg(EVT VT, unsigned SrcReg) { // For double width floating point we need to materialize two constants // (the high and the low) into integer registers then use a move to get // the combined constant into an FP reg. 
-unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, EVT VT) { +unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, MVT VT) { const APFloat Val = CFP->getValueAPF(); bool is64bit = VT == MVT::f64; @@ -553,7 +555,7 @@ unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, EVT VT) { return DestReg; } -unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) { +unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) { if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1) return false; @@ -563,7 +565,9 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) { const ConstantInt *CI = cast<ConstantInt>(C); if (Subtarget->hasV6T2Ops() && isUInt<16>(CI->getZExtValue())) { unsigned Opc = isThumb2 ? ARM::t2MOVi16 : ARM::MOVi16; - unsigned ImmReg = createResultReg(TLI.getRegClassFor(MVT::i32)); + const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass : + &ARM::GPRRegClass; + unsigned ImmReg = createResultReg(RC); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ImmReg) .addImm(CI->getZExtValue())); @@ -613,7 +617,7 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) { return DestReg; } -unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) { +unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) { // For now 32-bit only. if (VT != MVT::i32) return 0; @@ -716,10 +720,11 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) { } unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) { - EVT VT = TLI.getValueType(C->getType(), true); + EVT CEVT = TLI.getValueType(C->getType(), true); // Only handle simple types. - if (!VT.isSimple()) return 0; + if (!CEVT.isSimple()) return 0; + MVT VT = CEVT.getSimpleVT(); if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) return ARMMaterializeFP(CFP, VT); @@ -895,12 +900,9 @@ bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) { return Addr.Base.Reg != 0; } -void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3) { - - assert(VT.isSimple() && "Non-simple types are invalid here!"); - +void ARMFastISel::ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3) { bool needsLowering = false; - switch (VT.getSimpleVT().SimpleTy) { + switch (VT.SimpleTy) { default: llvm_unreachable("Unhandled load/store type!"); case MVT::i1: case MVT::i8: @@ -951,13 +953,12 @@ void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3) { } } -void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr, +void ARMFastISel::AddLoadStoreOperands(MVT VT, Address &Addr, const MachineInstrBuilder &MIB, unsigned Flags, bool useAM3) { // addrmode5 output depends on the selection dag addressing dividing the // offset by 4 that it then later multiplies. Do this here as well. - if (VT.getSimpleVT().SimpleTy == MVT::f32 || - VT.getSimpleVT().SimpleTy == MVT::f64) + if (VT.SimpleTy == MVT::f32 || VT.SimpleTy == MVT::f64) Addr.Offset /= 4; // Frame base works a bit differently. Handle it separately. 
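A second pattern repeats through the ARMFastISel changes: helpers that used to take EVT now take MVT, so each entry point performs the EVT-to-MVT conversion exactly once, bailing out on extended types, and the helpers can switch on VT.SimpleTy without re-validating. A minimal sketch of that boundary idiom, assuming the ValueTypes API of this era (llvm/CodeGen/ValueTypes.h); the function names are placeholders, not LLVM API:

  // Illustrative only -- the convert-once-at-the-boundary idiom used above.
  #include "llvm/CodeGen/ValueTypes.h"
  using namespace llvm;

  // Helpers take MVT and may switch on SimpleTy directly.
  static bool handleSimpleType(MVT VT) {
    switch (VT.SimpleTy) {
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      return true;
    default:
      return false;
    }
  }

  // Entry points do the single conversion and reject extended types early,
  // mirroring ARMEmitCmp and SelectIToFP above.
  static bool selectSomething(EVT VT) {
    if (!VT.isSimple())
      return false; // extended types never reach the MVT-taking helpers
    return handleSimpleType(VT.getSimpleVT());
  }

Pushing the isSimple() check to the boundary removes the scattered VT.getSimpleVT().SimpleTy chains and makes it impossible for a non-simple type to reach the switches.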
@@ -1000,14 +1001,13 @@ void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr, AddOptionalDefs(MIB); } -bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr, +bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, unsigned Alignment, bool isZExt, bool allocReg) { - assert(VT.isSimple() && "Non-simple types are invalid here!"); unsigned Opc; bool useAM3 = false; bool needVMOV = false; const TargetRegisterClass *RC; - switch (VT.getSimpleVT().SimpleTy) { + switch (VT.SimpleTy) { // This is mostly going to be Neon/vector support. default: return false; case MVT::i1: @@ -1124,11 +1124,11 @@ bool ARMFastISel::SelectLoad(const Instruction *I) { return true; } -bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr, +bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr, unsigned Alignment) { unsigned StrOpc; bool useAM3 = false; - switch (VT.getSimpleVT().SimpleTy) { + switch (VT.SimpleTy) { // This is mostly going to be Neon/vector support. default: return false; case MVT::i1: { @@ -1402,8 +1402,9 @@ bool ARMFastISel::SelectIndirectBr(const Instruction *I) { bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, bool isZExt) { Type *Ty = Src1Value->getType(); - EVT SrcVT = TLI.getValueType(Ty, true); - if (!SrcVT.isSimple()) return false; + EVT SrcEVT = TLI.getValueType(Ty, true); + if (!SrcEVT.isSimple()) return false; + MVT SrcVT = SrcEVT.getSimpleVT(); bool isFloat = (Ty->isFloatTy() || Ty->isDoubleTy()); if (isFloat && !Subtarget->hasVFP2()) @@ -1440,7 +1441,7 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, unsigned CmpOpc; bool isICmp = true; bool needsExt = false; - switch (SrcVT.getSimpleVT().SimpleTy) { + switch (SrcVT.SimpleTy) { default: return false; // TODO: Verify compares. case MVT::f32: @@ -1592,7 +1593,10 @@ bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) { return false; Value *Src = I->getOperand(0); - EVT SrcVT = TLI.getValueType(Src->getType(), true); + EVT SrcEVT = TLI.getValueType(Src->getType(), true); + if (!SrcEVT.isSimple()) + return false; + MVT SrcVT = SrcEVT.getSimpleVT(); if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8) return false; @@ -1601,8 +1605,7 @@ bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) { // Handle sign-extension. if (SrcVT == MVT::i16 || SrcVT == MVT::i8) { - EVT DestVT = MVT::i32; - SrcReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT, + SrcReg = ARMEmitIntExt(SrcVT, SrcReg, MVT::i32, /*isZExt*/!isSigned); if (SrcReg == 0) return false; } @@ -1665,7 +1668,6 @@ bool ARMFastISel::SelectSelect(const Instruction *I) { // Things need to be register sized for register moves. if (VT != MVT::i32) return false; - const TargetRegisterClass *RC = TLI.getRegClassFor(VT); unsigned CondReg = getRegForValue(I->getOperand(0)); if (CondReg == 0) return false; @@ -1698,14 +1700,16 @@ bool ARMFastISel::SelectSelect(const Instruction *I) { .addReg(CondReg).addImm(0)); unsigned MovCCOpc; + const TargetRegisterClass *RC; if (!UseImm) { + RC = isThumb2 ? &ARM::tGPRRegClass : &ARM::GPRRegClass; MovCCOpc = isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr; } else { - if (!isNegativeImm) { + RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass; + if (!isNegativeImm) MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi; - } else { + else MovCCOpc = isThumb2 ? 
ARM::t2MVNCCi : ARM::MVNCCi; - } } unsigned ResultReg = createResultReg(RC); if (!UseImm) @@ -1807,7 +1811,9 @@ bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) { } bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) { - EVT VT = TLI.getValueType(I->getType(), true); + EVT FPVT = TLI.getValueType(I->getType(), true); + if (!FPVT.isSimple()) return false; + MVT VT = FPVT.getSimpleVT(); // We can get here in the case when we want to use NEON for our fp // operations, but can't figure out how to. Just use the vfp instructions @@ -1838,7 +1844,7 @@ bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) { unsigned Op2 = getRegForValue(I->getOperand(1)); if (Op2 == 0) return false; - unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); + unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT.SimpleTy)); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg) .addReg(Op1).addReg(Op2)); @@ -2051,7 +2057,7 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs, if (RVLocs.size() == 2 && RetVT == MVT::f64) { // For this move we copy into two registers and then move into the // double fp reg we want. - EVT DestVT = RVLocs[0].getValVT(); + MVT DestVT = RVLocs[0].getValVT(); const TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT); unsigned ResultReg = createResultReg(DstRC); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, @@ -2066,7 +2072,7 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs, UpdateValueMap(I, ResultReg); } else { assert(RVLocs.size() == 1 &&"Can't handle non-double multi-reg retvals!"); - EVT CopyVT = RVLocs[0].getValVT(); + MVT CopyVT = RVLocs[0].getValVT(); // Special handling for extended integers. if (RetVT == MVT::i1 || RetVT == MVT::i8 || RetVT == MVT::i16) @@ -2094,11 +2100,13 @@ bool ARMFastISel::SelectRet(const Instruction *I) { if (!FuncInfo.CanLowerReturn) return false; + // Build a list of return value registers. + SmallVector<unsigned, 4> RetRegs; + CallingConv::ID CC = F.getCallingConv(); if (Ret->getNumOperands() > 0) { SmallVector<ISD::OutputArg, 4> Outs; - GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(), - Outs, TLI); + GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI); // Analyze operands of the call, assigning locations to each operand. SmallVector<CCValAssign, 16> ValLocs; @@ -2125,8 +2133,10 @@ bool ARMFastISel::SelectRet(const Instruction *I) { return false; unsigned SrcReg = Reg + VA.getValNo(); - EVT RVVT = TLI.getValueType(RV->getType()); - EVT DestVT = VA.getValVT(); + EVT RVEVT = TLI.getValueType(RV->getType()); + if (!RVEVT.isSimple()) return false; + MVT RVVT = RVEVT.getSimpleVT(); + MVT DestVT = VA.getValVT(); // Special handling for extended integers. if (RVVT != DestVT) { if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16) @@ -2151,13 +2161,16 @@ bool ARMFastISel::SelectRet(const Instruction *I) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), DstReg).addReg(SrcReg); - // Mark the register as live out of the function. - MRI.addLiveOut(VA.getLocReg()); + // Add register to return instruction. + RetRegs.push_back(VA.getLocReg()); } unsigned RetOpc = isThumb2 ? 
ARM::tBX_RET : ARM::BX_RET; - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(RetOpc))); + MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(RetOpc)); + AddOptionalDefs(MIB); + for (unsigned i = 0, e = RetRegs.size(); i != e; ++i) + MIB.addReg(RetRegs[i], RegState::Implicit); return true; } @@ -2171,7 +2184,9 @@ unsigned ARMFastISel::ARMSelectCallOp(bool UseReg) { unsigned ARMFastISel::getLibcallReg(const Twine &Name) { GlobalValue *GV = new GlobalVariable(Type::getInt32Ty(*Context), false, GlobalValue::ExternalLinkage, 0, Name); - return ARMMaterializeGV(GV, TLI.getValueType(GV->getType())); + EVT LCREVT = TLI.getValueType(GV->getType()); + if (!LCREVT.isSimple()) return 0; + return ARMMaterializeGV(GV, LCREVT.getSimpleVT()); } // A quick function that will emit a call for a named libcall in F with the @@ -2280,6 +2295,9 @@ bool ARMFastISel::SelectCall(const Instruction *I, // Can't handle inline asm. if (isa<InlineAsm>(Callee)) return false; + // Allow SelectionDAG isel to handle tail calls. + if (CI->isTailCall()) return false; + // Check the calling convention. ImmutableCallSite CS(CI); CallingConv::ID CC = CS.getCallingConv(); @@ -2328,16 +2346,16 @@ bool ARMFastISel::SelectCall(const Instruction *I, ISD::ArgFlagsTy Flags; unsigned AttrInd = i - CS.arg_begin() + 1; - if (CS.paramHasAttr(AttrInd, Attributes::SExt)) + if (CS.paramHasAttr(AttrInd, Attribute::SExt)) Flags.setSExt(); - if (CS.paramHasAttr(AttrInd, Attributes::ZExt)) + if (CS.paramHasAttr(AttrInd, Attribute::ZExt)) Flags.setZExt(); // FIXME: Only handle *easy* calls for now. - if (CS.paramHasAttr(AttrInd, Attributes::InReg) || - CS.paramHasAttr(AttrInd, Attributes::StructRet) || - CS.paramHasAttr(AttrInd, Attributes::Nest) || - CS.paramHasAttr(AttrInd, Attributes::ByVal)) + if (CS.paramHasAttr(AttrInd, Attribute::InReg) || + CS.paramHasAttr(AttrInd, Attribute::StructRet) || + CS.paramHasAttr(AttrInd, Attribute::Nest) || + CS.paramHasAttr(AttrInd, Attribute::ByVal)) return false; Type *ArgTy = (*i)->getType(); @@ -2419,21 +2437,29 @@ bool ARMFastISel::ARMIsMemCpySmall(uint64_t Len) { } bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src, - uint64_t Len) { + uint64_t Len, unsigned Alignment) { // Make sure we don't bloat code by inlining very large memcpy's. if (!ARMIsMemCpySmall(Len)) return false; - // We don't care about alignment here since we just emit integer accesses. while (Len) { MVT VT; - if (Len >= 4) - VT = MVT::i32; - else if (Len >= 2) - VT = MVT::i16; - else { - assert(Len == 1); - VT = MVT::i8; + if (!Alignment || Alignment >= 4) { + if (Len >= 4) + VT = MVT::i32; + else if (Len >= 2) + VT = MVT::i16; + else { + assert (Len == 1 && "Expected a length of 1!"); + VT = MVT::i8; + } + } else { + // Bound based on alignment. 
+ if (Len >= 2 && Alignment == 2) + VT = MVT::i16; + else { + VT = MVT::i8; + } } bool RV; @@ -2512,7 +2538,8 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) { if (!ARMComputeAddress(MTI.getRawDest(), Dest) || !ARMComputeAddress(MTI.getRawSource(), Src)) return false; - if (ARMTryEmitSmallMemCpy(Dest, Src, Len)) + unsigned Alignment = MTI.getAlignment(); + if (ARMTryEmitSmallMemCpy(Dest, Src, Len, Alignment)) return true; } } @@ -2541,7 +2568,8 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) { return SelectCall(&I, "memset"); } case Intrinsic::trap: { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::TRAP)); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get( + Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP)); return true; } } @@ -2570,18 +2598,19 @@ bool ARMFastISel::SelectTrunc(const Instruction *I) { return true; } -unsigned ARMFastISel::ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT, +unsigned ARMFastISel::ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt) { if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8) return 0; unsigned Opc; bool isBoolZext = false; - if (!SrcVT.isSimple()) return 0; - switch (SrcVT.getSimpleVT().SimpleTy) { + const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::i32); + switch (SrcVT.SimpleTy) { default: return 0; case MVT::i16: if (!Subtarget->hasV6Ops()) return 0; + RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass; if (isZExt) Opc = isThumb2 ? ARM::t2UXTH : ARM::UXTH; else @@ -2589,6 +2618,7 @@ unsigned ARMFastISel::ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT, break; case MVT::i8: if (!Subtarget->hasV6Ops()) return 0; + RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass; if (isZExt) Opc = isThumb2 ? ARM::t2UXTB : ARM::UXTB; else @@ -2596,6 +2626,7 @@ unsigned ARMFastISel::ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT, break; case MVT::i1: if (isZExt) { + RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass; Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri; isBoolZext = true; break; @@ -2603,7 +2634,7 @@ unsigned ARMFastISel::ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT, return 0; } - unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::i32)); + unsigned ResultReg = createResultReg(RC); MachineInstrBuilder MIB; MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg) .addReg(SrcReg); @@ -2622,14 +2653,18 @@ bool ARMFastISel::SelectIntExt(const Instruction *I) { Value *Src = I->getOperand(0); Type *SrcTy = Src->getType(); - EVT SrcVT, DestVT; - SrcVT = TLI.getValueType(SrcTy, true); - DestVT = TLI.getValueType(DestTy, true); - bool isZExt = isa<ZExtInst>(I); unsigned SrcReg = getRegForValue(Src); if (!SrcReg) return false; + EVT SrcEVT, DestEVT; + SrcEVT = TLI.getValueType(SrcTy, true); + DestEVT = TLI.getValueType(DestTy, true); + if (!SrcEVT.isSimple()) return false; + if (!DestEVT.isSimple()) return false; + + MVT SrcVT = SrcEVT.getSimpleVT(); + MVT DestVT = DestEVT.getSimpleVT(); unsigned ResultReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT, isZExt); if (ResultReg == 0) return false; UpdateValueMap(I, ResultReg); @@ -2809,7 +2844,7 @@ bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo, } unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV, - unsigned Align, EVT VT) { + unsigned Align, MVT VT) { bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility(); ARMConstantPoolConstant *CPV = ARMConstantPoolConstant::Create(GV, UseGOTOFF ? 
ARMCP::GOTOFF : ARMCP::GOT); @@ -2849,6 +2884,80 @@ unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV, return DestReg2; } +bool ARMFastISel::FastLowerArguments() { + if (!FuncInfo.CanLowerReturn) + return false; + + const Function *F = FuncInfo.Fn; + if (F->isVarArg()) + return false; + + CallingConv::ID CC = F->getCallingConv(); + switch (CC) { + default: + return false; + case CallingConv::Fast: + case CallingConv::C: + case CallingConv::ARM_AAPCS_VFP: + case CallingConv::ARM_AAPCS: + case CallingConv::ARM_APCS: + break; + } + + // Only handle simple cases. i.e. Up to 4 i8/i16/i32 scalar arguments + // which are passed in r0 - r3. + unsigned Idx = 1; + for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); + I != E; ++I, ++Idx) { + if (Idx > 4) + return false; + + if (F->getAttributes().hasAttribute(Idx, Attribute::InReg) || + F->getAttributes().hasAttribute(Idx, Attribute::StructRet) || + F->getAttributes().hasAttribute(Idx, Attribute::ByVal)) + return false; + + Type *ArgTy = I->getType(); + if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy()) + return false; + + EVT ArgVT = TLI.getValueType(ArgTy); + if (!ArgVT.isSimple()) return false; + switch (ArgVT.getSimpleVT().SimpleTy) { + case MVT::i8: + case MVT::i16: + case MVT::i32: + break; + default: + return false; + } + } + + + static const uint16_t GPRArgRegs[] = { + ARM::R0, ARM::R1, ARM::R2, ARM::R3 + }; + + const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::i32); + Idx = 0; + for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); + I != E; ++I, ++Idx) { + if (I->use_empty()) + continue; + unsigned SrcReg = GPRArgRegs[Idx]; + unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC); + // FIXME: Unfortunately it's necessary to emit a copy from the livein copy. + // Without this, EmitLiveInCopies may eliminate the livein if its only + // use is a bitcast (which isn't turned into an instruction). 
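As context (not in the patch): FastLowerArguments, continued just below, leans on the AAPCS core-register rule that the first four i8/i16/i32 scalar arguments arrive in r0-r3; anything beyond that makes the fast path bail out to SelectionDAG. A trivial standalone illustration of the mapping:

    #include <cstdio>
    int main() {
      static const char *GPRArgRegs[] = { "r0", "r1", "r2", "r3" };
      const unsigned NumArgs = 3;          // e.g. int f(int, short, char)
      if (NumArgs > 4)
        return 1;                          // the fast path would bail out here
      for (unsigned i = 0; i != NumArgs; ++i)
        printf("arg%u -> %s\n", i, GPRArgRegs[i]);
    }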
+ unsigned ResultReg = createResultReg(RC); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(DstReg, getKillRegState(true)); + UpdateValueMap(I, ResultReg); + } + + return true; +} + namespace llvm { FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) { diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index 9392497..7a02adf 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -15,17 +15,16 @@ #include "ARMBaseInstrInfo.h" #include "ARMBaseRegisterInfo.h" #include "ARMMachineFunctionInfo.h" -#include "llvm/CallingConv.h" -#include "llvm/Function.h" #include "MCTargetDesc/ARMAddressingModes.h" -#include "llvm/Function.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/Target/TargetOptions.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/Function.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetOptions.h" using namespace llvm; @@ -120,13 +119,14 @@ static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, DebugLoc dl, const ARMBaseInstrInfo &TII, - int NumBytes, unsigned MIFlags = MachineInstr::NoFlags) { + int NumBytes, unsigned MIFlags = MachineInstr::NoFlags, + ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) { if (isARM) emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, - ARMCC::AL, 0, TII, MIFlags); + Pred, PredReg, TII, MIFlags); else emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, - ARMCC::AL, 0, TII, MIFlags); + Pred, PredReg, TII, MIFlags); } void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { @@ -696,7 +696,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, for (unsigned i = 0, e = Regs.size(); i < e; ++i) MIB.addReg(Regs[i], getDefRegState(true)); if (DeleteRet) { - MIB->copyImplicitOps(&*MI); + MIB.copyImplicitOps(&*MI); MI->eraseFromParent(); } MI = MIB; @@ -1038,58 +1038,6 @@ static unsigned GetFunctionSizeInBytes(const MachineFunction &MF, return FnSize; } -/// estimateStackSize - Estimate and return the size of the frame. -/// FIXME: Make generic? -static unsigned estimateStackSize(MachineFunction &MF) { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); - unsigned MaxAlign = MFI->getMaxAlignment(); - int Offset = 0; - - // This code is very, very similar to PEI::calculateFrameObjectOffsets(). - // It really should be refactored to share code. Until then, changes - // should keep in mind that there's tight coupling between the two. - - for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) { - int FixedOff = -MFI->getObjectOffset(i); - if (FixedOff > Offset) Offset = FixedOff; - } - for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { - if (MFI->isDeadObjectIndex(i)) - continue; - Offset += MFI->getObjectSize(i); - unsigned Align = MFI->getObjectAlignment(i); - // Adjust to alignment boundary - Offset = (Offset+Align-1)/Align*Align; - - MaxAlign = std::max(Align, MaxAlign); - } - - if (MFI->adjustsStack() && TFI->hasReservedCallFrame(MF)) - Offset += MFI->getMaxCallFrameSize(); - - // Round up the size to a multiple of the alignment. 
If the function has - // any calls or alloca's, align to the target's StackAlignment value to - // ensure that the callee's frame or the alloca data is suitably aligned; - // otherwise, for leaf functions, align to the TransientStackAlignment - // value. - unsigned StackAlign; - if (MFI->adjustsStack() || MFI->hasVarSizedObjects() || - (RegInfo->needsStackRealignment(MF) && MFI->getObjectIndexEnd() != 0)) - StackAlign = TFI->getStackAlignment(); - else - StackAlign = TFI->getTransientStackAlignment(); - - // If the frame pointer is eliminated, all frame offsets will be relative to - // SP not FP. Align to MaxAlign so this works. - StackAlign = std::max(StackAlign, MaxAlign); - unsigned AlignMask = StackAlign - 1; - Offset = (Offset + AlignMask) & ~uint64_t(AlignMask); - - return (unsigned)Offset; -} - /// estimateRSStackSizeLimit - Look at each instruction that references stack /// frames and return the stack size limit beyond which some of these /// instructions will require a scratch register during their expansion later. @@ -1153,7 +1101,8 @@ static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) { return; // Naked functions don't spill callee-saved registers. - if (MF.getFunction()->getFnAttributes().hasAttribute(Attributes::Naked)) + if (MF.getFunction()->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::Naked)) return; // We are planning to use NEON instructions vst1 / vld1. @@ -1234,7 +1183,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // we've used all the registers and so R4 is already used, so not marking // it here will be OK. // FIXME: It will be better just to find spare register here. - unsigned StackSize = estimateStackSize(MF); + unsigned StackSize = MFI->estimateStackSize(MF); if (MFI->hasVarSizedObjects() || StackSize > 508) MRI.setPhysRegUsed(ARM::R4); } @@ -1329,7 +1278,8 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // worth the effort and added fragility? bool BigStack = (RS && - (estimateStackSize(MF) + ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >= + (MFI->estimateStackSize(MF) + + ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >= estimateRSStackSizeLimit(MF, this))) || MFI->hasVarSizedObjects() || (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF)); @@ -1418,7 +1368,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // note: Thumb1 functions spill to R12, not the stack. Reserve a slot // closest to SP or frame pointer. const TargetRegisterClass *RC = &ARM::GPRRegClass; - RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), + RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), false)); } @@ -1430,3 +1380,51 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, AFI->setLRIsSpilledForFarJump(true); } } + + +void ARMFrameLowering:: +eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + const ARMBaseInstrInfo &TII = + *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo()); + if (!hasReservedCallFrame(MF)) { + // If we have alloca, convert as follows: + // ADJCALLSTACKDOWN -> sub, sp, sp, amount + // ADJCALLSTACKUP -> add, sp, sp, amount + MachineInstr *Old = I; + DebugLoc dl = Old->getDebugLoc(); + unsigned Amount = Old->getOperand(0).getImm(); + if (Amount != 0) { + // We need to keep the stack aligned properly. 
To do this, we round the + // amount of space needed for the outgoing arguments up to the next + // alignment boundary. + unsigned Align = getStackAlignment(); + Amount = (Amount+Align-1)/Align*Align; + + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + assert(!AFI->isThumb1OnlyFunction() && + "This eliminateCallFramePseudoInstr does not support Thumb1!"); + bool isARM = !AFI->isThumbFunction(); + + // Replace the pseudo instruction with a new instruction... + unsigned Opc = Old->getOpcode(); + int PIdx = Old->findFirstPredOperandIdx(); + ARMCC::CondCodes Pred = (PIdx == -1) + ? ARMCC::AL : (ARMCC::CondCodes)Old->getOperand(PIdx).getImm(); + if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) { + // Note: PredReg is operand 2 for ADJCALLSTACKDOWN. + unsigned PredReg = Old->getOperand(2).getReg(); + emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, MachineInstr::NoFlags, + Pred, PredReg); + } else { + // Note: PredReg is operand 3 for ADJCALLSTACKUP. + unsigned PredReg = Old->getOperand(3).getReg(); + assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP); + emitSPUpdate(isARM, MBB, I, dl, TII, Amount, MachineInstr::NoFlags, + Pred, PredReg); + } + } + } + MBB.erase(I); +} + diff --git a/lib/Target/ARM/ARMFrameLowering.h b/lib/Target/ARM/ARMFrameLowering.h index a1c2b93..efa255a 100644 --- a/lib/Target/ARM/ARMFrameLowering.h +++ b/lib/Target/ARM/ARMFrameLowering.h @@ -70,6 +70,11 @@ public: unsigned LdrOpc, bool isVarArg, bool NoGap, bool(*Func)(unsigned, bool), unsigned NumAlignedDPRCS2Regs) const; + + virtual void eliminateCallFramePseudoInstr( + MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const; }; } // End llvm namespace diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index efd6d2b..2c51de2 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -16,24 +16,25 @@ #include "ARMBaseInstrInfo.h" #include "ARMTargetMachine.h" #include "MCTargetDesc/ARMAddressingModes.h" -#include "llvm/CallingConv.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/Intrinsics.h" -#include "llvm/LLVMContext.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGISel.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetOptions.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetOptions.h" using namespace llvm; @@ -78,6 +79,8 @@ public: return "ARM Instruction Selection"; } + virtual void PreprocessISelDAG(); + /// getI32Imm - Return a target constant of type i32 with the specified /// value. 
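An aside (not part of the patch) on the rounding used above in eliminateCallFramePseudoInstr: the outgoing-argument amount is bumped to the next multiple of the stack alignment, and left alone when it is already aligned. A standalone check:

    #include <cassert>
    int main() {
      const unsigned Align = 8;            // assumed stack alignment
      auto roundUp = [&](unsigned Amount) {
        return (Amount + Align - 1) / Align * Align;
      };
      assert(roundUp(20) == 24);           // bumped to the next boundary
      assert(roundUp(24) == 24);           // already aligned: unchanged
    }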
inline SDValue getI32Imm(unsigned Imm) { @@ -255,6 +258,8 @@ private: // Select special operations if node forms integer ABS pattern SDNode *SelectABSOp(SDNode *N); + SDNode *SelectInlineAsm(SDNode *N); + SDNode *SelectConcatVector(SDNode *N); SDNode *SelectAtomic64(SDNode *Node, unsigned Opc); @@ -265,15 +270,16 @@ private: char ConstraintCode, std::vector<SDValue> &OutOps); - // Form pairs of consecutive S, D, or Q registers. - SDNode *PairSRegs(EVT VT, SDValue V0, SDValue V1); - SDNode *PairDRegs(EVT VT, SDValue V0, SDValue V1); - SDNode *PairQRegs(EVT VT, SDValue V0, SDValue V1); + // Form pairs of consecutive R, S, D, or Q registers. + SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1); + SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1); + SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1); + SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1); // Form sequences of 4 consecutive S, D, or Q registers. - SDNode *QuadSRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); - SDNode *QuadDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); - SDNode *QuadQRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); + SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); + SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); + SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); // Get the alignment operand for a NEON VLD or VST instruction. SDValue GetVLDSTAlign(SDValue Align, unsigned NumVecs, bool is64BitVector); @@ -326,6 +332,87 @@ static bool isScaledConstantInRange(SDValue Node, int Scale, return ScaledConstant >= RangeMin && ScaledConstant < RangeMax; } +void ARMDAGToDAGISel::PreprocessISelDAG() { + if (!Subtarget->hasV6T2Ops()) + return; + + bool isThumb2 = Subtarget->isThumb(); + for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), + E = CurDAG->allnodes_end(); I != E; ) { + SDNode *N = I++; // Preincrement iterator to avoid invalidation issues. + + if (N->getOpcode() != ISD::ADD) + continue; + + // Look for (add X1, (and (srl X2, c1), c2)) where c2 is a constant with + // leading zeros, followed by consecutive set bits, followed by 1 or 2 + // trailing zeros, e.g. 1020. + // Transform the expression to + // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number + // of trailing zeros of c2. The left shift would be folded as a shifter + // operand of 'add' and the 'and' and 'srl' would become a bit-extraction + // node (UBFX). + + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + unsigned And_imm = 0; + if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) { + if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm)) + std::swap(N0, N1); + } + if (!And_imm) + continue; + + // Check if the AND mask is an immediate of the form: 000.....1111111100 + unsigned TZ = CountTrailingZeros_32(And_imm); + if (TZ != 1 && TZ != 2) + // Be conservative here. Shifter operands aren't always free. e.g. On + // Swift, a left shifter operand of 1 or 2 is free but others are not. + // e.g. + // ubfx r3, r1, #16, #8 + // ldr.w r3, [r0, r3, lsl #2] + // vs. + // mov.w r9, #1020 + // and.w r2, r9, r1, lsr #14 + // ldr r2, [r0, r2] + continue; + And_imm >>= TZ; + if (And_imm & (And_imm + 1)) + continue; + + // Look for (and (srl X, c1), c2).
+ SDValue Srl = N1.getOperand(0); + unsigned Srl_imm = 0; + if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) || + (Srl_imm <= 2)) + continue; + + // Make sure first operand is not a shifter operand which would prevent + // folding of the left shift. + SDValue CPTmp0; + SDValue CPTmp1; + SDValue CPTmp2; + if (isThumb2) { + if (SelectT2ShifterOperandReg(N0, CPTmp0, CPTmp1)) + continue; + } else { + if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) || + SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2)) + continue; + } + + // Now make the transformation. + Srl = CurDAG->getNode(ISD::SRL, Srl.getDebugLoc(), MVT::i32, + Srl.getOperand(0), + CurDAG->getConstant(Srl_imm+TZ, MVT::i32)); + N1 = CurDAG->getNode(ISD::AND, N1.getDebugLoc(), MVT::i32, + Srl, CurDAG->getConstant(And_imm, MVT::i32)); + N1 = CurDAG->getNode(ISD::SHL, N1.getDebugLoc(), MVT::i32, + N1, CurDAG->getConstant(TZ, MVT::i32)); + CurDAG->UpdateNodeOperands(N, N0, N1); + } +} + /// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS /// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at /// least on current ARM implementations) which should be avoidded. @@ -1444,9 +1531,19 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) { return NULL; } -/// PairSRegs - Form a D register from a pair of S registers. -/// -SDNode *ARMDAGToDAGISel::PairSRegs(EVT VT, SDValue V0, SDValue V1) { +/// \brief Form a GPRPair pseudo register from a pair of GPR regs. +SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) { + DebugLoc dl = V0.getNode()->getDebugLoc(); + SDValue RegClass = + CurDAG->getTargetConstant(ARM::GPRPairRegClassID, MVT::i32); + SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, MVT::i32); + SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, MVT::i32); + const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5); +} + +/// \brief Form a D register from a pair of S registers. +SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) { DebugLoc dl = V0.getNode()->getDebugLoc(); SDValue RegClass = CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, MVT::i32); @@ -1456,9 +1553,8 @@ SDNode *ARMDAGToDAGISel::PairSRegs(EVT VT, SDValue V0, SDValue V1) { return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5); } -/// PairDRegs - Form a quad register from a pair of D registers. -/// -SDNode *ARMDAGToDAGISel::PairDRegs(EVT VT, SDValue V0, SDValue V1) { +/// \brief Form a quad register from a pair of D registers. +SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) { DebugLoc dl = V0.getNode()->getDebugLoc(); SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, MVT::i32); SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32); @@ -1467,9 +1563,8 @@ SDNode *ARMDAGToDAGISel::PairDRegs(EVT VT, SDValue V0, SDValue V1) { return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5); } -/// PairQRegs - Form 4 consecutive D registers from a pair of Q registers. -/// -SDNode *ARMDAGToDAGISel::PairQRegs(EVT VT, SDValue V0, SDValue V1) { +/// \brief Form 4 consecutive D registers from a pair of Q registers. 
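An aside (not part of the patch): the PreprocessISelDAG rewrite above rests on the identity ((x >> c1) & c2) == (((x >> (c1 + tz)) & (c2 >> tz)) << tz) whenever c2 is a run of consecutive ones with tz trailing zeros, which is what lets the shl fold into the add while the and/srl become a UBFX. A standalone check, using the GCC/Clang builtin __builtin_ctz:

    #include <cassert>
    #include <cstdint>
    int main() {
      const uint32_t x = 0xDEADBEEF, c2 = 1020;       // c2 = 0b1111111100
      const unsigned c1 = 14, tz = __builtin_ctz(c2); // tz = 2
      assert(((c2 >> tz) & ((c2 >> tz) + 1)) == 0);   // mask is contiguous ones
      uint32_t before = (x >> c1) & c2;               // and(srl(x, c1), c2)
      uint32_t after = ((x >> (c1 + tz)) & (c2 >> tz)) << tz; // shl(and(srl))
      assert(before == after);
    }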
+SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) { DebugLoc dl = V0.getNode()->getDebugLoc(); SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, MVT::i32); SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32); @@ -1478,9 +1573,8 @@ SDNode *ARMDAGToDAGISel::PairQRegs(EVT VT, SDValue V0, SDValue V1) { return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5); } -/// QuadSRegs - Form 4 consecutive S registers. -/// -SDNode *ARMDAGToDAGISel::QuadSRegs(EVT VT, SDValue V0, SDValue V1, +/// \brief Form 4 consecutive S registers. +SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3) { DebugLoc dl = V0.getNode()->getDebugLoc(); SDValue RegClass = @@ -1494,9 +1588,8 @@ SDNode *ARMDAGToDAGISel::QuadSRegs(EVT VT, SDValue V0, SDValue V1, return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9); } -/// QuadDRegs - Form 4 consecutive D registers. -/// -SDNode *ARMDAGToDAGISel::QuadDRegs(EVT VT, SDValue V0, SDValue V1, +/// \brief Form 4 consecutive D registers. +SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3) { DebugLoc dl = V0.getNode()->getDebugLoc(); SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, MVT::i32); @@ -1509,9 +1602,8 @@ SDNode *ARMDAGToDAGISel::QuadDRegs(EVT VT, SDValue V0, SDValue V1, return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9); } -/// QuadQRegs - Form 4 consecutive Q registers. -/// -SDNode *ARMDAGToDAGISel::QuadQRegs(EVT VT, SDValue V0, SDValue V1, +/// \brief Form 4 consecutive Q registers. +SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3) { DebugLoc dl = V0.getNode()->getDebugLoc(); SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, MVT::i32); @@ -1784,7 +1876,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, SDValue V0 = N->getOperand(Vec0Idx + 0); SDValue V1 = N->getOperand(Vec0Idx + 1); if (NumVecs == 2) - SrcReg = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0); + SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0); else { SDValue V2 = N->getOperand(Vec0Idx + 2); // If it's a vst3, form a quad D-register and leave the last part as @@ -1792,13 +1884,13 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, SDValue V3 = (NumVecs == 3) ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) : N->getOperand(Vec0Idx + 3); - SrcReg = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0); + SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0); } } else { // Form a QQ register. SDValue Q0 = N->getOperand(Vec0Idx); SDValue Q1 = N->getOperand(Vec0Idx + 1); - SrcReg = SDValue(PairQRegs(MVT::v4i64, Q0, Q1), 0); + SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0); } unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] : @@ -1840,7 +1932,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, SDValue V3 = (NumVecs == 3) ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) : N->getOperand(Vec0Idx + 3); - SDValue RegSeq = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0); + SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0); // Store the even D registers. This is always an updating store, so that it // provides the address to the second store for the odd subregs. 
@@ -1950,18 +2042,18 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, SDValue V1 = N->getOperand(Vec0Idx + 1); if (NumVecs == 2) { if (is64BitVector) - SuperReg = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0); + SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0); else - SuperReg = SDValue(PairQRegs(MVT::v4i64, V0, V1), 0); + SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0); } else { SDValue V2 = N->getOperand(Vec0Idx + 2); SDValue V3 = (NumVecs == 3) ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) : N->getOperand(Vec0Idx + 3); if (is64BitVector) - SuperReg = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0); + SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0); else - SuperReg = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0); + SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0); } Ops.push_back(SuperReg); Ops.push_back(getI32Imm(Lane)); @@ -2087,7 +2179,7 @@ SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, SDValue V0 = N->getOperand(FirstTblReg + 0); SDValue V1 = N->getOperand(FirstTblReg + 1); if (NumVecs == 2) - RegSeq = SDValue(PairDRegs(MVT::v16i8, V0, V1), 0); + RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0); else { SDValue V2 = N->getOperand(FirstTblReg + 2); // If it's a vtbl3, form a quad D-register and leave the last part as @@ -2095,7 +2187,7 @@ SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, SDValue V3 = (NumVecs == 3) ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) : N->getOperand(FirstTblReg + 3); - RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0); + RegSeq = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0); } SmallVector<SDValue, 6> Ops; @@ -2113,10 +2205,10 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N, if (!Subtarget->hasV6T2Ops()) return NULL; - unsigned Opc = isSigned ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX) + unsigned Opc = isSigned + ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX) : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX); - // For unsigned extracts, check for a shift right and mask unsigned And_imm = 0; if (N->getOpcode() == ISD::AND) { @@ -2134,7 +2226,29 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N, // Note: The width operand is encoded as width-1. unsigned Width = CountTrailingOnes_32(And_imm) - 1; unsigned LSB = Srl_imm; + SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); + + if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) { + // It's cheaper to use a right shift to extract the top bits. + if (Subtarget->isThumb()) { + Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri; + SDValue Ops[] = { N->getOperand(0).getOperand(0), + CurDAG->getTargetConstant(LSB, MVT::i32), + getAL(CurDAG), Reg0, Reg0 }; + return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5); + } + + // ARM models shift instructions as MOVsi with shifter operand. 
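An aside (not part of the patch) on the LSB + Width + 1 == 32 check above: when the extracted field reaches the most significant bit, the mask after the shift keeps every remaining bit, so a plain shift right already performs the extraction and no UBFX/SBFX is needed. A standalone check (Width uses the instruction's width-minus-one encoding); the MOVsi shifter-operand path continues below.

    #include <cassert>
    #include <cstdint>
    int main() {
      const uint32_t x = 0x89ABCDEF;
      const unsigned LSB = 24, Width = 7;   // field = bits 24..31
      assert(LSB + Width + 1 == 32);        // field touches the MSB
      uint32_t ubfx = (x >> LSB) & ((1u << (Width + 1)) - 1);
      assert(ubfx == x >> LSB);             // LSR alone extracts it
    }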
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL); + SDValue ShOpc = + CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), + MVT::i32); + SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc, + getAL(CurDAG), Reg0, Reg0 }; + return CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops, 5); + } + SDValue Ops[] = { N->getOperand(0).getOperand(0), CurDAG->getTargetConstant(LSB, MVT::i32), CurDAG->getTargetConstant(Width, MVT::i32), @@ -2411,7 +2525,7 @@ SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) { EVT VT = N->getValueType(0); if (!VT.is128BitVector() || N->getNumOperands() != 2) llvm_unreachable("unexpected CONCAT_VECTORS"); - return PairDRegs(VT, N->getOperand(0), N->getOperand(1)); + return createDRegPairNode(VT, N->getOperand(0), N->getOperand(1)); } SDNode *ARMDAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) { @@ -2441,6 +2555,12 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { switch (N->getOpcode()) { default: break; + case ISD::INLINEASM: { + SDNode *ResNode = SelectInlineAsm(N); + if (ResNode) + return ResNode; + break; + } case ISD::XOR: { // Select special operations if XOR node forms integer ABS pattern SDNode *ResNode = SelectABSOp(N); @@ -2790,13 +2910,13 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { unsigned NumElts = VecVT.getVectorNumElements(); if (EltVT == MVT::f64) { assert(NumElts == 2 && "unexpected type for BUILD_VECTOR"); - return PairDRegs(VecVT, N->getOperand(0), N->getOperand(1)); + return createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)); } assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR"); if (NumElts == 2) - return PairSRegs(VecVT, N->getOperand(0), N->getOperand(1)); + return createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)); assert(NumElts == 4 && "unexpected type for BUILD_VECTOR"); - return QuadSRegs(VecVT, N->getOperand(0), N->getOperand(1), + return createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1), N->getOperand(2), N->getOperand(3)); } @@ -3009,17 +3129,19 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { DebugLoc dl = N->getDebugLoc(); SDValue Chain = N->getOperand(0); - unsigned NewOpc = ARM::LDREXD; - if (Subtarget->isThumb() && Subtarget->hasThumb2()) - NewOpc = ARM::t2LDREXD; + bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2(); + unsigned NewOpc = isThumb ? ARM::t2LDREXD :ARM::LDREXD; // arm_ldrexd returns a i64 value in {i32, i32} std::vector<EVT> ResTys; - ResTys.push_back(MVT::i32); - ResTys.push_back(MVT::i32); + if (isThumb) { + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::i32); + } else + ResTys.push_back(MVT::Untyped); ResTys.push_back(MVT::Other); - // place arguments in the right order + // Place arguments in the right order. SmallVector<SDValue, 7> Ops; Ops.push_back(MemAddr); Ops.push_back(getAL(CurDAG)); @@ -3032,30 +3154,33 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1); - // Until there's support for specifing explicit register constraints - // like the use of even/odd register pair, hardcode ldrexd to always - // use the pair [R0, R1] to hold the load result. - Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ARM::R0, - SDValue(Ld, 0), SDValue(0,0)); - Chain = CurDAG->getCopyToReg(Chain, dl, ARM::R1, - SDValue(Ld, 1), Chain.getValue(1)); - // Remap uses. - SDValue Glue = Chain.getValue(1); + SDValue OutChain = isThumb ? 
SDValue(Ld, 2) : SDValue(Ld, 1); if (!SDValue(N, 0).use_empty()) { - SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - ARM::R0, MVT::i32, Glue); - Glue = Result.getValue(2); + SDValue Result; + if (isThumb) + Result = SDValue(Ld, 0); + else { + SDValue SubRegIdx = CurDAG->getTargetConstant(ARM::gsub_0, MVT::i32); + SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, + dl, MVT::i32, SDValue(Ld, 0), SubRegIdx); + Result = SDValue(ResNode,0); + } ReplaceUses(SDValue(N, 0), Result); } if (!SDValue(N, 1).use_empty()) { - SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - ARM::R1, MVT::i32, Glue); - Glue = Result.getValue(2); + SDValue Result; + if (isThumb) + Result = SDValue(Ld, 1); + else { + SDValue SubRegIdx = CurDAG->getTargetConstant(ARM::gsub_1, MVT::i32); + SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, + dl, MVT::i32, SDValue(Ld, 0), SubRegIdx); + Result = SDValue(ResNode,0); + } ReplaceUses(SDValue(N, 1), Result); } - - ReplaceUses(SDValue(N, 2), SDValue(Ld, 2)); + ReplaceUses(SDValue(N, 2), OutChain); return NULL; } @@ -3066,38 +3191,25 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue Val1 = N->getOperand(3); SDValue MemAddr = N->getOperand(4); - // Until there's support for specifing explicit register constraints - // like the use of even/odd register pair, hardcode strexd to always - // use the pair [R2, R3] to hold the i64 (i32, i32) value to be stored. - Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ARM::R2, Val0, - SDValue(0, 0)); - Chain = CurDAG->getCopyToReg(Chain, dl, ARM::R3, Val1, Chain.getValue(1)); - - SDValue Glue = Chain.getValue(1); - Val0 = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - ARM::R2, MVT::i32, Glue); - Glue = Val0.getValue(1); - Val1 = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - ARM::R3, MVT::i32, Glue); - // Store exclusive double return a i32 value which is the return status // of the issued store. - std::vector<EVT> ResTys; - ResTys.push_back(MVT::i32); - ResTys.push_back(MVT::Other); + EVT ResTys[] = { MVT::i32, MVT::Other }; - // place arguments in the right order + bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2(); + // Place arguments in the right order. SmallVector<SDValue, 7> Ops; - Ops.push_back(Val0); - Ops.push_back(Val1); + if (isThumb) { + Ops.push_back(Val0); + Ops.push_back(Val1); + } else + // arm_strexd uses GPRPair. + Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0)); Ops.push_back(MemAddr); Ops.push_back(getAL(CurDAG)); Ops.push_back(CurDAG->getRegister(0, MVT::i32)); Ops.push_back(Chain); - unsigned NewOpc = ARM::STREXD; - if (Subtarget->isThumb() && Subtarget->hasThumb2()) - NewOpc = ARM::t2STREXD; + unsigned NewOpc = isThumb ? ARM::t2STREXD : ARM::STREXD; SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops.data(), Ops.size()); @@ -3295,7 +3407,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { // Form a REG_SEQUENCE to force register allocation. 
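An aside (not part of the patch): the GPRPair plumbing above encodes how a 64-bit value straddles two core registers; assuming little-endian ARM, gsub_0 holds the low word and gsub_1 the high word, which is why the LDREXD results are recovered with EXTRACT_SUBREG on those indices. A standalone sketch of the split:

    #include <cassert>
    #include <cstdint>
    int main() {
      const uint64_t v = 0x0123456789ABCDEFull;
      uint32_t lo = (uint32_t)v;            // gsub_0: first register of the pair
      uint32_t hi = (uint32_t)(v >> 32);    // gsub_1: second register of the pair
      assert((((uint64_t)hi << 32) | lo) == v);
    }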
SDValue V0 = N->getOperand(0); SDValue V1 = N->getOperand(1); - SDValue RegSeq = SDValue(PairDRegs(MVT::v16i8, V0, V1), 0); + SDValue RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0); SmallVector<SDValue, 6> Ops; Ops.push_back(RegSeq); @@ -3325,11 +3437,152 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { return SelectAtomic64(N, ARM::ATOMSWAP6432); case ARMISD::ATOMCMPXCHG64_DAG: return SelectAtomic64(N, ARM::ATOMCMPXCHG6432); + + case ARMISD::ATOMMIN64_DAG: + return SelectAtomic64(N, ARM::ATOMMIN6432); + case ARMISD::ATOMUMIN64_DAG: + return SelectAtomic64(N, ARM::ATOMUMIN6432); + case ARMISD::ATOMMAX64_DAG: + return SelectAtomic64(N, ARM::ATOMMAX6432); + case ARMISD::ATOMUMAX64_DAG: + return SelectAtomic64(N, ARM::ATOMUMAX6432); } return SelectCode(N); } +SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N) { + std::vector<SDValue> AsmNodeOperands; + unsigned Flag, Kind; + bool Changed = false; + unsigned NumOps = N->getNumOperands(); + + ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>( + N->getOperand(InlineAsm::Op_AsmString)); + StringRef AsmString = StringRef(S->getSymbol()); + + // Normally, i64 data is bound to two arbitrary GPRs for the "%r" constraint. + // However, some instructions (e.g. ldrexd/strexd in ARM mode) require + // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs + // respectively. Since there is no constraint to explicitly specify a + // reg pair, we search for the %H operand inside the asm string. If it is + // found, the transformation below enforces a GPRPair reg class for "%r" + // for 64-bit data. + if (AsmString.find(":H}") == StringRef::npos) + return NULL; + + DebugLoc dl = N->getDebugLoc(); + SDValue Glue = N->getOperand(NumOps-1); + + // Glue node will be appended last. + for (unsigned i = 0; i < NumOps - 1; ++i) { + SDValue op = N->getOperand(i); + AsmNodeOperands.push_back(op); + + if (i < InlineAsm::Op_FirstOperand) + continue; + + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) { + Flag = C->getZExtValue(); + Kind = InlineAsm::getKind(Flag); + } + else + continue; + + if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef + && Kind != InlineAsm::Kind_RegDefEarlyClobber) + continue; + + unsigned RegNum = InlineAsm::getNumOperandRegisters(Flag); + unsigned RC; + bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC); + if (!HasRC || RC != ARM::GPRRegClassID || RegNum != 2) + continue; + + assert((i+2 < NumOps-1) && "Invalid number of operands in inline asm"); + SDValue V0 = N->getOperand(i+1); + SDValue V1 = N->getOperand(i+2); + unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg(); + unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg(); + SDValue PairedReg; + MachineRegisterInfo &MRI = MF->getRegInfo(); + + if (Kind == InlineAsm::Kind_RegDef || + Kind == InlineAsm::Kind_RegDefEarlyClobber) { + // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to + // the original GPRs. + + unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass); + PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped); + SDValue Chain = SDValue(N,0); + + SDNode *GU = N->getGluedUser(); + SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped, + Chain.getValue(1)); + + // Extract values from a GPRPair reg and copy to the original GPR reg.
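An aside (not part of the patch): the ":H}" scan above keys off the GCC/Clang ARM operand modifier %H, which names the high register of a 64-bit "r" operand. A sketch of the source-level pattern that exercises this path; the asm body is only meaningful when compiling for ARM, so it is guarded. The GPRPair extraction code continues below.

    #include <stdint.h>
    uint64_t load64(uint64_t *p) {
      uint64_t v;
    #ifdef __arm__
      // %0 names the low register of the pair and %H0 the high one; ldrexd
      // requires an even/odd pair, which the GPRPair constraint enforces.
      asm volatile("ldrexd %0, %H0, [%1]" : "=&r"(v) : "r"(p));
    #else
      v = *p;               // off-target placeholder, keeps the sketch compilable
    #endif
      return v;
    }
    int main() { uint64_t x = 42; return load64(&x) == 42 ? 0 : 1; }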
+ SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32, + RegCopy); + SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32, + RegCopy); + SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0, + RegCopy.getValue(1)); + SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1)); + + // Update the original glue user. + std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1); + Ops.push_back(T1.getValue(1)); + CurDAG->UpdateNodeOperands(GU, &Ops[0], Ops.size()); + GU = T1.getNode(); + } + else { + // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a + // GPRPair and then pass the GPRPair to the inline asm. + SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain]; + + // As REG_SEQ doesn't take RegisterSDNode, we copy them first. + SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32, + Chain.getValue(1)); + SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32, + T0.getValue(1)); + SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0); + + // Copy REG_SEQ into a GPRPair-typed VR and replace the original two + // i32 VRs of inline asm with it. + unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass); + PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped); + Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1)); + + AsmNodeOperands[InlineAsm::Op_InputChain] = Chain; + Glue = Chain.getValue(1); + } + + Changed = true; + + if(PairedReg.getNode()) { + Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/); + Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID); + // Replace the current flag. + AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant( + Flag, MVT::i32); + // Add the new register node and skip the original two GPRs. + AsmNodeOperands.push_back(PairedReg); + // Skip the next two GPRs. 
+ i += 2; + } + } + + AsmNodeOperands.push_back(Glue); + if (!Changed) + return NULL; + + SDValue New = CurDAG->getNode(ISD::INLINEASM, N->getDebugLoc(), + CurDAG->getVTList(MVT::Other, MVT::Glue), &AsmNodeOperands[0], + AsmNodeOperands.size()); + New->setNodeId(-1); + return New.getNode(); +} + + bool ARMDAGToDAGISel:: SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) { diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index ff99b04..bb26090 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -23,14 +23,8 @@ #include "ARMTargetMachine.h" #include "ARMTargetObjectFile.h" #include "MCTargetDesc/ARMAddressingModes.h" -#include "llvm/CallingConv.h" -#include "llvm/Constants.h" -#include "llvm/Function.h" -#include "llvm/GlobalValue.h" -#include "llvm/Instruction.h" -#include "llvm/Instructions.h" -#include "llvm/Intrinsics.h" -#include "llvm/Type.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/IntrinsicLowering.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -40,14 +34,20 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Type.h" #include "llvm/MC/MCSectionMachO.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/Statistic.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetOptions.h" using namespace llvm; STATISTIC(NumTailCalls, "Number of tail calls"); @@ -504,6 +504,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FRINT, MVT::v2f64, Expand); setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand); setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand); + setOperationAction(ISD::FMA, MVT::v2f64, Expand); setOperationAction(ISD::FSQRT, MVT::v4f32, Expand); setOperationAction(ISD::FSIN, MVT::v4f32, Expand); @@ -515,8 +516,29 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FLOG10, MVT::v4f32, Expand); setOperationAction(ISD::FEXP, MVT::v4f32, Expand); setOperationAction(ISD::FEXP2, MVT::v4f32, Expand); + setOperationAction(ISD::FCEIL, MVT::v4f32, Expand); + setOperationAction(ISD::FTRUNC, MVT::v4f32, Expand); + setOperationAction(ISD::FRINT, MVT::v4f32, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand); setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand); + // Mark v2f32 intrinsics. 
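An aside (not part of the patch) on the v2f32 marks that follow: Expand tells the type legalizer there is no NEON instruction for these operations, so they are unrolled into one scalar operation or libcall per lane, roughly:

    #include <cmath>
    int main() {
      float v[2] = { 2.0f, 9.0f };   // a v2f32 value
      for (float &x : v)             // e.g. FSQRT expands lane by lane
        x = std::sqrt(x);
      return v[1] == 3.0f ? 0 : 1;
    }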
+ setOperationAction(ISD::FSQRT, MVT::v2f32, Expand); + setOperationAction(ISD::FSIN, MVT::v2f32, Expand); + setOperationAction(ISD::FCOS, MVT::v2f32, Expand); + setOperationAction(ISD::FPOWI, MVT::v2f32, Expand); + setOperationAction(ISD::FPOW, MVT::v2f32, Expand); + setOperationAction(ISD::FLOG, MVT::v2f32, Expand); + setOperationAction(ISD::FLOG2, MVT::v2f32, Expand); + setOperationAction(ISD::FLOG10, MVT::v2f32, Expand); + setOperationAction(ISD::FEXP, MVT::v2f32, Expand); + setOperationAction(ISD::FEXP2, MVT::v2f32, Expand); + setOperationAction(ISD::FCEIL, MVT::v2f32, Expand); + setOperationAction(ISD::FTRUNC, MVT::v2f32, Expand); + setOperationAction(ISD::FRINT, MVT::v2f32, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Expand); + setOperationAction(ISD::FFLOOR, MVT::v2f32, Expand); + // Neon does not support some operations on v1i64 and v2i64 types. setOperationAction(ISD::MUL, MVT::v1i64, Expand); // Custom handling for some quad-vector types to detect VMULL. @@ -539,6 +561,33 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom); + setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand); + setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand); + + // Custom expand long extensions to vectors. + setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom); + setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom); + setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom); + setOperationAction(ISD::ZERO_EXTEND, MVT::v4i64, Custom); + setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom); + setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom); + setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom); + setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom); + + // NEON does not have single instruction CTPOP for vectors with element + // types wider than 8-bits. However, custom lowering can leverage the + // v8i8/v16i8 vcnt instruction. + setOperationAction(ISD::CTPOP, MVT::v2i32, Custom); + setOperationAction(ISD::CTPOP, MVT::v4i32, Custom); + setOperationAction(ISD::CTPOP, MVT::v4i16, Custom); + setOperationAction(ISD::CTPOP, MVT::v8i16, Custom); + + // NEON only has FMA instructions as of VFP4. + if (!Subtarget->hasVFP4()) { + setOperationAction(ISD::FMA, MVT::v2f32, Expand); + setOperationAction(ISD::FMA, MVT::v4f32, Expand); + } + setTargetDAGCombine(ISD::INTRINSIC_VOID); setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); @@ -688,7 +737,11 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom); setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Custom); setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom); + setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom); + setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i64, Custom); + setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i64, Custom); + setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i64, Custom); + setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i64, Custom); setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom); // Automatically insert fences (dmb ist) around ATOMIC_SWAP etc. 
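An aside (not part of the patch): the newly Custom-lowered 64-bit ATOMIC_LOAD_MIN/MAX/UMIN/UMAX have no single ARM instruction and are expanded into exclusive-load/store retry loops. The same shape, expressed portably with a std::atomic compare-and-swap:

    #include <algorithm>
    #include <atomic>
    #include <cstdint>
    int64_t atomicMin(std::atomic<int64_t> &A, int64_t V) {
      int64_t Old = A.load();
      // Retry until the CAS (strexd on ARM) succeeds; Old refreshes on failure.
      while (!A.compare_exchange_weak(Old, std::min(Old, V))) {
      }
      return Old;
    }
    int main() {
      std::atomic<int64_t> A(10);
      return (atomicMin(A, 7) == 10 && A.load() == 7) ? 0 : 1;
    }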
setInsertFencesForAtomic(true); @@ -762,6 +815,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FSIN, MVT::f32, Expand); setOperationAction(ISD::FCOS, MVT::f32, Expand); setOperationAction(ISD::FCOS, MVT::f64, Expand); + setOperationAction(ISD::FSINCOS, MVT::f64, Expand); + setOperationAction(ISD::FSINCOS, MVT::f32, Expand); setOperationAction(ISD::FREM, MVT::f64, Expand); setOperationAction(ISD::FREM, MVT::f32, Expand); if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() && @@ -814,18 +869,19 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setSchedulingPreference(Sched::Hybrid); //// temporary - rewrite interface to use type - maxStoresPerMemcpy = maxStoresPerMemcpyOptSize = 1; - maxStoresPerMemset = 16; - maxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4; + MaxStoresPerMemset = 8; + MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4; + MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores + MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 4 : 2; + MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores + MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 4 : 2; // On ARM arguments smaller than 4 bytes are extended, so all arguments // are at least 4 bytes aligned. setMinStackArgumentAlignment(4); - benefitFromCodePlacementOpt = true; - // Prefer likely predicted branches to selects on out-of-order cores. - predictableSelectIsExpensive = Subtarget->isLikeA9(); + PredictableSelectIsExpensive = Subtarget->isLikeA9(); setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2); } @@ -841,10 +897,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // due to the common occurrence of cross class copies and subregister insertions // and extractions. std::pair<const TargetRegisterClass*, uint8_t> -ARMTargetLowering::findRepresentativeClass(EVT VT) const{ +ARMTargetLowering::findRepresentativeClass(MVT VT) const{ const TargetRegisterClass *RRC = 0; uint8_t Cost = 1; - switch (VT.getSimpleVT().SimpleTy) { + switch (VT.SimpleTy) { default: return TargetLowering::findRepresentativeClass(VT); // Use DPR as representative register class for all floating point @@ -1024,7 +1080,7 @@ EVT ARMTargetLowering::getSetCCResultType(EVT VT) const { /// getRegClassFor - Return the register class that should be used for the /// specified value type. -const TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const { +const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const { // Map v4i64 to QQ registers but do not make the type legal. Similarly map // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to // load / store 4 to 8 consecutive D registers. @@ -1557,7 +1613,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // On ELF targets for PIC code, direct calls should go through the PLT unsigned OpFlags = 0; if (Subtarget->isTargetELF() && - getTargetMachine().getRelocationModel() == Reloc::PIC_) + getTargetMachine().getRelocationModel() == Reloc::PIC_) OpFlags = ARMII::MO_PLT; Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags); } @@ -1594,8 +1650,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // FIXME: handle tail calls differently. unsigned CallOpc; - bool HasMinSizeAttr = MF.getFunction()->getFnAttributes(). - hasAttribute(Attributes::MinSize); + bool HasMinSizeAttr = MF.getFunction()->getAttributes(). 
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize); if (Subtarget->isThumb()) { if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps()) CallOpc = ARMISD::CALL_NOLINK; @@ -1875,6 +1931,17 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, return true; } +bool +ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv, + MachineFunction &MF, bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + LLVMContext &Context) const { + SmallVector<CCValAssign, 16> RVLocs; + CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), RVLocs, Context); + return CCInfo.CheckReturn(Outs, CCAssignFnForNode(CallConv, /*Return=*/true, + isVarArg)); +} + SDValue ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, @@ -1893,15 +1960,9 @@ ARMTargetLowering::LowerReturn(SDValue Chain, CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true, isVarArg)); - // If this is the first return lowered for this function, add - // the regs to the liveout set for the function. - if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { - for (unsigned i = 0; i != RVLocs.size(); ++i) - if (RVLocs[i].isRegLoc()) - DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); - } - SDValue Flag; + SmallVector<SDValue, 4> RetOps; + RetOps.push_back(Chain); // Operand #0 = Chain (updated below) // Copy the result values into the output registers. for (unsigned i = 0, realRVLocIdx = 0; @@ -1930,10 +1991,12 @@ ARMTargetLowering::LowerReturn(SDValue Chain, Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag); Flag = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); VA = RVLocs[++i]; // skip ahead to next loc Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs.getValue(1), Flag); Flag = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); VA = RVLocs[++i]; // skip ahead to next loc // Extract the 2nd half and fall through to handle it as an f64 value. @@ -1946,6 +2009,7 @@ ARMTargetLowering::LowerReturn(SDValue Chain, DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1); Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag); Flag = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); VA = RVLocs[++i]; // skip ahead to next loc Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1), Flag); @@ -1955,15 +2019,16 @@ ARMTargetLowering::LowerReturn(SDValue Chain, // Guarantee that all emitted copies are // stuck together, avoiding something bad. Flag = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); } - SDValue result; + // Update chain and glue. 
+ RetOps[0] = Chain; if (Flag.getNode()) - result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain, Flag); - else // Return Void - result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain); + RetOps.push_back(Flag); - return result; + return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, + RetOps.data(), RetOps.size()); } bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { @@ -2214,8 +2279,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, EVT PtrVT = getPointerTy(); DebugLoc dl = Op.getDebugLoc(); const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); - Reloc::Model RelocM = getTargetMachine().getRelocationModel(); - if (RelocM == Reloc::PIC_) { + if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility(); ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, @@ -2259,8 +2323,6 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, DebugLoc dl = Op.getDebugLoc(); const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); Reloc::Model RelocM = getTargetMachine().getRelocationModel(); - MachineFunction &MF = DAG.getMachineFunction(); - ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); // FIXME: Enable this for static codegen when tool issues are fixed. Also // update ARMFastISel::ARMMaterializeGV. @@ -2288,6 +2350,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, if (RelocM == Reloc::Static) { CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); } else { + ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>(); ARMPCLabelIndex = AFI->createPICLabelUId(); unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8); ARMConstantPoolValue *CPV = @@ -2368,7 +2431,6 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); EVT PtrVT = getPointerTy(); - DebugLoc dl = Op.getDebugLoc(); Reloc::Model RelocM = getTargetMachine().getRelocationModel(); SDValue CPAddr; unsigned PCAdj = (RelocM != Reloc::PIC_) @@ -2543,7 +2605,7 @@ ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF, } // The remaining GPRs hold either the beginning of variable-argument -// data, or the beginning of an aggregate passed by value (usuall +// data, or the beginning of an aggregate passed by value (usually // byval). Either way, we allocate stack slots adjacent to the data // provided by our caller, and store the unallocated registers there. // If this is a variadic function, the va_list pointer will begin with @@ -2628,7 +2690,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForNode(CallConv, /* Return*/ false, isVarArg)); - + SmallVector<SDValue, 16> ArgValues; int lastInsIndex = -1; SDValue ArgValue; @@ -2743,7 +2805,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, } else { int FI = MFI->CreateFixedObject(Flags.getByValSize(), VA.getLocMemOffset(), false); - InVals.push_back(DAG.getFrameIndex(FI, getPointerTy())); + InVals.push_back(DAG.getFrameIndex(FI, getPointerTy())); } } else { int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8, @@ -3379,6 +3441,47 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { return FrameAddr; } +/// Custom Expand long vector extensions, where size(DestVec) > 2*size(SrcVec), +/// and size(DestVec) > 128-bits. 
+/// This is achieved by doing the one extension from the SrcVec, splitting the +/// result, extending these parts, and then concatenating these into the +/// destination. +static SDValue ExpandVectorExtension(SDNode *N, SelectionDAG &DAG) { + SDValue Op = N->getOperand(0); + EVT SrcVT = Op.getValueType(); + EVT DestVT = N->getValueType(0); + + assert(DestVT.getSizeInBits() > 128 && + "Custom sext/zext expansion needs >128-bit vector."); + // If this is a normal length extension, use the default expansion. + if (SrcVT.getSizeInBits()*4 != DestVT.getSizeInBits() && + SrcVT.getSizeInBits()*8 != DestVT.getSizeInBits()) + return SDValue(); + + DebugLoc dl = N->getDebugLoc(); + unsigned SrcEltSize = SrcVT.getVectorElementType().getSizeInBits(); + unsigned DestEltSize = DestVT.getVectorElementType().getSizeInBits(); + unsigned NumElts = SrcVT.getVectorNumElements(); + LLVMContext &Ctx = *DAG.getContext(); + SDValue Mid, SplitLo, SplitHi, ExtLo, ExtHi; + + EVT MidVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, SrcEltSize*2), + NumElts); + EVT SplitVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, SrcEltSize*2), + NumElts/2); + EVT ExtVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, DestEltSize), + NumElts/2); + + Mid = DAG.getNode(N->getOpcode(), dl, MidVT, Op); + SplitLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SplitVT, Mid, + DAG.getIntPtrConstant(0)); + SplitHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SplitVT, Mid, + DAG.getIntPtrConstant(NumElts/2)); + ExtLo = DAG.getNode(N->getOpcode(), dl, ExtVT, SplitLo); + ExtHi = DAG.getNode(N->getOpcode(), dl, ExtVT, SplitHi); + return DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, ExtLo, ExtHi); +} + /// ExpandBITCAST - If the target supports VFP, this function is called to /// expand a bit convert where either the source or destination type is i64 to /// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64 @@ -3532,6 +3635,114 @@ static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG, return DAG.getNode(ISD::CTLZ, dl, VT, rbit); } +/// getCTPOP16BitCounts - Returns a v8i8/v16i8 vector containing the bit-count +/// for each 16-bit element from operand, repeated. The basic idea is to +/// leverage vcnt to get the 8-bit counts, gather and add the results. +/// +/// Trace for v4i16: +/// input = [v0 v1 v2 v3 ] (vi 16-bit element) +/// cast: N0 = [w0 w1 w2 w3 w4 w5 w6 w7] (v0 = [w0 w1], wi 8-bit element) +/// vcnt: N1 = [b0 b1 b2 b3 b4 b5 b6 b7] (bi = bit-count of 8-bit element wi) +/// vrev: N2 = [b1 b0 b3 b2 b5 b4 b7 b6] +/// [b0 b1 b2 b3 b4 b5 b6 b7] +/// +[b1 b0 b3 b2 b5 b4 b7 b6] +/// N3=N1+N2 = [k0 k0 k1 k1 k2 k2 k3 k3] (k0 = b0+b1 = bit-count of 16-bit v0, +/// vuzp: = [k0 k1 k2 k3 k0 k1 k2 k3] each ki is 8-bits) +static SDValue getCTPOP16BitCounts(SDNode *N, SelectionDAG &DAG) { + EVT VT = N->getValueType(0); + DebugLoc DL = N->getDebugLoc(); + + EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8; + SDValue N0 = DAG.getNode(ISD::BITCAST, DL, VT8Bit, N->getOperand(0)); + SDValue N1 = DAG.getNode(ISD::CTPOP, DL, VT8Bit, N0); + SDValue N2 = DAG.getNode(ARMISD::VREV16, DL, VT8Bit, N1); + SDValue N3 = DAG.getNode(ISD::ADD, DL, VT8Bit, N1, N2); + return DAG.getNode(ARMISD::VUZP, DL, VT8Bit, N3, N3); +} + +/// lowerCTPOP16BitElements - Returns a v4i16/v8i16 vector containing the +/// bit-count for each 16-bit element from the operand. We need slightly +/// different sequencing for v4i16 and v8i16 to stay within NEON's available +/// 64/128-bit registers. 
+/// +/// Trace for v4i16: +/// input = [v0 v1 v2 v3 ] (vi 16-bit element) +/// v8i8: BitCounts = [k0 k1 k2 k3 k0 k1 k2 k3 ] (ki is the bit-count of vi) +/// v8i16:Extended = [k0 k1 k2 k3 k0 k1 k2 k3 ] +/// v4i16:Extracted = [k0 k1 k2 k3 ] +static SDValue lowerCTPOP16BitElements(SDNode *N, SelectionDAG &DAG) { + EVT VT = N->getValueType(0); + DebugLoc DL = N->getDebugLoc(); + + SDValue BitCounts = getCTPOP16BitCounts(N, DAG); + if (VT.is64BitVector()) { + SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, BitCounts); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Extended, + DAG.getIntPtrConstant(0)); + } else { + SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, + BitCounts, DAG.getIntPtrConstant(0)); + return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, Extracted); + } +} + +/// lowerCTPOP32BitElements - Returns a v2i32/v4i32 vector containing the +/// bit-count for each 32-bit element from the operand. The idea here is +/// to split the vector into 16-bit elements, leverage the 16-bit count +/// routine, and then combine the results. +/// +/// Trace for v2i32 (v4i32 similar with Extracted/Extended exchanged): +/// input = [v0 v1 ] (vi: 32-bit elements) +/// Bitcast = [w0 w1 w2 w3 ] (wi: 16-bit elements, v0 = [w0 w1]) +/// Counts16 = [k0 k1 k2 k3 ] (ki: 16-bit elements, bit-count of wi) +/// vrev: N0 = [k1 k0 k3 k2 ] +/// [k0 k1 k2 k3 ] +/// N1 =+[k1 k0 k3 k2 ] +/// [k0 k2 k1 k3 ] +/// N2 =+[k1 k3 k0 k2 ] +/// [k0 k2 k1 k3 ] +/// Extended =+[k1 k3 k0 k2 ] +/// [k0 k2 ] +/// Extracted=+[k1 k3 ] +/// +static SDValue lowerCTPOP32BitElements(SDNode *N, SelectionDAG &DAG) { + EVT VT = N->getValueType(0); + DebugLoc DL = N->getDebugLoc(); + + EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16; + + SDValue Bitcast = DAG.getNode(ISD::BITCAST, DL, VT16Bit, N->getOperand(0)); + SDValue Counts16 = lowerCTPOP16BitElements(Bitcast.getNode(), DAG); + SDValue N0 = DAG.getNode(ARMISD::VREV32, DL, VT16Bit, Counts16); + SDValue N1 = DAG.getNode(ISD::ADD, DL, VT16Bit, Counts16, N0); + SDValue N2 = DAG.getNode(ARMISD::VUZP, DL, VT16Bit, N1, N1); + + if (VT.is64BitVector()) { + SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, N2); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i32, Extended, + DAG.getIntPtrConstant(0)); + } else { + SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, N2, + DAG.getIntPtrConstant(0)); + return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, Extracted); + } +} + +static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG, + const ARMSubtarget *ST) { + EVT VT = N->getValueType(0); + + assert(ST->hasNEON() && "Custom ctpop lowering requires NEON."); + assert((VT == MVT::v2i32 || VT == MVT::v4i32 || + VT == MVT::v4i16 || VT == MVT::v8i16) && + "Unexpected type for custom ctpop lowering"); + + if (VT.getVectorElementType() == MVT::i32) + return lowerCTPOP32BitElements(N, DAG); + else + return lowerCTPOP16BitElements(N, DAG); +} + static SDValue LowerShift(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST) { EVT VT = N->getValueType(0); @@ -4153,6 +4364,21 @@ static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){ return true; } +/// \return true if this is a reverse operation on a vector. +static bool isReverseMask(ArrayRef<int> M, EVT VT) { + unsigned NumElts = VT.getVectorNumElements(); + // Make sure the mask has the right size. + if (NumElts != M.size()) + return false; + + // Look for <15, ..., 3, -1, 1, 0>.
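// (Illustration: every defined element i must equal NumElts-1-i; a -1 entry
// denotes an undef lane and is accepted anywhere. For v8i16 the fully-defined
// reverse mask is <7, 6, 5, 4, 3, 2, 1, 0>.)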
+ for (unsigned i = 0; i != NumElts; ++i) + if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i)) + return false; + + return true; +} + // If N is an integer constant that can be moved into a register in one // instruction, return an SDValue of such a constant (will become a MOV // instruction). Otherwise return null. @@ -4247,7 +4473,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, ValueCounts.insert(std::make_pair(V, 0)); unsigned &Count = ValueCounts[V]; - + // Is this value dominant? (takes up more than half of the lanes) if (++Count > (NumElts / 2)) { hasDominantValue = true; @@ -4275,8 +4501,11 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, // If we are VDUPing a value that comes directly from a vector, that will // cause an unnecessary move to and from a GPR, where instead we could - // just use VDUPLANE. - if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT) { + // just use VDUPLANE. We can only do this if the lane being extracted + // is at a constant index, as the VDUP from lane instructions only have + // constant-index forms. + if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT && + isa<ConstantSDNode>(Value->getOperand(1))) { // We need to create a new undef vector to use for the VDUPLANE if the // size of the vector from which we get the value is different than the // size of the vector that we need to create. We will insert the element @@ -4291,12 +4520,10 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT), Value, DAG.getConstant(index, MVT::i32)), DAG.getConstant(index, MVT::i32)); - } else { + } else N = DAG.getNode(ARMISD::VDUPLANE, dl, VT, Value->getOperand(0), Value->getOperand(1)); - } - } - else + } else N = DAG.getNode(ARMISD::VDUP, dl, VT, Value); if (!usesOnlyOneValue) { @@ -4328,7 +4555,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, if (usesOnlyOneValue) { SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl); if (isConstant && Val.getNode()) - return DAG.getNode(ARMISD::VDUP, dl, VT, Val); + return DAG.getNode(ARMISD::VDUP, dl, VT, Val); } } @@ -4548,7 +4775,8 @@ ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, isVZIPMask(M, VT, WhichResult) || isVTRN_v_undef_Mask(M, VT, WhichResult) || isVUZP_v_undef_Mask(M, VT, WhichResult) || - isVZIP_v_undef_Mask(M, VT, WhichResult)); + isVZIP_v_undef_Mask(M, VT, WhichResult) || + ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(M, VT))); } /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit @@ -4652,6 +4880,23 @@ static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op, &VTBLMask[0], 8)); } +static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op, + SelectionDAG &DAG) { + DebugLoc DL = Op.getDebugLoc(); + SDValue OpLHS = Op.getOperand(0); + EVT VT = OpLHS.getValueType(); + + assert((VT == MVT::v8i16 || VT == MVT::v16i8) && + "Expect a v8i16/v16i8 type"); + OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, OpLHS); + // For a v16i8 type: After the VREV, we have got <8, ...15, 8, ..., 0>. Now, + // extract the first 8 bytes into the top double word and the last 8 bytes + // into the bottom double word. The v8i16 case is similar. + unsigned ExtractNum = (VT == MVT::v16i8) ?
8 : 4; + return DAG.getNode(ARMISD::VEXT, DL, VT, OpLHS, OpLHS, + DAG.getConstant(ExtractNum, MVT::i32)); +} + static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); @@ -4789,6 +5034,9 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ISD::BITCAST, dl, VT, Val); } + if ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(ShuffleMask, VT)) + return LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(Op, DAG); + if (VT == MVT::v8i8) { SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG); if (NewOp.getNode()) @@ -4917,16 +5165,76 @@ static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) { return false; } -/// SkipExtension - For a node that is a SIGN_EXTEND, ZERO_EXTEND, extending -/// load, or BUILD_VECTOR with extended elements, return the unextended value. -static SDValue SkipExtension(SDNode *N, SelectionDAG &DAG) { +/// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total +/// value size to 64 bits. We need a 64-bit D register as an operand to VMULL. +/// We insert the required extension here to get the vector to fill a D register. +static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG, + const EVT &OrigTy, + const EVT &ExtTy, + unsigned ExtOpcode) { + // The vector originally had a size of OrigTy. It was then extended to ExtTy. + // We expect the ExtTy to be 128-bits total. If the OrigTy is less than + // 64-bits we need to insert a new extension so that it will be 64-bits. + assert(ExtTy.is128BitVector() && "Unexpected extension size"); + if (OrigTy.getSizeInBits() >= 64) + return N; + + // Must extend size to at least 64 bits to be used as an operand for VMULL. + MVT::SimpleValueType OrigSimpleTy = OrigTy.getSimpleVT().SimpleTy; + EVT NewVT; + switch (OrigSimpleTy) { + default: llvm_unreachable("Unexpected Orig Vector Type"); + case MVT::v2i8: + case MVT::v2i16: + NewVT = MVT::v2i32; + break; + case MVT::v4i8: + NewVT = MVT::v4i16; + break; + } + return DAG.getNode(ExtOpcode, N->getDebugLoc(), NewVT, N); +} + +/// SkipLoadExtensionForVMULL - return a load of the original vector size that +/// does not do any sign/zero extension. If the original vector is less +/// than 64 bits, an appropriate extension will be added after the load to +/// reach a total size of 64 bits. We have to add the extension separately +/// because ARM does not have a sign/zero extending load for vectors. +static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) { + SDValue NonExtendingLoad = + DAG.getLoad(LD->getMemoryVT(), LD->getDebugLoc(), LD->getChain(), + LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(), + LD->isNonTemporal(), LD->isInvariant(), + LD->getAlignment()); + unsigned ExtOp = 0; + switch (LD->getExtensionType()) { + default: llvm_unreachable("Unexpected LoadExtType"); + case ISD::EXTLOAD: + case ISD::SEXTLOAD: ExtOp = ISD::SIGN_EXTEND; break; + case ISD::ZEXTLOAD: ExtOp = ISD::ZERO_EXTEND; break; + } + MVT::SimpleValueType MemType = LD->getMemoryVT().getSimpleVT().SimpleTy; + MVT::SimpleValueType ExtType = LD->getValueType(0).getSimpleVT().SimpleTy; + return AddRequiredExtensionForVMULL(NonExtendingLoad, DAG, + MemType, ExtType, ExtOp); +} + +/// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND, +/// extending load, or BUILD_VECTOR with extended elements, return the +/// unextended value. 
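/// (For example, for (mul (sext v4i16:a), (sext v4i16:b)) this returns the
/// raw v4i16 operands, which VMULL.S16 consumes directly; a v4i8 operand
/// would first be re-extended to v4i16 by AddRequiredExtensionForVMULL.)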
The unextended vector should be 64 bits so that it can +/// be used as an operand to a VMULL instruction. If the original vector size +/// before extension is less than 64 bits we add an extension to resize +/// the vector to 64 bits. +static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) { if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND) - return N->getOperand(0); + return AddRequiredExtensionForVMULL(N->getOperand(0), DAG, + N->getOperand(0)->getValueType(0), + N->getValueType(0), + N->getOpcode()); + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) - return DAG.getLoad(LD->getMemoryVT(), N->getDebugLoc(), LD->getChain(), - LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(), - LD->isNonTemporal(), LD->isInvariant(), - LD->getAlignment()); + return SkipLoadExtensionForVMULL(LD, DAG); + // Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will // have been legalized as a BITCAST from v4i32. if (N->getOpcode() == ISD::BITCAST) { @@ -4981,7 +5289,8 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) { // Multiplications are only custom-lowered for 128-bit vectors so that // VMULL can be detected. Otherwise v2i64 multiplications are not legal. EVT VT = Op.getValueType(); - assert(VT.is128BitVector() && "unexpected type for custom-lowering ISD::MUL"); + assert(VT.is128BitVector() && VT.isInteger() && + "unexpected type for custom-lowering ISD::MUL"); SDNode *N0 = Op.getOperand(0).getNode(); SDNode *N1 = Op.getOperand(1).getNode(); unsigned NewOpc = 0; @@ -5024,9 +5333,9 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) { // Legalize to a VMULL instruction. DebugLoc DL = Op.getDebugLoc(); SDValue Op0; - SDValue Op1 = SkipExtension(N1, DAG); + SDValue Op1 = SkipExtensionForVMULL(N1, DAG); if (!isMLA) { - Op0 = SkipExtension(N0, DAG); + Op0 = SkipExtensionForVMULL(N0, DAG); assert(Op0.getValueType().is64BitVector() && Op1.getValueType().is64BitVector() && "unexpected types for extended operands to VMULL"); @@ -5041,8 +5350,8 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) { // vaddl q0, d4, d5 // vmovl q1, d6 // vmul q0, q0, q1 - SDValue N00 = SkipExtension(N0->getOperand(0).getNode(), DAG); - SDValue N01 = SkipExtension(N0->getOperand(1).getNode(), DAG); + SDValue N00 = SkipExtensionForVMULL(N0->getOperand(0).getNode(), DAG); + SDValue N01 = SkipExtensionForVMULL(N0->getOperand(1).getNode(), DAG); EVT Op1VT = Op1.getValueType(); return DAG.getNode(N0->getOpcode(), DL, VT, DAG.getNode(NewOpc, DL, VT, @@ -5328,6 +5637,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SRL_PARTS: case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG); case ISD::CTTZ: return LowerCTTZ(Op.getNode(), DAG, Subtarget); + case ISD::CTPOP: return LowerCTPOP(Op.getNode(), DAG, Subtarget); case ISD::SETCC: return LowerVSETCC(Op, DAG); case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget); case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget); @@ -5360,6 +5670,10 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, case ISD::BITCAST: Res = ExpandBITCAST(N, DAG); break; + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + Res = ExpandVectorExtension(N, DAG); + break; case ISD::SRL: case ISD::SRA: Res = Expand64BitShift(N, DAG, Subtarget); @@ -5388,6 +5702,18 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, case ISD::ATOMIC_CMP_SWAP: ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMCMPXCHG64_DAG); return; + case ISD::ATOMIC_LOAD_MIN: + ReplaceATOMIC_OP_64(N,
Results, DAG, ARMISD::ATOMMIN64_DAG); + return; + case ISD::ATOMIC_LOAD_UMIN: + ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMUMIN64_DAG); + return; + case ISD::ATOMIC_LOAD_MAX: + ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMMAX64_DAG); + return; + case ISD::ATOMIC_LOAD_UMAX: + ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMUMAX64_DAG); + return; } if (Res.getNode()) Results.push_back(Res); @@ -5727,7 +6053,8 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI, MachineBasicBlock * ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, unsigned Op1, unsigned Op2, - bool NeedsCarry, bool IsCmpxchg) const { + bool NeedsCarry, bool IsCmpxchg, + bool IsMinMax, ARMCC::CondCodes CC) const { // This also handles ATOMIC_SWAP, indicated by Op1==0. const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); @@ -5751,21 +6078,17 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, MRI.constrainRegClass(ptr, &ARM::rGPRRegClass); } - unsigned ldrOpc = isThumb2 ? ARM::t2LDREXD : ARM::LDREXD; - unsigned strOpc = isThumb2 ? ARM::t2STREXD : ARM::STREXD; - MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *contBB = 0, *cont2BB = 0; - if (IsCmpxchg) { + if (IsCmpxchg || IsMinMax) contBB = MF->CreateMachineBasicBlock(LLVM_BB); + if (IsCmpxchg) cont2BB = MF->CreateMachineBasicBlock(LLVM_BB); - } MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MF->insert(It, loopMBB); - if (IsCmpxchg) { - MF->insert(It, contBB); - MF->insert(It, cont2BB); - } + if (IsCmpxchg || IsMinMax) MF->insert(It, contBB); + if (IsCmpxchg) MF->insert(It, cont2BB); MF->insert(It, exitMBB); // Transfer the remainder of BB and its successor edges to exitMBB. @@ -5792,22 +6115,26 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, // cmp storesuccess, #0 // bne- loopMBB // fallthrough --> exitMBB - // - // Note that the registers are explicitly specified because there is not any - // way to force the register allocator to allocate a register pair. - // - // FIXME: The hardcoded registers are not necessary for Thumb2, but we - // need to properly enforce the restriction that the two output registers - // for ldrexd must be different. BB = loopMBB; + // Load - AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc)) - .addReg(ARM::R2, RegState::Define) - .addReg(ARM::R3, RegState::Define).addReg(ptr)); - // Copy r2/r3 into dest. (This copy will normally be coalesced.) - BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo).addReg(ARM::R2); - BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi).addReg(ARM::R3); + if (isThumb2) { + AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2LDREXD)) + .addReg(destlo, RegState::Define) + .addReg(desthi, RegState::Define) + .addReg(ptr)); + } else { + unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); + AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::LDREXD)) + .addReg(GPRPair0, RegState::Define).addReg(ptr)); + // Copy r2/r3 into dest. (This copy will normally be coalesced.) 
+ BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo) + .addReg(GPRPair0, 0, ARM::gsub_0); + BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi) + .addReg(GPRPair0, 0, ARM::gsub_1); + } + unsigned StoreLo, StoreHi; if (IsCmpxchg) { // Add early exit for (unsigned i = 0; i < 2; i++) { @@ -5823,26 +6150,60 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, } // Copy to physregs for strexd - unsigned setlo = MI->getOperand(5).getReg(); - unsigned sethi = MI->getOperand(6).getReg(); - BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R0).addReg(setlo); - BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R1).addReg(sethi); + StoreLo = MI->getOperand(5).getReg(); + StoreHi = MI->getOperand(6).getReg(); } else if (Op1) { // Perform binary operation - AddDefaultPred(BuildMI(BB, dl, TII->get(Op1), ARM::R0) + unsigned tmpRegLo = MRI.createVirtualRegister(TRC); + AddDefaultPred(BuildMI(BB, dl, TII->get(Op1), tmpRegLo) .addReg(destlo).addReg(vallo)) .addReg(NeedsCarry ? ARM::CPSR : 0, getDefRegState(NeedsCarry)); - AddDefaultPred(BuildMI(BB, dl, TII->get(Op2), ARM::R1) - .addReg(desthi).addReg(valhi)).addReg(0); + unsigned tmpRegHi = MRI.createVirtualRegister(TRC); + AddDefaultPred(BuildMI(BB, dl, TII->get(Op2), tmpRegHi) + .addReg(desthi).addReg(valhi)) + .addReg(IsMinMax ? ARM::CPSR : 0, getDefRegState(IsMinMax)); + + StoreLo = tmpRegLo; + StoreHi = tmpRegHi; } else { // Copy to physregs for strexd - BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R0).addReg(vallo); - BuildMI(BB, dl, TII->get(TargetOpcode::COPY), ARM::R1).addReg(valhi); + StoreLo = vallo; + StoreHi = valhi; + } + if (IsMinMax) { + // Compare and branch to exit block. + BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) + .addMBB(exitMBB).addImm(CC).addReg(ARM::CPSR); + BB->addSuccessor(exitMBB); + BB->addSuccessor(contBB); + BB = contBB; + StoreLo = vallo; + StoreHi = valhi; } // Store - AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess) - .addReg(ARM::R0).addReg(ARM::R1).addReg(ptr)); + if (isThumb2) { + AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2STREXD), storesuccess) + .addReg(StoreLo).addReg(StoreHi).addReg(ptr)); + } else { + // Marshal a pair... + unsigned StorePair = MRI.createVirtualRegister(&ARM::GPRPairRegClass); + unsigned UndefPair = MRI.createVirtualRegister(&ARM::GPRPairRegClass); + unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); + BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), UndefPair); + BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1) + .addReg(UndefPair) + .addReg(StoreLo) + .addImm(ARM::gsub_0); + BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), StorePair) + .addReg(r1) + .addReg(StoreHi) + .addImm(ARM::gsub_1); + + // ...and store it + AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::STREXD), storesuccess) + .addReg(StorePair).addReg(ptr)); + } // Cmp+jump AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) .addReg(storesuccess).addImm(0)); @@ -6043,6 +6404,7 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline); unsigned MJTI = JTI->createJumpTableIndex(LPadList); unsigned UId = AFI->createJumpTableUId(); + Reloc::Model RelocM = getTargetMachine().getRelocationModel(); // Create the MBBs for the dispatch code. 
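// (A sketch of the loop the IsMinMax path above produces for ATOMMIN6432,
//  in pseudo-assembly; register names are illustrative only:
//    loop: ldrexd  r2, r3, [ptr]        ; load current 64-bit value
//          subs    tLo, r2, valLo       ; dest - val, setting flags
//          sbcs    tHi, r3, valHi
//          blt     done                 ; MIN: current value already smaller
//          strexd  success, valLo, valHi, [ptr]
//          cmp     success, #0
//          bne     loop
//    done:
//  MAX, UMIN and UMAX use the same skeleton with GE, LO and HS conditions.)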
@@ -6051,7 +6413,13 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { DispatchBB->setIsLandingPad(); MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock(); - BuildMI(TrapBB, dl, TII->get(Subtarget->isThumb() ? ARM::tTRAP : ARM::TRAP)); + unsigned trap_opcode; + if (Subtarget->isThumb()) + trap_opcode = ARM::tTRAP; + else + trap_opcode = Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP; + + BuildMI(TrapBB, dl, TII->get(trap_opcode)); DispatchBB->addSuccessor(TrapBB); MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock(); @@ -6197,11 +6565,14 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { .addImm(0) .addMemOperand(JTMMOLd)); - unsigned NewVReg6 = MRI->createVirtualRegister(TRC); - AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6) - .addReg(ARM::CPSR, RegState::Define) - .addReg(NewVReg5, RegState::Kill) - .addReg(NewVReg3)); + unsigned NewVReg6 = NewVReg5; + if (RelocM == Reloc::PIC_) { + NewVReg6 = MRI->createVirtualRegister(TRC); + AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6) + .addReg(ARM::CPSR, RegState::Define) + .addReg(NewVReg5, RegState::Kill) + .addReg(NewVReg3)); + } BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr)) .addReg(NewVReg6, RegState::Kill) @@ -6281,11 +6652,18 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { .addImm(0) .addMemOperand(JTMMOLd)); - BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd)) - .addReg(NewVReg5, RegState::Kill) - .addReg(NewVReg4) - .addJumpTableIndex(MJTI) - .addImm(UId); + if (RelocM == Reloc::PIC_) { + BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd)) + .addReg(NewVReg5, RegState::Kill) + .addReg(NewVReg4) + .addJumpTableIndex(MJTI) + .addImm(UId); + } else { + BuildMI(DispContBB, dl, TII->get(ARM::BR_JTr)) + .addReg(NewVReg5, RegState::Kill) + .addJumpTableIndex(MJTI) + .addImm(UId); + } } // Add the jump table entries as successors to the MBB. @@ -6334,7 +6712,7 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { DefRegs[OI->getReg()] = true; } - MachineInstrBuilder MIB(&*II); + MachineInstrBuilder MIB(*MF, &*II); for (unsigned i = 0; SavedRegs[i] != 0; ++i) { unsigned Reg = SavedRegs[i]; @@ -6411,8 +6789,9 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { UnitSize = 2; } else { // Check whether we can use NEON instructions. - if (!MF->getFunction()->getFnAttributes(). - hasAttribute(Attributes::NoImplicitFloat) && + if (!MF->getFunction()->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, + Attribute::NoImplicitFloat) && Subtarget->hasNEON()) { if ((Align % 16 == 0) && SizeVal >= 16) { ldrOpc = ARM::VLD1q32wb_fixed; @@ -6840,6 +7219,26 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, /*NeedsCarry*/ false, /*IsCmpxchg*/true); + case ARM::ATOMMIN6432: + return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, + isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, + /*NeedsCarry*/ true, /*IsCmpxchg*/false, + /*IsMinMax*/ true, ARMCC::LT); + case ARM::ATOMMAX6432: + return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, + isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, + /*NeedsCarry*/ true, /*IsCmpxchg*/false, + /*IsMinMax*/ true, ARMCC::GE); + case ARM::ATOMUMIN6432: + return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, + isThumb2 ? 
ARM::t2SBCrr : ARM::SBCrr, + /*NeedsCarry*/ true, /*IsCmpxchg*/false, + /*IsMinMax*/ true, ARMCC::LO); + case ARM::ATOMUMAX6432: + return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, + isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, + /*NeedsCarry*/ true, /*IsCmpxchg*/false, + /*IsMinMax*/ true, ARMCC::HS); case ARM::tMOVCCr_pseudo: { // To "insert" a SELECT_CC instruction, we actually have to insert the @@ -9111,7 +9510,7 @@ bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc, return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE); } -bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const { +bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const { // The AllowsUnaligned flag models the SCTLR.A setting in ARM CPUs bool AllowsUnaligned = Subtarget->allowsUnalignedMem(); @@ -9120,15 +9519,27 @@ bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const { return false; case MVT::i8: case MVT::i16: - case MVT::i32: + case MVT::i32: { // Unaligned access can use (for example) LDRB, LDRH, LDR - return AllowsUnaligned; + if (AllowsUnaligned) { + if (Fast) + *Fast = Subtarget->hasV7Ops(); + return true; + } + return false; + } case MVT::f64: - case MVT::v2f64: + case MVT::v2f64: { // For any little-endian targets with NEON, we can support unaligned ld/st // of D and Q (e.g. {D0,D1}) registers by using vld1.i8/vst1.i8. // A big-endian target may also explicitly support unaligned accesses - return Subtarget->hasNEON() && (AllowsUnaligned || isLittleEndian()); + if (Subtarget->hasNEON() && (AllowsUnaligned || isLittleEndian())) { + if (Fast) + *Fast = true; + return true; + } + return false; + } } } @@ -9140,33 +9551,59 @@ static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign, EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, - bool IsZeroVal, + bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const { const Function *F = MF.getFunction(); // See if we can use NEON instructions for this... - if (IsZeroVal && - !F->getFnAttributes().hasAttribute(Attributes::NoImplicitFloat) && - Subtarget->hasNEON()) { - if (memOpAlign(SrcAlign, DstAlign, 16) && Size >= 16) { - return MVT::v4i32; - } else if (memOpAlign(SrcAlign, DstAlign, 8) && Size >= 8) { - return MVT::v2i32; + if ((!IsMemset || ZeroMemset) && + Subtarget->hasNEON() && + !F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::NoImplicitFloat)) { + bool Fast; + if (Size >= 16 && + (memOpAlign(SrcAlign, DstAlign, 16) || + (allowsUnalignedMemoryAccesses(MVT::v2f64, &Fast) && Fast))) { + return MVT::v2f64; + } else if (Size >= 8 && + (memOpAlign(SrcAlign, DstAlign, 8) || + (allowsUnalignedMemoryAccesses(MVT::f64, &Fast) && Fast))) { + return MVT::f64; } } // Lowering to i32/i16 if the size permits. - if (Size >= 4) + if (Size >= 4) return MVT::i32; - } else if (Size >= 2) { + else if (Size >= 2) return MVT::i16; - } // Let the target-independent logic figure it out. return MVT::Other; } +bool ARMTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { + if (Val.getOpcode() != ISD::LOAD) + return false; + + EVT VT1 = Val.getValueType(); + if (!VT1.isSimple() || !VT1.isInteger() || + !VT2.isSimple() || !VT2.isInteger()) + return false; + + switch (VT1.getSimpleVT().SimpleTy) { + default: break; + case MVT::i1: + case MVT::i8: + case MVT::i16: + // 8-bit and 16-bit loads implicitly zero-extend to 32-bits.
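// (LDRB/LDRH write all 32 bits of the destination register, so the
// zero-extension comes for free with the load itself.)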
+ return true; + } + + return false; +} + static bool isLegalT1AddressImmediate(int64_t V, EVT VT) { if (V < 0) return false; diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 4eb3b2c..9ee17f0 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -17,11 +17,11 @@ #include "ARM.h" #include "ARMSubtarget.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetRegisterInfo.h" #include <vector> namespace llvm { @@ -232,7 +232,11 @@ namespace llvm { ATOMAND64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG, - ATOMCMPXCHG64_DAG + ATOMCMPXCHG64_DAG, + ATOMMIN64_DAG, + ATOMUMIN64_DAG, + ATOMMAX64_DAG, + ATOMUMAX64_DAG }; } @@ -248,7 +252,7 @@ namespace llvm { public: explicit ARMTargetLowering(TargetMachine &TM); - virtual unsigned getJumpTableEncoding(void) const; + virtual unsigned getJumpTableEncoding() const; virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; @@ -281,15 +285,19 @@ namespace llvm { bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const; /// allowsUnalignedMemoryAccesses - Returns true if the target allows - /// unaligned memory accesses. of the specified type. - virtual bool allowsUnalignedMemoryAccesses(EVT VT) const; + /// unaligned memory accesses of the specified type. Returns whether it + /// is "fast" by reference in the second argument. + virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const; virtual EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, - bool IsZeroVal, + bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const; + using TargetLowering::isZExtFree; + virtual bool isZExtFree(SDValue Val, EVT VT2) const; + /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty)const; @@ -358,7 +366,7 @@ namespace llvm { /// getRegClassFor - Return the register class that should be used for the /// specified value type. - virtual const TargetRegisterClass *getRegClassFor(EVT VT) const; + virtual const TargetRegisterClass *getRegClassFor(MVT VT) const; /// getMaximalGlobalOffset - Returns the maximal possible offset which can /// be used for loads / stores from the global. 
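// A minimal sketch of how the new out-parameter is meant to be queried
// (hypothetical caller; the in-tree user is getOptimalMemOpType above):
//   bool Fast = false;
//   if (TLI.allowsUnalignedMemoryAccesses(MVT::v2f64, &Fast) && Fast) {
//     // an unaligned 128-bit NEON access is both legal and fast here
//   }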
@@ -384,7 +392,7 @@ namespace llvm { unsigned Intrinsic) const; protected: std::pair<const TargetRegisterClass*, uint8_t> - findRepresentativeClass(EVT VT) const; + findRepresentativeClass(MVT VT) const; private: /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can @@ -495,6 +503,12 @@ namespace llvm { const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG& DAG) const; + + virtual bool CanLowerReturn(CallingConv::ID CallConv, + MachineFunction &MF, bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + LLVMContext &Context) const; + virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, @@ -526,7 +540,9 @@ namespace llvm { unsigned Op1, unsigned Op2, bool NeedsCarry = false, - bool IsCmpxchg = false) const; + bool IsCmpxchg = false, + bool IsMinMax = false, + ARMCC::CondCodes CC = ARMCC::AL) const; MachineBasicBlock * EmitAtomicBinaryMinMax(MachineInstr *MI, MachineBasicBlock *BB, unsigned Size, diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp index a0b6f24..80f0ec7 100644 --- a/lib/Target/ARM/ARMInstrInfo.cpp +++ b/lib/Target/ARM/ARMInstrInfo.cpp @@ -22,8 +22,8 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" -#include "llvm/Function.h" -#include "llvm/GlobalVariable.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCInst.h" using namespace llvm; diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index df2e55e..11550c5 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -117,7 +117,7 @@ def ARMcall_nolink : SDNode<"ARMISD::CALL_NOLINK", SDT_ARMcall, SDNPVariadic]>; def ARMretflag : SDNode<"ARMISD::RET_FLAG", SDTNone, - [SDNPHasChain, SDNPOptInGlue]>; + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov, [SDNPInGlue]>; @@ -239,6 +239,9 @@ def IsARM : Predicate<"!Subtarget->isThumb()">, def IsIOS : Predicate<"Subtarget->isTargetIOS()">; def IsNotIOS : Predicate<"!Subtarget->isTargetIOS()">; def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">; +def UseNaClTrap : Predicate<"Subtarget->useNaClTrap()">, + AssemblerPredicate<"FeatureNaClTrap", "NaCl">; +def DontUseNaClTrap : Predicate<"!Subtarget->useNaClTrap()">; // FIXME: Eventually this will be just "hasV6T2Ops". 
def UseMovt : Predicate<"Subtarget->useMovt()">; @@ -417,6 +420,8 @@ def reglist : Operand<i32> { let DecoderMethod = "DecodeRegListOperand"; } +def GPRPairOp : RegisterOperand<GPRPair, "printGPRPairOperand">; + def DPRRegListAsmOperand : AsmOperandClass { let Name = "DPRRegList"; } def dpr_reglist : Operand<i32> { let EncoderMethod = "getRegisterListOpValue"; @@ -1005,7 +1010,8 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc, let isReMaterializable = 1 in { def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm, iii, opc, "\t$Rd, $Rn, $imm", - [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]> { + [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]>, + Sched<[WriteALU, ReadALU]> { bits<4> Rd; bits<4> Rn; bits<12> imm; @@ -1017,7 +1023,8 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc, } def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, iir, opc, "\t$Rd, $Rn, $Rm", - [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]> { + [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]>, + Sched<[WriteALU, ReadALU, ReadALU]> { bits<4> Rd; bits<4> Rn; bits<4> Rm; @@ -1032,7 +1039,8 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc, def rsi : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm, iis, opc, "\t$Rd, $Rn, $shift", - [(set GPR:$Rd, (opnode GPR:$Rn, so_reg_imm:$shift))]> { + [(set GPR:$Rd, (opnode GPR:$Rn, so_reg_imm:$shift))]>, + Sched<[WriteALUsi, ReadALU]> { bits<4> Rd; bits<4> Rn; bits<12> shift; @@ -1047,7 +1055,8 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc, def rsr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm, iis, opc, "\t$Rd, $Rn, $shift", - [(set GPR:$Rd, (opnode GPR:$Rn, so_reg_reg:$shift))]> { + [(set GPR:$Rd, (opnode GPR:$Rn, so_reg_reg:$shift))]>, + Sched<[WriteALUsr, ReadALUsr]> { bits<4> Rd; bits<4> Rn; bits<12> shift; @@ -1074,7 +1083,8 @@ multiclass AsI1_rbin_irs<bits<4> opcod, string opc, let isReMaterializable = 1 in { def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm, iii, opc, "\t$Rd, $Rn, $imm", - [(set GPR:$Rd, (opnode so_imm:$imm, GPR:$Rn))]> { + [(set GPR:$Rd, (opnode so_imm:$imm, GPR:$Rn))]>, + Sched<[WriteALU, ReadALU]> { bits<4> Rd; bits<4> Rn; bits<12> imm; @@ -1086,7 +1096,8 @@ multiclass AsI1_rbin_irs<bits<4> opcod, string opc, } def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, iir, opc, "\t$Rd, $Rn, $Rm", - [/* pattern left blank */]> { + [/* pattern left blank */]>, + Sched<[WriteALU, ReadALU, ReadALU]> { bits<4> Rd; bits<4> Rn; bits<4> Rm; @@ -1100,7 +1111,8 @@ multiclass AsI1_rbin_irs<bits<4> opcod, string opc, def rsi : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm, iis, opc, "\t$Rd, $Rn, $shift", - [(set GPR:$Rd, (opnode so_reg_imm:$shift, GPR:$Rn))]> { + [(set GPR:$Rd, (opnode so_reg_imm:$shift, GPR:$Rn))]>, + Sched<[WriteALUsi, ReadALU]> { bits<4> Rd; bits<4> Rn; bits<12> shift; @@ -1115,7 +1127,8 @@ multiclass AsI1_rbin_irs<bits<4> opcod, string opc, def rsr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm, iis, opc, "\t$Rd, $Rn, $shift", - [(set GPR:$Rd, (opnode so_reg_reg:$shift, GPR:$Rn))]> { + [(set GPR:$Rd, (opnode so_reg_reg:$shift, GPR:$Rn))]>, + Sched<[WriteALUsr, ReadALUsr]> { bits<4> Rd; bits<4> Rn; bits<12> shift; @@ -1140,24 +1153,28 @@ multiclass AsI1_bin_s_irs<InstrItinClass iii, InstrItinClass iir, bit Commutable = 0> { def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm, pred:$p), 4, iii, - [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, 
so_imm:$imm))]>; + [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_imm:$imm))]>, + Sched<[WriteALU, ReadALU]>; def rr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, pred:$p), 4, iir, - [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, GPR:$Rm))]> { + [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, GPR:$Rm))]>, + Sched<[WriteALU, ReadALU, ReadALU]> { let isCommutable = Commutable; } def rsi : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg_imm:$shift, pred:$p), 4, iis, [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, - so_reg_imm:$shift))]>; + so_reg_imm:$shift))]>, + Sched<[WriteALUsi, ReadALU]>; def rsr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg_reg:$shift, pred:$p), 4, iis, [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, - so_reg_reg:$shift))]>; + so_reg_reg:$shift))]>, + Sched<[WriteALUSsr, ReadALUsr]>; } } @@ -1169,19 +1186,22 @@ multiclass AsI1_rbin_s_is<InstrItinClass iii, InstrItinClass iir, bit Commutable = 0> { def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm, pred:$p), 4, iii, - [(set GPR:$Rd, CPSR, (opnode so_imm:$imm, GPR:$Rn))]>; + [(set GPR:$Rd, CPSR, (opnode so_imm:$imm, GPR:$Rn))]>, + Sched<[WriteALU, ReadALU]>; def rsi : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg_imm:$shift, pred:$p), 4, iis, [(set GPR:$Rd, CPSR, (opnode so_reg_imm:$shift, - GPR:$Rn))]>; + GPR:$Rn))]>, + Sched<[WriteALUsi, ReadALU]>; def rsr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg_reg:$shift, pred:$p), 4, iis, [(set GPR:$Rd, CPSR, (opnode so_reg_reg:$shift, - GPR:$Rn))]>; + GPR:$Rn))]>, + Sched<[WriteALUSsr, ReadALUsr]>; } } @@ -1194,7 +1214,8 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc, PatFrag opnode, bit Commutable = 0> { def ri : AI1<opcod, (outs), (ins GPR:$Rn, so_imm:$imm), DPFrm, iii, opc, "\t$Rn, $imm", - [(opnode GPR:$Rn, so_imm:$imm)]> { + [(opnode GPR:$Rn, so_imm:$imm)]>, + Sched<[WriteCMP, ReadALU]> { bits<4> Rn; bits<12> imm; let Inst{25} = 1; @@ -1207,7 +1228,8 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc, } def rr : AI1<opcod, (outs), (ins GPR:$Rn, GPR:$Rm), DPFrm, iir, opc, "\t$Rn, $Rm", - [(opnode GPR:$Rn, GPR:$Rm)]> { + [(opnode GPR:$Rn, GPR:$Rm)]>, + Sched<[WriteCMP, ReadALU, ReadALU]> { bits<4> Rn; bits<4> Rm; let isCommutable = Commutable; @@ -1223,7 +1245,8 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc, def rsi : AI1<opcod, (outs), (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm, iis, opc, "\t$Rn, $shift", - [(opnode GPR:$Rn, so_reg_imm:$shift)]> { + [(opnode GPR:$Rn, so_reg_imm:$shift)]>, + Sched<[WriteCMPsi, ReadALU]> { bits<4> Rn; bits<12> shift; let Inst{25} = 0; @@ -1239,7 +1262,8 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc, def rsr : AI1<opcod, (outs), (ins GPRnopc:$Rn, so_reg_reg:$shift), DPSoRegRegFrm, iis, opc, "\t$Rn, $shift", - [(opnode GPRnopc:$Rn, so_reg_reg:$shift)]> { + [(opnode GPRnopc:$Rn, so_reg_reg:$shift)]>, + Sched<[WriteCMPsr, ReadALU]> { bits<4> Rn; bits<12> shift; let Inst{25} = 0; @@ -1321,7 +1345,8 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm, IIC_iALUi, opc, "\t$Rd, $Rn, $imm", [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_imm:$imm, CPSR))]>, - Requires<[IsARM]> { + Requires<[IsARM]>, + Sched<[WriteALU, ReadALU]> { bits<4> Rd; bits<4> Rn; bits<12> imm; @@ -1333,7 +1358,8 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, IIC_iALUr, opc, "\t$Rd, $Rn, $Rm", [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, GPR:$Rm, CPSR))]>, - Requires<[IsARM]> { + 
Requires<[IsARM]>, + Sched<[WriteALU, ReadALU, ReadALU]> { bits<4> Rd; bits<4> Rn; bits<4> Rm; @@ -1348,7 +1374,8 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift", [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_reg_imm:$shift, CPSR))]>, - Requires<[IsARM]> { + Requires<[IsARM]>, + Sched<[WriteALUsi, ReadALU]> { bits<4> Rd; bits<4> Rn; bits<12> shift; @@ -1364,7 +1391,8 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, DPSoRegRegFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift", [(set GPRnopc:$Rd, CPSR, (opnode GPRnopc:$Rn, so_reg_reg:$shift, CPSR))]>, - Requires<[IsARM]> { + Requires<[IsARM]>, + Sched<[WriteALUsr, ReadALUsr]> { bits<4> Rd; bits<4> Rn; bits<12> shift; @@ -1387,7 +1415,8 @@ multiclass AI1_rsc_irs<bits<4> opcod, string opc, PatFrag opnode> { def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm, IIC_iALUi, opc, "\t$Rd, $Rn, $imm", [(set GPR:$Rd, CPSR, (opnode so_imm:$imm, GPR:$Rn, CPSR))]>, - Requires<[IsARM]> { + Requires<[IsARM]>, + Sched<[WriteALU, ReadALU]> { bits<4> Rd; bits<4> Rn; bits<12> imm; @@ -1398,7 +1427,8 @@ multiclass AI1_rsc_irs<bits<4> opcod, string opc, PatFrag opnode> { } def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, IIC_iALUr, opc, "\t$Rd, $Rn, $Rm", - [/* pattern left blank */]> { + [/* pattern left blank */]>, + Sched<[WriteALU, ReadALU, ReadALU]> { bits<4> Rd; bits<4> Rn; bits<4> Rm; @@ -1411,7 +1441,8 @@ multiclass AI1_rsc_irs<bits<4> opcod, string opc, PatFrag opnode> { def rsi : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift", [(set GPR:$Rd, CPSR, (opnode so_reg_imm:$shift, GPR:$Rn, CPSR))]>, - Requires<[IsARM]> { + Requires<[IsARM]>, + Sched<[WriteALUsi, ReadALU]> { bits<4> Rd; bits<4> Rn; bits<12> shift; @@ -1425,7 +1456,8 @@ multiclass AI1_rsc_irs<bits<4> opcod, string opc, PatFrag opnode> { def rsr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift", [(set GPR:$Rd, CPSR, (opnode so_reg_reg:$shift, GPR:$Rn, CPSR))]>, - Requires<[IsARM]> { + Requires<[IsARM]>, + Sched<[WriteALUsr, ReadALUsr]> { bits<4> Rd; bits<4> Rn; bits<12> shift; @@ -1622,6 +1654,18 @@ def ATOMCMPXCHG6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), (ins GPR:$addr, GPR:$cmp1, GPR:$cmp2, GPR:$set1, GPR:$set2), NoItinerary, []>; +def ATOMMIN6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$src1, GPR:$src2), + NoItinerary, []>; +def ATOMUMIN6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$src1, GPR:$src2), + NoItinerary, []>; +def ATOMMAX6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$src1, GPR:$src2), + NoItinerary, []>; +def ATOMUMAX6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$src1, GPR:$src2), + NoItinerary, []>; } def HINT : AI<(outs), (ins imm0_255:$imm), MiscFrm, NoItinerary, @@ -1748,11 +1792,32 @@ def DBG : AI<(outs), (ins imm0_15:$opt), MiscFrm, NoItinerary, "dbg", "\t$opt", let Inst{3-0} = opt; } -// A5.4 Permanently UNDEFINED instructions. +/* + * A5.4 Permanently UNDEFINED instructions. + * + * For most targets use UDF #65006, for which the OS will generate SIGTRAP. + * Other UDF encodings generate SIGILL. + * + * NaCl's OS instead chooses an ARM UDF encoding that's also a UDF in Thumb. 
+ * Encoding A1: + * 1110 0111 1111 iiii iiii iiii 1111 iiii + * Encoding T1: + * 1101 1110 iiii iiii + * It uses the following encoding: + * 1110 0111 1111 1110 1101 1110 1111 0000 + * - In ARM: UDF #60896; + * - In Thumb: UDF #254 followed by a branch-to-self. + */ +let isBarrier = 1, isTerminator = 1 in +def TRAPNaCl : AXI<(outs), (ins), MiscFrm, NoItinerary, + "trap", [(trap)]>, + Requires<[IsARM,UseNaClTrap]> { + let Inst = 0xe7fedef0; +} let isBarrier = 1, isTerminator = 1 in def TRAP : AXI<(outs), (ins), MiscFrm, NoItinerary, "trap", [(trap)]>, - Requires<[IsARM]> { + Requires<[IsARM,DontUseNaClTrap]> { let Inst = 0xe7ffdefe; } @@ -1804,7 +1869,8 @@ let neverHasSideEffects = 1, isReMaterializable = 1 in // the instruction. The {24-21} opcode bits are set by the fixup, as we don't // know until then which form of the instruction will be used. def ADR : AI1<{0,?,?,0}, (outs GPR:$Rd), (ins adrlabel:$label), - MiscFrm, IIC_iALUi, "adr", "\t$Rd, $label", []> { + MiscFrm, IIC_iALUi, "adr", "\t$Rd, $label", []>, + Sched<[WriteALU, ReadALU]> { bits<4> Rd; bits<14> label; let Inst{27-25} = 0b001; @@ -2065,6 +2131,18 @@ def SRSIB_UPD : SRSI<1, "srsib\tsp!, $mode"> { let Inst{24-23} = 0b11; } +def : ARMInstAlias<"srsda $mode", (SRSDA imm0_31:$mode)>; +def : ARMInstAlias<"srsda $mode!", (SRSDA_UPD imm0_31:$mode)>; + +def : ARMInstAlias<"srsdb $mode", (SRSDB imm0_31:$mode)>; +def : ARMInstAlias<"srsdb $mode!", (SRSDB_UPD imm0_31:$mode)>; + +def : ARMInstAlias<"srsia $mode", (SRSIA imm0_31:$mode)>; +def : ARMInstAlias<"srsia $mode!", (SRSIA_UPD imm0_31:$mode)>; + +def : ARMInstAlias<"srsib $mode", (SRSIB imm0_31:$mode)>; +def : ARMInstAlias<"srsib $mode!", (SRSIB_UPD imm0_31:$mode)>; + // Return From Exception class RFEI<bit wb, string asm> : XI<(outs), (ins GPR:$Rn), AddrModeNone, 4, IndexModeNone, BrFrm, @@ -3816,28 +3894,33 @@ def UDIV : ADivA1I<0b011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iDIV, def CLZ : AMiscA1I<0b000010110, 0b0001, (outs GPR:$Rd), (ins GPR:$Rm), IIC_iUNAr, "clz", "\t$Rd, $Rm", - [(set GPR:$Rd, (ctlz GPR:$Rm))]>, Requires<[IsARM, HasV5T]>; + [(set GPR:$Rd, (ctlz GPR:$Rm))]>, Requires<[IsARM, HasV5T]>, + Sched<[WriteALU]>; def RBIT : AMiscA1I<0b01101111, 0b0011, (outs GPR:$Rd), (ins GPR:$Rm), IIC_iUNAr, "rbit", "\t$Rd, $Rm", [(set GPR:$Rd, (ARMrbit GPR:$Rm))]>, - Requires<[IsARM, HasV6T2]>; + Requires<[IsARM, HasV6T2]>, + Sched<[WriteALU]>; def REV : AMiscA1I<0b01101011, 0b0011, (outs GPR:$Rd), (ins GPR:$Rm), IIC_iUNAr, "rev", "\t$Rd, $Rm", - [(set GPR:$Rd, (bswap GPR:$Rm))]>, Requires<[IsARM, HasV6]>; + [(set GPR:$Rd, (bswap GPR:$Rm))]>, Requires<[IsARM, HasV6]>, + Sched<[WriteALU]>; let AddedComplexity = 5 in def REV16 : AMiscA1I<0b01101011, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm), IIC_iUNAr, "rev16", "\t$Rd, $Rm", [(set GPR:$Rd, (rotr (bswap GPR:$Rm), (i32 16)))]>, - Requires<[IsARM, HasV6]>; + Requires<[IsARM, HasV6]>, + Sched<[WriteALU]>; let AddedComplexity = 5 in def REVSH : AMiscA1I<0b01101111, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm), IIC_iUNAr, "revsh", "\t$Rd, $Rm", [(set GPR:$Rd, (sra (bswap GPR:$Rm), (i32 16)))]>, - Requires<[IsARM, HasV6]>; + Requires<[IsARM, HasV6]>, + Sched<[WriteALU]>; def : ARMV6Pat<(or (sra (shl GPR:$Rm, (i32 24)), (i32 16)), (and (srl GPR:$Rm, (i32 8)), 0xFF)), @@ -3849,7 +3932,8 @@ def PKHBT : APKHI<0b01101000, 0, (outs GPRnopc:$Rd), [(set GPRnopc:$Rd, (or (and GPRnopc:$Rn, 0xFFFF), (and (shl GPRnopc:$Rm, pkh_lsl_amt:$sh), 0xFFFF0000)))]>, - Requires<[IsARM, HasV6]>; + Requires<[IsARM, HasV6]>, + Sched<[WriteALUsi, ReadALU]>; // 
Alternate cases for PKHBT where identities eliminate some nodes. def : ARMV6Pat<(or (and GPRnopc:$Rn, 0xFFFF), (and GPRnopc:$Rm, 0xFFFF0000)), @@ -3865,7 +3949,8 @@ def PKHTB : APKHI<0b01101000, 1, (outs GPRnopc:$Rd), [(set GPRnopc:$Rd, (or (and GPRnopc:$Rn, 0xFFFF0000), (and (sra GPRnopc:$Rm, pkh_asr_amt:$sh), 0xFFFF)))]>, - Requires<[IsARM, HasV6]>; + Requires<[IsARM, HasV6]>, + Sched<[WriteALUsi, ReadALU]>; // Alternate cases for PKHTB where identities eliminate some nodes. Note that // a shift amount of 0 is *not legal* here, it is PKHBT instead. @@ -4229,8 +4314,8 @@ def LDREXH : AIldrex<0b11, (outs GPR:$Rt), (ins addr_offset_none:$addr), def LDREX : AIldrex<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr), NoItinerary, "ldrex", "\t$Rt, $addr", []>; let hasExtraDefRegAllocReq = 1 in -def LDREXD: AIldrex<0b01, (outs GPR:$Rt, GPR:$Rt2),(ins addr_offset_none:$addr), - NoItinerary, "ldrexd", "\t$Rt, $Rt2, $addr", []> { +def LDREXD: AIldrex<0b01, (outs GPRPairOp:$Rt),(ins addr_offset_none:$addr), + NoItinerary, "ldrexd", "\t$Rt, $addr", []> { let DecoderMethod = "DecodeDoubleRegLoad"; } } @@ -4244,8 +4329,8 @@ def STREX : AIstrex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr), NoItinerary, "strex", "\t$Rd, $Rt, $addr", []>; let hasExtraSrcRegAllocReq = 1 in def STREXD : AIstrex<0b01, (outs GPR:$Rd), - (ins GPR:$Rt, GPR:$Rt2, addr_offset_none:$addr), - NoItinerary, "strexd", "\t$Rd, $Rt, $Rt2, $addr", []> { + (ins GPRPairOp:$Rt, addr_offset_none:$addr), + NoItinerary, "strexd", "\t$Rd, $Rt, $addr", []> { let DecoderMethod = "DecodeDoubleRegStore"; } } diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 3cf213c..0411ac4 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -4264,6 +4264,7 @@ def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32, def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32, NEONvceq, 1>; +let TwoOperandAliasConstraint = "$Vm = $Vd" in defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i", "$Vd, $Vm, #0", NEONvceqz>; @@ -4277,10 +4278,12 @@ def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32, def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32, NEONvcge, 0>; +let TwoOperandAliasConstraint = "$Vm = $Vd" in { defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s", "$Vd, $Vm, #0", NEONvcgez>; defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s", "$Vd, $Vm, #0", NEONvclez>; +} // VCGT : Vector Compare Greater Than defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, @@ -4292,10 +4295,12 @@ def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32, def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32, NEONvcgt, 0>; +let TwoOperandAliasConstraint = "$Vm = $Vd" in { defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s", "$Vd, $Vm, #0", NEONvcgtz>; defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s", "$Vd, $Vm, #0", NEONvcltz>; +} // VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE) def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge", @@ -4877,12 +4882,15 @@ defm VSRI : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">; defm VABS : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0, IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s", int_arm_neon_vabs>; -def VABSfd : N2VDInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0, - IIC_VUNAD, "vabs", "f32", - v2f32, v2f32, int_arm_neon_vabs>; -def 
VABSfq : N2VQInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0, - IIC_VUNAQ, "vabs", "f32", - v4f32, v4f32, int_arm_neon_vabs>; +def VABSfd : N2VD<0b11, 0b11, 0b10, 0b01, 0b01110, 0, + "vabs", "f32", + v2f32, v2f32, fabs>; +def VABSfq : N2VQ<0b11, 0b11, 0b10, 0b01, 0b01110, 0, + "vabs", "f32", + v4f32, v4f32, fabs>; + +def : Pat<(v2f32 (int_arm_neon_vabs (v2f32 DPR:$src))), (VABSfd DPR:$src)>; +def : Pat<(v4f32 (int_arm_neon_vabs (v4f32 QPR:$src))), (VABSfq QPR:$src)>; // VQABS : Vector Saturating Absolute Value defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0, @@ -5737,6 +5745,10 @@ def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>; def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>; def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>; +// Fold extracting an element out of a v2i32 into a vfp register. +def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))), + (f32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>; + // Vector lengthening move with load, matching extending loads. // extload, zextload and sextload for a standard lengthening load. Example: diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 002d64a..c9d709e 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -130,8 +130,9 @@ def imm0_4095_neg : Operand<i32>, PatLeaf<(i32 imm), [{ let ParserMatchClass = imm0_4095_neg_asmoperand; } -def imm0_255_neg : PatLeaf<(i32 imm), [{ - return (uint32_t)(-N->getZExtValue()) < 255; +def imm1_255_neg : PatLeaf<(i32 imm), [{ + uint32_t Val = -N->getZExtValue(); + return (Val > 0 && Val < 255); }], imm_neg_XFORM>; def imm0_255_not : PatLeaf<(i32 imm), [{ @@ -1928,8 +1929,8 @@ defm t2RSBS : T2I_rbin_s_is <BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>; // The AddedComplexity preferences the first variant over the others since // it can be shrunk to a 16-bit wide encoding, while the others cannot. let AddedComplexity = 1 in -def : T2Pat<(add GPR:$src, imm0_255_neg:$imm), - (t2SUBri GPR:$src, imm0_255_neg:$imm)>; +def : T2Pat<(add GPR:$src, imm1_255_neg:$imm), + (t2SUBri GPR:$src, imm1_255_neg:$imm)>; def : T2Pat<(add GPR:$src, t2_so_imm_neg:$imm), (t2SUBri GPR:$src, t2_so_imm_neg:$imm)>; def : T2Pat<(add GPR:$src, imm0_4095_neg:$imm), @@ -1938,8 +1939,8 @@ def : T2Pat<(add GPR:$src, imm0_65535_neg:$imm), (t2SUBrr GPR:$src, (t2MOVi16 (imm_neg_XFORM imm:$imm)))>; let AddedComplexity = 1 in -def : T2Pat<(ARMaddc rGPR:$src, imm0_255_neg:$imm), - (t2SUBSri rGPR:$src, imm0_255_neg:$imm)>; +def : T2Pat<(ARMaddc rGPR:$src, imm1_255_neg:$imm), + (t2SUBSri rGPR:$src, imm1_255_neg:$imm)>; def : T2Pat<(ARMaddc rGPR:$src, t2_so_imm_neg:$imm), (t2SUBSri rGPR:$src, t2_so_imm_neg:$imm)>; def : T2Pat<(ARMaddc rGPR:$src, imm0_65535_neg:$imm), @@ -2314,13 +2315,15 @@ defm t2ORN : T2I_bin_irs<0b0011, "orn", /// changed to modify CPSR. 
multiclass T2I_un_irs<bits<4> opcod, string opc, InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, - PatFrag opnode, bit Cheap = 0, bit ReMat = 0> { + PatFrag opnode, + bit Cheap = 0, bit ReMat = 0, bit MoveImm = 0> { // shifted imm def i : T2sOneRegImm<(outs rGPR:$Rd), (ins t2_so_imm:$imm), iii, opc, "\t$Rd, $imm", [(set rGPR:$Rd, (opnode t2_so_imm:$imm))]> { let isAsCheapAsAMove = Cheap; let isReMaterializable = ReMat; + let isMoveImm = MoveImm; let Inst{31-27} = 0b11110; let Inst{25} = 0; let Inst{24-21} = opcod; @@ -2354,7 +2357,7 @@ multiclass T2I_un_irs<bits<4> opcod, string opc, let AddedComplexity = 1 in defm t2MVN : T2I_un_irs <0b0011, "mvn", IIC_iMVNi, IIC_iMVNr, IIC_iMVNsi, - UnOpFrag<(not node:$Src)>, 1, 1>; + UnOpFrag<(not node:$Src)>, 1, 1, 1>; let AddedComplexity = 1 in def : T2Pat<(and rGPR:$src, t2_so_imm_not:$imm), @@ -3478,6 +3481,13 @@ def t2SRSIA_UPD : T2SRS<0b11, 1, (outs), (ins imm0_31:$mode), NoItinerary, def t2SRSIA : T2SRS<0b11, 0, (outs), (ins imm0_31:$mode), NoItinerary, "srsia","\tsp, $mode", []>; + +def : t2InstAlias<"srsdb${p} $mode", (t2SRSDB imm0_31:$mode, pred:$p)>; +def : t2InstAlias<"srsdb${p} $mode!", (t2SRSDB_UPD imm0_31:$mode, pred:$p)>; + +def : t2InstAlias<"srsia${p} $mode", (t2SRSIA imm0_31:$mode, pred:$p)>; +def : t2InstAlias<"srsia${p} $mode!", (t2SRSIA_UPD imm0_31:$mode, pred:$p)>; + // Return From Exception is a system instruction. class T2RFE<bits<12> op31_20, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp index 254d8f6..351a290 100644 --- a/lib/Target/ARM/ARMJITInfo.cpp +++ b/lib/Target/ARM/ARMJITInfo.cpp @@ -17,12 +17,12 @@ #include "ARMConstantPoolValue.h" #include "ARMRelocations.h" #include "ARMSubtarget.h" -#include "llvm/Function.h" #include "llvm/CodeGen/JITCodeEmitter.h" +#include "llvm/IR/Function.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/Memory.h" +#include "llvm/Support/raw_ostream.h" #include <cstdlib> using namespace llvm; diff --git a/lib/Target/ARM/ARMJITInfo.h b/lib/Target/ARM/ARMJITInfo.h index 7928184..23a6a9b 100644 --- a/lib/Target/ARM/ARMJITInfo.h +++ b/lib/Target/ARM/ARMJITInfo.h @@ -15,12 +15,12 @@ #define ARMJITINFO_H #include "ARMMachineFunctionInfo.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/Target/TargetJITInfo.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallVector.h" namespace llvm { class ARMTargetMachine; diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 0185289..b7ac5d5 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -18,8 +18,12 @@ #include "ARMBaseRegisterInfo.h" #include "ARMMachineFunctionInfo.h" #include "MCTargetDesc/ARMAddressingModes.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" @@ -27,19 +31,15 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include 
"llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/SelectionDAGNodes.h" -#include "llvm/DataLayout.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" using namespace llvm; STATISTIC(NumLDMGened , "Number of ldm instructions generated"); @@ -87,6 +87,53 @@ namespace { MachineBasicBlock::iterator i) : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {} }; + class UnitRegsMap { + public: + UnitRegsMap(const TargetRegisterInfo* _TRI) : TRI(_TRI) {} + const SmallVector<unsigned, 4>& operator[](unsigned Reg) { + DenseMap<unsigned, SmallVector<unsigned, 4> >::iterator found = + Cache.find(Reg); + if (found != Cache.end()) + return found->second; + else + return Cache.insert(std::make_pair(Reg, this->getUnitRegs(Reg))) + .first->second; + } + private: + SmallVector<unsigned, 4> getUnitRegs(unsigned Reg) { + SmallVector<unsigned, 4> Res; + + const TargetRegisterClass* TRC = TRI->getMinimalPhysRegClass(Reg); + if (TRC == &ARM::QPRRegClass) { + if (Reg > ARM::Q7) { + Res.push_back(TRI->getSubReg(Reg, ARM::dsub_0)); + Res.push_back(TRI->getSubReg(Reg, ARM::dsub_1)); + return Res; + } + + Res.push_back(TRI->getSubReg(Reg, ARM::ssub_0)); + Res.push_back(TRI->getSubReg(Reg, ARM::ssub_1)); + Res.push_back(TRI->getSubReg(Reg, ARM::ssub_2)); + Res.push_back(TRI->getSubReg(Reg, ARM::ssub_3)); + + return Res; + } + + if (TRC == &ARM::DPRRegClass && Reg < ARM::D15) { + Res.push_back(TRI->getSubReg(Reg, ARM::ssub_0)); + Res.push_back(TRI->getSubReg(Reg, ARM::ssub_1)); + + return Res; + } + + Res.push_back(Reg); + + return Res; + + } + const TargetRegisterInfo* TRI; + DenseMap<unsigned, SmallVector<unsigned, 4> > Cache; + }; typedef SmallVector<MemOpQueueEntry,8> MemOpQueue; typedef MemOpQueue::iterator MemOpQueueIter; @@ -128,6 +175,11 @@ namespace { MachineBasicBlock::iterator MBBI, bool &Advance, MachineBasicBlock::iterator &I); + unsigned AddMemOp(MemOpQueue& MemOps, + const MemOpQueueEntry newEntry, + UnitRegsMap& UnitRegsInfo, + SmallSet<unsigned, 4>& UsedUnitRegs, + unsigned At = -1U); bool LoadStoreMultipleOpti(MachineBasicBlock &MBB); bool MergeReturnIntoLDM(MachineBasicBlock &MBB); }; @@ -865,7 +917,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD; // Can't do the merge if the destination register is the same as the would-be // writeback register. - if (isLd && MI->getOperand(0).getReg() == Base) + if (MI->getOperand(0).getReg() == Base) return false; unsigned PredReg = 0; @@ -1188,7 +1240,6 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, OddDeadKill = true; } // Never kill the base register in the first instruction. 
- // <rdar://problem/11101911> if (EvenReg == BaseReg) EvenDeadKill = false; InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, @@ -1214,12 +1265,103 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, return false; } +/// AddMemOp - Helper for ARMLoadStoreOpt::LoadStoreMultipleOpti. +/// It adds store mem ops with a simple push_back/insert, +/// without any additional logic. +/// For load operations it does the following: +/// 1. Adds the new load operation into the MemOps collection at position "At". +/// 2. Removes from MemOps any load operations that change the contents of +/// "Reg" prior to "At". +/// UnitRegsInfo - Map of type Map< Register, UnitRegisters-vector > +/// UsedUnitRegs - set of unit-registers currently in use. +/// At - position at which the new entry is added, and prior to which the +/// clean-up is made (for load operations). +/// FIXME: The clean-up should also be done for store operations, but the +/// memory address would have to be analyzed instead of the unit registers. +unsigned ARMLoadStoreOpt::AddMemOp(MemOpQueue& MemOps, + const MemOpQueueEntry NewEntry, + UnitRegsMap& UnitRegsInfo, + SmallSet<unsigned, 4>& UsedUnitRegs, + unsigned At) { + unsigned Cleaned = 0; + + if (At == -1U) { + At = MemOps.size(); + MemOps.push_back(NewEntry); + } else + MemOps.insert(&MemOps[At], NewEntry); + + // FIXME: + // If the operation is not a load, leave it as is for now, + // so 0 overridden ops get cleaned in this case. + if (!NewEntry.MBBI->mayLoad()) + return 0; + + const SmallVector<unsigned, 4>& NewEntryUnitRegs = UnitRegsInfo[NewEntry.Reg]; + + bool FoundOverriddenLoads = false; + + for (unsigned i = 0, e = NewEntryUnitRegs.size(); i != e; ++i) + if (UsedUnitRegs.count(NewEntryUnitRegs[i])) { + FoundOverriddenLoads = true; + break; + } + + // If this register is used by load operations that precede the new one, + // remove them from MemOps. + if (FoundOverriddenLoads) { + MemOpQueue UpdatedMemOps; + + // Scan through MemOps entries. + for (unsigned i = 0; i != At; ++i) { + MemOpQueueEntry& MemOpEntry = MemOps[i]; + + // FIXME: Skip non-load operations for now. + if (!MemOpEntry.MBBI->mayLoad()) + continue; + + const SmallVector<unsigned, 4>& MemOpUnitRegs = + UnitRegsInfo[MemOpEntry.Reg]; + + // Look up entries that load contents into a register used by the new entry. + bool ReleaseThisEntry = false; + for (unsigned m = 0, em = MemOpUnitRegs.size(); m != em; ++m) { + if (std::find(NewEntryUnitRegs.begin(), NewEntryUnitRegs.end(), + MemOpUnitRegs[m]) != NewEntryUnitRegs.end()) { + ReleaseThisEntry = true; + ++Cleaned; + break; + } + } + + if (ReleaseThisEntry) { + const SmallVector<unsigned, 4>& ReleasedRegs = UnitRegsInfo[MemOpEntry.Reg]; + for (unsigned r = 0, er = ReleasedRegs.size(); r != er; ++r) + UsedUnitRegs.erase(ReleasedRegs[r]); + } else + UpdatedMemOps.push_back(MemOpEntry); + } + + // Keep everything after the At position unchanged. + for (unsigned i = At, e = MemOps.size(); i != e; ++i) + UpdatedMemOps.push_back(MemOps[i]); + + MemOps.swap(UpdatedMemOps); + } + + UsedUnitRegs.insert(NewEntryUnitRegs.begin(), NewEntryUnitRegs.end()); + + return Cleaned; +} + /// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR /// ops of the same base and incrementing offset into LDM / STM ops.
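/// For example (editor's illustration; the registers and offsets are invented, /// not taken from a test in this patch): "ldr r1, [r0]" followed by /// "ldr r2, [r0, #4]" becomes "ldmia r0, {r1, r2}", subject to the usual /// LDM/STM register-ordering constraints.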
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { unsigned NumMerges = 0; unsigned NumMemOps = 0; MemOpQueue MemOps; + UnitRegsMap UnitRegsInfo(TRI); + SmallSet<unsigned, 4> UsedRegUnits; unsigned CurrBase = 0; int CurrOpc = -1; unsigned CurrSize = 0; @@ -1266,8 +1408,11 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { CurrSize = Size; CurrPred = Pred; CurrPredReg = PredReg; + MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI)); ++NumMemOps; + const SmallVector<unsigned, 4>& EntryUnitRegs = UnitRegsInfo[Reg]; + UsedRegUnits.insert(EntryUnitRegs.begin(), EntryUnitRegs.end()); Advance = true; } else { if (Clobber) { @@ -1279,20 +1424,24 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { // No need to match PredReg. // Continue adding to the queue. if (Offset > MemOps.back().Offset) { - MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, - Position, MBBI)); - ++NumMemOps; + unsigned OverridesCleaned = + AddMemOp(MemOps, + MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI), + UnitRegsInfo, UsedRegUnits) != 0; + NumMemOps += 1 - OverridesCleaned; Advance = true; } else { - for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); - I != E; ++I) { - if (Offset < I->Offset) { - MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill, - Position, MBBI)); - ++NumMemOps; + for (unsigned I = 0; I != NumMemOps; ++I) { + if (Offset < MemOps[I].Offset) { + MemOpQueueEntry entry(Offset, Reg, isKill, Position, MBBI); + unsigned OverridesCleaned = + AddMemOp(MemOps, entry, UnitRegsInfo, + UsedRegUnits, I) != 0; + NumMemOps += 1 - OverridesCleaned; + Advance = true; break; - } else if (Offset == I->Offset) { + } else if (Offset == MemOps[I].Offset) { // Collision! This can't be merged! break; } @@ -1363,6 +1512,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { CurrPredReg = 0; if (NumMemOps) { MemOps.clear(); + UsedRegUnits.clear(); NumMemOps = 0; } @@ -1408,7 +1558,7 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!"); PrevMI->setDesc(TII->get(NewOpc)); MO.setReg(ARM::PC); - PrevMI->copyImplicitOps(&*MBBI); + PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI); MBB.erase(MBBI); return true; } diff --git a/lib/Target/ARM/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp index e2ac9a4..b641483 100644 --- a/lib/Target/ARM/ARMMCInstLower.cpp +++ b/lib/Target/ARM/ARMMCInstLower.cpp @@ -15,8 +15,8 @@ #include "ARM.h" #include "ARMAsmPrinter.h" #include "MCTargetDesc/ARMMCExpr.h" -#include "llvm/Constants.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/IR/Constants.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/Target/Mangler.h" diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h index c0ac04b..88d96c0 100644 --- a/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -15,10 +15,10 @@ #define ARMMACHINEFUNCTIONINFO_H #include "ARMSubtarget.h" +#include "llvm/ADT/BitVector.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/ADT/BitVector.h" +#include "llvm/Target/TargetRegisterInfo.h" namespace llvm { diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td index 02196d0..2d088de 100644 --- a/lib/Target/ARM/ARMSchedule.td +++ b/lib/Target/ARM/ARMSchedule.td @@ -6,6 +6,77 @@ // License. 
See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// Instruction scheduling annotations for out-of-order CPUs. +// These annotations are independent of the itinerary class defined below. +// Here we define the subtarget-independent read/write per-operand resources. +// The subtarget schedule definitions will then map these to the subtarget's +// resource usages. +// For example: +// The instruction cycle timings table might contain an entry for an operation +// like the following: +// Rd <- ADD Rn, Rm, <shift> Rs +// Uops | Latency from register | Uops - resource requirements - latency +// 2 | Rn: 1 Rm: 4 Rs: 4 | uop T0, Rm, Rs - P01 - 3 +// | | uopc Rd, Rn, T0 - P01 - 1 +// This is telling us that the result will be available in destination register +// Rd after a minimum of three cycles after the result in Rm and Rs is available +// and one cycle after the result in Rn is available. The micro-ops can execute +// on resource P01. +// To model this, we need to express that we need to dispatch two micro-ops, +// that the resource P01 is needed and that the latency to Rn is different than +// the latency to Rm and Rs. The scheduler can decrease Rn's producer latency by +// three. +// We will do this by assigning (abstract) resources to register defs/uses. +// ARMSchedule.td: +// def WriteALUsr : SchedWrite; +// def ReadAdvanceALUsr : SchedRead; +// +// ARMInstrInfo.td: +// def ADDrs : I<>, Sched<[WriteALUsr, ReadAdvanceALUsr, ReadDefault, +// ReadDefault]> { ...} +// ReadAdvance read resources allow us to define "pipeline bypasses" or +// shorter latencies to certain registers as needed in the example above. +// The "ReadDefault" can be omitted. +// Next, the subtarget td file assigns resources to the abstract resources +// defined here. +// ARMScheduleSubtarget.td: +// // Resources. +// def P01 : ProcResource<3>; // ALU unit (3 of them). +// ... +// // Resource usages. +// def : WriteRes<WriteALUsr, [P01, P01]> { +// Latency = 4; // Latency of 4. +// NumMicroOps = 2; // Dispatch 2 micro-ops. +// // The two instances of resource P01 are occupied for one cycle. It is one +// // cycle because these resources happen to be pipelined. +// ResourceCycles = [1, 1]; +// } +// def : ReadAdvance<ReadAdvanceALUsr, 3>; + +// Basic ALU operation. +def WriteALU : SchedWrite; +def ReadALU : SchedRead; + +// Basic ALU with shifts. +def WriteALUsi : SchedWrite; // Shift by immediate. +def WriteALUsr : SchedWrite; // Shift by register. +def WriteALUSsr : SchedWrite; // Shift by register (flag setting). +def ReadALUsr : SchedRead; // Some operands are read later. + +// Compares. +def WriteCMP : SchedWrite; +def WriteCMPsi : SchedWrite; +def WriteCMPsr : SchedWrite; + +// Define TII for use in SchedVariant Predicates. +def : PredicateProlog<[{ + const ARMBaseInstrInfo *TII = + static_cast<const ARMBaseInstrInfo*>(SchedModel->getInstrInfo()); + (void)TII; +}]>; + +def IsPredicatedPred : SchedPredicate<[{TII->isPredicated(MI)}]>; //===----------------------------------------------------------------------===// // Instruction Itinerary classes used for ARM diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td index 404634f..9739ed2 100644 --- a/lib/Target/ARM/ARMScheduleA9.td +++ b/lib/Target/ARM/ARMScheduleA9.td @@ -1887,6 +1887,9 @@ def CortexA9Model : SchedMachineModel { let LoadLatency = 2; // Optimistic load latency assuming bypass.
// This is overridden by OperandCycles if the // Itineraries are queried instead. + let ILPWindow = 10; // Don't reschedule small blocks to hide + // latency. Minimum latency requirements are already + // modeled strictly by reserving resources. let MispredictPenalty = 8; // Based on estimate of pipeline depth. let Itineraries = CortexA9Itineraries; @@ -1895,6 +1898,8 @@ def CortexA9Model : SchedMachineModel { //===----------------------------------------------------------------------===// // Define each kind of processor resource and number available. +let SchedModel = CortexA9Model in { + def A9UnitALU : ProcResource<2>; def A9UnitMul : ProcResource<1> { let Super = A9UnitALU; } def A9UnitAGU : ProcResource<1>; @@ -1915,11 +1920,11 @@ def A9WriteI : SchedWriteRes<[A9UnitALU]>; def A9WriteIsr : SchedWriteRes<[A9UnitALU]> { let Latency = 2; } // Basic ALU. -def A9WriteA : SchedWriteRes<[A9UnitALU]>; +def A9WriteALU : SchedWriteRes<[A9UnitALU]>; // ALU with operand shifted by immediate. -def A9WriteAsi : SchedWriteRes<[A9UnitALU]> { let Latency = 2; } +def : WriteRes<WriteALUsi, [A9UnitALU]> { let Latency = 2; } // ALU with operand shifted by register. -def A9WriteAsr : SchedWriteRes<[A9UnitALU]> { let Latency = 3; } +def A9WriteALUsr : SchedWriteRes<[A9UnitALU]> { let Latency = 3; } // Multiplication def A9WriteM : SchedWriteRes<[A9UnitMul, A9UnitMul]> { let Latency = 4; } @@ -2000,13 +2005,6 @@ foreach NumCycles = 2-8 in { def A9WriteCycle#NumCycles : WriteSequence<[A9WriteCycle1], NumCycles>; } // foreach NumCycles -// Define TII for use in SchedVariant Predicates. -def : PredicateProlog<[{ - const ARMBaseInstrInfo *TII = - static_cast<const ARMBaseInstrInfo*>(SchedModel->getInstrInfo()); - (void)TII; -}]>; - // Define address generation sequences and predicates for 8 flavors of LDMs. foreach NumAddr = 1-8 in { @@ -2251,11 +2249,11 @@ def A9WriteLMfp : SchedWriteVariant<[ // These mov immediate writers are unconditionally expanded with // additive latency. def A9WriteI2 : WriteSequence<[A9WriteI, A9WriteI]>; -def A9WriteI2pc : WriteSequence<[A9WriteI, A9WriteI, A9WriteA]>; +def A9WriteI2pc : WriteSequence<[A9WriteI, A9WriteI, WriteALU]>; def A9WriteI2ld : WriteSequence<[A9WriteI, A9WriteI, A9WriteL]>; // Some ALU operations can read loaded integer values one cycle early. -def A9ReadA : SchedReadAdvance<1, +def A9ReadALU : SchedReadAdvance<1, [A9WriteL, A9WriteLHi, A9WriteLsi, A9WriteLb, A9WriteLbsi, A9WriteL1, A9WriteL2, A9WriteL3, A9WriteL4, A9WriteL5, A9WriteL6, A9WriteL7, A9WriteL8, @@ -2276,26 +2274,25 @@ def A9Read4 : SchedReadAdvance<3>; // This table follows the ARM Cortex-A9 Technical Reference Manuals, // mostly in order.
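// Reading guide (editor's note): in an entry such as // def :ItinRW<[WriteALU, A9ReadALU], [IIC_iALUi]> // the writes are matched to the defs and the reads to the uses in operand // order, so WriteALU describes the result of every instruction with itinerary // class IIC_iALUi, and A9ReadALU applies its read-advance to that // instruction's first register use.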
-let SchedModel = CortexA9Model in { def :ItinRW<[A9WriteI], [IIC_iMOVi,IIC_iMOVr,IIC_iMOVsi, IIC_iMVNi,IIC_iMVNsi, IIC_iCMOVi,IIC_iCMOVr,IIC_iCMOVsi]>; -def :ItinRW<[A9WriteI,A9ReadA],[IIC_iMVNr]>; +def :ItinRW<[A9WriteI,A9ReadALU],[IIC_iMVNr]>; def :ItinRW<[A9WriteIsr], [IIC_iMOVsr,IIC_iMVNsr,IIC_iCMOVsr]>; def :ItinRW<[A9WriteI2], [IIC_iMOVix2,IIC_iCMOVix2]>; def :ItinRW<[A9WriteI2pc], [IIC_iMOVix2addpc]>; def :ItinRW<[A9WriteI2ld], [IIC_iMOVix2ld]>; -def :ItinRW<[A9WriteA], [IIC_iBITi,IIC_iBITr,IIC_iUNAr,IIC_iTSTi,IIC_iTSTr]>; -def :ItinRW<[A9WriteA, A9ReadA], [IIC_iALUi, IIC_iCMPi, IIC_iCMPsi]>; -def :ItinRW<[A9WriteA, A9ReadA, A9ReadA],[IIC_iALUr,IIC_iCMPr]>; -def :ItinRW<[A9WriteAsi], [IIC_iBITsi,IIC_iUNAsi,IIC_iEXTr,IIC_iTSTsi]>; -def :ItinRW<[A9WriteAsi, A9ReadA], [IIC_iALUsi]>; -def :ItinRW<[A9WriteAsi, ReadDefault, A9ReadA], [IIC_iALUsir]>; // RSB -def :ItinRW<[A9WriteAsr], [IIC_iBITsr,IIC_iTSTsr,IIC_iEXTAr,IIC_iEXTAsr]>; -def :ItinRW<[A9WriteAsr, A9ReadA], [IIC_iALUsr,IIC_iCMPsr]>; +def :ItinRW<[WriteALU], [IIC_iBITi,IIC_iBITr,IIC_iUNAr,IIC_iTSTi,IIC_iTSTr]>; +def :ItinRW<[WriteALU, A9ReadALU], [IIC_iALUi, IIC_iCMPi, IIC_iCMPsi]>; +def :ItinRW<[WriteALU, A9ReadALU, A9ReadALU],[IIC_iALUr,IIC_iCMPr]>; +def :ItinRW<[WriteALUsi], [IIC_iBITsi,IIC_iUNAsi,IIC_iEXTr,IIC_iTSTsi]>; +def :ItinRW<[WriteALUsi, A9ReadALU], [IIC_iALUsi]>; +def :ItinRW<[WriteALUsi, ReadDefault, A9ReadALU], [IIC_iALUsir]>; // RSB +def :ItinRW<[A9WriteALUsr], [IIC_iBITsr,IIC_iTSTsr,IIC_iEXTAr,IIC_iEXTAsr]>; +def :ItinRW<[A9WriteALUsr, A9ReadALU], [IIC_iALUsr,IIC_iCMPsr]>; // A9WriteHi ignored for MUL32. def :ItinRW<[A9WriteM, A9WriteMHi], [IIC_iMUL32,IIC_iMAC32, @@ -2368,7 +2365,7 @@ def :ItinRW<[A9WriteLMAdr, A9WriteLM, A9WriteIssue], [IIC_iLoad_mu, IIC_iStore_m, IIC_iStore_mu]>; def :ItinRW<[A9WriteLM, A9WriteLMAdr, A9WriteB], [IIC_iLoad_mBr, IIC_iPop_Br]>; -def :ItinRW<[A9WriteL, A9WriteAdr, A9WriteA], [IIC_iLoadiALU]>; +def :ItinRW<[A9WriteL, A9WriteAdr, WriteALU], [IIC_iLoadiALU]>; def :ItinRW<[A9WriteLSfp, A9WriteAdr], [IIC_fpLoad32, IIC_fpLoad64]>; @@ -2483,4 +2480,17 @@ def :ItinRW<[A9WriteV9, A9Read3, A9Read2], [IIC_VMACD, IIC_VFMACD]>; def :ItinRW<[A9WriteV10, A9Read3, A9Read2], [IIC_VMACQ, IIC_VFMACQ]>; def :ItinRW<[A9WriteV9, A9Read2, A9Read2], [IIC_VRECSD]>; def :ItinRW<[A9WriteV10, A9Read2, A9Read2], [IIC_VRECSQ]>; + +// Map SchedRWs that are identical for Cortex-A9 to existing resources. +def : SchedAlias<WriteALU, A9WriteALU>; +def : SchedAlias<WriteALUsr, A9WriteALUsr>; +def : SchedAlias<WriteALUSsr, A9WriteALUsr>; +def : SchedAlias<ReadALU, A9ReadALU>; +def : SchedAlias<ReadALUsr, A9ReadALU>; +// FIXME: we need to special-case AND, ORR, EOR and BIC because they don't +// read-advance, but our InstrInfo claims they do. + +def : SchedAlias<WriteCMP, A9WriteALU>; +def : SchedAlias<WriteCMPsi, A9WriteALU>; +def : SchedAlias<WriteCMPsr, A9WriteALU>; } // SchedModel = CortexA9Model diff --git a/lib/Target/ARM/ARMScheduleSwift.td b/lib/Target/ARM/ARMScheduleSwift.td index e9bc3e0..7c6df41 100644 --- a/lib/Target/ARM/ARMScheduleSwift.td +++ b/lib/Target/ARM/ARMScheduleSwift.td @@ -1078,8 +1078,67 @@ def SwiftModel : SchedMachineModel { let IssueWidth = 3; // 3 micro-ops are dispatched per cycle. let MinLatency = 0; // Data dependencies are allowed within dispatch groups. let LoadLatency = 3; + let MispredictPenalty = 14; // A branch direction mispredict. let Itineraries = SwiftItineraries; } -// TODO: Add Swift processor and scheduler resources. +// Swift predicates.
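+// These predicates feed the SchedWriteVariants below, which pick a resource +// and latency bundle per instruction at scheduling time (for example, a +// predicated shifted-register ALU op costs extra micro-ops on Swift) -- +// editor's note.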
+def IsFastImmShiftSwiftPred : SchedPredicate<[{TII->isSwiftFastImmShift(MI)}]>; + +// Swift resource mapping. +let SchedModel = SwiftModel in { + // Processor resources. + def SwiftUnitP01 : ProcResource<2>; // ALU unit. + def SwiftUnitP0 : ProcResource<1> { let Super = SwiftUnitP01; } // Mul unit. + def SwiftUnitP1 : ProcResource<1> { let Super = SwiftUnitP01; } // Br unit. + def SwiftUnitP2 : ProcResource<1>; // LS unit. + def SwiftUnitDiv : ProcResource<1>; + + // Generic resource requirements. + def SwiftWriteP01TwoCycle : SchedWriteRes<[SwiftUnitP01]> { let Latency = 2; } + def SwiftWriteP01ThreeCycleTwoUops : + SchedWriteRes<[SwiftUnitP01, SwiftUnitP01]> { + let Latency = 3; + let NumMicroOps = 2; + } + def SwiftWriteP0ThreeCycleThreeUops : SchedWriteRes<[SwiftUnitP0]> { + let Latency = 3; + let NumMicroOps = 3; + let ResourceCycles = [3]; + } + + // 4.2.4 Arithmetic and Logical. + // ALU operation register shifted by immediate variant. + def SwiftWriteALUsi : SchedWriteVariant<[ + // lsl #2, lsl #1, or lsr #1. + SchedVar<IsFastImmShiftSwiftPred, [SwiftWriteP01TwoCycle]>, + SchedVar<NoSchedPred, [WriteALU]> + ]>; + def SwiftWriteALUsr : SchedWriteVariant<[ + SchedVar<IsPredicatedPred, [SwiftWriteP01ThreeCycleTwoUops]>, + SchedVar<NoSchedPred, [SwiftWriteP01TwoCycle]> + ]>; + def SwiftWriteALUSsr : SchedWriteVariant<[ + SchedVar<IsPredicatedPred, [SwiftWriteP0ThreeCycleThreeUops]>, + SchedVar<NoSchedPred, [SwiftWriteP01TwoCycle]> + ]>; + def SwiftReadAdvanceALUsr : SchedReadVariant<[ + SchedVar<IsPredicatedPred, [SchedReadAdvance<2>]>, + SchedVar<NoSchedPred, [NoReadAdvance]> + ]>; + // ADC,ADD,NEG,RSB,RSC,SBC,SUB,ADR + // AND,BIC,EOR,ORN,ORR + // CLZ,RBIT,REV,REV16,REVSH,PKH + def : WriteRes<WriteALU, [SwiftUnitP01]>; + def : SchedAlias<WriteALUsi, SwiftWriteALUsi>; + def : SchedAlias<WriteALUsr, SwiftWriteALUsr>; + def : SchedAlias<WriteALUSsr, SwiftWriteALUSsr>; + def : ReadAdvance<ReadALU, 0>; + def : SchedAlias<ReadALUsr, SwiftReadAdvanceALUsr>; + + // 4.2.5 Integer comparison + def : WriteRes<WriteCMP, [SwiftUnitP01]>; + def : WriteRes<WriteCMPsi, [SwiftUnitP01]>; + def : WriteRes<WriteCMPsr, [SwiftUnitP01]>; +} diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp index b33b3c9..41a7e0c 100644 --- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp +++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp @@ -13,8 +13,8 @@ #define DEBUG_TYPE "arm-selectiondag-info" #include "ARMTargetMachine.h" -#include "llvm/DerivedTypes.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/IR/DerivedTypes.h" using namespace llvm; ARMSelectionDAGInfo::ARMSelectionDAGInfo(const TargetMachine &TM) diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index bcc9db4..739300e 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -12,11 +12,14 @@ //===----------------------------------------------------------------------===// #include "ARMSubtarget.h" -#include "ARMBaseRegisterInfo.h" #include "ARMBaseInstrInfo.h" -#include "llvm/GlobalValue.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "ARMBaseRegisterInfo.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Function.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetOptions.h" #define GET_SUBTARGETINFO_TARGET_DESC #define GET_SUBTARGETINFO_CTOR @@ -40,60 +43,88 @@ StrictAlign("arm-strict-align", cl::Hidden, cl::desc("Disallow all unaligned memory accesses")); 
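// Editor's note: resetSubtargetFeatures(const MachineFunction *) below lets // the subtarget be reconfigured per function from IR attributes, e.g. // define void @f() #0 { ... } // attributes #0 = { "target-cpu"="cortex-a15" "target-features"="+neon" } // The attribute values here are only an illustration; any CPU/feature strings // understood by ParseSubtargetFeatures can appear.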
ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS) + const std::string &FS, const TargetOptions &Options) : ARMGenSubtargetInfo(TT, CPU, FS) , ARMProcFamily(Others) - , HasV4TOps(false) - , HasV5TOps(false) - , HasV5TEOps(false) - , HasV6Ops(false) - , HasV6T2Ops(false) - , HasV7Ops(false) - , HasVFPv2(false) - , HasVFPv3(false) - , HasVFPv4(false) - , HasNEON(false) - , UseNEONForSinglePrecisionFP(false) - , UseMulOps(UseFusedMulOps) - , SlowFPVMLx(false) - , HasVMLxForwarding(false) - , SlowFPBrcc(false) - , InThumbMode(false) - , HasThumb2(false) - , IsMClass(false) - , NoARM(false) - , PostRAScheduler(false) - , IsR9Reserved(ReserveR9) - , UseMovt(false) - , SupportsTailCall(false) - , HasFP16(false) - , HasD16(false) - , HasHardwareDivide(false) - , HasHardwareDivideInARM(false) - , HasT2ExtractPack(false) - , HasDataBarrier(false) - , Pref32BitThumb(false) - , AvoidCPSRPartialUpdate(false) - , HasRAS(false) - , HasMPExtension(false) - , FPOnlySP(false) - , AllowsUnalignedMem(false) - , Thumb2DSP(false) , stackAlignment(4) , CPUString(CPU) , TargetTriple(TT) + , Options(Options) , TargetABI(ARM_ABI_APCS) { - // Determine default and user specified characteristics + initializeEnvironment(); + resetSubtargetFeatures(CPU, FS); +} + +void ARMSubtarget::initializeEnvironment() { + HasV4TOps = false; + HasV5TOps = false; + HasV5TEOps = false; + HasV6Ops = false; + HasV6T2Ops = false; + HasV7Ops = false; + HasVFPv2 = false; + HasVFPv3 = false; + HasVFPv4 = false; + HasNEON = false; + UseNEONForSinglePrecisionFP = false; + UseMulOps = UseFusedMulOps; + SlowFPVMLx = false; + HasVMLxForwarding = false; + SlowFPBrcc = false; + InThumbMode = false; + HasThumb2 = false; + IsMClass = false; + NoARM = false; + PostRAScheduler = false; + IsR9Reserved = ReserveR9; + UseMovt = false; + SupportsTailCall = false; + HasFP16 = false; + HasD16 = false; + HasHardwareDivide = false; + HasHardwareDivideInARM = false; + HasT2ExtractPack = false; + HasDataBarrier = false; + Pref32BitThumb = false; + AvoidCPSRPartialUpdate = false; + AvoidMOVsShifterOperand = false; + HasRAS = false; + HasMPExtension = false; + FPOnlySP = false; + AllowsUnalignedMem = false; + Thumb2DSP = false; + UseNaClTrap = false; + UnsafeFPMath = false; +} + +void ARMSubtarget::resetSubtargetFeatures(const MachineFunction *MF) { + AttributeSet FnAttrs = MF->getFunction()->getAttributes(); + Attribute CPUAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex, + "target-cpu"); + Attribute FSAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex, + "target-features"); + std::string CPU = + !CPUAttr.hasAttribute(Attribute::None) ? CPUAttr.getValueAsString() : ""; + std::string FS = + !FSAttr.hasAttribute(Attribute::None) ? FSAttr.getValueAsString() : ""; + if (!FS.empty()) { + initializeEnvironment(); + resetSubtargetFeatures(CPU, FS); + } +} + +void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { if (CPUString.empty()) CPUString = "generic"; // Insert the architecture feature derived from the target triple into the // feature string. This is important for setting features that are implied // based on the architecture version.
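// (For example, ParseARMTriple turns an armv7 triple into an implied feature // string such as "+v7" before the explicit FS features are appended; the // exact string is illustrative -- editor's note.)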
- std::string ArchFS = ARM_MC::ParseARMTriple(TT, CPUString); + std::string ArchFS = ARM_MC::ParseARMTriple(TargetTriple.getTriple(), + CPUString); if (!FS.empty()) { if (!ArchFS.empty()) - ArchFS = ArchFS + "," + FS; + ArchFS = ArchFS + "," + FS.str(); else ArchFS = FS; } @@ -110,7 +141,8 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, // Initialize scheduling itinerary for the specified CPU. InstrItins = getInstrItineraryForCPU(CPUString); - if ((TT.find("eabi") != std::string::npos) || (isTargetIOS() && isMClass())) + if ((TargetTriple.getTriple().find("eabi") != std::string::npos) || + (isTargetIOS() && isMClass())) // FIXME: We might want to separate AAPCS and EABI. Some systems, e.g. // Darwin-EABI conforms to AAPCS but not the rest of EABI. TargetABI = ARM_ABI_AAPCS; @@ -133,6 +165,12 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, // configuration. if (!StrictAlign && hasV6Ops() && isTargetDarwin()) AllowsUnalignedMem = true; + + // NEON f32 ops are non-IEEE 754 compliant. Darwin is ok with it by default. + uint64_t Bits = getFeatureBits(); + if ((Bits & ARM::ProcA5 || Bits & ARM::ProcA8) && // Where this matters + (Options.UnsafeFPMath || isTargetDarwin())) + UseNEONForSinglePrecisionFP = true; } /// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol. diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 8e6b650..5b5ee6a 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -15,9 +15,9 @@ #define ARMSUBTARGET_H #include "MCTargetDesc/ARMMCTargetDesc.h" -#include "llvm/Target/TargetSubtargetInfo.h" -#include "llvm/MC/MCInstrItineraries.h" #include "llvm/ADT/Triple.h" +#include "llvm/MC/MCInstrItineraries.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include <string> #define GET_SUBTARGETINFO_HEADER @@ -26,11 +26,12 @@ namespace llvm { class GlobalValue; class StringRef; +class TargetOptions; class ARMSubtarget : public ARMGenSubtargetInfo { protected: enum ARMProcFamilyEnum { - Others, CortexA8, CortexA9, CortexA15, Swift + Others, CortexA5, CortexA8, CortexA9, CortexA15, CortexR5, Swift }; /// ARMProcFamily - ARM processor family: Cortex-A8, Cortex-A9, and others. @@ -131,6 +132,10 @@ protected: /// CPSR setting instruction. bool AvoidCPSRPartialUpdate; + /// AvoidMOVsShifterOperand - If true, codegen should avoid using flag setting + /// movs with shifter operand (i.e. asr, lsl, lsr). + bool AvoidMOVsShifterOperand; + /// HasRAS - Some processors perform return stack prediction. CodeGen should /// avoid issuing "normal" call instructions to callees which do not return. bool HasRAS; @@ -152,6 +157,12 @@ protected: /// and such) instructions in Thumb2 code. bool Thumb2DSP; + /// If true, a NaCl TRAP instruction is generated instead of the regular TRAP. + bool UseNaClTrap; + + /// Target machine allows unsafe FP math (such as use of NEON fp). + bool UnsafeFPMath; + /// stackAlignment - The minimum alignment known to hold for the stack frame on /// entry to the function and which must be maintained by every function. unsigned stackAlignment; @@ -168,6 +179,9 @@ protected: /// Selected instruction itineraries (one entry per itinerary class.) InstrItineraryData InstrItins; + /// Options passed via command line that could influence the target. + const TargetOptions &Options; + public: enum { isELF, isDarwin @@ -182,7 +196,7 @@ protected: /// of the specified triple.
/// ARMSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS); + const std::string &FS, const TargetOptions &Options); /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size /// that still makes it profitable to inline the call. @@ -195,6 +209,12 @@ protected: /// subtarget options. Definition of function is auto generated by tblgen. void ParseSubtargetFeatures(StringRef CPU, StringRef FS); + /// \brief Reset the features for the ARM target. + virtual void resetSubtargetFeatures(const MachineFunction *MF); +private: + void initializeEnvironment(); + void resetSubtargetFeatures(StringRef CPU, StringRef FS); +public: void computeIssueWidth(); bool hasV4TOps() const { return HasV4TOps; } @@ -204,12 +224,14 @@ protected: bool hasV6T2Ops() const { return HasV6T2Ops; } bool hasV7Ops() const { return HasV7Ops; } + bool isCortexA5() const { return ARMProcFamily == CortexA5; } bool isCortexA8() const { return ARMProcFamily == CortexA8; } bool isCortexA9() const { return ARMProcFamily == CortexA9; } bool isCortexA15() const { return ARMProcFamily == CortexA15; } bool isSwift() const { return ARMProcFamily == Swift; } bool isCortexM3() const { return CPUString == "cortex-m3"; } bool isLikeA9() const { return isCortexA9() || isCortexA15(); } + bool isCortexR5() const { return ARMProcFamily == CortexR5; } bool hasARMOps() const { return !NoARM; } @@ -231,9 +253,11 @@ protected: bool isFPOnlySP() const { return FPOnlySP; } bool prefers32BitThumb() const { return Pref32BitThumb; } bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; } + bool avoidMOVsShifterOperand() const { return AvoidMOVsShifterOperand; } bool hasRAS() const { return HasRAS; } bool hasMPExtension() const { return HasMPExtension; } bool hasThumb2DSP() const { return Thumb2DSP; } + bool useNaClTrap() const { return UseNaClTrap; } bool hasFP16() const { return HasFP16; } bool hasD16() const { return HasD16; } @@ -243,7 +267,7 @@ protected: bool isTargetIOS() const { return TargetTriple.getOS() == Triple::IOS; } bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); } bool isTargetNaCl() const { - return TargetTriple.getOS() == Triple::NativeClient; + return TargetTriple.getOS() == Triple::NaCl; } bool isTargetELF() const { return !isTargetDarwin(); } diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index b486d4f..42c7d2c 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -11,11 +11,11 @@ //===----------------------------------------------------------------------===// #include "ARMTargetMachine.h" -#include "ARMFrameLowering.h" #include "ARM.h" -#include "llvm/PassManager.h" +#include "ARMFrameLowering.h" #include "llvm/CodeGen/Passes.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/PassManager.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/TargetRegistry.h" @@ -28,6 +28,11 @@ EnableGlobalMerge("global-merge", cl::Hidden, cl::desc("Enable global merge pass"), cl::init(true)); +static cl::opt<bool> +DisableA15SDOptimization("disable-a15-sd-optimization", cl::Hidden, + cl::desc("Inhibit optimization of S->D register accesses on A15"), + cl::init(false)); + extern "C" void LLVMInitializeARMTarget() { // Register the target. 
RegisterTargetMachine<ARMTargetMachine> X(TheARMTarget); @@ -43,7 +48,7 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), - Subtarget(TT, CPU, FS), + Subtarget(TT, CPU, FS, Options), JITInfo(), InstrItins(Subtarget.getInstrItineraryData()) { // Default to soft float ABI @@ -51,6 +56,15 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT, this->Options.FloatABIType = FloatABI::Soft; } +void ARMBaseTargetMachine::addAnalysisPasses(PassManagerBase &PM) { + // Add first the target-independent BasicTTI pass, then our ARM pass. This + // allows the ARM pass to delegate to the target independent layer when + // appropriate. + PM.add(createBasicTargetTransformInfoPass(getTargetLowering())); + PM.add(createARMTargetTransformInfoPass(this)); +} + + void ARMTargetMachine::anchor() { } ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT, @@ -70,8 +84,7 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT, "v128:64:128-v64:64:64-n32-S32")), TLInfo(*this), TSInfo(*this), - FrameLowering(Subtarget), - STTI(&TLInfo), VTTI(&TLInfo) { + FrameLowering(Subtarget) { if (!Subtarget.hasARMOps()) report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not " "support ARM mode execution!"); @@ -103,8 +116,7 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT, TSInfo(*this), FrameLowering(Subtarget.hasThumb2() ? new ARMFrameLowering(Subtarget) - : (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)), - STTI(&TLInfo), VTTI(&TLInfo) { + : (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)) { } namespace { @@ -157,6 +169,12 @@ bool ARMPassConfig::addPreRegAlloc() { addPass(createARMLoadStoreOptimizationPass(true)); if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isLikeA9()) addPass(createMLxExpansionPass()); + // Since the A15SDOptimizer pass can insert VDUP instructions, it can only be + // enabled when NEON is available. 
+ if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isCortexA15() && + getARMSubtarget().hasNEON() && !DisableA15SDOptimization) { + addPass(createA15SDOptimizerPass()); + } return true; } diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index ebdd5b4..d4caf5c 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -14,20 +14,19 @@ #ifndef ARMTARGETMACHINE_H #define ARMTARGETMACHINE_H -#include "ARMInstrInfo.h" #include "ARMFrameLowering.h" -#include "ARMJITInfo.h" -#include "ARMSubtarget.h" #include "ARMISelLowering.h" +#include "ARMInstrInfo.h" +#include "ARMJITInfo.h" #include "ARMSelectionDAGInfo.h" -#include "Thumb1InstrInfo.h" +#include "ARMSubtarget.h" #include "Thumb1FrameLowering.h" +#include "Thumb1InstrInfo.h" #include "Thumb2InstrInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetTransformImpl.h" -#include "llvm/DataLayout.h" -#include "llvm/MC/MCStreamer.h" #include "llvm/ADT/OwningPtr.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Target/TargetMachine.h" namespace llvm { @@ -47,10 +46,17 @@ public: virtual ARMJITInfo *getJITInfo() { return &JITInfo; } virtual const ARMSubtarget *getSubtargetImpl() const { return &Subtarget; } + virtual const ARMTargetLowering *getTargetLowering() const { + // Implemented by derived classes + llvm_unreachable("getTargetLowering not implemented"); + } virtual const InstrItineraryData *getInstrItineraryData() const { return &InstrItins; } + /// \brief Register ARM analysis passes with a pass manager. + virtual void addAnalysisPasses(PassManagerBase &PM); + // Pass Pipeline Configuration virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); @@ -66,8 +72,6 @@ class ARMTargetMachine : public ARMBaseTargetMachine { ARMTargetLowering TLInfo; ARMSelectionDAGInfo TSInfo; ARMFrameLowering FrameLowering; - ScalarTargetTransformImpl STTI; - VectorTargetTransformImpl VTTI; public: ARMTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, @@ -89,12 +93,6 @@ class ARMTargetMachine : public ARMBaseTargetMachine { virtual const ARMFrameLowering *getFrameLowering() const { return &FrameLowering; } - virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { - return &STTI; - } - virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { - return &VTTI; - } virtual const ARMInstrInfo *getInstrInfo() const { return &InstrInfo; } virtual const DataLayout *getDataLayout() const { return &DL; } }; @@ -112,8 +110,6 @@ class ThumbTargetMachine : public ARMBaseTargetMachine { ARMSelectionDAGInfo TSInfo; // Either Thumb1FrameLowering or ARMFrameLowering. 
OwningPtr<ARMFrameLowering> FrameLowering; - ScalarTargetTransformImpl STTI; - VectorTargetTransformImpl VTTI; public: ThumbTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, @@ -142,12 +138,6 @@ public: virtual const ARMFrameLowering *getFrameLowering() const { return FrameLowering.get(); } - virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { - return &STTI; - } - virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { - return &VTTI; - } virtual const DataLayout *getDataLayout() const { return &DL; } }; diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp index 3d85ca7..dfdf6ab 100644 --- a/lib/Target/ARM/ARMTargetObjectFile.cpp +++ b/lib/Target/ARM/ARMTargetObjectFile.cpp @@ -9,12 +9,14 @@ #include "ARMTargetObjectFile.h" #include "ARMSubtarget.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/ELF.h" +#include "llvm/Target/Mangler.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/ADT/StringExtras.h" using namespace llvm; using namespace dwarf; @@ -38,3 +40,14 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx, 0, SectionKind::getMetadata()); } + +const MCExpr *ARMElfTargetObjectFile:: +getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang, + MachineModuleInfo *MMI, unsigned Encoding, + MCStreamer &Streamer) const { + assert(Encoding == DW_EH_PE_absptr && "Can handle absptr encoding only"); + + return MCSymbolRefExpr::Create(Mang->getSymbol(GV), + MCSymbolRefExpr::VK_ARM_TARGET2, + getContext()); +} diff --git a/lib/Target/ARM/ARMTargetObjectFile.h b/lib/Target/ARM/ARMTargetObjectFile.h index c6a7261..7f60727 100644 --- a/lib/Target/ARM/ARMTargetObjectFile.h +++ b/lib/Target/ARM/ARMTargetObjectFile.h @@ -28,6 +28,11 @@ public: virtual void Initialize(MCContext &Ctx, const TargetMachine &TM); + const MCExpr * + getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang, + MachineModuleInfo *MMI, unsigned Encoding, + MCStreamer &Streamer) const; + virtual const MCSection *getAttributesSection() const { return AttributesSection; } diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp new file mode 100644 index 0000000..1019b97 --- /dev/null +++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -0,0 +1,458 @@ +//===-- ARMTargetTransformInfo.cpp - ARM specific TTI pass ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements a TargetTransformInfo analysis pass specific to the +/// ARM target machine. It uses the target's detailed information to provide +/// more precise answers to certain TTI queries, while letting the target +/// independent and default TTI implementations handle the rest. 
+/// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "armtti" +#include "ARM.h" +#include "ARMTargetMachine.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/CostTable.h" +using namespace llvm; + +// Declare the pass initialization routine locally as target-specific passes +// don't have a target-wide initialization entry point, and so we rely on the +// pass constructor initialization. +namespace llvm { +void initializeARMTTIPass(PassRegistry &); +} + +namespace { + +class ARMTTI : public ImmutablePass, public TargetTransformInfo { + const ARMBaseTargetMachine *TM; + const ARMSubtarget *ST; + const ARMTargetLowering *TLI; + + /// Estimate the overhead of scalarizing an instruction. Insert and Extract + /// are set if the result needs to be inserted and/or extracted from vectors. + unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; + +public: + ARMTTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) { + llvm_unreachable("This pass cannot be directly constructed"); + } + + ARMTTI(const ARMBaseTargetMachine *TM) + : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()), + TLI(TM->getTargetLowering()) { + initializeARMTTIPass(*PassRegistry::getPassRegistry()); + } + + virtual void initializePass() { + pushTTIStack(this); + } + + virtual void finalizePass() { + popTTIStack(); + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + TargetTransformInfo::getAnalysisUsage(AU); + } + + /// Pass identification. + static char ID; + + /// Provide necessary pointer adjustments for the two base classes. + virtual void *getAdjustedAnalysisPointer(const void *ID) { + if (ID == &TargetTransformInfo::ID) + return (TargetTransformInfo*)this; + return this; + } + + /// \name Scalar TTI Implementations + /// @{ + + virtual unsigned getIntImmCost(const APInt &Imm, Type *Ty) const; + + /// @} + + + /// \name Vector TTI Implementations + /// @{ + + unsigned getNumberOfRegisters(bool Vector) const { + if (Vector) { + if (ST->hasNEON()) + return 16; + return 0; + } + + if (ST->isThumb1Only()) + return 8; + return 16; + } + + unsigned getRegisterBitWidth(bool Vector) const { + if (Vector) { + if (ST->hasNEON()) + return 128; + return 0; + } + + return 32; + } + + unsigned getMaximumUnrollFactor() const { + // These are out-of-order CPUs: + if (ST->isCortexA15() || ST->isSwift()) + return 2; + return 1; + } + + unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, + int Index, Type *SubTp) const; + + unsigned getCastInstrCost(unsigned Opcode, Type *Dst, + Type *Src) const; + + unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) const; + + unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const; + + unsigned getAddressComputationCost(Type *Val) const; + /// @} +}; + +} // end anonymous namespace + +INITIALIZE_AG_PASS(ARMTTI, TargetTransformInfo, "armtti", + "ARM Target Transform Info", true, true, false) +char ARMTTI::ID = 0; + +ImmutablePass * +llvm::createARMTargetTransformInfoPass(const ARMBaseTargetMachine *TM) { + return new ARMTTI(TM); +} + + +unsigned ARMTTI::getIntImmCost(const APInt &Imm, Type *Ty) const { + assert(Ty->isIntegerTy()); + + unsigned Bits = Ty->getPrimitiveSizeInBits(); + if (Bits == 0 || Bits > 32) + return 4; + + int32_t SImmVal = Imm.getSExtValue(); + uint32_t ZImmVal = Imm.getZExtValue(); + if (!ST->isThumb()) { + if ((SImmVal >= 0 && SImmVal < 65536) ||
(ARM_AM::getSOImmVal(ZImmVal) != -1) || + (ARM_AM::getSOImmVal(~ZImmVal) != -1)) + return 1; + return ST->hasV6T2Ops() ? 2 : 3; + } else if (ST->isThumb2()) { + if ((SImmVal >= 0 && SImmVal < 65536) || + (ARM_AM::getT2SOImmVal(ZImmVal) != -1) || + (ARM_AM::getT2SOImmVal(~ZImmVal) != -1)) + return 1; + return ST->hasV6T2Ops() ? 2 : 3; + } else /*Thumb1*/ { + if (SImmVal >= 0 && SImmVal < 256) + return 1; + if ((~ZImmVal < 256) || ARM_AM::isThumbImmShiftedVal(ZImmVal)) + return 2; + // Load from constantpool. + return 3; + } + return 2; +} + +unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst, + Type *Src) const { + int ISD = TLI->InstructionOpcodeToISD(Opcode); + assert(ISD && "Invalid opcode"); + + // Single to/from double precision conversions. + static const CostTblEntry<MVT> NEONFltDblTbl[] = { + // Vector fptrunc/fpext conversions. + { ISD::FP_ROUND, MVT::v2f64, 2 }, + { ISD::FP_EXTEND, MVT::v2f32, 2 }, + { ISD::FP_EXTEND, MVT::v4f32, 4 } + }; + + if (Src->isVectorTy() && ST->hasNEON() && (ISD == ISD::FP_ROUND || + ISD == ISD::FP_EXTEND)) { + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src); + int Idx = CostTableLookup<MVT>(NEONFltDblTbl, array_lengthof(NEONFltDblTbl), + ISD, LT.second); + if (Idx != -1) + return LT.first * NEONFltDblTbl[Idx].Cost; + } + + EVT SrcTy = TLI->getValueType(Src); + EVT DstTy = TLI->getValueType(Dst); + + if (!SrcTy.isSimple() || !DstTy.isSimple()) + return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); + + // Some arithmetic, load and store operations have specific instructions + // to cast up/down their types automatically at no extra cost. + // TODO: Get these tables to know at least what the related operations are. + static const TypeConversionCostTblEntry<MVT> NEONVectorConversionTbl[] = { + { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 0 }, + { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 0 }, + { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 1 }, + { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i32, 1 }, + { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 }, + { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 }, + + // The number of vmovl instructions for the extension. + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 }, + { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 }, + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 }, + { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 }, + { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 7 }, + { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 7 }, + { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 6 }, + { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 6 }, + { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 }, + { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 }, + + // Operations that we legalize using load/stores to the stack. + { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 4*1 + 16*2 + 2*1 }, + { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 2*1 + 8*2 + 1 }, + + // Vector float <-> i32 conversions. 
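+ // (A v4i32 <-> v4f32 conversion is a single NEON VCVT on a Q register, + // hence the cost of 1 in the first two rows below -- editor's note.)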
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, + + { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 }, + { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 }, + { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 2 }, + { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 2 }, + { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 }, + { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 }, + { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 }, + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 }, + { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 }, + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 }, + { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i16, 8 }, + { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i16, 8 }, + { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i32, 4 }, + { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i32, 4 }, + + { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 }, + { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 }, + { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 3 }, + { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 3 }, + { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 }, + { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 }, + + // Vector double <-> i32 conversions. + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 }, + + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 3 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 3 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 }, + + { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 }, + { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 }, + { ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f32, 4 }, + { ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f32, 4 }, + { ISD::FP_TO_SINT, MVT::v16i16, MVT::v16f32, 8 }, + { ISD::FP_TO_UINT, MVT::v16i16, MVT::v16f32, 8 } + }; + + if (SrcTy.isVector() && ST->hasNEON()) { + int Idx = ConvertCostTableLookup<MVT>(NEONVectorConversionTbl, + array_lengthof(NEONVectorConversionTbl), + ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()); + if (Idx != -1) + return NEONVectorConversionTbl[Idx].Cost; + } + + // Scalar float to integer conversions. 
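+ // (The uniform cost of 2 below plausibly models a VCVT plus a VMOV back to + // a core register, while i64 results need a much longer sequence, hence 10. + // Editor's estimate, not taken from the TRM.)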
+ static const TypeConversionCostTblEntry<MVT> NEONFloatConversionTbl[] = { + { ISD::FP_TO_SINT, MVT::i1, MVT::f32, 2 }, + { ISD::FP_TO_UINT, MVT::i1, MVT::f32, 2 }, + { ISD::FP_TO_SINT, MVT::i1, MVT::f64, 2 }, + { ISD::FP_TO_UINT, MVT::i1, MVT::f64, 2 }, + { ISD::FP_TO_SINT, MVT::i8, MVT::f32, 2 }, + { ISD::FP_TO_UINT, MVT::i8, MVT::f32, 2 }, + { ISD::FP_TO_SINT, MVT::i8, MVT::f64, 2 }, + { ISD::FP_TO_UINT, MVT::i8, MVT::f64, 2 }, + { ISD::FP_TO_SINT, MVT::i16, MVT::f32, 2 }, + { ISD::FP_TO_UINT, MVT::i16, MVT::f32, 2 }, + { ISD::FP_TO_SINT, MVT::i16, MVT::f64, 2 }, + { ISD::FP_TO_UINT, MVT::i16, MVT::f64, 2 }, + { ISD::FP_TO_SINT, MVT::i32, MVT::f32, 2 }, + { ISD::FP_TO_UINT, MVT::i32, MVT::f32, 2 }, + { ISD::FP_TO_SINT, MVT::i32, MVT::f64, 2 }, + { ISD::FP_TO_UINT, MVT::i32, MVT::f64, 2 }, + { ISD::FP_TO_SINT, MVT::i64, MVT::f32, 10 }, + { ISD::FP_TO_UINT, MVT::i64, MVT::f32, 10 }, + { ISD::FP_TO_SINT, MVT::i64, MVT::f64, 10 }, + { ISD::FP_TO_UINT, MVT::i64, MVT::f64, 10 } + }; + if (SrcTy.isFloatingPoint() && ST->hasNEON()) { + int Idx = ConvertCostTableLookup<MVT>(NEONFloatConversionTbl, + array_lengthof(NEONFloatConversionTbl), + ISD, DstTy.getSimpleVT(), + SrcTy.getSimpleVT()); + if (Idx != -1) + return NEONFloatConversionTbl[Idx].Cost; + } + + // Scalar integer to float conversions. + static const TypeConversionCostTblEntry<MVT> NEONIntegerConversionTbl[] = { + { ISD::SINT_TO_FP, MVT::f32, MVT::i1, 2 }, + { ISD::UINT_TO_FP, MVT::f32, MVT::i1, 2 }, + { ISD::SINT_TO_FP, MVT::f64, MVT::i1, 2 }, + { ISD::UINT_TO_FP, MVT::f64, MVT::i1, 2 }, + { ISD::SINT_TO_FP, MVT::f32, MVT::i8, 2 }, + { ISD::UINT_TO_FP, MVT::f32, MVT::i8, 2 }, + { ISD::SINT_TO_FP, MVT::f64, MVT::i8, 2 }, + { ISD::UINT_TO_FP, MVT::f64, MVT::i8, 2 }, + { ISD::SINT_TO_FP, MVT::f32, MVT::i16, 2 }, + { ISD::UINT_TO_FP, MVT::f32, MVT::i16, 2 }, + { ISD::SINT_TO_FP, MVT::f64, MVT::i16, 2 }, + { ISD::UINT_TO_FP, MVT::f64, MVT::i16, 2 }, + { ISD::SINT_TO_FP, MVT::f32, MVT::i32, 2 }, + { ISD::UINT_TO_FP, MVT::f32, MVT::i32, 2 }, + { ISD::SINT_TO_FP, MVT::f64, MVT::i32, 2 }, + { ISD::UINT_TO_FP, MVT::f64, MVT::i32, 2 }, + { ISD::SINT_TO_FP, MVT::f32, MVT::i64, 10 }, + { ISD::UINT_TO_FP, MVT::f32, MVT::i64, 10 }, + { ISD::SINT_TO_FP, MVT::f64, MVT::i64, 10 }, + { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 10 } + }; + + if (SrcTy.isInteger() && ST->hasNEON()) { + int Idx = ConvertCostTableLookup<MVT>(NEONIntegerConversionTbl, + array_lengthof(NEONIntegerConversionTbl), + ISD, DstTy.getSimpleVT(), + SrcTy.getSimpleVT()); + if (Idx != -1) + return NEONIntegerConversionTbl[Idx].Cost; + } + + // Scalar integer conversion costs. + static const TypeConversionCostTblEntry<MVT> ARMIntegerConversionTbl[] = { + // i16 -> i64 requires two dependent operations. + { ISD::SIGN_EXTEND, MVT::i64, MVT::i16, 2 }, + + // Truncates on i64 are assumed to be free. + { ISD::TRUNCATE, MVT::i32, MVT::i64, 0 }, + { ISD::TRUNCATE, MVT::i16, MVT::i64, 0 }, + { ISD::TRUNCATE, MVT::i8, MVT::i64, 0 }, + { ISD::TRUNCATE, MVT::i1, MVT::i64, 0 } + }; + + if (SrcTy.isInteger()) { + int Idx = + ConvertCostTableLookup<MVT>(ARMIntegerConversionTbl, + array_lengthof(ARMIntegerConversionTbl), + ISD, DstTy.getSimpleVT(), + SrcTy.getSimpleVT()); + if (Idx != -1) + return ARMIntegerConversionTbl[Idx].Cost; + } + + return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); +} + +unsigned ARMTTI::getVectorInstrCost(unsigned Opcode, Type *ValTy, + unsigned Index) const { + // Penalize inserting into a D-subregister.
We end up with three times // lower estimated throughput on Swift. + if (ST->isSwift() && + Opcode == Instruction::InsertElement && + ValTy->isVectorTy() && + ValTy->getScalarSizeInBits() <= 32) + return 3; + + return TargetTransformInfo::getVectorInstrCost(Opcode, ValTy, Index); +} + +unsigned ARMTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, + Type *CondTy) const { + + int ISD = TLI->InstructionOpcodeToISD(Opcode); + // On NEON, a vector select gets lowered to vbsl. + if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT) { + // Lowering of some vector selects is currently far from perfect. + static const TypeConversionCostTblEntry<MVT> NEONVectorSelectTbl[] = { + { ISD::SELECT, MVT::v16i1, MVT::v16i16, 2*16 + 1 + 3*1 + 4*1 }, + { ISD::SELECT, MVT::v8i1, MVT::v8i32, 4*8 + 1*3 + 1*4 + 1*2 }, + { ISD::SELECT, MVT::v16i1, MVT::v16i32, 4*16 + 1*6 + 1*8 + 1*4 }, + { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4*4 + 1*2 + 1 }, + { ISD::SELECT, MVT::v8i1, MVT::v8i64, 50 }, + { ISD::SELECT, MVT::v16i1, MVT::v16i64, 100 } + }; + + EVT SelCondTy = TLI->getValueType(CondTy); + EVT SelValTy = TLI->getValueType(ValTy); + int Idx = ConvertCostTableLookup<MVT>(NEONVectorSelectTbl, + array_lengthof(NEONVectorSelectTbl), + ISD, SelCondTy.getSimpleVT(), + SelValTy.getSimpleVT()); + if (Idx != -1) + return NEONVectorSelectTbl[Idx].Cost; + + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy); + return LT.first; + } + + return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy); +} + +unsigned ARMTTI::getAddressComputationCost(Type *Ty) const { + // In many cases the address computation is not merged into the instruction + // addressing mode. + return 1; +} + +unsigned ARMTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, + Type *SubTp) const { + // We only handle costs of reverse shuffles for now. + if (Kind != SK_Reverse) + return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); + + static const CostTblEntry<MVT> NEONShuffleTbl[] = { + // A reverse shuffle costs one instruction if we are shuffling within a + // double word (vrev) or two if we shuffle a quad word (vrev, vext). + { ISD::VECTOR_SHUFFLE, MVT::v2i32, 1 }, + { ISD::VECTOR_SHUFFLE, MVT::v2f32, 1 }, + { ISD::VECTOR_SHUFFLE, MVT::v2i64, 1 }, + { ISD::VECTOR_SHUFFLE, MVT::v2f64, 1 }, + + { ISD::VECTOR_SHUFFLE, MVT::v4i32, 2 }, + { ISD::VECTOR_SHUFFLE, MVT::v4f32, 2 }, + { ISD::VECTOR_SHUFFLE, MVT::v8i16, 2 }, + { ISD::VECTOR_SHUFFLE, MVT::v16i8, 2 } + }; + + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp); + + int Idx = CostTableLookup<MVT>(NEONShuffleTbl, array_lengthof(NEONShuffleTbl), + ISD::VECTOR_SHUFFLE, LT.second); + if (Idx == -1) + return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); + + return LT.first * NEONShuffleTbl[Idx].Cost; +} diff --git a/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp b/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp deleted file mode 100644 index fda8536..0000000 --- a/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp +++ /dev/null @@ -1,138 +0,0 @@ -//===-- ARMAsmLexer.cpp - Tokenize ARM assembly to AsmTokens --------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details.
-// -//===----------------------------------------------------------------------===// - -#include "MCTargetDesc/ARMBaseInfo.h" - -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCParser/MCAsmLexer.h" -#include "llvm/MC/MCParser/MCParsedAsmOperand.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCTargetAsmLexer.h" - -#include "llvm/Support/TargetRegistry.h" - -#include "llvm/ADT/StringSwitch.h" - -#include <string> -#include <map> - -using namespace llvm; - -namespace { - -class ARMBaseAsmLexer : public MCTargetAsmLexer { - const MCAsmInfo &AsmInfo; - - const AsmToken &lexDefinite() { - return getLexer()->Lex(); - } - - AsmToken LexTokenUAL(); -protected: - typedef std::map <std::string, unsigned> rmap_ty; - - rmap_ty RegisterMap; - - void InitRegisterMap(const MCRegisterInfo *info) { - unsigned numRegs = info->getNumRegs(); - - for (unsigned i = 0; i < numRegs; ++i) { - const char *regName = info->getName(i); - if (regName) - RegisterMap[regName] = i; - } - } - - unsigned MatchRegisterName(StringRef Name) { - rmap_ty::iterator iter = RegisterMap.find(Name.str()); - if (iter != RegisterMap.end()) - return iter->second; - else - return 0; - } - - AsmToken LexToken() { - if (!Lexer) { - SetError(SMLoc(), "No MCAsmLexer installed"); - return AsmToken(AsmToken::Error, "", 0); - } - - switch (AsmInfo.getAssemblerDialect()) { - default: - SetError(SMLoc(), "Unhandled dialect"); - return AsmToken(AsmToken::Error, "", 0); - case 0: - return LexTokenUAL(); - } - } -public: - ARMBaseAsmLexer(const Target &T, const MCAsmInfo &MAI) - : MCTargetAsmLexer(T), AsmInfo(MAI) { - } -}; - -class ARMAsmLexer : public ARMBaseAsmLexer { -public: - ARMAsmLexer(const Target &T, const MCRegisterInfo &MRI, const MCAsmInfo &MAI) - : ARMBaseAsmLexer(T, MAI) { - InitRegisterMap(&MRI); - } -}; - -class ThumbAsmLexer : public ARMBaseAsmLexer { -public: - ThumbAsmLexer(const Target &T, const MCRegisterInfo &MRI,const MCAsmInfo &MAI) - : ARMBaseAsmLexer(T, MAI) { - InitRegisterMap(&MRI); - } -}; - -} // end anonymous namespace - -AsmToken ARMBaseAsmLexer::LexTokenUAL() { - const AsmToken &lexedToken = lexDefinite(); - - switch (lexedToken.getKind()) { - default: break; - case AsmToken::Error: - SetError(Lexer->getErrLoc(), Lexer->getErr()); - break; - case AsmToken::Identifier: { - std::string lowerCase = lexedToken.getString().lower(); - - unsigned regID = MatchRegisterName(lowerCase); - // Check for register aliases. - // r13 -> sp - // r14 -> lr - // r15 -> pc - // ip -> r12 - // FIXME: Some assemblers support lots of others. Do we want them all? 
- if (!regID) { - regID = StringSwitch<unsigned>(lowerCase) - .Case("r13", ARM::SP) - .Case("r14", ARM::LR) - .Case("r15", ARM::PC) - .Case("ip", ARM::R12) - .Default(0); - } - - if (regID) - return AsmToken(AsmToken::Register, - lexedToken.getString(), - static_cast<int64_t>(regID)); - } - } - - return AsmToken(lexedToken); -} - -extern "C" void LLVMInitializeARMAsmLexer() { - RegisterMCAsmLexer<ARMAsmLexer> X(TheARMTarget); - RegisterMCAsmLexer<ThumbAsmLexer> Y(TheThumbTarget); -} diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index c61e3bd..ed7b7ec 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -7,31 +7,34 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/ARMBaseInfo.h" +#include "llvm/MC/MCTargetAsmParser.h" #include "MCTargetDesc/ARMAddressingModes.h" +#include "MCTargetDesc/ARMBaseInfo.h" #include "MCTargetDesc/ARMMCExpr.h" -#include "llvm/MC/MCParser/MCAsmLexer.h" -#include "llvm/MC/MCParser/MCAsmParser.h" -#include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Twine.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCELFStreamer.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/MC/MCParser/MCParsedAsmOperand.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/MC/MCTargetAsmParser.h" +#include "llvm/Support/ELF.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/OwningPtr.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm/ADT/Twine.h" using namespace llvm; @@ -178,7 +181,8 @@ class ARMAsmParser : public MCTargetAsmParser { OperandMatchResultTy parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*>&); OperandMatchResultTy parseFPImm(SmallVectorImpl<MCParsedAsmOperand*>&); OperandMatchResultTy parseVectorList(SmallVectorImpl<MCParsedAsmOperand*>&); - OperandMatchResultTy parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index); + OperandMatchResultTy parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index, + SMLoc &EndLoc); // Asm Match Converter Methods void cvtT2LdrdPre(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &); @@ -249,6 +253,13 @@ public: // Not in an ITBlock to start with. ITState.CurPosition = ~0U; + + // Set ELF header flags. + // FIXME: This should eventually end up somewhere else where more + // intelligent flag decisions can be made. For now we are just maintaining + // the status quo for ARM and setting EF_ARM_EABI_VER5 as the default.
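+ // (EF_ARM_EABI_VER5 is the EABI version field in the ELF header's e_flags; + // the constant comes from llvm/Support/ELF.h, included above -- editor's + // note.)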
+ if (MCELFStreamer *MES = dyn_cast<MCELFStreamer>(&Parser.getStreamer())) + MES->getAssembler().setELFHeaderEFlags(ELF::EF_ARM_EABI_VER5); } // Implementation of the MCTargetAsmParser interface: @@ -258,6 +269,7 @@ public: SmallVectorImpl<MCParsedAsmOperand*> &Operands); bool ParseDirective(AsmToken DirectiveID); + unsigned validateTargetOperandClass(MCParsedAsmOperand *Op, unsigned Kind); unsigned checkTargetMatchPredicate(MCInst &Inst); bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, @@ -270,7 +282,7 @@ public: namespace { /// ARMOperand - Instances of this class represent a parsed ARM machine -/// instruction. +/// operand. class ARMOperand : public MCParsedAsmOperand { enum KindTy { k_CondCode, @@ -304,103 +316,127 @@ class ARMOperand : public MCParsedAsmOperand { SMLoc StartLoc, EndLoc; SmallVector<unsigned, 8> Registers; + struct CCOp { + ARMCC::CondCodes Val; + }; + + struct CopOp { + unsigned Val; + }; + + struct CoprocOptionOp { + unsigned Val; + }; + + struct ITMaskOp { + unsigned Mask:4; + }; + + struct MBOptOp { + ARM_MB::MemBOpt Val; + }; + + struct IFlagsOp { + ARM_PROC::IFlags Val; + }; + + struct MMaskOp { + unsigned Val; + }; + + struct TokOp { + const char *Data; + unsigned Length; + }; + + struct RegOp { + unsigned RegNum; + }; + + // A vector register list is a sequential list of 1 to 4 registers. + struct VectorListOp { + unsigned RegNum; + unsigned Count; + unsigned LaneIndex; + bool isDoubleSpaced; + }; + + struct VectorIndexOp { + unsigned Val; + }; + + struct ImmOp { + const MCExpr *Val; + }; + + /// Combined record for all forms of ARM address expressions. + struct MemoryOp { + unsigned BaseRegNum; + // Offset is in OffsetReg or OffsetImm. If both are zero, no offset + // was specified. + const MCConstantExpr *OffsetImm; // Offset immediate value + unsigned OffsetRegNum; // Offset register num, when OffsetImm == NULL + ARM_AM::ShiftOpc ShiftType; // Shift type for OffsetReg + unsigned ShiftImm; // shift for OffsetReg. + unsigned Alignment; // 0 = no alignment specified + // n = alignment in bytes (2, 4, 8, 16, or 32) + unsigned isNegative : 1; // Negated OffsetReg? (~'U' bit) + }; + + struct PostIdxRegOp { + unsigned RegNum; + bool isAdd; + ARM_AM::ShiftOpc ShiftTy; + unsigned ShiftImm; + }; + + struct ShifterImmOp { + bool isASR; + unsigned Imm; + }; + + struct RegShiftedRegOp { + ARM_AM::ShiftOpc ShiftTy; + unsigned SrcReg; + unsigned ShiftReg; + unsigned ShiftImm; + }; + + struct RegShiftedImmOp { + ARM_AM::ShiftOpc ShiftTy; + unsigned SrcReg; + unsigned ShiftImm; + }; + + struct RotImmOp { + unsigned Imm; + }; + + struct BitfieldOp { + unsigned LSB; + unsigned Width; + }; + union { - struct { - ARMCC::CondCodes Val; - } CC; - - struct { - unsigned Val; - } Cop; - - struct { - unsigned Val; - } CoprocOption; - - struct { - unsigned Mask:4; - } ITMask; - - struct { - ARM_MB::MemBOpt Val; - } MBOpt; - - struct { - ARM_PROC::IFlags Val; - } IFlags; - - struct { - unsigned Val; - } MMask; - - struct { - const char *Data; - unsigned Length; - } Tok; - - struct { - unsigned RegNum; - } Reg; - - // A vector register list is a sequential list of 1 to 4 registers. - struct { - unsigned RegNum; - unsigned Count; - unsigned LaneIndex; - bool isDoubleSpaced; - } VectorList; - - struct { - unsigned Val; - } VectorIndex; - - struct { - const MCExpr *Val; - } Imm; - - /// Combined record for all forms of ARM address expressions. - struct { - unsigned BaseRegNum; - // Offset is in OffsetReg or OffsetImm. If both are zero, no offset - // was specified. 
- const MCConstantExpr *OffsetImm; // Offset immediate value - unsigned OffsetRegNum; // Offset register num, when OffsetImm == NULL - ARM_AM::ShiftOpc ShiftType; // Shift type for OffsetReg - unsigned ShiftImm; // shift for OffsetReg. - unsigned Alignment; // 0 = no alignment specified - // n = alignment in bytes (2, 4, 8, 16, or 32) - unsigned isNegative : 1; // Negated OffsetReg? (~'U' bit) - } Memory; - - struct { - unsigned RegNum; - bool isAdd; - ARM_AM::ShiftOpc ShiftTy; - unsigned ShiftImm; - } PostIdxReg; - - struct { - bool isASR; - unsigned Imm; - } ShifterImm; - struct { - ARM_AM::ShiftOpc ShiftTy; - unsigned SrcReg; - unsigned ShiftReg; - unsigned ShiftImm; - } RegShiftedReg; - struct { - ARM_AM::ShiftOpc ShiftTy; - unsigned SrcReg; - unsigned ShiftImm; - } RegShiftedImm; - struct { - unsigned Imm; - } RotImm; - struct { - unsigned LSB; - unsigned Width; - } Bitfield; + struct CCOp CC; + struct CopOp Cop; + struct CoprocOptionOp CoprocOption; + struct MBOptOp MBOpt; + struct ITMaskOp ITMask; + struct IFlagsOp IFlags; + struct MMaskOp MMask; + struct TokOp Tok; + struct RegOp Reg; + struct VectorListOp VectorList; + struct VectorIndexOp VectorIndex; + struct ImmOp Imm; + struct MemoryOp Memory; + struct PostIdxRegOp PostIdxReg; + struct ShifterImmOp ShifterImm; + struct RegShiftedRegOp RegShiftedReg; + struct RegShiftedImmOp RegShiftedImm; + struct RotImmOp RotImm; + struct BitfieldOp Bitfield; }; ARMOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} @@ -2450,8 +2486,8 @@ static unsigned MatchRegisterName(StringRef Name); bool ARMAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) { StartLoc = Parser.getTok().getLoc(); + EndLoc = Parser.getTok().getEndLoc(); RegNo = tryParseRegister(); - EndLoc = Parser.getTok().getLoc(); return (RegNo == (unsigned)-1); } @@ -2540,6 +2576,8 @@ int ARMAsmParser::tryParseShiftRegister( if (!PrevOp->isReg()) return Error(PrevOp->getStartLoc(), "shift must be of a register"); int SrcReg = PrevOp->getReg(); + + SMLoc EndLoc; int64_t Imm = 0; int ShiftReg = 0; if (ShiftTy == ARM_AM::rrx) { @@ -2554,7 +2592,7 @@ int ARMAsmParser::tryParseShiftRegister( Parser.Lex(); // Eat hash. SMLoc ImmLoc = Parser.getTok().getLoc(); const MCExpr *ShiftExpr = 0; - if (getParser().ParseExpression(ShiftExpr)) { + if (getParser().parseExpression(ShiftExpr, EndLoc)) { Error(ImmLoc, "invalid immediate shift value"); return -1; } @@ -2579,8 +2617,9 @@ int ARMAsmParser::tryParseShiftRegister( if (Imm == 0) ShiftTy = ARM_AM::lsl; } else if (Parser.getTok().is(AsmToken::Identifier)) { - ShiftReg = tryParseRegister(); SMLoc L = Parser.getTok().getLoc(); + EndLoc = Parser.getTok().getEndLoc(); + ShiftReg = tryParseRegister(); if (ShiftReg == -1) { Error (L, "expected immediate or register in shift operand"); return -1; @@ -2595,10 +2634,10 @@ int ARMAsmParser::tryParseShiftRegister( if (ShiftReg && ShiftTy != ARM_AM::rrx) Operands.push_back(ARMOperand::CreateShiftedRegister(ShiftTy, SrcReg, ShiftReg, Imm, - S, Parser.getTok().getLoc())); + S, EndLoc)); else Operands.push_back(ARMOperand::CreateShiftedImmediate(ShiftTy, SrcReg, Imm, - S, Parser.getTok().getLoc())); + S, EndLoc)); return 0; } @@ -2612,12 +2651,13 @@ int ARMAsmParser::tryParseShiftRegister( /// parse for a specific register type. 
bool ARMAsmParser:: tryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - SMLoc S = Parser.getTok().getLoc(); + const AsmToken &RegTok = Parser.getTok(); int RegNo = tryParseRegister(); if (RegNo == -1) return true; - Operands.push_back(ARMOperand::CreateReg(RegNo, S, Parser.getTok().getLoc())); + Operands.push_back(ARMOperand::CreateReg(RegNo, RegTok.getLoc(), + RegTok.getEndLoc())); const AsmToken &ExclaimTok = Parser.getTok(); if (ExclaimTok.is(AsmToken::Exclaim)) { @@ -2635,16 +2675,16 @@ tryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { Parser.Lex(); // Eat left bracket token. const MCExpr *ImmVal; - if (getParser().ParseExpression(ImmVal)) + if (getParser().parseExpression(ImmVal)) return true; const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal); if (!MCE) return TokError("immediate value expected for vector index"); - SMLoc E = Parser.getTok().getLoc(); if (Parser.getTok().isNot(AsmToken::RBrac)) - return Error(E, "']' expected"); + return Error(Parser.getTok().getLoc(), "']' expected"); + SMLoc E = Parser.getTok().getEndLoc(); Parser.Lex(); // Eat right bracket token. Operands.push_back(ARMOperand::CreateVectorIndex(MCE->getValue(), @@ -2780,7 +2820,7 @@ parseCoprocOptionOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { const MCExpr *Expr; SMLoc Loc = Parser.getTok().getLoc(); - if (getParser().ParseExpression(Expr)) { + if (getParser().parseExpression(Expr)) { Error(Loc, "illegal expression"); return MatchOperand_ParseFail; } @@ -2794,7 +2834,7 @@ parseCoprocOptionOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Check for and consume the closing '}' if (Parser.getTok().isNot(AsmToken::RCurly)) return MatchOperand_ParseFail; - SMLoc E = Parser.getTok().getLoc(); + SMLoc E = Parser.getTok().getEndLoc(); Parser.Lex(); // Eat the '}' Operands.push_back(ARMOperand::CreateCoprocOption(Val, S, E)); @@ -2891,10 +2931,10 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { Parser.getTok().is(AsmToken::Minus)) { if (Parser.getTok().is(AsmToken::Minus)) { Parser.Lex(); // Eat the minus. - SMLoc EndLoc = Parser.getTok().getLoc(); + SMLoc AfterMinusLoc = Parser.getTok().getLoc(); int EndReg = tryParseRegister(); if (EndReg == -1) - return Error(EndLoc, "register expected"); + return Error(AfterMinusLoc, "register expected"); // Allow Q regs and just interpret them as the two D sub-registers. if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(EndReg)) EndReg = getDRegFromQReg(EndReg) + 1; @@ -2904,10 +2944,10 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { continue; // The register must be in the same register class as the first. if (!RC->contains(EndReg)) - return Error(EndLoc, "invalid register in register list"); + return Error(AfterMinusLoc, "invalid register in register list"); // Ranges must go from low to high. if (MRI->getEncodingValue(Reg) > MRI->getEncodingValue(EndReg)) - return Error(EndLoc, "bad range in register list"); + return Error(AfterMinusLoc, "bad range in register list"); // Add all the registers in the range to the register list. 
while (Reg != EndReg) { @@ -2955,9 +2995,9 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { Registers.push_back(std::pair<unsigned, SMLoc>(++Reg, RegLoc)); } - SMLoc E = Parser.getTok().getLoc(); if (Parser.getTok().isNot(AsmToken::RCurly)) - return Error(E, "'}' expected"); + return Error(Parser.getTok().getLoc(), "'}' expected"); + SMLoc E = Parser.getTok().getEndLoc(); Parser.Lex(); // Eat '}' token. // Push the register list operand. @@ -2974,13 +3014,14 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Helper function to parse the lane index for vector lists. ARMAsmParser::OperandMatchResultTy ARMAsmParser:: -parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index) { +parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index, SMLoc &EndLoc) { Index = 0; // Always return a defined index value. if (Parser.getTok().is(AsmToken::LBrac)) { Parser.Lex(); // Eat the '['. if (Parser.getTok().is(AsmToken::RBrac)) { // "Dn[]" is the 'all lanes' syntax. LaneKind = AllLanes; + EndLoc = Parser.getTok().getEndLoc(); Parser.Lex(); // Eat the ']'. return MatchOperand_Success; } @@ -2992,7 +3033,7 @@ parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index) { const MCExpr *LaneIndex; SMLoc Loc = Parser.getTok().getLoc(); - if (getParser().ParseExpression(LaneIndex)) { + if (getParser().parseExpression(LaneIndex)) { Error(Loc, "illegal expression"); return MatchOperand_ParseFail; } @@ -3005,6 +3046,7 @@ parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index) { Error(Parser.getTok().getLoc(), "']' expected"); return MatchOperand_ParseFail; } + EndLoc = Parser.getTok().getEndLoc(); Parser.Lex(); // Eat the ']'. int64_t Val = CE->getValue(); @@ -3031,21 +3073,19 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // (without encosing curly braces) as a single or double entry list, // respectively. 
if (Parser.getTok().is(AsmToken::Identifier)) { + SMLoc E = Parser.getTok().getEndLoc(); int Reg = tryParseRegister(); if (Reg == -1) return MatchOperand_NoMatch; - SMLoc E = Parser.getTok().getLoc(); if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg)) { - OperandMatchResultTy Res = parseVectorLane(LaneKind, LaneIndex); + OperandMatchResultTy Res = parseVectorLane(LaneKind, LaneIndex, E); if (Res != MatchOperand_Success) return Res; switch (LaneKind) { case NoLanes: - E = Parser.getTok().getLoc(); Operands.push_back(ARMOperand::CreateVectorList(Reg, 1, false, S, E)); break; case AllLanes: - E = Parser.getTok().getLoc(); Operands.push_back(ARMOperand::CreateVectorListAllLanes(Reg, 1, false, S, E)); break; @@ -3059,18 +3099,16 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { } if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) { Reg = getDRegFromQReg(Reg); - OperandMatchResultTy Res = parseVectorLane(LaneKind, LaneIndex); + OperandMatchResultTy Res = parseVectorLane(LaneKind, LaneIndex, E); if (Res != MatchOperand_Success) return Res; switch (LaneKind) { case NoLanes: - E = Parser.getTok().getLoc(); Reg = MRI->getMatchingSuperReg(Reg, ARM::dsub_0, &ARMMCRegisterClasses[ARM::DPairRegClassID]); Operands.push_back(ARMOperand::CreateVectorList(Reg, 2, false, S, E)); break; case AllLanes: - E = Parser.getTok().getLoc(); Reg = MRI->getMatchingSuperReg(Reg, ARM::dsub_0, &ARMMCRegisterClasses[ARM::DPairRegClassID]); Operands.push_back(ARMOperand::CreateVectorListAllLanes(Reg, 2, false, @@ -3111,7 +3149,9 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { ++Reg; ++Count; } - if (parseVectorLane(LaneKind, LaneIndex) != MatchOperand_Success) + + SMLoc E; + if (parseVectorLane(LaneKind, LaneIndex, E) != MatchOperand_Success) return MatchOperand_ParseFail; while (Parser.getTok().is(AsmToken::Comma) || @@ -3125,10 +3165,10 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { return MatchOperand_ParseFail; } Parser.Lex(); // Eat the minus. - SMLoc EndLoc = Parser.getTok().getLoc(); + SMLoc AfterMinusLoc = Parser.getTok().getLoc(); int EndReg = tryParseRegister(); if (EndReg == -1) { - Error(EndLoc, "register expected"); + Error(AfterMinusLoc, "register expected"); return MatchOperand_ParseFail; } // Allow Q regs and just interpret them as the two D sub-registers. @@ -3140,24 +3180,24 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { continue; // The register must be in the same register class as the first. if (!ARMMCRegisterClasses[ARM::DPRRegClassID].contains(EndReg)) { - Error(EndLoc, "invalid register in register list"); + Error(AfterMinusLoc, "invalid register in register list"); return MatchOperand_ParseFail; } // Ranges must go from low to high. if (Reg > EndReg) { - Error(EndLoc, "bad range in register list"); + Error(AfterMinusLoc, "bad range in register list"); return MatchOperand_ParseFail; } // Parse the lane specifier if present. VectorLaneTy NextLaneKind; unsigned NextLaneIndex; - if (parseVectorLane(NextLaneKind, NextLaneIndex) != MatchOperand_Success) + if (parseVectorLane(NextLaneKind, NextLaneIndex, E) != + MatchOperand_Success) return MatchOperand_ParseFail; if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) { - Error(EndLoc, "mismatched lane index in register list"); + Error(AfterMinusLoc, "mismatched lane index in register list"); return MatchOperand_ParseFail; } - EndLoc = Parser.getTok().getLoc(); // Add all the registers in the range to the register list. 
Count += EndReg - Reg; @@ -3196,11 +3236,12 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Parse the lane specifier if present. VectorLaneTy NextLaneKind; unsigned NextLaneIndex; - SMLoc EndLoc = Parser.getTok().getLoc(); - if (parseVectorLane(NextLaneKind, NextLaneIndex) != MatchOperand_Success) + SMLoc LaneLoc = Parser.getTok().getLoc(); + if (parseVectorLane(NextLaneKind, NextLaneIndex, E) != + MatchOperand_Success) return MatchOperand_ParseFail; if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) { - Error(EndLoc, "mismatched lane index in register list"); + Error(LaneLoc, "mismatched lane index in register list"); return MatchOperand_ParseFail; } continue; @@ -3221,7 +3262,7 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { VectorLaneTy NextLaneKind; unsigned NextLaneIndex; SMLoc EndLoc = Parser.getTok().getLoc(); - if (parseVectorLane(NextLaneKind, NextLaneIndex) != MatchOperand_Success) + if (parseVectorLane(NextLaneKind, NextLaneIndex, E) != MatchOperand_Success) return MatchOperand_ParseFail; if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) { Error(EndLoc, "mismatched lane index in register list"); @@ -3229,11 +3270,11 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { } } - SMLoc E = Parser.getTok().getLoc(); if (Parser.getTok().isNot(AsmToken::RCurly)) { - Error(E, "'}' expected"); + Error(Parser.getTok().getLoc(), "'}' expected"); return MatchOperand_ParseFail; } + E = Parser.getTok().getEndLoc(); Parser.Lex(); // Eat '}' token. switch (LaneKind) { @@ -3310,7 +3351,7 @@ parseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { SMLoc Loc = Parser.getTok().getLoc(); const MCExpr *MemBarrierID; - if (getParser().ParseExpression(MemBarrierID)) { + if (getParser().parseExpression(MemBarrierID)) { Error(Loc, "illegal expression"); return MatchOperand_ParseFail; } @@ -3525,7 +3566,8 @@ parsePKHImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands, StringRef Op, const MCExpr *ShiftAmount; SMLoc Loc = Parser.getTok().getLoc(); - if (getParser().ParseExpression(ShiftAmount)) { + SMLoc EndLoc; + if (getParser().parseExpression(ShiftAmount, EndLoc)) { Error(Loc, "illegal expression"); return MatchOperand_ParseFail; } @@ -3540,7 +3582,7 @@ parsePKHImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands, StringRef Op, return MatchOperand_ParseFail; } - Operands.push_back(ARMOperand::CreateImm(CE, Loc, Parser.getTok().getLoc())); + Operands.push_back(ARMOperand::CreateImm(CE, Loc, EndLoc)); return MatchOperand_Success; } @@ -3550,7 +3592,7 @@ parseSetEndImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { const AsmToken &Tok = Parser.getTok(); SMLoc S = Tok.getLoc(); if (Tok.isNot(AsmToken::Identifier)) { - Error(Tok.getLoc(), "'be' or 'le' operand expected"); + Error(S, "'be' or 'le' operand expected"); return MatchOperand_ParseFail; } int Val = StringSwitch<int>(Tok.getString()) @@ -3560,12 +3602,12 @@ parseSetEndImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { Parser.Lex(); // Eat the token. if (Val == -1) { - Error(Tok.getLoc(), "'be' or 'le' operand expected"); + Error(S, "'be' or 'le' operand expected"); return MatchOperand_ParseFail; } Operands.push_back(ARMOperand::CreateImm(MCConstantExpr::Create(Val, getContext()), - S, Parser.getTok().getLoc())); + S, Tok.getEndLoc())); return MatchOperand_Success; } @@ -3601,16 +3643,17 @@ parseShifterImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { return MatchOperand_ParseFail; } Parser.Lex(); // Eat hash token. 
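// NOTE (editorial aside, not part of the original patch): the checks that
// follow implement the ARM rule that 'asr #32' is encoded with a shift
// amount of 0, so Val == 32 is accepted for ASR and then rewritten to 0;
// Thumb2 has no such encoding, which is why 'asr #32' is rejected in Thumb mode.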
+ SMLoc ExLoc = Parser.getTok().getLoc(); const MCExpr *ShiftAmount; - SMLoc E = Parser.getTok().getLoc(); - if (getParser().ParseExpression(ShiftAmount)) { - Error(E, "malformed shift expression"); + SMLoc EndLoc; + if (getParser().parseExpression(ShiftAmount, EndLoc)) { + Error(ExLoc, "malformed shift expression"); return MatchOperand_ParseFail; } const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ShiftAmount); if (!CE) { - Error(E, "shift amount must be an immediate"); + Error(ExLoc, "shift amount must be an immediate"); return MatchOperand_ParseFail; } @@ -3618,25 +3661,24 @@ parseShifterImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { if (isASR) { // Shift amount must be in [1,32] if (Val < 1 || Val > 32) { - Error(E, "'asr' shift amount must be in range [1,32]"); + Error(ExLoc, "'asr' shift amount must be in range [1,32]"); return MatchOperand_ParseFail; } // asr #32 encoded as asr #0, but is not allowed in Thumb2 mode. if (isThumb() && Val == 32) { - Error(E, "'asr #32' shift amount not allowed in Thumb mode"); + Error(ExLoc, "'asr #32' shift amount not allowed in Thumb mode"); return MatchOperand_ParseFail; } if (Val == 32) Val = 0; } else { // Shift amount must be in [1,32] if (Val < 0 || Val > 31) { - Error(E, "'lsr' shift amount must be in range [0,31]"); + Error(ExLoc, "'lsr' shift amount must be in range [0,31]"); return MatchOperand_ParseFail; } } - E = Parser.getTok().getLoc(); - Operands.push_back(ARMOperand::CreateShifterImm(isASR, Val, S, E)); + Operands.push_back(ARMOperand::CreateShifterImm(isASR, Val, S, EndLoc)); return MatchOperand_Success; } @@ -3662,16 +3704,17 @@ parseRotImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { return MatchOperand_ParseFail; } Parser.Lex(); // Eat hash token. + SMLoc ExLoc = Parser.getTok().getLoc(); const MCExpr *ShiftAmount; - SMLoc E = Parser.getTok().getLoc(); - if (getParser().ParseExpression(ShiftAmount)) { - Error(E, "malformed rotate expression"); + SMLoc EndLoc; + if (getParser().parseExpression(ShiftAmount, EndLoc)) { + Error(ExLoc, "malformed rotate expression"); return MatchOperand_ParseFail; } const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ShiftAmount); if (!CE) { - Error(E, "rotate amount must be an immediate"); + Error(ExLoc, "rotate amount must be an immediate"); return MatchOperand_ParseFail; } @@ -3680,12 +3723,11 @@ parseRotImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // normally, zero is represented in asm by omitting the rotate operand // entirely. if (Val != 8 && Val != 16 && Val != 24 && Val != 0) { - Error(E, "'ror' rotate amount must be 8, 16, or 24"); + Error(ExLoc, "'ror' rotate amount must be 8, 16, or 24"); return MatchOperand_ParseFail; } - E = Parser.getTok().getLoc(); - Operands.push_back(ARMOperand::CreateRotImm(Val, S, E)); + Operands.push_back(ARMOperand::CreateRotImm(Val, S, EndLoc)); return MatchOperand_Success; } @@ -3703,7 +3745,7 @@ parseBitfield(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { const MCExpr *LSBExpr; SMLoc E = Parser.getTok().getLoc(); - if (getParser().ParseExpression(LSBExpr)) { + if (getParser().parseExpression(LSBExpr)) { Error(E, "malformed immediate expression"); return MatchOperand_ParseFail; } @@ -3735,7 +3777,8 @@ parseBitfield(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { Parser.Lex(); // Eat hash token. 
const MCExpr *WidthExpr; - if (getParser().ParseExpression(WidthExpr)) { + SMLoc EndLoc; + if (getParser().parseExpression(WidthExpr, EndLoc)) { Error(E, "malformed immediate expression"); return MatchOperand_ParseFail; } @@ -3751,9 +3794,8 @@ parseBitfield(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { Error(E, "'width' operand must be in the range [1,32-lsb]"); return MatchOperand_ParseFail; } - E = Parser.getTok().getLoc(); - Operands.push_back(ARMOperand::CreateBitfield(LSB, Width, S, E)); + Operands.push_back(ARMOperand::CreateBitfield(LSB, Width, S, EndLoc)); return MatchOperand_Success; } @@ -3772,7 +3814,6 @@ parsePostIdxReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { SMLoc S = Tok.getLoc(); bool haveEaten = false; bool isAdd = true; - int Reg = -1; if (Tok.is(AsmToken::Plus)) { Parser.Lex(); // Eat the '+' token. haveEaten = true; @@ -3781,15 +3822,15 @@ parsePostIdxReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { isAdd = false; haveEaten = true; } - if (Parser.getTok().is(AsmToken::Identifier)) - Reg = tryParseRegister(); + + SMLoc E = Parser.getTok().getEndLoc(); + int Reg = tryParseRegister(); if (Reg == -1) { if (!haveEaten) return MatchOperand_NoMatch; Error(Parser.getTok().getLoc(), "register expected"); return MatchOperand_ParseFail; } - SMLoc E = Parser.getTok().getLoc(); ARM_AM::ShiftOpc ShiftTy = ARM_AM::no_shift; unsigned ShiftImm = 0; @@ -3797,6 +3838,9 @@ parsePostIdxReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { Parser.Lex(); // Eat the ','. if (parseMemRegOffsetShift(ShiftTy, ShiftImm)) return MatchOperand_ParseFail; + + // FIXME: Only approximates end...may include intervening whitespace. + E = Parser.getTok().getLoc(); } Operands.push_back(ARMOperand::CreatePostIdxReg(Reg, isAdd, ShiftTy, @@ -3829,14 +3873,14 @@ parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // differently. bool isNegative = Parser.getTok().is(AsmToken::Minus); const MCExpr *Offset; - if (getParser().ParseExpression(Offset)) + SMLoc E; + if (getParser().parseExpression(Offset, E)) return MatchOperand_ParseFail; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Offset); if (!CE) { Error(S, "constant expression expected"); return MatchOperand_ParseFail; } - SMLoc E = Tok.getLoc(); // Negative zero is encoded as the flag value INT32_MIN. int32_t Val = CE->getValue(); if (isNegative && Val == 0) @@ -3851,7 +3895,6 @@ parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { bool haveEaten = false; bool isAdd = true; - int Reg = -1; if (Tok.is(AsmToken::Plus)) { Parser.Lex(); // Eat the '+' token. haveEaten = true; @@ -3860,18 +3903,18 @@ parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { isAdd = false; haveEaten = true; } - if (Parser.getTok().is(AsmToken::Identifier)) - Reg = tryParseRegister(); + + Tok = Parser.getTok(); + int Reg = tryParseRegister(); if (Reg == -1) { if (!haveEaten) return MatchOperand_NoMatch; - Error(Parser.getTok().getLoc(), "register expected"); + Error(Tok.getLoc(), "register expected"); return MatchOperand_ParseFail; } - SMLoc E = Parser.getTok().getLoc(); Operands.push_back(ARMOperand::CreatePostIdxReg(Reg, isAdd, ARM_AM::no_shift, - 0, S, E)); + 0, S, Tok.getEndLoc())); return MatchOperand_Success; } @@ -4218,13 +4261,14 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { if (BaseRegNum == -1) return Error(BaseRegTok.getLoc(), "register expected"); - // The next token must either be a comma or a closing bracket. + // The next token must either be a comma, a colon or a closing bracket. 
const AsmToken &Tok = Parser.getTok(); - if (!Tok.is(AsmToken::Comma) && !Tok.is(AsmToken::RBrac)) + if (!Tok.is(AsmToken::Colon) && !Tok.is(AsmToken::Comma) && + !Tok.is(AsmToken::RBrac)) return Error(Tok.getLoc(), "malformed memory operand"); if (Tok.is(AsmToken::RBrac)) { - E = Tok.getLoc(); + E = Tok.getEndLoc(); Parser.Lex(); // Eat right bracket token. Operands.push_back(ARMOperand::CreateMem(BaseRegNum, 0, 0, ARM_AM::no_shift, @@ -4240,8 +4284,11 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { return false; } - assert(Tok.is(AsmToken::Comma) && "Lost comma in memory operand?!"); - Parser.Lex(); // Eat the comma. + assert((Tok.is(AsmToken::Colon) || Tok.is(AsmToken::Comma)) && + "Lost colon or comma in memory operand?!"); + if (Tok.is(AsmToken::Comma)) { + Parser.Lex(); // Eat the comma. + } // If we have a ':', it's an alignment specifier. if (Parser.getTok().is(AsmToken::Colon)) { @@ -4249,7 +4296,7 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { E = Parser.getTok().getLoc(); const MCExpr *Expr; - if (getParser().ParseExpression(Expr)) + if (getParser().parseExpression(Expr)) return true; // The expression has to be a constant. Memory references with relocations @@ -4272,9 +4319,9 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { } // Now we should have the closing ']' - E = Parser.getTok().getLoc(); if (Parser.getTok().isNot(AsmToken::RBrac)) - return Error(E, "']' expected"); + return Error(Parser.getTok().getLoc(), "']' expected"); + E = Parser.getTok().getEndLoc(); Parser.Lex(); // Eat right bracket token. // Don't worry about range checking the value here. That's handled by @@ -4305,7 +4352,7 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { bool isNegative = getParser().getTok().is(AsmToken::Minus); const MCExpr *Offset; - if (getParser().ParseExpression(Offset)) + if (getParser().parseExpression(Offset)) return true; // The expression has to be a constant. Memory references with relocations @@ -4321,9 +4368,9 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { CE = MCConstantExpr::Create(INT32_MIN, getContext()); // Now we should have the closing ']' - E = Parser.getTok().getLoc(); if (Parser.getTok().isNot(AsmToken::RBrac)) - return Error(E, "']' expected"); + return Error(Parser.getTok().getLoc(), "']' expected"); + E = Parser.getTok().getEndLoc(); Parser.Lex(); // Eat right bracket token. // Don't worry about range checking the value here. That's handled by @@ -4367,9 +4414,9 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { } // Now we should have the closing ']' - E = Parser.getTok().getLoc(); if (Parser.getTok().isNot(AsmToken::RBrac)) - return Error(E, "']' expected"); + return Error(Parser.getTok().getLoc(), "']' expected"); + E = Parser.getTok().getEndLoc(); Parser.Lex(); // Eat right bracket token. Operands.push_back(ARMOperand::CreateMem(BaseRegNum, 0, OffsetRegNum, @@ -4424,7 +4471,7 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St, Parser.Lex(); // Eat hash token. const MCExpr *Expr; - if (getParser().ParseExpression(Expr)) + if (getParser().parseExpression(Expr)) return true; // Range check the immediate. 
// lsl, ror: 0 <= imm <= 31 @@ -4453,7 +4500,7 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St, ARMAsmParser::OperandMatchResultTy ARMAsmParser:: parseFPImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Anything that can accept a floating point constant as an operand - // needs to go through here, as the regular ParseExpression is + // needs to go through here, as the regular parseExpression is // integer only. // // This routine still creates a generic Immediate operand, containing @@ -4546,20 +4593,26 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, Error(Parser.getTok().getLoc(), "unexpected token in operand"); return true; case AsmToken::Identifier: { - if (!tryParseRegisterWithWriteBack(Operands)) - return false; - int Res = tryParseShiftRegister(Operands); - if (Res == 0) // success - return false; - else if (Res == -1) // irrecoverable error - return true; - // If this is VMRS, check for the apsr_nzcv operand. - if (Mnemonic == "vmrs" && - Parser.getTok().getString().equals_lower("apsr_nzcv")) { - S = Parser.getTok().getLoc(); - Parser.Lex(); - Operands.push_back(ARMOperand::CreateToken("APSR_nzcv", S)); - return false; + // If we've seen a branch mnemonic, the next operand must be a label. This + // is true even if the label is a register name. So "br r1" means branch to + // label "r1". + bool ExpectLabel = Mnemonic == "b" || Mnemonic == "bl"; + if (!ExpectLabel) { + if (!tryParseRegisterWithWriteBack(Operands)) + return false; + int Res = tryParseShiftRegister(Operands); + if (Res == 0) // success + return false; + else if (Res == -1) // irrecoverable error + return true; + // If this is VMRS, check for the apsr_nzcv operand. + if (Mnemonic == "vmrs" && + Parser.getTok().getString().equals_lower("apsr_nzcv")) { + S = Parser.getTok().getLoc(); + Parser.Lex(); + Operands.push_back(ARMOperand::CreateToken("APSR_nzcv", S)); + return false; + } } // Fall though for the Identifier case that is not a register or a @@ -4573,7 +4626,7 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, // identifier (like labels) as expressions and create them as immediates. const MCExpr *IdVal; S = Parser.getTok().getLoc(); - if (getParser().ParseExpression(IdVal)) + if (getParser().parseExpression(IdVal)) return true; E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); Operands.push_back(ARMOperand::CreateImm(IdVal, S, E)); @@ -4592,7 +4645,7 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, if (Parser.getTok().isNot(AsmToken::Colon)) { bool isNegative = Parser.getTok().is(AsmToken::Minus); const MCExpr *ImmVal; - if (getParser().ParseExpression(ImmVal)) + if (getParser().parseExpression(ImmVal)) return true; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ImmVal); if (CE) { @@ -4602,6 +4655,15 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, } E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); Operands.push_back(ARMOperand::CreateImm(ImmVal, S, E)); + + // There can be a trailing '!' on operands that we want as a separate + // '!' Token operand. Handle that here. For example, the compatibilty + // alias for 'srsdb sp!, #imm' is 'srsdb #imm!'. + if (Parser.getTok().is(AsmToken::Exclaim)) { + Operands.push_back(ARMOperand::CreateToken(Parser.getTok().getString(), + Parser.getTok().getLoc())); + Parser.Lex(); // Eat exclaim token + } return false; } // w/ a ':' after the '#', it's just like a plain ':'. 
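The trailing '!' handling added in the hunk above is easiest to see end to end. Below is a small stand-alone sketch in plain C++; Operand and parseImmOperand are invented stand-ins for illustration, not LLVM's MCParsedAsmOperand API. It shows '#31!' being split into an immediate operand plus a separate '!' token operand, which is the operand shape that lets the matcher treat 'srsdb #imm!' as the alias for 'srsdb sp!, #imm'.

#include <cctype>
#include <cstdlib>
#include <iostream>
#include <string>
#include <vector>

// Hypothetical stand-in for a parsed asm operand: an immediate or a token.
struct Operand {
  enum Kind { Imm, Tok } K;
  long ImmVal;        // set when K == Imm
  std::string TokStr; // set when K == Tok
};

// Parse "#<digits>" and, mirroring the parser change above, emit any
// trailing '!' as its own token operand instead of folding it into the
// immediate.
std::vector<Operand> parseImmOperand(const std::string &Text) {
  std::vector<Operand> Ops;
  size_t i = 0;
  if (i < Text.size() && Text[i] == '#')
    ++i; // eat the '#'
  size_t Start = i;
  while (i < Text.size() && std::isdigit(static_cast<unsigned char>(Text[i])))
    ++i;
  Ops.push_back({Operand::Imm, std::atol(Text.substr(Start, i - Start).c_str()), ""});
  if (i < Text.size() && Text[i] == '!') // trailing '!': separate token operand
    Ops.push_back({Operand::Tok, 0, "!"});
  return Ops;
}

int main() {
  for (const Operand &Op : parseImmOperand("#31!"))
    std::cout << (Op.K == Operand::Imm
                      ? "Imm(" + std::to_string(Op.ImmVal) + ")"
                      : "Tok(" + Op.TokStr + ")")
              << "\n";
  return 0;
}

Compiled and run, this prints Imm(31) and then Tok(!), mirroring the two operands the parser now pushes for the alias.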
@@ -4616,7 +4678,7 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, return true; const MCExpr *SubExprVal; - if (getParser().ParseExpression(SubExprVal)) + if (getParser().parseExpression(SubExprVal)) return true; const MCExpr *ExprVal = ARMMCExpr::Create(RefKind, SubExprVal, @@ -4989,7 +5051,7 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, // In Thumb1, only the branch (B) instruction can be predicated. if (isThumbOne() && PredicationCode != ARMCC::AL && Mnemonic != "b") { - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return Error(NameLoc, "conditional execution not supported in Thumb1"); } @@ -5003,14 +5065,14 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, if (Mnemonic == "it") { SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + 2); if (ITMask.size() > 3) { - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return Error(Loc, "too many conditions on IT instruction"); } unsigned Mask = 8; for (unsigned i = ITMask.size(); i != 0; --i) { char pos = ITMask[i - 1]; if (pos != 't' && pos != 'e') { - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return Error(Loc, "illegal IT block condition mask '" + ITMask + "'"); } Mask >>= 1; @@ -5036,14 +5098,14 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, // If we had a carry-set on an instruction that can't do that, issue an // error. if (!CanAcceptCarrySet && CarrySetting) { - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return Error(NameLoc, "instruction '" + Mnemonic + "' can not set flags, but 's' suffix specified"); } // If we had a predication code on an instruction that can't do that, issue an // error. if (!CanAcceptPredicationCode && PredicationCode != ARMCC::AL) { - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return Error(NameLoc, "instruction '" + Mnemonic + "' is not predicable, but condition code specified"); } @@ -5092,7 +5154,7 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, if (getLexer().isNot(AsmToken::EndOfStatement)) { // Read the first operand. if (parseOperand(Operands, Mnemonic)) { - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return true; } @@ -5101,7 +5163,7 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, // Parse and remember the operand. if (parseOperand(Operands, Mnemonic)) { - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return true; } } @@ -5109,7 +5171,7 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, if (getLexer().isNot(AsmToken::EndOfStatement)) { SMLoc Loc = getLexer().getLoc(); - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return Error(Loc, "unexpected token in argument list"); } @@ -5140,50 +5202,42 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, delete Op; } - // The vector-compare-to-zero instructions have a literal token "#0" at - // the end that comes to here as an immediate operand. Convert it to a - // token to play nicely with the matcher. 
- if ((Mnemonic == "vceq" || Mnemonic == "vcge" || Mnemonic == "vcgt" || - Mnemonic == "vcle" || Mnemonic == "vclt") && Operands.size() == 6 && - static_cast<ARMOperand*>(Operands[5])->isImm()) { - ARMOperand *Op = static_cast<ARMOperand*>(Operands[5]); - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm()); - if (CE && CE->getValue() == 0) { - Operands.erase(Operands.begin() + 5); - Operands.push_back(ARMOperand::CreateToken("#0", Op->getStartLoc())); - delete Op; - } - } - // VCMP{E} does the same thing, but with a different operand count. - if ((Mnemonic == "vcmp" || Mnemonic == "vcmpe") && Operands.size() == 5 && - static_cast<ARMOperand*>(Operands[4])->isImm()) { - ARMOperand *Op = static_cast<ARMOperand*>(Operands[4]); - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm()); - if (CE && CE->getValue() == 0) { - Operands.erase(Operands.begin() + 4); - Operands.push_back(ARMOperand::CreateToken("#0", Op->getStartLoc())); - delete Op; - } - } - // Similarly, the Thumb1 "RSB" instruction has a literal "#0" on the - // end. Convert it to a token here. Take care not to convert those - // that should hit the Thumb2 encoding. - if (Mnemonic == "rsb" && isThumb() && Operands.size() == 6 && - static_cast<ARMOperand*>(Operands[3])->isReg() && - static_cast<ARMOperand*>(Operands[4])->isReg() && - static_cast<ARMOperand*>(Operands[5])->isImm()) { - ARMOperand *Op = static_cast<ARMOperand*>(Operands[5]); - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm()); - if (CE && CE->getValue() == 0 && - (isThumbOne() || - // The cc_out operand matches the IT block. - ((inITBlock() != CarrySetting) && - // Neither register operand is a high register. - (isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) && - isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg()))))){ - Operands.erase(Operands.begin() + 5); - Operands.push_back(ARMOperand::CreateToken("#0", Op->getStartLoc())); - delete Op; + // Adjust operands of ldrexd/strexd to MCK_GPRPair. + // ldrexd/strexd require even/odd GPR pair. To enforce this constraint, + // a single GPRPair reg operand is used in the .td file to replace the two + // GPRs. However, when parsing from asm, the two GRPs cannot be automatically + // expressed as a GPRPair, so we have to manually merge them. + // FIXME: We would really like to be able to tablegen'erate this. + if (!isThumb() && Operands.size() > 4 && + (Mnemonic == "ldrexd" || Mnemonic == "strexd")) { + bool isLoad = (Mnemonic == "ldrexd"); + unsigned Idx = isLoad ? 2 : 3; + ARMOperand* Op1 = static_cast<ARMOperand*>(Operands[Idx]); + ARMOperand* Op2 = static_cast<ARMOperand*>(Operands[Idx+1]); + + const MCRegisterClass& MRC = MRI->getRegClass(ARM::GPRRegClassID); + // Adjust only if Op1 and Op2 are GPRs. + if (Op1->isReg() && Op2->isReg() && MRC.contains(Op1->getReg()) && + MRC.contains(Op2->getReg())) { + unsigned Reg1 = Op1->getReg(); + unsigned Reg2 = Op2->getReg(); + unsigned Rt = MRI->getEncodingValue(Reg1); + unsigned Rt2 = MRI->getEncodingValue(Reg2); + + // Rt2 must be Rt + 1 and Rt must be even. + if (Rt + 1 != Rt2 || (Rt & 1)) { + Error(Op2->getStartLoc(), isLoad ? 
+ "destination operands must be sequential" : + "source operands must be sequential"); + return true; + } + unsigned NewReg = MRI->getMatchingSuperReg(Reg1, ARM::gsub_0, + &(MRI->getRegClass(ARM::GPRPairRegClassID))); + Operands.erase(Operands.begin() + Idx, Operands.begin() + Idx + 2); + Operands.insert(Operands.begin() + Idx, ARMOperand::CreateReg( + NewReg, Op1->getStartLoc(), Op2->getEndLoc())); + delete Op1; + delete Op2; } } @@ -5274,8 +5328,7 @@ validateInstruction(MCInst &Inst, switch (Inst.getOpcode()) { case ARM::LDRD: case ARM::LDRD_PRE: - case ARM::LDRD_POST: - case ARM::LDREXD: { + case ARM::LDRD_POST: { // Rt2 must be Rt + 1. unsigned Rt = MRI->getEncodingValue(Inst.getOperand(0).getReg()); unsigned Rt2 = MRI->getEncodingValue(Inst.getOperand(1).getReg()); @@ -5294,8 +5347,7 @@ validateInstruction(MCInst &Inst, return false; } case ARM::STRD_PRE: - case ARM::STRD_POST: - case ARM::STREXD: { + case ARM::STRD_POST: { // Rt2 must be Rt + 1. unsigned Rt = MRI->getEncodingValue(Inst.getOperand(1).getReg()); unsigned Rt2 = MRI->getEncodingValue(Inst.getOperand(2).getReg()); @@ -5686,7 +5738,12 @@ processInstruction(MCInst &Inst, } // Aliases for alternate PC+imm syntax of LDR instructions. case ARM::t2LDRpcrel: - Inst.setOpcode(ARM::t2LDRpci); + // Select the narrow version if the immediate will fit. + if (Inst.getOperand(1).getImm() > 0 && + Inst.getOperand(1).getImm() <= 0xff) + Inst.setOpcode(ARM::tLDRpci); + else + Inst.setOpcode(ARM::t2LDRpci); return true; case ARM::t2LDRBpcrel: Inst.setOpcode(ARM::t2LDRBpci); @@ -7483,6 +7540,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, bool MatchingInlineAsm) { MCInst Inst; unsigned MatchResult; + MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm); switch (MatchResult) { @@ -7595,10 +7653,10 @@ bool ARMAsmParser::parseDirectiveWord(unsigned Size, SMLoc L) { if (getLexer().isNot(AsmToken::EndOfStatement)) { for (;;) { const MCExpr *Value; - if (getParser().ParseExpression(Value)) + if (getParser().parseExpression(Value)) return true; - getParser().getStreamer().EmitValue(Value, Size, 0/*addrspace*/); + getParser().getStreamer().EmitValue(Value, Size); if (getLexer().is(AsmToken::EndOfStatement)) break; @@ -7742,13 +7800,13 @@ bool ARMAsmParser::parseDirectiveReq(StringRef Name, SMLoc L) { unsigned Reg; SMLoc SRegLoc, ERegLoc; if (ParseRegister(Reg, SRegLoc, ERegLoc)) { - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return Error(SRegLoc, "register name expected"); } // Shouldn't be anything else. if (Parser.getTok().isNot(AsmToken::EndOfStatement)) { - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return Error(Parser.getTok().getLoc(), "unexpected input in .req directive."); } @@ -7766,7 +7824,7 @@ bool ARMAsmParser::parseDirectiveReq(StringRef Name, SMLoc L) { /// ::= .unreq registername bool ARMAsmParser::parseDirectiveUnreq(SMLoc L) { if (Parser.getTok().isNot(AsmToken::Identifier)) { - Parser.EatToEndOfStatement(); + Parser.eatToEndOfStatement(); return Error(L, "unexpected input in .unreq directive."); } RegisterReqs.erase(Parser.getTok().getIdentifier()); @@ -7786,16 +7844,31 @@ bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) { return true; } -extern "C" void LLVMInitializeARMAsmLexer(); - /// Force static initialization. 
extern "C" void LLVMInitializeARMAsmParser() { RegisterMCAsmParser<ARMAsmParser> X(TheARMTarget); RegisterMCAsmParser<ARMAsmParser> Y(TheThumbTarget); - LLVMInitializeARMAsmLexer(); } #define GET_REGISTER_MATCHER #define GET_SUBTARGET_FEATURE_NAME #define GET_MATCHER_IMPLEMENTATION #include "ARMGenAsmMatcher.inc" + +// Define this matcher function after the auto-generated include so we +// have the match class enum definitions. +unsigned ARMAsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp, + unsigned Kind) { + ARMOperand *Op = static_cast<ARMOperand*>(AsmOp); + // If the kind is a token for a literal immediate, check if our asm + // operand matches. This is for InstAliases which have a fixed-value + // immediate in the syntax. + if (Kind == MCK__35_0 && Op->isImm()) { + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm()); + if (!CE) + return Match_InvalidOperand; + if (CE->getValue() == 0) + return Match_Success; + } + return Match_InvalidOperand; +} diff --git a/lib/Target/ARM/AsmParser/CMakeLists.txt b/lib/Target/ARM/AsmParser/CMakeLists.txt index e24a1b1..d2012c3 100644 --- a/lib/Target/ARM/AsmParser/CMakeLists.txt +++ b/lib/Target/ARM/AsmParser/CMakeLists.txt @@ -1,7 +1,6 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) add_llvm_library(LLVMARMAsmParser - ARMAsmLexer.cpp ARMAsmParser.cpp ) diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt index 377bd92..b832508 100644 --- a/lib/Target/ARM/CMakeLists.txt +++ b/lib/Target/ARM/CMakeLists.txt @@ -11,11 +11,11 @@ tablegen(LLVM ARMGenDAGISel.inc -gen-dag-isel) tablegen(LLVM ARMGenFastISel.inc -gen-fast-isel) tablegen(LLVM ARMGenCallingConv.inc -gen-callingconv) tablegen(LLVM ARMGenSubtargetInfo.inc -gen-subtarget) -tablegen(LLVM ARMGenEDInfo.inc -gen-enhanced-disassembly-info) tablegen(LLVM ARMGenDisassemblerTables.inc -gen-disassembler) add_public_tablegen_target(ARMCommonTableGen) add_llvm_target(ARMCodeGen + A15SDOptimizer.cpp ARMAsmPrinter.cpp ARMBaseInstrInfo.cpp ARMBaseRegisterInfo.cpp @@ -38,6 +38,7 @@ add_llvm_target(ARMCodeGen ARMSubtarget.cpp ARMTargetMachine.cpp ARMTargetObjectFile.cpp + ARMTargetTransformInfo.cpp MLxExpansionPass.cpp Thumb1FrameLowering.cpp Thumb1InstrInfo.cpp diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index f00142d..2e009e5 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -9,21 +9,20 @@ #define DEBUG_TYPE "arm-disassembler" +#include "llvm/MC/MCDisassembler.h" #include "MCTargetDesc/ARMAddressingModes.h" -#include "MCTargetDesc/ARMMCExpr.h" #include "MCTargetDesc/ARMBaseInfo.h" -#include "llvm/MC/EDInstInfo.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCInstrDesc.h" -#include "llvm/MC/MCExpr.h" +#include "MCTargetDesc/ARMMCExpr.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixedLenDisassembler.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/MemoryObject.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/LEB128.h" +#include "llvm/Support/MemoryObject.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #include <vector> @@ -105,10 +104,6 @@ public: uint64_t address, raw_ostream &vStream, raw_ostream &cStream) const; - - /// getEDInfo - See MCDisassembler. 
- const EDInstInfo *getEDInfo() const; -private: }; /// ThumbDisassembler - Thumb disassembler for all Thumb platforms. @@ -131,8 +126,6 @@ public: raw_ostream &vStream, raw_ostream &cStream) const; - /// getEDInfo - See MCDisassembler. - const EDInstInfo *getEDInfo() const; private: mutable ITStatus ITBlock; DecodeStatus AddThumbPredicate(MCInst&) const; @@ -385,7 +378,6 @@ static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val, static DecodeStatus DecodeMRRC2(llvm::MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); #include "ARMGenDisassemblerTables.inc" -#include "ARMGenEDInfo.inc" static MCDisassembler *createARMDisassembler(const Target &T, const MCSubtargetInfo &STI) { return new ARMDisassembler(STI); @@ -395,14 +387,6 @@ static MCDisassembler *createThumbDisassembler(const Target &T, const MCSubtarge return new ThumbDisassembler(STI); } -const EDInstInfo *ARMDisassembler::getEDInfo() const { - return instInfoARM; -} - -const EDInstInfo *ThumbDisassembler::getEDInfo() const { - return instInfoARM; -} - DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size, const MemoryObject &Region, uint64_t Address, @@ -1281,7 +1265,13 @@ static DecodeStatus DecodeBitfieldMaskOperand(MCInst &Inst, unsigned Val, unsigned lsb = fieldFromInstruction(Val, 0, 5); DecodeStatus S = MCDisassembler::Success; - if (lsb > msb) Check(S, MCDisassembler::SoftFail); + if (lsb > msb) { + Check(S, MCDisassembler::SoftFail); + // The check above will cause the warning for the "potentially undefined + // instruction encoding" but we can't build a bad MCOperand value here + // with a lsb > msb or else printing the MCInst will cause a crash. + lsb = msb; + } uint32_t msb_mask = 0xFFFFFFFF; if (msb != 31) msb_mask = (1U << (msb+1)) - 1; @@ -3059,9 +3049,9 @@ static DecodeStatus DecodeT2BROperand(MCInst &Inst, unsigned Val, static DecodeStatus DecodeThumbCmpBROperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { - if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<7>(Val<<1) + 4, + if (!tryAddingSymbolicOperand(Address, Address + (Val<<1) + 4, true, 2, Inst, Decoder)) - Inst.addOperand(MCOperand::CreateImm(SignExtend32<7>(Val << 1))); + Inst.addOperand(MCOperand::CreateImm(Val << 1)); return MCDisassembler::Success; } @@ -3288,7 +3278,7 @@ static DecodeStatus DecodeT2LdStPre(MCInst &Inst, unsigned Insn, return MCDisassembler::Fail; } - if (!Check(S, DecoderGPRRegisterClass(Inst, Rt, Address, Decoder))) + if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder))) return MCDisassembler::Fail; if (load) { diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index dcc41d9..2afb20d 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -13,11 +13,11 @@ #define DEBUG_TYPE "asm-printer" #include "ARMInstPrinter.h" -#include "MCTargetDesc/ARMBaseInfo.h" #include "MCTargetDesc/ARMAddressingModes.h" -#include "llvm/MC/MCInst.h" +#include "MCTargetDesc/ARMBaseInfo.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/raw_ostream.h" @@ -252,6 +252,35 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, return; } + // Combine 2 GPRs from disassember into a GPRPair to match with instr def. + // ldrexd/strexd require even/odd GPR pair. 
To enforce this constraint, + // a single GPRPair reg operand is used in the .td file to replace the two + // GPRs. However, when decoding them, the two GRPs cannot be automatically + // expressed as a GPRPair, so we have to manually merge them. + // FIXME: We would really like to be able to tablegen'erate this. + if (Opcode == ARM::LDREXD || Opcode == ARM::STREXD) { + const MCRegisterClass& MRC = MRI.getRegClass(ARM::GPRRegClassID); + bool isStore = Opcode == ARM::STREXD; + unsigned Reg = MI->getOperand(isStore ? 1 : 0).getReg(); + if (MRC.contains(Reg)) { + MCInst NewMI; + MCOperand NewReg; + NewMI.setOpcode(Opcode); + + if (isStore) + NewMI.addOperand(MI->getOperand(0)); + NewReg = MCOperand::CreateReg(MRI.getMatchingSuperReg(Reg, ARM::gsub_0, + &MRI.getRegClass(ARM::GPRPairRegClassID))); + NewMI.addOperand(NewReg); + + // Copy the rest operands into NewMI. + for(unsigned i= isStore ? 3 : 2; i < MI->getNumOperands(); ++i) + NewMI.addOperand(MI->getOperand(i)); + printInstruction(&NewMI, O); + return; + } + } + printInstruction(MI, O); printAnnotation(O, Annot); } @@ -264,7 +293,7 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, printRegName(O, Reg); } else if (Op.isImm()) { O << markup("<imm:") - << '#' << Op.getImm() + << '#' << formatImm(Op.getImm()) << markup(">"); } else { assert(Op.isExpr() && "unknown operand kind in printOperand"); @@ -290,7 +319,7 @@ void ARMInstPrinter::printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum, O << *MO1.getExpr(); else if (MO1.isImm()) { O << markup("<mem:") << "[pc, " - << markup("<imm:") << "#" << MO1.getImm() + << markup("<imm:") << "#" << formatImm(MO1.getImm()) << markup(">]>", "]"); } else @@ -598,8 +627,7 @@ void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum, O << markup("<mem:") << "["; printRegName(O, MO1.getReg()); if (MO2.getImm()) { - // FIXME: Both darwin as and GNU as violate ARM docs here. - O << ", :" << (MO2.getImm() << 3); + O << ":" << (MO2.getImm() << 3); } O << "]" << markup(">"); } @@ -691,6 +719,15 @@ void ARMInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum, O << "}"; } +void ARMInstPrinter::printGPRPairOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + unsigned Reg = MI->getOperand(OpNum).getReg(); + printRegName(O, MRI.getSubReg(Reg, ARM::gsub_0)); + O << ", "; + printRegName(O, MRI.getSubReg(Reg, ARM::gsub_1)); +} + + void ARMInstPrinter::printSetendOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNum); @@ -873,7 +910,7 @@ void ARMInstPrinter::printAdrLabelOperand(const MCInst *MI, unsigned OpNum, void ARMInstPrinter::printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { O << markup("<imm:") - << "#" << MI->getOperand(OpNum).getImm() * 4 + << "#" << formatImm(MI->getOperand(OpNum).getImm() * 4) << markup(">"); } @@ -881,7 +918,7 @@ void ARMInstPrinter::printThumbSRImm(const MCInst *MI, unsigned OpNum, raw_ostream &O) { unsigned Imm = MI->getOperand(OpNum).getImm(); O << markup("<imm:") - << "#" << (Imm == 0 ? 32 : Imm) + << "#" << formatImm((Imm == 0 ? 
32 : Imm)) << markup(">"); } @@ -938,7 +975,7 @@ void ARMInstPrinter::printThumbAddrModeImm5SOperand(const MCInst *MI, if (unsigned ImmOffs = MO2.getImm()) { O << ", " << markup("<imm:") - << "#" << ImmOffs * Scale + << "#" << formatImm(ImmOffs * Scale) << markup(">"); } O << "]" << markup(">"); @@ -1089,7 +1126,7 @@ void ARMInstPrinter::printT2AddrModeImm0_1020s4Operand(const MCInst *MI, if (MO2.getImm()) { O << ", " << markup("<imm:") - << "#" << MO2.getImm() * 4 + << "#" << formatImm(MO2.getImm() * 4) << markup(">"); } O << "]" << markup(">"); @@ -1179,7 +1216,7 @@ void ARMInstPrinter::printImmPlusOneOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { unsigned Imm = MI->getOperand(OpNum).getImm(); O << markup("<imm:") - << "#" << Imm + 1 + << "#" << formatImm(Imm + 1) << markup(">"); } diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h index b7bab5f..edff75d 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h @@ -124,6 +124,7 @@ public: void printNEONModImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printImmPlusOneOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printRotImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printGPRPairOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printPCLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum, diff --git a/lib/Target/ARM/LICENSE.TXT b/lib/Target/ARM/LICENSE.TXT new file mode 100755 index 0000000..68afea1 --- /dev/null +++ b/lib/Target/ARM/LICENSE.TXT @@ -0,0 +1,47 @@ +ARM Limited + +Software Grant License Agreement ("Agreement") + +Except for the license granted herein to you, ARM Limited ("ARM") reserves all +right, title, and interest in and to the Software (defined below). + +Definition + +"Software" means the code and documentation as well as any original work of +authorship, including any modifications or additions to an existing work, that +is intentionally submitted by ARM to llvm.org (http://llvm.org) ("LLVM") for +inclusion in, or documentation of, any of the products owned or managed by LLVM +(the "Work"). For the purposes of this definition, "submitted" means any form of +electronic, verbal, or written communication sent to LLVM or its +representatives, including but not limited to communication on electronic +mailing lists, source code control systems, and issue tracking systems that are +managed by, or on behalf of, LLVM for the purpose of discussing and improving +the Work, but excluding communication that is conspicuously marked otherwise. + +1. Grant of Copyright License. Subject to the terms and conditions of this + Agreement, ARM hereby grants to you and to recipients of the Software + distributed by LLVM a perpetual, worldwide, non-exclusive, no-charge, + royalty-free, irrevocable copyright license to reproduce, prepare derivative + works of, publicly display, publicly perform, sublicense, and distribute the + Software and such derivative works. + +2. Grant of Patent License. 
Subject to the terms and conditions of this + Agreement, ARM hereby grants you and to recipients of the Software + distributed by LLVM a perpetual, worldwide, non-exclusive, no-charge, + royalty-free, irrevocable (except as stated in this section) patent license + to make, have made, use, offer to sell, sell, import, and otherwise transfer + the Work, where such license applies only to those patent claims licensable + by ARM that are necessarily infringed by ARM's Software alone or by + combination of the Software with the Work to which such Software was + submitted. If any entity institutes patent litigation against ARM or any + other entity (including a cross-claim or counterclaim in a lawsuit) alleging + that ARM's Software, or the Work to which ARM has contributed constitutes + direct or contributory patent infringement, then any patent licenses granted + to that entity under this Agreement for the Software or Work shall terminate + as of the date such litigation is filed. + +Unless required by applicable law or agreed to in writing, the software is +provided on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +either express or implied, including, without limitation, any warranties or +conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A +PARTICULAR PURPOSE. diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index 1ba6ab0..e66e985 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -8,9 +8,11 @@ //===----------------------------------------------------------------------===// #include "MCTargetDesc/ARMMCTargetDesc.h" +#include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMBaseInfo.h" #include "MCTargetDesc/ARMFixupKinds.h" -#include "MCTargetDesc/ARMAddressingModes.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDirectives.h" @@ -21,7 +23,6 @@ #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" -#include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCValue.h" #include "llvm/Object/MachOFormat.h" @@ -114,11 +115,15 @@ public: MCValue &Target, uint64_t &Value, bool &IsResolved); + + void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, + uint64_t Value) const; + bool mayNeedRelaxation(const MCInst &Inst) const; bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, - const MCInstFragment *DF, + const MCRelaxableFragment *DF, const MCAsmLayout &Layout) const; void relaxInstruction(const MCInst &Inst, MCInst &Res) const; @@ -161,7 +166,7 @@ bool ARMAsmBackend::mayNeedRelaxation(const MCInst &Inst) const { bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, - const MCInstFragment *DF, + const MCRelaxableFragment *DF, const MCAsmLayout &Layout) const { switch ((unsigned)Fixup.getKind()) { case ARM::fixup_arm_thumb_br: { @@ -216,7 +221,7 @@ void ARMAsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const { bool ARMAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { const uint16_t Thumb1_16bitNopEncoding = 0x46c0; // using MOV r8,r8 const uint16_t Thumb2_16bitNopEncoding = 0xbf00; // NOP - const uint32_t ARMv4_NopEncoding = 0xe1a0000; // using MOV r0,r0 + const uint32_t ARMv4_NopEncoding = 0xe1a00000; // using MOV r0,r0 const uint32_t ARMv6T2_NopEncoding 
= 0xe320f000; // NOP if (isThumb()) { const uint16_t nopEncoding = hasNOP() ? Thumb2_16bitNopEncoding @@ -552,65 +557,6 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm, (void)adjustFixupValue(Fixup, Value, &Asm.getContext()); } -namespace { - -// FIXME: This should be in a separate file. -// ELF is an ELF of course... -class ELFARMAsmBackend : public ARMAsmBackend { -public: - uint8_t OSABI; - ELFARMAsmBackend(const Target &T, const StringRef TT, - uint8_t _OSABI) - : ARMAsmBackend(T, TT), OSABI(_OSABI) { } - - void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value) const; - - MCObjectWriter *createObjectWriter(raw_ostream &OS) const { - return createARMELFObjectWriter(OS, OSABI); - } -}; - -// FIXME: Raise this to share code between Darwin and ELF. -void ELFARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, - unsigned DataSize, uint64_t Value) const { - unsigned NumBytes = 4; // FIXME: 2 for Thumb - Value = adjustFixupValue(Fixup, Value); - if (!Value) return; // Doesn't change encoding. - - unsigned Offset = Fixup.getOffset(); - - // For each byte of the fragment that the fixup touches, mask in the bits from - // the fixup value. The Value has been "split up" into the appropriate - // bitfields above. - for (unsigned i = 0; i != NumBytes; ++i) - Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff); -} - -// FIXME: This should be in a separate file. -class DarwinARMAsmBackend : public ARMAsmBackend { -public: - const object::mach::CPUSubtypeARM Subtype; - DarwinARMAsmBackend(const Target &T, const StringRef TT, - object::mach::CPUSubtypeARM st) - : ARMAsmBackend(T, TT), Subtype(st) { - HasDataInCodeSupport = true; - } - - MCObjectWriter *createObjectWriter(raw_ostream &OS) const { - return createARMMachObjectWriter(OS, /*Is64Bit=*/false, - object::mach::CTM_ARM, - Subtype); - } - - void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value) const; - - virtual bool doesSectionRequireSymbols(const MCSection &Section) const { - return false; - } -}; - /// getFixupKindNumBytes - The number of bytes the fixup may change. static unsigned getFixupKindNumBytes(unsigned Kind) { switch (Kind) { @@ -659,8 +605,8 @@ static unsigned getFixupKindNumBytes(unsigned Kind) { } } -void DarwinARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, - unsigned DataSize, uint64_t Value) const { +void ARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, + unsigned DataSize, uint64_t Value) const { unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind()); Value = adjustFixupValue(Fixup, Value); if (!Value) return; // Doesn't change encoding. @@ -668,37 +614,70 @@ void DarwinARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, unsigned Offset = Fixup.getOffset(); assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!"); - // For each byte of the fragment that the fixup touches, mask in the - // bits from the fixup value. + // For each byte of the fragment that the fixup touches, mask in the bits from + // the fixup value. The Value has been "split up" into the appropriate + // bitfields above. for (unsigned i = 0; i != NumBytes; ++i) Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff); } +namespace { + +// FIXME: This should be in a separate file. +// ELF is an ELF of course... 
+class ELFARMAsmBackend : public ARMAsmBackend { +public: + uint8_t OSABI; + ELFARMAsmBackend(const Target &T, const StringRef TT, + uint8_t _OSABI) + : ARMAsmBackend(T, TT), OSABI(_OSABI) { } + + MCObjectWriter *createObjectWriter(raw_ostream &OS) const { + return createARMELFObjectWriter(OS, OSABI); + } +}; + +// FIXME: This should be in a separate file. +class DarwinARMAsmBackend : public ARMAsmBackend { +public: + const object::mach::CPUSubtypeARM Subtype; + DarwinARMAsmBackend(const Target &T, const StringRef TT, + object::mach::CPUSubtypeARM st) + : ARMAsmBackend(T, TT), Subtype(st) { + HasDataInCodeSupport = true; + } + + MCObjectWriter *createObjectWriter(raw_ostream &OS) const { + return createARMMachObjectWriter(OS, /*Is64Bit=*/false, + object::mach::CTM_ARM, + Subtype); + } + + virtual bool doesSectionRequireSymbols(const MCSection &Section) const { + return false; + } +}; + } // end anonymous namespace MCAsmBackend *llvm::createARMAsmBackend(const Target &T, StringRef TT, StringRef CPU) { Triple TheTriple(TT); if (TheTriple.isOSDarwin()) { - if (TheTriple.getArchName() == "armv4t" || - TheTriple.getArchName() == "thumbv4t") - return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V4T); - else if (TheTriple.getArchName() == "armv5e" || - TheTriple.getArchName() == "thumbv5e") - return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V5TEJ); - else if (TheTriple.getArchName() == "armv6" || - TheTriple.getArchName() == "thumbv6") - return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V6); - else if (TheTriple.getArchName() == "armv7f" || - TheTriple.getArchName() == "thumbv7f") - return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7F); - else if (TheTriple.getArchName() == "armv7k" || - TheTriple.getArchName() == "thumbv7k") - return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7K); - else if (TheTriple.getArchName() == "armv7s" || - TheTriple.getArchName() == "thumbv7s") - return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7S); - return new DarwinARMAsmBackend(T, TT, object::mach::CSARM_V7); + object::mach::CPUSubtypeARM CS = + StringSwitch<object::mach::CPUSubtypeARM>(TheTriple.getArchName()) + .Cases("armv4t", "thumbv4t", object::mach::CSARM_V4T) + .Cases("armv5e", "thumbv5e",object::mach::CSARM_V5TEJ) + .Cases("armv6", "thumbv6", object::mach::CSARM_V6) + .Cases("armv6m", "thumbv6m", object::mach::CSARM_V6M) + .Cases("armv7em", "thumbv7em", object::mach::CSARM_V7EM) + .Cases("armv7f", "thumbv7f", object::mach::CSARM_V7F) + .Cases("armv7k", "thumbv7k", object::mach::CSARM_V7K) + .Cases("armv7m", "thumbv7m", object::mach::CSARM_V7M) + .Cases("armv7s", "thumbv7s", object::mach::CSARM_V7S) + .Default(object::mach::CSARM_V7); + + return new DarwinARMAsmBackend(T, TT, CS); } if (TheTriple.isOSWindows()) diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp index 99e4f71..f98bbd2 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp @@ -7,17 +7,17 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/ARMFixupKinds.h" #include "MCTargetDesc/ARMMCTargetDesc.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" +#include "MCTargetDesc/ARMFixupKinds.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" #include 
"llvm/MC/MCSectionELF.h" #include "llvm/MC/MCValue.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -37,7 +37,6 @@ namespace { virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel, bool IsRelocWithSymbol, int64_t Addend) const; - virtual unsigned getEFlags() const; virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm, const MCValue &Target, const MCFragment &F, @@ -53,11 +52,6 @@ ARMELFObjectWriter::ARMELFObjectWriter(uint8_t OSABI) ARMELFObjectWriter::~ARMELFObjectWriter() {} -// FIXME: get the real EABI Version from the Triple. -unsigned ARMELFObjectWriter::getEFlags() const { - return ELF::EF_ARM_EABIMASK & DefaultEABIVersion; -} - // In ARM, _MergedGlobals and other most symbols get emitted directly. // I.e. not as an offset to a section symbol. // This code is an approximation of what ARM/gcc does. @@ -133,6 +127,7 @@ const MCSymbol *ARMELFObjectWriter::ExplicitRelSym(const MCAssembler &Asm, switch (RelocType) { default: EmitThisSym = true; break; case ELF::R_ARM_ABS32: EmitThisSym = false; break; + case ELF::R_ARM_PREL31: EmitThisSym = false; break; } } @@ -225,6 +220,9 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, case FK_Data_4: switch (Modifier) { default: llvm_unreachable("Unsupported Modifier"); + case MCSymbolRefExpr::VK_ARM_NONE: + Type = ELF::R_ARM_NONE; + break; case MCSymbolRefExpr::VK_ARM_GOT: Type = ELF::R_ARM_GOT_BREL; break; @@ -249,7 +247,10 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, case MCSymbolRefExpr::VK_ARM_TARGET2: Type = ELF::R_ARM_TARGET2; break; - } + case MCSymbolRefExpr::VK_ARM_PREL31: + Type = ELF::R_ARM_PREL31; + break; + } break; case ARM::fixup_arm_ldst_pcrel_12: case ARM::fixup_arm_pcrel_10: diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp new file mode 100644 index 0000000..418971d --- /dev/null +++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -0,0 +1,418 @@ +//===- lib/MC/ARMELFStreamer.cpp - ELF Object Output for ARM --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file assembles .s files and emits ARM ELF .o object files. Different +// from generic ELF streamer in emitting mapping symbols ($a, $t and $d) to +// delimit regions of data and code. 
+// +//===----------------------------------------------------------------------===// + +#include "ARMUnwindOp.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Twine.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCELF.h" +#include "llvm/MC/MCELFStreamer.h" +#include "llvm/MC/MCELFSymbolFlags.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCObjectStreamer.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCValue.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +namespace { + +/// Extend the generic ELFStreamer class so that it can emit mapping symbols at +/// the appropriate points in the object files. These symbols are defined in the +/// ARM ELF ABI: infocenter.arm.com/help/topic/com.arm.../IHI0044D_aaelf.pdf. +/// +/// In brief: $a, $t or $d should be emitted at the start of each contiguous +/// region of ARM code, Thumb code or data in a section. In practice, this +/// emission does not rely on explicit assembler directives but on inherent +/// properties of the directives doing the emission (e.g. ".byte" is data, "add +/// r0, r0, r0" an instruction). +/// +/// As a result this system is orthogonal to the DataRegion infrastructure used +/// by MachO. Beware! +class ARMELFStreamer : public MCELFStreamer { +public: + ARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS, + MCCodeEmitter *Emitter, bool IsThumb) + : MCELFStreamer(SK_ARMELFStreamer, Context, TAB, OS, Emitter), + IsThumb(IsThumb), MappingSymbolCounter(0), LastEMS(EMS_None), ExTab(0), + FnStart(0), Personality(0), CantUnwind(false) {} + + ~ARMELFStreamer() {} + + // ARM exception handling directives + virtual void EmitFnStart(); + virtual void EmitFnEnd(); + virtual void EmitCantUnwind(); + virtual void EmitPersonality(const MCSymbol *Per); + virtual void EmitHandlerData(); + virtual void EmitSetFP(unsigned NewFpReg, + unsigned NewSpReg, + int64_t Offset = 0); + virtual void EmitPad(int64_t Offset); + virtual void EmitRegSave(const SmallVectorImpl<unsigned> &RegList, + bool isVector); + + virtual void ChangeSection(const MCSection *Section) { + // We have to keep track of the mapping symbol state of any sections we + // use. Each one should start off as EMS_None, the default value that + // DenseMap::lookup returns for sections it has not seen yet. + LastMappingSymbols[getPreviousSection()] = LastEMS; + LastEMS = LastMappingSymbols.lookup(Section); + + MCELFStreamer::ChangeSection(Section); + } + + /// This function is the one used to emit instruction data into the ELF + /// streamer. We override it to add the appropriate mapping symbol if + /// necessary. + virtual void EmitInstruction(const MCInst& Inst) { + if (IsThumb) + EmitThumbMappingSymbol(); + else + EmitARMMappingSymbol(); + + MCELFStreamer::EmitInstruction(Inst); + } + + /// This is one of the functions used to emit data into an ELF section, so the + /// ARM streamer overrides it to add the appropriate mapping symbol ($d) if + /// necessary.
+ virtual void EmitBytes(StringRef Data, unsigned AddrSpace) { + EmitDataMappingSymbol(); + MCELFStreamer::EmitBytes(Data, AddrSpace); + } + + /// This is one of the functions used to emit data into an ELF section, so the + /// ARM streamer overrides it to add the appropriate mapping symbol ($d) if + /// necessary. + virtual void EmitValueImpl(const MCExpr *Value, unsigned Size, + unsigned AddrSpace) { + EmitDataMappingSymbol(); + MCELFStreamer::EmitValueImpl(Value, Size, AddrSpace); + } + + virtual void EmitAssemblerFlag(MCAssemblerFlag Flag) { + MCELFStreamer::EmitAssemblerFlag(Flag); + + switch (Flag) { + case MCAF_SyntaxUnified: + return; // no-op here. + case MCAF_Code16: + IsThumb = true; + return; // Change to Thumb mode + case MCAF_Code32: + IsThumb = false; + return; // Change to ARM mode + case MCAF_Code64: + return; + case MCAF_SubsectionsViaSymbols: + return; + } + } + + static bool classof(const MCStreamer *S) { + return S->getKind() == SK_ARMELFStreamer; + } + +private: + enum ElfMappingSymbol { + EMS_None, + EMS_ARM, + EMS_Thumb, + EMS_Data + }; + + void EmitDataMappingSymbol() { + if (LastEMS == EMS_Data) return; + EmitMappingSymbol("$d"); + LastEMS = EMS_Data; + } + + void EmitThumbMappingSymbol() { + if (LastEMS == EMS_Thumb) return; + EmitMappingSymbol("$t"); + LastEMS = EMS_Thumb; + } + + void EmitARMMappingSymbol() { + if (LastEMS == EMS_ARM) return; + EmitMappingSymbol("$a"); + LastEMS = EMS_ARM; + } + + void EmitMappingSymbol(StringRef Name) { + MCSymbol *Start = getContext().CreateTempSymbol(); + EmitLabel(Start); + + MCSymbol *Symbol = + getContext().GetOrCreateSymbol(Name + "." + + Twine(MappingSymbolCounter++)); + + MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); + MCELF::SetType(SD, ELF::STT_NOTYPE); + MCELF::SetBinding(SD, ELF::STB_LOCAL); + SD.setExternal(false); + Symbol->setSection(*getCurrentSection()); + + const MCExpr *Value = MCSymbolRefExpr::Create(Start, getContext()); + Symbol->setVariableValue(Value); + } + + void EmitThumbFunc(MCSymbol *Func) { + // FIXME: Anything needed here to flag the function as thumb? 
+ + getAssembler().setIsThumbFunc(Func); + + MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Func); + SD.setFlags(SD.getFlags() | ELF_Other_ThumbFunc); + } + + // Helper functions for ARM exception handling directives + void Reset(); + + void EmitPersonalityFixup(StringRef Name); + + void SwitchToEHSection(const char *Prefix, unsigned Type, unsigned Flags, + SectionKind Kind, const MCSymbol &Fn); + void SwitchToExTabSection(const MCSymbol &FnStart); + void SwitchToExIdxSection(const MCSymbol &FnStart); + + bool IsThumb; + int64_t MappingSymbolCounter; + + DenseMap<const MCSection *, ElfMappingSymbol> LastMappingSymbols; + ElfMappingSymbol LastEMS; + + // ARM Exception Handling Frame Information + MCSymbol *ExTab; + MCSymbol *FnStart; + const MCSymbol *Personality; + bool CantUnwind; +}; +} + +inline void ARMELFStreamer::SwitchToEHSection(const char *Prefix, + unsigned Type, + unsigned Flags, + SectionKind Kind, + const MCSymbol &Fn) { + const MCSectionELF &FnSection = + static_cast<const MCSectionELF &>(Fn.getSection()); + + // Create the name for the new section + StringRef FnSecName(FnSection.getSectionName()); + SmallString<128> EHSecName(Prefix); + if (FnSecName != ".text") { + EHSecName += FnSecName; + } + + // Get .ARM.extab or .ARM.exidx section + const MCSectionELF *EHSection = NULL; + if (const MCSymbol *Group = FnSection.getGroup()) { + EHSection = getContext().getELFSection( + EHSecName, Type, Flags | ELF::SHF_GROUP, Kind, + FnSection.getEntrySize(), Group->getName()); + } else { + EHSection = getContext().getELFSection(EHSecName, Type, Flags, Kind); + } + assert(EHSection); + + // Switch to .ARM.extab or .ARM.exidx section + SwitchSection(EHSection); + EmitCodeAlignment(4, 0); +} + +inline void ARMELFStreamer::SwitchToExTabSection(const MCSymbol &FnStart) { + SwitchToEHSection(".ARM.extab", + ELF::SHT_PROGBITS, + ELF::SHF_ALLOC, + SectionKind::getDataRel(), + FnStart); +} + +inline void ARMELFStreamer::SwitchToExIdxSection(const MCSymbol &FnStart) { + SwitchToEHSection(".ARM.exidx", + ELF::SHT_ARM_EXIDX, + ELF::SHF_ALLOC | ELF::SHF_LINK_ORDER, + SectionKind::getDataRel(), + FnStart); +} + +void ARMELFStreamer::Reset() { + ExTab = NULL; + FnStart = NULL; + Personality = NULL; + CantUnwind = false; +} + +// Add the R_ARM_NONE fixup at the same position +void ARMELFStreamer::EmitPersonalityFixup(StringRef Name) { + const MCSymbol *PersonalitySym = getContext().GetOrCreateSymbol(Name); + + const MCSymbolRefExpr *PersonalityRef = + MCSymbolRefExpr::Create(PersonalitySym, + MCSymbolRefExpr::VK_ARM_NONE, + getContext()); + + AddValueSymbols(PersonalityRef); + MCDataFragment *DF = getOrCreateDataFragment(); + DF->getFixups().push_back( + MCFixup::Create(DF->getContents().size(), PersonalityRef, + MCFixup::getKindForSize(4, false))); +} + +void ARMELFStreamer::EmitFnStart() { + assert(FnStart == 0); + FnStart = getContext().CreateTempSymbol(); + EmitLabel(FnStart); +} + +void ARMELFStreamer::EmitFnEnd() { + assert(FnStart && ".fnstart must precede .fnend"); + + // Emit unwind opcodes if there is no .handlerdata directive + int PersonalityIndex = -1; + if (!ExTab && !CantUnwind) { + // For __aeabi_unwind_cpp_pr1, we have to emit opcodes in .ARM.extab.
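+ // Sketch of the compact entry word built below (the field names are our
+ // reading of the EHABI compact model, not part of this patch): bits 31-24
+ // hold NumExtraEntryWords (0), bits 23-16 hold EHT_COMPACT |
+ // PersonalityIndex (0x81), and the two low bytes hold unwind opcodes, so
+ // the value emitted here works out to 0x0081b0b0.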
+ SwitchToExTabSection(*FnStart); + + // Create .ARM.extab label for offset in .ARM.exidx + ExTab = getContext().CreateTempSymbol(); + EmitLabel(ExTab); + + PersonalityIndex = 1; + + uint32_t Entry = 0; + uint32_t NumExtraEntryWords = 0; + Entry |= NumExtraEntryWords << 24; + Entry |= (EHT_COMPACT | PersonalityIndex) << 16; + + // TODO: This should be generated according to .save, .vsave, .setfp + // directives. Currently, we simply generate the FINISH opcode. + Entry |= UNWIND_OPCODE_FINISH << 8; + Entry |= UNWIND_OPCODE_FINISH; + + EmitIntValue(Entry, 4, 0); + } + + // Emit the exception index table entry + SwitchToExIdxSection(*FnStart); + + if (PersonalityIndex == 1) + EmitPersonalityFixup("__aeabi_unwind_cpp_pr1"); + + const MCSymbolRefExpr *FnStartRef = + MCSymbolRefExpr::Create(FnStart, + MCSymbolRefExpr::VK_ARM_PREL31, + getContext()); + + EmitValue(FnStartRef, 4, 0); + + if (CantUnwind) { + EmitIntValue(EXIDX_CANTUNWIND, 4, 0); + } else { + const MCSymbolRefExpr *ExTabEntryRef = + MCSymbolRefExpr::Create(ExTab, + MCSymbolRefExpr::VK_ARM_PREL31, + getContext()); + EmitValue(ExTabEntryRef, 4, 0); + } + + // Clean up the exception handling frame information + Reset(); +} + +void ARMELFStreamer::EmitCantUnwind() { + CantUnwind = true; +} + +void ARMELFStreamer::EmitHandlerData() { + SwitchToExTabSection(*FnStart); + + // Create .ARM.extab label for offset in .ARM.exidx + assert(!ExTab); + ExTab = getContext().CreateTempSymbol(); + EmitLabel(ExTab); + + // Emit Personality + assert(Personality && ".personality directive must precede .handlerdata"); + + const MCSymbolRefExpr *PersonalityRef = + MCSymbolRefExpr::Create(Personality, + MCSymbolRefExpr::VK_ARM_PREL31, + getContext()); + + EmitValue(PersonalityRef, 4, 0); + + // Emit unwind opcodes + uint32_t Entry = 0; + uint32_t NumExtraEntryWords = 0; + + // TODO: This should be generated according to .save, .vsave, .setfp + // directives. Currently, we simply generate the FINISH opcode. + Entry |= NumExtraEntryWords << 24; + Entry |= UNWIND_OPCODE_FINISH << 16; + Entry |= UNWIND_OPCODE_FINISH << 8; + Entry |= UNWIND_OPCODE_FINISH; + + EmitIntValue(Entry, 4, 0); +} + +void ARMELFStreamer::EmitPersonality(const MCSymbol *Per) { + Personality = Per; +} + +void ARMELFStreamer::EmitSetFP(unsigned NewFpReg, + unsigned NewSpReg, + int64_t Offset) { + // TODO: Not implemented +} + +void ARMELFStreamer::EmitPad(int64_t Offset) { + // TODO: Not implemented +} + +void ARMELFStreamer::EmitRegSave(const SmallVectorImpl<unsigned> &RegList, + bool IsVector) { + // TODO: Not implemented +} + +namespace llvm { + MCELFStreamer* createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, + raw_ostream &OS, MCCodeEmitter *Emitter, + bool RelaxAll, bool NoExecStack, + bool IsThumb) { + ARMELFStreamer *S = new ARMELFStreamer(Context, TAB, OS, Emitter, IsThumb); + if (RelaxAll) + S->getAssembler().setRelaxAll(true); + if (NoExecStack) + S->getAssembler().setNoExecStack(true); + return S; + } + +} + + diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.h b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.h new file mode 100644 index 0000000..77ae5d2 --- /dev/null +++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.h @@ -0,0 +1,27 @@ +//===-- ARMELFStreamer.h - ELF Streamer for ARM ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// This file implements ELF streamer information for the ARM backend. +// +//===----------------------------------------------------------------------===// + +#ifndef ARM_ELF_STREAMER_H +#define ARM_ELF_STREAMER_H + +#include "llvm/MC/MCELFStreamer.h" + +namespace llvm { + + MCELFStreamer* createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, + raw_ostream &OS, MCCodeEmitter *Emitter, + bool RelaxAll, bool NoExecStack, + bool IsThumb); +} + +#endif // ARM_ELF_STREAMER_H diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index d0e127a..7a59a7d 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -12,11 +12,13 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "mccodeemitter" +#include "MCTargetDesc/ARMMCTargetDesc.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMBaseInfo.h" #include "MCTargetDesc/ARMFixupKinds.h" #include "MCTargetDesc/ARMMCExpr.h" -#include "MCTargetDesc/ARMMCTargetDesc.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/Statistic.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" @@ -24,8 +26,6 @@ #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/ADT/APFloat.h" -#include "llvm/ADT/Statistic.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -655,15 +655,28 @@ getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx, int32_t offset = MO.getImm(); uint32_t Val = 0x2000; + int SoImmVal; if (offset == INT32_MIN) { Val = 0x1000; - offset = 0; + SoImmVal = 0; } else if (offset < 0) { Val = 0x1000; offset *= -1; + SoImmVal = ARM_AM::getSOImmVal(offset); + if(SoImmVal == -1) { + Val = 0x2000; + offset *= -1; + SoImmVal = ARM_AM::getSOImmVal(offset); + } + } else { + SoImmVal = ARM_AM::getSOImmVal(offset); + if(SoImmVal == -1) { + Val = 0x1000; + offset *= -1; + SoImmVal = ARM_AM::getSOImmVal(offset); + } } - int SoImmVal = ARM_AM::getSOImmVal(offset); assert(SoImmVal != -1 && "Not a valid so_imm value!"); Val |= SoImmVal; diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp index 22e14a2..fc8505b 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp @@ -9,8 +9,8 @@ #define DEBUG_TYPE "armmcexpr" #include "ARMMCExpr.h" -#include "llvm/MC/MCContext.h" #include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCContext.h" using namespace llvm; const ARMMCExpr* diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h index b404e6c..cd4067a 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h +++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h @@ -64,6 +64,9 @@ public: return getSubExpr()->FindAssociatedSection(); } + // There are no TLS ARMMCExprs at the moment. 
+ void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {} + static bool classof(const MCExpr *E) { return E->getKind() == MCExpr::Target; } diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index 00ffc94..f09fb5a 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -11,10 +11,12 @@ // //===----------------------------------------------------------------------===// -#include "ARMMCTargetDesc.h" -#include "ARMMCAsmInfo.h" #include "ARMBaseInfo.h" +#include "ARMELFStreamer.h" +#include "ARMMCAsmInfo.h" +#include "ARMMCTargetDesc.h" #include "InstPrinter/ARMInstPrinter.h" +#include "llvm/ADT/Triple.h" #include "llvm/MC/MCCodeGenInfo.h" #include "llvm/MC/MCInstrAnalysis.h" #include "llvm/MC/MCInstrInfo.h" @@ -36,6 +38,8 @@ using namespace llvm; std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) { + Triple triple(TT); + // Set the boolean corresponding to the current target triple, or the default // if one cannot be determined, to true. unsigned Len = TT.size(); @@ -118,6 +122,13 @@ std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) { ARMArchFeature += ",+thumb-mode"; } + if (triple.isOSNaCl()) { + if (ARMArchFeature.empty()) + ARMArchFeature = "+nacl-trap"; + else + ARMArchFeature += ",+nacl-trap"; + } + return ARMArchFeature; } @@ -144,7 +155,7 @@ static MCInstrInfo *createARMMCInstrInfo() { static MCRegisterInfo *createARMMCRegisterInfo(StringRef Triple) { MCRegisterInfo *X = new MCRegisterInfo(); - InitARMMCRegisterInfo(X, ARM::LR); + InitARMMCRegisterInfo(X, ARM::LR, 0, 0, ARM::PC); return X; } @@ -186,7 +197,8 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT, llvm_unreachable("ARM does not support Windows COFF format"); } - return createELFStreamer(Ctx, MAB, OS, Emitter, false, NoExecStack); + return createARMELFStreamer(Ctx, MAB, OS, Emitter, false, NoExecStack, + TheTriple.getArch() == Triple::thumb); } static MCInstPrinter *createARMMCInstPrinter(const Target &T, diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp index 2154c93..b9efe74 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp @@ -7,17 +7,18 @@ // //===----------------------------------------------------------------------===// +#include "MCTargetDesc/ARMMCTargetDesc.h" #include "MCTargetDesc/ARMBaseInfo.h" #include "MCTargetDesc/ARMFixupKinds.h" #include "llvm/ADT/Twine.h" -#include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCAsmLayout.h" -#include "llvm/MC/MCMachObjectWriter.h" +#include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCMachOSymbolFlags.h" +#include "llvm/MC/MCMachObjectWriter.h" #include "llvm/MC/MCValue.h" #include "llvm/Object/MachOFormat.h" #include "llvm/Support/ErrorHandling.h" diff --git a/lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h b/lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h new file mode 100644 index 0000000..dad5576 --- /dev/null +++ b/lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h @@ -0,0 +1,112 @@ +//===-- ARMUnwindOp.h - ARM Unwind Opcodes ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file defines the constants for the ARM unwind opcodes and exception +// handling table entry kinds. +// +//===----------------------------------------------------------------------===// + +#ifndef ARM_UNWIND_OP_H +#define ARM_UNWIND_OP_H + +namespace llvm { + + /// ARM exception handling table entry kinds + enum ARMEHTEntryKind { + EHT_GENERIC = 0x00, + EHT_COMPACT = 0x80 + }; + + enum { + /// Special entry for functions that never unwind + EXIDX_CANTUNWIND = 0x1 + }; + + /// ARM-defined frame unwinding opcodes + enum ARMUnwindOpcodes { + // Format: 00xxxxxx + // Purpose: vsp = vsp + ((x << 2) + 4) + UNWIND_OPCODE_INC_VSP = 0x00, + + // Format: 01xxxxxx + // Purpose: vsp = vsp - ((x << 2) + 4) + UNWIND_OPCODE_DEC_VSP = 0x40, + + // Format: 10000000 00000000 + // Purpose: refuse to unwind + UNWIND_OPCODE_REFUSE = 0x8000, + + // Format: 1000xxxx xxxxxxxx + // Purpose: pop r[15:12], r[11:4] + // Constraint: x != 0 + UNWIND_OPCODE_POP_REG_MASK_R4 = 0x8000, + + // Format: 1001xxxx + // Purpose: vsp = r[x] + // Constraint: x != 13 && x != 15 + UNWIND_OPCODE_SET_VSP = 0x90, + + // Format: 10100xxx + // Purpose: pop r[(4+x):4] + UNWIND_OPCODE_POP_REG_RANGE_R4 = 0xa0, + + // Format: 10101xxx + // Purpose: pop r14, r[(4+x):4] + UNWIND_OPCODE_POP_REG_RANGE_R4_R14 = 0xa8, + + // Format: 10110000 + // Purpose: finish + UNWIND_OPCODE_FINISH = 0xb0, + + // Format: 10110001 0000xxxx + // Purpose: pop r[3:0] + // Constraint: x != 0 + UNWIND_OPCODE_POP_REG_MASK = 0xb100, + + // Format: 10110010 x(uleb128) + // Purpose: vsp = vsp + ((x << 2) + 0x204) + UNWIND_OPCODE_INC_VSP_ULEB128 = 0xb2, + + // Format: 10110011 xxxxyyyy + // Purpose: pop d[(x+y):x] + UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDX = 0xb300, + + // Format: 10111xxx + // Purpose: pop d[(8+x):8] + UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDX_D8 = 0xb8, + + // Format: 11000xxx + // Purpose: pop wR[(10+x):10] + UNWIND_OPCODE_POP_WIRELESS_MMX_REG_RANGE_WR10 = 0xc0, + + // Format: 11000110 xxxxyyyy + // Purpose: pop wR[(x+y):x] + UNWIND_OPCODE_POP_WIRELESS_MMX_REG_RANGE = 0xc600, + + // Format: 11000111 0000xxxx + // Purpose: pop wCGR[3:0] + // Constraint: x != 0 + UNWIND_OPCODE_POP_WIRELESS_MMX_REG_MASK = 0xc700, + + // Format: 11001000 xxxxyyyy + // Purpose: pop d[(16+x+y):(16+x)] + UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD_D16 = 0xc800, + + // Format: 11001001 xxxxyyyy + // Purpose: pop d[(x+y):x] + UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD = 0xc900, + + // Format: 11010xxx + // Purpose: pop d[(8+x):8] + UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD_D8 = 0xd0 + }; + +} + +#endif // ARM_UNWIND_OP_H diff --git a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt index 2565994..e17eb4d 100644 --- a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt @@ -1,6 +1,7 @@ add_llvm_library(LLVMARMDesc ARMAsmBackend.cpp ARMELFObjectWriter.cpp + ARMELFStreamer.cpp ARMMCAsmInfo.cpp ARMMCCodeEmitter.cpp ARMMCExpr.cpp diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp index 70643bc..2e266c2 100644 --- a/lib/Target/ARM/MLxExpansionPass.cpp +++ b/lib/Target/ARM/MLxExpansionPass.cpp @@ -16,16 +16,16 @@ #include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMSubtarget.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/Statistic.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; static cl::opt<bool> diff --git a/lib/Target/ARM/Makefile b/lib/Target/ARM/Makefile index 3e48ed1..f069535 100644 --- a/lib/Target/ARM/Makefile +++ b/lib/Target/ARM/Makefile @@ -16,7 +16,7 @@ BUILT_SOURCES = ARMGenRegisterInfo.inc ARMGenInstrInfo.inc \ ARMGenAsmWriter.inc ARMGenAsmMatcher.inc \ ARMGenDAGISel.inc ARMGenSubtargetInfo.inc \ ARMGenCodeEmitter.inc ARMGenCallingConv.inc \ - ARMGenEDInfo.inc ARMGenFastISel.inc ARMGenMCCodeEmitter.inc \ + ARMGenFastISel.inc ARMGenMCCodeEmitter.inc \ ARMGenMCPseudoLowering.inc ARMGenDisassemblerTables.inc DIRS = InstPrinter AsmParser Disassembler TargetInfo MCTargetDesc diff --git a/lib/Target/ARM/README-Thumb.txt b/lib/Target/ARM/README-Thumb.txt index 463c440..a64707e 100644 --- a/lib/Target/ARM/README-Thumb.txt +++ b/lib/Target/ARM/README-Thumb.txt @@ -173,7 +173,6 @@ GCC is doing a couple of clever things here: mov r1, #1 lsl r1, r1, #8 tst r2, r1 - //===---------------------------------------------------------------------===// @@ -196,7 +195,6 @@ This is especially bad when dynamic alloca is used. The all fixed size stack objects are referenced off the frame pointer with negative offsets. See oggenc for an example. - //===---------------------------------------------------------------------===// Poor codegen test/CodeGen/ARM/select.ll f7: diff --git a/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp b/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp index 500e3de..fa5681f 100644 --- a/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp +++ b/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// #include "ARM.h" -#include "llvm/Module.h" +#include "llvm/IR/Module.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp index edd73c2..2c3388c 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -43,6 +43,41 @@ emitSPUpdate(MachineBasicBlock &MBB, MRI, MIFlags); } + +void Thumb1FrameLowering:: +eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + const Thumb1InstrInfo &TII = + *static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo()); + const Thumb1RegisterInfo *RegInfo = + static_cast<const Thumb1RegisterInfo*>(MF.getTarget().getRegisterInfo()); + if (!hasReservedCallFrame(MF)) { + // If we have alloca, convert as follows: + // ADJCALLSTACKDOWN -> sub, sp, sp, amount + // ADJCALLSTACKUP -> add, sp, sp, amount + MachineInstr *Old = I; + DebugLoc dl = Old->getDebugLoc(); + unsigned Amount = Old->getOperand(0).getImm(); + if (Amount != 0) { + // We need to keep the stack aligned properly. To do this, we round the + // amount of space needed for the outgoing arguments up to the next + // alignment boundary. + unsigned Align = getStackAlignment(); + Amount = (Amount+Align-1)/Align*Align; + + // Replace the pseudo instruction with a new instruction... 
+ unsigned Opc = Old->getOpcode(); + if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) { + emitSPUpdate(MBB, I, TII, dl, *RegInfo, -Amount); + } else { + assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP); + emitSPUpdate(MBB, I, TII, dl, *RegInfo, Amount); + } + } + } + MBB.erase(I); +} + void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { MachineBasicBlock &MBB = MF.front(); MachineBasicBlock::iterator MBBI = MBB.begin(); @@ -124,14 +159,17 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize); unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize; unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size; - AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes); + bool HasFP = hasFP(MF); + if (HasFP) + AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + + NumBytes); AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset); AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset); AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); NumBytes = DPRCSOffset; // Adjust FP so it points to the stack slot that contains the previous FP. - if (hasFP(MF)) { + if (HasFP) { AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr) .addFrameIndex(FramePtrSpillFI).addImm(0) .setMIFlags(MachineInstr::FrameSetup)); @@ -146,7 +184,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes, MachineInstr::FrameSetup); - if (STI.isTargetELF() && hasFP(MF)) + if (STI.isTargetELF() && HasFP) MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() - AFI->getFramePtrSpillOffset()); @@ -281,7 +319,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg)) .addReg(ARM::R3, RegState::Kill); AddDefaultPred(MIB); - MIB->copyImplicitOps(&*MBBI); + MIB.copyImplicitOps(&*MBBI); // erase the old tBX_RET instruction MBB.erase(MBBI); } @@ -352,7 +390,7 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB, continue; Reg = ARM::PC; (*MIB).setDesc(TII.get(ARM::tPOP_RET)); - MIB->copyImplicitOps(&*MI); + MIB.copyImplicitOps(&*MI); MI = MBB.erase(MI); } MIB.addReg(Reg, getDefRegState(true)); diff --git a/lib/Target/ARM/Thumb1FrameLowering.h b/lib/Target/ARM/Thumb1FrameLowering.h index bcfc516..5a300af 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.h +++ b/lib/Target/ARM/Thumb1FrameLowering.h @@ -45,6 +45,10 @@ public: const TargetRegisterInfo *TRI) const; bool hasReservedCallFrame(const MachineFunction &MF) const; + + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const; }; } // End llvm namespace diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp index 735b255..095736d 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.cpp +++ b/lib/Target/ARM/Thumb1InstrInfo.cpp @@ -15,8 +15,8 @@ #include "ARM.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/MC/MCInst.h" using namespace llvm; diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp index a39b722..7452fb7 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp @@ -18,21 +18,21 @@ #include "ARMMachineFunctionInfo.h" #include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/LLVMContext.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetMachine.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetMachine.h" namespace llvm { extern cl::opt<bool> ReuseFrameIndexVals; @@ -296,47 +296,6 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB, } } -static void emitSPUpdate(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, - const TargetInstrInfo &TII, DebugLoc dl, - const Thumb1RegisterInfo &MRI, - int NumBytes) { - emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, TII, - MRI); -} - -void Thumb1RegisterInfo:: -eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - - if (!TFI->hasReservedCallFrame(MF)) { - // If we have alloca, convert as follows: - // ADJCALLSTACKDOWN -> sub, sp, sp, amount - // ADJCALLSTACKUP -> add, sp, sp, amount - MachineInstr *Old = I; - DebugLoc dl = Old->getDebugLoc(); - unsigned Amount = Old->getOperand(0).getImm(); - if (Amount != 0) { - // We need to keep the stack aligned properly. To do this, we round the - // amount of space needed for the outgoing arguments up to the next - // alignment boundary. - unsigned Align = TFI->getStackAlignment(); - Amount = (Amount+Align-1)/Align*Align; - - // Replace the pseudo instruction with a new instruction... - unsigned Opc = Old->getOpcode(); - if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) { - emitSPUpdate(MBB, I, TII, dl, *this, -Amount); - } else { - assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP); - emitSPUpdate(MBB, I, TII, dl, *this, Amount); - } - } - } - MBB.erase(I); -} - /// emitThumbConstant - Emit a series of instructions to materialize a /// constant. 
static void emitThumbConstant(MachineBasicBlock &MBB, @@ -390,6 +349,7 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx, MachineInstr &MI = *II; MachineBasicBlock &MBB = *MI.getParent(); DebugLoc dl = MI.getDebugLoc(); + MachineInstrBuilder MIB(*MBB.getParent(), &MI); unsigned Opcode = MI.getOpcode(); const MCInstrDesc &Desc = MI.getDesc(); unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); @@ -417,7 +377,6 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx, MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); // Remove offset MI.RemoveOperand(FrameRegIdx+1); - MachineInstrBuilder MIB(&MI); return true; } @@ -428,7 +387,6 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx, if (Opcode == ARM::tADDi3) { MI.setDesc(TII.get(Opcode)); removeOperands(MI, FrameRegIdx); - MachineInstrBuilder MIB(&MI); AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg) .addImm(Offset / Scale)); } else { @@ -457,7 +415,6 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx, if (Opcode == ARM::tADDi3) { MI.setDesc(TII.get(Opcode)); removeOperands(MI, FrameRegIdx); - MachineInstrBuilder MIB(&MI); AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg).addImm(Mask)); } else { MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); @@ -595,22 +552,18 @@ Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB, void Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS) const { + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS) const { unsigned VReg = 0; - unsigned i = 0; MachineInstr &MI = *II; MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); DebugLoc dl = MI.getDebugLoc(); - - while (!MI.getOperand(i).isFI()) { - ++i; - assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); - } + MachineInstrBuilder MIB(*MBB.getParent(), &MI); unsigned FrameReg = ARM::SP; - int FrameIndex = MI.getOperand(i).getIndex(); + int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) + MF.getFrameInfo()->getStackSize() + SPAdj; @@ -635,7 +588,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // means the stack pointer cannot be used to access the emergency spill slot // when !hasReservedCallFrame(). #ifndef NDEBUG - if (RS && FrameReg == ARM::SP && FrameIndex == RS->getScavengingFrameIndex()){ + if (RS && FrameReg == ARM::SP && RS->isScavengingFrameIndex(FrameIndex)){ assert(MF.getTarget().getFrameLowering()->hasReservedCallFrame(MF) && "Cannot use SP to access the emergency spill slot in " "functions without a reserved call frame"); @@ -647,15 +600,15 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // Special handling of dbg_value instructions. if (MI.isDebugValue()) { - MI.getOperand(i). ChangeToRegister(FrameReg, false /*isDef*/); - MI.getOperand(i+1).ChangeToImmediate(Offset); + MI.getOperand(FIOperandNum). 
ChangeToRegister(FrameReg, false /*isDef*/); + MI.getOperand(FIOperandNum+1).ChangeToImmediate(Offset); return; } // Modify MI as necessary to handle as much of 'Offset' as possible assert(AFI->isThumbFunction() && "This eliminateFrameIndex only supports Thumb1!"); - if (rewriteFrameIndex(MI, i, FrameReg, Offset, TII)) + if (rewriteFrameIndex(MI, FIOperandNum, FrameReg, Offset, TII)) return; // If we get here, the immediate doesn't fit into the instruction. We folded @@ -688,11 +641,12 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } MI.setDesc(TII.get(UseRR ? ARM::tLDRr : ARM::tLDRi)); - MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true); + MI.getOperand(FIOperandNum).ChangeToRegister(TmpReg, false, false, true); if (UseRR) // Use [reg, reg] addrmode. Replace the immediate operand w/ the frame // register. The offset is already handled in the vreg value. - MI.getOperand(i+1).ChangeToRegister(FrameReg, false, false, false); + MI.getOperand(FIOperandNum+1).ChangeToRegister(FrameReg, false, false, + false); } else if (MI.mayStore()) { VReg = MF.getRegInfo().createVirtualRegister(&ARM::tGPRRegClass); bool UseRR = false; @@ -709,18 +663,17 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, emitThumbRegPlusImmediate(MBB, II, dl, VReg, FrameReg, Offset, TII, *this); MI.setDesc(TII.get(UseRR ? ARM::tSTRr : ARM::tSTRi)); - MI.getOperand(i).ChangeToRegister(VReg, false, false, true); + MI.getOperand(FIOperandNum).ChangeToRegister(VReg, false, false, true); if (UseRR) // Use [reg, reg] addrmode. Replace the immediate operand w/ the frame // register. The offset is already handled in the vreg value. - MI.getOperand(i+1).ChangeToRegister(FrameReg, false, false, false); + MI.getOperand(FIOperandNum+1).ChangeToRegister(FrameReg, false, false, + false); } else { llvm_unreachable("Unexpected opcode!"); } // Add predicate back if it's needed. - if (MI.isPredicable()) { - MachineInstrBuilder MIB(&MI); + if (MI.isPredicable()) AddDefaultPred(MIB); - } } diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h index f2e4b08..ebbab36 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.h +++ b/lib/Target/ARM/Thumb1RegisterInfo.h @@ -43,11 +43,6 @@ public: unsigned PredReg = 0, unsigned MIFlags = MachineInstr::NoFlags) const; - /// Code Generation virtual methods... - void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; - // rewrite MI to access 'Offset' bytes from the FP. Update Offset to be // however much remains to be handled. Return 'true' if no further // work is required. 
@@ -62,7 +57,8 @@ public: const TargetRegisterClass *RC, unsigned Reg) const; void eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS = NULL) const; + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS = NULL) const; }; } diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp index d54aa93..97c254c 100644 --- a/lib/Target/ARM/Thumb2ITBlockPass.cpp +++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp @@ -11,12 +11,12 @@ #include "ARM.h" #include "ARMMachineFunctionInfo.h" #include "Thumb2InstrInfo.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineInstrBundle.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/Statistic.h" using namespace llvm; STATISTIC(NumITs, "Number of IT blocks inserted"); diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index e9e20dd..67e8ec7 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -51,7 +51,7 @@ Thumb2InstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail, MachineBasicBlock *MBB = Tail->getParent(); ARMFunctionInfo *AFI = MBB->getParent()->getInfo<ARMFunctionInfo>(); if (!AFI->hasITBlocks()) { - TargetInstrInfoImpl::ReplaceTailWithBranchTo(Tail, NewDest); + TargetInstrInfo::ReplaceTailWithBranchTo(Tail, NewDest); return; } @@ -65,7 +65,7 @@ Thumb2InstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail, --MBBI; // Actually replace the tail. - TargetInstrInfoImpl::ReplaceTailWithBranchTo(Tail, NewDest); + TargetInstrInfo::ReplaceTailWithBranchTo(Tail, NewDest); // Fix up IT. if (CC != ARMCC::AL) { @@ -408,7 +408,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, // Remove offset and remaining explicit predicate operands. 
do MI.RemoveOperand(FrameRegIdx+1); while (MI.getNumOperands() > FrameRegIdx+1); - MachineInstrBuilder MIB(&MI); + MachineInstrBuilder MIB(*MI.getParent()->getParent(), &MI); AddDefaultPred(MIB); return true; } diff --git a/lib/Target/ARM/Thumb2RegisterInfo.cpp b/lib/Target/ARM/Thumb2RegisterInfo.cpp index 29a87d0..1a7a4d4 100644 --- a/lib/Target/ARM/Thumb2RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb2RegisterInfo.cpp @@ -16,12 +16,12 @@ #include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMSubtarget.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" using namespace llvm; Thumb2RegisterInfo::Thumb2RegisterInfo(const ARMBaseInstrInfo &tii, diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp index f18f491..d50f5d9 100644 --- a/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -9,19 +9,21 @@ #define DEBUG_TYPE "t2-reduce-size" #include "ARM.h" -#include "ARMBaseRegisterInfo.h" #include "ARMBaseInstrInfo.h" +#include "ARMBaseRegisterInfo.h" #include "ARMSubtarget.h" -#include "Thumb2InstrInfo.h" #include "MCTargetDesc/ARMAddressingModes.h" +#include "Thumb2InstrInfo.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/IR/Function.h" // To access Function attributes #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/Statistic.h" using namespace llvm; STATISTIC(NumNarrows, "Number of 32-bit instrs reduced to 16-bit ones"); @@ -52,81 +54,79 @@ namespace { unsigned PredCC2 : 2; unsigned PartFlag : 1; // 16-bit instruction does partial flag update unsigned Special : 1; // Needs to be dealt with specially + unsigned AvoidMovs: 1; // Avoid movs with shifter operand (for Swift) }; static const ReduceEntry ReduceTable[] = { - // Wide, Narrow1, Narrow2, imm1,imm2, lo1, lo2, P/C, PF, S - { ARM::t2ADCrr, 0, ARM::tADC, 0, 0, 0, 1, 0,0, 0,0 }, - { ARM::t2ADDri, ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 0,0, 0,1 }, - { ARM::t2ADDrr, ARM::tADDrr, ARM::tADDhirr, 0, 0, 1, 0, 0,1, 0,0 }, - { ARM::t2ADDSri,ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 2,2, 0,1 }, - { ARM::t2ADDSrr,ARM::tADDrr, 0, 0, 0, 1, 0, 2,0, 0,1 }, - { ARM::t2ANDrr, 0, ARM::tAND, 0, 0, 0, 1, 0,0, 1,0 }, - { ARM::t2ASRri, ARM::tASRri, 0, 5, 0, 1, 0, 0,0, 1,0 }, - { ARM::t2ASRrr, 0, ARM::tASRrr, 0, 0, 0, 1, 0,0, 1,0 }, - { ARM::t2BICrr, 0, ARM::tBIC, 0, 0, 0, 1, 0,0, 1,0 }, - //FIXME: Disable CMN, as CCodes are backwards from compare expectations - //{ ARM::t2CMNrr, ARM::tCMN, 0, 0, 0, 1, 0, 2,0, 0,0 }, - { ARM::t2CMNzrr, ARM::tCMNz, 0, 0, 0, 1, 0, 2,0, 0,0 }, - { ARM::t2CMPri, ARM::tCMPi8, 0, 8, 0, 1, 0, 2,0, 0,0 }, - { ARM::t2CMPrr, ARM::tCMPhir, 0, 0, 0, 0, 0, 2,0, 0,1 }, - { ARM::t2EORrr, 0, ARM::tEOR, 0, 0, 0, 1, 0,0, 1,0 }, - // FIXME: adr.n immediate offset must be multiple of 4. 
- //{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0, 0, 0, 1, 0, 1,0, 0,0 }, - { ARM::t2LSLri, ARM::tLSLri, 0, 5, 0, 1, 0, 0,0, 1,0 }, - { ARM::t2LSLrr, 0, ARM::tLSLrr, 0, 0, 0, 1, 0,0, 1,0 }, - { ARM::t2LSRri, ARM::tLSRri, 0, 5, 0, 1, 0, 0,0, 1,0 }, - { ARM::t2LSRrr, 0, ARM::tLSRrr, 0, 0, 0, 1, 0,0, 1,0 }, - // FIXME: tMOVi8 and tMVN also partially update CPSR but they are less - // likely to cause issue in the loop. As a size / performance workaround, - // they are not marked as such. - { ARM::t2MOVi, ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0,0 }, - { ARM::t2MOVi16,ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0,1 }, - // FIXME: Do we need the 16-bit 'S' variant? - { ARM::t2MOVr,ARM::tMOVr, 0, 0, 0, 0, 0, 1,0, 0,0 }, - { ARM::t2MUL, 0, ARM::tMUL, 0, 0, 0, 1, 0,0, 1,0 }, - { ARM::t2MVNr, ARM::tMVN, 0, 0, 0, 1, 0, 0,0, 0,0 }, - { ARM::t2ORRrr, 0, ARM::tORR, 0, 0, 0, 1, 0,0, 1,0 }, - { ARM::t2REV, ARM::tREV, 0, 0, 0, 1, 0, 1,0, 0,0 }, - { ARM::t2REV16, ARM::tREV16, 0, 0, 0, 1, 0, 1,0, 0,0 }, - { ARM::t2REVSH, ARM::tREVSH, 0, 0, 0, 1, 0, 1,0, 0,0 }, - { ARM::t2RORrr, 0, ARM::tROR, 0, 0, 0, 1, 0,0, 1,0 }, - { ARM::t2RSBri, ARM::tRSB, 0, 0, 0, 1, 0, 0,0, 0,1 }, - { ARM::t2RSBSri,ARM::tRSB, 0, 0, 0, 1, 0, 2,0, 0,1 }, - { ARM::t2SBCrr, 0, ARM::tSBC, 0, 0, 0, 1, 0,0, 0,0 }, - { ARM::t2SUBri, ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 0,0, 0,0 }, - { ARM::t2SUBrr, ARM::tSUBrr, 0, 0, 0, 1, 0, 0,0, 0,0 }, - { ARM::t2SUBSri,ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 2,2, 0,0 }, - { ARM::t2SUBSrr,ARM::tSUBrr, 0, 0, 0, 1, 0, 2,0, 0,0 }, - { ARM::t2SXTB, ARM::tSXTB, 0, 0, 0, 1, 0, 1,0, 0,1 }, - { ARM::t2SXTH, ARM::tSXTH, 0, 0, 0, 1, 0, 1,0, 0,1 }, - { ARM::t2TSTrr, ARM::tTST, 0, 0, 0, 1, 0, 2,0, 0,0 }, - { ARM::t2UXTB, ARM::tUXTB, 0, 0, 0, 1, 0, 1,0, 0,1 }, - { ARM::t2UXTH, ARM::tUXTH, 0, 0, 0, 1, 0, 1,0, 0,1 }, - - // FIXME: Clean this up after splitting each Thumb load / store opcode - // into multiple ones. 
- { ARM::t2LDRi12,ARM::tLDRi, ARM::tLDRspi, 5, 8, 1, 0, 0,0, 0,1 }, - { ARM::t2LDRs, ARM::tLDRr, 0, 0, 0, 1, 0, 0,0, 0,1 }, - { ARM::t2LDRBi12,ARM::tLDRBi, 0, 5, 0, 1, 0, 0,0, 0,1 }, - { ARM::t2LDRBs, ARM::tLDRBr, 0, 0, 0, 1, 0, 0,0, 0,1 }, - { ARM::t2LDRHi12,ARM::tLDRHi, 0, 5, 0, 1, 0, 0,0, 0,1 }, - { ARM::t2LDRHs, ARM::tLDRHr, 0, 0, 0, 1, 0, 0,0, 0,1 }, - { ARM::t2LDRSBs,ARM::tLDRSB, 0, 0, 0, 1, 0, 0,0, 0,1 }, - { ARM::t2LDRSHs,ARM::tLDRSH, 0, 0, 0, 1, 0, 0,0, 0,1 }, - { ARM::t2STRi12,ARM::tSTRi, ARM::tSTRspi, 5, 8, 1, 0, 0,0, 0,1 }, - { ARM::t2STRs, ARM::tSTRr, 0, 0, 0, 1, 0, 0,0, 0,1 }, - { ARM::t2STRBi12,ARM::tSTRBi, 0, 5, 0, 1, 0, 0,0, 0,1 }, - { ARM::t2STRBs, ARM::tSTRBr, 0, 0, 0, 1, 0, 0,0, 0,1 }, - { ARM::t2STRHi12,ARM::tSTRHi, 0, 5, 0, 1, 0, 0,0, 0,1 }, - { ARM::t2STRHs, ARM::tSTRHr, 0, 0, 0, 1, 0, 0,0, 0,1 }, - - { ARM::t2LDMIA, ARM::tLDMIA, 0, 0, 0, 1, 1, 1,1, 0,1 }, - { ARM::t2LDMIA_RET,0, ARM::tPOP_RET, 0, 0, 1, 1, 1,1, 0,1 }, - { ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,0, 0, 1, 1, 1,1, 0,1 }, - // ARM::t2STM (with no basereg writeback) has no Thumb1 equivalent - { ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0, 0, 0, 1, 1, 1,1, 0,1 }, - { ARM::t2STMDB_UPD, 0, ARM::tPUSH, 0, 0, 1, 1, 1,1, 0,1 }, + // Wide, Narrow1, Narrow2, imm1,imm2, lo1, lo2, P/C,PF,S,AM + { ARM::t2ADCrr, 0, ARM::tADC, 0, 0, 0, 1, 0,0, 0,0,0 }, + { ARM::t2ADDri, ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 0,0, 0,1,0 }, + { ARM::t2ADDrr, ARM::tADDrr, ARM::tADDhirr, 0, 0, 1, 0, 0,1, 0,0,0 }, + { ARM::t2ADDSri,ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 2,2, 0,1,0 }, + { ARM::t2ADDSrr,ARM::tADDrr, 0, 0, 0, 1, 0, 2,0, 0,1,0 }, + { ARM::t2ANDrr, 0, ARM::tAND, 0, 0, 0, 1, 0,0, 1,0,0 }, + { ARM::t2ASRri, ARM::tASRri, 0, 5, 0, 1, 0, 0,0, 1,0,1 }, + { ARM::t2ASRrr, 0, ARM::tASRrr, 0, 0, 0, 1, 0,0, 1,0,1 }, + { ARM::t2BICrr, 0, ARM::tBIC, 0, 0, 0, 1, 0,0, 1,0,0 }, + //FIXME: Disable CMN, as CCodes are backwards from compare expectations + //{ ARM::t2CMNrr, ARM::tCMN, 0, 0, 0, 1, 0, 2,0, 0,0,0 }, + { ARM::t2CMNzrr, ARM::tCMNz, 0, 0, 0, 1, 0, 2,0, 0,0,0 }, + { ARM::t2CMPri, ARM::tCMPi8, 0, 8, 0, 1, 0, 2,0, 0,0,0 }, + { ARM::t2CMPrr, ARM::tCMPhir, 0, 0, 0, 0, 0, 2,0, 0,1,0 }, + { ARM::t2EORrr, 0, ARM::tEOR, 0, 0, 0, 1, 0,0, 1,0,0 }, + // FIXME: adr.n immediate offset must be multiple of 4. + //{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0, 0, 0, 1, 0, 1,0, 0,0,0 }, + { ARM::t2LSLri, ARM::tLSLri, 0, 5, 0, 1, 0, 0,0, 1,0,1 }, + { ARM::t2LSLrr, 0, ARM::tLSLrr, 0, 0, 0, 1, 0,0, 1,0,1 }, + { ARM::t2LSRri, ARM::tLSRri, 0, 5, 0, 1, 0, 0,0, 1,0,1 }, + { ARM::t2LSRrr, 0, ARM::tLSRrr, 0, 0, 0, 1, 0,0, 1,0,1 }, + { ARM::t2MOVi, ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 1,0,0 }, + { ARM::t2MOVi16,ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 1,1,0 }, + // FIXME: Do we need the 16-bit 'S' variant? 
+  { ARM::t2MOVr,ARM::tMOVr, 0, 0, 0, 0, 0, 1,0, 0,0,0 },
+  { ARM::t2MUL, 0, ARM::tMUL, 0, 0, 0, 1, 0,0, 1,0,0 },
+  { ARM::t2MVNr, ARM::tMVN, 0, 0, 0, 1, 0, 0,0, 0,0,0 },
+  { ARM::t2ORRrr, 0, ARM::tORR, 0, 0, 0, 1, 0,0, 1,0,0 },
+  { ARM::t2REV, ARM::tREV, 0, 0, 0, 1, 0, 1,0, 0,0,0 },
+  { ARM::t2REV16, ARM::tREV16, 0, 0, 0, 1, 0, 1,0, 0,0,0 },
+  { ARM::t2REVSH, ARM::tREVSH, 0, 0, 0, 1, 0, 1,0, 0,0,0 },
+  { ARM::t2RORrr, 0, ARM::tROR, 0, 0, 0, 1, 0,0, 1,0,0 },
+  { ARM::t2RSBri, ARM::tRSB, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+  { ARM::t2RSBSri,ARM::tRSB, 0, 0, 0, 1, 0, 2,0, 0,1,0 },
+  { ARM::t2SBCrr, 0, ARM::tSBC, 0, 0, 0, 1, 0,0, 0,0,0 },
+  { ARM::t2SUBri, ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 0,0, 0,0,0 },
+  { ARM::t2SUBrr, ARM::tSUBrr, 0, 0, 0, 1, 0, 0,0, 0,0,0 },
+  { ARM::t2SUBSri,ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 2,2, 0,0,0 },
+  { ARM::t2SUBSrr,ARM::tSUBrr, 0, 0, 0, 1, 0, 2,0, 0,0,0 },
+  { ARM::t2SXTB, ARM::tSXTB, 0, 0, 0, 1, 0, 1,0, 0,1,0 },
+  { ARM::t2SXTH, ARM::tSXTH, 0, 0, 0, 1, 0, 1,0, 0,1,0 },
+  { ARM::t2TSTrr, ARM::tTST, 0, 0, 0, 1, 0, 2,0, 0,0,0 },
+  { ARM::t2UXTB, ARM::tUXTB, 0, 0, 0, 1, 0, 1,0, 0,1,0 },
+  { ARM::t2UXTH, ARM::tUXTH, 0, 0, 0, 1, 0, 1,0, 0,1,0 },
+
+  // FIXME: Clean this up after splitting each Thumb load / store opcode
+  // into multiple ones.
+  { ARM::t2LDRi12,ARM::tLDRi, ARM::tLDRspi, 5, 8, 1, 0, 0,0, 0,1,0 },
+  { ARM::t2LDRs, ARM::tLDRr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+  { ARM::t2LDRBi12,ARM::tLDRBi, 0, 5, 0, 1, 0, 0,0, 0,1,0 },
+  { ARM::t2LDRBs, ARM::tLDRBr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+  { ARM::t2LDRHi12,ARM::tLDRHi, 0, 5, 0, 1, 0, 0,0, 0,1,0 },
+  { ARM::t2LDRHs, ARM::tLDRHr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+  { ARM::t2LDRSBs,ARM::tLDRSB, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+  { ARM::t2LDRSHs,ARM::tLDRSH, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+  { ARM::t2STRi12,ARM::tSTRi, ARM::tSTRspi, 5, 8, 1, 0, 0,0, 0,1,0 },
+  { ARM::t2STRs, ARM::tSTRr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+  { ARM::t2STRBi12,ARM::tSTRBi, 0, 5, 0, 1, 0, 0,0, 0,1,0 },
+  { ARM::t2STRBs, ARM::tSTRBr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+  { ARM::t2STRHi12,ARM::tSTRHi, 0, 5, 0, 1, 0, 0,0, 0,1,0 },
+  { ARM::t2STRHs, ARM::tSTRHr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
+
+  { ARM::t2LDMIA, ARM::tLDMIA, 0, 0, 0, 1, 1, 1,1, 0,1,0 },
+  { ARM::t2LDMIA_RET,0, ARM::tPOP_RET, 0, 0, 1, 1, 1,1, 0,1,0 },
+  { ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,0, 0, 1, 1, 1,1, 0,1,0 },
+  // ARM::t2STM (with no basereg writeback) has no Thumb1 equivalent
+  { ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0, 0, 0, 1, 1, 1,1, 0,1,0 },
+  { ARM::t2STMDB_UPD, 0, ARM::tPUSH, 0, 0, 1, 1, 1,1, 0,1,0 }
  };

  class Thumb2SizeReduce : public MachineFunctionPass {
@@ -147,8 +147,7 @@ namespace {
    /// ReduceOpcodeMap - Maps wide opcode to index of entry in ReduceTable.
    DenseMap<unsigned, unsigned> ReduceOpcodeMap;

-    bool canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use,
-                             bool IsSelfLoop);
+    bool canAddPseudoFlagDep(MachineInstr *Use, bool IsSelfLoop);

    bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
                         bool is2Addr, ARMCC::CondCodes Pred,
@@ -158,30 +157,52 @@ namespace {
                        const ReduceEntry &Entry);

    bool ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
-                       const ReduceEntry &Entry, bool LiveCPSR,
-                       MachineInstr *CPSRDef, bool IsSelfLoop);
+                       const ReduceEntry &Entry, bool LiveCPSR, bool IsSelfLoop);

    /// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address
    /// instruction.
    bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
-                       const ReduceEntry &Entry,
-                       bool LiveCPSR, MachineInstr *CPSRDef,
+                       const ReduceEntry &Entry, bool LiveCPSR,
                        bool IsSelfLoop);

    /// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit
    /// non-two-address instruction.
    bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
-                        const ReduceEntry &Entry,
-                        bool LiveCPSR, MachineInstr *CPSRDef,
+                        const ReduceEntry &Entry, bool LiveCPSR,
                         bool IsSelfLoop);

+    /// ReduceMI - Attempt to reduce MI, return true on success.
+    bool ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI,
+                  bool LiveCPSR, bool IsSelfLoop);
+
    /// ReduceMBB - Reduce width of instructions in the specified basic block.
    bool ReduceMBB(MachineBasicBlock &MBB);
+
+    bool OptimizeSize;
+    bool MinimizeSize;
+
+    // Last instruction to define CPSR in the current block.
+    MachineInstr *CPSRDef;
+    // Was CPSR last defined by a high latency instruction?
+    // When CPSRDef is null, this refers to CPSR defs in predecessors.
+    bool HighLatencyCPSR;
+
+    struct MBBInfo {
+      // The flags leaving this block have high latency.
+      bool HighLatencyCPSR;
+      // Has this block been visited yet?
+      bool Visited;
+
+      MBBInfo() : HighLatencyCPSR(false), Visited(false) {}
+    };
+
+    SmallVector<MBBInfo, 8> BlockInfo;
  };
  char Thumb2SizeReduce::ID = 0;
}

Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(ID) {
+  OptimizeSize = MinimizeSize = false;
  for (unsigned i = 0, e = array_lengthof(ReduceTable); i != e; ++i) {
    unsigned FromOpc = ReduceTable[i].WideOpc;
    if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second)
@@ -196,6 +217,16 @@ static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) {
  return false;
}

+// Check for a likely high-latency flag def.
+static bool isHighLatencyCPSR(MachineInstr *Def) {
+  switch(Def->getOpcode()) {
+  case ARM::FMSTAT:
+  case ARM::tMUL:
+    return true;
+  }
+  return false;
+}
+
/// canAddPseudoFlagDep - For A9 (and other out-of-order) implementations,
/// the 's' 16-bit instructions partially update CPSR. Abort the
/// transformation to avoid adding a false dependency on the last CPSR setting
/// instruction, which hurts the ability of the out-of-order execution engine
/// to do register renaming magic.
@@ -214,20 +245,19 @@ static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) {
/// In this case it would have been ok to narrow the mul.w to muls since there
/// is an indirect RAW dependency between the muls and the mul.w
bool
-Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use,
-                                      bool FirstInSelfLoop) {
-  // FIXME: Disable check for -Oz (aka OptimizeForSizeHarder).
-  if (!STI->avoidCPSRPartialUpdate())
+Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Use, bool FirstInSelfLoop) {
+  // Disable the check for -Oz (aka OptimizeForSizeHarder).
+  if (MinimizeSize || !STI->avoidCPSRPartialUpdate())
    return false;

-  if (!Def)
+  if (!CPSRDef)
    // If this BB loops back to itself, conservatively avoid narrowing the
    // first instruction that does partial flag update.
-    return FirstInSelfLoop;
+    return HighLatencyCPSR || FirstInSelfLoop;

  SmallSet<unsigned, 2> Defs;
-  for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) {
-    const MachineOperand &MO = Def->getOperand(i);
+  for (unsigned i = 0, e = CPSRDef->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = CPSRDef->getOperand(i);
    if (!MO.isReg() || MO.isUndef() || MO.isUse())
      continue;
    unsigned Reg = MO.getReg();
@@ -245,6 +275,16 @@ Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use,
      return false;
  }

+  // If the current CPSR has high latency, try to avoid the false dependency.
+  if (HighLatencyCPSR)
+    return true;
+
+  // tMOVi8 usually doesn't start long dependency chains, and there are a lot
+  // of them, so always shrink them when CPSR doesn't have high latency.
+  if (Use->getOpcode() == ARM::t2MOVi ||
+      Use->getOpcode() == ARM::t2MOVi16)
+    return false;
+
  // No read-after-write dependency. The narrowing will add false dependency.
  return true;
}
@@ -487,16 +527,15 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
bool
Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
                                const ReduceEntry &Entry,
-                                bool LiveCPSR, MachineInstr *CPSRDef,
-                                bool IsSelfLoop) {
+                                bool LiveCPSR, bool IsSelfLoop) {
  unsigned Opc = MI->getOpcode();
  if (Opc == ARM::t2ADDri) {
    // If the source register is SP, try to reduce to tADDrSPi, otherwise
    // it's a normal reduce.
    if (MI->getOperand(1).getReg() != ARM::SP) {
-      if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop))
+      if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
        return true;
-      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
+      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
    }
    // Try to reduce to tADDrSPi.
    unsigned Imm = MI->getOperand(2).getImm();
@@ -546,12 +585,12 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
      switch (Opc) {
      default: break;
      case ARM::t2ADDSri: {
-        if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop))
+        if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
          return true;
        // fallthrough
      }
      case ARM::t2ADDSrr:
-        return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
+        return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
      }
    }
    break;
@@ -563,13 +602,13 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
  case ARM::t2UXTB:
  case ARM::t2UXTH:
    if (MI->getOperand(2).getImm() == 0)
-      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
+      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
    break;
  case ARM::t2MOVi16:
    // Can convert only 'pure' immediate operands, not immediates obtained as
    // globals' addresses.
    if (MI->getOperand(1).isImm())
-      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
+      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
    break;
  case ARM::t2CMPrr: {
    // Try to reduce to the lo-reg only version first. Why are there two
    // table entries for this compare? Lo-reg only forms
    // are prioritized, but the table assumes a unique entry for each
    // source insn opcode. So for now, we hack a local entry record to use.
    static const ReduceEntry NarrowEntry =
-      { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 0,1 };
-    if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, CPSRDef, IsSelfLoop))
+      { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 0,1,0 };
+    if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, IsSelfLoop))
      return true;
-    return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
+    return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
  }
  }
  return false;
@@ -590,12 +629,17 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
bool
Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
                                const ReduceEntry &Entry,
-                                bool LiveCPSR, MachineInstr *CPSRDef,
-                                bool IsSelfLoop) {
+                                bool LiveCPSR, bool IsSelfLoop) {
  if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr))
    return false;

+  if (!MinimizeSize && !OptimizeSize && Entry.AvoidMovs &&
+      STI->avoidMOVsShifterOperand())
+    // Don't issue movs with shifter operand for some CPUs unless we
+    // are optimizing / minimizing for size.
+    return false;
+
  unsigned Reg0 = MI->getOperand(0).getReg();
  unsigned Reg1 = MI->getOperand(1).getReg();
  // t2MUL is "special". The tied source operand is second, not first.
@@ -666,7 +710,7 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
  // Avoid adding a false dependency on partial flag update by some 16-bit
  // instructions which have the 's' bit set.
  if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
-      canAddPseudoFlagDep(CPSRDef, MI, IsSelfLoop))
+      canAddPseudoFlagDep(MI, IsSelfLoop))
    return false;

  // Add the 16-bit instruction.
@@ -703,11 +747,16 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
bool
Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
                                 const ReduceEntry &Entry,
-                                 bool LiveCPSR, MachineInstr *CPSRDef,
-                                 bool IsSelfLoop) {
+                                 bool LiveCPSR, bool IsSelfLoop) {
  if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit))
    return false;

+  if (!MinimizeSize && !OptimizeSize && Entry.AvoidMovs &&
+      STI->avoidMOVsShifterOperand())
+    // Don't issue movs with shifter operand for some CPUs unless we
+    // are optimizing / minimizing for size.
+    return false;
+
  unsigned Limit = ~0U;
  if (Entry.Imm1Limit)
    Limit = (1 << Entry.Imm1Limit) - 1;
@@ -757,7 +806,7 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
  // Avoid adding a false dependency on partial flag update by some 16-bit
  // instructions which have the 's' bit set.
  if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
-      canAddPseudoFlagDep(CPSRDef, MI, IsSelfLoop))
+      canAddPseudoFlagDep(MI, IsSelfLoop))
    return false;

  // Add the 16-bit instruction.
@@ -841,14 +890,57 @@ static bool UpdateCPSRUse(MachineInstr &MI, bool LiveCPSR) {
  return LiveCPSR;
}

+bool Thumb2SizeReduce::ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI,
+                                bool LiveCPSR, bool IsSelfLoop) {
+  unsigned Opcode = MI->getOpcode();
+  DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode);
+  if (OPI == ReduceOpcodeMap.end())
+    return false;
+  const ReduceEntry &Entry = ReduceTable[OPI->second];
+
+  // Don't attempt normal reductions on "special" cases for now.
+  if (Entry.Special)
+    return ReduceSpecial(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
+
+  // Try to transform to a 16-bit two-address instruction.
+  if (Entry.NarrowOpc2 &&
+      ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
+    return true;
+
+  // Try to transform to a 16-bit non-two-address instruction.
+  if (Entry.NarrowOpc1 &&
+      ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
+    return true;
+
+  return false;
+}
+
bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  // Yes, CPSR could be livein.
  bool LiveCPSR = MBB.isLiveIn(ARM::CPSR);
-  MachineInstr *CPSRDef = 0;
  MachineInstr *BundleMI = 0;

+  CPSRDef = 0;
+  HighLatencyCPSR = false;
+
+  // Check predecessors for the latest CPSRDef.
+  bool HasBackEdges = false;
+  for (MachineBasicBlock::pred_iterator
+       I = MBB.pred_begin(), E = MBB.pred_end(); I != E; ++I) {
+    const MBBInfo &PInfo = BlockInfo[(*I)->getNumber()];
+    if (!PInfo.Visited) {
+      // Since blocks are visited in RPO, this must be a back-edge.
+      HasBackEdges = true;
+      continue;
+    }
+    if (PInfo.HighLatencyCPSR) {
+      HighLatencyCPSR = true;
+      break;
+    }
+  }
+
  // If this BB loops back to itself, conservatively avoid narrowing the
  // first instruction that does partial flag update.
  bool IsSelfLoop = MBB.isSuccessor(&MBB);
@@ -862,43 +954,25 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
      BundleMI = MI;
      continue;
    }
+    if (MI->isDebugValue())
+      continue;

    LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR);

-    unsigned Opcode = MI->getOpcode();
-    DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode);
-    if (OPI != ReduceOpcodeMap.end()) {
-      const ReduceEntry &Entry = ReduceTable[OPI->second];
-      // Ignore "special" cases for now.
-      if (Entry.Special) {
-        if (ReduceSpecial(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) {
-          Modified = true;
-          MachineBasicBlock::instr_iterator I = prior(NextMII);
-          MI = &*I;
-        }
-        goto ProcessNext;
-      }
-
-      // Try to transform to a 16-bit two-address instruction.
-      if (Entry.NarrowOpc2 &&
-          ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) {
-        Modified = true;
-        MachineBasicBlock::instr_iterator I = prior(NextMII);
-        MI = &*I;
-        goto ProcessNext;
-      }
-
-      // Try to transform to a 16-bit non-two-address instruction.
-      if (Entry.NarrowOpc1 &&
-          ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) {
-        Modified = true;
-        MachineBasicBlock::instr_iterator I = prior(NextMII);
-        MI = &*I;
-      }
+    // Does NextMII belong to the same bundle as MI?
+    bool NextInSameBundle = NextMII != E && NextMII->isBundledWithPred();
+
+    if (ReduceMI(MBB, MI, LiveCPSR, IsSelfLoop)) {
+      Modified = true;
+      MachineBasicBlock::instr_iterator I = prior(NextMII);
+      MI = &*I;
+      // Removing and reinserting the first instruction in a bundle will break
+      // up the bundle. Fix the bundling if it was broken.
+      if (NextInSameBundle && !NextMII->isBundledWithPred())
+        NextMII->bundleWithPred();
    }

-    ProcessNext:
-    if (NextMII != E && MI->isInsideBundle() && !NextMII->isInsideBundle()) {
+    if (!NextInSameBundle && MI->isInsideBundle()) {
      // FIXME: Since post-ra scheduler operates on bundles, the CPSR kill
      // marker is only on the BUNDLE instruction. Process the BUNDLE
      // instruction as we finish with the bundled instruction to work around
@@ -915,14 +989,19 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
    if (MI->isCall()) {
      // Calls don't really set CPSR.
      CPSRDef = 0;
+      HighLatencyCPSR = false;
      IsSelfLoop = false;
    } else if (DefCPSR) {
      // This is the last CPSR defining instruction.
      CPSRDef = MI;
+      HighLatencyCPSR = isHighLatencyCPSR(CPSRDef);
      IsSelfLoop = false;
    }
  }

+  MBBInfo &Info = BlockInfo[MBB.getNumber()];
+  Info.HighLatencyCPSR = HighLatencyCPSR;
+  Info.Visited = true;
  return Modified;
}

@@ -931,9 +1010,23 @@ bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
  TII = static_cast<const Thumb2InstrInfo*>(TM.getInstrInfo());
  STI = &TM.getSubtarget<ARMSubtarget>();

+  // Optimizing / minimizing size?
+  AttributeSet FnAttrs = MF.getFunction()->getAttributes();
+  OptimizeSize = FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
+                                      Attribute::OptimizeForSize);
+  MinimizeSize = FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
+                                      Attribute::MinSize);
+
+  BlockInfo.clear();
+  BlockInfo.resize(MF.getNumBlockIDs());
+
+  // Visit blocks in reverse post-order so LastCPSRDef is known for all
+  // predecessors.
+  ReversePostOrderTraversal<MachineFunction*> RPOT(&MF);
  bool Modified = false;
-  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
-    Modified |= ReduceMBB(*I);
+  for (ReversePostOrderTraversal<MachineFunction*>::rpo_iterator
+       I = RPOT.begin(), E = RPOT.end(); I != E; ++I)
+    Modified |= ReduceMBB(**I);
  return Modified;
}
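
A note for readers decoding the widened table: each row initializes a ReduceEntry record declared earlier in Thumb2SizeReduction.cpp, outside this hunk. The sketch below shows the layout implied by the column header and by the field uses visible in this diff (WideOpc, NarrowOpc1, NarrowOpc2, Imm1Limit, PartFlag, Special, AvoidMovs); the remaining names are inferred from the header and may be spelled differently in the source.

  // Presumed per-row record; the AM column is the new trailing bit.
  struct ReduceEntry {
    uint16_t WideOpc;        // Wide (32-bit) Thumb2 opcode.
    uint16_t NarrowOpc1;     // Narrow (16-bit) opcode, 0 if none.
    uint16_t NarrowOpc2;     // Narrow two-address opcode, 0 if none.
    uint8_t  Imm1Limit;      // Immediate width in bits for NarrowOpc1 (imm1).
    uint8_t  Imm2Limit;      // Immediate width in bits for NarrowOpc2 (imm2).
    unsigned LowRegs1 : 1;   // NarrowOpc1 requires low registers (lo1).
    unsigned LowRegs2 : 1;   // NarrowOpc2 requires low registers (lo2).
    unsigned PredCC1  : 2;   // P/C: 0 = cc iff unpredicated, 1 = no cc field,
                             //      2 = instruction always sets CPSR.
    unsigned PredCC2  : 2;
    unsigned PartFlag : 1;   // PF: 16-bit form does a partial CPSR update.
    unsigned Special  : 1;   // S: route through ReduceSpecial().
    unsigned AvoidMovs: 1;   // AM (new): avoid movs with shifter operand.
  };

Under this layout the t2ADDri row reads: narrow to tADDi3 when the immediate fits in 3 bits (0-7), or to the two-address tADDi8 when it fits in 8 bits (0-255); the bound is the Limit = (1 << Entry.Imm1Limit) - 1 computation visible in ReduceToNarrow above.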
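The rewritten canAddPseudoFlagDep is the heart of the patch: instead of receiving the CPSR-defining instruction as a parameter, it consults the pass-level CPSRDef and the new HighLatencyCPSR flag seeded from predecessor blocks. As a reading aid, here is a standalone restatement of its decision order; this is a hypothetical helper, not code from the patch, and it omits the early subtarget gate (MinimizeSize || !STI->avoidCPSRPartialUpdate()), which bails out before any of this runs.

  // Returns true when narrowing Use to an 's'-suffixed 16-bit instruction
  // should be rejected because the flag write would be a false dependency.
  static bool wouldAddHarmfulFlagDep(bool HaveLocalCPSRDef,     // CPSRDef != 0
                                     bool HighLatencyCPSR,      // FMSTAT / tMUL
                                     bool FirstInSelfLoop,
                                     bool UseReadsCPSRDefResult,
                                     bool UseIsMovImm) {        // t2MOVi(16)
    if (!HaveLocalCPSRDef)
      // No flag def in this block: back off only if the incoming flags are
      // slow or the block loops onto itself.
      return HighLatencyCPSR || FirstInSelfLoop;
    if (UseReadsCPSRDefResult)
      // A real RAW dependency already orders the two instructions, so the
      // extra flag write cannot delay retirement any further.
      return false;
    if (HighLatencyCPSR)
      // Never chain another flag writer onto slow flags.
      return true;
    // Immediate moves rarely start long dependency chains, and they are
    // plentiful, so shrink them whenever the flags are cheap.
    return !UseIsMovImm;
  }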
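Both ReduceTo2Addr and ReduceToNarrow also gain the same guard for rows whose AM bit is set: the immediate and register shifts (ASR/LSL/LSR/ROR), whose 16-bit encodings are flag-setting move-with-shifter-operand forms. Factored into a hypothetical helper for clarity, the condition is:

  // Sketch only; the patch inlines this condition in both reducers.
  static bool blockedByAvoidMovs(const ReduceEntry &Entry, bool OptimizeSize,
                                 bool MinimizeSize, const ARMSubtarget *STI) {
    return !MinimizeSize && !OptimizeSize && Entry.AvoidMovs &&
           STI->avoidMOVsShifterOperand();
  }

The avoidMOVsShifterOperand() hook is queried but not defined in this diff; judging by its name it flags subtargets that handle such movs poorly (presumably Swift-class cores), while the -Os/-Oz escape keeps the size win when the user asked for it.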
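Finally, the traversal change: ReduceMBB now runs over blocks in reverse post-order so that every predecessor's MBBInfo is final before a block is visited, with back-edges detected as the only unvisited predecessors. A minimal sketch of that forward-propagation pattern, using a hypothetical visitInRPO driver and a generic boolean fact in place of the pass's HighLatencyCPSR, is:

  #include "llvm/ADT/PostOrderIterator.h"
  #include "llvm/ADT/SmallVector.h"
  #include "llvm/CodeGen/MachineBasicBlock.h"
  #include "llvm/CodeGen/MachineFunction.h"

  using namespace llvm;

  static void visitInRPO(MachineFunction &MF) {
    SmallVector<bool, 8> Visited(MF.getNumBlockIDs(), false);
    SmallVector<bool, 8> OutFlag(MF.getNumBlockIDs(), false);

    ReversePostOrderTraversal<MachineFunction*> RPOT(&MF);
    for (ReversePostOrderTraversal<MachineFunction*>::rpo_iterator
         I = RPOT.begin(), E = RPOT.end(); I != E; ++I) {
      MachineBasicBlock *MBB = *I;
      bool In = false;
      for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
           PE = MBB->pred_end(); PI != PE; ++PI) {
        if (!Visited[(*PI)->getNumber()])
          continue;  // Unvisited predecessor in RPO == back-edge: stay conservative.
        In |= OutFlag[(*PI)->getNumber()];
      }
      // A real pass applies its per-block transfer function here; ReduceMBB's
      // equivalent is tracking CPSRDef and isHighLatencyCPSR per instruction.
      OutFlag[MBB->getNumber()] = In;
      Visited[MBB->getNumber()] = true;
    }
  }

One pass in this order suffices because only back-edges can carry unknown facts, and the pass deliberately treats those conservatively rather than iterating to a fixed point.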