Diffstat (limited to 'contrib/llvm/lib/Target/ARM/ARMFastISel.cpp')
 -rw-r--r--  contrib/llvm/lib/Target/ARM/ARMFastISel.cpp | 375 +++++++++++------
 1 file changed, 280 insertions(+), 95 deletions(-)
diff --git a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
index 5d45f64..a4004f3 100644
--- a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
@@ -20,6 +20,7 @@
 #include "ARMSubtarget.h"
 #include "ARMTargetMachine.h"
 #include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/FastISel.h"
 #include "llvm/CodeGen/FunctionLoweringInfo.h"
@@ -175,6 +176,8 @@ class ARMFastISel : public FastISel {
 
     // Utility routines.
   private:
+    unsigned constrainOperandRegClass(const MCInstrDesc &II, unsigned Op,
+                                      unsigned OpNum);
     bool isTypeLegal(Type *Ty, MVT &VT);
     bool isLoadTypeLegal(Type *Ty, MVT &VT);
     bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
@@ -251,10 +254,10 @@ bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
 bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) {
   const MCInstrDesc &MCID = MI->getDesc();
 
-  // If we're a thumb2 or not NEON function we were handled via isPredicable.
+  // If we're a thumb2 or not NEON function we'll be handled via isPredicable.
   if ((MCID.TSFlags & ARMII::DomainMask) != ARMII::DomainNEON ||
        AFI->isThumb2Function())
-    return false;
+    return MI->isPredicable();
 
   for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i)
     if (MCID.OpInfo[i].isPredicate())
@@ -275,7 +278,7 @@ ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
   // Do we use a predicate? or...
   // Are we NEON in ARM mode and have a predicate operand? If so, I know
   // we're not predicable but add it anyways.
-  if (TII.isPredicable(MI) || isARMNEONPred(MI))
+  if (isARMNEONPred(MI))
     AddDefaultPred(MIB);
 
   // Do we optionally set a predicate?  Preds is size > 0 iff the predicate
@@ -290,6 +293,23 @@ ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
   return MIB;
 }
 
+unsigned ARMFastISel::constrainOperandRegClass(const MCInstrDesc &II,
+                                               unsigned Op, unsigned OpNum) {
+  if (TargetRegisterInfo::isVirtualRegister(Op)) {
+    const TargetRegisterClass *RegClass =
+        TII.getRegClass(II, OpNum, &TRI, *FuncInfo.MF);
+    if (!MRI.constrainRegClass(Op, RegClass)) {
+      // If it's not legal to COPY between the register classes, something
+      // has gone very wrong before we got here.
+      unsigned NewOp = createResultReg(RegClass);
+      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+                              TII.get(TargetOpcode::COPY), NewOp).addReg(Op));
+      return NewOp;
+    }
+  }
+  return Op;
+}
+
 unsigned ARMFastISel::FastEmitInst_(unsigned MachineInstOpcode,
                                     const TargetRegisterClass* RC) {
   unsigned ResultReg = createResultReg(RC);
@@ -305,6 +325,9 @@ unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode,
   unsigned ResultReg = createResultReg(RC);
   const MCInstrDesc &II = TII.get(MachineInstOpcode);
 
+  // Make sure the input operand is sufficiently constrained to be legal
+  // for this instruction.
+  Op0 = constrainOperandRegClass(II, Op0, 1);
   if (II.getNumDefs() >= 1) {
     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                     .addReg(Op0, Op0IsKill * RegState::Kill));
@@ -325,6 +348,11 @@ unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
   unsigned ResultReg = createResultReg(RC);
   const MCInstrDesc &II = TII.get(MachineInstOpcode);
 
+  // Make sure the input operands are sufficiently constrained to be legal
+  // for this instruction.
+  Op0 = constrainOperandRegClass(II, Op0, 1);
+  Op1 = constrainOperandRegClass(II, Op1, 2);
+
   if (II.getNumDefs() >= 1) {
     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                     .addReg(Op0, Op0IsKill * RegState::Kill)
                     .addReg(Op1, Op1IsKill * RegState::Kill));
@@ -348,6 +376,12 @@ unsigned ARMFastISel::FastEmitInst_rrr(unsigned MachineInstOpcode,
   unsigned ResultReg = createResultReg(RC);
   const MCInstrDesc &II = TII.get(MachineInstOpcode);
 
+  // Make sure the input operands are sufficiently constrained to be legal
+  // for this instruction.
+  Op0 = constrainOperandRegClass(II, Op0, 1);
+  Op1 = constrainOperandRegClass(II, Op1, 2);
+  Op2 = constrainOperandRegClass(II, Op2, 3);
+
   if (II.getNumDefs() >= 1) {
     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                     .addReg(Op0, Op0IsKill * RegState::Kill)
@@ -372,6 +406,9 @@ unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
   unsigned ResultReg = createResultReg(RC);
   const MCInstrDesc &II = TII.get(MachineInstOpcode);
 
+  // Make sure the input operand is sufficiently constrained to be legal
+  // for this instruction.
+  Op0 = constrainOperandRegClass(II, Op0, 1);
   if (II.getNumDefs() >= 1) {
     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                     .addReg(Op0, Op0IsKill * RegState::Kill)
@@ -394,6 +431,9 @@ unsigned ARMFastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
   unsigned ResultReg = createResultReg(RC);
   const MCInstrDesc &II = TII.get(MachineInstOpcode);
 
+  // Make sure the input operand is sufficiently constrained to be legal
+  // for this instruction.
+  Op0 = constrainOperandRegClass(II, Op0, 1);
   if (II.getNumDefs() >= 1) {
     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                     .addReg(Op0, Op0IsKill * RegState::Kill)
@@ -417,6 +457,10 @@ unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
   unsigned ResultReg = createResultReg(RC);
   const MCInstrDesc &II = TII.get(MachineInstOpcode);
 
+  // Make sure the input operands are sufficiently constrained to be legal
+  // for this instruction.
+  Op0 = constrainOperandRegClass(II, Op0, 1);
+  Op1 = constrainOperandRegClass(II, Op1, 2);
   if (II.getNumDefs() >= 1) {
     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                     .addReg(Op0, Op0IsKill * RegState::Kill)
@@ -609,6 +653,7 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) {
                     .addConstantPoolIndex(Idx));
   else
     // The extra immediate is for addrmode2.
+    DestReg = constrainOperandRegClass(TII.get(ARM::LDRcp), DestReg, 0);
     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                             TII.get(ARM::LDRcp), DestReg)
                     .addConstantPoolIndex(Idx)
@@ -628,6 +673,11 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
                                        (const TargetRegisterClass*)&ARM::GPRRegClass;
   unsigned DestReg = createResultReg(RC);
 
+  // FastISel TLS support on non-Darwin is broken, punt to SelectionDAG.
+  const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+  bool IsThreadLocal = GVar && GVar->isThreadLocal();
+  if (!Subtarget->isTargetDarwin() && IsThreadLocal) return 0;
+
   // Use movw+movt when possible, it avoids constant pool entries.
   // Darwin targets don't support movt with Reloc::Static, see
   // ARMTargetLowering::LowerGlobalAddressDarwin.  Other targets only support
@@ -679,6 +729,7 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
     AddOptionalDefs(MIB);
   } else {
     // The extra immediate is for addrmode2.
+    DestReg = constrainOperandRegClass(TII.get(ARM::LDRcp), DestReg, 0);
     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp),
                   DestReg)
           .addConstantPoolIndex(Idx)
@@ -814,22 +865,19 @@ bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
 
   switch (Opcode) {
     default: break;
-    case Instruction::BitCast: {
+    case Instruction::BitCast:
       // Look through bitcasts.
      return ARMComputeAddress(U->getOperand(0), Addr);
-    }
-    case Instruction::IntToPtr: {
+    case Instruction::IntToPtr:
       // Look past no-op inttoptrs.
       if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
         return ARMComputeAddress(U->getOperand(0), Addr);
       break;
-    }
-    case Instruction::PtrToInt: {
+    case Instruction::PtrToInt:
       // Look past no-op ptrtoints.
       if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
         return ARMComputeAddress(U->getOperand(0), Addr);
       break;
-    }
     case Instruction::GetElementPtr: {
       Address SavedAddr = Addr;
       int TmpOffset = Addr.Offset;
@@ -852,13 +900,8 @@ bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
             TmpOffset += CI->getSExtValue() * S;
             break;
           }
-          if (isa<AddOperator>(Op) &&
-              (!isa<Instruction>(Op) ||
-               FuncInfo.MBBMap[cast<Instruction>(Op)->getParent()]
-                 == FuncInfo.MBB) &&
-              isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) {
-            // An add (in the same block) with a constant operand. Fold the
-            // constant.
+          if (canFoldAddIntoGEP(U, Op)) {
+            // A compatible add with a constant operand. Fold the constant.
             ConstantInt *CI =
               cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
             TmpOffset += CI->getSExtValue() * S;
@@ -1025,7 +1068,7 @@ bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
         useAM3 = true;
       }
     }
-    RC = &ARM::GPRRegClass;
+    RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
     break;
   case MVT::i16:
     if (Alignment && Alignment < 2 && !Subtarget->allowsUnalignedMem())
@@ -1040,7 +1083,7 @@ bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
       Opc = isZExt ? ARM::LDRH : ARM::LDRSH;
       useAM3 = true;
     }
-    RC = &ARM::GPRRegClass;
+    RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
     break;
   case MVT::i32:
     if (Alignment && Alignment < 4 && !Subtarget->allowsUnalignedMem())
@@ -1054,7 +1097,7 @@ bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
     } else {
       Opc = ARM::LDRi12;
     }
-    RC = &ARM::GPRRegClass;
+    RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
     break;
   case MVT::f32:
     if (!Subtarget->hasVFP2()) return false;
@@ -1063,7 +1106,7 @@ bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
       needVMOV = true;
       VT = MVT::i32;
       Opc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;
-      RC = &ARM::GPRRegClass;
+      RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
     } else {
       Opc = ARM::VLDRS;
       RC = TLI.getRegClassFor(VT);
@@ -1136,6 +1179,7 @@ bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
                             (const TargetRegisterClass*)&ARM::tGPRRegClass :
                             (const TargetRegisterClass*)&ARM::GPRRegClass);
     unsigned Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri;
+    SrcReg = constrainOperandRegClass(TII.get(Opc), SrcReg, 1);
     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                             TII.get(Opc), Res)
                     .addReg(SrcReg).addImm(1));
@@ -1207,6 +1251,7 @@ bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
   ARMSimplifyAddress(Addr, VT, useAM3);
 
   // Create the base instruction, then add the operands.
+  SrcReg = constrainOperandRegClass(TII.get(StrOpc), SrcReg, 0);
   MachineInstrBuilder MIB =
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(StrOpc))
     .addReg(SrcReg);
@@ -1330,6 +1375,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
         (isLoadTypeLegal(TI->getOperand(0)->getType(), SourceVT))) {
       unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
       unsigned OpReg = getRegForValue(TI->getOperand(0));
+      OpReg = constrainOperandRegClass(TII.get(TstOpc), OpReg, 0);
       AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                               TII.get(TstOpc))
                       .addReg(OpReg).addImm(1));
@@ -1367,6 +1413,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
     // and it left a value for us in a virtual register.  Ergo, we test
     // the one-bit value left in the virtual register.
     unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
+    CmpReg = constrainOperandRegClass(TII.get(TstOpc), CmpReg, 0);
     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                             TII.get(TstOpc))
                     .addReg(CmpReg).addImm(1));
@@ -1491,13 +1538,15 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
     }
   }
 
+  const MCInstrDesc &II = TII.get(CmpOpc);
+  SrcReg1 = constrainOperandRegClass(II, SrcReg1, 0);
   if (!UseImm) {
-    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
-                            TII.get(CmpOpc))
+    SrcReg2 = constrainOperandRegClass(II, SrcReg2, 1);
+    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                     .addReg(SrcReg1).addReg(SrcReg2));
   } else {
     MachineInstrBuilder MIB;
-    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
+    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
       .addReg(SrcReg1);
 
     // Only add immediate for icmp as the immediate for fcmp is an implicit 0.0.
@@ -1696,6 +1745,7 @@ bool ARMFastISel::SelectSelect(const Instruction *I) {
   }
 
   unsigned CmpOpc = isThumb2 ? ARM::t2CMPri : ARM::CMPri;
+  CondReg = constrainOperandRegClass(TII.get(CmpOpc), CondReg, 0);
   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                           TII.get(CmpOpc))
                   .addReg(CondReg).addImm(0));
@@ -1712,12 +1762,16 @@ bool ARMFastISel::SelectSelect(const Instruction *I) {
       MovCCOpc = isThumb2 ? ARM::t2MVNCCi : ARM::MVNCCi;
   }
   unsigned ResultReg = createResultReg(RC);
-  if (!UseImm)
+  if (!UseImm) {
+    Op2Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op2Reg, 1);
+    Op1Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op1Reg, 2);
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg)
     .addReg(Op2Reg).addReg(Op1Reg).addImm(ARMCC::NE).addReg(ARM::CPSR);
-  else
+  } else {
+    Op1Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op1Reg, 1);
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg)
     .addReg(Op1Reg).addImm(Imm).addImm(ARMCC::EQ).addReg(ARM::CPSR);
+  }
   UpdateValueMap(I, ResultReg);
   return true;
 }
@@ -1802,7 +1856,9 @@ bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
   unsigned SrcReg2 = getRegForValue(I->getOperand(1));
   if (SrcReg2 == 0) return false;
 
-  unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::i32));
+  unsigned ResultReg = createResultReg(&ARM::GPRnopcRegClass);
+  SrcReg1 = constrainOperandRegClass(TII.get(Opc), SrcReg1, 1);
+  SrcReg2 = constrainOperandRegClass(TII.get(Opc), SrcReg2, 2);
   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                           TII.get(Opc), ResultReg)
                   .addReg(SrcReg1).addReg(SrcReg2));
@@ -1930,7 +1986,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
           !VA.isRegLoc() || !ArgLocs[++i].isRegLoc())
         return false;
     } else {
-      switch (static_cast<EVT>(ArgVT).getSimpleVT().SimpleTy) {
+      switch (ArgVT.SimpleTy) {
       default:
         return false;
      case MVT::i1:
@@ -1985,7 +2041,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
       case CCValAssign::ZExt: {
         MVT DestVT = VA.getLocVT();
         Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/true);
-        assert (Arg != 0 && "Failed to emit a sext");
+        assert (Arg != 0 && "Failed to emit a zext");
         ArgVT = DestVT;
         break;
       }
@@ -2182,10 +2238,14 @@ unsigned ARMFastISel::ARMSelectCallOp(bool UseReg) {
 }
 
 unsigned ARMFastISel::getLibcallReg(const Twine &Name) {
+  // Manually compute the global's type to avoid building it when unnecessary.
+  Type *GVTy = Type::getInt32PtrTy(*Context, /*AS=*/0);
+  EVT LCREVT = TLI.getValueType(GVTy);
+  if (!LCREVT.isSimple()) return 0;
+
   GlobalValue *GV = new GlobalVariable(Type::getInt32Ty(*Context), false,
                                        GlobalValue::ExternalLinkage, 0, Name);
-  EVT LCREVT = TLI.getValueType(GV->getType());
-  if (!LCREVT.isSimple()) return 0;
+  assert(GV->getType() == GVTy && "We miscomputed the type for the global!");
   return ARMMaterializeGV(GV, LCREVT.getSimpleVT());
 }
@@ -2403,15 +2463,22 @@ bool ARMFastISel::SelectCall(const Instruction *I,
   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                     TII.get(CallOpc));
 
+  unsigned char OpFlags = 0;
+
+  // Add MO_PLT for global address or external symbol in the PIC relocation
+  // model.
+  if (Subtarget->isTargetELF() && TM.getRelocationModel() == Reloc::PIC_)
+    OpFlags = ARMII::MO_PLT;
+
   // ARM calls don't take a predicate, but tBL / tBLX do.
   if(isThumb2)
     AddDefaultPred(MIB);
   if (UseReg)
     MIB.addReg(CalleeReg);
   else if (!IntrMemName)
-    MIB.addGlobalAddress(GV, 0, 0);
+    MIB.addGlobalAddress(GV, 0, OpFlags);
   else
-    MIB.addExternalSymbol(IntrMemName, 0);
+    MIB.addExternalSymbol(IntrMemName, OpFlags);
 
   // Add implicit physical register uses to the call.
   for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
@@ -2602,47 +2669,136 @@ unsigned ARMFastISel::ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
                                     bool isZExt) {
   if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
     return 0;
+  if (SrcVT != MVT::i16 && SrcVT != MVT::i8 && SrcVT != MVT::i1)
+    return 0;
 
-  unsigned Opc;
-  bool isBoolZext = false;
-  const TargetRegisterClass *RC;
-  switch (SrcVT.SimpleTy) {
-  default: return 0;
-  case MVT::i16:
-    if (!Subtarget->hasV6Ops()) return 0;
-    RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
-    if (isZExt)
-      Opc = isThumb2 ? ARM::t2UXTH : ARM::UXTH;
-    else
-      Opc = isThumb2 ? ARM::t2SXTH : ARM::SXTH;
-    break;
-  case MVT::i8:
-    if (!Subtarget->hasV6Ops()) return 0;
-    RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
-    if (isZExt)
-      Opc = isThumb2 ? ARM::t2UXTB : ARM::UXTB;
-    else
-      Opc = isThumb2 ? ARM::t2SXTB : ARM::SXTB;
-    break;
-  case MVT::i1:
-    if (isZExt) {
-      RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass;
-      Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri;
-      isBoolZext = true;
-      break;
+  // Table of which combinations can be emitted as a single instruction,
+  // and which will require two.
+  static const uint8_t isSingleInstrTbl[3][2][2][2] = {
+    //            ARM                     Thumb
+    //           !hasV6Ops  hasV6Ops     !hasV6Ops  hasV6Ops
+    // ext:     s  z        s  z         s  z       s  z
+    /*  1 */ { { { 0, 1 }, { 0, 1 } }, { { 0, 0 }, { 0, 1 } } },
+    /*  8 */ { { { 0, 1 }, { 1, 1 } }, { { 0, 0 }, { 1, 1 } } },
+    /* 16 */ { { { 0, 0 }, { 1, 1 } }, { { 0, 0 }, { 1, 1 } } }
+  };
+
+  // Target registers for:
+  //  - For ARM can never be PC.
+  //  - For 16-bit Thumb are restricted to lower 8 registers.
+  //  - For 32-bit Thumb are restricted to non-SP and non-PC.
+  static const TargetRegisterClass *RCTbl[2][2] = {
+    // Instructions: Two                     Single
+    /* ARM   */    { &ARM::GPRnopcRegClass, &ARM::GPRnopcRegClass },
+    /* Thumb */    { &ARM::tGPRRegClass,    &ARM::rGPRRegClass    }
+  };
+
+  // Table governing the instruction(s) to be emitted.
+  static const struct InstructionTable {
+    uint32_t Opc   : 16;
+    uint32_t hasS  :  1; // Some instructions have an S bit, always set it to 0.
+    uint32_t Shift :  7; // For shift operand addressing mode, used by MOVsi.
+    uint32_t Imm   :  8; // All instructions have either a shift or a mask.
+  } IT[2][2][3][2] = {
+    { // Two instructions (first is left shift, second is in this table).
+      { // ARM                Opc           S  Shift             Imm
+        /*  1 bit sext */ { { ARM::MOVsi  , 1, ARM_AM::asr     , 31 },
+        /*  1 bit zext */   { ARM::MOVsi  , 1, ARM_AM::lsr     , 31 } },
+        /*  8 bit sext */ { { ARM::MOVsi  , 1, ARM_AM::asr     , 24 },
+        /*  8 bit zext */   { ARM::MOVsi  , 1, ARM_AM::lsr     , 24 } },
+        /* 16 bit sext */ { { ARM::MOVsi  , 1, ARM_AM::asr     , 16 },
+        /* 16 bit zext */   { ARM::MOVsi  , 1, ARM_AM::lsr     , 16 } }
+      },
+      { // Thumb              Opc           S  Shift             Imm
+        /*  1 bit sext */ { { ARM::tASRri , 0, ARM_AM::no_shift, 31 },
+        /*  1 bit zext */   { ARM::tLSRri , 0, ARM_AM::no_shift, 31 } },
+        /*  8 bit sext */ { { ARM::tASRri , 0, ARM_AM::no_shift, 24 },
+        /*  8 bit zext */   { ARM::tLSRri , 0, ARM_AM::no_shift, 24 } },
+        /* 16 bit sext */ { { ARM::tASRri , 0, ARM_AM::no_shift, 16 },
+        /* 16 bit zext */   { ARM::tLSRri , 0, ARM_AM::no_shift, 16 } }
+      }
+    },
+    { // Single instruction.
+      { // ARM                Opc           S  Shift             Imm
+        /*  1 bit sext */ { { ARM::KILL   , 0, ARM_AM::no_shift,   0 },
+        /*  1 bit zext */   { ARM::ANDri  , 1, ARM_AM::no_shift,   1 } },
+        /*  8 bit sext */ { { ARM::SXTB   , 0, ARM_AM::no_shift,   0 },
+        /*  8 bit zext */   { ARM::ANDri  , 1, ARM_AM::no_shift, 255 } },
+        /* 16 bit sext */ { { ARM::SXTH   , 0, ARM_AM::no_shift,   0 },
+        /* 16 bit zext */   { ARM::UXTH   , 0, ARM_AM::no_shift,   0 } }
+      },
+      { // Thumb              Opc           S  Shift             Imm
+        /*  1 bit sext */ { { ARM::KILL   , 0, ARM_AM::no_shift,   0 },
+        /*  1 bit zext */   { ARM::t2ANDri, 1, ARM_AM::no_shift,   1 } },
+        /*  8 bit sext */ { { ARM::t2SXTB , 0, ARM_AM::no_shift,   0 },
+        /*  8 bit zext */   { ARM::t2ANDri, 1, ARM_AM::no_shift, 255 } },
+        /* 16 bit sext */ { { ARM::t2SXTH , 0, ARM_AM::no_shift,   0 },
+        /* 16 bit zext */   { ARM::t2UXTH , 0, ARM_AM::no_shift,   0 } }
+      }
+    }
-    }
-    return 0;
-  }
+  };
+
+  unsigned SrcBits = SrcVT.getSizeInBits();
+  unsigned DestBits = DestVT.getSizeInBits();
+  (void) DestBits;
+  assert((SrcBits < DestBits) && "can only extend to larger types");
+  assert((DestBits == 32 || DestBits == 16 || DestBits == 8) &&
+         "other sizes unimplemented");
+  assert((SrcBits == 16 || SrcBits == 8 || SrcBits == 1) &&
+         "other sizes unimplemented");
+
+  bool hasV6Ops = Subtarget->hasV6Ops();
+  unsigned Bitness = SrcBits / 8;  // {1,8,16}=>{0,1,2}
+  assert((Bitness < 3) && "sanity-check table bounds");
+
+  bool isSingleInstr = isSingleInstrTbl[Bitness][isThumb2][hasV6Ops][isZExt];
+  const TargetRegisterClass *RC = RCTbl[isThumb2][isSingleInstr];
+  const InstructionTable *ITP = &IT[isSingleInstr][isThumb2][Bitness][isZExt];
+  unsigned Opc = ITP->Opc;
+  assert(ARM::KILL != Opc && "Invalid table entry");
+  unsigned hasS = ITP->hasS;
+  ARM_AM::ShiftOpc Shift = (ARM_AM::ShiftOpc) ITP->Shift;
+  assert(((Shift == ARM_AM::no_shift) == (Opc != ARM::MOVsi)) &&
+         "only MOVsi has shift operand addressing mode");
+  unsigned Imm = ITP->Imm;
+
+  // 16-bit Thumb instructions always set CPSR (unless they're in an IT block).
+  bool setsCPSR = &ARM::tGPRRegClass == RC;
+  unsigned LSLOpc = isThumb2 ? ARM::tLSLri : ARM::MOVsi;
+  unsigned ResultReg;
+  // MOVsi encodes shift and immediate in shift operand addressing mode.
+  // The following condition has the same value when emitting two
+  // instruction sequences: both are shifts.
+  bool ImmIsSO = (Shift != ARM_AM::no_shift);
+
+  // Either one or two instructions are emitted.
+  // They're always of the form:
+  //   dst = in OP imm
+  // CPSR is set only by 16-bit Thumb instructions.
+  // Predicate, if any, is AL.
+  // S bit, if available, is always 0.
+  // When two are emitted the first's result will feed as the second's input,
+  // that value is then dead.
+  unsigned NumInstrsEmitted = isSingleInstr ? 1 : 2;
+  for (unsigned Instr = 0; Instr != NumInstrsEmitted; ++Instr) {
+    ResultReg = createResultReg(RC);
+    bool isLsl = (0 == Instr) && !isSingleInstr;
+    unsigned Opcode = isLsl ? LSLOpc : Opc;
+    ARM_AM::ShiftOpc ShiftAM = isLsl ? ARM_AM::lsl : Shift;
+    unsigned ImmEnc = ImmIsSO ? ARM_AM::getSORegOpc(ShiftAM, Imm) : Imm;
+    bool isKill = 1 == Instr;
+    MachineInstrBuilder MIB = BuildMI(
+        *FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opcode), ResultReg);
+    if (setsCPSR)
+      MIB.addReg(ARM::CPSR, RegState::Define);
+    SrcReg = constrainOperandRegClass(TII.get(Opcode), SrcReg, 1 + setsCPSR);
+    AddDefaultPred(MIB.addReg(SrcReg, isKill * RegState::Kill).addImm(ImmEnc));
+    if (hasS)
+      AddDefaultCC(MIB);
+    // Second instruction consumes the first's result.
+    SrcReg = ResultReg;
+  }
 
-  unsigned ResultReg = createResultReg(RC);
-  MachineInstrBuilder MIB;
-  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
-        .addReg(SrcReg);
-  if (isBoolZext)
-    MIB.addImm(1);
-  else
-    MIB.addImm(0);
-  AddOptionalDefs(MIB);
   return ResultReg;
 }
@@ -2707,7 +2863,7 @@ bool ARMFastISel::SelectShift(const Instruction *I,
     if (Reg2 == 0) return false;
   }
 
-  unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::i32));
+  unsigned ResultReg = createResultReg(&ARM::GPRnopcRegClass);
   if(ResultReg == 0) return false;
 
   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
@@ -2797,6 +2953,25 @@ bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
   return false;
 }
 
+namespace {
+// This table describes sign- and zero-extend instructions which can be
+// folded into a preceding load. All of these extends have an immediate
+// (sometimes a mask and sometimes a shift) that's applied after
+// extension.
+const struct FoldableLoadExtendsStruct {
+  uint16_t Opc[2];  // ARM, Thumb.
+  uint8_t ExpectedImm;
+  uint8_t isZExt     : 1;
+  uint8_t ExpectedVT : 7;
+} FoldableLoadExtends[] = {
+  { { ARM::SXTH,  ARM::t2SXTH  },   0, 0, MVT::i16 },
+  { { ARM::UXTH,  ARM::t2UXTH  },   0, 1, MVT::i16 },
+  { { ARM::ANDri, ARM::t2ANDri }, 255, 1, MVT::i8  },
+  { { ARM::SXTB,  ARM::t2SXTB  },   0, 0, MVT::i8  },
+  { { ARM::UXTB,  ARM::t2UXTB  },   0, 1, MVT::i8  }
+};
+}
+
 /// \brief The specified machine instr operand is a vreg, and that
 /// vreg is being provided by the specified load instruction.  If possible,
 /// try to fold the load as an operand to the instruction, returning true if
@@ -2812,26 +2987,23 @@ bool ARMFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
   // ldrb r1, [r0]       ldrb r1, [r0]
   // uxtb r2, r1     =>
   // mov  r3, r2         mov  r3, r1
-  bool isZExt = true;
-  switch(MI->getOpcode()) {
-    default: return false;
-    case ARM::SXTH:
-    case ARM::t2SXTH:
-      isZExt = false;
-    case ARM::UXTH:
-    case ARM::t2UXTH:
-      if (VT != MVT::i16)
-        return false;
-    break;
-    case ARM::SXTB:
-    case ARM::t2SXTB:
-      isZExt = false;
-    case ARM::UXTB:
-    case ARM::t2UXTB:
-      if (VT != MVT::i8)
-        return false;
-    break;
-  }
+  if (MI->getNumOperands() < 3 || !MI->getOperand(2).isImm())
+    return false;
+  const uint64_t Imm = MI->getOperand(2).getImm();
+
+  bool Found = false;
+  bool isZExt;
+  for (unsigned i = 0, e = array_lengthof(FoldableLoadExtends);
+       i != e; ++i) {
+    if (FoldableLoadExtends[i].Opc[isThumb2] == MI->getOpcode() &&
+        (uint64_t)FoldableLoadExtends[i].ExpectedImm == Imm &&
+        MVT((MVT::SimpleValueType)FoldableLoadExtends[i].ExpectedVT) == VT) {
+      Found = true;
+      isZExt = FoldableLoadExtends[i].isZExt;
+    }
+  }
+  if (!Found) return false;
+
   // See if we can handle this address.
   Address Addr;
   if (!ARMComputeAddress(LI->getOperand(0), Addr)) return false;
@@ -2854,12 +3026,14 @@ unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV,
   unsigned DestReg1 = createResultReg(TLI.getRegClassFor(VT));
 
   // Load value.
   if (isThumb2) {
+    DestReg1 = constrainOperandRegClass(TII.get(ARM::t2LDRpci), DestReg1, 0);
     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                             TII.get(ARM::t2LDRpci), DestReg1)
                     .addConstantPoolIndex(Idx));
     Opc = UseGOTOFF ? ARM::t2ADDrr : ARM::t2LDRs;
   } else {
     // The extra immediate is for addrmode2.
+    DestReg1 = constrainOperandRegClass(TII.get(ARM::LDRcp), DestReg1, 0);
     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                             TII.get(ARM::LDRcp), DestReg1)
                     .addConstantPoolIndex(Idx).addImm(0));
@@ -2873,6 +3047,9 @@ unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV,
   }
 
   unsigned DestReg2 = createResultReg(TLI.getRegClassFor(VT));
+  DestReg2 = constrainOperandRegClass(TII.get(Opc), DestReg2, 0);
+  DestReg1 = constrainOperandRegClass(TII.get(Opc), DestReg1, 1);
+  GlobalBaseReg = constrainOperandRegClass(TII.get(Opc), GlobalBaseReg, 2);
   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                     TII.get(Opc), DestReg2)
                             .addReg(DestReg1)
@@ -2938,12 +3115,10 @@ bool ARMFastISel::FastLowerArguments() {
     ARM::R0, ARM::R1, ARM::R2, ARM::R3
   };
 
-  const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::i32);
+  const TargetRegisterClass *RC = &ARM::rGPRRegClass;
   Idx = 0;
   for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
       I != E; ++I, ++Idx) {
-    if (I->use_empty())
-      continue;
     unsigned SrcReg = GPRArgRegs[Idx];
     unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
     // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
@@ -2961,13 +3136,23 @@ namespace llvm {
   FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo,
                                 const TargetLibraryInfo *libInfo) {
-    // Completely untested on non-iOS.
     const TargetMachine &TM = funcInfo.MF->getTarget();
 
-    // Darwin and thumb1 only for now.
     const ARMSubtarget *Subtarget = &TM.getSubtarget<ARMSubtarget>();
 
-    if (Subtarget->isTargetIOS() && !Subtarget->isThumb1Only())
+    // Thumb2 support on iOS; ARM support on iOS, Linux and NaCl.
+    bool UseFastISel = false;
+    UseFastISel |= Subtarget->isTargetIOS() && !Subtarget->isThumb1Only();
+    UseFastISel |= Subtarget->isTargetLinux() && !Subtarget->isThumb();
+    UseFastISel |= Subtarget->isTargetNaCl() && !Subtarget->isThumb();
+
+    if (UseFastISel) {
+      // iOS always has a FP for backtracking, force other targets
+      // to keep their FP when doing FastISel. The emitted code is
+      // currently superior, and in cases like test-suite's lencod
+      // FastISel isn't quite correct when FP is eliminated.
+      TM.Options.NoFramePointerElim = true;
      return new ARMFastISel(funcInfo, libInfo);
+    }
     return 0;
   }
 }
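
The recurring edit in this patch is the constrain-before-BuildMI pattern: every virtual register fed to a hand-built MachineInstr is first narrowed to the register class that the opcode's MCInstrDesc operand demands. A minimal sketch of a call site follows; the helper name emitTestLowBit is hypothetical, but the members it uses (TII, FuncInfo, DL, isThumb2, createResultReg, AddOptionalDefs, constrainOperandRegClass) are the ARMFastISel fields seen throughout the diff, and the body mirrors the patched i1-store sequence in ARMEmitStore.

    // Hypothetical call site sketch (not part of the patch). Without the
    // constrainOperandRegClass call, SrcReg could live in the wide GPR class
    // (which contains SP and PC) while t2ANDri only accepts rGPR operands,
    // so the machine verifier would reject the emitted instruction.
    unsigned ARMFastISel::emitTestLowBit(unsigned SrcReg) {
      unsigned Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri;
      const MCInstrDesc &II = TII.get(Opc);
      // Operand 0 of the MCInstrDesc is the def; the first source is index 1.
      SrcReg = constrainOperandRegClass(II, SrcReg, 1);
      unsigned ResultReg = createResultReg(isThumb2 ? &ARM::rGPRRegClass
                                                    : &ARM::GPRnopcRegClass);
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                      .addReg(SrcReg).addImm(1));
      return ResultReg;
    }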