author     rdivacky <rdivacky@FreeBSD.org>   2010-05-04 16:11:02 +0000
committer  rdivacky <rdivacky@FreeBSD.org>   2010-05-04 16:11:02 +0000
commit     750ce4d809c7e2a298a389a512a17652ff5be3f2 (patch)
tree       70fbd90da02177c8e6ef82adba9fa8ace285a5e3 /lib/Target/ARM
parent     5f970ec96e421f64db6b1c6509a902ea73d98cc7 (diff)
Update LLVM to r103004.
Diffstat (limited to 'lib/Target/ARM')
49 files changed, 2884 insertions, 1493 deletions
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index 8d9c622..b4dec0c 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -124,7 +124,8 @@ def : Processor<"arm1156t2f-s", ARMV6Itineraries, def : Processor<"cortex-a8", CortexA8Itineraries, [ArchV7A, FeatureThumb2, FeatureNEON, FeatureHasSlowVMLx, FeatureNEONForFP]>; -def : ProcNoItin<"cortex-a9", [ArchV7A, FeatureThumb2, FeatureNEON]>; +def : Processor<"cortex-a9", CortexA9Itineraries, + [ArchV7A, FeatureThumb2, FeatureNEON]>; //===----------------------------------------------------------------------===// // Register File Description diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h index ea62c33..e68354a 100644 --- a/lib/Target/ARM/ARMAddressingModes.h +++ b/lib/Target/ARM/ARMAddressingModes.h @@ -151,22 +151,13 @@ namespace ARM_AM { if ((rotr32(Imm, RotAmt) & ~255U) == 0) return (32-RotAmt)&31; // HW rotates right, not left. - // For values like 0xF000000F, we should skip the first run of ones, then + // For values like 0xF000000F, we should ignore the low 6 bits, then // retry the hunt. - if (Imm & 1) { - unsigned TrailingOnes = CountTrailingZeros_32(~Imm); - if (TrailingOnes != 32) { // Avoid overflow on 0xFFFFFFFF - // Restart the search for a high-order bit after the initial seconds of - // ones. - unsigned TZ2 = CountTrailingZeros_32(Imm & ~((1 << TrailingOnes)-1)); - - // Rotate amount must be even. - unsigned RotAmt2 = TZ2 & ~1; - - // If this fits, use it. - if (RotAmt2 != 32 && (rotr32(Imm, RotAmt2) & ~255U) == 0) - return (32-RotAmt2)&31; // HW rotates right, not left. - } + if (Imm & 63U) { + unsigned TZ2 = CountTrailingZeros_32(Imm & ~63U); + unsigned RotAmt2 = TZ2 & ~1; + if ((rotr32(Imm, RotAmt2) & ~255U) == 0) + return (32-RotAmt2)&31; // HW rotates right, not left. } // Otherwise, we have no way to cover this span of bits with a single diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 1995f79..a193858 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -467,6 +467,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { case TargetOpcode::KILL: case TargetOpcode::DBG_LABEL: case TargetOpcode::EH_LABEL: + case TargetOpcode::DBG_VALUE: return 0; } break; @@ -481,10 +482,11 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { // operand #2. 
return MI->getOperand(2).getImm(); case ARM::Int_eh_sjlj_setjmp: + case ARM::Int_eh_sjlj_setjmp_nofp: return 24; case ARM::tInt_eh_sjlj_setjmp: - return 14; case ARM::t2Int_eh_sjlj_setjmp: + case ARM::t2Int_eh_sjlj_setjmp_nofp: return 14; case ARM::BR_JTr: case ARM::BR_JTm: @@ -815,6 +817,16 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, } } +MachineInstr* +ARMBaseInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, + int FrameIx, uint64_t Offset, + const MDNode *MDPtr, + DebugLoc DL) const { + MachineInstrBuilder MIB = BuildMI(MF, DL, get(ARM::DBG_VALUE)) + .addFrameIndex(FrameIx).addImm(0).addImm(Offset).addMetadata(MDPtr); + return &*MIB; +} + MachineInstr *ARMBaseInstrInfo:: foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, const SmallVectorImpl<unsigned> &Ops, int FI) const { diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 292c498..7a5630e 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -269,6 +269,12 @@ public: unsigned DestReg, int FrameIndex, const TargetRegisterClass *RC) const; + virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, + int FrameIx, + uint64_t Offset, + const MDNode *MDPtr, + DebugLoc DL) const; + virtual bool canFoldMemoryOperand(const MachineInstr *MI, const SmallVectorImpl<unsigned> &Ops) const; diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index f162546..bc12187 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -38,11 +38,14 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/CommandLine.h" -using namespace llvm; +namespace llvm { cl::opt<bool> ReuseFrameIndexVals("arm-reuse-frame-index-vals", cl::Hidden, cl::init(true), cl::desc("Reuse repeated frame index values")); +} + +using namespace llvm; unsigned ARMBaseRegisterInfo::getRegisterNumbering(unsigned RegEnum, bool *isSPVFP) { @@ -478,7 +481,7 @@ ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg, /// bool ARMBaseRegisterInfo::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - return ((NoFramePointerElim && MFI->hasCalls())|| + return ((DisableFramePointerElim(MF) && MFI->hasCalls())|| needsStackRealignment(MF) || MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken()); @@ -506,7 +509,7 @@ needsStackRealignment(const MachineFunction &MF) const { bool ARMBaseRegisterInfo:: cannotEliminateFrame(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - if (NoFramePointerElim && MFI->hasCalls()) + if (DisableFramePointerElim(MF) && MFI->hasCalls()) return true; return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken() || needsStackRealignment(MF); @@ -1050,7 +1053,7 @@ emitLoadConstPool(MachineBasicBlock &MBB, unsigned PredReg) const { MachineFunction &MF = *MBB.getParent(); MachineConstantPool *ConstantPool = MF.getConstantPool(); - Constant *C = + const Constant *C = ConstantInt::get(Type::getInt32Ty(MF.getFunction()->getContext()), Val); unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4); @@ -1180,6 +1183,13 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, SPAdj = 0; Offset += SPAdj; + // Special handling of dbg_value instructions. + if (MI.isDebugValue()) { + MI.getOperand(i). 
ChangeToRegister(FrameReg, false /*isDef*/); + MI.getOperand(i+1).ChangeToImmediate(Offset); + return 0; + } + // Modify MI as necessary to handle as much of 'Offset' as possible bool Done = false; if (!AFI->isThumbFunction()) diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index e7aa0c8..f84f85a 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -64,7 +64,8 @@ namespace { static char ID; public: ARMCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce) - : MachineFunctionPass(&ID), JTI(0), II((ARMInstrInfo*)tm.getInstrInfo()), + : MachineFunctionPass(&ID), JTI(0), + II((const ARMInstrInfo *)tm.getInstrInfo()), TD(tm.getTargetData()), TM(tm), MCE(mce), MCPEs(0), MJTEs(0), IsPIC(TM.getRelocationModel() == Reloc::PIC_) {} @@ -150,7 +151,7 @@ namespace { /// Routines that handle operands which add machine relocations which are /// fixed up by the relocation stage. - void emitGlobalAddress(GlobalValue *GV, unsigned Reloc, + void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc, bool MayNeedFarStub, bool Indirect, intptr_t ACPV = 0); void emitExternalSymbolAddress(const char *ES, unsigned Reloc); @@ -174,9 +175,9 @@ bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) { assert((MF.getTarget().getRelocationModel() != Reloc::Default || MF.getTarget().getRelocationModel() != Reloc::Static) && "JIT relocation model must be set to static or default!"); - JTI = ((ARMTargetMachine&)MF.getTarget()).getJITInfo(); - II = ((ARMTargetMachine&)MF.getTarget()).getInstrInfo(); - TD = ((ARMTargetMachine&)MF.getTarget()).getTargetData(); + JTI = ((ARMTargetMachine &)MF.getTarget()).getJITInfo(); + II = ((const ARMTargetMachine &)MF.getTarget()).getInstrInfo(); + TD = ((const ARMTargetMachine &)MF.getTarget()).getTargetData(); Subtarget = &TM.getSubtarget<ARMSubtarget>(); MCPEs = &MF.getConstantPool()->getConstants(); MJTEs = 0; @@ -249,14 +250,16 @@ unsigned ARMCodeEmitter::getMachineOpValue(const MachineInstr &MI, /// emitGlobalAddress - Emit the specified address to the code stream. /// -void ARMCodeEmitter::emitGlobalAddress(GlobalValue *GV, unsigned Reloc, +void ARMCodeEmitter::emitGlobalAddress(const GlobalValue *GV, unsigned Reloc, bool MayNeedFarStub, bool Indirect, intptr_t ACPV) { MachineRelocation MR = Indirect ? 
MachineRelocation::getIndirectSymbol(MCE.getCurrentPCOffset(), Reloc, - GV, ACPV, MayNeedFarStub) + const_cast<GlobalValue *>(GV), + ACPV, MayNeedFarStub) : MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc, - GV, ACPV, MayNeedFarStub); + const_cast<GlobalValue *>(GV), ACPV, + MayNeedFarStub); MCE.addRelocation(MR); } @@ -391,7 +394,7 @@ void ARMCodeEmitter::emitConstPoolInstruction(const MachineInstr &MI) { << (void*)MCE.getCurrentPCValue() << " " << *ACPV << '\n'); assert(ACPV->isGlobalValue() && "unsupported constant pool value"); - GlobalValue *GV = ACPV->getGV(); + const GlobalValue *GV = ACPV->getGV(); if (GV) { Reloc::Model RelocM = TM.getRelocationModel(); emitGlobalAddress(GV, ARM::reloc_arm_machine_cp_entry, @@ -403,7 +406,7 @@ void ARMCodeEmitter::emitConstPoolInstruction(const MachineInstr &MI) { } emitWordLE(0); } else { - Constant *CV = MCPE.Val.ConstVal; + const Constant *CV = MCPE.Val.ConstVal; DEBUG({ errs() << " ** Constant pool #" << CPI << " @ " @@ -415,7 +418,7 @@ void ARMCodeEmitter::emitConstPoolInstruction(const MachineInstr &MI) { errs() << '\n'; }); - if (GlobalValue *GV = dyn_cast<GlobalValue>(CV)) { + if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) { emitGlobalAddress(GV, ARM::reloc_arm_absolute, isa<Function>(GV), false); emitWordLE(0); } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { @@ -559,7 +562,7 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) { // We allow inline assembler nodes with empty bodies - they can // implicitly define registers, which is ok for JIT. if (MI.getOperand(0).getSymbolName()[0]) { - llvm_report_error("JIT does not support inline asm!"); + report_fatal_error("JIT does not support inline asm!"); } break; } @@ -704,7 +707,7 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI, const TargetInstrDesc &TID = MI.getDesc(); if (TID.Opcode == ARM::BFC) { - llvm_report_error("ARMv6t2 JIT is not yet supported."); + report_fatal_error("ARMv6t2 JIT is not yet supported."); } // Part of binary is determined by TableGn. 
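A note on the ARMAddressingModes.h change above: the rotate-amount search has to cope with immediates whose 8-bit payload wraps around the top of the word, such as 0xF000000F (0xFF rotated right by 4), and the new code handles them by masking off the low six bits and retrying once. The sketch below is a brute-force model of the underlying encodability test, under the assumption that an ARM data-processing immediate is an 8-bit value rotated right by an even amount; rotl32 and isARMSOImm are illustrative names, and the real code finds the rotation with trailing-zero counts instead of a loop.

    #include <cstdint>
    #include <cstdio>

    // Rotate a 32-bit value left; Amt may be 0..31.
    static uint32_t rotl32(uint32_t Val, unsigned Amt) {
      Amt &= 31;
      return Amt ? (Val << Amt) | (Val >> (32 - Amt)) : Val;
    }

    // Imm is encodable iff some even left-rotation of it fits in 8 bits,
    // i.e. Imm is an 8-bit value rotated right by an even amount.
    static bool isARMSOImm(uint32_t Imm) {
      for (unsigned Rot = 0; Rot < 32; Rot += 2)
        if (rotl32(Imm, Rot) <= 255)
          return true;
      return false;
    }

    int main() {
      printf("%d\n", isARMSOImm(0xF000000F)); // 1: 0xFF rotated right by 4
      printf("%d\n", isARMSOImm(0x00FFF000)); // 0: twelve set bits never fit in 8
    }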
diff --git a/lib/Target/ARM/ARMConstantPoolValue.cpp b/lib/Target/ARM/ARMConstantPoolValue.cpp index 90dd0c7..f13ccc6 100644 --- a/lib/Target/ARM/ARMConstantPoolValue.cpp +++ b/lib/Target/ARM/ARMConstantPoolValue.cpp @@ -21,7 +21,7 @@ #include <cstdlib> using namespace llvm; -ARMConstantPoolValue::ARMConstantPoolValue(Constant *cval, unsigned id, +ARMConstantPoolValue::ARMConstantPoolValue(const Constant *cval, unsigned id, ARMCP::ARMCPKind K, unsigned char PCAdj, const char *Modif, @@ -39,16 +39,17 @@ ARMConstantPoolValue::ARMConstantPoolValue(LLVMContext &C, CVal(NULL), S(strdup(s)), LabelId(id), Kind(ARMCP::CPExtSymbol), PCAdjust(PCAdj), Modifier(Modif), AddCurrentAddress(AddCA) {} -ARMConstantPoolValue::ARMConstantPoolValue(GlobalValue *gv, const char *Modif) +ARMConstantPoolValue::ARMConstantPoolValue(const GlobalValue *gv, + const char *Modif) : MachineConstantPoolValue((const Type*)Type::getInt32Ty(gv->getContext())), CVal(gv), S(NULL), LabelId(0), Kind(ARMCP::CPValue), PCAdjust(0), Modifier(Modif) {} -GlobalValue *ARMConstantPoolValue::getGV() const { +const GlobalValue *ARMConstantPoolValue::getGV() const { return dyn_cast_or_null<GlobalValue>(CVal); } -BlockAddress *ARMConstantPoolValue::getBlockAddress() const { +const BlockAddress *ARMConstantPoolValue::getBlockAddress() const { return dyn_cast_or_null<BlockAddress>(CVal); } diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h index 741acde..6f4eddf 100644 --- a/lib/Target/ARM/ARMConstantPoolValue.h +++ b/lib/Target/ARM/ARMConstantPoolValue.h @@ -36,7 +36,7 @@ namespace ARMCP { /// represent PC-relative displacement between the address of the load /// instruction and the constant being loaded, i.e. (&GV-(LPIC+8)). class ARMConstantPoolValue : public MachineConstantPoolValue { - Constant *CVal; // Constant being loaded. + const Constant *CVal; // Constant being loaded. const char *S; // ExtSymbol being loaded. unsigned LabelId; // Label id of the load. ARMCP::ARMCPKind Kind; // Kind of constant. 
@@ -46,20 +46,20 @@ class ARMConstantPoolValue : public MachineConstantPoolValue { bool AddCurrentAddress; public: - ARMConstantPoolValue(Constant *cval, unsigned id, + ARMConstantPoolValue(const Constant *cval, unsigned id, ARMCP::ARMCPKind Kind = ARMCP::CPValue, unsigned char PCAdj = 0, const char *Modifier = NULL, bool AddCurrentAddress = false); ARMConstantPoolValue(LLVMContext &C, const char *s, unsigned id, unsigned char PCAdj = 0, const char *Modifier = NULL, bool AddCurrentAddress = false); - ARMConstantPoolValue(GlobalValue *GV, const char *Modifier); + ARMConstantPoolValue(const GlobalValue *GV, const char *Modifier); ARMConstantPoolValue(); ~ARMConstantPoolValue(); - GlobalValue *getGV() const; + const GlobalValue *getGV() const; const char *getSymbol() const { return S; } - BlockAddress *getBlockAddress() const; + const BlockAddress *getBlockAddress() const; const char *getModifier() const { return Modifier; } bool hasModifier() const { return Modifier != NULL; } bool mustAddCurrentAddress() const { return AddCurrentAddress; } diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 1b8727d..845d088 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -91,7 +91,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { LO16 = LO16.addImm(Lo16); HI16 = HI16.addImm(Hi16); } else { - GlobalValue *GV = MO.getGlobal(); + const GlobalValue *GV = MO.getGlobal(); unsigned TF = MO.getTargetFlags(); LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16); HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16); diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 7d48663..36a1827 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -13,7 +13,6 @@ #include "ARM.h" #include "ARMAddressingModes.h" -#include "ARMISelLowering.h" #include "ARMTargetMachine.h" #include "llvm/CallingConv.h" #include "llvm/Constants.h" @@ -121,9 +120,6 @@ private: SDNode *SelectARMIndexedLoad(SDNode *N); SDNode *SelectT2IndexedLoad(SDNode *N); - /// SelectDYN_ALLOC - Select dynamic alloc for Thumb. - SDNode *SelectDYN_ALLOC(SDNode *N); - /// SelectVLD - Select NEON load intrinsics. NumVecs should be /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for /// loads of D registers and even subregs and odd subregs of Q registers. @@ -146,7 +142,7 @@ private: unsigned *QOpcodes1); /// SelectV6T2BitfieldExtractOp - Select SBFX/UBFX instructions for ARM. - SDNode *SelectV6T2BitfieldExtractOp(SDNode *N, unsigned Opc); + SDNode *SelectV6T2BitfieldExtractOp(SDNode *N, bool isSigned); /// SelectCMOVOp - Select CMOV instructions for ARM. SDNode *SelectCMOVOp(SDNode *N); @@ -939,59 +935,6 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) { return NULL; } -SDNode *ARMDAGToDAGISel::SelectDYN_ALLOC(SDNode *N) { - DebugLoc dl = N->getDebugLoc(); - EVT VT = N->getValueType(0); - SDValue Chain = N->getOperand(0); - SDValue Size = N->getOperand(1); - SDValue Align = N->getOperand(2); - SDValue SP = CurDAG->getRegister(ARM::SP, MVT::i32); - int32_t AlignVal = cast<ConstantSDNode>(Align)->getSExtValue(); - if (AlignVal < 0) - // We need to align the stack. Use Thumb1 tAND which is the only thumb - // instruction that can read and write SP. This matches to a pseudo - // instruction that has a chain to ensure the result is written back to - // the stack pointer. 
- SP = SDValue(CurDAG->getMachineNode(ARM::tANDsp, dl, VT, SP, Align), 0); - - bool isC = isa<ConstantSDNode>(Size); - uint32_t C = isC ? cast<ConstantSDNode>(Size)->getZExtValue() : ~0UL; - // Handle the most common case for both Thumb1 and Thumb2: - // tSUBspi - immediate is between 0 ... 508 inclusive. - if (C <= 508 && ((C & 3) == 0)) - // FIXME: tSUBspi encode scale 4 implicitly. - return CurDAG->SelectNodeTo(N, ARM::tSUBspi_, VT, MVT::Other, SP, - CurDAG->getTargetConstant(C/4, MVT::i32), - Chain); - - if (Subtarget->isThumb1Only()) { - // Use tADDspr since Thumb1 does not have a sub r, sp, r. ARMISelLowering - // should have negated the size operand already. FIXME: We can't insert - // new target independent node at this stage so we are forced to negate - // it earlier. Is there a better solution? - return CurDAG->SelectNodeTo(N, ARM::tADDspr_, VT, MVT::Other, SP, Size, - Chain); - } else if (Subtarget->isThumb2()) { - if (isC && Predicate_t2_so_imm(Size.getNode())) { - // t2SUBrSPi - SDValue Ops[] = { SP, CurDAG->getTargetConstant(C, MVT::i32), Chain }; - return CurDAG->SelectNodeTo(N, ARM::t2SUBrSPi_, VT, MVT::Other, Ops, 3); - } else if (isC && Predicate_imm0_4095(Size.getNode())) { - // t2SUBrSPi12 - SDValue Ops[] = { SP, CurDAG->getTargetConstant(C, MVT::i32), Chain }; - return CurDAG->SelectNodeTo(N, ARM::t2SUBrSPi12_, VT, MVT::Other, Ops, 3); - } else { - // t2SUBrSPs - SDValue Ops[] = { SP, Size, - getI32Imm(ARM_AM::getSORegOpc(ARM_AM::lsl,0)), Chain }; - return CurDAG->SelectNodeTo(N, ARM::t2SUBrSPs_, VT, MVT::Other, Ops, 4); - } - } - - // FIXME: Add ADD / SUB sp instructions for ARM. - return 0; -} - /// PairDRegs - Insert a pair of double registers into an implicit def to /// form a quad register. SDNode *ARMDAGToDAGISel::PairDRegs(EVT VT, SDValue V0, SDValue V1) { @@ -1052,7 +995,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, break; } - SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); + SDValue Pred = getAL(CurDAG); SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); if (is64BitVector) { unsigned Opc = DOpcodes[OpcodeIndex]; @@ -1142,7 +1085,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, break; } - SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); + SDValue Pred = getAL(CurDAG); SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); SmallVector<SDValue, 10> Ops; @@ -1249,7 +1192,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, case MVT::v4i32: OpcodeIndex = 1; break; } - SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); + SDValue Pred = getAL(CurDAG); SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); SmallVector<SDValue, 10> Ops; @@ -1305,10 +1248,42 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, } SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N, - unsigned Opc) { + bool isSigned) { if (!Subtarget->hasV6T2Ops()) return NULL; + unsigned Opc = isSigned ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX) + : (Subtarget->isThumb() ? 
ARM::t2UBFX : ARM::UBFX); + + + // For unsigned extracts, check for a shift right and mask + unsigned And_imm = 0; + if (N->getOpcode() == ISD::AND) { + if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) { + + // The immediate is a mask of the low bits iff imm & (imm+1) == 0 + if (And_imm & (And_imm + 1)) + return NULL; + + unsigned Srl_imm = 0; + if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, + Srl_imm)) { + assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); + + unsigned Width = CountTrailingOnes_32(And_imm); + unsigned LSB = Srl_imm; + SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); + SDValue Ops[] = { N->getOperand(0).getOperand(0), + CurDAG->getTargetConstant(LSB, MVT::i32), + CurDAG->getTargetConstant(Width, MVT::i32), + getAL(CurDAG), Reg0 }; + return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5); + } + } + return NULL; + } + + // Otherwise, we're looking for a shift of a shift unsigned Shl_imm = 0; if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) { assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!"); @@ -1531,7 +1506,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDNode *ResNode; if (Subtarget->isThumb1Only()) { - SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); + SDValue Pred = getAL(CurDAG); SDValue PredReg = CurDAG->getRegister(0, MVT::i32); SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() }; ResNode = CurDAG->getMachineNode(ARM::tLDRcp, dl, MVT::i32, MVT::Other, @@ -1571,16 +1546,12 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5); } } - case ARMISD::DYN_ALLOC: - return SelectDYN_ALLOC(N); case ISD::SRL: - if (SDNode *I = SelectV6T2BitfieldExtractOp(N, - Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX)) + if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false)) return I; break; case ISD::SRA: - if (SDNode *I = SelectV6T2BitfieldExtractOp(N, - Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)) + if (SDNode *I = SelectV6T2BitfieldExtractOp(N, true)) return I; break; case ISD::MUL: @@ -1624,6 +1595,10 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { } break; case ISD::AND: { + // Check for unsigned bitfield extract + if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false)) + return I; + // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits // of c1 are 0xffff, and lower 16-bit of c2 are 0. 
That is, the top 16-bits // are entirely contributed by c2 and lower 16-bits are entirely contributed @@ -1708,7 +1683,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue Chain = N->getOperand(0); SDValue AM5Opc = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::ia, 4), MVT::i32); - SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); + SDValue Pred = getAL(CurDAG); SDValue PredReg = CurDAG->getRegister(0, MVT::i32); SDValue Ops[] = { N->getOperand(1), AM5Opc, Pred, PredReg, Chain }; return CurDAG->getMachineNode(ARM::VLDMQ, dl, MVT::v2f64, MVT::Other, @@ -1724,7 +1699,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue Chain = N->getOperand(0); SDValue AM5Opc = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::ia, 4), MVT::i32); - SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); + SDValue Pred = getAL(CurDAG); SDValue PredReg = CurDAG->getRegister(0, MVT::i32); SDValue Ops[] = { N->getOperand(1), N->getOperand(2), AM5Opc, Pred, PredReg, Chain }; @@ -1816,7 +1791,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case MVT::v4f32: case MVT::v4i32: Opc = ARM::VZIPq32; break; } - SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); + SDValue Pred = getAL(CurDAG); SDValue PredReg = CurDAG->getRegister(0, MVT::i32); SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4); @@ -1835,7 +1810,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case MVT::v4f32: case MVT::v4i32: Opc = ARM::VUZPq32; break; } - SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); + SDValue Pred = getAL(CurDAG); SDValue PredReg = CurDAG->getRegister(0, MVT::i32); SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4); @@ -1854,7 +1829,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case MVT::v4f32: case MVT::v4i32: Opc = ARM::VTRNq32; break; } - SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); + SDValue Pred = getAL(CurDAG); SDValue PredReg = CurDAG->getRegister(0, MVT::i32); SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4); diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 77fb0c3..d3842a6 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -40,12 +40,18 @@ #include "llvm/MC/MCSectionMachO.h" #include "llvm/Target/TargetOptions.h" #include "llvm/ADT/VectorExtras.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include <sstream> using namespace llvm; +static cl::opt<bool> +EnableARMLongCalls("arm-long-calls", cl::Hidden, + cl::desc("Generate calls via indirect call instructions."), + cl::init(false)); + static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, @@ -90,6 +96,8 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT, setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom); setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Custom); setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Expand); + setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand); + setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand); if (VT.isInteger()) { setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom); setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom); @@ 
-376,10 +384,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // FIXME: Shouldn't need this, since no register is used, but the legalizer // doesn't yet know how to not do that for SjLj. setExceptionSelectorRegister(ARM::R0); - if (Subtarget->isThumb()) - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); - else - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom); if (!Subtarget->hasV6Ops() && !Subtarget->isThumb2()) { @@ -783,7 +788,7 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // Assign locations to each value returned by this call. SmallVector<CCValAssign, 16> RVLocs; @@ -871,7 +876,7 @@ ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg, DebugLoc dl, SelectionDAG &DAG, const CCValAssign &VA, - ISD::ArgFlagsTy Flags) { + ISD::ArgFlagsTy Flags) const { unsigned LocMemOffset = VA.getLocMemOffset(); SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); @@ -889,7 +894,7 @@ void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG, CCValAssign &VA, CCValAssign &NextVA, SDValue &StackPtr, SmallVector<SDValue, 8> &MemOpChains, - ISD::ArgFlagsTy Flags) { + ISD::ArgFlagsTy Flags) const { SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), Arg); @@ -918,7 +923,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // ARM target does not yet support tail call optimization. isTailCall = false; @@ -1025,8 +1030,44 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, bool isLocalARMFunc = false; MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { - GlobalValue *GV = G->getGlobal(); + + if (EnableARMLongCalls) { + assert (getTargetMachine().getRelocationModel() == Reloc::Static + && "long-calls with non-static relocation model!"); + // Handle a global address or an external symbol. If it's not one of + // those, the target's already in a register, so we don't need to do + // anything extra. 
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { + const GlobalValue *GV = G->getGlobal(); + // Create a constant pool entry for the callee address + unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId(); + ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, + ARMPCLabelIndex, + ARMCP::CPValue, 0); + // Get the address of the callee into a register + SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); + CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); + Callee = DAG.getLoad(getPointerTy(), dl, + DAG.getEntryNode(), CPAddr, + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); + } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) { + const char *Sym = S->getSymbol(); + + // Create a constant pool entry for the callee address + unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId(); + ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(), + Sym, ARMPCLabelIndex, 0); + // Get the address of the callee into a register + SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); + CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); + Callee = DAG.getLoad(getPointerTy(), dl, + DAG.getEntryNode(), CPAddr, + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); + } + } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { + const GlobalValue *GV = G->getGlobal(); isDirect = true; bool isExt = GV->isDeclaration() || GV->isWeakForLinker(); bool isStub = (isExt && Subtarget->isTargetDarwin()) && @@ -1049,7 +1090,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); Callee = DAG.getNode(ARMISD::PIC_ADD, dl, getPointerTy(), Callee, PICLabel); - } else + } else Callee = DAG.getTargetGlobalAddress(GV, getPointerTy()); } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { isDirect = true; @@ -1125,7 +1166,7 @@ SDValue ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG) { + DebugLoc dl, SelectionDAG &DAG) const { // CCValAssign - represent the assignment of the return value to a location. SmallVector<CCValAssign, 16> RVLocs; @@ -1232,13 +1273,14 @@ static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res); } -SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) { +SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, + SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); unsigned ARMPCLabelIndex = 0; DebugLoc DL = Op.getDebugLoc(); EVT PtrVT = getPointerTy(); - BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); + const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); Reloc::Model RelocM = getTargetMachine().getRelocationModel(); SDValue CPAddr; if (RelocM == Reloc::Static) { @@ -1264,7 +1306,7 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) { // Lower ISD::GlobalTLSAddress using the "general dynamic" model SDValue ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { DebugLoc dl = GA->getDebugLoc(); EVT PtrVT = getPointerTy(); unsigned char PCAdj = Subtarget->isThumb() ? 
4 : 8; @@ -1303,8 +1345,8 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, // "local exec" model. SDValue ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, - SelectionDAG &DAG) { - GlobalValue *GV = GA->getGlobal(); + SelectionDAG &DAG) const { + const GlobalValue *GV = GA->getGlobal(); DebugLoc dl = GA->getDebugLoc(); SDValue Offset; SDValue Chain = DAG.getEntryNode(); @@ -1350,7 +1392,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, } SDValue -ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) { +ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { // TODO: implement the "local dynamic" model assert(Subtarget->isTargetELF() && "TLS not implemented for non-ELF targets"); @@ -1364,10 +1406,10 @@ ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) { } SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { EVT PtrVT = getPointerTy(); DebugLoc dl = Op.getDebugLoc(); - GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); + const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); Reloc::Model RelocM = getTargetMachine().getRelocationModel(); if (RelocM == Reloc::PIC_) { bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility(); @@ -1404,13 +1446,13 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, } SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); unsigned ARMPCLabelIndex = 0; EVT PtrVT = getPointerTy(); DebugLoc dl = Op.getDebugLoc(); - GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); + const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); Reloc::Model RelocM = getTargetMachine().getRelocationModel(); SDValue CPAddr; if (RelocM == Reloc::Static) @@ -1443,7 +1485,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, } SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, - SelectionDAG &DAG){ + SelectionDAG &DAG) const { assert(Subtarget->isTargetELF() && "GLOBAL OFFSET TABLE not implemented for non-ELF targets"); MachineFunction &MF = DAG.getMachineFunction(); @@ -1466,7 +1508,8 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, SDValue ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, - const ARMSubtarget *Subtarget) { + const ARMSubtarget *Subtarget) + const { unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); DebugLoc dl = Op.getDebugLoc(); switch (IntNo) { @@ -1533,20 +1576,23 @@ static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG, return Res; } -static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG, - unsigned VarArgsFrameIndex) { +static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) { + MachineFunction &MF = DAG.getMachineFunction(); + ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>(); + // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. 
DebugLoc dl = Op.getDebugLoc(); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); + SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0, false, false, 0); } SDValue -ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) { +ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, + SelectionDAG &DAG) const { SDNode *Node = Op.getNode(); DebugLoc dl = Node->getDebugLoc(); EVT VT = Node->getValueType(0); @@ -1595,7 +1641,7 @@ ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) { SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, SDValue &Root, SelectionDAG &DAG, - DebugLoc dl) { + DebugLoc dl) const { MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); @@ -1611,10 +1657,8 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, SDValue ArgValue2; if (NextVA.isMemLoc()) { - unsigned ArgSize = NextVA.getLocVT().getSizeInBits()/8; MachineFrameInfo *MFI = MF.getFrameInfo(); - int FI = MFI->CreateFixedObject(ArgSize, NextVA.getLocMemOffset(), - true, false); + int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true, false); // Create load node to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); @@ -1635,7 +1679,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) + const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); @@ -1663,14 +1708,22 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, if (VA.needsCustom()) { // f64 and vector types are split up into multiple registers or // combinations of registers and stack slots. - RegVT = MVT::i32; - if (VA.getLocVT() == MVT::v2f64) { SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl); VA = ArgLocs[++i]; // skip ahead to next loc - SDValue ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], - Chain, DAG, dl); + SDValue ArgValue2; + if (VA.isMemLoc()) { + int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), + true, false); + SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); + ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN, + PseudoSourceValue::getFixedStack(FI), 0, + false, false, 0); + } else { + ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], + Chain, DAG, dl); + } ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64); ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, ArgValue, ArgValue1, DAG.getIntPtrConstant(0)); @@ -1758,10 +1811,12 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, // to their spots on the stack so that they may be loaded by deferencing // the result of va_next. 
AFI->setVarArgsRegSaveSize(VARegSaveSize); - VarArgsFrameIndex = MFI->CreateFixedObject(VARegSaveSize, ArgOffset + - VARegSaveSize - VARegSize, - true, false); - SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); + AFI->setVarArgsFrameIndex( + MFI->CreateFixedObject(VARegSaveSize, + ArgOffset + VARegSaveSize - VARegSize, + true, false)); + SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(), + getPointerTy()); SmallVector<SDValue, 4> MemOps; for (; NumGPRs < 4; ++NumGPRs) { @@ -1773,9 +1828,10 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); - SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, - PseudoSourceValue::getFixedStack(VarArgsFrameIndex), 0, - false, false, 0); + SDValue Store = + DAG.getStore(Val.getValue(1), dl, Val, FIN, + PseudoSourceValue::getFixedStack(AFI->getVarArgsFrameIndex()), 0, + false, false, 0); MemOps.push_back(Store); FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, DAG.getConstant(4, getPointerTy())); @@ -1785,7 +1841,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, &MemOps[0], MemOps.size()); } else // This will point to the next argument passed via stack. - VarArgsFrameIndex = MFI->CreateFixedObject(4, ArgOffset, true, false); + AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset, + true, false)); } return Chain; @@ -1800,7 +1857,7 @@ static bool isFloatingPointZero(SDValue Op) { if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) { SDValue WrapperOp = Op.getOperand(1).getOperand(0); if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp)) - if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal())) + if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal())) return CFP->getValueAPF().isPosZero(); } } @@ -1811,7 +1868,8 @@ static bool isFloatingPointZero(SDValue Op) { /// the given operands. 
SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, - SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl) { + SDValue &ARMCC, SelectionDAG &DAG, + DebugLoc dl) const { if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) { unsigned C = RHSC->getZExtValue(); if (!isLegalICmpImmediate(C)) { @@ -1877,7 +1935,7 @@ static SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Flag, Cmp); } -SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { +SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); @@ -1911,7 +1969,7 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { return Result; } -SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) { +SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); SDValue LHS = Op.getOperand(2); @@ -1945,7 +2003,7 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) { return Res; } -SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) { +SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); SDValue Table = Op.getOperand(1); SDValue Index = Op.getOperand(2); @@ -2034,7 +2092,7 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMCC, CCR, Cmp); } -SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { +SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); MFI->setFrameAddressIsTaken(true); EVT VT = Op.getValueType(); @@ -2055,8 +2113,10 @@ ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline, - const Value *DstSV, uint64_t DstSVOff, - const Value *SrcSV, uint64_t SrcSVOff){ + const Value *DstSV, + uint64_t DstSVOff, + const Value *SrcSV, + uint64_t SrcSVOff) const { // Do repeated 4-byte loads and stores. To be improved. // This requires 4-byte alignment. if ((Align & 3) != 0) @@ -2157,11 +2217,25 @@ ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); } +/// ExpandBIT_CONVERT - If the target supports VFP, this function is called to +/// expand a bit convert where either the source or destination type is i64 to +/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64 +/// operand type is illegal (e.g., v2f32 for a target that doesn't support +/// vectors), since the legalizer won't know what to do with that. static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) { - SDValue Op = N->getOperand(0); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); DebugLoc dl = N->getDebugLoc(); - if (N->getValueType(0) == MVT::f64) { - // Turn i64->f64 into VMOVDRR. + SDValue Op = N->getOperand(0); + + // This function is only supposed to be called for i64 types, either as the + // source or destination of the bit convert. 
+ EVT SrcVT = Op.getValueType(); + EVT DstVT = N->getValueType(0); + assert((SrcVT == MVT::i64 || DstVT == MVT::i64) && + "ExpandBIT_CONVERT called for non-i64 type"); + + // Turn i64->f64 into VMOVDRR. + if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) { SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, DAG.getConstant(0, MVT::i32)); SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, @@ -2170,11 +2244,14 @@ static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) { } // Turn f64->i64 into VMOVRRD. - SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, - DAG.getVTList(MVT::i32, MVT::i32), &Op, 1); + if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) { + SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, + DAG.getVTList(MVT::i32, MVT::i32), &Op, 1); + // Merge the pieces into a single i64 value. + return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1)); + } - // Merge the pieces into a single i64 value. - return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1)); + return SDValue(); } /// getZeroVector - Returns a vector of specified type with all zero elements. @@ -2227,7 +2304,8 @@ static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) { /// LowerShiftRightParts - Lower SRA_PARTS, which returns two /// i32 values and take a 2 x i32 value to shift plus a shift amount. -SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) { +SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, + SelectionDAG &DAG) const { assert(Op.getNumOperands() == 3 && "Not a double-shift!"); EVT VT = Op.getValueType(); unsigned VTBits = VT.getSizeInBits(); @@ -2262,7 +2340,8 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) { /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two /// i32 values and take a 2 x i32 value to shift plus a shift amount. -SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) { +SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op, + SelectionDAG &DAG) const { assert(Op.getNumOperands() == 3 && "Not a double-shift!"); EVT VT = Op.getValueType(); unsigned VTBits = VT.getSizeInBits(); @@ -3059,7 +3138,7 @@ static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Val); } -SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { +SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("Don't know how to custom lower this!"); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); @@ -3072,7 +3151,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { case ISD::BR_CC: return LowerBR_CC(Op, DAG); case ISD::BR_JT: return LowerBR_JT(Op, DAG); case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); - case ISD::VASTART: return LowerVASTART(Op, DAG, VarArgsFrameIndex); + case ISD::VASTART: return LowerVASTART(Op, DAG); case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget); case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG); @@ -3105,22 +3184,22 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { /// type with new values built out of custom code. 
void ARMTargetLowering::ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { + SDValue Res; switch (N->getOpcode()) { default: llvm_unreachable("Don't know how to custom expand this!"); - return; + break; case ISD::BIT_CONVERT: - Results.push_back(ExpandBIT_CONVERT(N, DAG)); - return; + Res = ExpandBIT_CONVERT(N, DAG); + break; case ISD::SRL: - case ISD::SRA: { - SDValue Res = LowerShift(N, DAG, Subtarget); - if (Res.getNode()) - Results.push_back(Res); - return; - } + case ISD::SRA: + Res = LowerShift(N, DAG, Subtarget); + break; } + if (Res.getNode()) + Results.push_back(Res); } //===----------------------------------------------------------------------===// @@ -3302,8 +3381,7 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, MachineBasicBlock * ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { + MachineBasicBlock *BB) const { const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); bool isThumb2 = Subtarget->isThumb2(); @@ -3387,12 +3465,9 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, F->insert(It, sinkMBB); // Update machine-CFG edges by first adding all successors of the current // block to the new block which will contain the Phi node for the select. - // Also inform sdisel of the edge changes. for (MachineBasicBlock::succ_iterator I = BB->succ_begin(), - E = BB->succ_end(); I != E; ++I) { - EM->insert(std::make_pair(*I, sinkMBB)); + E = BB->succ_end(); I != E; ++I) sinkMBB->addSuccessor(*I); - } // Next, remove all successors of the current block, and add the true // and fallthrough blocks as its successors. while (!BB->succ_empty()) diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index fa33ad3..d8a230f 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -159,25 +159,24 @@ namespace llvm { // ARMTargetLowering - ARM Implementation of the TargetLowering interface class ARMTargetLowering : public TargetLowering { - int VarArgsFrameIndex; // FrameIndex for start of varargs area. public: explicit ARMTargetLowering(TargetMachine &TM); - virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG); + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; /// ReplaceNodeResults - Replace the results of node with an illegal result /// type with new values built out of custom code. /// virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, - SelectionDAG &DAG); + SelectionDAG &DAG) const; virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; virtual const char *getTargetNodeName(unsigned Opcode) const; - virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *MBB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*>*) const; + virtual MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *MBB) const; /// allowsUnalignedMemoryAccesses - Returns true if the target allows /// unaligned memory accesses. of the specified type. 
@@ -237,7 +236,7 @@ namespace llvm { std::vector<SDValue> &Ops, SelectionDAG &DAG) const; - virtual const ARMSubtarget* getSubtarget() { + virtual const ARMSubtarget* getSubtarget() const { return Subtarget; } @@ -272,54 +271,57 @@ namespace llvm { CCValAssign &VA, CCValAssign &NextVA, SDValue &StackPtr, SmallVector<SDValue, 8> &MemOpChains, - ISD::ArgFlagsTy Flags); + ISD::ArgFlagsTy Flags) const; SDValue GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, - SDValue &Root, SelectionDAG &DAG, DebugLoc dl); + SDValue &Root, SelectionDAG &DAG, + DebugLoc dl) const; CCAssignFn *CCAssignFnForNode(CallingConv::ID CC, bool Return, bool isVarArg) const; SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg, DebugLoc dl, SelectionDAG &DAG, const CCValAssign &VA, - ISD::ArgFlagsTy Flags); - SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG); + ISD::ArgFlagsTy Flags) const; + SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, - const ARMSubtarget *Subtarget); - SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG); - SDValue LowerGlobalAddressDarwin(SDValue Op, SelectionDAG &DAG); - SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG); - SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG); + const ARMSubtarget *Subtarget) const; + SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalAddressDarwin(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, - SelectionDAG &DAG); + SelectionDAG &DAG) const; SDValue LowerToTLSExecModels(GlobalAddressSDNode *GA, - SelectionDAG &DAG); - SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG); - SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG); - SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG); - SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG); - SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG); - SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG); - SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG); - SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG); + SelectionDAG &DAG) const; + SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline, - const Value *DstSV, uint64_t DstSVOff, - const Value *SrcSV, uint64_t SrcSVOff); + const Value *DstSV, + uint64_t DstSVOff, + const Value *SrcSV, + uint64_t SrcSVOff) const; SDValue LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool 
isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerCall(SDValue Chain, SDValue Callee, @@ -328,16 +330,16 @@ namespace llvm { const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG); + DebugLoc dl, SelectionDAG &DAG) const; SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, - SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl); + SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl) const; MachineBasicBlock *EmitAtomicCmpSwap(MachineInstr *MI, MachineBasicBlock *BB, diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index f2ab06f..ce5f2f8 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -124,6 +124,7 @@ def HasV6 : Predicate<"Subtarget->hasV6Ops()">; def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">; def NoV6T2 : Predicate<"!Subtarget->hasV6T2Ops()">; def HasV7 : Predicate<"Subtarget->hasV7Ops()">; +def NoVFP : Predicate<"!Subtarget->hasVFP2()">; def HasVFP2 : Predicate<"Subtarget->hasVFP2()">; def HasVFP3 : Predicate<"Subtarget->hasVFP3()">; def HasNEON : Predicate<"Subtarget->hasNEON()">; @@ -1231,7 +1232,7 @@ def LDRBT : AI2ldbpo<(outs GPR:$dst, GPR:$base_wb), } def LDRSBT : AI3ldsbpo<(outs GPR:$dst, GPR:$base_wb), - (ins GPR:$base,am2offset:$offset), LdMiscFrm, IIC_iLoadru, + (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru, "ldrsbt", "\t$dst, [$base], $offset", "$base = $base_wb", []> { let Inst{21} = 1; // overwrite } @@ -2533,7 +2534,23 @@ let Defs = "mov\tr0, #0\n\t" "add\tpc, pc, #0\n\t" "mov\tr0, #1 @ eh_setjmp end", "", - [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>; + [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>, + Requires<[IsARM, HasVFP2]>; +} + +let Defs = + [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR ] in { + def Int_eh_sjlj_setjmp_nofp : XI<(outs), (ins GPR:$src, GPR:$val), + AddrModeNone, SizeSpecial, IndexModeNone, + Pseudo, NoItinerary, + "str\tsp, [$src, #+8] @ eh_setjmp begin\n\t" + "add\t$val, pc, #8\n\t" + "str\t$val, [$src, #+4]\n\t" + "mov\tr0, #0\n\t" + "add\tpc, pc, #0\n\t" + "mov\tr0, #1 @ eh_setjmp end", "", + [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>, + Requires<[IsARM, NoVFP]>; } //===----------------------------------------------------------------------===// @@ -2747,7 +2764,7 @@ multiclass LdStCop<bits<4> op31_28, bit load, string opc> { def L_OFFSET : ACI<(outs), (ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), - opc, "l\tp$cop, cr$CRd, $addr"> { + !strconcat(opc, "l"), "\tp$cop, cr$CRd, $addr"> { let Inst{31-28} = op31_28; let Inst{24} = 1; // P = 1 let Inst{21} = 0; // W = 0 @@ -2757,7 +2774,7 @@ multiclass LdStCop<bits<4> op31_28, bit load, string opc> { def L_PRE : ACI<(outs), (ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), - opc, "l\tp$cop, cr$CRd, $addr!"> { + !strconcat(opc, "l"), "\tp$cop, cr$CRd, $addr!"> { let Inst{31-28} = op31_28; let Inst{24} = 1; // P = 1 let Inst{21} = 1; // W = 1 @@ -2767,7 +2784,7 @@ multiclass LdStCop<bits<4> op31_28, bit load, string opc> { def L_POST : ACI<(outs), (ins nohash_imm:$cop, nohash_imm:$CRd, GPR:$base, am2offset:$offset), - opc, "l\tp$cop, cr$CRd, 
[$base], $offset"> { + !strconcat(opc, "l"), "\tp$cop, cr$CRd, [$base], $offset"> { let Inst{31-28} = op31_28; let Inst{24} = 0; // P = 0 let Inst{21} = 1; // W = 1 @@ -2777,7 +2794,7 @@ multiclass LdStCop<bits<4> op31_28, bit load, string opc> { def L_OPTION : ACI<(outs), (ins nohash_imm:$cop, nohash_imm:$CRd, GPR:$base, nohash_imm:$option), - opc, "l\tp$cop, cr$CRd, [$base], $option"> { + !strconcat(opc, "l"), "\tp$cop, cr$CRd, [$base], $option"> { let Inst{31-28} = op31_28; let Inst{24} = 0; // P = 0 let Inst{23} = 1; // U = 1 diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index ed9d31d..d5ce2b8 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -1621,12 +1621,13 @@ multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4, // First with only element sizes of 16 and 32 bits: multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, + InstrItinClass itin16, InstrItinClass itin32, + string OpcodeStr, string Dt, Intrinsic IntOp, bit Commutable = 0> { - def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin, + def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp, Commutable>; - def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin, + def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp, Commutable>; } @@ -1642,11 +1643,12 @@ multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8, // ....then also with element size of 8 bits: multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, + InstrItinClass itin16, InstrItinClass itin32, + string OpcodeStr, string Dt, Intrinsic IntOp, bit Commutable = 0> - : N3VLInt_HS<op24, op23, op11_8, op4, itin, OpcodeStr, Dt, + : N3VLInt_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt, IntOp, Commutable> { - def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin, + def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp, Commutable>; } @@ -1711,21 +1713,22 @@ multiclass N3VMulOpSL_HS<bits<4> op11_8, // Neon 3-argument intrinsics, // element sizes of 8, 16 and 32 bits: multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, + InstrItinClass itinD, InstrItinClass itinQ, string OpcodeStr, string Dt, Intrinsic IntOp> { // 64-bit vector types. - def v8i8 : N3VDInt3<op24, op23, 0b00, op11_8, op4, IIC_VMACi16D, + def v8i8 : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>; - def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, IIC_VMACi16D, + def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD, OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>; - def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, IIC_VMACi32D, + def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD, OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>; // 128-bit vector types. 
- def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, IIC_VMACi16Q, + def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ, OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>; - def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, IIC_VMACi16Q, + def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ, OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>; - def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, IIC_VMACi32Q, + def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ, OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>; } @@ -1734,10 +1737,11 @@ multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, // First with only element sizes of 16 and 32 bits: multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4, + InstrItinClass itin16, InstrItinClass itin32, string OpcodeStr, string Dt, Intrinsic IntOp> { - def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, IIC_VMACi16D, + def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>; - def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, IIC_VMACi16D, + def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>; } @@ -1751,9 +1755,10 @@ multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8, // ....then also with element size of 8 bits: multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, + InstrItinClass itin16, InstrItinClass itin32, string OpcodeStr, string Dt, Intrinsic IntOp> - : N3VLInt3_HS<op24, op23, op11_8, op4, OpcodeStr, Dt, IntOp> { - def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, IIC_VMACi16D, + : N3VLInt3_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt, IntOp> { + def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>; } @@ -2001,10 +2006,10 @@ def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32", def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32", v4f32, v4f32, fadd, 1>; // VADDL : Vector Add Long (Q = D + D) -defm VADDLs : N3VLInt_QHS<0,1,0b0000,0, IIC_VSHLiD, "vaddl", "s", - int_arm_neon_vaddls, 1>; -defm VADDLu : N3VLInt_QHS<1,1,0b0000,0, IIC_VSHLiD, "vaddl", "u", - int_arm_neon_vaddlu, 1>; +defm VADDLs : N3VLInt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD, + "vaddl", "s", int_arm_neon_vaddls, 1>; +defm VADDLu : N3VLInt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD, + "vaddl", "u", int_arm_neon_vaddlu, 1>; // VADDW : Vector Add Wide (Q = Q + D) defm VADDWs : N3VWInt_QHS<0,1,0b0001,0, "vaddw", "s", int_arm_neon_vaddws, 0>; defm VADDWu : N3VWInt_QHS<1,1,0b0001,0, "vaddw", "u", int_arm_neon_vaddwu, 0>; @@ -2118,10 +2123,10 @@ def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1), (SubReg_i32_lane imm:$lane)))>; // VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D) -defm VMULLs : N3VLInt_QHS<0,1,0b1100,0, IIC_VMULi16D, "vmull", "s", - int_arm_neon_vmulls, 1>; -defm VMULLu : N3VLInt_QHS<1,1,0b1100,0, IIC_VMULi16D, "vmull", "u", - int_arm_neon_vmullu, 1>; +defm VMULLs : N3VLInt_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, + "vmull", "s", int_arm_neon_vmulls, 1>; +defm VMULLu : N3VLInt_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, + "vmull", "u", int_arm_neon_vmullu, 1>; def VMULLp : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8", v8i16, v8i8, int_arm_neon_vmullp, 1>; defm VMULLsls : N3VLIntSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", @@ -2130,10 +2135,10 @@ defm VMULLslu : N3VLIntSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", int_arm_neon_vmullu>; 
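The reason the long-multiply multiclasses grow a second itinerary parameter is visible in the Cortex-A8 schedule added further down in this patch: a NEON multiply on .32 elements holds the NEON ALU/MUL pipe for two cycles and its result arrives a cycle later than the .8/.16 forms. Roughly, as the A8 file below defines them (in the operand lists, the def cycle comes first, then the source-read cycles):

    // .8/.16 multiply: one A8_NPipe cycle, result defined at cycle 6
    InstrItinData<IIC_VMULi16D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
                                 InstrStage<1, [A8_NPipe]>], [6, 2, 2]>,
    // .32 multiply: two A8_NPipe cycles, result defined at cycle 7
    InstrItinData<IIC_VMULi32D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
                                 InstrStage<2, [A8_NPipe]>], [7, 2, 1]>,

Passing itin16 and itin32 separately lets VMULL and friends pick up that distinction instead of costing every element size as IIC_VMULi16D.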
// VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D) -defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, "vqdmull", "s", - int_arm_neon_vqdmull, 1>; -defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D, "vqdmull", "s", - int_arm_neon_vqdmull>; +defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D, + "vqdmull", "s", int_arm_neon_vqdmull, 1>; +defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D, + "vqdmull", "s", int_arm_neon_vqdmull>; // Vector Multiply-Accumulate and Multiply-Subtract Operations. @@ -2177,15 +2182,17 @@ def : Pat<(v4f32 (fadd (v4f32 QPR:$src1), (SubReg_i32_lane imm:$lane)))>; // VMLAL : Vector Multiply Accumulate Long (Q += D * D) -defm VMLALs : N3VLInt3_QHS<0,1,0b1000,0, "vmlal", "s", int_arm_neon_vmlals>; -defm VMLALu : N3VLInt3_QHS<1,1,0b1000,0, "vmlal", "u", int_arm_neon_vmlalu>; +defm VMLALs : N3VLInt3_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D, + "vmlal", "s", int_arm_neon_vmlals>; +defm VMLALu : N3VLInt3_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D, + "vmlal", "u", int_arm_neon_vmlalu>; defm VMLALsls : N3VLInt3SL_HS<0, 0b0010, "vmlal", "s", int_arm_neon_vmlals>; defm VMLALslu : N3VLInt3SL_HS<1, 0b0010, "vmlal", "u", int_arm_neon_vmlalu>; // VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D) -defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, "vqdmlal", "s", - int_arm_neon_vqdmlal>; +defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, + "vqdmlal", "s", int_arm_neon_vqdmlal>; defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", int_arm_neon_vqdmlal>; // VMLS : Vector Multiply Subtract (integer and floating-point) @@ -2227,15 +2234,17 @@ def : Pat<(v4f32 (fsub (v4f32 QPR:$src1), (SubReg_i32_lane imm:$lane)))>; // VMLSL : Vector Multiply Subtract Long (Q -= D * D) -defm VMLSLs : N3VLInt3_QHS<0,1,0b1010,0, "vmlsl", "s", int_arm_neon_vmlsls>; -defm VMLSLu : N3VLInt3_QHS<1,1,0b1010,0, "vmlsl", "u", int_arm_neon_vmlslu>; +defm VMLSLs : N3VLInt3_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D, + "vmlsl", "s", int_arm_neon_vmlsls>; +defm VMLSLu : N3VLInt3_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D, + "vmlsl", "u", int_arm_neon_vmlslu>; defm VMLSLsls : N3VLInt3SL_HS<0, 0b0110, "vmlsl", "s", int_arm_neon_vmlsls>; defm VMLSLslu : N3VLInt3SL_HS<1, 0b0110, "vmlsl", "u", int_arm_neon_vmlslu>; // VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D) -defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, "vqdmlsl", "s", - int_arm_neon_vqdmlsl>; +defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D, + "vqdmlsl", "s", int_arm_neon_vqdmlsl>; defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>; // Vector Subtract Operations. 
@@ -2248,26 +2257,26 @@ def VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32", def VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32", v4f32, v4f32, fsub, 0>; // VSUBL : Vector Subtract Long (Q = D - D) -defm VSUBLs : N3VLInt_QHS<0,1,0b0010,0, IIC_VSHLiD, "vsubl", "s", - int_arm_neon_vsubls, 1>; -defm VSUBLu : N3VLInt_QHS<1,1,0b0010,0, IIC_VSHLiD, "vsubl", "u", - int_arm_neon_vsublu, 1>; +defm VSUBLs : N3VLInt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD, + "vsubl", "s", int_arm_neon_vsubls, 1>; +defm VSUBLu : N3VLInt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD, + "vsubl", "u", int_arm_neon_vsublu, 1>; // VSUBW : Vector Subtract Wide (Q = Q - D) defm VSUBWs : N3VWInt_QHS<0,1,0b0011,0, "vsubw", "s", int_arm_neon_vsubws, 0>; defm VSUBWu : N3VWInt_QHS<1,1,0b0011,0, "vsubw", "u", int_arm_neon_vsubwu, 0>; // VHSUB : Vector Halving Subtract defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm, - IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, "vhsub", "s", int_arm_neon_vhsubs, 0>; defm VHSUBu : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm, - IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, "vhsub", "u", int_arm_neon_vhsubu, 0>; // VQSUB : Vector Saturating Subtract defm VQSUBs : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm, - IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, "vqsub", "s", int_arm_neon_vqsubs, 0>; defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm, - IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, "vqsub", "u", int_arm_neon_vqsubu, 0>; // VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q) defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i", @@ -2279,8 +2288,8 @@ defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i", // Vector Comparisons.
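The comparison, halving/saturating-subtract, min/max, and absolute-difference families in the hunks below all move from the generic IIC_VBINi4* classes to the new IIC_VSUBi4* classes. In the Cortex-A8 itineraries added later in this patch, the two classes carry identical timing, so this is a reclassification rather than an A8 timing change; splitting the subtract-like operations into their own class presumably leaves room for other CPU models to cost them differently:

    // Both classes currently read the same on A8 (copied from ARMScheduleA8.td below):
    InstrItinData<IIC_VBINi4D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
                                InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
    InstrItinData<IIC_VSUBi4D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
                                InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,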
// VCEQ : Vector Compare Equal -defm VCEQ : N3V_QHS<1, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vceq", "i", NEONvceq, 1>; +defm VCEQ : N3V_QHS<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, + IIC_VSUBi4Q, "vceq", "i", NEONvceq, 1>; def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32, NEONvceq, 1>; def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32, @@ -2290,10 +2299,10 @@ defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i", "$dst, $src, #0">; // VCGE : Vector Compare Greater Than or Equal -defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vcge", "s", NEONvcge, 0>; -defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vcge", "u", NEONvcgeu, 0>; +defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, + IIC_VSUBi4Q, "vcge", "s", NEONvcge, 0>; +defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, + IIC_VSUBi4Q, "vcge", "u", NEONvcgeu, 0>; def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32, NEONvcge, 0>; def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32, @@ -2306,10 +2315,10 @@ defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s", "$dst, $src, #0">; // VCGT : Vector Compare Greater Than -defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vcgt", "s", NEONvcgt, 0>; -defm VCGTu : N3V_QHS<1, 0, 0b0011, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vcgt", "u", NEONvcgtu, 0>; +defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, + IIC_VSUBi4Q, "vcgt", "s", NEONvcgt, 0>; +defm VCGTu : N3V_QHS<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, + IIC_VSUBi4Q, "vcgt", "u", NEONvcgtu, 0>; def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32, NEONvcgt, 0>; def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32, @@ -2387,11 +2396,11 @@ def VORNq : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$dst), // VMVN : Vector Bitwise NOT def VMVNd : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0, - (outs DPR:$dst), (ins DPR:$src), IIC_VSHLiD, + (outs DPR:$dst), (ins DPR:$src), IIC_VSUBiD, "vmvn", "$dst, $src", "", [(set DPR:$dst, (v2i32 (vnot8 DPR:$src)))]>; def VMVNq : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0, - (outs QPR:$dst), (ins QPR:$src), IIC_VSHLiD, + (outs QPR:$dst), (ins QPR:$src), IIC_VSUBiD, "vmvn", "$dst, $src", "", [(set QPR:$dst, (v4i32 (vnot16 QPR:$src)))]>; def : Pat<(v2i32 (vnot8 DPR:$src)), (VMVNd DPR:$src)>; @@ -2447,10 +2456,10 @@ def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1, // VABD : Vector Absolute Difference defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm, - IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, "vabd", "s", int_arm_neon_vabds, 0>; defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm, - IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, "vabd", "u", int_arm_neon_vabdu, 0>; def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND, "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 0>; @@ -2458,56 +2467,68 @@ def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ, "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 0>; // VABDL : Vector Absolute Difference Long (Q = | D - D |) -defm VABDLs : N3VLInt_QHS<0,1,0b0111,0, IIC_VBINi4Q, +defm VABDLs : 
N3VLInt_QHS<0,1,0b0111,0, IIC_VSUBi4Q, IIC_VSUBi4Q, "vabdl", "s", int_arm_neon_vabdls, 0>; -defm VABDLu : N3VLInt_QHS<1,1,0b0111,0, IIC_VBINi4Q, +defm VABDLu : N3VLInt_QHS<1,1,0b0111,0, IIC_VSUBi4Q, IIC_VSUBi4Q, "vabdl", "u", int_arm_neon_vabdlu, 0>; // VABA : Vector Absolute Difference and Accumulate -defm VABAs : N3VInt3_QHS<0,0,0b0111,1, "vaba", "s", int_arm_neon_vabas>; -defm VABAu : N3VInt3_QHS<1,0,0b0111,1, "vaba", "u", int_arm_neon_vabau>; +defm VABAs : N3VInt3_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ, + "vaba", "s", int_arm_neon_vabas>; +defm VABAu : N3VInt3_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ, + "vaba", "u", int_arm_neon_vabau>; // VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |) -defm VABALs : N3VLInt3_QHS<0,1,0b0101,0, "vabal", "s", int_arm_neon_vabals>; -defm VABALu : N3VLInt3_QHS<1,1,0b0101,0, "vabal", "u", int_arm_neon_vabalu>; +defm VABALs : N3VLInt3_QHS<0,1,0b0101,0, IIC_VABAD, IIC_VABAD, + "vabal", "s", int_arm_neon_vabals>; +defm VABALu : N3VLInt3_QHS<1,1,0b0101,0, IIC_VABAD, IIC_VABAD, + "vabal", "u", int_arm_neon_vabalu>; // Vector Maximum and Minimum. // VMAX : Vector Maximum defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm, - IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, "vmax", "s", int_arm_neon_vmaxs, 1>; defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm, - IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, "vmax", "u", int_arm_neon_vmaxu, 1>; -def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND, "vmax", - "f32", v2f32, v2f32, int_arm_neon_vmaxs, 1>; -def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ, "vmax", - "f32", v4f32, v4f32, int_arm_neon_vmaxs, 1>; +def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND, + "vmax", "f32", + v2f32, v2f32, int_arm_neon_vmaxs, 1>; +def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ, + "vmax", "f32", + v4f32, v4f32, int_arm_neon_vmaxs, 1>; // VMIN : Vector Minimum defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm, - IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, "vmin", "s", int_arm_neon_vmins, 1>; defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm, - IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, "vmin", "u", int_arm_neon_vminu, 1>; -def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND, "vmin", - "f32", v2f32, v2f32, int_arm_neon_vmins, 1>; -def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ, "vmin", - "f32", v4f32, v4f32, int_arm_neon_vmins, 1>; +def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND, + "vmin", "f32", + v2f32, v2f32, int_arm_neon_vmins, 1>; +def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ, + "vmin", "f32", + v4f32, v4f32, int_arm_neon_vmins, 1>; // Vector Pairwise Operations. 
// VPADD : Vector Pairwise Add -def VPADDi8 : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VBINiD, "vpadd", - "i8", v8i8, v8i8, int_arm_neon_vpadd, 0>; -def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VBINiD, "vpadd", - "i16", v4i16, v4i16, int_arm_neon_vpadd, 0>; -def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VBINiD, "vpadd", - "i32", v2i32, v2i32, int_arm_neon_vpadd, 0>; -def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm, IIC_VBIND, "vpadd", - "f32", v2f32, v2f32, int_arm_neon_vpadd, 0>; +def VPADDi8 : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VSHLiD, + "vpadd", "i8", + v8i8, v8i8, int_arm_neon_vpadd, 0>; +def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD, + "vpadd", "i16", + v4i16, v4i16, int_arm_neon_vpadd, 0>; +def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD, + "vpadd", "i32", + v2i32, v2i32, int_arm_neon_vpadd, 0>; +def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm, + IIC_VBIND, "vpadd", "f32", + v2f32, v2f32, int_arm_neon_vpadd, 0>; // VPADDL : Vector Pairwise Add Long defm VPADDLs : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s", @@ -2522,35 +2543,35 @@ defm VPADALu : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u", int_arm_neon_vpadalu>; // VPMAX : Vector Pairwise Maximum -def VPMAXs8 : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VBINi4D, "vpmax", +def VPMAXs8 : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>; -def VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VBINi4D, "vpmax", +def VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", "s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>; -def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VBINi4D, "vpmax", +def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", "s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>; -def VPMAXu8 : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VBINi4D, "vpmax", +def VPMAXu8 : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", "u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>; -def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VBINi4D, "vpmax", +def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>; -def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VBINi4D, "vpmax", +def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>; -def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINi4D, "vpmax", +def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>; // VPMIN : Vector Pairwise Minimum -def VPMINs8 : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VBINi4D, "vpmin", +def VPMINs8 : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", "s8", v8i8, v8i8, int_arm_neon_vpmins, 0>; -def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VBINi4D, "vpmin", +def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", "s16", v4i16, v4i16, int_arm_neon_vpmins, 0>; -def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VBINi4D, "vpmin", +def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", "s32", v2i32, v2i32, int_arm_neon_vpmins, 0>; -def VPMINu8 : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VBINi4D, "vpmin", +def VPMINu8 : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", "u8", v8i8, v8i8, int_arm_neon_vpminu, 0>; -def 
VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VBINi4D, "vpmin", +def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>; -def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VBINi4D, "vpmin", +def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>; -def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINi4D, "vpmin", +def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VSUBi4D, "vpmin", "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>; // Vector Reciprocal and Reciprocal Square Root Estimate and Step. diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 262aae4..742bd40 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -2386,9 +2386,25 @@ let Defs = "\tb\t1f\n" "\tmovs\tr0, #1\t@ end eh.setjmp\n" "1:", "", - [(set R0, (ARMeh_sjlj_setjmp GPR:$src, tGPR:$val))]>; + [(set R0, (ARMeh_sjlj_setjmp GPR:$src, tGPR:$val))]>, + Requires<[IsThumb2, HasVFP2]>; } +let Defs = + [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR ] in { + def t2Int_eh_sjlj_setjmp_nofp : Thumb2XI<(outs), (ins GPR:$src, tGPR:$val), + AddrModeNone, SizeSpecial, NoItinerary, + "str\t$val, [$src, #8]\t@ begin eh.setjmp\n" + "\tmov\t$val, pc\n" + "\tadds\t$val, #9\n" + "\tstr\t$val, [$src, #4]\n" + "\tmovs\tr0, #0\n" + "\tb\t1f\n" + "\tmovs\tr0, #1\t@ end eh.setjmp\n" + "1:", "", + [(set R0, (ARMeh_sjlj_setjmp GPR:$src, tGPR:$val))]>, + Requires<[IsThumb2, NoVFP]>; +} //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 0458389..36fcaa1 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -256,25 +256,25 @@ def VCVTSD : VFPAI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm, // Between half-precision and single-precision. For disassembly only. 
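The half-precision conversions below stop borrowing the double/single class (the old "/* FIXME */ IIC_fpCVTDS" placeholder) and get dedicated itinerary classes, declared in ARMSchedule.td further down and given real timings in the new Cortex-A9 file:

    def IIC_fpCVTSH : InstrItinClass;  // one class per conversion direction
    def IIC_fpCVTHS : InstrItinClass;

On A9 the two directions are costed differently (result at cycle 4 for IIC_fpCVTSH versus cycle 2 for IIC_fpCVTHS, per the A9 itineraries near the end of this patch), which a single shared placeholder class could not express.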
def VCVTBSH : ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$dst), (ins SPR:$a), - /* FIXME */ IIC_fpCVTDS, "vcvtb", ".f32.f16\t$dst, $a", + /* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$dst, $a", [/* For disassembly only; pattern left blank */]>; def : ARMPat<(f32_to_f16 SPR:$a), (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>; def VCVTBHS : ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$dst), (ins SPR:$a), - /* FIXME */ IIC_fpCVTDS, "vcvtb", ".f16.f32\t$dst, $a", + /* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$dst, $a", [/* For disassembly only; pattern left blank */]>; def : ARMPat<(f16_to_f32 GPR:$a), (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>; def VCVTTSH : ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$dst), (ins SPR:$a), - /* FIXME */ IIC_fpCVTDS, "vcvtt", ".f32.f16\t$dst, $a", + /* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$dst, $a", [/* For disassembly only; pattern left blank */]>; def VCVTTHS : ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$dst), (ins SPR:$a), - /* FIXME */ IIC_fpCVTDS, "vcvtt", ".f16.f32\t$dst, $a", + /* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$dst, $a", [/* For disassembly only; pattern left blank */]>; let neverHasSideEffects = 1 in { @@ -306,23 +306,23 @@ def VSQRTS : ASuI<0b11101, 0b11, 0b0001, 0b11, 0, (outs SPR:$dst), (ins SPR:$a), // def VMOVRS : AVConv2I<0b11100001, 0b1010, (outs GPR:$dst), (ins SPR:$src), - IIC_VMOVSI, "vmov", "\t$dst, $src", + IIC_fpMOVSI, "vmov", "\t$dst, $src", [(set GPR:$dst, (bitconvert SPR:$src))]>; def VMOVSR : AVConv4I<0b11100000, 0b1010, (outs SPR:$dst), (ins GPR:$src), - IIC_VMOVIS, "vmov", "\t$dst, $src", + IIC_fpMOVIS, "vmov", "\t$dst, $src", [(set SPR:$dst, (bitconvert GPR:$src))]>; def VMOVRRD : AVConv3I<0b11000101, 0b1011, (outs GPR:$wb, GPR:$dst2), (ins DPR:$src), - IIC_VMOVDI, "vmov", "\t$wb, $dst2, $src", + IIC_fpMOVDI, "vmov", "\t$wb, $dst2, $src", [/* FIXME: Can't write pattern for multiple result instr*/]> { let Inst{7-6} = 0b00; } def VMOVRRS : AVConv3I<0b11000101, 0b1010, (outs GPR:$wb, GPR:$dst2), (ins SPR:$src1, SPR:$src2), - IIC_VMOVDI, "vmov", "\t$wb, $dst2, $src1, $src2", + IIC_fpMOVDI, "vmov", "\t$wb, $dst2, $src1, $src2", [/* For disassembly only; pattern left blank */]> { let Inst{7-6} = 0b00; } @@ -332,14 +332,14 @@ def VMOVRRS : AVConv3I<0b11000101, 0b1010, def VMOVDRR : AVConv5I<0b11000100, 0b1011, (outs DPR:$dst), (ins GPR:$src1, GPR:$src2), - IIC_VMOVID, "vmov", "\t$dst, $src1, $src2", + IIC_fpMOVID, "vmov", "\t$dst, $src1, $src2", [(set DPR:$dst, (arm_fmdrr GPR:$src1, GPR:$src2))]> { let Inst{7-6} = 0b00; } def VMOVSRR : AVConv5I<0b11000100, 0b1010, (outs SPR:$dst1, SPR:$dst2), (ins GPR:$src1, GPR:$src2), - IIC_VMOVID, "vmov", "\t$dst1, $dst2, $src1, $src2", + IIC_fpMOVID, "vmov", "\t$dst1, $dst2, $src1, $src2", [/* For disassembly only; pattern left blank */]> { let Inst{7-6} = 0b00; } @@ -678,7 +678,7 @@ def VMSR : VFPAI<(outs), (ins GPR:$src), VFPMiscFrm, IIC_fpSTAT, "vmsr", // Materialize FP immediates. VFP3 only. 
let isReMaterializable = 1 in { def FCONSTD : VFPAI<(outs DPR:$dst), (ins vfp_f64imm:$imm), - VFPMiscFrm, IIC_VMOVImm, + VFPMiscFrm, IIC_fpUNA64, "vmov", ".f64\t$dst, $imm", [(set DPR:$dst, vfp_f64imm:$imm)]>, Requires<[HasVFP3]> { let Inst{27-23} = 0b11101; @@ -689,7 +689,7 @@ def FCONSTD : VFPAI<(outs DPR:$dst), (ins vfp_f64imm:$imm), } def FCONSTS : VFPAI<(outs SPR:$dst), (ins vfp_f32imm:$imm), - VFPMiscFrm, IIC_VMOVImm, + VFPMiscFrm, IIC_fpUNA32, "vmov", ".f32\t$dst, $imm", [(set SPR:$dst, vfp_f32imm:$imm)]>, Requires<[HasVFP3]> { let Inst{27-23} = 0b11101; diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp index 8c0b720..b31a4fa 100644 --- a/lib/Target/ARM/ARMJITInfo.cpp +++ b/lib/Target/ARM/ARMJITInfo.cpp @@ -27,7 +27,7 @@ using namespace llvm; void ARMJITInfo::replaceMachineCodeForFunction(void *Old, void *New) { - llvm_report_error("ARMJITInfo::replaceMachineCodeForFunction"); + report_fatal_error("ARMJITInfo::replaceMachineCodeForFunction"); } /// JITCompilerFunction - This contains the address of the JIT function used to diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index cb762a4..8585c1e 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -1358,7 +1358,7 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, return false; unsigned Align = (*Op0->memoperands_begin())->getAlignment(); - Function *Func = MF->getFunction(); + const Function *Func = MF->getFunction(); unsigned ReqAlign = STI->hasV6Ops() ? TD->getPrefTypeAlignment(Type::getInt64Ty(Func->getContext())) : 8; // Pre-v6 need 8-byte align diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h index c998ede..0134276 100644 --- a/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -85,6 +85,9 @@ class ARMFunctionInfo : public MachineFunctionInfo { unsigned ConstPoolEntryUId; + /// VarArgsFrameIndex - FrameIndex for start of varargs area. 
+ int VarArgsFrameIndex; + public: ARMFunctionInfo() : isThumb(false), @@ -94,7 +97,7 @@ public: FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), GPRCS1Frames(0), GPRCS2Frames(0), DPRCSFrames(0), - JumpTableUId(0), ConstPoolEntryUId(0) {} + JumpTableUId(0), ConstPoolEntryUId(0), VarArgsFrameIndex(0) {} explicit ARMFunctionInfo(MachineFunction &MF) : isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()), @@ -105,7 +108,7 @@ public: GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), GPRCS1Frames(32), GPRCS2Frames(32), DPRCSFrames(32), SpilledCSRegs(MF.getTarget().getRegisterInfo()->getNumRegs()), - JumpTableUId(0), ConstPoolEntryUId(0) {} + JumpTableUId(0), ConstPoolEntryUId(0), VarArgsFrameIndex(0) {} bool isThumbFunction() const { return isThumb; } bool isThumb1OnlyFunction() const { return isThumb && !hasThumb2; } @@ -223,6 +226,9 @@ public: unsigned createConstPoolEntryUId() { return ConstPoolEntryUId++; } + + int getVarArgsFrameIndex() const { return VarArgsFrameIndex; } + void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; } }; } // End llvm namespace diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td index fc4c5f5..b60ccca 100644 --- a/lib/Target/ARM/ARMSchedule.td +++ b/lib/Target/ARM/ARMSchedule.td @@ -8,17 +8,6 @@ //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// -// Functional units across ARM processors -// -def FU_Issue : FuncUnit; // issue -def FU_Pipe0 : FuncUnit; // pipeline 0 -def FU_Pipe1 : FuncUnit; // pipeline 1 -def FU_LdSt0 : FuncUnit; // pipeline 0 load/store -def FU_LdSt1 : FuncUnit; // pipeline 1 load/store -def FU_NPipe : FuncUnit; // NEON ALU/MUL pipe -def FU_NLSPipe : FuncUnit; // NEON LS pipe - -//===----------------------------------------------------------------------===// // Instruction Itinerary classes used for ARM // def IIC_iALUx : InstrItinClass; @@ -69,10 +58,16 @@ def IIC_fpCMP32 : InstrItinClass; def IIC_fpCMP64 : InstrItinClass; def IIC_fpCVTSD : InstrItinClass; def IIC_fpCVTDS : InstrItinClass; +def IIC_fpCVTSH : InstrItinClass; +def IIC_fpCVTHS : InstrItinClass; def IIC_fpCVTIS : InstrItinClass; def IIC_fpCVTID : InstrItinClass; def IIC_fpCVTSI : InstrItinClass; def IIC_fpCVTDI : InstrItinClass; +def IIC_fpMOVIS : InstrItinClass; +def IIC_fpMOVID : InstrItinClass; +def IIC_fpMOVSI : InstrItinClass; +def IIC_fpMOVDI : InstrItinClass; def IIC_fpALU32 : InstrItinClass; def IIC_fpALU64 : InstrItinClass; def IIC_fpMUL32 : InstrItinClass; @@ -125,6 +120,10 @@ def IIC_VSUBiD : InstrItinClass; def IIC_VSUBiQ : InstrItinClass; def IIC_VBINi4D : InstrItinClass; def IIC_VBINi4Q : InstrItinClass; +def IIC_VSUBi4D : InstrItinClass; +def IIC_VSUBi4Q : InstrItinClass; +def IIC_VABAD : InstrItinClass; +def IIC_VABAQ : InstrItinClass; def IIC_VSHLiD : InstrItinClass; def IIC_VSHLiQ : InstrItinClass; def IIC_VSHLi4D : InstrItinClass; @@ -153,8 +152,8 @@ def IIC_VTBX4 : InstrItinClass; //===----------------------------------------------------------------------===// // Processor instruction itineraries. 
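Because the functional units deleted in the hunk above (FU_Issue, FU_Pipe0, ...) were global, every processor model had to share one pipeline description. ProcessorItineraries now takes the unit list as an explicit first argument, so each CPU file can declare its own prefixed units:

    // Empty model: no functional units, no itinerary data.
    def GenericItineraries : ProcessorItineraries<[], []>;

    // Per-CPU models pass their own units (the A8 form from the new file below):
    def CortexA8Itineraries : ProcessorItineraries<
      [A8_Issue, A8_Pipe0, A8_Pipe1, A8_LdSt0, A8_LdSt1, A8_NPipe, A8_NLSPipe],
      [ /* InstrItinData entries ... */ ]>;

This signature change is what allows the separate per-CPU ARMScheduleA8.td and ARMScheduleA9.td files introduced below.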
-def GenericItineraries : ProcessorItineraries<[]>; - +def GenericItineraries : ProcessorItineraries<[], []>; include "ARMScheduleV6.td" -include "ARMScheduleV7.td" +include "ARMScheduleA8.td" +include "ARMScheduleA9.td" diff --git a/lib/Target/ARM/ARMScheduleA8.td b/lib/Target/ARM/ARMScheduleA8.td new file mode 100644 index 0000000..bbfc0b2 --- /dev/null +++ b/lib/Target/ARM/ARMScheduleA8.td @@ -0,0 +1,618 @@ +//=- ARMScheduleA8.td - ARM Cortex-A8 Scheduling Definitions -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the itinerary class data for the ARM Cortex A8 processors. +// +//===----------------------------------------------------------------------===// + +// +// Scheduling information derived from "Cortex-A8 Technical Reference Manual". +// Functional Units. +def A8_Issue : FuncUnit; // issue +def A8_Pipe0 : FuncUnit; // pipeline 0 +def A8_Pipe1 : FuncUnit; // pipeline 1 +def A8_LdSt0 : FuncUnit; // pipeline 0 load/store +def A8_LdSt1 : FuncUnit; // pipeline 1 load/store +def A8_NPipe : FuncUnit; // NEON ALU/MUL pipe +def A8_NLSPipe : FuncUnit; // NEON LS pipe +// +// Dual issue pipeline represented by A8_Pipe0 | A8_Pipe1 +// +def CortexA8Itineraries : ProcessorItineraries< + [A8_Issue, A8_Pipe0, A8_Pipe1, A8_LdSt0, A8_LdSt1, A8_NPipe, A8_NLSPipe], [ + // Two fully-pipelined integer ALU pipelines + // + // No operand cycles + InstrItinData<IIC_iALUx , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>, + // + // Binary Instructions that produce a result + InstrItinData<IIC_iALUi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>, + InstrItinData<IIC_iALUr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 2]>, + InstrItinData<IIC_iALUsi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>, + InstrItinData<IIC_iALUsr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1, 1]>, + // + // Unary Instructions that produce a result + InstrItinData<IIC_iUNAr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>, + InstrItinData<IIC_iUNAsi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>, + InstrItinData<IIC_iUNAsr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>, + // + // Compare instructions + InstrItinData<IIC_iCMPi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>, + InstrItinData<IIC_iCMPr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>, + InstrItinData<IIC_iCMPsi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>, + InstrItinData<IIC_iCMPsr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>, + // + // Move instructions, unconditional + InstrItinData<IIC_iMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1]>, + InstrItinData<IIC_iMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>, + InstrItinData<IIC_iMOVsi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>, + InstrItinData<IIC_iMOVsr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1, 1]>, + // + // Move instructions, conditional + InstrItinData<IIC_iCMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>, + InstrItinData<IIC_iCMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>, + InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>, + InstrItinData<IIC_iCMOVsr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>, + + // Integer multiply pipeline + // Result written in E5, but that is relative to the last cycle of multicycle, + // so we use 6 for those cases + // + InstrItinData<IIC_iMUL16 , [InstrStage<1, 
[A8_Pipe0]>], [5, 1, 1]>, + InstrItinData<IIC_iMAC16 , [InstrStage<1, [A8_Pipe1], 0>, + InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>, + InstrItinData<IIC_iMUL32 , [InstrStage<1, [A8_Pipe1], 0>, + InstrStage<2, [A8_Pipe0]>], [6, 1, 1]>, + InstrItinData<IIC_iMAC32 , [InstrStage<1, [A8_Pipe1], 0>, + InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>, + InstrItinData<IIC_iMUL64 , [InstrStage<2, [A8_Pipe1], 0>, + InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>, + InstrItinData<IIC_iMAC64 , [InstrStage<2, [A8_Pipe1], 0>, + InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>, + + // Integer load pipeline + // + // loads have an extra cycle of latency, but are fully pipelined + // use A8_Issue to enforce the 1 load/store per cycle limit + // + // Immediate offset + InstrItinData<IIC_iLoadi , [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 1]>, + // + // Register offset + InstrItinData<IIC_iLoadr , [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>, + // + // Scaled register offset, issues over 2 cycles + InstrItinData<IIC_iLoadsi , [InstrStage<2, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0], 0>, + InstrStage<1, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [4, 1, 1]>, + // + // Immediate offset with update + InstrItinData<IIC_iLoadiu , [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 2, 1]>, + // + // Register offset with update + InstrItinData<IIC_iLoadru , [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 2, 1, 1]>, + // + // Scaled register offset with update, issues over 2 cycles + InstrItinData<IIC_iLoadsiu , [InstrStage<2, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0], 0>, + InstrStage<1, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [4, 3, 1, 1]>, + // + // Load multiple + InstrItinData<IIC_iLoadm , [InstrStage<2, [A8_Issue], 0>, + InstrStage<2, [A8_Pipe0], 0>, + InstrStage<2, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>]>, + + // Integer store pipeline + // + // use A8_Issue to enforce the 1 load/store per cycle limit + // + // Immediate offset + InstrItinData<IIC_iStorei , [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 1]>, + // + // Register offset + InstrItinData<IIC_iStorer , [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>, + // + // Scaled register offset, issues over 2 cycles + InstrItinData<IIC_iStoresi , [InstrStage<2, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0], 0>, + InstrStage<1, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>, + // + // Immediate offset with update + InstrItinData<IIC_iStoreiu , [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [2, 3, 1]>, + // + // Register offset with update + InstrItinData<IIC_iStoreru , [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [2, 3, 1, 1]>, + // + // Scaled register offset with update, issues over 2 cycles + InstrItinData<IIC_iStoresiu, [InstrStage<2, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0], 0>, + InstrStage<1, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 3, 1, 1]>, + // + // Store multiple + InstrItinData<IIC_iStorem , [InstrStage<2, 
[A8_Issue], 0>, + InstrStage<2, [A8_Pipe0], 0>, + InstrStage<2, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>]>, + + // Branch + // + // no delay slots, so the latency of a branch is unimportant + InstrItinData<IIC_Br , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>, + + // VFP + // Issue through integer pipeline, and execute in NEON unit. We assume + // RunFast mode so that NFP pipeline is used for single-precision when + // possible. + // + // FP Special Register to Integer Register File Move + InstrItinData<IIC_fpSTAT , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NLSPipe]>]>, + // + // Single-precision FP Unary + InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [7, 1]>, + // + // Double-precision FP Unary + InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<4, [A8_NPipe], 0>, + InstrStage<4, [A8_NLSPipe]>], [4, 1]>, + // + // Single-precision FP Compare + InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [1, 1]>, + // + // Double-precision FP Compare + InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<4, [A8_NPipe], 0>, + InstrStage<4, [A8_NLSPipe]>], [4, 1]>, + // + // Single to Double FP Convert + InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<7, [A8_NPipe], 0>, + InstrStage<7, [A8_NLSPipe]>], [7, 1]>, + // + // Double to Single FP Convert + InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<5, [A8_NPipe], 0>, + InstrStage<5, [A8_NLSPipe]>], [5, 1]>, + // + // Single-Precision FP to Integer Convert + InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [7, 1]>, + // + // Double-Precision FP to Integer Convert + InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<8, [A8_NPipe], 0>, + InstrStage<8, [A8_NLSPipe]>], [8, 1]>, + // + // Integer to Single-Precision FP Convert + InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [7, 1]>, + // + // Integer to Double-Precision FP Convert + InstrItinData<IIC_fpCVTID , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<8, [A8_NPipe], 0>, + InstrStage<8, [A8_NLSPipe]>], [8, 1]>, + // + // Single-precision FP ALU + InstrItinData<IIC_fpALU32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [7, 1, 1]>, + // + // Double-precision FP ALU + InstrItinData<IIC_fpALU64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<9, [A8_NPipe], 0>, + InstrStage<9, [A8_NLSPipe]>], [9, 1, 1]>, + // + // Single-precision FP Multiply + InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [7, 1, 1]>, + // + // Double-precision FP Multiply + InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<11, [A8_NPipe], 0>, + InstrStage<11, [A8_NLSPipe]>], [11, 1, 1]>, + // + // Single-precision FP MAC + InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [7, 2, 1, 1]>, + // + // Double-precision FP MAC + InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<19, [A8_NPipe], 0>, + InstrStage<19, [A8_NLSPipe]>], [19, 2, 1, 1]>, + // + // Single-precision FP DIV + InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<20, [A8_NPipe], 0>, + InstrStage<20, [A8_NLSPipe]>], [20, 1, 1]>, + // + // Double-precision FP DIV + InstrItinData<IIC_fpDIV64 , 
[InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<29, [A8_NPipe], 0>, + InstrStage<29, [A8_NLSPipe]>], [29, 1, 1]>, + // + // Single-precision FP SQRT + InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<19, [A8_NPipe], 0>, + InstrStage<19, [A8_NLSPipe]>], [19, 1]>, + // + // Double-precision FP SQRT + InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<29, [A8_NPipe], 0>, + InstrStage<29, [A8_NLSPipe]>], [29, 1]>, + // + // Single-precision FP Load + // use A8_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_fpLoad32, [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>]>, + // + // Double-precision FP Load + // use A8_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_fpLoad64, [InstrStage<2, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0], 0>, + InstrStage<1, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>]>, + // + // FP Load Multiple + // use A8_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_fpLoadm, [InstrStage<3, [A8_Issue], 0>, + InstrStage<2, [A8_Pipe0], 0>, + InstrStage<2, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>]>, + // + // Single-precision FP Store + // use A8_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_fpStore32,[InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>]>, + // + // Double-precision FP Store + // use A8_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_fpStore64,[InstrStage<2, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0], 0>, + InstrStage<1, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>]>, + // + // FP Store Multiple + // use A8_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_fpStorem, [InstrStage<3, [A8_Issue], 0>, + InstrStage<2, [A8_Pipe0], 0>, + InstrStage<2, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>]>, + + // NEON + // Issue through integer pipeline, and execute in NEON unit. 
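A quick key for reading the InstrItinData entries in this file, as I read LLVM's itinerary model: the first list gives pipeline stages as InstrStage<cycles, [candidate units], timeinc>, where the optional third argument is how many cycles pass before the next stage may start (0 means the next stage begins in the same cycle, i.e. the stages overlap); the second list gives per-operand cycles, definition (result-available) cycles first, then source-read cycles. Annotated against the double-precision multiply entry from the hunk above:

    InstrItinData<IIC_fpMUL64, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, // 1 cycle in either issue pipe
                                InstrStage<11, [A8_NPipe], 0>,      // timeinc 0: next stage starts immediately,
                                InstrStage<11, [A8_NLSPipe]>],      // so both NEON pipes are busy for 11 cycles
                               [11, 1, 1]>, // result at cycle 11; both sources read at cycle 1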
+ // + // VLD1 + // FIXME: We don't model this instruction properly + InstrItinData<IIC_VLD1, [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>]>, + // + // VLD2 + // FIXME: We don't model this instruction properly + InstrItinData<IIC_VLD2, [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>], [2, 2, 1]>, + // + // VLD3 + // FIXME: We don't model this instruction properly + InstrItinData<IIC_VLD3, [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>], [2, 2, 2, 1]>, + // + // VLD4 + // FIXME: We don't model this instruction properly + InstrItinData<IIC_VLD4, [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>], [2, 2, 2, 2, 1]>, + // + // VST + // FIXME: We don't model this instruction properly + InstrItinData<IIC_VST, [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>]>, + // + // Double-register FP Unary + InstrItinData<IIC_VUNAD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [5, 2]>, + // + // Quad-register FP Unary + // Result written in N5, but that is relative to the last cycle of multicycle, + // so we use 6 for those cases + InstrItinData<IIC_VUNAQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NPipe]>], [6, 2]>, + // + // Double-register FP Binary + InstrItinData<IIC_VBIND, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [5, 2, 2]>, + // + // Quad-register FP Binary + // Result written in N5, but that is relative to the last cycle of multicycle, + // so we use 6 for those cases + InstrItinData<IIC_VBINQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NPipe]>], [6, 2, 2]>, + // + // Move Immediate + InstrItinData<IIC_VMOVImm, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [3]>, + // + // Double-register Permute Move + InstrItinData<IIC_VMOVD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NLSPipe]>], [2, 1]>, + // + // Quad-register Permute Move + // Result written in N2, but that is relative to the last cycle of multicycle, + // so we use 3 for those cases + InstrItinData<IIC_VMOVQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NLSPipe]>], [3, 1]>, + // + // Integer to Single-precision Move + InstrItinData<IIC_VMOVIS , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NLSPipe]>], [2, 1]>, + // + // Integer to Double-precision Move + InstrItinData<IIC_VMOVID , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NLSPipe]>], [2, 1, 1]>, + // + // Single-precision to Integer Move + InstrItinData<IIC_VMOVSI , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NLSPipe]>], [20, 1]>, + // + // Double-precision to Integer Move + InstrItinData<IIC_VMOVDI , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NLSPipe]>], [20, 20, 1]>, + // + // Integer to Lane Move + InstrItinData<IIC_VMOVISL , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NLSPipe]>], [3, 1, 1]>, + // + // Double-register Permute + InstrItinData<IIC_VPERMD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NLSPipe]>], [2, 2, 1, 1]>, + // + // Quad-register Permute + // Result written in N2, but that is relative to the last cycle of multicycle, + // so we use 3 for those cases + 
InstrItinData<IIC_VPERMQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NLSPipe]>], [3, 3, 1, 1]>, + // + // Quad-register Permute (3 cycle issue) + // Result written in N2, but that is relative to the last cycle of multicycle, + // so we use 4 for those cases + InstrItinData<IIC_VPERMQ3, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NLSPipe]>, + InstrStage<1, [A8_NPipe], 0>, + InstrStage<2, [A8_NLSPipe]>], [4, 4, 1, 1]>, + // + // Double-register FP Multiply-Accumulate + InstrItinData<IIC_VMACD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [9, 3, 2, 2]>, + // + // Quad-register FP Multiply-Accumulate + // Result written in N9, but that is relative to the last cycle of multicycle, + // so we use 10 for those cases + InstrItinData<IIC_VMACQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NPipe]>], [10, 3, 2, 2]>, + // + // Double-register Reciprocal Step + InstrItinData<IIC_VRECSD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [9, 2, 2]>, + // + // Quad-register Reciprocal Step + InstrItinData<IIC_VRECSQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NPipe]>], [10, 2, 2]>, + // + // Double-register Integer Count + InstrItinData<IIC_VCNTiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [3, 2, 2]>, + // + // Quad-register Integer Count + // Result written in N3, but that is relative to the last cycle of multicycle, + // so we use 4 for those cases + InstrItinData<IIC_VCNTiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NPipe]>], [4, 2, 2]>, + // + // Double-register Integer Unary + InstrItinData<IIC_VUNAiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [4, 2]>, + // + // Quad-register Integer Unary + InstrItinData<IIC_VUNAiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [4, 2]>, + // + // Double-register Integer Q-Unary + InstrItinData<IIC_VQUNAiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [4, 1]>, + // + // Quad-register Integer Q-Unary + InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [4, 1]>, + // + // Double-register Integer Binary + InstrItinData<IIC_VBINiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [3, 2, 2]>, + // + // Quad-register Integer Binary + InstrItinData<IIC_VBINiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [3, 2, 2]>, + // + // Double-register Integer Binary (4 cycle) + InstrItinData<IIC_VBINi4D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [4, 2, 1]>, + // + // Quad-register Integer Binary (4 cycle) + InstrItinData<IIC_VBINi4Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [4, 2, 1]>, + + // + // Double-register Integer Subtract + InstrItinData<IIC_VSUBiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [3, 2, 1]>, + // + // Quad-register Integer Subtract + InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [3, 2, 1]>, + // + // Double-register Integer Subtract (4 cycle) + InstrItinData<IIC_VSUBi4D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [4, 2, 1]>, + // + // Quad-register Integer Subtract (4 cycle) + InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [4, 2, 1]>, + // + // Double-register Integer Shift + InstrItinData<IIC_VSHLiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [3, 1, 1]>, + // + //
Quad-register Integer Shift + InstrItinData<IIC_VSHLiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NPipe]>], [4, 1, 1]>, + // + // Double-register Integer Shift (4 cycle) + InstrItinData<IIC_VSHLi4D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [4, 1, 1]>, + // + // Quad-register Integer Shift (4 cycle) + InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NPipe]>], [5, 1, 1]>, + // + // Double-register Integer Pair Add Long + InstrItinData<IIC_VPALiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [6, 3, 1]>, + // + // Quad-register Integer Pair Add Long + InstrItinData<IIC_VPALiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NPipe]>], [7, 3, 1]>, + // + // Double-register Absolute Difference and Accumulate + InstrItinData<IIC_VABAD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [6, 3, 2, 1]>, + // + // Quad-register Absolute Difference and Accumulate + InstrItinData<IIC_VABAQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NPipe]>], [6, 3, 2, 1]>, + + // + // Double-register Integer Multiply (.8, .16) + InstrItinData<IIC_VMULi16D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [6, 2, 2]>, + // + // Double-register Integer Multiply (.32) + InstrItinData<IIC_VMULi32D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NPipe]>], [7, 2, 1]>, + // + // Quad-register Integer Multiply (.8, .16) + InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NPipe]>], [7, 2, 2]>, + // + // Quad-register Integer Multiply (.32) + InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>, + InstrStage<2, [A8_NLSPipe], 0>, + InstrStage<3, [A8_NPipe]>], [9, 2, 1]>, + // + // Double-register Integer Multiply-Accumulate (.8, .16) + InstrItinData<IIC_VMACi16D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [6, 3, 2, 2]>, + // + // Double-register Integer Multiply-Accumulate (.32) + InstrItinData<IIC_VMACi32D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NPipe]>], [7, 3, 2, 1]>, + // + // Quad-register Integer Multiply-Accumulate (.8, .16) + InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NPipe]>], [7, 3, 2, 2]>, + // + // Quad-register Integer Multiply-Accumulate (.32) + InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>, + InstrStage<2, [A8_NLSPipe], 0>, + InstrStage<3, [A8_NPipe]>], [9, 3, 2, 1]>, + // + // Double-register VEXT + InstrItinData<IIC_VEXTD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NLSPipe]>], [2, 1, 1]>, + // + // Quad-register VEXT + InstrItinData<IIC_VEXTQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NLSPipe]>], [3, 1, 1]>, + // + // VTB + InstrItinData<IIC_VTB1, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NLSPipe]>], [3, 2, 1]>, + InstrItinData<IIC_VTB2, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NLSPipe]>], [3, 2, 2, 1]>, + InstrItinData<IIC_VTB3, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NLSPipe]>, + InstrStage<1, [A8_NPipe], 0>, + InstrStage<2, [A8_NLSPipe]>], [4, 2, 2, 3, 1]>, + InstrItinData<IIC_VTB4, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NLSPipe]>, + InstrStage<1, [A8_NPipe], 0>, + InstrStage<2, [A8_NLSPipe]>], [4, 2, 2, 3, 3, 1]>, + // + // VTBX + InstrItinData<IIC_VTBX1, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NLSPipe]>], [3, 1, 
2, 1]>, + InstrItinData<IIC_VTBX2, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NLSPipe]>], [3, 1, 2, 2, 1]>, + InstrItinData<IIC_VTBX3, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NLSPipe]>, + InstrStage<1, [A8_NPipe], 0>, + InstrStage<2, [A8_NLSPipe]>], [4, 1, 2, 2, 3, 1]>, + InstrItinData<IIC_VTBX4, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NLSPipe]>, + InstrStage<1, [A8_NPipe], 0>, + InstrStage<2, [A8_NLSPipe]>], [4, 1, 2, 2, 3, 3, 1]> +]>; diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td new file mode 100644 index 0000000..75320d9 --- /dev/null +++ b/lib/Target/ARM/ARMScheduleA9.td @@ -0,0 +1,749 @@ +//=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the itinerary class data for the ARM Cortex A9 processors. +// +//===----------------------------------------------------------------------===// + +// +// Ad-hoc scheduling information derived from the pretty vague "Cortex-A9 +// Technical Reference Manual". +// +// Functional units +def A9_Issue : FuncUnit; // issue +def A9_Pipe0 : FuncUnit; // pipeline 0 +def A9_Pipe1 : FuncUnit; // pipeline 1 +def A9_LSPipe : FuncUnit; // LS pipe +def A9_NPipe : FuncUnit; // NEON ALU/MUL pipe +def A9_DRegsVFP: FuncUnit; // FP register set, VFP side +def A9_DRegsN : FuncUnit; // FP register set, NEON side + +// Dual issue pipeline represented by A9_Pipe0 | A9_Pipe1 +// +def CortexA9Itineraries : ProcessorItineraries< + [A9_NPipe, A9_DRegsN, A9_DRegsVFP, A9_LSPipe, A9_Pipe0, A9_Pipe1, A9_Issue], [ + // VFP and NEON share the same register file. This means that every VFP + // instruction must wait for full completion of any NEON instruction still + // in flight, and vice versa. We model this behavior with two artificial FUs: + // DRegsVFP and DRegsN. + // + // Every VFP instruction: + // - Acquires DRegsVFP resource for 1 cycle + // - Reserves DRegsN resource for the whole duration (including time to + // register file writeback!). + // Every NEON instruction does the same but with FUs swapped. + // + // Since the reserved FU cannot be acquired, this precisely models + // "cross-domain" stalls. + + // VFP + // Issue through integer pipeline, and execute in NEON unit.
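The comment above describes the trick; concretely, every VFP entry in this file opens with the same two pseudo-stages (annotated against the single-precision unary entry that follows):

    InstrItinData<IIC_fpUNA32, [InstrStage<1, [A9_DRegsVFP], 0, Required>, // acquire the VFP side for 1 cycle
                                InstrStage<3, [A9_DRegsN], 0, Reserved>,   // hold the NEON side through writeback
                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
                                InstrStage<1, [A9_NPipe]>], [1, 1]>,

A later NEON instruction must acquire A9_DRegsN as Required, which it cannot do while the stage above holds it Reserved, so it stalls exactly until the VFP writeback completes; NEON entries use the mirror-image pair, with the FUs swapped.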
+ + // FP Special Register to Integer Register File Move + InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<2, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>]>, + // + // Single-precision FP Unary + InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + // Extra latency cycles since wbck is 2 cycles + InstrStage<3, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [1, 1]>, + // + // Double-precision FP Unary + InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + // Extra latency cycles since wbck is 2 cycles + InstrStage<3, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [1, 1]>, + + // + // Single-precision FP Compare + InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + // Extra latency cycles since wbck is 4 cycles + InstrStage<5, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [1, 1]>, + // + // Double-precision FP Compare + InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + // Extra latency cycles since wbck is 4 cycles + InstrStage<5, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [1, 1]>, + // + // Single to Double FP Convert + InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<5, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 1]>, + // + // Double to Single FP Convert + InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<5, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 1]>, + + // + // Single to Half FP Convert + InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<5, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 1]>, + // + // Half to Single FP Convert + InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<3, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [2, 1]>, + + // + // Single-Precision FP to Integer Convert + InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<5, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 1]>, + // + // Double-Precision FP to Integer Convert + InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<5, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 1]>, + // + // Integer to Single-Precision FP Convert + InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<5, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 1]>, + // + // Integer to Double-Precision FP Convert + InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<5, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 1]>, + // + // Single-precision FP ALU + InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<5, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 1, 1]>, + // + // Double-precision FP ALU + 
InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<5, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 1, 1]>, + // + // Single-precision FP Multiply + InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<6, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [5, 1, 1]>, + // + // Double-precision FP Multiply + InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<7, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [6, 1, 1]>, + // + // Single-precision FP MAC + InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<9, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [8, 0, 1, 1]>, + // + // Double-precision FP MAC + InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<10, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [9, 0, 1, 1]>, + // + // Single-precision FP DIV + InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<16, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<10, [A9_NPipe]>], [15, 1, 1]>, + // + // Double-precision FP DIV + InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<26, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<20, [A9_NPipe]>], [25, 1, 1]>, + // + // Single-precision FP SQRT + InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<18, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<13, [A9_NPipe]>], [17, 1]>, + // + // Double-precision FP SQRT + InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<33, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<28, [A9_NPipe]>], [32, 1]>, + + // + // Integer to Single-precision Move + InstrItinData<IIC_fpMOVIS, [InstrStage<1, [A9_DRegsVFP], 0, Required>, + // Extra 1 latency cycle since wbck is 2 cycles + InstrStage<3, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [1, 1]>, + // + // Integer to Double-precision Move + InstrItinData<IIC_fpMOVID, [InstrStage<1, [A9_DRegsVFP], 0, Required>, + // Extra 1 latency cycle since wbck is 2 cycles + InstrStage<3, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [1, 1, 1]>, + // + // Single-precision to Integer Move + InstrItinData<IIC_fpMOVSI, [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<2, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [1, 1]>, + // + // Double-precision to Integer Move + InstrItinData<IIC_fpMOVDI, [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<2, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [1, 1, 1]>, + // + // Single-precision FP Load + // use A9_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<2, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Issue], 0>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_NPipe]>]>, + // + // Double-precision FP Load + // use A9_Issue to enforce the 1 
load/store per cycle limit + InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<2, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Issue], 0>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_NPipe]>]>, + // + // FP Load Multiple + // use A9_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_fpLoadm, [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<2, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Issue], 0>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_NPipe]>]>, + // + // Single-precision FP Store + // use A9_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<2, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Issue], 0>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_NPipe]>]>, + // + // Double-precision FP Store + // use A9_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<2, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Issue], 0>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_NPipe]>]>, + // + // FP Store Multiple + // use A9_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_fpStorem, [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<2, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Issue], 0>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_NPipe]>]>, + // NEON + // Issue through integer pipeline, and execute in NEON unit. + // FIXME: Neon pipeline and LdSt unit are multiplexed. + // Add some syntactic sugar to model this! 
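The multiplexing noted in the FIXME above is approximated with zero-cycle stages: every memory itinerary also occupies A9_Issue and A9_LSPipe for one cycle each, and the trailing ", 0" means those stages add no latency of their own, so at most one load/store itinerary can start per cycle. A generic sketch of the pattern (IIC_exampleLd is a placeholder, not a real itinerary class):

  // Hypothetical NEON load (sketch only): the "<1, [...], 0>" stages consume
  // A9_Issue and A9_LSPipe without lengthening the itinerary, serializing
  // memory operations to one per cycle.
  InstrItinData<IIC_exampleLd , [InstrStage<1, [A9_DRegsN], 0, Required>,
                                 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
                                 InstrStage<1, [A9_Issue], 0>,
                                 InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
                                 InstrStage<1, [A9_LSPipe], 0>,
                                 InstrStage<1, [A9_NPipe]>]>,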
+ // VLD1 + // FIXME: We don't model this instruction properly + InstrItinData<IIC_VLD1, [InstrStage<1, [A9_DRegsN], 0, Required>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Issue], 0>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_NPipe]>]>, + // + // VLD2 + // FIXME: We don't model this instruction properly + InstrItinData<IIC_VLD2, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Issue], 0>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_NPipe]>], [2, 2, 1]>, + // + // VLD3 + // FIXME: We don't model this instruction properly + InstrItinData<IIC_VLD3, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Issue], 0>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_NPipe]>], [2, 2, 2, 1]>, + // + // VLD4 + // FIXME: We don't model this instruction properly + InstrItinData<IIC_VLD4, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Issue], 0>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_NPipe]>], [2, 2, 2, 2, 1]>, + // + // VST + // FIXME: We don't model this instruction properly + InstrItinData<IIC_VST, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Issue], 0>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_NPipe]>]>, + // + // Double-register Integer Unary + InstrItinData<IIC_VUNAiD, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 2]>, + // + // Quad-register Integer Unary + InstrItinData<IIC_VUNAiQ, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 2]>, + // + // Double-register Integer Q-Unary + InstrItinData<IIC_VQUNAiD, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 1]>, + // + // Quad-register Integer CountQ-Unary + InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 1]>, + // + // Double-register Integer Binary + InstrItinData<IIC_VBINiD, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [3, 2, 2]>, + // + // Quad-register Integer Binary + InstrItinData<IIC_VBINiQ, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [3, 2, 2]>, + // + // Double-register Integer 
Subtract + InstrItinData<IIC_VSUBiD, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [3, 2, 1]>, + // + // Quad-register Integer Subtract + InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [3, 2, 1]>, + // + // Double-register Integer Shift + InstrItinData<IIC_VSHLiD, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [3, 1, 1]>, + // + // Quad-register Integer Shift + InstrItinData<IIC_VSHLiQ, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [3, 1, 1]>, + // + // Double-register Integer Shift (4 cycle) + InstrItinData<IIC_VSHLi4D, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 1, 1]>, + // + // Quad-register Integer Shift (4 cycle) + InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 1, 1]>, + // + // Double-register Integer Binary (4 cycle) + InstrItinData<IIC_VBINi4D, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 2, 2]>, + // + // Quad-register Integer Binary (4 cycle) + InstrItinData<IIC_VBINi4Q, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 2, 2]>, + // + // Double-register Integer Subtract (4 cycle) + InstrItinData<IIC_VSUBi4D, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 2, 1]>, + // + // Quad-register Integer Subtract (4 cycle) + InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 2, 1]>, + + // + // Double-register Integer Count + InstrItinData<IIC_VCNTiD, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [3, 2, 2]>, + // + // Quad-register Integer Count + // Result written in N3, but that is relative to the last cycle of multicycle, + // so we use 4 for those cases + InstrItinData<IIC_VCNTiQ, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2,
[A9_NPipe]>], [4, 2, 2]>, + // + // Double-register Absolute Difference and Accumulate + InstrItinData<IIC_VABAD, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [6, 3, 2, 1]>, + // + // Quad-register Absolute Difference and Accumulate + InstrItinData<IIC_VABAQ, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [6, 3, 2, 1]>, + // + // Double-register Integer Pair Add Long + InstrItinData<IIC_VPALiD, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [6, 3, 1]>, + // + // Quad-register Integer Pair Add Long + InstrItinData<IIC_VPALiQ, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [6, 3, 1]>, + + // + // Double-register Integer Multiply (.8, .16) + InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [6, 2, 2]>, + // + // Quad-register Integer Multiply (.8, .16) + InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [7, 2, 2]>, + + // + // Double-register Integer Multiply (.32) + InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [7, 2, 1]>, + // + // Quad-register Integer Multiply (.32) + InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 9 cycles + InstrStage<10, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<4, [A9_NPipe]>], [9, 2, 1]>, + // + // Double-register Integer Multiply-Accumulate (.8, .16) + InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [6, 3, 2, 2]>, + // + // Double-register Integer Multiply-Accumulate (.32) + InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [7, 3, 2, 1]>, + // + // Quad-register Integer Multiply-Accumulate (.8, .16) + InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [7, 3, 2, 2]>, + // + // Quad-register Integer Multiply-Accumulate (.32) + InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 9 cycles + InstrStage<10, 
[A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<4, [A9_NPipe]>], [9, 3, 2, 1]>, + // + // Move Immediate + InstrItinData<IIC_VMOVImm, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [3]>, + // + // Double-register Permute Move + InstrItinData<IIC_VMOVD, [InstrStage<1, [A9_DRegsN], 0, Required>, + // FIXME: all latencies are arbitrary, no information is available + InstrStage<3, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_LSPipe]>], [2, 1]>, + // + // Quad-register Permute Move + // Result written in N2, but that is relative to the last cycle of multicycle, + // so we use 3 for those cases + InstrItinData<IIC_VMOVQ, [InstrStage<1, [A9_DRegsN], 0, Required>, + // FIXME: all latencies are arbitrary, no information is available + InstrStage<4, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [3, 1]>, + // + // Integer to Single-precision Move + InstrItinData<IIC_VMOVIS , [InstrStage<1, [A9_DRegsN], 0, Required>, + // FIXME: all latencies are arbitrary, no information is available + InstrStage<3, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [2, 1]>, + // + // Integer to Double-precision Move + InstrItinData<IIC_VMOVID , [InstrStage<1, [A9_DRegsN], 0, Required>, + // FIXME: all latencies are arbitrary, no information is available + InstrStage<3, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [2, 1, 1]>, + // + // Single-precision to Integer Move + InstrItinData<IIC_VMOVSI , [InstrStage<1, [A9_DRegsN], 0, Required>, + // FIXME: all latencies are arbitrary, no information is available + InstrStage<3, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [2, 1]>, + // + // Double-precision to Integer Move + InstrItinData<IIC_VMOVDI , [InstrStage<1, [A9_DRegsN], 0, Required>, + // FIXME: all latencies are arbitrary, no information is available + InstrStage<3, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [2, 2, 1]>, + // + // Integer to Lane Move + InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_DRegsN], 0, Required>, + // FIXME: all latencies are arbitrary, no information is available + InstrStage<4, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [3, 1, 1]>, + + // + // Double-register FP Unary + InstrItinData<IIC_VUNAD, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [5, 2]>, + // + // Quad-register FP Unary + // Result written in N5, but that is relative to the last cycle of multicycle, + // so we use 6 for those cases + InstrItinData<IIC_VUNAQ, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [6, 2]>, + // + // Double-register FP Binary + // FIXME: We're using this itin for many instructions and [2, 2] here is too + // optimistic. 
+ InstrItinData<IIC_VBIND, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [5, 2, 2]>, + // + // Quad-register FP Binary + // Result written in N5, but that is relative to the last cycle of multicycle, + // so we use 6 for those cases + // FIXME: We're using this itin for many instructions and [2, 2] here is too + // optimistic. + InstrItinData<IIC_VBINQ, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 8 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [6, 2, 2]>, + // + // Double-register FP Multiply-Accumulate + InstrItinData<IIC_VMACD, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [6, 3, 2, 1]>, + // + // Quad-register FP Multiply-Accumulate + // Result written in N9, but that is relative to the last cycle of multicycle, + // so we use 10 for those cases + InstrItinData<IIC_VMACQ, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 9 cycles + InstrStage<10, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<4, [A9_NPipe]>], [8, 4, 2, 1]>, + // + // Double-register Reciprocal Step + InstrItinData<IIC_VRECSD, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [6, 2, 2]>, + // + // Quad-register Reciprocal Step + InstrItinData<IIC_VRECSQ, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 9 cycles + InstrStage<10, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<4, [A9_NPipe]>], [8, 2, 2]>, + // + // Double-register Permute + InstrItinData<IIC_VPERMD, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [2, 2, 1, 1]>, + // + // Quad-register Permute + // Result written in N2, but that is relative to the last cycle of multicycle, + // so we use 3 for those cases + InstrItinData<IIC_VPERMQ, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [3, 3, 1, 1]>, + // + // Quad-register Permute (3 cycle issue) + // Result written in N2, but that is relative to the last cycle of multicycle, + // so we use 4 for those cases + InstrItinData<IIC_VPERMQ3, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 8 cycles + InstrStage<9, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<3, [A9_LSPipe]>], [4, 4, 1, 1]>, + + // + // Double-register VEXT + InstrItinData<IIC_VEXTD, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [2, 1, 1]>, + // + // Quad-register VEXT + InstrItinData<IIC_VEXTQ, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 9 cycles + InstrStage<8, [A9_DRegsVFP],
0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [3, 1, 1]>, + // + // VTB + InstrItinData<IIC_VTB1, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [3, 2, 1]>, + InstrItinData<IIC_VTB2, [InstrStage<2, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [3, 2, 2, 1]>, + InstrItinData<IIC_VTB3, [InstrStage<2, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 8 cycles + InstrStage<9, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<3, [A9_NPipe]>], [4, 2, 2, 3, 1]>, + InstrItinData<IIC_VTB4, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 8 cycles + InstrStage<9, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<3, [A9_NPipe]>], [4, 2, 2, 3, 3, 1]>, + // + // VTBX + InstrItinData<IIC_VTBX1, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [3, 1, 2, 1]>, + InstrItinData<IIC_VTBX2, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [3, 1, 2, 2, 1]>, + InstrItinData<IIC_VTBX3, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 8 cycles + InstrStage<9, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<3, [A9_NPipe]>], [4, 1, 2, 2, 3, 1]>, + InstrItinData<IIC_VTBX4, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 8 cycles + InstrStage<9, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [4, 1, 2, 2, 3, 3, 1]> +]>; diff --git a/lib/Target/ARM/ARMScheduleV6.td b/lib/Target/ARM/ARMScheduleV6.td index 0fef466..f813022 100644 --- a/lib/Target/ARM/ARMScheduleV6.td +++ b/lib/Target/ARM/ARMScheduleV6.td @@ -13,103 +13,107 @@ // Model based on ARM1176 // +// Functional Units +def V6_Pipe : FuncUnit; // pipeline + // Scheduling information derived from "ARM1176JZF-S Technical Reference Manual". 
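The V6 hunks below also show an interface change in this update: ProcessorItineraries now takes the list of functional units explicitly as its first argument instead of relying on globally defined FU_* units, which is what lets each CPU model declare its own units (V6_Pipe here, the A8_*/A9_* units above). Schematically, with placeholder names (Example_Pipe and ExampleItineraries are illustrative, not part of this commit):

  // Sketch of the new two-list form: units are declared per CPU and passed
  // in, then referenced by the per-class itinerary data.
  def Example_Pipe : FuncUnit;
  def ExampleItineraries : ProcessorItineraries<
    [Example_Pipe], [
    InstrItinData<IIC_iALUx , [InstrStage<1, [Example_Pipe]>]>
  ]>;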
// -def ARMV6Itineraries : ProcessorItineraries<[ +def ARMV6Itineraries : ProcessorItineraries< + [V6_Pipe], [ // // No operand cycles - InstrItinData<IIC_iALUx , [InstrStage<1, [FU_Pipe0]>]>, + InstrItinData<IIC_iALUx , [InstrStage<1, [V6_Pipe]>]>, // // Binary Instructions that produce a result - InstrItinData<IIC_iALUi , [InstrStage<1, [FU_Pipe0]>], [2, 2]>, - InstrItinData<IIC_iALUr , [InstrStage<1, [FU_Pipe0]>], [2, 2, 2]>, - InstrItinData<IIC_iALUsi , [InstrStage<1, [FU_Pipe0]>], [2, 2, 1]>, - InstrItinData<IIC_iALUsr , [InstrStage<2, [FU_Pipe0]>], [3, 3, 2, 1]>, + InstrItinData<IIC_iALUi , [InstrStage<1, [V6_Pipe]>], [2, 2]>, + InstrItinData<IIC_iALUr , [InstrStage<1, [V6_Pipe]>], [2, 2, 2]>, + InstrItinData<IIC_iALUsi , [InstrStage<1, [V6_Pipe]>], [2, 2, 1]>, + InstrItinData<IIC_iALUsr , [InstrStage<2, [V6_Pipe]>], [3, 3, 2, 1]>, // // Unary Instructions that produce a result - InstrItinData<IIC_iUNAr , [InstrStage<1, [FU_Pipe0]>], [2, 2]>, - InstrItinData<IIC_iUNAsi , [InstrStage<1, [FU_Pipe0]>], [2, 1]>, - InstrItinData<IIC_iUNAsr , [InstrStage<2, [FU_Pipe0]>], [3, 2, 1]>, + InstrItinData<IIC_iUNAr , [InstrStage<1, [V6_Pipe]>], [2, 2]>, + InstrItinData<IIC_iUNAsi , [InstrStage<1, [V6_Pipe]>], [2, 1]>, + InstrItinData<IIC_iUNAsr , [InstrStage<2, [V6_Pipe]>], [3, 2, 1]>, // // Compare instructions - InstrItinData<IIC_iCMPi , [InstrStage<1, [FU_Pipe0]>], [2]>, - InstrItinData<IIC_iCMPr , [InstrStage<1, [FU_Pipe0]>], [2, 2]>, - InstrItinData<IIC_iCMPsi , [InstrStage<1, [FU_Pipe0]>], [2, 1]>, - InstrItinData<IIC_iCMPsr , [InstrStage<2, [FU_Pipe0]>], [3, 2, 1]>, + InstrItinData<IIC_iCMPi , [InstrStage<1, [V6_Pipe]>], [2]>, + InstrItinData<IIC_iCMPr , [InstrStage<1, [V6_Pipe]>], [2, 2]>, + InstrItinData<IIC_iCMPsi , [InstrStage<1, [V6_Pipe]>], [2, 1]>, + InstrItinData<IIC_iCMPsr , [InstrStage<2, [V6_Pipe]>], [3, 2, 1]>, // // Move instructions, unconditional - InstrItinData<IIC_iMOVi , [InstrStage<1, [FU_Pipe0]>], [2]>, - InstrItinData<IIC_iMOVr , [InstrStage<1, [FU_Pipe0]>], [2, 2]>, - InstrItinData<IIC_iMOVsi , [InstrStage<1, [FU_Pipe0]>], [2, 1]>, - InstrItinData<IIC_iMOVsr , [InstrStage<2, [FU_Pipe0]>], [3, 2, 1]>, + InstrItinData<IIC_iMOVi , [InstrStage<1, [V6_Pipe]>], [2]>, + InstrItinData<IIC_iMOVr , [InstrStage<1, [V6_Pipe]>], [2, 2]>, + InstrItinData<IIC_iMOVsi , [InstrStage<1, [V6_Pipe]>], [2, 1]>, + InstrItinData<IIC_iMOVsr , [InstrStage<2, [V6_Pipe]>], [3, 2, 1]>, // // Move instructions, conditional - InstrItinData<IIC_iCMOVi , [InstrStage<1, [FU_Pipe0]>], [3]>, - InstrItinData<IIC_iCMOVr , [InstrStage<1, [FU_Pipe0]>], [3, 2]>, - InstrItinData<IIC_iCMOVsi , [InstrStage<1, [FU_Pipe0]>], [3, 1]>, - InstrItinData<IIC_iCMOVsr , [InstrStage<1, [FU_Pipe0]>], [4, 2, 1]>, + InstrItinData<IIC_iCMOVi , [InstrStage<1, [V6_Pipe]>], [3]>, + InstrItinData<IIC_iCMOVr , [InstrStage<1, [V6_Pipe]>], [3, 2]>, + InstrItinData<IIC_iCMOVsi , [InstrStage<1, [V6_Pipe]>], [3, 1]>, + InstrItinData<IIC_iCMOVsr , [InstrStage<1, [V6_Pipe]>], [4, 2, 1]>, // Integer multiply pipeline // - InstrItinData<IIC_iMUL16 , [InstrStage<1, [FU_Pipe0]>], [4, 1, 1]>, - InstrItinData<IIC_iMAC16 , [InstrStage<1, [FU_Pipe0]>], [4, 1, 1, 2]>, - InstrItinData<IIC_iMUL32 , [InstrStage<2, [FU_Pipe0]>], [5, 1, 1]>, - InstrItinData<IIC_iMAC32 , [InstrStage<2, [FU_Pipe0]>], [5, 1, 1, 2]>, - InstrItinData<IIC_iMUL64 , [InstrStage<3, [FU_Pipe0]>], [6, 1, 1]>, - InstrItinData<IIC_iMAC64 , [InstrStage<3, [FU_Pipe0]>], [6, 1, 1, 2]>, + InstrItinData<IIC_iMUL16 , [InstrStage<1, [V6_Pipe]>], [4, 1, 1]>, + InstrItinData<IIC_iMAC16 , 
[InstrStage<1, [V6_Pipe]>], [4, 1, 1, 2]>, + InstrItinData<IIC_iMUL32 , [InstrStage<2, [V6_Pipe]>], [5, 1, 1]>, + InstrItinData<IIC_iMAC32 , [InstrStage<2, [V6_Pipe]>], [5, 1, 1, 2]>, + InstrItinData<IIC_iMUL64 , [InstrStage<3, [V6_Pipe]>], [6, 1, 1]>, + InstrItinData<IIC_iMAC64 , [InstrStage<3, [V6_Pipe]>], [6, 1, 1, 2]>, // Integer load pipeline // // Immediate offset - InstrItinData<IIC_iLoadi , [InstrStage<1, [FU_Pipe0]>], [4, 1]>, + InstrItinData<IIC_iLoadi , [InstrStage<1, [V6_Pipe]>], [4, 1]>, // // Register offset - InstrItinData<IIC_iLoadr , [InstrStage<1, [FU_Pipe0]>], [4, 1, 1]>, + InstrItinData<IIC_iLoadr , [InstrStage<1, [V6_Pipe]>], [4, 1, 1]>, // // Scaled register offset, issues over 2 cycles - InstrItinData<IIC_iLoadsi , [InstrStage<2, [FU_Pipe0]>], [5, 2, 1]>, + InstrItinData<IIC_iLoadsi , [InstrStage<2, [V6_Pipe]>], [5, 2, 1]>, // // Immediate offset with update - InstrItinData<IIC_iLoadiu , [InstrStage<1, [FU_Pipe0]>], [4, 2, 1]>, + InstrItinData<IIC_iLoadiu , [InstrStage<1, [V6_Pipe]>], [4, 2, 1]>, // // Register offset with update - InstrItinData<IIC_iLoadru , [InstrStage<1, [FU_Pipe0]>], [4, 2, 1, 1]>, + InstrItinData<IIC_iLoadru , [InstrStage<1, [V6_Pipe]>], [4, 2, 1, 1]>, // // Scaled register offset with update, issues over 2 cycles - InstrItinData<IIC_iLoadsiu , [InstrStage<2, [FU_Pipe0]>], [5, 2, 2, 1]>, + InstrItinData<IIC_iLoadsiu , [InstrStage<2, [V6_Pipe]>], [5, 2, 2, 1]>, // // Load multiple - InstrItinData<IIC_iLoadm , [InstrStage<3, [FU_Pipe0]>]>, + InstrItinData<IIC_iLoadm , [InstrStage<3, [V6_Pipe]>]>, // Integer store pipeline // // Immediate offset - InstrItinData<IIC_iStorei , [InstrStage<1, [FU_Pipe0]>], [2, 1]>, + InstrItinData<IIC_iStorei , [InstrStage<1, [V6_Pipe]>], [2, 1]>, // // Register offset - InstrItinData<IIC_iStorer , [InstrStage<1, [FU_Pipe0]>], [2, 1, 1]>, + InstrItinData<IIC_iStorer , [InstrStage<1, [V6_Pipe]>], [2, 1, 1]>, // // Scaled register offset, issues over 2 cycles - InstrItinData<IIC_iStoresi , [InstrStage<2, [FU_Pipe0]>], [2, 2, 1]>, + InstrItinData<IIC_iStoresi , [InstrStage<2, [V6_Pipe]>], [2, 2, 1]>, // // Immediate offset with update - InstrItinData<IIC_iStoreiu , [InstrStage<1, [FU_Pipe0]>], [2, 2, 1]>, + InstrItinData<IIC_iStoreiu , [InstrStage<1, [V6_Pipe]>], [2, 2, 1]>, // // Register offset with update - InstrItinData<IIC_iStoreru , [InstrStage<1, [FU_Pipe0]>], [2, 2, 1, 1]>, + InstrItinData<IIC_iStoreru , [InstrStage<1, [V6_Pipe]>], [2, 2, 1, 1]>, // // Scaled register offset with update, issues over 2 cycles - InstrItinData<IIC_iStoresiu, [InstrStage<2, [FU_Pipe0]>], [2, 2, 2, 1]>, + InstrItinData<IIC_iStoresiu, [InstrStage<2, [V6_Pipe]>], [2, 2, 2, 1]>, // // Store multiple - InstrItinData<IIC_iStorem , [InstrStage<3, [FU_Pipe0]>]>, + InstrItinData<IIC_iStorem , [InstrStage<3, [V6_Pipe]>]>, // Branch // // no delay slots, so the latency of a branch is unimportant - InstrItinData<IIC_Br , [InstrStage<1, [FU_Pipe0]>]>, + InstrItinData<IIC_Br , [InstrStage<1, [V6_Pipe]>]>, // VFP // Issue through integer pipeline, and execute in NEON unit. We assume @@ -117,84 +121,84 @@ def ARMV6Itineraries : ProcessorItineraries<[ // possible. 
// // FP Special Register to Integer Register File Move - InstrItinData<IIC_fpSTAT , [InstrStage<1, [FU_Pipe0]>], [3]>, + InstrItinData<IIC_fpSTAT , [InstrStage<1, [V6_Pipe]>], [3]>, // // Single-precision FP Unary - InstrItinData<IIC_fpUNA32 , [InstrStage<1, [FU_Pipe0]>], [5, 2]>, + InstrItinData<IIC_fpUNA32 , [InstrStage<1, [V6_Pipe]>], [5, 2]>, // // Double-precision FP Unary - InstrItinData<IIC_fpUNA64 , [InstrStage<1, [FU_Pipe0]>], [5, 2]>, + InstrItinData<IIC_fpUNA64 , [InstrStage<1, [V6_Pipe]>], [5, 2]>, // // Single-precision FP Compare - InstrItinData<IIC_fpCMP32 , [InstrStage<1, [FU_Pipe0]>], [2, 2]>, + InstrItinData<IIC_fpCMP32 , [InstrStage<1, [V6_Pipe]>], [2, 2]>, // // Double-precision FP Compare - InstrItinData<IIC_fpCMP64 , [InstrStage<1, [FU_Pipe0]>], [2, 2]>, + InstrItinData<IIC_fpCMP64 , [InstrStage<1, [V6_Pipe]>], [2, 2]>, // // Single to Double FP Convert - InstrItinData<IIC_fpCVTSD , [InstrStage<1, [FU_Pipe0]>], [5, 2]>, + InstrItinData<IIC_fpCVTSD , [InstrStage<1, [V6_Pipe]>], [5, 2]>, // // Double to Single FP Convert - InstrItinData<IIC_fpCVTDS , [InstrStage<1, [FU_Pipe0]>], [5, 2]>, + InstrItinData<IIC_fpCVTDS , [InstrStage<1, [V6_Pipe]>], [5, 2]>, // // Single-Precision FP to Integer Convert - InstrItinData<IIC_fpCVTSI , [InstrStage<1, [FU_Pipe0]>], [9, 2]>, + InstrItinData<IIC_fpCVTSI , [InstrStage<1, [V6_Pipe]>], [9, 2]>, // // Double-Precision FP to Integer Convert - InstrItinData<IIC_fpCVTDI , [InstrStage<1, [FU_Pipe0]>], [9, 2]>, + InstrItinData<IIC_fpCVTDI , [InstrStage<1, [V6_Pipe]>], [9, 2]>, // // Integer to Single-Precision FP Convert - InstrItinData<IIC_fpCVTIS , [InstrStage<1, [FU_Pipe0]>], [9, 2]>, + InstrItinData<IIC_fpCVTIS , [InstrStage<1, [V6_Pipe]>], [9, 2]>, // // Integer to Double-Precision FP Convert - InstrItinData<IIC_fpCVTID , [InstrStage<1, [FU_Pipe0]>], [9, 2]>, + InstrItinData<IIC_fpCVTID , [InstrStage<1, [V6_Pipe]>], [9, 2]>, // // Single-precision FP ALU - InstrItinData<IIC_fpALU32 , [InstrStage<1, [FU_Pipe0]>], [9, 2, 2]>, + InstrItinData<IIC_fpALU32 , [InstrStage<1, [V6_Pipe]>], [9, 2, 2]>, // // Double-precision FP ALU - InstrItinData<IIC_fpALU64 , [InstrStage<1, [FU_Pipe0]>], [9, 2, 2]>, + InstrItinData<IIC_fpALU64 , [InstrStage<1, [V6_Pipe]>], [9, 2, 2]>, // // Single-precision FP Multiply - InstrItinData<IIC_fpMUL32 , [InstrStage<1, [FU_Pipe0]>], [9, 2, 2]>, + InstrItinData<IIC_fpMUL32 , [InstrStage<1, [V6_Pipe]>], [9, 2, 2]>, // // Double-precision FP Multiply - InstrItinData<IIC_fpMUL64 , [InstrStage<2, [FU_Pipe0]>], [9, 2, 2]>, + InstrItinData<IIC_fpMUL64 , [InstrStage<2, [V6_Pipe]>], [9, 2, 2]>, // // Single-precision FP MAC - InstrItinData<IIC_fpMAC32 , [InstrStage<1, [FU_Pipe0]>], [9, 2, 2, 2]>, + InstrItinData<IIC_fpMAC32 , [InstrStage<1, [V6_Pipe]>], [9, 2, 2, 2]>, // // Double-precision FP MAC - InstrItinData<IIC_fpMAC64 , [InstrStage<2, [FU_Pipe0]>], [9, 2, 2, 2]>, + InstrItinData<IIC_fpMAC64 , [InstrStage<2, [V6_Pipe]>], [9, 2, 2, 2]>, // // Single-precision FP DIV - InstrItinData<IIC_fpDIV32 , [InstrStage<15, [FU_Pipe0]>], [20, 2, 2]>, + InstrItinData<IIC_fpDIV32 , [InstrStage<15, [V6_Pipe]>], [20, 2, 2]>, // // Double-precision FP DIV - InstrItinData<IIC_fpDIV64 , [InstrStage<29, [FU_Pipe0]>], [34, 2, 2]>, + InstrItinData<IIC_fpDIV64 , [InstrStage<29, [V6_Pipe]>], [34, 2, 2]>, // // Single-precision FP SQRT - InstrItinData<IIC_fpSQRT32 , [InstrStage<15, [FU_Pipe0]>], [20, 2, 2]>, + InstrItinData<IIC_fpSQRT32 , [InstrStage<15, [V6_Pipe]>], [20, 2, 2]>, // // Double-precision FP SQRT - InstrItinData<IIC_fpSQRT64 , 
[InstrStage<29, [FU_Pipe0]>], [34, 2, 2]>, + InstrItinData<IIC_fpSQRT64 , [InstrStage<29, [V6_Pipe]>], [34, 2, 2]>, // // Single-precision FP Load - InstrItinData<IIC_fpLoad32 , [InstrStage<1, [FU_Pipe0]>], [5, 2, 2]>, + InstrItinData<IIC_fpLoad32 , [InstrStage<1, [V6_Pipe]>], [5, 2, 2]>, // // Double-precision FP Load - InstrItinData<IIC_fpLoad64 , [InstrStage<1, [FU_Pipe0]>], [5, 2, 2]>, + InstrItinData<IIC_fpLoad64 , [InstrStage<1, [V6_Pipe]>], [5, 2, 2]>, // // FP Load Multiple - InstrItinData<IIC_fpLoadm , [InstrStage<3, [FU_Pipe0]>]>, + InstrItinData<IIC_fpLoadm , [InstrStage<3, [V6_Pipe]>]>, // // Single-precision FP Store - InstrItinData<IIC_fpStore32 , [InstrStage<1, [FU_Pipe0]>], [2, 2, 2]>, + InstrItinData<IIC_fpStore32 , [InstrStage<1, [V6_Pipe]>], [2, 2, 2]>, // // Double-precision FP Store // use FU_Issue to enforce the 1 load/store per cycle limit - InstrItinData<IIC_fpStore64 , [InstrStage<1, [FU_Pipe0]>], [2, 2, 2]>, + InstrItinData<IIC_fpStore64 , [InstrStage<1, [V6_Pipe]>], [2, 2, 2]>, // // FP Store Multiple - InstrItinData<IIC_fpStorem , [InstrStage<3, [FU_Pipe0]>]> + InstrItinData<IIC_fpStorem , [InstrStage<3, [V6_Pipe]>]> ]>; diff --git a/lib/Target/ARM/ARMScheduleV7.td b/lib/Target/ARM/ARMScheduleV7.td deleted file mode 100644 index bbbf413..0000000 --- a/lib/Target/ARM/ARMScheduleV7.td +++ /dev/null @@ -1,587 +0,0 @@ -//===- ARMScheduleV7.td - ARM v7 Scheduling Definitions ----*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the itinerary class data for the ARM v7 processors. -// -//===----------------------------------------------------------------------===// - -// -// Scheduling information derived from "Cortex-A8 Technical Reference Manual". 
-// -// Dual issue pipeline represented by FU_Pipe0 | FU_Pipe1 -// -def CortexA8Itineraries : ProcessorItineraries<[ - - // Two fully-pipelined integer ALU pipelines - // - // No operand cycles - InstrItinData<IIC_iALUx , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>, - // - // Binary Instructions that produce a result - InstrItinData<IIC_iALUi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 2]>, - InstrItinData<IIC_iALUr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 2, 2]>, - InstrItinData<IIC_iALUsi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 2, 1]>, - InstrItinData<IIC_iALUsr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 2, 1, 1]>, - // - // Unary Instructions that produce a result - InstrItinData<IIC_iUNAr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 2]>, - InstrItinData<IIC_iUNAsi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1]>, - InstrItinData<IIC_iUNAsr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1, 1]>, - // - // Compare instructions - InstrItinData<IIC_iCMPi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2]>, - InstrItinData<IIC_iCMPr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 2]>, - InstrItinData<IIC_iCMPsi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1]>, - InstrItinData<IIC_iCMPsr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1, 1]>, - // - // Move instructions, unconditional - InstrItinData<IIC_iMOVi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [1]>, - InstrItinData<IIC_iMOVr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [1, 1]>, - InstrItinData<IIC_iMOVsi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [1, 1]>, - InstrItinData<IIC_iMOVsr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [1, 1, 1]>, - // - // Move instructions, conditional - InstrItinData<IIC_iCMOVi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2]>, - InstrItinData<IIC_iCMOVr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1]>, - InstrItinData<IIC_iCMOVsi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1]>, - InstrItinData<IIC_iCMOVsr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1, 1]>, - - // Integer multiply pipeline - // Result written in E5, but that is relative to the last cycle of multicycle, - // so we use 6 for those cases - // - InstrItinData<IIC_iMUL16 , [InstrStage<1, [FU_Pipe0]>], [5, 1, 1]>, - InstrItinData<IIC_iMAC16 , [InstrStage<1, [FU_Pipe1], 0>, - InstrStage<2, [FU_Pipe0]>], [6, 1, 1, 4]>, - InstrItinData<IIC_iMUL32 , [InstrStage<1, [FU_Pipe1], 0>, - InstrStage<2, [FU_Pipe0]>], [6, 1, 1]>, - InstrItinData<IIC_iMAC32 , [InstrStage<1, [FU_Pipe1], 0>, - InstrStage<2, [FU_Pipe0]>], [6, 1, 1, 4]>, - InstrItinData<IIC_iMUL64 , [InstrStage<2, [FU_Pipe1], 0>, - InstrStage<3, [FU_Pipe0]>], [6, 6, 1, 1]>, - InstrItinData<IIC_iMAC64 , [InstrStage<2, [FU_Pipe1], 0>, - InstrStage<3, [FU_Pipe0]>], [6, 6, 1, 1]>, - - // Integer load pipeline - // - // loads have an extra cycle of latency, but are fully pipelined - // use FU_Issue to enforce the 1 load/store per cycle limit - // - // Immediate offset - InstrItinData<IIC_iLoadi , [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [3, 1]>, - // - // Register offset - InstrItinData<IIC_iLoadr , [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [3, 1, 1]>, - // - // Scaled register offset, issues over 2 cycles - InstrItinData<IIC_iLoadsi , [InstrStage<2, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0], 0>, - InstrStage<1, [FU_Pipe1]>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [4, 1, 1]>, - // - // Immediate offset with update - InstrItinData<IIC_iLoadiu , [InstrStage<1, [FU_Issue], 0>, - 
InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [3, 2, 1]>, - // - // Register offset with update - InstrItinData<IIC_iLoadru , [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [3, 2, 1, 1]>, - // - // Scaled register offset with update, issues over 2 cycles - InstrItinData<IIC_iLoadsiu , [InstrStage<2, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0], 0>, - InstrStage<1, [FU_Pipe1]>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [4, 3, 1, 1]>, - // - // Load multiple - InstrItinData<IIC_iLoadm , [InstrStage<2, [FU_Issue], 0>, - InstrStage<2, [FU_Pipe0], 0>, - InstrStage<2, [FU_Pipe1]>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>]>, - - // Integer store pipeline - // - // use FU_Issue to enforce the 1 load/store per cycle limit - // - // Immediate offset - InstrItinData<IIC_iStorei , [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [3, 1]>, - // - // Register offset - InstrItinData<IIC_iStorer , [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [3, 1, 1]>, - // - // Scaled register offset, issues over 2 cycles - InstrItinData<IIC_iStoresi , [InstrStage<2, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0], 0>, - InstrStage<1, [FU_Pipe1]>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [3, 1, 1]>, - // - // Immediate offset with update - InstrItinData<IIC_iStoreiu , [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [2, 3, 1]>, - // - // Register offset with update - InstrItinData<IIC_iStoreru , [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [2, 3, 1, 1]>, - // - // Scaled register offset with update, issues over 2 cycles - InstrItinData<IIC_iStoresiu, [InstrStage<2, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0], 0>, - InstrStage<1, [FU_Pipe1]>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [3, 3, 1, 1]>, - // - // Store multiple - InstrItinData<IIC_iStorem , [InstrStage<2, [FU_Issue], 0>, - InstrStage<2, [FU_Pipe0], 0>, - InstrStage<2, [FU_Pipe1]>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>]>, - - // Branch - // - // no delay slots, so the latency of a branch is unimportant - InstrItinData<IIC_Br , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>, - - // VFP - // Issue through integer pipeline, and execute in NEON unit. We assume - // RunFast mode so that NFP pipeline is used for single-precision when - // possible. 
- // - // FP Special Register to Integer Register File Move - InstrItinData<IIC_fpSTAT , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe]>]>, - // - // Single-precision FP Unary - InstrItinData<IIC_fpUNA32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [7, 1]>, - // - // Double-precision FP Unary - InstrItinData<IIC_fpUNA64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<4, [FU_NPipe], 0>, - InstrStage<4, [FU_NLSPipe]>], [4, 1]>, - // - // Single-precision FP Compare - InstrItinData<IIC_fpCMP32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [1, 1]>, - // - // Double-precision FP Compare - InstrItinData<IIC_fpCMP64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<4, [FU_NPipe], 0>, - InstrStage<4, [FU_NLSPipe]>], [4, 1]>, - // - // Single to Double FP Convert - InstrItinData<IIC_fpCVTSD , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<7, [FU_NPipe], 0>, - InstrStage<7, [FU_NLSPipe]>], [7, 1]>, - // - // Double to Single FP Convert - InstrItinData<IIC_fpCVTDS , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<5, [FU_NPipe], 0>, - InstrStage<5, [FU_NLSPipe]>], [5, 1]>, - // - // Single-Precision FP to Integer Convert - InstrItinData<IIC_fpCVTSI , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [7, 1]>, - // - // Double-Precision FP to Integer Convert - InstrItinData<IIC_fpCVTDI , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<8, [FU_NPipe], 0>, - InstrStage<8, [FU_NLSPipe]>], [8, 1]>, - // - // Integer to Single-Precision FP Convert - InstrItinData<IIC_fpCVTIS , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [7, 1]>, - // - // Integer to Double-Precision FP Convert - InstrItinData<IIC_fpCVTID , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<8, [FU_NPipe], 0>, - InstrStage<8, [FU_NLSPipe]>], [8, 1]>, - // - // Single-precision FP ALU - InstrItinData<IIC_fpALU32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [7, 1, 1]>, - // - // Double-precision FP ALU - InstrItinData<IIC_fpALU64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<9, [FU_NPipe], 0>, - InstrStage<9, [FU_NLSPipe]>], [9, 1, 1]>, - // - // Single-precision FP Multiply - InstrItinData<IIC_fpMUL32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [7, 1, 1]>, - // - // Double-precision FP Multiply - InstrItinData<IIC_fpMUL64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<11, [FU_NPipe], 0>, - InstrStage<11, [FU_NLSPipe]>], [11, 1, 1]>, - // - // Single-precision FP MAC - InstrItinData<IIC_fpMAC32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [7, 2, 1, 1]>, - // - // Double-precision FP MAC - InstrItinData<IIC_fpMAC64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<19, [FU_NPipe], 0>, - InstrStage<19, [FU_NLSPipe]>], [19, 2, 1, 1]>, - // - // Single-precision FP DIV - InstrItinData<IIC_fpDIV32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<20, [FU_NPipe], 0>, - InstrStage<20, [FU_NLSPipe]>], [20, 1, 1]>, - // - // Double-precision FP DIV - InstrItinData<IIC_fpDIV64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<29, [FU_NPipe], 0>, - InstrStage<29, [FU_NLSPipe]>], [29, 1, 1]>, - // - // Single-precision FP SQRT - InstrItinData<IIC_fpSQRT32, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<19, [FU_NPipe], 0>, - InstrStage<19, [FU_NLSPipe]>], [19, 1]>, - // - // Double-precision FP SQRT - InstrItinData<IIC_fpSQRT64, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<29, [FU_NPipe], 0>, - InstrStage<29, [FU_NLSPipe]>], 
[29, 1]>, - // - // Single-precision FP Load - // use FU_Issue to enforce the 1 load/store per cycle limit - InstrItinData<IIC_fpLoad32, [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>]>, - // - // Double-precision FP Load - // use FU_Issue to enforce the 1 load/store per cycle limit - InstrItinData<IIC_fpLoad64, [InstrStage<2, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0], 0>, - InstrStage<1, [FU_Pipe1]>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>]>, - // - // FP Load Multiple - // use FU_Issue to enforce the 1 load/store per cycle limit - InstrItinData<IIC_fpLoadm, [InstrStage<3, [FU_Issue], 0>, - InstrStage<2, [FU_Pipe0], 0>, - InstrStage<2, [FU_Pipe1]>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>]>, - // - // Single-precision FP Store - // use FU_Issue to enforce the 1 load/store per cycle limit - InstrItinData<IIC_fpStore32,[InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>]>, - // - // Double-precision FP Store - // use FU_Issue to enforce the 1 load/store per cycle limit - InstrItinData<IIC_fpStore64,[InstrStage<2, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0], 0>, - InstrStage<1, [FU_Pipe1]>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>]>, - // - // FP Store Multiple - // use FU_Issue to enforce the 1 load/store per cycle limit - InstrItinData<IIC_fpStorem, [InstrStage<3, [FU_Issue], 0>, - InstrStage<2, [FU_Pipe0], 0>, - InstrStage<2, [FU_Pipe1]>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>]>, - - // NEON - // Issue through integer pipeline, and execute in NEON unit. 
- // - // VLD1 - InstrItinData<IIC_VLD1, [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>]>, - // - // VLD2 - InstrItinData<IIC_VLD2, [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>], [2, 2, 1]>, - // - // VLD3 - InstrItinData<IIC_VLD3, [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>], [2, 2, 2, 1]>, - // - // VLD4 - InstrItinData<IIC_VLD4, [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>], [2, 2, 2, 2, 1]>, - // - // VST - InstrItinData<IIC_VST, [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>]>, - // - // Double-register FP Unary - InstrItinData<IIC_VUNAD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [5, 2]>, - // - // Quad-register FP Unary - // Result written in N5, but that is relative to the last cycle of multicycle, - // so we use 6 for those cases - InstrItinData<IIC_VUNAQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [6, 2]>, - // - // Double-register FP Binary - InstrItinData<IIC_VBIND, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [5, 2, 2]>, - // - // Quad-register FP Binary - // Result written in N5, but that is relative to the last cycle of multicycle, - // so we use 6 for those cases - InstrItinData<IIC_VBINQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [6, 2, 2]>, - // - // Move Immediate - InstrItinData<IIC_VMOVImm, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [3]>, - // - // Double-register Permute Move - InstrItinData<IIC_VMOVD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe]>], [2, 1]>, - // - // Quad-register Permute Move - // Result written in N2, but that is relative to the last cycle of multicycle, - // so we use 3 for those cases - InstrItinData<IIC_VMOVQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NLSPipe]>], [3, 1]>, - // - // Integer to Single-precision Move - InstrItinData<IIC_VMOVIS , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe]>], [2, 1]>, - // - // Integer to Double-precision Move - InstrItinData<IIC_VMOVID , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe]>], [2, 1, 1]>, - // - // Single-precision to Integer Move - InstrItinData<IIC_VMOVSI , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe]>], [20, 1]>, - // - // Double-precision to Integer Move - InstrItinData<IIC_VMOVDI , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe]>], [20, 20, 1]>, - // - // Integer to Lane Move - InstrItinData<IIC_VMOVISL , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NLSPipe]>], [3, 1, 1]>, - // - // Double-register Permute - InstrItinData<IIC_VPERMD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe]>], [2, 2, 1, 1]>, - // - // Quad-register Permute - // Result written in N2, but that is relative to the last cycle of multicycle, - // so we use 3 for those cases - InstrItinData<IIC_VPERMQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NLSPipe]>], [3, 3, 1, 1]>, - // - // Quad-register Permute (3 cycle issue) - // Result written in N2, but that is relative to the last cycle of multicycle, - // so we use 4 for 
those cases - InstrItinData<IIC_VPERMQ3, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe]>, - InstrStage<1, [FU_NPipe], 0>, - InstrStage<2, [FU_NLSPipe]>], [4, 4, 1, 1]>, - // - // Double-register FP Multiple-Accumulate - InstrItinData<IIC_VMACD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [9, 2, 2, 3]>, - // - // Quad-register FP Multiple-Accumulate - // Result written in N9, but that is relative to the last cycle of multicycle, - // so we use 10 for those cases - InstrItinData<IIC_VMACQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [10, 2, 2, 3]>, - // - // Double-register Reciprical Step - InstrItinData<IIC_VRECSD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [9, 2, 2]>, - // - // Quad-register Reciprical Step - InstrItinData<IIC_VRECSQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [10, 2, 2]>, - // - // Double-register Integer Count - InstrItinData<IIC_VCNTiD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [3, 2, 2]>, - // - // Quad-register Integer Count - // Result written in N3, but that is relative to the last cycle of multicycle, - // so we use 4 for those cases - InstrItinData<IIC_VCNTiQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [4, 2, 2]>, - // - // Double-register Integer Unary - InstrItinData<IIC_VUNAiD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [4, 2]>, - // - // Quad-register Integer Unary - InstrItinData<IIC_VUNAiQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [4, 2]>, - // - // Double-register Integer Q-Unary - InstrItinData<IIC_VQUNAiD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [4, 1]>, - // - // Quad-register Integer CountQ-Unary - InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [4, 1]>, - // - // Double-register Integer Binary - InstrItinData<IIC_VBINiD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [3, 2, 2]>, - // - // Quad-register Integer Binary - InstrItinData<IIC_VBINiQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [3, 2, 2]>, - // - // Double-register Integer Binary (4 cycle) - InstrItinData<IIC_VBINi4D, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [4, 2, 1]>, - // - // Quad-register Integer Binary (4 cycle) - InstrItinData<IIC_VBINi4Q, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [4, 2, 1]>, - // - // Double-register Integer Subtract - InstrItinData<IIC_VSUBiD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [3, 2, 1]>, - // - // Quad-register Integer Subtract - InstrItinData<IIC_VSUBiQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [3, 2, 1]>, - // - // Double-register Integer Shift - InstrItinData<IIC_VSHLiD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [3, 1, 1]>, - // - // Quad-register Integer Shift - InstrItinData<IIC_VSHLiQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [4, 1, 1]>, - // - // Double-register Integer Shift (4 cycle) - InstrItinData<IIC_VSHLi4D, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [4, 1, 1]>, - // - // Quad-register Integer Shift (4 cycle) - InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [5, 1, 1]>, - // - // Double-register Integer Pair Add Long - InstrItinData<IIC_VPALiD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - 
InstrStage<1, [FU_NPipe]>], [6, 3, 2, 1]>, - // - // Quad-register Integer Pair Add Long - InstrItinData<IIC_VPALiQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [7, 3, 2, 1]>, - // - // Double-register Integer Multiply (.8, .16) - InstrItinData<IIC_VMULi16D, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [6, 2, 2]>, - // - // Double-register Integer Multiply (.32) - InstrItinData<IIC_VMULi32D, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [7, 2, 1]>, - // - // Quad-register Integer Multiply (.8, .16) - InstrItinData<IIC_VMULi16Q, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [7, 2, 2]>, - // - // Quad-register Integer Multiply (.32) - InstrItinData<IIC_VMULi32Q, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>, - InstrStage<2, [FU_NLSPipe], 0>, - InstrStage<3, [FU_NPipe]>], [9, 2, 1]>, - // - // Double-register Integer Multiply-Accumulate (.8, .16) - InstrItinData<IIC_VMACi16D, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [6, 2, 2, 3]>, - // - // Double-register Integer Multiply-Accumulate (.32) - InstrItinData<IIC_VMACi32D, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [7, 2, 1, 3]>, - // - // Quad-register Integer Multiply-Accumulate (.8, .16) - InstrItinData<IIC_VMACi16Q, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [7, 2, 2, 3]>, - // - // Quad-register Integer Multiply-Accumulate (.32) - InstrItinData<IIC_VMACi32Q, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>, - InstrStage<2, [FU_NLSPipe], 0>, - InstrStage<3, [FU_NPipe]>], [9, 2, 1, 3]>, - // - // Double-register VEXT - InstrItinData<IIC_VEXTD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe]>], [2, 1, 1]>, - // - // Quad-register VEXT - InstrItinData<IIC_VEXTQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NLSPipe]>], [3, 1, 1]>, - // - // VTB - InstrItinData<IIC_VTB1, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NLSPipe]>], [3, 2, 1]>, - InstrItinData<IIC_VTB2, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NLSPipe]>], [3, 2, 2, 1]>, - InstrItinData<IIC_VTB3, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe]>, - InstrStage<1, [FU_NPipe], 0>, - InstrStage<2, [FU_NLSPipe]>], [4, 2, 2, 3, 1]>, - InstrItinData<IIC_VTB4, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe]>, - InstrStage<1, [FU_NPipe], 0>, - InstrStage<2, [FU_NLSPipe]>], [4, 2, 2, 3, 3, 1]>, - // - // VTBX - InstrItinData<IIC_VTBX1, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NLSPipe]>], [3, 1, 2, 1]>, - InstrItinData<IIC_VTBX2, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NLSPipe]>], [3, 1, 2, 2, 1]>, - InstrItinData<IIC_VTBX3, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe]>, - InstrStage<1, [FU_NPipe], 0>, - InstrStage<2, [FU_NLSPipe]>], [4, 1, 2, 2, 3, 1]>, - InstrItinData<IIC_VTBX4, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe]>, - InstrStage<1, [FU_NPipe], 0>, - InstrStage<2, [FU_NLSPipe]>], [4, 1, 2, 2, 3, 3, 1]> -]>; diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp new file mode 100644 index 0000000..c04ee38 --- /dev/null +++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp @@ -0,0 +1,22 @@ +//===-- ARMSelectionDAGInfo.cpp - ARM SelectionDAG Info -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source 
+// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the ARMSelectionDAGInfo class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "arm-selectiondag-info" +#include "ARMSelectionDAGInfo.h" +using namespace llvm; + +ARMSelectionDAGInfo::ARMSelectionDAGInfo() { +} + +ARMSelectionDAGInfo::~ARMSelectionDAGInfo() { +} diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.h b/lib/Target/ARM/ARMSelectionDAGInfo.h new file mode 100644 index 0000000..afe9a47 --- /dev/null +++ b/lib/Target/ARM/ARMSelectionDAGInfo.h @@ -0,0 +1,29 @@ +//===-- ARMSelectionDAGInfo.h - ARM SelectionDAG Info -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the ARM subclass for TargetSelectionDAGInfo. +// +//===----------------------------------------------------------------------===// + +#ifndef ARMSELECTIONDAGINFO_H +#define ARMSELECTIONDAGINFO_H + +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { + +class ARMSelectionDAGInfo : public TargetSelectionDAGInfo { +public: + ARMSelectionDAGInfo(); + ~ARMSelectionDAGInfo(); +}; + +} + +#endif diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 9e55cd8..b11580a 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -116,7 +116,8 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, /// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol. bool -ARMSubtarget::GVIsIndirectSymbol(GlobalValue *GV, Reloc::Model RelocM) const { +ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV, + Reloc::Model RelocM) const { if (RelocM == Reloc::Static) return false; diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index fa56a91..288a19a 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -160,7 +160,7 @@ protected: /// GVIsIndirectSymbol - true if the GV will be accessed via an indirect /// symbol. - bool GVIsIndirectSymbol(GlobalValue *GV, Reloc::Model RelocM) const; + bool GVIsIndirectSymbol(const GlobalValue *GV, Reloc::Model RelocM) const; }; } // End llvm namespace diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 95f57b7..662e61e 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -102,8 +102,12 @@ bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM, bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { // FIXME: temporarily disabling load / store optimization pass for Thumb1. - if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only()) - PM.add(createARMLoadStoreOptimizationPass()); + if (OptLevel != CodeGenOpt::None) { + if (!Subtarget.isThumb1Only()) + PM.add(createARMLoadStoreOptimizationPass()); + if (Subtarget.hasNEON()) + PM.add(createNEONMoveFixPass()); + } // Expand some pseudo instructions into multiple instructions to allow // proper scheduling. 
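The hunk above, together with the addPreEmitPass hunk that follows, relocates the NEON move-fix pass from pre-emit to pre-sched2, presumably so the second scheduling pass already sees the rewritten moves. A minimal standalone sketch of the gating pattern (hypothetical pass labels and simplified types, not the LLVM PassManager API):

#include <iostream>
#include <string>
#include <vector>

enum class OptLevel { None, Default };

struct Subtarget {
  bool Thumb1Only;
  bool NEON;
};

// Mirrors the control flow of addPreSched2 above: optional passes run only
// when optimizing, and each is further gated on a subtarget property.
static void addPreSched2Passes(std::vector<std::string> &PM,
                               const Subtarget &ST, OptLevel Opt) {
  if (Opt != OptLevel::None) {
    if (!ST.Thumb1Only)
      PM.push_back("arm-load-store-opt"); // stand-in for createARMLoadStoreOptimizationPass()
    if (ST.NEON)
      PM.push_back("neon-move-fix");      // stand-in for createNEONMoveFixPass()
  }
}

int main() {
  std::vector<std::string> PM;
  addPreSched2Passes(PM, Subtarget{false, true}, OptLevel::Default);
  for (const std::string &P : PM)
    std::cout << P << '\n'; // arm-load-store-opt, then neon-move-fix
  return 0;
}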
@@ -118,8 +122,6 @@ bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM, if (OptLevel != CodeGenOpt::None) { if (!Subtarget.isThumb1Only()) PM.add(createIfConverterPass()); - if (Subtarget.hasNEON()) - PM.add(createNEONMoveFixPass()); } if (Subtarget.isThumb2()) { diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index c32f16c..4e205df 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -71,8 +71,8 @@ public: return &InstrInfo.getRegisterInfo(); } - virtual ARMTargetLowering *getTargetLowering() const { - return const_cast<ARMTargetLowering*>(&TLInfo); + virtual const ARMTargetLowering *getTargetLowering() const { + return &TLInfo; } virtual const ARMInstrInfo *getInstrInfo() const { return &InstrInfo; } @@ -97,8 +97,8 @@ public: return &InstrInfo->getRegisterInfo(); } - virtual ARMTargetLowering *getTargetLowering() const { - return const_cast<ARMTargetLowering*>(&TLInfo); + virtual const ARMTargetLowering *getTargetLowering() const { + return &TLInfo; } /// returns either Thumb1InstrInfo or Thumb2InstrInfo diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp index 680d032..091a3b3 100644 --- a/lib/Target/ARM/ARMTargetObjectFile.cpp +++ b/lib/Target/ARM/ARMTargetObjectFile.cpp @@ -9,6 +9,7 @@ #include "ARMTargetObjectFile.h" #include "ARMSubtarget.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/Support/Dwarf.h" #include "llvm/Target/TargetMachine.h" @@ -25,12 +26,14 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx, if (TM.getSubtarget<ARMSubtarget>().isAAPCS_ABI()) { StaticCtorSection = - getELFSection(".init_array", MCSectionELF::SHT_INIT_ARRAY, - MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC, - SectionKind::getDataRel()); + getContext().getELFSection(".init_array", MCSectionELF::SHT_INIT_ARRAY, + MCSectionELF::SHF_WRITE | + MCSectionELF::SHF_ALLOC, + SectionKind::getDataRel()); StaticDtorSection = - getELFSection(".fini_array", MCSectionELF::SHT_FINI_ARRAY, - MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC, - SectionKind::getDataRel()); + getContext().getELFSection(".fini_array", MCSectionELF::SHT_FINI_ARRAY, + MCSectionELF::SHF_WRITE | + MCSectionELF::SHF_ALLOC, + SectionKind::getDataRel()); } } diff --git a/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp b/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp new file mode 100644 index 0000000..f859d1b --- /dev/null +++ b/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp @@ -0,0 +1,140 @@ +//===-- ARMAsmLexer.cpp - Tokenize ARM assembly to AsmTokens --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "ARM.h" +#include "ARMTargetMachine.h" + +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" + +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/MC/MCParser/MCParsedAsmOperand.h" + +#include "llvm/Target/TargetAsmLexer.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegistry.h" + +#include <string> +#include <map> + +using namespace llvm; + +namespace { + + class ARMBaseAsmLexer : public TargetAsmLexer { + const MCAsmInfo &AsmInfo; + + const AsmToken &lexDefinite() { + return getLexer()->Lex(); + } + + AsmToken LexTokenUAL(); + protected: + typedef std::map <std::string, unsigned> rmap_ty; + + rmap_ty RegisterMap; + + void InitRegisterMap(const TargetRegisterInfo *info) { + unsigned numRegs = info->getNumRegs(); + + for (unsigned i = 0; i < numRegs; ++i) { + const char *regName = info->getName(i); + if (regName) + RegisterMap[regName] = i; + } + } + + unsigned MatchRegisterName(StringRef Name) { + rmap_ty::iterator iter = RegisterMap.find(Name.str()); + if (iter != RegisterMap.end()) + return iter->second; + else + return 0; + } + + AsmToken LexToken() { + if (!Lexer) { + SetError(SMLoc(), "No MCAsmLexer installed"); + return AsmToken(AsmToken::Error, "", 0); + } + + switch (AsmInfo.getAssemblerDialect()) { + default: + SetError(SMLoc(), "Unhandled dialect"); + return AsmToken(AsmToken::Error, "", 0); + case 0: + return LexTokenUAL(); + } + } + public: + ARMBaseAsmLexer(const Target &T, const MCAsmInfo &MAI) + : TargetAsmLexer(T), AsmInfo(MAI) { + } + }; + + class ARMAsmLexer : public ARMBaseAsmLexer { + public: + ARMAsmLexer(const Target &T, const MCAsmInfo &MAI) + : ARMBaseAsmLexer(T, MAI) { + std::string tripleString("arm-unknown-unknown"); + std::string featureString; + OwningPtr<const TargetMachine> + targetMachine(T.createTargetMachine(tripleString, featureString)); + InitRegisterMap(targetMachine->getRegisterInfo()); + } + }; + + class ThumbAsmLexer : public ARMBaseAsmLexer { + public: + ThumbAsmLexer(const Target &T, const MCAsmInfo &MAI) + : ARMBaseAsmLexer(T, MAI) { + std::string tripleString("thumb-unknown-unknown"); + std::string featureString; + OwningPtr<const TargetMachine> + targetMachine(T.createTargetMachine(tripleString, featureString)); + InitRegisterMap(targetMachine->getRegisterInfo()); + } + }; +} + +AsmToken ARMBaseAsmLexer::LexTokenUAL() { + const AsmToken &lexedToken = lexDefinite(); + + switch (lexedToken.getKind()) { + default: + return AsmToken(lexedToken); + case AsmToken::Error: + SetError(Lexer->getErrLoc(), Lexer->getErr()); + return AsmToken(lexedToken); + case AsmToken::Identifier: + { + std::string upperCase = lexedToken.getString().str(); + std::string lowerCase = LowercaseString(upperCase); + StringRef lowerRef(lowerCase); + + unsigned regID = MatchRegisterName(lowerRef); + + if (regID) { + return AsmToken(AsmToken::Register, + lexedToken.getString(), + static_cast<int64_t>(regID)); + } else { + return AsmToken(lexedToken); + } + } + } +} + +extern "C" void LLVMInitializeARMAsmLexer() { + RegisterAsmLexer<ARMAsmLexer> X(TheARMTarget); + RegisterAsmLexer<ThumbAsmLexer> Y(TheThumbTarget); +} + diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index cf55377..bfa89c4 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -812,8 +812,11 @@ bool 
ARMAsmParser::ParseDirectiveCode(SMLoc L) { return false; } +extern "C" void LLVMInitializeARMAsmLexer(); + /// Force static initialization. extern "C" void LLVMInitializeARMAsmParser() { RegisterAsmParser<ARMAsmParser> X(TheARMTarget); RegisterAsmParser<ARMAsmParser> Y(TheThumbTarget); + LLVMInitializeARMAsmLexer(); } diff --git a/lib/Target/ARM/AsmParser/CMakeLists.txt b/lib/Target/ARM/AsmParser/CMakeLists.txt index 308c6cf..9ba7c01 100644 --- a/lib/Target/ARM/AsmParser/CMakeLists.txt +++ b/lib/Target/ARM/AsmParser/CMakeLists.txt @@ -1,6 +1,7 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) add_llvm_library(LLVMARMAsmParser + ARMAsmLexer.cpp ARMAsmParser.cpp ) diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp index 15c5294..80a9d2d 100644 --- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp @@ -21,6 +21,7 @@ #include "ARMMachineFunctionInfo.h" #include "ARMMCInstLower.h" #include "ARMTargetMachine.h" +#include "llvm/Analysis/DebugInfo.h" #include "llvm/Constants.h" #include "llvm/Module.h" #include "llvm/Type.h" @@ -239,7 +240,7 @@ namespace { } else if (ACPV->isBlockAddress()) { O << *GetBlockAddressSymbol(ACPV->getBlockAddress()); } else if (ACPV->isGlobalValue()) { - GlobalValue *GV = ACPV->getGV(); + const GlobalValue *GV = ACPV->getGV(); bool isIndirect = Subtarget->isTargetDarwin() && Subtarget->GVIsIndirectSymbol(GV, TM.getRelocationModel()); if (!isIndirect) @@ -352,7 +353,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, return; case MachineOperand::MO_GlobalAddress: { bool isCallOp = Modifier && !strcmp(Modifier, "call"); - GlobalValue *GV = MO.getGlobal(); + const GlobalValue *GV = MO.getGlobal(); if ((Modifier && strcmp(Modifier, "lo16") == 0) || (TF & ARMII::MO_LO16)) @@ -504,7 +505,6 @@ void ARMAsmPrinter::printAddrMode2OffsetOperand(const MachineInstr *MI, int Op, if (!MO1.getReg()) { unsigned ImmOffs = ARM_AM::getAM2Offset(MO2.getImm()); - assert(ImmOffs && "Malformed indexed load / store!"); O << "#" << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm())) << ImmOffs; @@ -556,7 +556,6 @@ void ARMAsmPrinter::printAddrMode3OffsetOperand(const MachineInstr *MI, int Op, } unsigned ImmOffs = ARM_AM::getAM3Offset(MO2.getImm()); - assert(ImmOffs && "Malformed indexed load / store!"); O << "#" << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm())) << ImmOffs; @@ -1110,6 +1109,24 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { SmallString<128> Str; raw_svector_ostream OS(Str); + if (MI->getOpcode() == ARM::DBG_VALUE) { + unsigned NOps = MI->getNumOperands(); + assert(NOps==4); + OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: "; + // cast away const; DIetc do not take const operands for some reason. + DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata())); + OS << V.getName(); + OS << " <- "; + // Frame address. Currently handles register +- offset only. 
+ assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm()); + OS << '['; printOperand(MI, 0, OS); OS << '+'; printOperand(MI, 1, OS); + OS << ']'; + OS << "+"; + printOperand(MI, NOps-2, OS); + OutStreamer.EmitRawText(OS.str()); + return; + } + printInstruction(MI, OS); OutStreamer.EmitRawText(OS.str()); @@ -1129,22 +1146,23 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) { // avoid out-of-range branches that are due to a fundamental limitation of // the way symbol offsets are encoded with the current Darwin ARM // relocations. - TargetLoweringObjectFileMachO &TLOFMacho = - static_cast<TargetLoweringObjectFileMachO &>(getObjFileLowering()); + const TargetLoweringObjectFileMachO &TLOFMacho = + static_cast<const TargetLoweringObjectFileMachO &>( + getObjFileLowering()); OutStreamer.SwitchSection(TLOFMacho.getTextSection()); OutStreamer.SwitchSection(TLOFMacho.getTextCoalSection()); OutStreamer.SwitchSection(TLOFMacho.getConstTextCoalSection()); if (RelocM == Reloc::DynamicNoPIC) { const MCSection *sect = - TLOFMacho.getMachOSection("__TEXT", "__symbol_stub4", - MCSectionMachO::S_SYMBOL_STUBS, - 12, SectionKind::getText()); + OutContext.getMachOSection("__TEXT", "__symbol_stub4", + MCSectionMachO::S_SYMBOL_STUBS, + 12, SectionKind::getText()); OutStreamer.SwitchSection(sect); } else { const MCSection *sect = - TLOFMacho.getMachOSection("__TEXT", "__picsymbolstub4", - MCSectionMachO::S_SYMBOL_STUBS, - 16, SectionKind::getText()); + OutContext.getMachOSection("__TEXT", "__picsymbolstub4", + MCSectionMachO::S_SYMBOL_STUBS, + 16, SectionKind::getText()); OutStreamer.SwitchSection(sect); } } @@ -1201,8 +1219,8 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) { void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { if (Subtarget->isTargetDarwin()) { // All darwin targets use mach-o.
- TargetLoweringObjectFileMachO &TLOFMacho = - static_cast<TargetLoweringObjectFileMachO &>(getObjFileLowering()); + const TargetLoweringObjectFileMachO &TLOFMacho = + static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering()); MachineModuleInfoMachO &MMIMacho = MMI->getObjFileInfo<MachineModuleInfoMachO>(); diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp index ef5ead6..ac6331f 100644 --- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp @@ -18,6 +18,7 @@ #include "llvm/MC/MCInst.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -330,7 +331,6 @@ void ARMInstPrinter::printAddrMode2OffsetOperand(const MCInst *MI, if (!MO1.getReg()) { unsigned ImmOffs = ARM_AM::getAM2Offset(MO2.getImm()); - assert(ImmOffs && "Malformed indexed load / store!"); O << '#' << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm())) << ImmOffs; @@ -380,7 +380,6 @@ void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI, } unsigned ImmOffs = ARM_AM::getAM3Offset(MO2.getImm()); - assert(ImmOffs && "Malformed indexed load / store!"); O << '#' << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm())) << ImmOffs; @@ -779,3 +778,22 @@ void ARMInstPrinter::printVFPf64ImmOperand(const MCInst *MI, unsigned OpNum, O << '#' << MI->getOperand(OpNum).getImm(); } +void ARMInstPrinter::printHex8ImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xff); +} + +void ARMInstPrinter::printHex16ImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xffff); +} + +void ARMInstPrinter::printHex32ImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xffffffff); +} + +void ARMInstPrinter::printHex64ImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm()); +} diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h index dd006fc..be0b7c1 100644 --- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h @@ -104,10 +104,10 @@ public: void printNoHashImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVFPf32ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVFPf64ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printHex8ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) {} - void printHex16ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) {} - void printHex32ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) {} - void printHex64ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) {} + void printHex8ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printHex16ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printHex32ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printHex64ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printPCLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O); // FIXME: Implement. 
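All four printHex*ImmOperand bodies above share one pattern: the operand's immediate lives sign-extended in an int64_t, so each printer masks it back down to its encoded width before formatting it as #0x plus unpadded hex digits. A self-contained approximation (printf stands in for LLVM's utohexstr, so the digit case may differ):

#include <cinttypes>
#include <cstdint>
#include <cstdio>

// Mask the sign-extended immediate to its encoded width, then hex-format it.
static void printHexImm(int64_t Imm, uint64_t WidthMask) {
  std::printf("#0x%" PRIX64 "\n", (uint64_t)Imm & WidthMask);
}

int main() {
  printHexImm(-1, 0xffULL);       // #0xFF                (printHex8ImmOperand)
  printHexImm(-1, 0xffffULL);     // #0xFFFF              (printHex16ImmOperand)
  printHexImm(-1, 0xffffffffULL); // #0xFFFFFFFF          (printHex32ImmOperand)
  printHexImm(-1, ~0ULL);         // #0xFFFFFFFFFFFFFFFF  (printHex64ImmOperand)
  return 0;
}

Without the masking, a sign-extended 8-bit immediate such as -1 would print as the full 64-bit pattern.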
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt index bbc0095..29e66e1 100644 --- a/lib/Target/ARM/CMakeLists.txt +++ b/lib/Target/ARM/CMakeLists.txt @@ -10,6 +10,7 @@ tablegen(ARMGenAsmWriter.inc -gen-asm-writer) tablegen(ARMGenDAGISel.inc -gen-dag-isel) tablegen(ARMGenCallingConv.inc -gen-callingconv) tablegen(ARMGenSubtarget.inc -gen-subtarget) +tablegen(ARMGenEDInfo.inc -gen-enhanced-disassembly-info) add_llvm_target(ARMCodeGen ARMBaseInstrInfo.cpp @@ -36,6 +37,7 @@ add_llvm_target(ARMCodeGen Thumb2InstrInfo.cpp Thumb2RegisterInfo.cpp Thumb2SizeReduction.cpp + ARMSelectionDAGInfo.cpp ) target_link_libraries (LLVMARMCodeGen LLVMSelectionDAG) diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 47c3104..4de697e 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -18,6 +18,7 @@ #include "ARMDisassembler.h" #include "ARMDisassemblerCore.h" +#include "llvm/MC/EDInstInfo.h" #include "llvm/MC/MCInst.h" #include "llvm/Target/TargetRegistry.h" #include "llvm/Support/Debug.h" @@ -38,7 +39,9 @@ /// #include "../ARMGenDecoderTables.inc" -namespace llvm { +#include "../ARMGenEDInfo.inc" + +using namespace llvm; /// showBitVector - Use the raw_ostream to log a diagnostic message describing /// the individual bits of the instruction. @@ -247,27 +250,27 @@ static unsigned T2Morph2LoadLiteral(unsigned Opcode) { case ARM::t2LDR_POST: case ARM::t2LDR_PRE: case ARM::t2LDRi12: case ARM::t2LDRi8: - case ARM::t2LDRs: + case ARM::t2LDRs: case ARM::t2LDRT: return ARM::t2LDRpci; case ARM::t2LDRB_POST: case ARM::t2LDRB_PRE: case ARM::t2LDRBi12: case ARM::t2LDRBi8: - case ARM::t2LDRBs: + case ARM::t2LDRBs: case ARM::t2LDRBT: return ARM::t2LDRBpci; case ARM::t2LDRH_POST: case ARM::t2LDRH_PRE: case ARM::t2LDRHi12: case ARM::t2LDRHi8: - case ARM::t2LDRHs: + case ARM::t2LDRHs: case ARM::t2LDRHT: return ARM::t2LDRHpci; case ARM::t2LDRSB_POST: case ARM::t2LDRSB_PRE: case ARM::t2LDRSBi12: case ARM::t2LDRSBi8: - case ARM::t2LDRSBs: + case ARM::t2LDRSBs: case ARM::t2LDRSBT: return ARM::t2LDRSBpci; case ARM::t2LDRSH_POST: case ARM::t2LDRSH_PRE: case ARM::t2LDRSHi12: case ARM::t2LDRSHi8: - case ARM::t2LDRSHs: + case ARM::t2LDRSHs: case ARM::t2LDRSHT: return ARM::t2LDRSHpci; } } @@ -404,7 +407,6 @@ bool ARMDisassembler::getInstruction(MCInst &MI, }); ARMBasicMCBuilder *Builder = CreateMCBuilder(Opcode, Format); - if (!Builder) return false; @@ -492,11 +494,11 @@ bool ThumbDisassembler::getInstruction(MCInst &MI, }); ARMBasicMCBuilder *Builder = CreateMCBuilder(Opcode, Format); - Builder->setSession(const_cast<Session *>(&SO)); - if (!Builder) return false; + Builder->SetSession(const_cast<Session *>(&SO)); + if (!Builder->Build(MI, insn)) return false; @@ -506,17 +508,37 @@ bool ThumbDisassembler::getInstruction(MCInst &MI, } // A8.6.50 +// Valid return values are {1, 2, 3, 4}, with 0 signifying an error condition. static unsigned short CountITSize(unsigned ITMask) { // First count the trailing zeros of the IT mask. unsigned TZ = CountTrailingZeros_32(ITMask); - assert(TZ <= 3 && "Encoding error"); + if (TZ > 3) { + DEBUG(errs() << "Encoding error: IT Mask '0000'"); + return 0; + } return (4 - TZ); } -/// Init ITState. -void Session::InitIT(unsigned short bits7_0) { +/// Init ITState. Note that at least one bit is always 1 in mask.
+bool Session::InitIT(unsigned short bits7_0) { ITCounter = CountITSize(slice(bits7_0, 3, 0)); + if (ITCounter == 0) + return false; + + // A8.6.50 IT + unsigned short FirstCond = slice(bits7_0, 7, 4); + if (FirstCond == 0xF) { + DEBUG(errs() << "Encoding error: IT FirstCond '1111'"); + return false; + } + if (FirstCond == 0xE && ITCounter != 1) { + DEBUG(errs() << "Encoding error: IT FirstCond '1110' && Mask != '1000'"); + return false; + } + ITState = bits7_0; + + return true; } /// Update ITState if necessary. @@ -547,4 +569,10 @@ extern "C" void LLVMInitializeARMDisassembler() { createThumbDisassembler); } -} // namespace llvm +EDInstInfo *ARMDisassembler::getEDInfo() const { + return instInfoARM; +} + +EDInstInfo *ThumbDisassembler::getEDInfo() const { + return instInfoARM; +} diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.h b/lib/Target/ARM/Disassembler/ARMDisassembler.h index 44592e0..0a74a38 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.h +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.h @@ -24,6 +24,8 @@ class MCInst; class MemoryObject; class raw_ostream; +struct EDInstInfo; + /// ARMDisassembler - ARM disassembler for all ARM platforms. class ARMDisassembler : public MCDisassembler { public: @@ -42,6 +44,9 @@ public: const MemoryObject ®ion, uint64_t address, raw_ostream &vStream) const; + + /// getEDInfo - See MCDisassembler. + EDInstInfo *getEDInfo() const; private: }; @@ -55,7 +60,7 @@ public: Session() : ITCounter(0), ITState(0) {} ~Session() {} /// InitIT - Initializes ITCounter/ITState. - void InitIT(unsigned short bits7_0); + bool InitIT(unsigned short bits7_0); /// UpdateIT - Updates ITCounter/ITState as IT Block progresses. void UpdateIT(); @@ -82,6 +87,9 @@ public: const MemoryObject ®ion, uint64_t address, raw_ostream &vStream) const; + + /// getEDInfo - See MCDisassembler. + EDInstInfo *getEDInfo() const; private: Session SO; }; diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp index db921ef..adb7795 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp @@ -13,8 +13,12 @@ // //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "arm-disassembler" + #include "ARMDisassemblerCore.h" #include "ARMAddressingModes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" /// ARMGenInstrInfo.inc - ARMGenInstrInfo.inc contains the static const /// TargetInstrDesc ARMInsts[] definition and the TargetOperandInfo[]'s @@ -75,7 +79,7 @@ const char *ARMUtils::OpcodeName(unsigned Opcode) { // Return the register enum Based on RegClass and the raw register number. // For DRegPair, see comments below. // FIXME: Auto-gened? -static unsigned getRegisterEnum(unsigned RegClassID, unsigned RawRegister, +static unsigned getRegisterEnum(BO B, unsigned RegClassID, unsigned RawRegister, bool DRegPair = false) { if (DRegPair && RegClassID == ARM::QPRRegClassID) { @@ -345,7 +349,9 @@ static unsigned getRegisterEnum(unsigned RegClassID, unsigned RawRegister, } break; } - assert(0 && "Invalid (RegClassID, RawRegister) combination"); + DEBUG(errs() << "Invalid (RegClassID, RawRegister) combination\n"); + // Encoding error. Mark the builder with error code != 0. 
+ B->SetErr(-1); + return 0; } @@ -509,7 +515,7 @@ static bool DisassemblePseudo(MCInst &MI, unsigned Opcode, uint32_t insn, // Inst{3-0} => Rm // Inst{11-8} => Rs static bool DisassembleMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; unsigned short NumDefs = TID.getNumDefs(); @@ -529,26 +535,26 @@ static bool DisassembleMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, if (NumDefs == 2) { assert(NumOps >= 4 && OpInfo[3].RegClass == ARM::GPRRegClassID && "Expect 4th register operand"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); ++OpIdx; } // The destination register: RdHi{19-16} or Rd{19-16}. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); // The two src registers: Rn{3-0}, then Rm{11-8}. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRs(insn)))); OpIdx += 3; // Many multiply instructions (e.g., MLA) have three src registers. // The third register operand is Ra{15-12}. if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); ++OpIdx; } @@ -610,7 +616,7 @@ static inline unsigned GetCopOpc(uint32_t insn) { // and friends // static bool DisassembleCoprocessor(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { assert(NumOps >= 5 && "Num of operands >= 5 for coprocessor instr"); @@ -631,7 +637,7 @@ static bool DisassembleCoprocessor(MCInst &MI, unsigned Opcode, uint32_t insn, MI.addOperand(MCOperand::CreateImm(decodeRd(insn))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); if (PW) { @@ -651,11 +657,11 @@ static bool DisassembleCoprocessor(MCInst &MI, unsigned Opcode, uint32_t insn, MI.addOperand(NoGPR ? MCOperand::CreateImm(decodeRd(insn)) : MCOperand::CreateReg( - getRegisterEnum(ARM::GPRRegClassID, + getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); MI.addOperand(OneCopOpc ?
MCOperand::CreateReg( - getRegisterEnum(ARM::GPRRegClassID, + getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn))) : MCOperand::CreateImm(decodeRn(insn))); @@ -688,18 +694,19 @@ static bool DisassembleCoprocessor(MCInst &MI, unsigned Opcode, uint32_t insn, // SRSW/SRS: addrmode4:$addr mode_imm // RFEW/RFE: addrmode4:$addr Rn static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { if (CoprocessorOpcode(Opcode)) - return DisassembleCoprocessor(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleCoprocessor(MI, Opcode, insn, NumOps, NumOpsAdded, B); const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + if (!OpInfo) return false; // MRS and MRSsys take one GPR reg Rd. if (Opcode == ARM::MRS || Opcode == ARM::MRSsys) { assert(NumOps >= 1 && OpInfo[0].RegClass == ARM::GPRRegClassID && "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); NumOpsAdded = 1; return true; @@ -708,7 +715,7 @@ static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn, if (Opcode == ARM::BXJ) { assert(NumOps >= 1 && OpInfo[0].RegClass == ARM::GPRRegClassID && "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); NumOpsAdded = 1; return true; @@ -717,7 +724,7 @@ static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn, if (Opcode == ARM::MSR || Opcode == ARM::MSRsys) { assert(NumOps >= 1 && OpInfo[0].RegClass == ARM::GPRRegClassID && "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); MI.addOperand(MCOperand::CreateImm(slice(insn, 19, 16))); NumOpsAdded = 2; @@ -748,7 +755,7 @@ static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn, if (Opcode == ARM::SRSW || Opcode == ARM::SRS) MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0))); else - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); NumOpsAdded = 3; return true; @@ -791,9 +798,11 @@ static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // BLXr9, BXr9 // BRIND, BX_RET static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + if (!OpInfo) return false; + unsigned &OpIdx = NumOpsAdded; OpIdx = 0; @@ -806,7 +815,7 @@ static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, if (Opcode == ARM::BLXr9 || Opcode == ARM::BRIND) { assert(NumOps >= 1 && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); OpIdx = 1; return true; @@ -817,9 +826,9 @@ static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // InOperandList with GPR:$target and GPR:$idx regs. 
assert(NumOps == 4 && "Expect 4 operands"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); // Fill in the two remaining imm operands to signify build completion. @@ -835,7 +844,7 @@ static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // InOperandList with GPR::$target reg. assert(NumOps == 3 && "Expect 3 operands"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); // Fill in the two remaining imm operands to signify build completion. @@ -852,13 +861,13 @@ static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // See also ARMAddressingModes.h (Addressing Mode #2). assert(NumOps == 5 && getIBit(insn) == 1 && "Expect 5 operands && I-bit=1"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub; // Disassemble the offset reg (Rm), shift type, and immediate shift length. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); // Inst{6-5} encodes the shift opcode. ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 6, 5)); @@ -882,14 +891,19 @@ static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, return false; } -static inline uint32_t getBFCInvMask(uint32_t insn) { +static inline bool getBFCInvMask(uint32_t insn, uint32_t &mask) { uint32_t lsb = slice(insn, 11, 7); uint32_t msb = slice(insn, 20, 16); uint32_t Val = 0; - assert(lsb <= msb && "Encoding error: lsb > msb"); + if (msb < lsb) { + DEBUG(errs() << "Encoding error: msb < lsb\n"); + return false; + } + for (uint32_t i = lsb; i <= msb; ++i) Val |= (1 << i); - return ~Val; + mask = ~Val; + return true; } static inline bool SaturateOpcode(unsigned Opcode) { @@ -924,7 +938,7 @@ static inline unsigned decodeSaturatePos(unsigned Opcode, uint32_t insn) { // operations have Rd Rm Rn, instead of the "normal" Rd Rn Rm. // They are QADD, QDADD, QDSUB, and QSUB. static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; unsigned short NumDefs = TID.getNumDefs(); @@ -936,7 +950,7 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // Disassemble register def if there is one. 
if (NumDefs && (OpInfo[OpIdx].RegClass == ARM::GPRRegClassID)) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); ++OpIdx; } @@ -949,7 +963,7 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn, if (SaturateOpcode(Opcode)) { MI.addOperand(MCOperand::CreateImm(decodeSaturatePos(Opcode, insn))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); if (Opcode == ARM::SSAT16 || Opcode == ARM::USAT16) { @@ -977,14 +991,18 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn, if (Opcode == ARM::BFC || Opcode == ARM::BFI) { // TIED_TO operand skipped for BFC and Inst{3-0} (Reg) for BFI. MI.addOperand(MCOperand::CreateReg(Opcode == ARM::BFC ? 0 - : getRegisterEnum(ARM::GPRRegClassID, + : getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); - MI.addOperand(MCOperand::CreateImm(getBFCInvMask(insn))); + uint32_t mask = 0; + if (!getBFCInvMask(insn, mask)) + return false; + + MI.addOperand(MCOperand::CreateImm(mask)); OpIdx += 2; return true; } if (Opcode == ARM::SBFX || Opcode == ARM::UBFX) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); MI.addOperand(MCOperand::CreateImm(slice(insn, 11, 7))); MI.addOperand(MCOperand::CreateImm(slice(insn, 20, 16) + 1)); @@ -1000,7 +1018,7 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn, assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && "Reg operand expected"); MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(ARM::GPRRegClassID, + getRegisterEnum(B, ARM::GPRRegClassID, RmRn ? decodeRm(insn) : decodeRn(insn)))); ++OpIdx; } @@ -1021,7 +1039,7 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // routed here as well. // assert(getIBit(insn) == 0 && "I_Bit != '0' reg/reg form"); MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(ARM::GPRRegClassID, + getRegisterEnum(B, ARM::GPRRegClassID, RmRn? decodeRn(insn) : decodeRm(insn)))); ++OpIdx; } else if (Opcode == ARM::MOVi16 || Opcode == ARM::MOVTi16) { @@ -1046,7 +1064,7 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn, } static bool DisassembleDPSoRegFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; unsigned short NumDefs = TID.getNumDefs(); @@ -1058,7 +1076,7 @@ static bool DisassembleDPSoRegFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // Disassemble register def if there is one. 
if (NumDefs && (OpInfo[OpIdx].RegClass == ARM::GPRRegClassID)) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); ++OpIdx; } @@ -1071,7 +1089,7 @@ static bool DisassembleDPSoRegFrm(MCInst &MI, unsigned Opcode, uint32_t insn, if (!isUnary) { assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ++OpIdx; } @@ -1094,11 +1112,11 @@ static bool DisassembleDPSoRegFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // Register-controlled shifts have Inst{7} = 0 and Inst{4} = 1. unsigned Rs = slice(insn, 4, 4); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); if (Rs) { // Register-controlled shifts: [Rm, Rs, shift]. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRs(insn)))); // Inst{6-5} encodes the shift opcode. ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 6, 5)); @@ -1121,24 +1139,26 @@ static bool DisassembleDPSoRegFrm(MCInst &MI, unsigned Opcode, uint32_t insn, } static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, bool isStore) { + unsigned short NumOps, unsigned &NumOpsAdded, bool isStore, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; - unsigned short NumDefs = TID.getNumDefs(); bool isPrePost = isPrePostLdSt(TID.TSFlags); const TargetOperandInfo *OpInfo = TID.OpInfo; + if (!OpInfo) return false; + unsigned &OpIdx = NumOpsAdded; OpIdx = 0; - assert(((!isStore && NumDefs > 0) || (isStore && (NumDefs == 0 || isPrePost))) + assert(((!isStore && TID.getNumDefs() > 0) || + (isStore && (TID.getNumDefs() == 0 || isPrePost))) && "Invalid arguments"); // Operand 0 of a pre- and post-indexed store is the address base writeback. 
if (isPrePost && isStore) { assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ++OpIdx; } @@ -1149,7 +1169,7 @@ static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); ++OpIdx; @@ -1157,7 +1177,7 @@ static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, if (isPrePost && !isStore) { assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ++OpIdx; } @@ -1170,7 +1190,7 @@ static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, "Reg operand expected"); assert((!isPrePost || (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)) && "Index mode or tied_to operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ++OpIdx; @@ -1194,7 +1214,7 @@ static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, MI.addOperand(MCOperand::CreateImm(Offset)); } else { // Disassemble the offset reg (Rm), shift type, and immediate shift length. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); // Inst{6-5} encodes the shift opcode. 
ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 6, 5)); @@ -1212,13 +1232,13 @@ static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, } static bool DisassembleLdFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { - return DisassembleLdStFrm(MI, Opcode, insn, NumOps, NumOpsAdded, false); + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { + return DisassembleLdStFrm(MI, Opcode, insn, NumOps, NumOpsAdded, false, B); } static bool DisassembleStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { - return DisassembleLdStFrm(MI, Opcode, insn, NumOps, NumOpsAdded, true); + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { + return DisassembleLdStFrm(MI, Opcode, insn, NumOps, NumOpsAdded, true, B); } static bool HasDualReg(unsigned Opcode) { @@ -1232,24 +1252,26 @@ static bool HasDualReg(unsigned Opcode) { } static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, bool isStore) { + unsigned short NumOps, unsigned &NumOpsAdded, bool isStore, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; - unsigned short NumDefs = TID.getNumDefs(); bool isPrePost = isPrePostLdSt(TID.TSFlags); const TargetOperandInfo *OpInfo = TID.OpInfo; + if (!OpInfo) return false; + unsigned &OpIdx = NumOpsAdded; OpIdx = 0; - assert(((!isStore && NumDefs > 0) || (isStore && (NumDefs == 0 || isPrePost))) + assert(((!isStore && TID.getNumDefs() > 0) || + (isStore && (TID.getNumDefs() == 0 || isPrePost))) && "Invalid arguments"); // Operand 0 of a pre- and post-indexed store is the address base writeback. if (isPrePost && isStore) { assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ++OpIdx; } @@ -1262,13 +1284,13 @@ static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); ++OpIdx; // Fill in LDRD and STRD's second operand. 
if (DualReg) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn) + 1))); ++OpIdx; } @@ -1277,7 +1299,7 @@ static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, if (isPrePost && !isStore) { assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ++OpIdx; } @@ -1290,7 +1312,7 @@ static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, "Reg operand expected"); assert((!isPrePost || (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)) && "Index mode or tied_to operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ++OpIdx; @@ -1315,7 +1337,7 @@ static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, MI.addOperand(MCOperand::CreateImm(Offset)); } else { // Disassemble the offset reg (Rm). - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); unsigned Offset = ARM_AM::getAM3Opc(AddrOpcode, 0); MI.addOperand(MCOperand::CreateImm(Offset)); @@ -1326,13 +1348,14 @@ static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, } static bool DisassembleLdMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { - return DisassembleLdStMiscFrm(MI, Opcode, insn, NumOps, NumOpsAdded, false); + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { + return DisassembleLdStMiscFrm(MI, Opcode, insn, NumOps, NumOpsAdded, false, + B); } static bool DisassembleStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { - return DisassembleLdStMiscFrm(MI, Opcode, insn, NumOps, NumOpsAdded, true); + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { + return DisassembleLdStMiscFrm(MI, Opcode, insn, NumOps, NumOpsAdded, true, B); } // The algorithm for disassembly of LdStMulFrm is different from others because @@ -1340,7 +1363,7 @@ static bool DisassembleStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // and operand 1 (the AM4 mode imm). After operand 3, we need to populate the // reglist with each affected register encoded as an MCOperand. static bool DisassembleLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { assert(NumOps >= 5 && "LdStMulFrm expects NumOps >= 5"); @@ -1348,7 +1371,7 @@ static bool DisassembleLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, OpIdx = 0; - unsigned Base = getRegisterEnum(ARM::GPRRegClassID, decodeRn(insn)); + unsigned Base = getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)); // Writeback to base, if necessary. 
if (Opcode == ARM::LDM_UPD || Opcode == ARM::STM_UPD) { @@ -1372,7 +1395,7 @@ static bool DisassembleLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned RegListBits = insn & ((1 << 16) - 1); for (unsigned i = 0; i < 16; ++i) { if ((RegListBits >> i) & 1) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, i))); ++OpIdx; } @@ -1388,9 +1411,11 @@ static bool DisassembleLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // // SWP, SWPB: Rd Rm Rn static bool DisassembleLdStExFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + if (!OpInfo) return false; + unsigned &OpIdx = NumOpsAdded; OpIdx = 0; @@ -1404,29 +1429,29 @@ static bool DisassembleLdStExFrm(MCInst &MI, unsigned Opcode, uint32_t insn, bool isDW = (Opcode == ARM::LDREXD || Opcode == ARM::STREXD); // Add the destination operand. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); ++OpIdx; // Store register Exclusive needs a source operand. if (isStore) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); ++OpIdx; if (isDW) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)+1))); ++OpIdx; } } else if (isDW) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)+1))); ++OpIdx; } // Finally add the pointer operand. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ++OpIdx; @@ -1438,7 +1463,7 @@ static bool DisassembleLdStExFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // PKHBT, PKHTB: Rd Rn Rm , LSL/ASR #imm5 // RBIT, REV, REV16, REVSH: Rd Rm static bool DisassembleArithMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; unsigned &OpIdx = NumOpsAdded; @@ -1452,18 +1477,18 @@ static bool DisassembleArithMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass == ARM::GPRRegClassID; - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); ++OpIdx; if (ThreeReg) { assert(NumOps >= 4 && "Expect >= 4 operands"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ++OpIdx; } - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); ++OpIdx; @@ -1485,7 +1510,7 @@ static bool DisassembleArithMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // The 2nd operand register is Rn and the 3rd operand register is Rm for the // three register operand form.
Otherwise, Rn=0b1111 and only Rm is used. static bool DisassembleExtFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; unsigned &OpIdx = NumOpsAdded; @@ -1499,17 +1524,17 @@ static bool DisassembleExtFrm(MCInst &MI, unsigned Opcode, uint32_t insn, bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass == ARM::GPRRegClassID; - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); ++OpIdx; if (ThreeReg) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ++OpIdx; } - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); ++OpIdx; @@ -1591,7 +1616,7 @@ static uint64_t VFPExpandImm(unsigned char byte, unsigned N) { // VCVTDS, VCVTSD: converts between double-precision and single-precision // The rest of the instructions have homogeneous [VFP]Rd and [VFP]Rm registers. static bool DisassembleVFPUnaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { assert(NumOps >= 1 && "VFPUnaryFrm expects NumOps >= 1"); @@ -1606,7 +1631,7 @@ static bool DisassembleVFPUnaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn, bool isSP = (RegClass == ARM::SPRRegClassID); MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(RegClass, decodeVFPRd(insn, isSP)))); + getRegisterEnum(B, RegClass, decodeVFPRd(insn, isSP)))); ++OpIdx; // Early return for compare with zero instructions. @@ -1620,7 +1645,7 @@ static bool DisassembleVFPUnaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn, isSP = (RegClass == ARM::SPRRegClassID); MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(RegClass, decodeVFPRm(insn, isSP)))); + getRegisterEnum(B, RegClass, decodeVFPRm(insn, isSP)))); ++OpIdx; return true; @@ -1631,7 +1656,7 @@ static bool DisassembleVFPUnaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // InOperandList to that of the dst. As far as asm printing is concerned, this // tied_to operand is simply skipped. static bool DisassembleVFPBinaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { assert(NumOps >= 3 && "VFPBinaryFrm expects NumOps >= 3"); @@ -1647,7 +1672,7 @@ static bool DisassembleVFPBinaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn, bool isSP = (RegClass == ARM::SPRRegClassID); MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(RegClass, decodeVFPRd(insn, isSP)))); + getRegisterEnum(B, RegClass, decodeVFPRd(insn, isSP)))); ++OpIdx; // Skip tied_to operand constraint. 
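The decoders in this file lean heavily on the slice(Bits, From, To) bitfield helper (defined elsewhere in ARMDisassemblerCore), as in the fbits computation of DisassembleVFPConv1Frm above. A reconstruction for reference only: a sketch of the behavior the call sites assume, not a quote of the upstream definition.

#include <cassert>
#include <cstdint>
#include <cstdio>

// Extract the inclusive bitfield Inst{From-To}. The 64-bit intermediate
// keeps the shift defined even when the field spans all 32 bits.
static uint32_t slice(uint32_t Bits, unsigned From, unsigned To) {
  assert(From < 32 && To <= From && "Invalid bitfield range");
  return (uint32_t)((Bits >> To) & (((uint64_t)1 << (From - To + 1)) - 1));
}

int main() {
  // A8.6.297 (VCVT, floating-point <-> fixed-point), as decoded above:
  // size comes from Inst{7}, and #fbits = size - UInt(imm4:i).
  uint32_t insn = 0x12345A67; // arbitrary bit pattern, not a claimed encoding
  int size = slice(insn, 7, 7) == 0 ? 16 : 32;
  int fbits = size - (int)((slice(insn, 3, 0) << 1) | slice(insn, 5, 5));
  std::printf("size=%d fbits=%d\n", size, fbits); // size=16 fbits=1
  return 0;
}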
@@ -1658,11 +1683,11 @@ static bool DisassembleVFPBinaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn, } MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(RegClass, decodeVFPRn(insn, isSP)))); + getRegisterEnum(B, RegClass, decodeVFPRn(insn, isSP)))); ++OpIdx; MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(RegClass, decodeVFPRm(insn, isSP)))); + getRegisterEnum(B, RegClass, decodeVFPRm(insn, isSP)))); ++OpIdx; return true; @@ -1675,12 +1700,13 @@ static bool DisassembleVFPBinaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // A8.6.297 vcvt (floating-point and fixed-point) // Dd|Sd Dd|Sd(TIED_TO) #fbits(= 16|32 - UInt(imm4:i)) static bool DisassembleVFPConv1Frm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { assert(NumOps >= 2 && "VFPConv1Frm expects NumOps >= 2"); const TargetInstrDesc &TID = ARMInsts[Opcode]; const TargetOperandInfo *OpInfo = TID.OpInfo; + if (!OpInfo) return false; bool SP = slice(insn, 8, 8) == 0; // A8.6.295 & A8.6.297 bool fixed_point = slice(insn, 17, 17) == 1; // A8.6.297 @@ -1692,7 +1718,7 @@ static bool DisassembleVFPConv1Frm(MCInst &MI, unsigned Opcode, uint32_t insn, int size = slice(insn, 7, 7) == 0 ? 16 : 32; int fbits = size - (slice(insn,3,0) << 1 | slice(insn,5,5)); MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(RegClassID, + getRegisterEnum(B, RegClassID, decodeVFPRd(insn, SP)))); assert(TID.getOperandConstraint(1, TOI::TIED_TO) != -1 && @@ -1712,15 +1738,15 @@ static bool DisassembleVFPConv1Frm(MCInst &MI, unsigned Opcode, uint32_t insn, if (slice(insn, 18, 18) == 1) { // to_integer operation d = decodeVFPRd(insn, true /* Is Single Precision */); MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(ARM::SPRRegClassID, d))); + getRegisterEnum(B, ARM::SPRRegClassID, d))); m = decodeVFPRm(insn, SP); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(RegClassID, m))); + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID, m))); } else { d = decodeVFPRd(insn, SP); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(RegClassID, d))); + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID, d))); m = decodeVFPRm(insn, true /* Is Single Precision */); MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(ARM::SPRRegClassID, m))); + getRegisterEnum(B, ARM::SPRRegClassID, m))); } NumOpsAdded = 2; } @@ -1731,13 +1757,13 @@ static bool DisassembleVFPConv1Frm(MCInst &MI, unsigned Opcode, uint32_t insn, // VMOVRS - A8.6.330 // Rt => Rd; Sn => UInt(Vn:N) static bool DisassembleVFPConv2Frm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { assert(NumOps >= 2 && "VFPConv2Frm expects NumOps >= 2"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::SPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID, decodeVFPRn(insn, true)))); NumOpsAdded = 2; return true; @@ -1749,29 +1775,29 @@ static bool DisassembleVFPConv2Frm(MCInst &MI, unsigned Opcode, uint32_t insn, // VMOVRRS - A8.6.331 // Rt => Rd; Rt2 => Rn; Sm => UInt(Vm:M); Sm1 = Sm+1 static bool DisassembleVFPConv3Frm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, 
BO B) { assert(NumOps >= 3 && "VFPConv3Frm expects NumOps >= 3"); const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; unsigned &OpIdx = NumOpsAdded; - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); OpIdx = 2; if (OpInfo[OpIdx].RegClass == ARM::SPRRegClassID) { unsigned Sm = decodeVFPRm(insn, true); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::SPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID, Sm))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::SPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID, Sm+1))); OpIdx += 2; } else { MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(ARM::DPRRegClassID, + getRegisterEnum(B, ARM::DPRRegClassID, decodeVFPRm(insn, false)))); ++OpIdx; } @@ -1781,13 +1807,13 @@ static bool DisassembleVFPConv3Frm(MCInst &MI, unsigned Opcode, uint32_t insn, // VMOVSR - A8.6.330 // Rt => Rd; Sn => UInt(Vn:N) static bool DisassembleVFPConv4Frm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { assert(NumOps >= 2 && "VFPConv4Frm expects NumOps >= 2"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::SPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID, decodeVFPRn(insn, true)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); NumOpsAdded = 2; return true; @@ -1799,7 +1825,7 @@ static bool DisassembleVFPConv4Frm(MCInst &MI, unsigned Opcode, uint32_t insn, // VMOVRRS - A8.6.331 // Rt => Rd; Rt2 => Rn; Sm => UInt(Vm:M); Sm1 = Sm+1 static bool DisassembleVFPConv5Frm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { assert(NumOps >= 3 && "VFPConv5Frm expects NumOps >= 3"); @@ -1810,21 +1836,21 @@ static bool DisassembleVFPConv5Frm(MCInst &MI, unsigned Opcode, uint32_t insn, if (OpInfo[OpIdx].RegClass == ARM::SPRRegClassID) { unsigned Sm = decodeVFPRm(insn, true); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::SPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID, Sm))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::SPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID, Sm+1))); OpIdx += 2; } else { MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(ARM::DPRRegClassID, + getRegisterEnum(B, ARM::DPRRegClassID, decodeVFPRm(insn, false)))); ++OpIdx; } - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); OpIdx += 2; return true; @@ -1833,7 +1859,7 @@ static bool DisassembleVFPConv5Frm(MCInst &MI, unsigned Opcode, uint32_t insn, // VFP Load/Store Instructions. 
// VLDRD, VLDRS, VSTRD, VSTRS static bool DisassembleVFPLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { assert(NumOps >= 3 && "VFPLdStFrm expects NumOps >= 3"); @@ -1843,9 +1869,9 @@ static bool DisassembleVFPLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // Extract Dd/Sd for operand 0. unsigned RegD = decodeVFPRd(insn, isSPVFP); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(RegClassID, RegD))); + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID, RegD))); - unsigned Base = getRegisterEnum(ARM::GPRRegClassID, decodeRn(insn)); + unsigned Base = getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)); MI.addOperand(MCOperand::CreateReg(Base)); // Next comes the AM5 Opcode. @@ -1865,7 +1891,7 @@ static bool DisassembleVFPLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // // VLDMD[_UPD], VLDMS[_UPD], VSTMD[_UPD], VSTMS[_UPD] static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { assert(NumOps >= 5 && "VFPLdStMulFrm expects NumOps >= 5"); @@ -1873,7 +1899,7 @@ static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, OpIdx = 0; - unsigned Base = getRegisterEnum(ARM::GPRRegClassID, decodeRn(insn)); + unsigned Base = getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)); // Writeback to base, if necessary. if (Opcode == ARM::VLDMD_UPD || Opcode == ARM::VLDMS_UPD || @@ -1886,6 +1912,12 @@ static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // Next comes the AM5 Opcode. ARM_AM::AMSubMode SubMode = getAMSubModeForBits(getPUBits(insn)); + // Must be either "ia" or "db" submode. + if (SubMode != ARM_AM::ia && SubMode != ARM_AM::db) { + DEBUG(errs() << "Illegal addressing mode 5 sub-mode!\n"); + return false; + } + unsigned char Imm8 = insn & 0xFF; MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(SubMode, Imm8))); @@ -1906,7 +1938,7 @@ static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // Fill the variadic part of reglist. unsigned Regs = isSPVFP ? 
Imm8 : Imm8/2; for (unsigned i = 0; i < Regs; ++i) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(RegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID, RegD + i))); ++OpIdx; } @@ -1920,7 +1952,7 @@ static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // FCONSTS (SPR and a VFPf32Imm operand) // VMRS/VMSR (GPR operand) static bool DisassembleVFPMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; unsigned &OpIdx = NumOpsAdded; @@ -1935,13 +1967,13 @@ static bool DisassembleVFPMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned RegEnum = 0; switch (OpInfo[0].RegClass) { case ARM::DPRRegClassID: - RegEnum = getRegisterEnum(ARM::DPRRegClassID, decodeVFPRd(insn, false)); + RegEnum = getRegisterEnum(B, ARM::DPRRegClassID, decodeVFPRd(insn, false)); break; case ARM::SPRRegClassID: - RegEnum = getRegisterEnum(ARM::SPRRegClassID, decodeVFPRd(insn, true)); + RegEnum = getRegisterEnum(B, ARM::SPRRegClassID, decodeVFPRd(insn, true)); break; case ARM::GPRRegClassID: - RegEnum = getRegisterEnum(ARM::GPRRegClassID, decodeRd(insn)); + RegEnum = getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)); break; default: assert(0 && "Invalid reg class id"); @@ -1986,7 +2018,7 @@ static bool DisassembleVFPMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // D = Inst{22}, Vd = Inst{15-12} static unsigned decodeNEONRd(uint32_t insn) { return ((insn >> ARMII::NEON_D_BitShift) & 1) << 4 - | (insn >> ARMII::NEON_RegRdShift) & ARMII::NEONRegMask; + | ((insn >> ARMII::NEON_RegRdShift) & ARMII::NEONRegMask); } // Extract/Decode NEON N/Vn: @@ -1997,7 +2029,7 @@ static unsigned decodeNEONRd(uint32_t insn) { // N = Inst{7}, Vn = Inst{19-16} static unsigned decodeNEONRn(uint32_t insn) { return ((insn >> ARMII::NEON_N_BitShift) & 1) << 4 - | (insn >> ARMII::NEON_RegRnShift) & ARMII::NEONRegMask; + | ((insn >> ARMII::NEON_RegRnShift) & ARMII::NEONRegMask); } // Extract/Decode NEON M/Vm: @@ -2008,7 +2040,7 @@ static unsigned decodeNEONRn(uint32_t insn) { // M = Inst{5}, Vm = Inst{3-0} static unsigned decodeNEONRm(uint32_t insn) { return ((insn >> ARMII::NEON_M_BitShift) & 1) << 4 - | (insn >> ARMII::NEON_RegRmShift) & ARMII::NEONRegMask; + | ((insn >> ARMII::NEON_RegRmShift) & ARMII::NEONRegMask); } namespace { @@ -2072,7 +2104,7 @@ static uint64_t decodeN1VImm(uint32_t insn, ElemSize esize) { case ESize64: { for (unsigned i = 0; i < 8; ++i) if ((Imm8 >> i) & 1) - Imm64 |= 0xFF << 8*i; + Imm64 |= (uint64_t)0xFF << 8*i; break; } default: @@ -2200,6 +2232,22 @@ static unsigned decodeN3VImm(uint32_t insn) { return (insn >> 8) & 0xF; } +static bool UseDRegPair(unsigned Opcode) { + switch (Opcode) { + default: + return false; + case ARM::VLD1q8_UPD: + case ARM::VLD1q16_UPD: + case ARM::VLD1q32_UPD: + case ARM::VLD1q64_UPD: + case ARM::VST1q8_UPD: + case ARM::VST1q16_UPD: + case ARM::VST1q32_UPD: + case ARM::VST1q64_UPD: + return true; + } +} + // VLD* // D[d] D[d2] ... Rn [TIED_TO Rn] align [Rm] // VLD*LN* @@ -2211,7 +2259,8 @@ static unsigned decodeN3VImm(uint32_t insn) { // // Correctly set VLD*/VST*'s TIED_TO GPR, as the asm printer needs it. 
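A subtle fix above is in the ESize64 arm of `decodeN1VImm`: `0xFF << 8*i` is evaluated in 32-bit `int`, so once `8*i` reaches 32 the shift is undefined and the high bytes of the mask are lost. With the cast, the whole expansion is done in 64 bits; a self-contained version:

#include <cstdint>

// Expand each set bit i of an 8-bit immediate into byte i of a 64-bit mask,
// e.g. 0x81 -> 0xFF000000000000FF. The (uint64_t) cast keeps the shift legal
// for i >= 4.
static uint64_t expandByteMask(uint8_t Imm8) {
  uint64_t Imm64 = 0;
  for (unsigned i = 0; i < 8; ++i)
    if ((Imm8 >> i) & 1)
      Imm64 |= (uint64_t)0xFF << 8*i;
  return Imm64;
}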
static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, bool Store, bool DblSpaced) { + unsigned short NumOps, unsigned &NumOpsAdded, bool Store, bool DblSpaced, + BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; const TargetOperandInfo *OpInfo = TID.OpInfo; @@ -2239,7 +2288,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, // LLVM Addressing Mode #6. unsigned RmEnum = 0; if (WB && Rm != 13) - RmEnum = getRegisterEnum(ARM::GPRRegClassID, Rm); + RmEnum = getRegisterEnum(B, ARM::GPRRegClassID, Rm); if (Store) { // Consume possible WB, AddrMode6, possible increment reg, the DPR/QPR's, @@ -2248,14 +2297,14 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, "Reg operand expected"); if (WB) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, Rn))); ++OpIdx; } assert((OpIdx+1) < NumOps && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && OpInfo[OpIdx + 1].RegClass == 0 && "Addrmode #6 Operands expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, Rn))); MI.addOperand(MCOperand::CreateImm(0)); // Alignment ignored? OpIdx += 2; @@ -2272,10 +2321,9 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, RegClass = OpInfo[OpIdx].RegClass; while (OpIdx < NumOps && OpInfo[OpIdx].RegClass == RegClass) { - if (Opcode >= ARM::VST1q16 && Opcode <= ARM::VST1q8) - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(RegClass,Rd,true))); - else - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(RegClass,Rd))); + MI.addOperand(MCOperand::CreateReg( + getRegisterEnum(B, RegClass, Rd, + UseDRegPair(Opcode)))); Rd += Inc; ++OpIdx; } @@ -2293,23 +2341,22 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, RegClass = OpInfo[0].RegClass; while (OpIdx < NumOps && OpInfo[OpIdx].RegClass == RegClass) { - if (Opcode >= ARM::VLD1q16 && Opcode <= ARM::VLD1q8) - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(RegClass,Rd,true))); - else - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(RegClass,Rd))); + MI.addOperand(MCOperand::CreateReg( + getRegisterEnum(B, RegClass, Rd, + UseDRegPair(Opcode)))); Rd += Inc; ++OpIdx; } if (WB) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, Rn))); ++OpIdx; } assert((OpIdx+1) < NumOps && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && OpInfo[OpIdx + 1].RegClass == 0 && "Addrmode #6 Operands expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, Rn))); MI.addOperand(MCOperand::CreateImm(0)); // Alignment ignored? OpIdx += 2; @@ -2342,7 +2389,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, // Find out about double-spaced-ness of the Opcode and pass it on to // DisassembleNLdSt0(). 
static bool DisassembleNLdSt(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const StringRef Name = ARMInsts[Opcode].Name; bool DblSpaced = false; @@ -2377,13 +2424,13 @@ static bool DisassembleNLdSt(MCInst &MI, unsigned Opcode, uint32_t insn, } return DisassembleNLdSt0(MI, Opcode, insn, NumOps, NumOpsAdded, - slice(insn, 21, 21) == 0, DblSpaced); + slice(insn, 21, 21) == 0, DblSpaced, B); } // VMOV (immediate) // Qd/Dd imm static bool DisassembleN1RegModImmFrm(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO) { + uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; const TargetOperandInfo *OpInfo = TID.OpInfo; @@ -2395,7 +2442,7 @@ static bool DisassembleN1RegModImmFrm(MCInst &MI, unsigned Opcode, "Expect 1 reg operand followed by 1 imm operand"); // Qd/Dd = Inst{22:15-12} => NEON Rd - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(OpInfo[0].RegClass, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[0].RegClass, decodeNEONRd(insn)))); ElemSize esize = ESizeNA; @@ -2415,6 +2462,7 @@ static bool DisassembleN1RegModImmFrm(MCInst &MI, unsigned Opcode, case ARM::VMOVv1i64: case ARM::VMOVv2i64: esize = ESize64; + break; default: assert(0 && "Unreachable code!"); return false; @@ -2451,7 +2499,7 @@ enum N2VFlag { // // Others static bool DisassembleNVdVmOptImm(MCInst &MI, unsigned Opc, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, N2VFlag Flag = N2V_None) { + unsigned short NumOps, unsigned &NumOpsAdded, N2VFlag Flag, BO B) { const TargetInstrDesc &TID = ARMInsts[Opc]; const TargetOperandInfo *OpInfo = TID.OpInfo; @@ -2478,7 +2526,7 @@ static bool DisassembleNVdVmOptImm(MCInst &MI, unsigned Opc, uint32_t insn, } // Qd/Dd = Inst{22:15-12} => NEON Rd - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(OpInfo[OpIdx].RegClass, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, decodeNEONRd(insn)))); ++OpIdx; @@ -2490,7 +2538,7 @@ static bool DisassembleNVdVmOptImm(MCInst &MI, unsigned Opc, uint32_t insn, } // Dm = Inst{5:3-0} => NEON Rm - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(OpInfo[OpIdx].RegClass, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, decodeNEONRm(insn)))); ++OpIdx; @@ -2523,21 +2571,22 @@ static bool DisassembleNVdVmOptImm(MCInst &MI, unsigned Opc, uint32_t insn, } static bool DisassembleN2RegFrm(MCInst &MI, unsigned Opc, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - return DisassembleNVdVmOptImm(MI, Opc, insn, NumOps, NumOpsAdded); + return DisassembleNVdVmOptImm(MI, Opc, insn, NumOps, NumOpsAdded, + N2V_None, B); } static bool DisassembleNVCVTFrm(MCInst &MI, unsigned Opc, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { return DisassembleNVdVmOptImm(MI, Opc, insn, NumOps, NumOpsAdded, - N2V_VectorConvert_Between_Float_Fixed); + N2V_VectorConvert_Between_Float_Fixed, B); } static bool DisassembleNVecDupLnFrm(MCInst &MI, unsigned Opc, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { return DisassembleNVdVmOptImm(MI, Opc, insn, NumOps, NumOpsAdded, - N2V_VectorDupLane); + N2V_VectorDupLane, B); } // Vector Shift [Accumulate] Instructions. 
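The new `UseDRegPair` predicate above replaces range tests such as `Opcode >= ARM::VST1q16 && Opcode <= ARM::VST1q8`, which silently depend on the ordering of the tablegen-generated opcode enum. A toy model of the design choice (the real switch lists the VLD1q*_UPD/VST1q*_UPD opcodes):

enum Opcode { VLD1q8_UPD, VLD1q16_UPD, VST1q8_UPD, VST1q16_UPD, Other };

// An explicit switch stays correct even if the enum is reordered; a range
// comparison would have to be re-audited after every tablegen change.
static bool usesDRegPair(Opcode Opc) {
  switch (Opc) {
  case VLD1q8_UPD: case VLD1q16_UPD:
  case VST1q8_UPD: case VST1q16_UPD:
    return true;
  default:
    return false;
  }
}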
@@ -2547,7 +2596,7 @@ static bool DisassembleNVecDupLnFrm(MCInst &MI, unsigned Opc, uint32_t insn, // VSHLLi16, VSHLLi32, VSHLLi8: Qd Dm imm (== size) // static bool DisassembleNVectorShift(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, bool LeftShift) { + unsigned short NumOps, unsigned &NumOpsAdded, bool LeftShift, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; const TargetOperandInfo *OpInfo = TID.OpInfo; @@ -2564,7 +2613,7 @@ static bool DisassembleNVectorShift(MCInst &MI, unsigned Opcode, uint32_t insn, OpIdx = 0; // Qd/Dd = Inst{22:15-12} => NEON Rd - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(OpInfo[OpIdx].RegClass, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, decodeNEONRd(insn)))); ++OpIdx; @@ -2579,7 +2628,7 @@ static bool DisassembleNVectorShift(MCInst &MI, unsigned Opcode, uint32_t insn, "Reg operand expected"); // Qm/Dm = Inst{5:3-0} => NEON Rm - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(OpInfo[OpIdx].RegClass, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, decodeNEONRm(insn)))); ++OpIdx; @@ -2611,15 +2660,17 @@ static bool DisassembleNVectorShift(MCInst &MI, unsigned Opcode, uint32_t insn, // Left shift instructions. static bool DisassembleN2RegVecShLFrm(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO) { + uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - return DisassembleNVectorShift(MI, Opcode, insn, NumOps, NumOpsAdded, true); + return DisassembleNVectorShift(MI, Opcode, insn, NumOps, NumOpsAdded, true, + B); } // Right shift instructions have different shift amount interpretation. static bool DisassembleN2RegVecShRFrm(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO) { + uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - return DisassembleNVectorShift(MI, Opcode, insn, NumOps, NumOpsAdded, false); + return DisassembleNVectorShift(MI, Opcode, insn, NumOps, NumOpsAdded, false, + B); } namespace { @@ -2644,7 +2695,7 @@ enum N3VFlag { // // Others static bool DisassembleNVdVnVmOptImm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, N3VFlag Flag = N3V_None) { + unsigned short NumOps, unsigned &NumOpsAdded, N3VFlag Flag, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; const TargetOperandInfo *OpInfo = TID.OpInfo; @@ -2673,7 +2724,7 @@ static bool DisassembleNVdVnVmOptImm(MCInst &MI, unsigned Opcode, uint32_t insn, } // Qd/Dd = Inst{22:15-12} => NEON Rd - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(OpInfo[OpIdx].RegClass, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, decodeNEONRd(insn)))); ++OpIdx; @@ -2688,7 +2739,7 @@ static bool DisassembleNVdVnVmOptImm(MCInst &MI, unsigned Opcode, uint32_t insn, // or // Dm = Inst{5:3-0} => NEON Rm MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(OpInfo[OpIdx].RegClass, + getRegisterEnum(B, OpInfo[OpIdx].RegClass, VdVnVm ? 
decodeNEONRn(insn) : decodeNEONRm(insn)))); ++OpIdx; @@ -2708,7 +2759,7 @@ static bool DisassembleNVdVnVmOptImm(MCInst &MI, unsigned Opcode, uint32_t insn, : decodeNEONRn(insn); MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(OpInfo[OpIdx].RegClass, m))); + getRegisterEnum(B, OpInfo[OpIdx].RegClass, m))); ++OpIdx; if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0 @@ -2732,27 +2783,28 @@ static bool DisassembleNVdVnVmOptImm(MCInst &MI, unsigned Opcode, uint32_t insn, } static bool DisassembleN3RegFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded, + N3V_None, B); } static bool DisassembleN3RegVecShFrm(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO) { + uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded, - N3V_VectorShift); + N3V_VectorShift, B); } static bool DisassembleNVecExtractFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded, - N3V_VectorExtract); + N3V_VectorExtract, B); } static bool DisassembleNVecMulScalarFrm(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO) { + uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded, - N3V_Multiply_By_Scalar); + N3V_Multiply_By_Scalar, B); } // Vector Table Lookup @@ -2762,10 +2814,11 @@ static bool DisassembleNVecMulScalarFrm(MCInst &MI, unsigned Opcode, // VTBL3, VTBX3: Dd [Dd(TIED_TO)] Dn Dn+1 Dn+2 Dm // VTBL4, VTBX4: Dd [Dd(TIED_TO)] Dn Dn+1 Dn+2 Dn+3 Dm static bool DisassembleNVTBLFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; const TargetOperandInfo *OpInfo = TID.OpInfo; + if (!OpInfo) return false; assert(NumOps >= 3 && OpInfo[0].RegClass == ARM::DPRRegClassID && @@ -2786,7 +2839,7 @@ static bool DisassembleNVTBLFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned Len = slice(insn, 9, 8) + 1; // Dd (the destination vector) - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::DPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::DPRRegClassID, decodeNEONRd(insn)))); ++OpIdx; @@ -2801,7 +2854,7 @@ static bool DisassembleNVTBLFrm(MCInst &MI, unsigned Opcode, uint32_t insn, for (unsigned i = 0; i < Len; ++i) { assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::DPRRegClassID && "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::DPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::DPRRegClassID, Rn + i))); ++OpIdx; } @@ -2809,7 +2862,7 @@ static bool DisassembleNVTBLFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // Dm (the index vector) assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::DPRRegClassID && "Reg operand (index vector) expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::DPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::DPRRegClassID, 
decodeNEONRm(insn)))); ++OpIdx; @@ -2825,13 +2878,13 @@ static bool DisassembleNEONFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // Vector Get Lane (move scalar to ARM core register) Instructions. // VGETLNi32, VGETLNs16, VGETLNs8, VGETLNu16, VGETLNu8: Rt Dn index static bool DisassembleNEONGetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; - unsigned short NumDefs = TID.getNumDefs(); const TargetOperandInfo *OpInfo = TID.OpInfo; + if (!OpInfo) return false; - assert(NumDefs == 1 && NumOps >= 3 && + assert(TID.getNumDefs() == 1 && NumOps >= 3 && OpInfo[0].RegClass == ARM::GPRRegClassID && OpInfo[1].RegClass == ARM::DPRRegClassID && OpInfo[2].RegClass == 0 && @@ -2843,11 +2896,11 @@ static bool DisassembleNEONGetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, : ESize32); // Rt = Inst{15-12} => ARM Rd - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); // Dn = Inst{7:19-16} => NEON Rn - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::DPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::DPRRegClassID, decodeNEONRn(insn)))); MI.addOperand(MCOperand::CreateImm(decodeNVLaneOpIndex(insn, esize))); @@ -2859,13 +2912,13 @@ static bool DisassembleNEONGetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // Vector Set Lane (move ARM core register to scalar) Instructions. // VSETLNi16, VSETLNi32, VSETLNi8: Dd Dd (TIED_TO) Rt index static bool DisassembleNEONSetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; - unsigned short NumDefs = TID.getNumDefs(); const TargetOperandInfo *OpInfo = TID.OpInfo; + if (!OpInfo) return false; - assert(NumDefs == 1 && NumOps >= 3 && + assert(TID.getNumDefs() == 1 && NumOps >= 3 && OpInfo[0].RegClass == ARM::DPRRegClassID && OpInfo[1].RegClass == ARM::DPRRegClassID && TID.getOperandConstraint(1, TOI::TIED_TO) != -1 && @@ -2879,14 +2932,14 @@ static bool DisassembleNEONSetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, : ESize32); // Dd = Inst{7:19-16} => NEON Rn - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::DPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::DPRRegClassID, decodeNEONRn(insn)))); // TIED_TO operand. MI.addOperand(MCOperand::CreateReg(0)); // Rt = Inst{15-12} => ARM Rd - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); MI.addOperand(MCOperand::CreateImm(decodeNVLaneOpIndex(insn, esize))); @@ -2898,7 +2951,7 @@ static bool DisassembleNEONSetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // Vector Duplicate Instructions (from ARM core register to all elements). 
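These lane and duplicate decoders assemble register numbers from split fields, e.g. Dd/Dn = Inst{7:19-16} and Rt = Inst{15-12}. A minimal stand-alone version of the `slice`-style extraction they rely on (the 64-bit intermediate only guards against an undefined full-width shift):

#include <cstdint>

// Extract Inst{hi-lo} as an unsigned value.
static inline uint32_t slice(uint32_t insn, unsigned hi, unsigned lo) {
  return (insn >> lo) & (uint32_t)(((uint64_t)1 << (hi - lo + 1)) - 1);
}

// N:Vn -> NEON register number, as in decodeNEONRn:
// N = Inst{7}, Vn = Inst{19-16}.
static inline unsigned neonRn(uint32_t insn) {
  return (slice(insn, 7, 7) << 4) | slice(insn, 19, 16);
}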
// VDUP8d, VDUP16d, VDUP32d, VDUP8q, VDUP16q, VDUP32q: Qd/Dd Rt static bool DisassembleNEONDupFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; @@ -2911,11 +2964,11 @@ static bool DisassembleNEONDupFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned RegClass = OpInfo[0].RegClass; // Qd/Dd = Inst{7:19-16} => NEON Rn - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(RegClass, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClass, decodeNEONRn(insn)))); // Rt = Inst{15-12} => ARM Rd - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); NumOpsAdded = 2; @@ -2945,13 +2998,13 @@ static inline bool PreLoadOpcode(unsigned Opcode) { } static bool DisassemblePreLoadFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { // Preload Data/Instruction requires either 2 or 4 operands. // PLDi, PLDWi, PLIi: Rn [+/-]imm12 add = (U == '1') // PLDr[a|m], PLDWr[a|m], PLIr[a|m]: Rn Rm addrmode2_opc - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); if (Opcode == ARM::PLDi || Opcode == ARM::PLDWi || Opcode == ARM::PLIi) { @@ -2961,7 +3014,7 @@ static bool DisassemblePreLoadFrm(MCInst &MI, unsigned Opcode, uint32_t insn, MI.addOperand(MCOperand::CreateImm(Offset)); NumOpsAdded = 2; } else { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub; @@ -2982,7 +3035,7 @@ static bool DisassemblePreLoadFrm(MCInst &MI, unsigned Opcode, uint32_t insn, } static bool DisassembleMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { if (MemBarrierInstr(insn)) return true; @@ -3031,7 +3084,7 @@ static bool DisassembleMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, } if (PreLoadOpcode(Opcode)) - return DisassemblePreLoadFrm(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassemblePreLoadFrm(MI, Opcode, insn, NumOps, NumOpsAdded, B); assert(0 && "Unexpected misc instruction!"); return false; @@ -3147,7 +3200,7 @@ bool ARMBasicMCBuilder::BuildIt(MCInst &MI, uint32_t insn) { unsigned NumOpsAdded = 0; bool OK = (*Disasm)(MI, Opcode, insn, NumOps, NumOpsAdded, this); - if (!OK) return false; + if (!OK || this->Err != 0) return false; if (NumOpsAdded >= NumOps) return true; @@ -3156,6 +3209,49 @@ bool ARMBasicMCBuilder::BuildIt(MCInst &MI, uint32_t insn) { return TryPredicateAndSBitModifier(MI, Opcode, insn, NumOps - NumOpsAdded); } +// A8.3 Conditional execution +// A8.3.1 Pseudocode details of conditional execution +// Condition bits '111x' indicate the instruction is always executed. +static uint32_t CondCode(uint32_t CondField) { + if (CondField == 0xF) + return ARMCC::AL; + return CondField; +} + +/// DoPredicateOperands - DoPredicateOperands process the predicate operands +/// of some Thumb instructions which come before the reglist operands. It +/// returns true if the two predicate operands have been processed. 
+bool ARMBasicMCBuilder::DoPredicateOperands(MCInst& MI, unsigned Opcode, + uint32_t /* insn */, unsigned short NumOpsRemaining) { + + assert(NumOpsRemaining > 0 && "Invalid argument"); + + const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + unsigned Idx = MI.getNumOperands(); + + // First, we check whether this instr specifies the PredicateOperand through + // a pair of TargetOperandInfos with isPredicate() property. + if (NumOpsRemaining >= 2 && + OpInfo[Idx].isPredicate() && OpInfo[Idx+1].isPredicate() && + OpInfo[Idx].RegClass == 0 && OpInfo[Idx+1].RegClass == ARM::CCRRegClassID) + { + // If we are inside an IT block, get the IT condition bits maintained via + // ARMBasicMCBuilder::ITState[7:0], through ARMBasicMCBuilder::GetITCond(). + // See also A2.5.2. + if (InITBlock()) + MI.addOperand(MCOperand::CreateImm(GetITCond())); + else + MI.addOperand(MCOperand::CreateImm(ARMCC::AL)); + MI.addOperand(MCOperand::CreateReg(ARM::CPSR)); + return true; + } + + return false; +} + +/// TryPredicateAndSBitModifier - TryPredicateAndSBitModifier tries to process +/// the possible Predicate and SBitModifier, to build the remaining MCOperand +/// constituents. bool ARMBasicMCBuilder::TryPredicateAndSBitModifier(MCInst& MI, unsigned Opcode, uint32_t insn, unsigned short NumOpsRemaining) { @@ -3183,27 +3279,24 @@ bool ARMBasicMCBuilder::TryPredicateAndSBitModifier(MCInst& MI, unsigned Opcode, // // A8.6.16 B if (Name == "t2Bcc") - MI.addOperand(MCOperand::CreateImm(slice(insn, 25, 22))); + MI.addOperand(MCOperand::CreateImm(CondCode(slice(insn, 25, 22)))); else if (Name == "tBcc") - MI.addOperand(MCOperand::CreateImm(slice(insn, 11, 8))); + MI.addOperand(MCOperand::CreateImm(CondCode(slice(insn, 11, 8)))); else MI.addOperand(MCOperand::CreateImm(ARMCC::AL)); } else { - // ARM Instructions. Check condition field. - int64_t CondVal = getCondField(insn); - if (CondVal == 0xF) - MI.addOperand(MCOperand::CreateImm(ARMCC::AL)); - else - MI.addOperand(MCOperand::CreateImm(CondVal)); + // ARM instructions get their condition field from Inst{31-28}. + MI.addOperand(MCOperand::CreateImm(CondCode(getCondField(insn)))); } } MI.addOperand(MCOperand::CreateReg(ARM::CPSR)); Idx += 2; NumOpsRemaining -= 2; - if (NumOpsRemaining == 0) - return true; } + if (NumOpsRemaining == 0) + return true; + // Next, if OptionalDefOperand exists, we check whether the 'S' bit is set. if (OpInfo[Idx].isOptionalDef() && OpInfo[Idx].RegClass==ARM::CCRRegClassID) { MI.addOperand(MCOperand::CreateReg(getSBit(insn) == 1 ? ARM::CPSR : 0)); @@ -3224,7 +3317,7 @@ bool ARMBasicMCBuilder::RunBuildAfterHook(bool Status, MCInst &MI, if (!SP) return Status; if (Opcode == ARM::t2IT) - SP->InitIT(slice(insn, 7, 0)); + Status = SP->InitIT(slice(insn, 7, 0)) ? Status : false; else if (InITBlock()) SP->UpdateIT(); @@ -3234,7 +3327,7 @@ bool ARMBasicMCBuilder::RunBuildAfterHook(bool Status, MCInst &MI, /// Opcode, Format, and NumOperands make up an ARM Basic MCBuilder. ARMBasicMCBuilder::ARMBasicMCBuilder(unsigned opc, ARMFormat format, unsigned short num) - : Opcode(opc), Format(format), NumOps(num), SP(0) { + : Opcode(opc), Format(format), NumOps(num), SP(0), Err(0) { unsigned Idx = (unsigned)format; assert(Idx < (array_lengthof(FuncPtrs) - 1) && "Unknown format"); Disasm = FuncPtrs[Idx]; @@ -3246,6 +3339,11 @@ ARMBasicMCBuilder::ARMBasicMCBuilder(unsigned opc, ARMFormat format, /// are responsible for freeing up of the allocated memory. Cacheing can be /// performed by the API clients to improve performance. 
ARMBasicMCBuilder *llvm::CreateMCBuilder(unsigned Opcode, ARMFormat Format) { + // For "Unknown format", fail by returning a NULL pointer. + if ((unsigned)Format >= (array_lengthof(FuncPtrs) - 1)) { + DEBUG(errs() << "Unknown format\n"); + return 0; + } return new ARMBasicMCBuilder(Opcode, Format, ARMInsts[Opcode].getNumOperands()); diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h index 3075230..b1d90df 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h +++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h @@ -171,30 +171,51 @@ typedef ARMBasicMCBuilder *BO; typedef bool (*DisassembleFP)(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO Builder); +/// CreateMCBuilder - Return an ARMBasicMCBuilder that can build up the MC +/// infrastructure of an MCInst given the Opcode and Format of the instr. +/// Return NULL if it fails to create/return a proper builder. API clients +/// are responsible for freeing up of the allocated memory. Cacheing can be +/// performed by the API clients to improve performance. +extern ARMBasicMCBuilder *CreateMCBuilder(unsigned Opcode, ARMFormat Format); + /// ARMBasicMCBuilder - ARMBasicMCBuilder represents an ARM MCInst builder that /// knows how to build up the MCOperand list. class ARMBasicMCBuilder { + friend ARMBasicMCBuilder *CreateMCBuilder(unsigned Opcode, ARMFormat Format); unsigned Opcode; ARMFormat Format; unsigned short NumOps; DisassembleFP Disasm; Session *SP; + int Err; // !=0 if the builder encounters some error condition during build. + +private: + /// Opcode, Format, and NumOperands make up an ARM Basic MCBuilder. + ARMBasicMCBuilder(unsigned opc, ARMFormat format, unsigned short num); public: ARMBasicMCBuilder(ARMBasicMCBuilder &B) : Opcode(B.Opcode), Format(B.Format), NumOps(B.NumOps), Disasm(B.Disasm), - SP(B.SP) - {} - - /// Opcode, Format, and NumOperands make up an ARM Basic MCBuilder. - ARMBasicMCBuilder(unsigned opc, ARMFormat format, unsigned short num); + SP(B.SP) { + Err = 0; + } virtual ~ARMBasicMCBuilder() {} - void setSession(Session *sp) { + void SetSession(Session *sp) { SP = sp; } + void SetErr(int ErrCode) { + Err = ErrCode; + } + + /// DoPredicateOperands - DoPredicateOperands process the predicate operands + /// of some Thumb instructions which come before the reglist operands. It + /// returns true if the two predicate operands have been processed. + bool DoPredicateOperands(MCInst& MI, unsigned Opcode, + uint32_t insn, unsigned short NumOpsRemaning); + /// TryPredicateAndSBitModifier - TryPredicateAndSBitModifier tries to process /// the possible Predicate and SBitModifier, to build the remaining MCOperand /// constituents. @@ -236,13 +257,6 @@ private: } }; -/// CreateMCBuilder - Return an ARMBasicMCBuilder that can build up the MC -/// infrastructure of an MCInst given the Opcode and Format of the instr. -/// Return NULL if it fails to create/return a proper builder. API clients -/// are responsible for freeing up of the allocated memory. Cacheing can be -/// performed by the API clients to improve performance. 
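The header change here is a small construction-discipline refactor: the builder's (Opcode, Format) constructor becomes private, and the `CreateMCBuilder` factory (now a friend) validates the format first, returning NULL rather than ever constructing a builder in a bad state. A reduced model, where the bound 32 is only a stand-in for `array_lengthof(FuncPtrs) - 1`:

class Builder {
  friend Builder *createBuilder(unsigned Format);
  explicit Builder(unsigned Format) : Format(Format) {}  // private: factory only
  unsigned Format;
};

Builder *createBuilder(unsigned Format) {
  if (Format >= 32)   // unknown format: fail in the factory, not the ctor
    return 0;
  return new Builder(Format);
}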
-extern ARMBasicMCBuilder *CreateMCBuilder(unsigned Opcode, ARMFormat Format); - } // namespace llvm #endif diff --git a/lib/Target/ARM/Disassembler/Makefile b/lib/Target/ARM/Disassembler/Makefile new file mode 100644 index 0000000..031b6ac --- /dev/null +++ b/lib/Target/ARM/Disassembler/Makefile @@ -0,0 +1,16 @@ +##===- lib/Target/ARM/Disassembler/Makefile ----------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME = LLVMARMDisassembler + +# Hack: we need to include 'main' arm target directory to grab private headers +CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h index 481f25d..4b2e308 100644 --- a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h +++ b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h @@ -193,14 +193,18 @@ static inline unsigned getShiftAmtBits(uint32_t insn) { // A8.6.17 BFC // Encoding T1 ARMv6T2, ARMv7 // LLVM-specific encoding for #<lsb> and #<width> -static inline uint32_t getBitfieldInvMask(uint32_t insn) { +static inline bool getBitfieldInvMask(uint32_t insn, uint32_t &mask) { uint32_t lsb = getImm3(insn) << 2 | getImm2(insn); uint32_t msb = getMsb(insn); uint32_t Val = 0; - assert(lsb <= msb && "Encoding error: lsb > msb"); + if (msb < lsb) { + DEBUG(errs() << "Encoding error: msb < lsb\n"); + return false; + } for (uint32_t i = lsb; i <= msb; ++i) Val |= (1 << i); - return ~Val; + mask = ~Val; + return true; } // A8.4 Shifts applied to a register @@ -342,7 +346,7 @@ static inline unsigned decodeRotate(uint32_t insn) { // Special case: // tMOVSr: tRd tRn static bool DisassembleThumb1General(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO Builder) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; unsigned &OpIdx = NumOpsAdded; @@ -360,14 +364,14 @@ static bool DisassembleThumb1General(MCInst &MI, unsigned Opcode, uint32_t insn, // Add the destination operand. MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(ARM::tGPRRegClassID, + getRegisterEnum(B, ARM::tGPRRegClassID, UseRt ? getT1tRt(insn) : getT1tRd(insn)))); ++OpIdx; // Check whether the next operand to be added is a CCR Register. if (OpInfo[OpIdx].RegClass == ARM::CCRRegClassID) { assert(OpInfo[OpIdx].isOptionalDef() && "Optional def operand expected"); - MI.addOperand(MCOperand::CreateReg(Builder->InITBlock() ? 0 : ARM::CPSR)); + MI.addOperand(MCOperand::CreateReg(B->InITBlock() ? 0 : ARM::CPSR)); ++OpIdx; } @@ -376,7 +380,7 @@ static bool DisassembleThumb1General(MCInst &MI, unsigned Opcode, uint32_t insn, if (OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID) { // For UseRt, the reg operand is tied to the first reg operand. MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(ARM::tGPRRegClassID, + getRegisterEnum(B, ARM::tGPRRegClassID, UseRt ? getT1tRt(insn) : getT1tRn(insn)))); ++OpIdx; } @@ -388,7 +392,7 @@ static bool DisassembleThumb1General(MCInst &MI, unsigned Opcode, uint32_t insn, // The next available operand is either a reg operand or an imm operand. if (OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID) { // Three register operand instructions. 
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, getT1tRm(insn)))); } else { assert(OpInfo[OpIdx].RegClass == 0 && @@ -409,7 +413,7 @@ static bool DisassembleThumb1General(MCInst &MI, unsigned Opcode, uint32_t insn, // tMVN, tRSB: tRd CPSR tRn // Others: tRd CPSR tRd(TIED_TO) tRn static bool DisassembleThumb1DP(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO Builder) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; const TargetOperandInfo *OpInfo = TID.OpInfo; @@ -423,14 +427,14 @@ static bool DisassembleThumb1DP(MCInst &MI, unsigned Opcode, uint32_t insn, && "Invalid arguments"); // Add the destination operand. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, getT1tRd(insn)))); ++OpIdx; // Check whether the next operand to be added is a CCR Register. if (OpInfo[OpIdx].RegClass == ARM::CCRRegClassID) { assert(OpInfo[OpIdx].isOptionalDef() && "Optional def operand expected"); - MI.addOperand(MCOperand::CreateReg(Builder->InITBlock() ? 0 : ARM::CPSR)); + MI.addOperand(MCOperand::CreateReg(B->InITBlock() ? 0 : ARM::CPSR)); ++OpIdx; } @@ -449,7 +453,7 @@ static bool DisassembleThumb1DP(MCInst &MI, unsigned Opcode, uint32_t insn, // Process possible next reg operand. if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID) { // Add tRn operand. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, getT1tRn(insn)))); ++OpIdx; } @@ -466,7 +470,7 @@ static bool DisassembleThumb1DP(MCInst &MI, unsigned Opcode, uint32_t insn, // tBX_RET_vararg: Rm // tBLXr_r9: Rm static bool DisassembleThumb1Special(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { // tBX_RET has 0 operand. if (NumOps == 0) @@ -474,7 +478,7 @@ static bool DisassembleThumb1Special(MCInst &MI, unsigned Opcode, uint32_t insn, // BX/BLX has 1 reg operand: Rm. if (NumOps == 1) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, getT1Rm(insn)))); NumOpsAdded = 1; return true; @@ -489,7 +493,7 @@ static bool DisassembleThumb1Special(MCInst &MI, unsigned Opcode, uint32_t insn, // Add the destination operand. unsigned RegClass = OpInfo[OpIdx].RegClass; MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(RegClass, + getRegisterEnum(B, RegClass, IsGPR(RegClass) ? getT1Rd(insn) : getT1tRd(insn)))); ++OpIdx; @@ -509,7 +513,7 @@ static bool DisassembleThumb1Special(MCInst &MI, unsigned Opcode, uint32_t insn, assert(OpIdx < NumOps && "More operands expected"); RegClass = OpInfo[OpIdx].RegClass; MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(RegClass, + getRegisterEnum(B, RegClass, IsGPR(RegClass) ? 
getT1Rm(insn) : getT1tRn(insn)))); ++OpIdx; @@ -521,9 +525,10 @@ static bool DisassembleThumb1Special(MCInst &MI, unsigned Opcode, uint32_t insn, // // tLDRpci: tRt imm8*4 static bool DisassembleThumb1LdPC(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + if (!OpInfo) return false; assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID && (OpInfo[1].RegClass == 0 && @@ -532,7 +537,7 @@ static bool DisassembleThumb1LdPC(MCInst &MI, unsigned Opcode, uint32_t insn, && "Invalid arguments"); // Add the destination operand. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, getT1tRt(insn)))); // And the (imm8 << 2) operand. @@ -564,7 +569,7 @@ static bool DisassembleThumb1LdPC(MCInst &MI, unsigned Opcode, uint32_t insn, // Load/Store Register (reg|imm): tRd tRn imm5 tRm // Load Register Signed Byte|Halfword: tRd tRn tRm static bool DisassembleThumb1LdSt(unsigned opA, MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded) { + uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; const TargetOperandInfo *OpInfo = TID.OpInfo; @@ -581,9 +586,9 @@ static bool DisassembleThumb1LdSt(unsigned opA, MCInst &MI, unsigned Opcode, && "Expect >= 2 operands and first two as thumb reg operands"); // Add the destination reg and the base reg. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, getT1tRd(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, getT1tRn(insn)))); OpIdx = 2; @@ -603,9 +608,10 @@ static bool DisassembleThumb1LdSt(unsigned opA, MCInst &MI, unsigned Opcode, // The next reg operand is tRm, the offset. assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID && "Thumb reg operand expected"); - MI.addOperand(MCOperand::CreateReg(Imm5 ? 0 - : getRegisterEnum(ARM::tGPRRegClassID, - getT1tRm(insn)))); + MI.addOperand(MCOperand::CreateReg( + Imm5 ? 
0 + : getRegisterEnum(B, ARM::tGPRRegClassID, + getT1tRm(insn)))); ++OpIdx; return true; @@ -615,12 +621,13 @@ static bool DisassembleThumb1LdSt(unsigned opA, MCInst &MI, unsigned Opcode, // // Load/Store Register SP relative: tRt ARM::SP imm8 static bool DisassembleThumb1LdStSP(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { assert((Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) - && "Invalid opcode"); + && "Unexpected opcode"); const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + if (!OpInfo) return false; assert(NumOps >= 3 && OpInfo[0].RegClass == ARM::tGPRRegClassID && @@ -630,7 +637,7 @@ static bool DisassembleThumb1LdStSP(MCInst &MI, unsigned Opcode, uint32_t insn, !OpInfo[2].isOptionalDef()) && "Invalid arguments"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, getT1tRt(insn)))); MI.addOperand(MCOperand::CreateReg(ARM::SP)); MI.addOperand(MCOperand::CreateImm(getT1Imm8(insn))); @@ -643,11 +650,12 @@ static bool DisassembleThumb1LdStSP(MCInst &MI, unsigned Opcode, uint32_t insn, // // tADDrPCi: tRt imm8 static bool DisassembleThumb1AddPCi(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - assert(Opcode == ARM::tADDrPCi && "Invalid opcode"); + assert(Opcode == ARM::tADDrPCi && "Unexpected opcode"); const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + if (!OpInfo) return false; assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID && (OpInfo[1].RegClass == 0 && @@ -655,7 +663,7 @@ static bool DisassembleThumb1AddPCi(MCInst &MI, unsigned Opcode, uint32_t insn, !OpInfo[1].isOptionalDef()) && "Invalid arguments"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, getT1tRt(insn)))); MI.addOperand(MCOperand::CreateImm(getT1Imm8(insn))); NumOpsAdded = 2; @@ -667,11 +675,12 @@ static bool DisassembleThumb1AddPCi(MCInst &MI, unsigned Opcode, uint32_t insn, // // tADDrSPi: tRt ARM::SP imm8 static bool DisassembleThumb1AddSPi(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - assert(Opcode == ARM::tADDrSPi && "Invalid opcode"); + assert(Opcode == ARM::tADDrSPi && "Unexpected opcode"); const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + if (!OpInfo) return false; assert(NumOps >= 3 && OpInfo[0].RegClass == ARM::tGPRRegClassID && @@ -681,7 +690,7 @@ static bool DisassembleThumb1AddSPi(MCInst &MI, unsigned Opcode, uint32_t insn, !OpInfo[2].isOptionalDef()) && "Invalid arguments"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, getT1tRt(insn)))); MI.addOperand(MCOperand::CreateReg(ARM::SP)); MI.addOperand(MCOperand::CreateImm(getT1Imm8(insn))); @@ -697,23 +706,27 @@ static bool DisassembleThumb1AddSPi(MCInst &MI, unsigned Opcode, uint32_t insn, // "low registers" is specified by Inst{7-0} // lr|pc is specified by Inst{8} static bool DisassembleThumb1PushPop(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - assert((Opcode == ARM::tPUSH || Opcode == ARM::tPOP) 
&& "Invalid opcode"); + assert((Opcode == ARM::tPUSH || Opcode == ARM::tPOP) && "Unexpected opcode"); unsigned &OpIdx = NumOpsAdded; // Handling the two predicate operands before the reglist. - MI.addOperand(MCOperand::CreateImm(ARMCC::AL)); - MI.addOperand(MCOperand::CreateReg(ARM::CPSR)); - OpIdx = 2; + if (B->DoPredicateOperands(MI, Opcode, insn, NumOps)) + OpIdx += 2; + else { + DEBUG(errs() << "Expected predicate operands not found.\n"); + return false; + } - // Fill the variadic part of reglist. unsigned RegListBits = slice(insn, 8, 8) << (Opcode == ARM::tPUSH ? 14 : 15) | slice(insn, 7, 0); + + // Fill the variadic part of reglist. for (unsigned i = 0; i < 16; ++i) { if ((RegListBits >> i) & 1) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, i))); ++OpIdx; } @@ -735,13 +748,13 @@ static bool DisassembleThumb1PushPop(MCInst &MI, unsigned Opcode, uint32_t insn, // no operand // Others: tRd tRn static bool DisassembleThumb1Misc(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { if (NumOps == 0) return true; if (Opcode == ARM::tPUSH || Opcode == ARM::tPOP) - return DisassembleThumb1PushPop(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb1PushPop(MI, Opcode, insn, NumOps, NumOpsAdded, B); const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; @@ -799,16 +812,16 @@ static bool DisassembleThumb1Misc(MCInst &MI, unsigned Opcode, uint32_t insn, && "Expect >=2 operands"); // Add the destination operand. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, getT1tRd(insn)))); if (OpInfo[1].RegClass == ARM::tGPRRegClassID) { // Two register instructions. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, getT1tRn(insn)))); } else { // CBNZ, CBZ - assert((Opcode == ARM::tCBNZ || Opcode == ARM::tCBZ) && "Invalid opcode"); + assert((Opcode == ARM::tCBNZ || Opcode == ARM::tCBZ) &&"Unexpected opcode"); MI.addOperand(MCOperand::CreateImm(getT1Imm6(insn) * 2)); } @@ -823,42 +836,47 @@ static bool DisassembleThumb1Misc(MCInst &MI, unsigned Opcode, uint32_t insn, // tLDM_UPD/tSTM_UPD: tRt tRt AM4ModeImm Pred-Imm Pred-CCR register_list // tLDM: tRt AM4ModeImm Pred-Imm Pred-CCR register_list static bool DisassembleThumb1LdStMul(bool Ld, MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded) { + uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { assert((Opcode == ARM::tLDM || Opcode == ARM::tLDM_UPD || - Opcode == ARM::tSTM_UPD) && "Invalid opcode"); + Opcode == ARM::tSTM_UPD) && "Unexpected opcode"); unsigned &OpIdx = NumOpsAdded; unsigned tRt = getT1tRt(insn); - unsigned RegListBits = slice(insn, 7, 0); OpIdx = 0; // WB register, if necessary. 
if (Opcode == ARM::tLDM_UPD || Opcode == ARM::tSTM_UPD) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, tRt))); ++OpIdx; } - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, tRt))); ++OpIdx; // A8.6.53 LDM / LDMIA / LDMFD - Encoding T1 + // A8.6.53 STM / STMIA / STMEA - Encoding T1 MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))); ++OpIdx; // Handling the two predicate operands before the reglist. - MI.addOperand(MCOperand::CreateImm(ARMCC::AL)); - MI.addOperand(MCOperand::CreateReg(ARM::CPSR)); - OpIdx += 2; + if (B->DoPredicateOperands(MI, Opcode, insn, NumOps)) + OpIdx += 2; + else { + DEBUG(errs() << "Expected predicate operands not found.\n"); + return false; + } + + unsigned RegListBits = slice(insn, 7, 0); // Fill the variadic part of reglist. for (unsigned i = 0; i < 8; ++i) { if ((RegListBits >> i) & 1) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, i))); ++OpIdx; } @@ -868,13 +886,15 @@ static bool DisassembleThumb1LdStMul(bool Ld, MCInst &MI, unsigned Opcode, } static bool DisassembleThumb1LdMul(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { - return DisassembleThumb1LdStMul(true, MI, Opcode, insn, NumOps, NumOpsAdded); + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { + return DisassembleThumb1LdStMul(true, MI, Opcode, insn, NumOps, NumOpsAdded, + B); } static bool DisassembleThumb1StMul(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { - return DisassembleThumb1LdStMul(false, MI, Opcode, insn, NumOps, NumOpsAdded); + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { + return DisassembleThumb1LdStMul(false, MI, Opcode, insn, NumOps, NumOpsAdded, + B); } // A8.6.16 B Encoding T1 @@ -885,12 +905,14 @@ static bool DisassembleThumb1StMul(MCInst &MI, unsigned Opcode, uint32_t insn, // tSVC: imm8 Pred-Imm Pred-CCR // tTRAP: 0 operand (early return) static bool DisassembleThumb1CondBr(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO) { if (Opcode == ARM::tTRAP) return true; const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + if (!OpInfo) return false; + assert(NumOps == 3 && OpInfo[0].RegClass == 0 && OpInfo[1].isPredicate() && OpInfo[2].RegClass == ARM::CCRRegClassID && "Exactly 3 operands expected"); @@ -912,9 +934,11 @@ static bool DisassembleThumb1CondBr(MCInst &MI, unsigned Opcode, uint32_t insn, // // tB: offset static bool DisassembleThumb1Br(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + if (!OpInfo) return false; + assert(NumOps == 1 && OpInfo[0].RegClass == 0 && "1 imm operand expected"); unsigned Imm11 = getT1Imm11(insn); @@ -952,9 +976,8 @@ static bool DisassembleThumb1Br(MCInst &MI, unsigned Opcode, uint32_t insn, // 1101xx Conditional branch, and Supervisor Call on page A6-13 // 11100x Unconditional Branch, see B on page A8-44 // -static bool DisassembleThumb1(uint16_t op, - MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO Builder) { +static 
bool DisassembleThumb1(uint16_t op, MCInst &MI, unsigned Opcode, + uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { unsigned op1 = slice(op, 5, 4); unsigned op2 = slice(op, 3, 2); @@ -963,27 +986,27 @@ static bool DisassembleThumb1(uint16_t op, switch (op1) { case 0: // A6.2.1 Shift (immediate), add, subtract, move, and compare - return DisassembleThumb1General(MI, Opcode, insn, NumOps, NumOpsAdded, - Builder); + return DisassembleThumb1General(MI, Opcode, insn, NumOps, NumOpsAdded, B); case 1: switch (op2) { case 0: switch (op3) { case 0: // A6.2.2 Data-processing - return DisassembleThumb1DP(MI, Opcode, insn, NumOps, NumOpsAdded, - Builder); + return DisassembleThumb1DP(MI, Opcode, insn, NumOps, NumOpsAdded, B); case 1: // A6.2.3 Special data instructions and branch and exchange - return DisassembleThumb1Special(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb1Special(MI, Opcode, insn, NumOps, NumOpsAdded, + B); default: // A8.6.59 LDR (literal) - return DisassembleThumb1LdPC(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb1LdPC(MI, Opcode, insn, NumOps, NumOpsAdded, B); } break; default: // A6.2.4 Load/store single data item - return DisassembleThumb1LdSt(opA, MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb1LdSt(opA, MI, Opcode, insn, NumOps, NumOpsAdded, + B); break; } break; @@ -991,21 +1014,24 @@ static bool DisassembleThumb1(uint16_t op, switch (op2) { case 0: // A6.2.4 Load/store single data item - return DisassembleThumb1LdSt(opA, MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb1LdSt(opA, MI, Opcode, insn, NumOps, NumOpsAdded, + B); case 1: // A6.2.4 Load/store single data item - return DisassembleThumb1LdStSP(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb1LdStSP(MI, Opcode, insn, NumOps, NumOpsAdded, B); case 2: if (op3 <= 1) { // A8.6.10 ADR - return DisassembleThumb1AddPCi(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb1AddPCi(MI, Opcode, insn, NumOps, NumOpsAdded, + B); } else { // A8.6.8 ADD (SP plus immediate) - return DisassembleThumb1AddSPi(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb1AddSPi(MI, Opcode, insn, NumOps, NumOpsAdded, + B); } default: // A6.2.5 Miscellaneous 16-bit instructions - return DisassembleThumb1Misc(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb1Misc(MI, Opcode, insn, NumOps, NumOpsAdded, B); } break; case 3: @@ -1013,17 +1039,17 @@ static bool DisassembleThumb1(uint16_t op, case 0: if (op3 <= 1) { // A8.6.189 STM / STMIA / STMEA - return DisassembleThumb1StMul(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb1StMul(MI, Opcode, insn, NumOps, NumOpsAdded, B); } else { // A8.6.53 LDM / LDMIA / LDMFD - return DisassembleThumb1LdMul(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb1LdMul(MI, Opcode, insn, NumOps, NumOpsAdded, B); } case 1: // A6.2.6 Conditional branch, and Supervisor Call - return DisassembleThumb1CondBr(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb1CondBr(MI, Opcode, insn, NumOps, NumOpsAdded, B); case 2: // Unconditional Branch, see B on page A8-44 - return DisassembleThumb1Br(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb1Br(MI, Opcode, insn, NumOps, NumOpsAdded, B); default: assert(0 && "Unreachable code"); break; @@ -1079,32 +1105,32 @@ static bool DisassembleThumb2SRS(MCInst &MI, unsigned Opcode, uint32_t insn, // t2RFE[IA|DB]W/t2RFE[IA|DB]: Rn static bool DisassembleThumb2RFE(MCInst &MI, unsigned 
Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); NumOpsAdded = 1; return true; } static bool DisassembleThumb2LdStMul(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { if (Thumb2SRSOpcode(Opcode)) return DisassembleThumb2SRS(MI, Opcode, insn, NumOps, NumOpsAdded); if (Thumb2RFEOpcode(Opcode)) - return DisassembleThumb2RFE(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb2RFE(MI, Opcode, insn, NumOps, NumOpsAdded, B); assert((Opcode == ARM::t2LDM || Opcode == ARM::t2LDM_UPD || Opcode == ARM::t2STM || Opcode == ARM::t2STM_UPD) - && "Invalid opcode"); + && "Unexpected opcode"); assert(NumOps >= 5 && "Thumb2 LdStMul expects NumOps >= 5"); unsigned &OpIdx = NumOpsAdded; OpIdx = 0; - unsigned Base = getRegisterEnum(ARM::GPRRegClassID, decodeRn(insn)); + unsigned Base = getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)); // Writeback to base. if (Opcode == ARM::t2LDM_UPD || Opcode == ARM::t2STM_UPD) { @@ -1120,15 +1146,19 @@ static bool DisassembleThumb2LdStMul(MCInst &MI, unsigned Opcode, uint32_t insn, ++OpIdx; // Handling the two predicate operands before the reglist. - MI.addOperand(MCOperand::CreateImm(ARMCC::AL)); - MI.addOperand(MCOperand::CreateReg(ARM::CPSR)); - OpIdx += 2; + if (B->DoPredicateOperands(MI, Opcode, insn, NumOps)) + OpIdx += 2; + else { + DEBUG(errs() << "Expected predicate operands not found.\n"); + return false; + } - // Fill the variadic part of reglist. unsigned RegListBits = insn & ((1 << 16) - 1); + + // Fill the variadic part of reglist. for (unsigned i = 0; i < 16; ++i) { if ((RegListBits >> i) & 1) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, i))); ++OpIdx; } @@ -1144,9 +1174,11 @@ static bool DisassembleThumb2LdStMul(MCInst &MI, unsigned Opcode, uint32_t insn, // t2STREXD: Rm Rd Rs Rn // t2STREXB, t2STREXH: Rm Rd Rn static bool DisassembleThumb2LdStEx(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + if (!OpInfo) return false; + unsigned &OpIdx = NumOpsAdded; OpIdx = 0; @@ -1163,25 +1195,25 @@ static bool DisassembleThumb2LdStEx(MCInst &MI, unsigned Opcode, uint32_t insn, // Add the destination operand for store. if (isStore) { MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(ARM::GPRRegClassID, + getRegisterEnum(B, ARM::GPRRegClassID, isSW ? decodeRs(insn) : decodeRm(insn)))); ++OpIdx; } // Source operand for store and destination operand for load. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); ++OpIdx; // Thumb2 doubleword complication: with an extra source/destination operand. if (isDW) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRs(insn)))); ++OpIdx; } // Finally add the pointer operand. 
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRn(insn))));
++OpIdx;
@@ -1198,9 +1230,10 @@ static bool DisassembleThumb2LdStEx(MCInst &MI, unsigned Opcode, uint32_t insn,
// Ditto for t2LDRD_PRE, t2LDRD_POST, t2STRD_PRE, t2STRD_POST, which are for
// disassembly only and do not have a tied_to writeback base register operand.
static bool DisassembleThumb2LdStDual(MCInst &MI, unsigned Opcode,
- uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded) {
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ if (!OpInfo) return false;
assert(NumOps >= 4 && OpInfo[0].RegClass == ARM::GPRRegClassID
&& "Expect >= 4 operands and first 3 as reg operands");
// Add the <Rt> <Rt2> operands.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRd(insn))));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRs(insn))));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRn(insn))));
// Finally add (+/-)imm8*4, depending on the U bit.
@@ -1235,15 +1268,15 @@ static bool DisassembleThumb2LdStDual(MCInst &MI, unsigned Opcode,
//
// t2TBBgen, t2TBHgen: Rn Rm Pred-Imm Pred-CCR
static bool DisassembleThumb2TB(MCInst &MI, unsigned Opcode,
- uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded) {
+ uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
assert(NumOps >= 2 && "Expect >= 2 operands");
// The generic version of TBB/TBH needs a base register.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRn(insn))));
// Add the index register.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRm(insn))));
NumOpsAdded = 2;
@@ -1278,7 +1311,7 @@ static inline bool Thumb2ShiftOpcode(unsigned Opcode) {
// nothing else, because the shift amount is already specified.
// Similar case holds for t2MOVrx, t2ADDrr, ..., etc.
static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded) {
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
const TargetInstrDesc &TID = ARMInsts[Opcode];
const TargetOperandInfo *OpInfo = TID.OpInfo;
@@ -1293,7 +1326,7 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn,
&& OpInfo[3].RegClass == 0
&& "Exactly 4 operands expected and first two as reg operands");
// Only need to populate the src reg operand.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRm(insn))));
MI.addOperand(MCOperand::CreateReg(0));
MI.addOperand(MCOperand::CreateImm(0));
@@ -1315,7 +1348,7 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn,
// Build the register operands, followed by the constant shift specifier.
MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(ARM::GPRRegClassID, + getRegisterEnum(B, ARM::GPRRegClassID, NoDstReg ? decodeRn(insn) : decodeRs(insn)))); ++OpIdx; @@ -1324,15 +1357,18 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn, if ((Idx = TID.getOperandConstraint(OpIdx, TOI::TIED_TO)) != -1) { // Process tied_to operand constraint. MI.addOperand(MI.getOperand(Idx)); - } else { - assert(!NoDstReg && "Internal error"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + ++OpIdx; + } else if (!NoDstReg) { + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); + ++OpIdx; + } else { + DEBUG(errs() << "Thumb2 encoding error: d==15 for three-reg operands.\n"); + return false; } - ++OpIdx; } - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); ++OpIdx; @@ -1373,7 +1409,7 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn, // // ModImm = ThumbExpandImm(i:imm3:imm8) static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded) { + uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; unsigned &OpIdx = NumOpsAdded; @@ -1389,13 +1425,16 @@ static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode, // Build the register operands, followed by the modified immediate. MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(ARM::GPRRegClassID, + getRegisterEnum(B, ARM::GPRRegClassID, NoDstReg ? decodeRn(insn) : decodeRs(insn)))); ++OpIdx; if (TwoReg) { - assert(!NoDstReg && "Internal error"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + if (NoDstReg) { + DEBUG(errs()<<"Thumb2 encoding error: d==15 for DPModImm 2-reg instr.\n"); + return false; + } + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ++OpIdx; } @@ -1437,7 +1476,7 @@ static inline unsigned decodeThumb2SaturatePos(unsigned Opcode, uint32_t insn) { case ARM::t2USAT16: return slice(insn, 3, 0); default: - assert(0 && "Invalid opcode passed in"); + assert(0 && "Unexpected opcode"); return 0; } } @@ -1459,7 +1498,7 @@ static inline unsigned decodeThumb2SaturatePos(unsigned Opcode, uint32_t insn) { // o t2SSAT[lsl|asr], t2USAT[lsl|asr]: Rs sat_pos Rn shamt // o t2SSAT16, t2USAT16: Rs sat_pos Rn static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded) { + uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; const TargetOperandInfo *OpInfo = TID.OpInfo; @@ -1474,7 +1513,7 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode, // Build the register operand(s), followed by the immediate(s). 
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRs(insn)))); ++OpIdx; @@ -1482,7 +1521,7 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode, if (Thumb2SaturateOpcode(Opcode)) { MI.addOperand(MCOperand::CreateImm(decodeThumb2SaturatePos(Opcode, insn))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); if (Opcode == ARM::t2SSAT16 || Opcode == ARM::t2USAT16) { @@ -1510,7 +1549,7 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode, MI.addOperand(MI.getOperand(Idx)); } else { // Add src reg operand. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); } ++OpIdx; @@ -1528,15 +1567,22 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode, MI.addOperand(MCOperand::CreateImm(getIImm3Imm8(insn))); else if (Opcode == ARM::t2MOVi16 || Opcode == ARM::t2MOVTi16) MI.addOperand(MCOperand::CreateImm(getImm16(insn))); - else if (Opcode == ARM::t2BFC) - MI.addOperand(MCOperand::CreateImm(getBitfieldInvMask(insn))); - else { + else if (Opcode == ARM::t2BFC) { + uint32_t mask = 0; + if (getBitfieldInvMask(insn, mask)) + MI.addOperand(MCOperand::CreateImm(mask)); + else + return false; + } else { // Handle the case of: lsb width assert((Opcode == ARM::t2SBFX || Opcode == ARM::t2UBFX || - Opcode == ARM::t2BFI) && "Invalid opcode"); + Opcode == ARM::t2BFI) && "Unexpected opcode"); MI.addOperand(MCOperand::CreateImm(getLsb(insn))); if (Opcode == ARM::t2BFI) { - assert(getMsb(insn) >= getLsb(insn) && "Encoding error"); + if (getMsb(insn) < getLsb(insn)) { + DEBUG(errs() << "Encoding error: msb < lsb\n"); + return false; + } MI.addOperand(MCOperand::CreateImm(getMsb(insn) - getLsb(insn) + 1)); } else MI.addOperand(MCOperand::CreateImm(getWidthMinus1(insn) + 1)); @@ -1585,7 +1631,7 @@ static inline bool t2MiscCtrlInstr(uint32_t insn) { // t2MSR/t2MSRsys -> Rn mask=Inst{11-8} // t2SMC -> imm4 = Inst{19-16} static bool DisassembleThumb2BrMiscCtrl(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded) { + uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { if (NumOps == 0) return true; @@ -1627,21 +1673,21 @@ static bool DisassembleThumb2BrMiscCtrl(MCInst &MI, unsigned Opcode, // MRS and MRSsys take one GPR reg Rs. if (Opcode == ARM::t2MRS || Opcode == ARM::t2MRSsys) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRs(insn)))); NumOpsAdded = 1; return true; } // BXJ takes one GPR reg Rn. if (Opcode == ARM::t2BXJ) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); NumOpsAdded = 1; return true; } // MSR and MSRsys take one GPR reg Rn, followed by the mask. 
if (Opcode == ARM::t2MSR || Opcode == ARM::t2MSRsys || Opcode == ARM::t2BXJ) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); MI.addOperand(MCOperand::CreateImm(slice(insn, 11, 8))); NumOpsAdded = 2; @@ -1659,7 +1705,7 @@ static bool DisassembleThumb2BrMiscCtrl(MCInst &MI, unsigned Opcode, switch (Opcode) { default: - assert(0 && "Unreachable code"); + assert(0 && "Unexpected opcode"); return false; case ARM::t2B: Offset = decodeImm32_B_EncodingT4(insn); @@ -1700,7 +1746,7 @@ static inline bool Thumb2PreloadOpcode(unsigned Opcode) { } static bool DisassembleThumb2PreLoad(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { // Preload Data/Instruction requires either 2 or 3 operands. // t2PLDi12, t2PLDi8, t2PLDpci: Rn [+/-]imm12/imm8 @@ -1718,12 +1764,12 @@ static bool DisassembleThumb2PreLoad(MCInst &MI, unsigned Opcode, uint32_t insn, OpInfo[0].RegClass == ARM::GPRRegClassID && "Expect >= 2 operands and first one as reg operand"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ++OpIdx; if (OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); } else { assert(OpInfo[OpIdx].RegClass == 0 && !OpInfo[OpIdx].isPredicate() @@ -1765,9 +1811,10 @@ static bool DisassembleThumb2PreLoad(MCInst &MI, unsigned Opcode, uint32_t insn, // These instrs calculate an address from the PC value and an immediate offset. // Rd Rn=PC (+/-)imm12 (+ if Inst{23} == 0b1) static bool DisassembleThumb2Ldpci(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded) { + uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + if (!OpInfo) return false; assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::GPRRegClassID && @@ -1776,7 +1823,7 @@ static bool DisassembleThumb2Ldpci(MCInst &MI, unsigned Opcode, // Build the register operand, followed by the (+/-)imm12 immediate. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); MI.addOperand(MCOperand::CreateImm(decodeImm12(insn))); @@ -1812,16 +1859,16 @@ static bool DisassembleThumb2Ldpci(MCInst &MI, unsigned Opcode, // Delegates to DisassembleThumb2PreLoad() for preload data/instruction. // Delegates to DisassembleThumb2Ldpci() for load * literal operations. static bool DisassembleThumb2LdSt(bool Load, MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded) { + uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { unsigned Rn = decodeRn(insn); if (Thumb2PreloadOpcode(Opcode)) - return DisassembleThumb2PreLoad(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb2PreLoad(MI, Opcode, insn, NumOps, NumOpsAdded, B); // See, for example, A6.3.7 Load word: Table A6-18 Load word. 
if (Load && Rn == 15) - return DisassembleThumb2Ldpci(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb2Ldpci(MI, Opcode, insn, NumOps, NumOpsAdded, B); const TargetInstrDesc &TID = ARMInsts[Opcode]; const TargetOperandInfo *OpInfo = TID.OpInfo; @@ -1870,13 +1917,16 @@ static bool DisassembleThumb2LdSt(bool Load, MCInst &MI, unsigned Opcode, Imm = decodeImm8(insn); } - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, R0))); + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + R0))); ++OpIdx; - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, R1))); + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + R1))); ++OpIdx; if (ThreeReg) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,R2))); + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + R2))); ++OpIdx; } @@ -1900,7 +1950,7 @@ static bool DisassembleThumb2LdSt(bool Load, MCInst &MI, unsigned Opcode, // // Miscellaneous operations: Rs [Rn] Rm static bool DisassembleThumb2DPReg(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; const TargetOperandInfo *OpInfo = TID.OpInfo; @@ -1917,17 +1967,17 @@ static bool DisassembleThumb2DPReg(MCInst &MI, unsigned Opcode, uint32_t insn, bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass == ARM::GPRRegClassID; - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRs(insn)))); ++OpIdx; if (ThreeReg) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ++OpIdx; } - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); ++OpIdx; @@ -1954,7 +2004,7 @@ static bool DisassembleThumb2DPReg(MCInst &MI, unsigned Opcode, uint32_t insn, // Unsigned Sum of Absolute Differences [and Accumulate] // Rs Rn Rm [Ra=Inst{15-12}] static bool DisassembleThumb2Mul(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; @@ -1968,17 +2018,17 @@ static bool DisassembleThumb2Mul(MCInst &MI, unsigned Opcode, uint32_t insn, bool FourReg = NumOps > 3 && OpInfo[3].RegClass == ARM::GPRRegClassID; - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRs(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); if (FourReg) - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); NumOpsAdded = FourReg ? 
4 : 3; @@ -1999,7 +2049,7 @@ static bool DisassembleThumb2Mul(MCInst &MI, unsigned Opcode, uint32_t insn, // // Signed/Unsigned divide: t2SDIV, t2UDIV: Rs Rn Rm static bool DisassembleThumb2LongMul(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; @@ -2014,16 +2064,16 @@ static bool DisassembleThumb2LongMul(MCInst &MI, unsigned Opcode, uint32_t insn, // Build the register operands. if (FourReg) - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRs(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); if (FourReg) @@ -2059,38 +2109,41 @@ static bool DisassembleThumb2LongMul(MCInst &MI, unsigned Opcode, uint32_t insn, // 1xxxxxx - Coprocessor instructions on page A6-40 // static bool DisassembleThumb2(uint16_t op1, uint16_t op2, uint16_t op, - MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, + unsigned &NumOpsAdded, BO B) { switch (op1) { case 1: if (slice(op2, 6, 5) == 0) { if (slice(op2, 2, 2) == 0) { // Load/store multiple. - return DisassembleThumb2LdStMul(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb2LdStMul(MI, Opcode, insn, NumOps, NumOpsAdded, + B); } // Load/store dual, load/store exclusive, table branch, otherwise. - assert(slice(op2, 2, 2) == 1 && "Encoding error"); + assert(slice(op2, 2, 2) == 1 && "Thumb2 encoding error!"); if ((ARM::t2LDREX <= Opcode && Opcode <= ARM::t2LDREXH) || (ARM::t2STREX <= Opcode && Opcode <= ARM::t2STREXH)) { // Load/store exclusive. - return DisassembleThumb2LdStEx(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb2LdStEx(MI, Opcode, insn, NumOps, NumOpsAdded, + B); } if (Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2LDRD_PRE || Opcode == ARM::t2LDRD_POST || Opcode == ARM::t2STRDi8 || Opcode == ARM::t2STRD_PRE || Opcode == ARM::t2STRD_POST) { // Load/store dual. - return DisassembleThumb2LdStDual(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb2LdStDual(MI, Opcode, insn, NumOps, NumOpsAdded, + B); } if (Opcode == ARM::t2TBBgen || Opcode == ARM::t2TBHgen) { // Table branch. - return DisassembleThumb2TB(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb2TB(MI, Opcode, insn, NumOps, NumOpsAdded, B); } } else if (slice(op2, 6, 5) == 1) { // Data-processing (shifted register). 
- return DisassembleThumb2DPSoReg(MI, Opcode, insn, NumOps, NumOpsAdded);
+ return DisassembleThumb2DPSoReg(MI, Opcode, insn, NumOps, NumOpsAdded, B);
}
// FIXME: A6.3.18 Coprocessor instructions
@@ -2101,14 +2154,17 @@ static bool DisassembleThumb2(uint16_t op1, uint16_t op2, uint16_t op,
if (op == 0) {
if (slice(op2, 5, 5) == 0) {
// Data-processing (modified immediate)
- return DisassembleThumb2DPModImm(MI, Opcode, insn, NumOps, NumOpsAdded);
+ return DisassembleThumb2DPModImm(MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
} else {
// Data-processing (plain binary immediate)
- return DisassembleThumb2DPBinImm(MI, Opcode, insn, NumOps, NumOpsAdded);
+ return DisassembleThumb2DPBinImm(MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
}
} else {
// Branches and miscellaneous control on page A6-20.
- return DisassembleThumb2BrMiscCtrl(MI, Opcode, insn, NumOps, NumOpsAdded);
+ return DisassembleThumb2BrMiscCtrl(MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
}
break;
@@ -2119,7 +2175,8 @@ static bool DisassembleThumb2(uint16_t op1, uint16_t op2, uint16_t op,
if (slice(op2, 0, 0) == 0) {
if (slice(op2, 4, 4) == 0) {
// Store single data item on page A6-30
- return DisassembleThumb2LdSt(false, MI,Opcode,insn,NumOps,NumOpsAdded);
+ return DisassembleThumb2LdSt(false, MI,Opcode,insn,NumOps,NumOpsAdded,
+ B);
} else {
// FIXME: Advanced SIMD element or structure load/store instructions.
// But see ThumbDisassembler::getInstruction().
}
} else {
// Table A6-9 32-bit Thumb instruction encoding: Load byte|halfword|word
- return DisassembleThumb2LdSt(true, MI,Opcode,insn,NumOps,NumOpsAdded);
+ return DisassembleThumb2LdSt(true, MI,Opcode,insn,NumOps,NumOpsAdded, B);
}
break;
case 1:
if (slice(op2, 4, 4) == 0) {
// A6.3.12 Data-processing (register)
- return DisassembleThumb2DPReg(MI, Opcode, insn, NumOps, NumOpsAdded);
+ return DisassembleThumb2DPReg(MI, Opcode, insn, NumOps, NumOpsAdded, B);
} else if (slice(op2, 3, 3) == 0) {
// A6.3.16 Multiply, multiply accumulate, and absolute difference
- return DisassembleThumb2Mul(MI, Opcode, insn, NumOps, NumOpsAdded);
+ return DisassembleThumb2Mul(MI, Opcode, insn, NumOps, NumOpsAdded, B);
} else {
// A6.3.17 Long multiply, long multiply accumulate, and divide
- return DisassembleThumb2LongMul(MI, Opcode, insn, NumOps, NumOpsAdded);
+ return DisassembleThumb2LongMul(MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
}
break;
default:
@@ -2151,7 +2209,7 @@ static bool DisassembleThumb2(uint16_t op1, uint16_t op2, uint16_t op,
break;
default:
- assert(0 && "Encoding error for Thumb2 instruction!");
+ assert(0 && "Thumb2 encoding error!");
break;
}
@@ -2174,8 +2232,10 @@ static bool DisassembleThumbFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned bits15_11 = slice(HalfWord, 15, 11);
// A6.1 Thumb instruction set encoding
- assert((bits15_11 == 0x1D || bits15_11 == 0x1E || bits15_11 == 0x1F) &&
- "Bits [15:11] of first halfword of a Thumb2 instruction out of range");
+ if (!(bits15_11 == 0x1D || bits15_11 == 0x1E || bits15_11 == 0x1F)) {
+ DEBUG(errs() << "Bits [15:11] of first halfword of a Thumb2 instruction"
+ << " out of range\n");
+ return false;
+ }
// A6.3 32-bit Thumb instruction encoding
@@ -2183,5 +2243,6 @@ static bool DisassembleThumbFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
uint16_t op2 = slice(HalfWord, 10, 4);
uint16_t op = slice(insn, 15, 15);
- return DisassembleThumb2(op1, op2, op, MI, Opcode,
insn, NumOps, NumOpsAdded, + Builder); } diff --git a/lib/Target/ARM/Makefile b/lib/Target/ARM/Makefile index a8dd38c..9e3ff29 100644 --- a/lib/Target/ARM/Makefile +++ b/lib/Target/ARM/Makefile @@ -16,8 +16,9 @@ BUILT_SOURCES = ARMGenRegisterInfo.h.inc ARMGenRegisterNames.inc \ ARMGenRegisterInfo.inc ARMGenInstrNames.inc \ ARMGenInstrInfo.inc ARMGenAsmWriter.inc \ ARMGenDAGISel.inc ARMGenSubtarget.inc \ - ARMGenCodeEmitter.inc ARMGenCallingConv.inc + ARMGenCodeEmitter.inc ARMGenCallingConv.inc \ + ARMGenDecoderTables.inc ARMGenEDInfo.inc -DIRS = AsmPrinter AsmParser TargetInfo +DIRS = AsmPrinter AsmParser Disassembler TargetInfo include $(LEVEL)/Makefile.common diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp index a5dfcb3..2f635fe 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp @@ -36,9 +36,12 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -using namespace llvm; +namespace llvm { extern cl::opt<bool> ReuseFrameIndexVals; +} + +using namespace llvm; Thumb1RegisterInfo::Thumb1RegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &sti) @@ -56,7 +59,7 @@ void Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB, unsigned PredReg) const { MachineFunction &MF = *MBB.getParent(); MachineConstantPool *ConstantPool = MF.getConstantPool(); - Constant *C = ConstantInt::get( + const Constant *C = ConstantInt::get( Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val); unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4); @@ -461,6 +464,13 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, Offset -= AFI->getFramePtrSpillOffset(); } + // Special handling of dbg_value instructions. + if (MI.isDebugValue()) { + MI.getOperand(i). 
ChangeToRegister(FrameReg, false /*isDef*/);
+ MI.getOperand(i+1).ChangeToImmediate(Offset);
+ return 0;
+ }
+
unsigned Opcode = MI.getOpcode();
const TargetInstrDesc &Desc = MI.getDesc();
unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index de46056..b143bd9 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -44,18 +44,22 @@ Thumb2InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
- if (DestRC == ARM::GPRRegisterClass &&
- SrcRC == ARM::GPRRegisterClass) {
- BuildMI(MBB, I, DL, get(ARM::tMOVgpr2gpr), DestReg).addReg(SrcReg);
- return true;
- } else if (DestRC == ARM::GPRRegisterClass &&
- SrcRC == ARM::tGPRRegisterClass) {
- BuildMI(MBB, I, DL, get(ARM::tMOVtgpr2gpr), DestReg).addReg(SrcReg);
- return true;
- } else if (DestRC == ARM::tGPRRegisterClass &&
- SrcRC == ARM::GPRRegisterClass) {
- BuildMI(MBB, I, DL, get(ARM::tMOVgpr2tgpr), DestReg).addReg(SrcReg);
- return true;
+ if (DestRC == ARM::GPRRegisterClass) {
+ if (SrcRC == ARM::GPRRegisterClass) {
+ BuildMI(MBB, I, DL, get(ARM::tMOVgpr2gpr), DestReg).addReg(SrcReg);
+ return true;
+ } else if (SrcRC == ARM::tGPRRegisterClass) {
+ BuildMI(MBB, I, DL, get(ARM::tMOVtgpr2gpr), DestReg).addReg(SrcReg);
+ return true;
+ }
+ } else if (DestRC == ARM::tGPRRegisterClass) {
+ if (SrcRC == ARM::GPRRegisterClass) {
+ BuildMI(MBB, I, DL, get(ARM::tMOVgpr2tgpr), DestReg).addReg(SrcReg);
+ return true;
+ } else if (SrcRC == ARM::tGPRRegisterClass) {
+ BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg).addReg(SrcReg);
+ return true;
+ }
}
// Handle SPR, DPR, and QPR copies.
diff --git a/lib/Target/ARM/Thumb2RegisterInfo.cpp b/lib/Target/ARM/Thumb2RegisterInfo.cpp
index f24d3e2..07dd0be 100644
--- a/lib/Target/ARM/Thumb2RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb2RegisterInfo.cpp
@@ -52,7 +52,7 @@ void Thumb2RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
unsigned PredReg) const {
MachineFunction &MF = *MBB.getParent();
MachineConstantPool *ConstantPool = MF.getConstantPool();
- Constant *C = ConstantInt::get(
+ const Constant *C = ConstantInt::get(
Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val);
unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index 2bc75f2..8fe2e42 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -656,15 +656,8 @@ static bool UpdateCPSRUse(MachineInstr &MI, bool LiveCPSR) {
bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
bool Modified = false;
- bool LiveCPSR = false; // Yes, CPSR could be livein.
- for (MachineBasicBlock::const_livein_iterator I = MBB.livein_begin(),
- E = MBB.livein_end(); I != E; ++I) {
- if (*I == ARM::CPSR) {
- LiveCPSR = true;
- break;
- }
- }
+ bool LiveCPSR = MBB.isLiveIn(ARM::CPSR);
MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
MachineBasicBlock::iterator NextMII;
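
A note on the Thumb dispatch logic touched above: DisassembleThumbFrm now soft-fails when bits [15:11] of the first halfword do not mark a 32-bit instruction. A minimal standalone sketch of that A6.1 classification (plain C++; the helper names are illustrative, not the disassembler's API):

  #include <cstdint>

  // slice(Val, Hi, Lo): extract bits [Hi:Lo] of Val, as the slice() calls
  // in the diff above do. Assumes a field width below 32 bits.
  static unsigned slice(uint32_t Val, unsigned Hi, unsigned Lo) {
    return (Val >> Lo) & ((1u << (Hi - Lo + 1)) - 1);
  }

  // A6.1: a halfword opens a 32-bit Thumb2 instruction iff bits [15:11]
  // are 0b11101 (0x1D), 0b11110 (0x1E) or 0b11111 (0x1F); 0b11100 is the
  // 16-bit unconditional branch and stays a Thumb1 encoding.
  static bool startsThumb2(uint16_t HalfWord) {
    unsigned bits15_11 = slice(HalfWord, 15, 11);
    return bits15_11 == 0x1D || bits15_11 == 0x1E || bits15_11 == 0x1F;
  }

DisassembleThumb2() then dispatches on op1 = HalfWord[12:11], op2 = HalfWord[10:4], and op = bit 15 of the second halfword, exactly the slices taken at the end of DisassembleThumbFrm.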
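
The DisassembleThumb2DPModImm hunk relies on ModImm = ThumbExpandImm(i:imm3:imm8). For readers without the ARM ARM at hand, a sketch of that A6.3.2 expansion (an illustration of the encoding rule, not the commit's own code):

  // Expand a 12-bit Thumb2 modified immediate (i:imm3:imm8) to 32 bits.
  static uint32_t ror32(uint32_t Val, unsigned Amt) {
    Amt &= 31;
    return Amt == 0 ? Val : (Val >> Amt) | (Val << (32 - Amt));
  }

  static uint32_t thumbExpandImm(unsigned Imm12) {
    uint32_t Imm8 = Imm12 & 0xFF;
    if (((Imm12 >> 10) & 3) == 0) {
      // (Imm8 == 0 is UNPREDICTABLE for the last three cases.)
      switch ((Imm12 >> 8) & 3) {
      case 0:  return Imm8;                       // 00000000 00000000 00000000 abcdefgh
      case 1:  return (Imm8 << 16) | Imm8;        // 00000000 abcdefgh 00000000 abcdefgh
      case 2:  return (Imm8 << 24) | (Imm8 << 8); // abcdefgh 00000000 abcdefgh 00000000
      default: return Imm8 * 0x01010101u;         // abcdefgh in every byte
      }
    }
    // Otherwise an 8-bit value 1bcdefgh rotated right by Imm12<11:7>,
    // which is always in the range 8..31 here.
    return ror32(0x80u | (Imm12 & 0x7F), (Imm12 >> 7) & 0x1F);
  }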
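
The t2SBFX/t2UBFX/t2BFI handling above computes the width operand as msb - lsb + 1 and now rejects msb < lsb instead of asserting. A sketch of that check, reusing slice() from the first sketch and assuming the usual Thumb2 bitfield layout (lsb = imm3:imm2 at bits [14:12] and [7:6] of the second halfword, msb or widthminus1 at bits [4:0]); bfLsb and bfMsb are hypothetical stand-ins for the getLsb/getMsb helpers:

  static unsigned bfLsb(uint32_t Insn) {
    return (slice(Insn, 14, 12) << 2) | slice(Insn, 7, 6);
  }
  static unsigned bfMsb(uint32_t Insn) { return slice(Insn, 4, 0); }

  // BFI's width operand; fails on the msb < lsb encoding error, mirroring
  // the new DEBUG-and-return-false path rather than an assert.
  static bool decodeBfiWidth(uint32_t Insn, unsigned &Width) {
    unsigned Lsb = bfLsb(Insn), Msb = bfMsb(Insn);
    if (Msb < Lsb)
      return false;
    Width = Msb - Lsb + 1;
    return true;
  }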
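
Finally, the reglist handling in DisassembleThumb2LdStMul turns each set bit of the low halfword into one register operand. The same expansion as a free-standing loop:

  #include <vector>

  // Map each set bit of Insn[15:0] to a GPR index (R0..R15), in the order
  // the variadic reglist operands are appended above.
  static std::vector<unsigned> expandRegList(uint32_t Insn) {
    std::vector<unsigned> Regs;
    uint32_t RegListBits = Insn & 0xFFFF;
    for (unsigned i = 0; i != 16; ++i)
      if ((RegListBits >> i) & 1)
        Regs.push_back(i);
    return Regs;
  }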