Diffstat (limited to 'contrib/llvm/lib/Target/ARM')
87 files changed, 13766 insertions, 4904 deletions
diff --git a/contrib/llvm/lib/Target/ARM/ARM.h b/contrib/llvm/lib/Target/ARM/ARM.h
index 16d0da3..2a1e8e4 100644
--- a/contrib/llvm/lib/Target/ARM/ARM.h
+++ b/contrib/llvm/lib/Target/ARM/ARM.h
@@ -1,4 +1,4 @@
-//===-- ARM.h - Top-level interface for ARM representation---- --*- C++ -*-===//
+//===-- ARM.h - Top-level interface for ARM representation ------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -18,9 +18,7 @@
 #include "MCTargetDesc/ARMBaseInfo.h"
 #include "MCTargetDesc/ARMMCTargetDesc.h"
 #include "llvm/Support/DataTypes.h"
-#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Target/TargetMachine.h"
-#include <cassert>
 
 namespace llvm {
 
diff --git a/contrib/llvm/lib/Target/ARM/ARM.td b/contrib/llvm/lib/Target/ARM/ARM.td
index 5c727ad..9b0cb0c 100644
--- a/contrib/llvm/lib/Target/ARM/ARM.td
+++ b/contrib/llvm/lib/Target/ARM/ARM.td
@@ -1,4 +1,4 @@
-//===- ARM.td - Describe the ARM Target Machine ------------*- tablegen -*-===//
+//===-- ARM.td - Describe the ARM Target Machine -----------*- tablegen -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -23,9 +23,6 @@ include "llvm/Target/Target.td"
 def ModeThumb : SubtargetFeature<"thumb-mode", "InThumbMode", "true",
                                  "Thumb mode">;
 
-def ModeNaCl : SubtargetFeature<"nacl-mode", "InNaClMode", "true",
-                                "Native client mode">;
-
 //===----------------------------------------------------------------------===//
 // ARM Subtarget features.
 //
@@ -35,6 +32,9 @@ def FeatureVFP2 : SubtargetFeature<"vfp2", "HasVFPv2", "true",
 def FeatureVFP3 : SubtargetFeature<"vfp3", "HasVFPv3", "true",
                                    "Enable VFP3 instructions",
                                    [FeatureVFP2]>;
+def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true",
+                                   "Enable VFP4 instructions",
+                                   [FeatureVFP3]>;
 def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true",
                                    "Enable NEON instructions",
                                    [FeatureVFP3]>;
@@ -86,6 +86,11 @@ def FeatureAvoidPartialCPSR : SubtargetFeature<"avoid-partial-cpsr",
                                                "AvoidCPSRPartialUpdate", "true",
                                  "Avoid CPSR partial update for OOO execution">;
 
+// Some processors perform return stack prediction. CodeGen should avoid issue
+// "normal" call instructions to callees which do not return.
+def FeatureHasRAS : SubtargetFeature<"ras", "HasRAS", "true",
+                                     "Has return address stack">;
+
 /// Some M architectures don't have the DSP extension (v7E-M vs. v7M)
 def FeatureDSPThumb2 : SubtargetFeature<"t2dsp", "Thumb2DSP", "true",
                                         "Supports v7 DSP instructions in Thumb2">;
@@ -201,13 +206,14 @@ def : Processor<"arm1156t2f-s", ARMV6Itineraries, [HasV6T2Ops, FeatureVFP2,
 
 // V7a Processors.
 def : Processor<"cortex-a8", CortexA8Itineraries,
                 [ProcA8, HasV7Ops, FeatureNEON, FeatureDB,
-                 FeatureDSPThumb2]>;
+                 FeatureDSPThumb2, FeatureHasRAS]>;
 def : Processor<"cortex-a9", CortexA9Itineraries,
                 [ProcA9, HasV7Ops, FeatureNEON, FeatureDB,
-                 FeatureDSPThumb2]>;
+                 FeatureDSPThumb2, FeatureHasRAS]>;
 def : Processor<"cortex-a9-mp", CortexA9Itineraries,
                 [ProcA9, HasV7Ops, FeatureNEON, FeatureDB,
-                 FeatureDSPThumb2, FeatureMP]>;
+                 FeatureDSPThumb2, FeatureMP,
+                 FeatureHasRAS]>;
 
 // V7M Processors.
 def : ProcNoItin<"cortex-m3",       [HasV7Ops,
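(Annotation, not part of the diff.) The new ARM.td features above follow the standard SubtargetFeature pattern: each feature string toggles a boolean member of ARMSubtarget that code generation then queries through an accessor; the patch itself relies on Subtarget->hasVFP4() further down in ARMAsmPrinter.cpp. A minimal, self-contained C++ sketch of that pattern, with made-up class and method names (the real members live in ARMSubtarget.h):

    // sketch.cpp -- illustrative only; not the actual ARMSubtarget code.
    #include <cstdio>

    class SubtargetSketch {
      bool HasVFPv4 = false; // set when "+vfp4" (FeatureVFP4) is enabled
      bool HasRAS   = false; // set when "+ras"  (FeatureHasRAS) is enabled
    public:
      void enableVFP4() { HasVFPv4 = true; }
      void enableRAS()  { HasRAS = true; }
      bool hasVFP4() const { return HasVFPv4; }
      bool hasRAS()  const { return HasRAS; }
    };

    int main() {
      SubtargetSketch ST;
      ST.enableRAS(); // e.g. cortex-a8/a9 now imply FeatureHasRAS
      std::printf("vfp4: %d  ras: %d\n", ST.hasVFP4(), ST.hasRAS());
      return 0;
    }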
diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
index ea3319f..410790a 100644
--- a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -13,10 +13,9 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "asm-printer"
-#include "ARM.h"
 #include "ARMAsmPrinter.h"
+#include "ARM.h"
 #include "ARMBuildAttrs.h"
-#include "ARMBaseRegisterInfo.h"
 #include "ARMConstantPoolValue.h"
 #include "ARMMachineFunctionInfo.h"
 #include "ARMTargetMachine.h"
@@ -35,7 +34,6 @@
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCSectionMachO.h"
 #include "llvm/MC/MCObjectStreamer.h"
@@ -44,10 +42,7 @@
 #include "llvm/Target/Mangler.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -85,15 +80,15 @@ namespace {
 
     void EmitTextAttribute(unsigned Attribute, StringRef String) {
       switch (Attribute) {
+      default: llvm_unreachable("Unsupported Text attribute in ASM Mode");
       case ARMBuildAttrs::CPU_name:
-        Streamer.EmitRawText(StringRef("\t.cpu ") + LowercaseString(String));
+        Streamer.EmitRawText(StringRef("\t.cpu ") + String.lower());
         break;
       /* GAS requires .fpu to be emitted regardless of EABI attribute */
       case ARMBuildAttrs::Advanced_SIMD_arch:
       case ARMBuildAttrs::VFP_arch:
-        Streamer.EmitRawText(StringRef("\t.fpu ") + LowercaseString(String));
+        Streamer.EmitRawText(StringRef("\t.fpu ") + String.lower());
         break;
-      default: assert(0 && "Unsupported Text attribute in ASM Mode"); break;
       }
     }
     void Finish() { }
@@ -197,15 +192,14 @@ namespace {
       AttributeItemType item = Contents[i];
       Streamer.EmitULEB128IntValue(item.Tag, 0);
       switch (item.Type) {
+      default: llvm_unreachable("Invalid attribute type");
       case AttributeItemType::NumericAttribute:
         Streamer.EmitULEB128IntValue(item.IntValue, 0);
         break;
       case AttributeItemType::TextAttribute:
-        Streamer.EmitBytes(UppercaseString(item.StringValue), 0);
+        Streamer.EmitBytes(item.StringValue.upper(), 0);
         Streamer.EmitIntValue(0, 1); // '\0'
         break;
-      default:
-        assert(0 && "Invalid attribute type");
       }
     }
 
@@ -300,6 +294,22 @@ void ARMAsmPrinter::EmitFunctionEntryLabel() {
   OutStreamer.EmitLabel(CurrentFnSym);
 }
 
+void ARMAsmPrinter::EmitXXStructor(const Constant *CV) {
+  uint64_t Size = TM.getTargetData()->getTypeAllocSize(CV->getType());
+  assert(Size && "C++ constructor pointer had zero size!");
+
+  const GlobalValue *GV = dyn_cast<GlobalValue>(CV->stripPointerCasts());
+  assert(GV && "C++ constructor pointer was not a GlobalValue!");
+
+  const MCExpr *E = MCSymbolRefExpr::Create(Mang->getSymbol(GV),
+                                            (Subtarget->isTargetDarwin()
+                                             ? MCSymbolRefExpr::VK_None
+                                             : MCSymbolRefExpr::VK_ARM_TARGET1),
+                                            OutContext);
+
+  OutStreamer.EmitValue(E, Size);
+}
+
 /// runOnMachineFunction - This uses the EmitInstruction()
 /// method to print assembly for each instruction.
 ///
@@ -316,8 +326,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
   unsigned TF = MO.getTargetFlags();
 
   switch (MO.getType()) {
-  default:
-    assert(0 && "<unknown operand type>");
+  default: llvm_unreachable("<unknown operand type>");
   case MachineOperand::MO_Register: {
     unsigned Reg = MO.getReg();
     assert(TargetRegisterInfo::isPhysicalRegister(Reg));
@@ -494,11 +503,21 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
       return false;
     }
 
-    // These modifiers are not yet supported.
-    case 'p': // The high single-precision register of a VFP double-precision
-              // register.
     case 'e': // The low doubleword register of a NEON quad register.
-    case 'f': // The high doubleword register of a NEON quad register.
+    case 'f': { // The high doubleword register of a NEON quad register.
+      if (!MI->getOperand(OpNum).isReg())
+        return true;
+      unsigned Reg = MI->getOperand(OpNum).getReg();
+      if (!ARM::QPRRegClass.contains(Reg))
+        return true;
+      const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+      unsigned SubReg = TRI->getSubReg(Reg, ExtraCode[0] == 'e' ?
+                                       ARM::dsub_0 : ARM::dsub_1);
+      O << ARMInstPrinter::getRegisterName(SubReg);
+      return false;
+    }
+
+    // These modifiers are not yet supported.
     case 'h': // A range of VFP/NEON registers suitable for VLD1/VST1.
     case 'H': // The highest-numbered register of a pair.
       return true;
@@ -576,10 +595,8 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) {
     OutStreamer.EmitAssemblerFlag(MCAF_SyntaxUnified);
 
   // Emit ARM Build Attributes
-  if (Subtarget->isTargetELF()) {
-
+  if (Subtarget->isTargetELF())
     emitAttributes();
-  }
 }
 
@@ -710,15 +727,26 @@ void ARMAsmPrinter::emitAttributes() {
 
   if (Subtarget->hasNEON() && emitFPU) {
     /* NEON is not exactly a VFP architecture, but GAS emit one of
-     * neon/vfpv3/vfpv2 for .fpu parameters */
-    AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon");
+     * neon/neon-vfpv4/vfpv3/vfpv2 for .fpu parameters */
+    if (Subtarget->hasVFP4())
+      AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch,
+                                     "neon-vfpv4");
+    else
+      AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon");
     /* If emitted for NEON, omit from VFP below, since you can have both
      * NEON and VFP in build attributes but only one .fpu */
     emitFPU = false;
   }
 
+  /* VFPv4 + .fpu */
+  if (Subtarget->hasVFP4()) {
+    AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch,
+                               ARMBuildAttrs::AllowFPv4A);
+    if (emitFPU)
+      AttrEmitter->EmitTextAttribute(ARMBuildAttrs::VFP_arch, "vfpv4");
+
   /* VFPv3 + .fpu */
-  if (Subtarget->hasVFP3()) {
+  } else if (Subtarget->hasVFP3()) {
     AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch,
                                ARMBuildAttrs::AllowFPv3A);
     if (emitFPU)
@@ -740,14 +768,14 @@ void ARMAsmPrinter::emitAttributes() {
   }
 
   // Signal various FP modes.
-  if (!UnsafeFPMath) {
+  if (!TM.Options.UnsafeFPMath) {
     AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_denormal,
                                ARMBuildAttrs::Allowed);
     AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_exceptions,
                                ARMBuildAttrs::Allowed);
   }
 
-  if (NoInfsFPMath && NoNaNsFPMath)
+  if (TM.Options.NoInfsFPMath && TM.Options.NoNaNsFPMath)
     AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_number_model,
                                ARMBuildAttrs::Allowed);
   else
@@ -760,7 +788,7 @@ void ARMAsmPrinter::emitAttributes() {
   AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_align8_preserved, 1);
 
   // Hard float.  Use both S and D registers and conform to AAPCS-VFP.
-  if (Subtarget->isAAPCS_ABI() && FloatABIType == FloatABI::Hard) {
+  if (Subtarget->isAAPCS_ABI() && TM.Options.FloatABIType == FloatABI::Hard) {
     AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_HardFP_use, 3);
     AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_VFP_args, 1);
   }
@@ -808,7 +836,6 @@ static MCSymbol *getPICLabel(const char *Prefix, unsigned FunctionNumber,
 static MCSymbolRefExpr::VariantKind
 getModifierVariantKind(ARMCP::ARMCPModifier Modifier) {
   switch (Modifier) {
-  default: llvm_unreachable("Unknown modifier!");
   case ARMCP::no_modifier: return MCSymbolRefExpr::VK_None;
   case ARMCP::TLSGD:       return MCSymbolRefExpr::VK_ARM_TLSGD;
   case ARMCP::TPOFF:       return MCSymbolRefExpr::VK_ARM_TPOFF;
@@ -816,7 +843,7 @@ getModifierVariantKind(ARMCP::ARMCPModifier Modifier) {
   case ARMCP::GOT:         return MCSymbolRefExpr::VK_ARM_GOT;
   case ARMCP::GOTOFF:      return MCSymbolRefExpr::VK_ARM_GOTOFF;
   }
-  return MCSymbolRefExpr::VK_None;
+  llvm_unreachable("Invalid ARMCPModifier!");
 }
 
 MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV) {
@@ -1070,7 +1097,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
   }
 
   // Try to figure out the unwinding opcode out of src / dst regs.
-  if (MI->getDesc().mayStore()) {
+  if (MI->mayStore()) {
     // Register saves.
     assert(DstReg == ARM::SP &&
            "Only stack pointer as a destination reg is supported");
@@ -1084,7 +1111,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
     switch (Opc) {
     default:
       MI->dump();
-      assert(0 && "Unsupported opcode for unwinding information");
+      llvm_unreachable("Unsupported opcode for unwinding information");
     case ARM::tPUSH:
       // Special case here: no src & dst reg, but two extra imp ops.
       StartOp = 2; NumOffset = 2;
@@ -1099,6 +1126,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
       break;
     case ARM::STR_PRE_IMM:
     case ARM::STR_PRE_REG:
+    case ARM::t2STR_PRE:
       assert(MI->getOperand(2).getReg() == ARM::SP &&
              "Only stack pointer as a source reg is supported");
       RegList.push_back(SrcReg);
@@ -1112,14 +1140,16 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
     switch (Opc) {
     default:
       MI->dump();
-      assert(0 && "Unsupported opcode for unwinding information");
+      llvm_unreachable("Unsupported opcode for unwinding information");
     case ARM::MOVr:
+    case ARM::tMOVr:
       Offset = 0;
       break;
     case ARM::ADDri:
       Offset = -MI->getOperand(2).getImm();
       break;
     case ARM::SUBri:
+    case ARM::t2SUBri:
       Offset = MI->getOperand(2).getImm();
       break;
     case ARM::tSUBspi:
@@ -1157,16 +1187,16 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
         OutStreamer.EmitPad(Offset);
       } else {
         MI->dump();
-        assert(0 && "Unsupported opcode for unwinding information");
+        llvm_unreachable("Unsupported opcode for unwinding information");
       }
     } else if (DstReg == ARM::SP) {
       // FIXME: .movsp goes here
       MI->dump();
-      assert(0 && "Unsupported opcode for unwinding information");
+      llvm_unreachable("Unsupported opcode for unwinding information");
     }
     else {
       MI->dump();
-      assert(0 && "Unsupported opcode for unwinding information");
+      llvm_unreachable("Unsupported opcode for unwinding information");
     }
   }
 }
@@ -1195,7 +1225,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
 
   // Check for manual lowerings.
   unsigned Opc = MI->getOpcode();
   switch (Opc) {
-  case ARM::t2MOVi32imm: assert(0 && "Should be lowered by thumb2it pass");
+  case ARM::t2MOVi32imm: llvm_unreachable("Should be lowered by thumb2it pass");
   case ARM::DBG_VALUE: {
     if (isVerbose() && OutStreamer.hasRawTextSupport()) {
       SmallString<128> TmpStr;
@@ -1237,7 +1267,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
   }
   // Darwin call instructions are just normal call instructions with different
   // clobber semantics (they clobber R9).
-  case ARM::BXr9_CALL:
   case ARM::BX_CALL: {
     {
       MCInst TmpInst;
@@ -1259,7 +1288,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     }
     return;
   }
-  case ARM::tBXr9_CALL:
   case ARM::tBX_CALL: {
     {
       MCInst TmpInst;
@@ -1282,7 +1310,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     }
     return;
   }
-  case ARM::BMOVPCRXr9_CALL:
   case ARM::BMOVPCRX_CALL: {
     {
       MCInst TmpInst;
@@ -1310,6 +1337,58 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     }
     return;
   }
+  case ARM::BMOVPCB_CALL: {
+    {
+      MCInst TmpInst;
+      TmpInst.setOpcode(ARM::MOVr);
+      TmpInst.addOperand(MCOperand::CreateReg(ARM::LR));
+      TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+      // Add predicate operands.
+      TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+      TmpInst.addOperand(MCOperand::CreateReg(0));
+      // Add 's' bit operand (always reg0 for this)
+      TmpInst.addOperand(MCOperand::CreateReg(0));
+      OutStreamer.EmitInstruction(TmpInst);
+    }
+    {
+      MCInst TmpInst;
+      TmpInst.setOpcode(ARM::Bcc);
+      const GlobalValue *GV = MI->getOperand(0).getGlobal();
+      MCSymbol *GVSym = Mang->getSymbol(GV);
+      const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext);
+      TmpInst.addOperand(MCOperand::CreateExpr(GVSymExpr));
+      // Add predicate operands.
+      TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+      TmpInst.addOperand(MCOperand::CreateReg(0));
+      OutStreamer.EmitInstruction(TmpInst);
+    }
+    return;
+  }
+  case ARM::t2BMOVPCB_CALL: {
+    {
+      MCInst TmpInst;
+      TmpInst.setOpcode(ARM::tMOVr);
+      TmpInst.addOperand(MCOperand::CreateReg(ARM::LR));
+      TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+      // Add predicate operands.
+      TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+      TmpInst.addOperand(MCOperand::CreateReg(0));
+      OutStreamer.EmitInstruction(TmpInst);
+    }
+    {
+      MCInst TmpInst;
+      TmpInst.setOpcode(ARM::t2B);
+      const GlobalValue *GV = MI->getOperand(0).getGlobal();
+      MCSymbol *GVSym = Mang->getSymbol(GV);
+      const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext);
+      TmpInst.addOperand(MCOperand::CreateExpr(GVSymExpr));
+      // Add predicate operands.
+      TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+      TmpInst.addOperand(MCOperand::CreateReg(0));
+      OutStreamer.EmitInstruction(TmpInst);
+    }
+    return;
+  }
   case ARM::MOVi16_ga_pcrel:
   case ARM::t2MOVi16_ga_pcrel: {
     MCInst TmpInst;
@@ -1482,11 +1561,10 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     /// in the function.  The first operand is the ID# for this instruction, the
     /// second is the index into the MachineConstantPool that this is, the third
     /// is the size in bytes of this constant pool entry.
+    /// The required alignment is specified on the basic block holding this MI.
     unsigned LabelId = (unsigned)MI->getOperand(0).getImm();
     unsigned CPIdx   = (unsigned)MI->getOperand(1).getIndex();
 
-    EmitAlignment(2);
-
     // Mark the constant pool entry as data if we're not already in a data
     // region.
     OutStreamer.EmitDataRegion();
@@ -1898,10 +1976,10 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     }
     {
       MCInst TmpInst;
-      TmpInst.setOpcode(ARM::tLDRr);
+      TmpInst.setOpcode(ARM::tLDRi);
       TmpInst.addOperand(MCOperand::CreateReg(ARM::R7));
       TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
-      TmpInst.addOperand(MCOperand::CreateReg(0));
+      TmpInst.addOperand(MCOperand::CreateImm(0));
       // Predicate.
       TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
       TmpInst.addOperand(MCOperand::CreateReg(0));
@@ -1935,4 +2013,3 @@ extern "C" void LLVMInitializeARMAsmPrinter() {
   RegisterAsmPrinter<ARMAsmPrinter> X(TheARMTarget);
   RegisterAsmPrinter<ARMAsmPrinter> Y(TheThumbTarget);
 }
-
diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h
index 7741fc4..af3f75a 100644
--- a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h
+++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h
@@ -1,4 +1,4 @@
-//===-- ARMAsmPrinter.h - Print machine code to an ARM .s file ------------===//
+//===-- ARMAsmPrinter.h - Print machine code to an ARM .s file --*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -73,6 +73,7 @@ public:
   virtual void EmitFunctionEntryLabel();
   void EmitStartOfAsmFile(Module &M);
   void EmitEndOfAsmFile(Module &M);
+  void EmitXXStructor(const Constant *CV);
 
   // lowerOperand - Convert a MachineOperand into the equivalent MCOperand.
   bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp);
@@ -106,7 +107,7 @@ public:
     if (!Subtarget->isTargetDarwin()) return 0;
     return Subtarget->isThumb() ?
-      llvm::ARM::DW_ISA_ARM_thumb : llvm::ARM::DW_ISA_ARM_arm;
+      ARM::DW_ISA_ARM_thumb : ARM::DW_ISA_ARM_arm;
   }
 
   MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol);
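(Annotation, not part of the diff.) The PrintAsmOperand change in ARMAsmPrinter.cpp above implements the previously unsupported 'e' and 'f' inline-assembly operand modifiers, which print the low and high D sub-register of a NEON quad operand (ARM::dsub_0 / ARM::dsub_1). A hypothetical user-level illustration of what those modifiers enable (sketch; the function name is made up, and it assumes an ARM target with NEON):

    #include <arm_neon.h>

    // With q allocated to, say, q1, this emits "vadd.f32 d2, d2, d3":
    // %e0 selects the low D half of the Q operand, %f0 the high half.
    float32x2_t add_halves(float32x4_t q) {
      asm("vadd.f32 %e0, %e0, %f0" : "+w"(q));
      return vget_low_f32(q);
    }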
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 408edfc..c6280f8 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -1,4 +1,4 @@
-//===- ARMBaseInstrInfo.cpp - ARM Instruction Information -------*- C++ -*-===//
+//===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -13,10 +13,10 @@
 #include "ARMBaseInstrInfo.h"
 #include "ARM.h"
+#include "ARMBaseRegisterInfo.h"
 #include "ARMConstantPoolValue.h"
 #include "ARMHazardRecognizer.h"
 #include "ARMMachineFunctionInfo.h"
-#include "ARMRegisterInfo.h"
 #include "MCTargetDesc/ARMAddressingModes.h"
 #include "llvm/Constants.h"
 #include "llvm/Function.h"
@@ -28,7 +28,6 @@
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/Support/BranchProbability.h"
@@ -47,7 +46,7 @@ EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
                cl::desc("Enable ARM 2-addr to 3-addr conv"));
 
 static cl::opt<bool>
-WidenVMOVS("widen-vmovs", cl::Hidden,
+WidenVMOVS("widen-vmovs", cl::Hidden, cl::init(true),
            cl::desc("Widen ARM vmovs to vmovd when possible"));
 
 /// ARM_MLxEntry - Record information about MLA / MLS instructions.
@@ -147,7 +146,7 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
   unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
   const MCInstrDesc &MCID = MI->getDesc();
   unsigned NumOps = MCID.getNumOperands();
-  bool isLoad = !MCID.mayStore();
+  bool isLoad = !MI->mayStore();
   const MachineOperand &WB = isLoad ? MI->getOperand(1) : MI->getOperand(0);
   const MachineOperand &Base = MI->getOperand(2);
   const MachineOperand &Offset = MI->getOperand(NumOps-3);
@@ -157,9 +156,7 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
   unsigned OffImm = MI->getOperand(NumOps-2).getImm();
   ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NumOps-1).getImm();
   switch (AddrMode) {
-  default:
-    assert(false && "Unknown indexed op!");
-    return NULL;
+  default: llvm_unreachable("Unknown indexed op!");
   case ARMII::AddrMode2: {
     bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
     unsigned Amt = ARM_AM::getAM2Offset(OffImm);
@@ -440,6 +437,22 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
   return false;
 }
 
+bool ARMBaseInstrInfo::isPredicated(const MachineInstr *MI) const {
+  if (MI->isBundle()) {
+    MachineBasicBlock::const_instr_iterator I = MI;
+    MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
+    while (++I != E && I->isInsideBundle()) {
+      int PIdx = I->findFirstPredOperandIdx();
+      if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL)
+        return true;
+    }
+    return false;
+  }
+
+  int PIdx = MI->findFirstPredOperandIdx();
+  return PIdx != -1 && MI->getOperand(PIdx).getImm() != ARMCC::AL;
+}
+
 bool ARMBaseInstrInfo::
 PredicateInstruction(MachineInstr *MI,
                      const SmallVectorImpl<MachineOperand> &Pred) const {
@@ -490,15 +503,11 @@ SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
 
 bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI,
                                     std::vector<MachineOperand> &Pred) const {
-  // FIXME: This confuses implicit_def with optional CPSR def.
-  const MCInstrDesc &MCID = MI->getDesc();
-  if (!MCID.getImplicitDefs() && !MCID.hasOptionalDef())
-    return false;
-
   bool Found = false;
   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
     const MachineOperand &MO = MI->getOperand(i);
-    if (MO.isReg() && MO.getReg() == ARM::CPSR) {
+    if ((MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) ||
+        (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR)) {
       Pred.push_back(MO);
       Found = true;
     }
@@ -511,11 +520,10 @@ bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI,
 /// By default, this returns true for every instruction with a
 /// PredicateOperand.
 bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const {
-  const MCInstrDesc &MCID = MI->getDesc();
-  if (!MCID.isPredicable())
+  if (!MI->isPredicable())
     return false;
 
-  if ((MCID.TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) {
+  if ((MI->getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) {
     ARMFunctionInfo *AFI =
       MI->getParent()->getParent()->getInfo<ARMFunctionInfo>();
     return AFI->isThumb2Function();
@@ -544,83 +552,95 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
   if (MCID.getSize())
     return MCID.getSize();
 
-  // If this machine instr is an inline asm, measure it.
-  if (MI->getOpcode() == ARM::INLINEASM)
-    return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI);
-  if (MI->isLabel())
-    return 0;
+  // If this machine instr is an inline asm, measure it.
+  if (MI->getOpcode() == ARM::INLINEASM)
+    return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI);
+  if (MI->isLabel())
+    return 0;
   unsigned Opc = MI->getOpcode();
-  switch (Opc) {
-  case TargetOpcode::IMPLICIT_DEF:
-  case TargetOpcode::KILL:
-  case TargetOpcode::PROLOG_LABEL:
-  case TargetOpcode::EH_LABEL:
-  case TargetOpcode::DBG_VALUE:
-    return 0;
-  case ARM::MOVi16_ga_pcrel:
-  case ARM::MOVTi16_ga_pcrel:
-  case ARM::t2MOVi16_ga_pcrel:
-  case ARM::t2MOVTi16_ga_pcrel:
-    return 4;
-  case ARM::MOVi32imm:
-  case ARM::t2MOVi32imm:
-    return 8;
-  case ARM::CONSTPOOL_ENTRY:
-    // If this machine instr is a constant pool entry, its size is recorded as
-    // operand #2.
-    return MI->getOperand(2).getImm();
-  case ARM::Int_eh_sjlj_longjmp:
-    return 16;
-  case ARM::tInt_eh_sjlj_longjmp:
-    return 10;
-  case ARM::Int_eh_sjlj_setjmp:
-  case ARM::Int_eh_sjlj_setjmp_nofp:
-    return 20;
-  case ARM::tInt_eh_sjlj_setjmp:
-  case ARM::t2Int_eh_sjlj_setjmp:
-  case ARM::t2Int_eh_sjlj_setjmp_nofp:
-    return 12;
-  case ARM::BR_JTr:
-  case ARM::BR_JTm:
-  case ARM::BR_JTadd:
-  case ARM::tBR_JTr:
-  case ARM::t2BR_JT:
-  case ARM::t2TBB_JT:
-  case ARM::t2TBH_JT: {
-    // These are jumptable branches, i.e. a branch followed by an inlined
-    // jumptable. The size is 4 + 4 * number of entries. For TBB, each
-    // entry is one byte; TBH two byte each.
-    unsigned EntrySize = (Opc == ARM::t2TBB_JT)
-      ? 1 : ((Opc == ARM::t2TBH_JT) ? 2 : 4);
-    unsigned NumOps = MCID.getNumOperands();
-    MachineOperand JTOP =
-      MI->getOperand(NumOps - (MCID.isPredicable() ? 3 : 2));
-    unsigned JTI = JTOP.getIndex();
-    const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
-    assert(MJTI != 0);
-    const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
-    assert(JTI < JT.size());
-    // Thumb instructions are 2 byte aligned, but JT entries are 4 byte
-    // 4 aligned. The assembler / linker may add 2 byte padding just before
-    // the JT entries.  The size does not include this padding; the
-    // constant islands pass does separate bookkeeping for it.
-    // FIXME: If we know the size of the function is less than (1 << 16) *2
-    // bytes, we can use 16-bit entries instead. Then there won't be an
-    // alignment issue.
-    unsigned InstSize = (Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT) ? 2 : 4;
-    unsigned NumEntries = getNumJTEntries(JT, JTI);
-    if (Opc == ARM::t2TBB_JT && (NumEntries & 1))
-      // Make sure the instruction that follows TBB is 2-byte aligned.
-      // FIXME: Constant island pass should insert an "ALIGN" instruction
-      // instead.
-      ++NumEntries;
-    return NumEntries * EntrySize + InstSize;
-  }
-  default:
-    // Otherwise, pseudo-instruction sizes are zero.
-    return 0;
-  }
-  return 0; // Not reached
+  switch (Opc) {
+  case TargetOpcode::IMPLICIT_DEF:
+  case TargetOpcode::KILL:
+  case TargetOpcode::PROLOG_LABEL:
+  case TargetOpcode::EH_LABEL:
+  case TargetOpcode::DBG_VALUE:
+    return 0;
+  case TargetOpcode::BUNDLE:
+    return getInstBundleLength(MI);
+  case ARM::MOVi16_ga_pcrel:
+  case ARM::MOVTi16_ga_pcrel:
+  case ARM::t2MOVi16_ga_pcrel:
+  case ARM::t2MOVTi16_ga_pcrel:
+    return 4;
+  case ARM::MOVi32imm:
+  case ARM::t2MOVi32imm:
+    return 8;
+  case ARM::CONSTPOOL_ENTRY:
+    // If this machine instr is a constant pool entry, its size is recorded as
+    // operand #2.
+    return MI->getOperand(2).getImm();
+  case ARM::Int_eh_sjlj_longjmp:
+    return 16;
+  case ARM::tInt_eh_sjlj_longjmp:
+    return 10;
+  case ARM::Int_eh_sjlj_setjmp:
+  case ARM::Int_eh_sjlj_setjmp_nofp:
+    return 20;
+  case ARM::tInt_eh_sjlj_setjmp:
+  case ARM::t2Int_eh_sjlj_setjmp:
+  case ARM::t2Int_eh_sjlj_setjmp_nofp:
+    return 12;
+  case ARM::BR_JTr:
+  case ARM::BR_JTm:
+  case ARM::BR_JTadd:
+  case ARM::tBR_JTr:
+  case ARM::t2BR_JT:
+  case ARM::t2TBB_JT:
+  case ARM::t2TBH_JT: {
+    // These are jumptable branches, i.e. a branch followed by an inlined
+    // jumptable. The size is 4 + 4 * number of entries. For TBB, each
+    // entry is one byte; TBH two byte each.
+    unsigned EntrySize = (Opc == ARM::t2TBB_JT)
+      ? 1 : ((Opc == ARM::t2TBH_JT) ? 2 : 4);
+    unsigned NumOps = MCID.getNumOperands();
+    MachineOperand JTOP =
+      MI->getOperand(NumOps - (MI->isPredicable() ? 3 : 2));
+    unsigned JTI = JTOP.getIndex();
+    const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+    assert(MJTI != 0);
+    const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+    assert(JTI < JT.size());
+    // Thumb instructions are 2 byte aligned, but JT entries are 4 byte
+    // 4 aligned. The assembler / linker may add 2 byte padding just before
+    // the JT entries.  The size does not include this padding; the
+    // constant islands pass does separate bookkeeping for it.
+    // FIXME: If we know the size of the function is less than (1 << 16) *2
+    // bytes, we can use 16-bit entries instead. Then there won't be an
+    // alignment issue.
+    unsigned InstSize = (Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT) ? 2 : 4;
+    unsigned NumEntries = getNumJTEntries(JT, JTI);
+    if (Opc == ARM::t2TBB_JT && (NumEntries & 1))
+      // Make sure the instruction that follows TBB is 2-byte aligned.
+      // FIXME: Constant island pass should insert an "ALIGN" instruction
+      // instead.
+      ++NumEntries;
+    return NumEntries * EntrySize + InstSize;
+  }
+  default:
+    // Otherwise, pseudo-instruction sizes are zero.
+    return 0;
+  }
+}
+
+unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr *MI) const {
+  unsigned Size = 0;
+  MachineBasicBlock::const_instr_iterator I = MI;
+  MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
+  while (++I != E && I->isInsideBundle()) {
+    assert(!I->isBundle() && "No nested bundle!");
+    Size += GetInstSizeInBytes(&*I);
+  }
+  return Size;
 }
 
 void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
@@ -660,29 +680,51 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     return;
   }
 
-  // Generate instructions for VMOVQQ and VMOVQQQQ pseudos in place.
-  if (ARM::QQPRRegClass.contains(DestReg, SrcReg) ||
-      ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) {
+  // Handle register classes that require multiple instructions.
+  unsigned BeginIdx = 0;
+  unsigned SubRegs = 0;
+  unsigned Spacing = 1;
+
+  // Use VORRq when possible.
+  if (ARM::QQPRRegClass.contains(DestReg, SrcReg))
+    Opc = ARM::VORRq, BeginIdx = ARM::qsub_0, SubRegs = 2;
+  else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg))
+    Opc = ARM::VORRq, BeginIdx = ARM::qsub_0, SubRegs = 4;
+  // Fall back to VMOVD.
+  else if (ARM::DPairRegClass.contains(DestReg, SrcReg))
+    Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 2;
+  else if (ARM::DTripleRegClass.contains(DestReg, SrcReg))
+    Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 3;
+  else if (ARM::DQuadRegClass.contains(DestReg, SrcReg))
+    Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 4;
+
+  else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg))
+    Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 2, Spacing = 2;
+  else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg))
+    Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 3, Spacing = 2;
+  else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg))
+    Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 4, Spacing = 2;
+
+  if (Opc) {
     const TargetRegisterInfo *TRI = &getRegisterInfo();
-    assert(ARM::qsub_0 + 3 == ARM::qsub_3 && "Expected contiguous enum.");
-    unsigned EndSubReg = ARM::QQPRRegClass.contains(DestReg, SrcReg) ?
-      ARM::qsub_1 : ARM::qsub_3;
-    for (unsigned i = ARM::qsub_0, e = EndSubReg + 1; i != e; ++i) {
-      unsigned Dst = TRI->getSubReg(DestReg, i);
-      unsigned Src = TRI->getSubReg(SrcReg, i);
-      MachineInstrBuilder Mov =
-        AddDefaultPred(BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VORRq))
-                       .addReg(Dst, RegState::Define)
-                       .addReg(Src, getKillRegState(KillSrc))
-                       .addReg(Src, getKillRegState(KillSrc)));
-      if (i == EndSubReg) {
-        Mov->addRegisterDefined(DestReg, TRI);
-        if (KillSrc)
-          Mov->addRegisterKilled(SrcReg, TRI);
-      }
+    MachineInstrBuilder Mov;
+    for (unsigned i = 0; i != SubRegs; ++i) {
+      unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i*Spacing);
+      unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i*Spacing);
+      assert(Dst && Src && "Bad sub-register");
+      Mov = AddDefaultPred(BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst)
+                             .addReg(Src));
+      // VORR takes two source operands.
+      if (Opc == ARM::VORRq)
+        Mov.addReg(Src);
     }
+    // Add implicit super-register defs and kills to the last instruction.
+    Mov->addRegisterDefined(DestReg, TRI);
+    if (KillSrc)
+      Mov->addRegisterKilled(SrcReg, TRI);
     return;
   }
+
   llvm_unreachable("Impossible reg-to-reg copy");
 }
 
@@ -710,8 +752,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
   unsigned Align = MFI.getObjectAlignment(FI);
 
   MachineMemOperand *MMO =
-    MF.getMachineMemOperand(MachinePointerInfo(
-                                         PseudoSourceValue::getFixedStack(FI)),
+    MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
                             MachineMemOperand::MOStore,
                             MFI.getObjectSize(FI),
                             Align);
@@ -738,9 +779,10 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
       llvm_unreachable("Unknown reg class!");
     break;
   case 16:
-    if (ARM::QPRRegClass.hasSubClassEq(RC)) {
-      if (Align >= 16 && getRegisterInfo().needsStackRealignment(MF)) {
-        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64Pseudo))
+    if (ARM::DPairRegClass.hasSubClassEq(RC)) {
+      // Use aligned spills if the stack can be realigned.
+      if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64))
                        .addFrameIndex(FI).addImm(16)
                        .addReg(SrcReg, getKillRegState(isKill))
                        .addMemOperand(MMO));
@@ -825,7 +867,7 @@ ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
       return MI->getOperand(0).getReg();
     }
     break;
-  case ARM::VST1q64Pseudo:
+  case ARM::VST1q64:
     if (MI->getOperand(0).isFI() &&
         MI->getOperand(2).getSubReg() == 0) {
       FrameIndex = MI->getOperand(0).getIndex();
@@ -847,7 +889,7 @@ ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
 unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI,
                                                     int &FrameIndex) const {
   const MachineMemOperand *Dummy;
-  return MI->getDesc().mayStore() && hasStoreToStackSlot(MI, Dummy, FrameIndex);
+  return MI->mayStore() && hasStoreToStackSlot(MI, Dummy, FrameIndex);
 }
 
 void ARMBaseInstrInfo::
@@ -862,7 +904,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
   unsigned Align = MFI.getObjectAlignment(FI);
   MachineMemOperand *MMO =
     MF.getMachineMemOperand(
-                    MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+                    MachinePointerInfo::getFixedStack(FI),
                             MachineMemOperand::MOLoad,
                             MFI.getObjectSize(FI),
                             Align);
@@ -887,9 +929,9 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
       llvm_unreachable("Unknown reg class!");
     break;
   case 16:
-    if (ARM::QPRRegClass.hasSubClassEq(RC)) {
-      if (Align >= 16 && getRegisterInfo().needsStackRealignment(MF)) {
-        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64Pseudo), DestReg)
+    if (ARM::DPairRegClass.hasSubClassEq(RC)) {
+      if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
                        .addFrameIndex(FI).addImm(16)
                        .addMemOperand(MMO));
       } else {
@@ -911,11 +953,12 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
       AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
                      .addFrameIndex(FI))
                      .addMemOperand(MMO);
-      MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
-      MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
-      MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
-      MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
-      MIB.addReg(DestReg, RegState::Define | RegState::Implicit);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
+      if (TargetRegisterInfo::isPhysicalRegister(DestReg))
+        MIB.addReg(DestReg, RegState::ImplicitDefine);
     }
   } else
     llvm_unreachable("Unknown reg class!");
   break;
@@ -926,15 +969,16 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
       AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
                      .addFrameIndex(FI))
                      .addMemOperand(MMO);
-      MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
-      MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
-      MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
-      MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
-      MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::Define, TRI);
-      MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::Define, TRI);
-      MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::Define, TRI);
-      MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::Define, TRI);
-      MIB.addReg(DestReg, RegState::Define | RegState::Implicit);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::DefineNoRead, TRI);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead, TRI);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead, TRI);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead, TRI);
+      if (TargetRegisterInfo::isPhysicalRegister(DestReg))
+        MIB.addReg(DestReg, RegState::ImplicitDefine);
     } else
       llvm_unreachable("Unknown reg class!");
     break;
@@ -971,7 +1015,7 @@ ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
       return MI->getOperand(0).getReg();
     }
     break;
-  case ARM::VLD1q64Pseudo:
+  case ARM::VLD1q64:
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(0).getSubReg() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
@@ -993,7 +1037,7 @@ ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
 unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
                                                      int &FrameIndex) const {
   const MachineMemOperand *Dummy;
-  return MI->getDesc().mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex);
+  return MI->mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex);
 }
 
 bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{
@@ -1359,7 +1403,7 @@ bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
     return false;
 
   // Terminators and labels can't be scheduled around.
-  if (MI->getDesc().isTerminator() || MI->isLabel())
+  if (MI->isTerminator() || MI->isLabel())
     return true;
 
   // Treat the start of the IT block as a scheduling boundary, but schedule
@@ -1380,7 +1424,10 @@ bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
   // saves compile time, because it doesn't require every single
   // stack slot reference to depend on the instruction that does the
   // modification.
-  if (MI->definesRegister(ARM::SP))
+  // Calls don't actually change the stack pointer, even if they have imp-defs.
+  // No ARM calling conventions change the stack pointer. (X86 calling
+  // conventions sometimes do).
+  if (!MI->isCall() && MI->definesRegister(ARM::SP))
     return true;
 
   return false;
@@ -1445,15 +1492,37 @@ llvm::getInstrPredicate(const MachineInstr *MI, unsigned &PredReg) {
 int llvm::getMatchingCondBranchOpcode(int Opc) {
   if (Opc == ARM::B)
     return ARM::Bcc;
-  else if (Opc == ARM::tB)
+  if (Opc == ARM::tB)
     return ARM::tBcc;
-  else if (Opc == ARM::t2B)
-    return ARM::t2Bcc;
+  if (Opc == ARM::t2B)
+    return ARM::t2Bcc;
 
   llvm_unreachable("Unknown unconditional branch opcode!");
-  return 0;
 }
 
+/// commuteInstruction - Handle commutable instructions.
+MachineInstr *
+ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
+  switch (MI->getOpcode()) {
+  case ARM::MOVCCr:
+  case ARM::t2MOVCCr: {
+    // MOVCC can be commuted by inverting the condition.
+    unsigned PredReg = 0;
+    ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg);
+    // MOVCC AL can't be inverted. Shouldn't happen.
+    if (CC == ARMCC::AL || PredReg != ARM::CPSR)
+      return NULL;
+    MI = TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
+    if (!MI)
+      return NULL;
+    // After swapping the MOVCC operands, also invert the condition.
+    MI->getOperand(MI->findFirstPredOperandIdx())
+      .setImm(ARMCC::getOppositeCondition(CC));
+    return MI;
+  }
+  }
+  return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
+}
 
 /// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the
 /// instruction is encoded with an 'S' bit is determined by the optional CPSR
@@ -1478,7 +1547,6 @@ static AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
   {ARM::SUBSrsr, ARM::SUBrsr},
 
   {ARM::RSBSri, ARM::RSBri},
-  {ARM::RSBSrr, ARM::RSBrr},
   {ARM::RSBSrsi, ARM::RSBrsi},
   {ARM::RSBSrsr, ARM::RSBrsr},
 
@@ -1625,7 +1693,6 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
     }
     default:
       llvm_unreachable("Unsupported addressing mode!");
-      break;
     }
 
     Offset += InstrOffs * Scale;
@@ -1765,8 +1832,7 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
 
   // Check that CPSR isn't set between the comparison instruction and the one we
   // want to change.
-  MachineBasicBlock::const_iterator I = CmpInstr, E = MI,
-      B = MI->getParent()->begin();
+  MachineBasicBlock::iterator I = CmpInstr,E = MI, B = MI->getParent()->begin();
 
   // Early exit if CmpInstr is at the beginning of the BB.
   if (I == B) return false;
@@ -1777,6 +1843,8 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
     for (unsigned IO = 0, EO = Instr.getNumOperands(); IO != EO; ++IO) {
       const MachineOperand &MO = Instr.getOperand(IO);
+      if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR))
+        return false;
       if (!MO.isReg()) continue;
 
       // This instruction modifies or uses CPSR after the one we want to
@@ -1838,6 +1906,10 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
     for (unsigned IO = 0, EO = Instr.getNumOperands();
          !isSafe && IO != EO; ++IO) {
       const MachineOperand &MO = Instr.getOperand(IO);
+      if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) {
+        isSafe = true;
+        break;
+      }
       if (!MO.isReg() || MO.getReg() != ARM::CPSR)
         continue;
       if (MO.isDef()) {
@@ -1889,6 +1961,25 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI,
   if (!MRI->hasOneNonDBGUse(Reg))
     return false;
 
+  const MCInstrDesc &DefMCID = DefMI->getDesc();
+  if (DefMCID.hasOptionalDef()) {
+    unsigned NumOps = DefMCID.getNumOperands();
+    const MachineOperand &MO = DefMI->getOperand(NumOps-1);
+    if (MO.getReg() == ARM::CPSR && !MO.isDead())
+      // If DefMI defines CPSR and it is not dead, it's obviously not safe
+      // to delete DefMI.
+      return false;
+  }
+
+  const MCInstrDesc &UseMCID = UseMI->getDesc();
+  if (UseMCID.hasOptionalDef()) {
+    unsigned NumOps = UseMCID.getNumOperands();
+    if (UseMI->getOperand(NumOps-1).getReg() == ARM::CPSR)
+      // If the instruction sets the flag, do not attempt this optimization
+      // since it may change the semantics of the code.
+      return false;
+  }
+
   unsigned UseOpc = UseMI->getOpcode();
   unsigned NewUseOpc = 0;
   uint32_t ImmVal = (uint32_t)DefMI->getOperand(1).getImm();
@@ -1960,7 +2051,7 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI,
     bool isKill = UseMI->getOperand(OpIdx).isKill();
     unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg));
     AddDefaultCC(AddDefaultPred(BuildMI(*UseMI->getParent(),
-                                        *UseMI, UseMI->getDebugLoc(),
+                                        UseMI, UseMI->getDebugLoc(),
                                         get(NewUseOpc), NewReg)
                                 .addReg(Reg1, getKillRegState(isKill))
                                 .addImm(SOImmValV1)));
@@ -1988,7 +2079,6 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
   switch (Opc) {
   default:
     llvm_unreachable("Unexpected multi-uops instruction!");
-    break;
   case ARM::VLDMQIA:
   case ARM::VSTMQIA:
     return 2;
@@ -2335,6 +2425,59 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
   return UseCycle;
 }
 
+static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI,
+                                           const MachineInstr *MI, unsigned Reg,
+                                           unsigned &DefIdx, unsigned &Dist) {
+  Dist = 0;
+
+  MachineBasicBlock::const_iterator I = MI; ++I;
+  MachineBasicBlock::const_instr_iterator II =
+    llvm::prior(I.getInstrIterator());
+  assert(II->isInsideBundle() && "Empty bundle?");
+
+  int Idx = -1;
+  while (II->isInsideBundle()) {
+    Idx = II->findRegisterDefOperandIdx(Reg, false, true, TRI);
+    if (Idx != -1)
+      break;
+    --II;
+    ++Dist;
+  }
+
+  assert(Idx != -1 && "Cannot find bundled definition!");
+  DefIdx = Idx;
+  return II;
+}
+
+static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
+                                           const MachineInstr *MI, unsigned Reg,
+                                           unsigned &UseIdx, unsigned &Dist) {
+  Dist = 0;
+
+  MachineBasicBlock::const_instr_iterator II = MI; ++II;
+  assert(II->isInsideBundle() && "Empty bundle?");
+  MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
+
+  // FIXME: This doesn't properly handle multiple uses.
+  int Idx = -1;
+  while (II != E && II->isInsideBundle()) {
+    Idx = II->findRegisterUseOperandIdx(Reg, false, TRI);
+    if (Idx != -1)
+      break;
+    if (II->getOpcode() != ARM::t2IT)
+      ++Dist;
+    ++II;
+  }
+
+  if (Idx == -1) {
+    Dist = 0;
+    return 0;
+  }
+
+  UseIdx = Idx;
+  return II;
+}
+
 int
 ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
                                     const MachineInstr *DefMI, unsigned DefIdx,
@@ -2343,35 +2486,77 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
       DefMI->isRegSequence() || DefMI->isImplicitDef())
     return 1;
 
-  const MCInstrDesc &DefMCID = DefMI->getDesc();
   if (!ItinData || ItinData->isEmpty())
-    return DefMCID.mayLoad() ? 3 : 1;
+    return DefMI->mayLoad() ? 3 : 1;
 
-  const MCInstrDesc &UseMCID = UseMI->getDesc();
+  const MCInstrDesc *DefMCID = &DefMI->getDesc();
+  const MCInstrDesc *UseMCID = &UseMI->getDesc();
   const MachineOperand &DefMO = DefMI->getOperand(DefIdx);
-  if (DefMO.getReg() == ARM::CPSR) {
+  unsigned Reg = DefMO.getReg();
+  if (Reg == ARM::CPSR) {
     if (DefMI->getOpcode() == ARM::FMSTAT) {
       // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
       return Subtarget.isCortexA9() ? 1 : 20;
     }
 
     // CPSR set and branch can be paired in the same cycle.
-    if (UseMCID.isBranch())
+    if (UseMI->isBranch())
       return 0;
+
+    // Otherwise it takes the instruction latency (generally one).
+    int Latency = getInstrLatency(ItinData, DefMI);
+
+    // For Thumb2 and -Os, prefer scheduling CPSR setting instruction close to
+    // its uses. Instructions which are otherwise scheduled between them may
+    // incur a code size penalty (not able to use the CPSR setting 16-bit
+    // instructions).
+    if (Latency > 0 && Subtarget.isThumb2()) {
+      const MachineFunction *MF = DefMI->getParent()->getParent();
+      if (MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize))
+        --Latency;
+    }
+    return Latency;
   }
 
   unsigned DefAlign = DefMI->hasOneMemOperand()
     ? (*DefMI->memoperands_begin())->getAlignment() : 0;
   unsigned UseAlign = UseMI->hasOneMemOperand()
     ? (*UseMI->memoperands_begin())->getAlignment() : 0;
-  int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign,
-                                  UseMCID, UseIdx, UseAlign);
+
+  unsigned DefAdj = 0;
+  if (DefMI->isBundle()) {
+    DefMI = getBundledDefMI(&getRegisterInfo(), DefMI, Reg, DefIdx, DefAdj);
+    if (DefMI->isCopyLike() || DefMI->isInsertSubreg() ||
+        DefMI->isRegSequence() || DefMI->isImplicitDef())
+      return 1;
+    DefMCID = &DefMI->getDesc();
+  }
+  unsigned UseAdj = 0;
+  if (UseMI->isBundle()) {
+    unsigned NewUseIdx;
+    const MachineInstr *NewUseMI = getBundledUseMI(&getRegisterInfo(), UseMI,
+                                                   Reg, NewUseIdx, UseAdj);
+    if (NewUseMI) {
+      UseMI = NewUseMI;
+      UseIdx = NewUseIdx;
+      UseMCID = &UseMI->getDesc();
+    }
+  }
+
+  int Latency = getOperandLatency(ItinData, *DefMCID, DefIdx, DefAlign,
+                                  *UseMCID, UseIdx, UseAlign);
+  int Adj = DefAdj + UseAdj;
+  if (Adj) {
+    Latency -= (int)(DefAdj + UseAdj);
+    if (Latency < 1)
+      return 1;
+  }
 
   if (Latency > 1 &&
       (Subtarget.isCortexA8() || Subtarget.isCortexA9())) {
     // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
     // variants are one cycle cheaper.
-    switch (DefMCID.getOpcode()) {
+    switch (DefMCID->getOpcode()) {
     default: break;
     case ARM::LDRrs:
    case ARM::LDRBrs: {
@@ -2396,28 +2581,38 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
   }
 
   if (DefAlign < 8 && Subtarget.isCortexA9())
-    switch (DefMCID.getOpcode()) {
+    switch (DefMCID->getOpcode()) {
     default: break;
     case ARM::VLD1q8:
    case ARM::VLD1q16:
    case ARM::VLD1q32:
    case ARM::VLD1q64:
-    case ARM::VLD1q8_UPD:
-    case ARM::VLD1q16_UPD:
-    case ARM::VLD1q32_UPD:
-    case ARM::VLD1q64_UPD:
+    case ARM::VLD1q8wb_fixed:
+    case ARM::VLD1q16wb_fixed:
+    case ARM::VLD1q32wb_fixed:
+    case ARM::VLD1q64wb_fixed:
+    case ARM::VLD1q8wb_register:
+    case ARM::VLD1q16wb_register:
+    case ARM::VLD1q32wb_register:
+    case ARM::VLD1q64wb_register:
    case ARM::VLD2d8:
    case ARM::VLD2d16:
    case ARM::VLD2d32:
    case ARM::VLD2q8:
    case ARM::VLD2q16:
    case ARM::VLD2q32:
-    case ARM::VLD2d8_UPD:
-    case ARM::VLD2d16_UPD:
-    case ARM::VLD2d32_UPD:
-    case ARM::VLD2q8_UPD:
-    case ARM::VLD2q16_UPD:
-    case ARM::VLD2q32_UPD:
+    case ARM::VLD2d8wb_fixed:
+    case ARM::VLD2d16wb_fixed:
+    case ARM::VLD2d32wb_fixed:
+    case ARM::VLD2q8wb_fixed:
+    case ARM::VLD2q16wb_fixed:
+    case ARM::VLD2q32wb_fixed:
+    case ARM::VLD2d8wb_register:
+    case ARM::VLD2d16wb_register:
+    case ARM::VLD2d32wb_register:
+    case ARM::VLD2q8wb_register:
+    case ARM::VLD2q16wb_register:
+    case ARM::VLD2q32wb_register:
    case ARM::VLD3d8:
    case ARM::VLD3d16:
    case ARM::VLD3d32:
    case ARM::VLD1d64T:
@@ -2425,7 +2620,8 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
    case ARM::VLD3d8_UPD:
    case ARM::VLD3d16_UPD:
    case ARM::VLD3d32_UPD:
-    case ARM::VLD1d64T_UPD:
+    case ARM::VLD1d64Twb_fixed:
+    case ARM::VLD1d64Twb_register:
    case ARM::VLD3q8_UPD:
    case ARM::VLD3q16_UPD:
    case ARM::VLD3q32_UPD:
@@ -2436,22 +2632,29 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
    case ARM::VLD4d8_UPD:
    case ARM::VLD4d16_UPD:
    case ARM::VLD4d32_UPD:
-    case ARM::VLD1d64Q_UPD:
+    case ARM::VLD1d64Qwb_fixed:
+    case ARM::VLD1d64Qwb_register:
    case ARM::VLD4q8_UPD:
    case ARM::VLD4q16_UPD:
    case ARM::VLD4q32_UPD:
    case ARM::VLD1DUPq8:
    case ARM::VLD1DUPq16:
    case ARM::VLD1DUPq32:
-    case ARM::VLD1DUPq8_UPD:
-    case ARM::VLD1DUPq16_UPD:
-    case ARM::VLD1DUPq32_UPD:
+    case ARM::VLD1DUPq8wb_fixed:
+    case ARM::VLD1DUPq16wb_fixed:
+    case ARM::VLD1DUPq32wb_fixed:
+    case ARM::VLD1DUPq8wb_register:
+    case ARM::VLD1DUPq16wb_register:
+    case ARM::VLD1DUPq32wb_register:
    case ARM::VLD2DUPd8:
    case ARM::VLD2DUPd16:
    case ARM::VLD2DUPd32:
-    case ARM::VLD2DUPd8_UPD:
-    case ARM::VLD2DUPd16_UPD:
-    case ARM::VLD2DUPd32_UPD:
+    case ARM::VLD2DUPd8wb_fixed:
+    case ARM::VLD2DUPd16wb_fixed:
+    case ARM::VLD2DUPd32wb_fixed:
+    case ARM::VLD2DUPd8wb_register:
+    case ARM::VLD2DUPd16wb_register:
+    case ARM::VLD2DUPd32wb_register:
    case ARM::VLD4DUPd8:
    case ARM::VLD4DUPd16:
    case ARM::VLD4DUPd32:
@@ -2559,26 +2762,36 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
   if (DefAlign < 8 && Subtarget.isCortexA9())
     switch (DefMCID.getOpcode()) {
     default: break;
-    case ARM::VLD1q8Pseudo:
-    case ARM::VLD1q16Pseudo:
-    case ARM::VLD1q32Pseudo:
-    case ARM::VLD1q64Pseudo:
-    case ARM::VLD1q8Pseudo_UPD:
-    case ARM::VLD1q16Pseudo_UPD:
-    case ARM::VLD1q32Pseudo_UPD:
-    case ARM::VLD1q64Pseudo_UPD:
-    case ARM::VLD2d8Pseudo:
-    case ARM::VLD2d16Pseudo:
-    case ARM::VLD2d32Pseudo:
+    case ARM::VLD1q8:
+    case ARM::VLD1q16:
+    case ARM::VLD1q32:
+    case ARM::VLD1q64:
+    case ARM::VLD1q8wb_register:
+    case ARM::VLD1q16wb_register:
+    case ARM::VLD1q32wb_register:
+    case ARM::VLD1q64wb_register:
+    case ARM::VLD1q8wb_fixed:
+    case ARM::VLD1q16wb_fixed:
+    case ARM::VLD1q32wb_fixed:
+    case ARM::VLD1q64wb_fixed:
+    case ARM::VLD2d8:
+    case ARM::VLD2d16:
+    case ARM::VLD2d32:
    case ARM::VLD2q8Pseudo:
    case ARM::VLD2q16Pseudo:
    case ARM::VLD2q32Pseudo:
-    case ARM::VLD2d8Pseudo_UPD:
-    case ARM::VLD2d16Pseudo_UPD:
-    case ARM::VLD2d32Pseudo_UPD:
-    case ARM::VLD2q8Pseudo_UPD:
-    case ARM::VLD2q16Pseudo_UPD:
-    case ARM::VLD2q32Pseudo_UPD:
+    case ARM::VLD2d8wb_fixed:
+    case ARM::VLD2d16wb_fixed:
+    case ARM::VLD2d32wb_fixed:
+    case ARM::VLD2q8PseudoWB_fixed:
+    case ARM::VLD2q16PseudoWB_fixed:
+    case ARM::VLD2q32PseudoWB_fixed:
+    case ARM::VLD2d8wb_register:
+    case ARM::VLD2d16wb_register:
+    case ARM::VLD2d32wb_register:
+    case ARM::VLD2q8PseudoWB_register:
+    case ARM::VLD2q16PseudoWB_register:
+    case ARM::VLD2q32PseudoWB_register:
    case ARM::VLD3d8Pseudo:
    case ARM::VLD3d16Pseudo:
    case ARM::VLD3d32Pseudo:
    case ARM::VLD1d64TPseudo:
@@ -2586,7 +2799,6 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
    case ARM::VLD3d8Pseudo_UPD:
    case ARM::VLD3d16Pseudo_UPD:
    case ARM::VLD3d32Pseudo_UPD:
-    case ARM::VLD1d64TPseudo_UPD:
    case ARM::VLD3q8Pseudo_UPD:
    case ARM::VLD3q16Pseudo_UPD:
    case ARM::VLD3q32Pseudo_UPD:
@@ -2603,7 +2815,6 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
    case ARM::VLD4d8Pseudo_UPD:
    case ARM::VLD4d16Pseudo_UPD:
    case ARM::VLD4d32Pseudo_UPD:
-    case ARM::VLD1d64QPseudo_UPD:
    case ARM::VLD4q8Pseudo_UPD:
    case ARM::VLD4q16Pseudo_UPD:
    case ARM::VLD4q32Pseudo_UPD:
@@ -2613,18 +2824,24 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
    case ARM::VLD4q8oddPseudo_UPD:
    case ARM::VLD4q16oddPseudo_UPD:
    case ARM::VLD4q32oddPseudo_UPD:
-    case ARM::VLD1DUPq8Pseudo:
-    case ARM::VLD1DUPq16Pseudo:
-    case ARM::VLD1DUPq32Pseudo:
-    case ARM::VLD1DUPq8Pseudo_UPD:
-    case ARM::VLD1DUPq16Pseudo_UPD:
-    case ARM::VLD1DUPq32Pseudo_UPD:
-    case ARM::VLD2DUPd8Pseudo:
-    case ARM::VLD2DUPd16Pseudo:
-    case ARM::VLD2DUPd32Pseudo:
-    case ARM::VLD2DUPd8Pseudo_UPD:
-    case ARM::VLD2DUPd16Pseudo_UPD:
-    case ARM::VLD2DUPd32Pseudo_UPD:
+    case ARM::VLD1DUPq8:
+    case ARM::VLD1DUPq16:
+    case ARM::VLD1DUPq32:
+    case ARM::VLD1DUPq8wb_fixed:
+    case ARM::VLD1DUPq16wb_fixed:
+    case ARM::VLD1DUPq32wb_fixed:
+    case ARM::VLD1DUPq8wb_register:
+    case ARM::VLD1DUPq16wb_register:
+    case ARM::VLD1DUPq32wb_register:
+    case ARM::VLD2DUPd8:
+    case ARM::VLD2DUPd16:
+    case ARM::VLD2DUPd32:
+    case ARM::VLD2DUPd8wb_fixed:
+    case ARM::VLD2DUPd16wb_fixed:
+    case ARM::VLD2DUPd32wb_fixed:
+    case ARM::VLD2DUPd8wb_register:
+    case ARM::VLD2DUPd16wb_register:
+    case ARM::VLD2DUPd32wb_register:
    case ARM::VLD4DUPd8Pseudo:
    case ARM::VLD4DUPd16Pseudo:
    case ARM::VLD4DUPd32Pseudo:
@@ -2666,6 +2883,19 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
   return Latency;
 }
 
+unsigned
+ARMBaseInstrInfo::getOutputLatency(const InstrItineraryData *ItinData,
+                                   const MachineInstr *DefMI, unsigned DefIdx,
+                                   const MachineInstr *DepMI) const {
+  unsigned Reg = DefMI->getOperand(DefIdx).getReg();
+  if (DepMI->readsRegister(Reg, &getRegisterInfo()) || !isPredicated(DepMI))
+    return 1;
+
+  // If the second MI is predicated, then there is an implicit use dependency.
+  return getOperandLatency(ItinData, DefMI, DefIdx, DepMI,
+                           DepMI->getNumOperands());
+}
+
 int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
                                       const MachineInstr *MI,
                                       unsigned *PredCost) const {
@@ -2676,10 +2906,21 @@ int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
   if (!ItinData || ItinData->isEmpty())
     return 1;
 
+  if (MI->isBundle()) {
+    int Latency = 0;
+    MachineBasicBlock::const_instr_iterator I = MI;
+    MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
+    while (++I != E && I->isInsideBundle()) {
+      if (I->getOpcode() != ARM::t2IT)
+        Latency += getInstrLatency(ItinData, I, PredCost);
+    }
+    return Latency;
+  }
+
   const MCInstrDesc &MCID = MI->getDesc();
   unsigned Class = MCID.getSchedClass();
   unsigned UOps = ItinData->Itineraries[Class].NumMicroOps;
-  if (PredCost && MCID.hasImplicitDefOfPhysReg(ARM::CPSR))
+  if (PredCost && (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR)))
     // When predicated, CPSR is an additional source operand for CPSR updating
     // instructions, this apparently increases their latencies.
     *PredCost = 1;
@@ -2828,3 +3069,7 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
   // This will go before any implicit ops.
   AddDefaultPred(MachineInstrBuilder(MI).addOperand(MI->getOperand(1)));
 }
+
+bool ARMBaseInstrInfo::hasNOP() const {
+  return (Subtarget.getFeatureBits() & ARM::HasV6T2Ops) != 0;
+}
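(Annotation, not part of the diff.) Several of the ARMBaseInstrInfo.cpp changes above (isPredicated, getInstBundleLength, the bundle case in getInstrLatency) share one idiom: a BUNDLE header instruction is followed by instructions flagged as "inside bundle", and a property of the bundle is computed by walking those followers until the flag goes false. A self-contained C++ sketch of that walk, with made-up types standing in for MachineBasicBlock::const_instr_iterator:

    #include <cassert>
    #include <cstddef>
    #include <vector>

    struct Insn {
      bool isBundleHeader;  // the BUNDLE marker itself
      bool isInsideBundle;  // member of the preceding bundle
      unsigned sizeInBytes;
    };

    // Mirrors getInstBundleLength: sum the sizes of the bundled instructions.
    unsigned bundleLength(const std::vector<Insn> &block, size_t header) {
      assert(block[header].isBundleHeader && "expected a BUNDLE");
      unsigned size = 0;
      for (size_t i = header + 1;
           i < block.size() && block[i].isInsideBundle; ++i) {
        assert(!block[i].isBundleHeader && "No nested bundle!");
        size += block[i].sizeInBytes;
      }
      return size;
    }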
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
index 0f9f321..2fe8507 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -1,4 +1,4 @@
-//===- ARMBaseInstrInfo.h - ARM Base Instruction Information ----*- C++ -*-===//
+//===-- ARMBaseInstrInfo.h - ARM Base Instruction Information ---*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -35,6 +35,9 @@ protected:
   explicit ARMBaseInstrInfo(const ARMSubtarget &STI);
 
 public:
+  // Return whether the target has an explicit NOP encoding.
+  bool hasNOP() const;
+
   // Return the non-pre/post incrementing version of 'Opc'. Return 0
   // if there is not such an opcode.
   virtual unsigned getUnindexedOpcode(unsigned Opc) const =0;
@@ -69,10 +72,7 @@ public:
   bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
 
   // Predication support.
-  bool isPredicated(const MachineInstr *MI) const {
-    int PIdx = MI->findFirstPredOperandIdx();
-    return PIdx != -1 && MI->getOperand(PIdx).getImm() != ARMCC::AL;
-  }
+  bool isPredicated(const MachineInstr *MI) const;
 
   ARMCC::CondCodes getPredicate(const MachineInstr *MI) const {
     int PIdx = MI->findFirstPredOperandIdx();
@@ -139,6 +139,8 @@ public:
   MachineInstr *duplicate(MachineInstr *Orig, MachineFunction &MF) const;
 
+  MachineInstr *commuteInstruction(MachineInstr*, bool=false) const;
+
   virtual bool produceSameValue(const MachineInstr *MI0,
                                 const MachineInstr *MI1,
                                 const MachineRegisterInfo *MRI) const;
@@ -213,12 +215,18 @@ public:
                             SDNode *DefNode, unsigned DefIdx,
                             SDNode *UseNode, unsigned UseIdx) const;
 
+  virtual unsigned getOutputLatency(const InstrItineraryData *ItinData,
+                                    const MachineInstr *DefMI, unsigned DefIdx,
+                                    const MachineInstr *DepMI) const;
+
   /// VFP/NEON execution domains.
   std::pair<uint16_t, uint16_t>
   getExecutionDomain(const MachineInstr *MI) const;
   void setExecutionDomain(MachineInstr *MI, unsigned Domain) const;
 
 private:
+  unsigned getInstBundleLength(const MachineInstr *MI) const;
+
   int getVLDMDefCycle(const InstrItineraryData *ItinData,
                       const MCInstrDesc &DefMCID,
                       unsigned DefClass,
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 154f1f8..3907f75 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -1,4 +1,4 @@
-//===- ARMBaseRegisterInfo.cpp - ARM Register Information -------*- C++ -*-===//
+//===-- ARMBaseRegisterInfo.cpp - ARM Register Information ----------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -11,11 +11,10 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "ARMBaseRegisterInfo.h"
 #include "ARM.h"
 #include "ARMBaseInstrInfo.h"
-#include "ARMBaseRegisterInfo.h"
 #include "ARMFrameLowering.h"
-#include "ARMInstrInfo.h"
 #include "ARMMachineFunctionInfo.h"
 #include "ARMSubtarget.h"
 #include "MCTargetDesc/ARMAddressingModes.h"
@@ -61,41 +60,14 @@ ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii,
     BasePtr(ARM::R6) {
 }
 
-const unsigned*
+const uint16_t*
 ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
-  bool ghcCall = false;
-
-  if (MF) {
-    const Function *F = MF->getFunction();
-    ghcCall = (F ? F->getCallingConv() == CallingConv::GHC : false);
-  }
-
-  static const unsigned CalleeSavedRegs[] = {
-    ARM::LR, ARM::R11, ARM::R10, ARM::R9, ARM::R8,
-    ARM::R7, ARM::R6,  ARM::R5,  ARM::R4,
-
-    ARM::D15, ARM::D14, ARM::D13, ARM::D12,
-    ARM::D11, ARM::D10, ARM::D9,  ARM::D8,
-    0
-  };
-
-  static const unsigned DarwinCalleeSavedRegs[] = {
-    // Darwin ABI deviates from ARM standard ABI. R9 is not a callee-saved
-    // register.
-    ARM::LR,  ARM::R7,  ARM::R6, ARM::R5, ARM::R4,
-    ARM::R11, ARM::R10, ARM::R8,
-
-    ARM::D15, ARM::D14, ARM::D13, ARM::D12,
-    ARM::D11, ARM::D10, ARM::D9,  ARM::D8,
-    0
-  };
-
-  static const unsigned GhcCalleeSavedRegs[] = {
-    0
-  };
+  return (STI.isTargetIOS()) ? CSR_iOS_SaveList : CSR_AAPCS_SaveList;
+}
 
-  return ghcCall ? GhcCalleeSavedRegs :
-         STI.isTargetDarwin() ? DarwinCalleeSavedRegs : CalleeSavedRegs;
+const uint32_t*
+ARMBaseRegisterInfo::getCallPreservedMask(CallingConv::ID) const {
CSR_iOS_RegMask : CSR_AAPCS_RegMask; } BitVector ARMBaseRegisterInfo:: @@ -148,104 +120,6 @@ bool ARMBaseRegisterInfo::isReservedReg(const MachineFunction &MF, return false; } -const TargetRegisterClass * -ARMBaseRegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, - const TargetRegisterClass *B, - unsigned SubIdx) const { - switch (SubIdx) { - default: return 0; - case ARM::ssub_0: - case ARM::ssub_1: - case ARM::ssub_2: - case ARM::ssub_3: { - // S sub-registers. - if (A->getSize() == 8) { - if (B == &ARM::SPR_8RegClass) - return &ARM::DPR_8RegClass; - assert(B == &ARM::SPRRegClass && "Expecting SPR register class!"); - if (A == &ARM::DPR_8RegClass) - return A; - return &ARM::DPR_VFP2RegClass; - } - - if (A->getSize() == 16) { - if (B == &ARM::SPR_8RegClass) - return &ARM::QPR_8RegClass; - return &ARM::QPR_VFP2RegClass; - } - - if (A->getSize() == 32) { - if (B == &ARM::SPR_8RegClass) - return 0; // Do not allow coalescing! - return &ARM::QQPR_VFP2RegClass; - } - - assert(A->getSize() == 64 && "Expecting a QQQQ register class!"); - return 0; // Do not allow coalescing! - } - case ARM::dsub_0: - case ARM::dsub_1: - case ARM::dsub_2: - case ARM::dsub_3: { - // D sub-registers. - if (A->getSize() == 16) { - if (B == &ARM::DPR_VFP2RegClass) - return &ARM::QPR_VFP2RegClass; - if (B == &ARM::DPR_8RegClass) - return 0; // Do not allow coalescing! - return A; - } - - if (A->getSize() == 32) { - if (B == &ARM::DPR_VFP2RegClass) - return &ARM::QQPR_VFP2RegClass; - if (B == &ARM::DPR_8RegClass) - return 0; // Do not allow coalescing! - return A; - } - - assert(A->getSize() == 64 && "Expecting a QQQQ register class!"); - if (B != &ARM::DPRRegClass) - return 0; // Do not allow coalescing! - return A; - } - case ARM::dsub_4: - case ARM::dsub_5: - case ARM::dsub_6: - case ARM::dsub_7: { - // D sub-registers of QQQQ registers. - if (A->getSize() == 64 && B == &ARM::DPRRegClass) - return A; - return 0; // Do not allow coalescing! - } - - case ARM::qsub_0: - case ARM::qsub_1: { - // Q sub-registers. - if (A->getSize() == 32) { - if (B == &ARM::QPR_VFP2RegClass) - return &ARM::QQPR_VFP2RegClass; - if (B == &ARM::QPR_8RegClass) - return 0; // Do not allow coalescing! - return A; - } - - assert(A->getSize() == 64 && "Expecting a QQQQ register class!"); - if (B == &ARM::QPRRegClass) - return A; - return 0; // Do not allow coalescing! - } - case ARM::qsub_2: - case ARM::qsub_3: { - // Q sub-registers of QQQQ registers. - if (A->getSize() == 64 && B == &ARM::QPRRegClass) - return A; - return 0; // Do not allow coalescing! - } - } - return 0; -} - bool ARMBaseRegisterInfo::canCombineSubRegIndices(const TargetRegisterClass *RC, SmallVectorImpl<unsigned> &SubIndices, @@ -416,7 +290,7 @@ ARMBaseRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, /// getRawAllocationOrder - Returns the register allocation order for a /// specified register class with a target-dependent hint. -ArrayRef<unsigned> +ArrayRef<uint16_t> ARMBaseRegisterInfo::getRawAllocationOrder(const TargetRegisterClass *RC, unsigned HintType, unsigned HintReg, const MachineFunction &MF) const { @@ -425,71 +299,71 @@ ARMBaseRegisterInfo::getRawAllocationOrder(const TargetRegisterClass *RC, // of register pairs. // No FP, R9 is available. 
- static const unsigned GPREven1[] = { + static const uint16_t GPREven1[] = { ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R8, ARM::R10, ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R7, ARM::R9, ARM::R11 }; - static const unsigned GPROdd1[] = { + static const uint16_t GPROdd1[] = { ARM::R1, ARM::R3, ARM::R5, ARM::R7, ARM::R9, ARM::R11, ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8, ARM::R10 }; // FP is R7, R9 is available. - static const unsigned GPREven2[] = { + static const uint16_t GPREven2[] = { ARM::R0, ARM::R2, ARM::R4, ARM::R8, ARM::R10, ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R6, ARM::R9, ARM::R11 }; - static const unsigned GPROdd2[] = { + static const uint16_t GPROdd2[] = { ARM::R1, ARM::R3, ARM::R5, ARM::R9, ARM::R11, ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8, ARM::R10 }; // FP is R11, R9 is available. - static const unsigned GPREven3[] = { + static const uint16_t GPREven3[] = { ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R8, ARM::R1, ARM::R3, ARM::R10,ARM::R12,ARM::LR, ARM::R5, ARM::R7, ARM::R9 }; - static const unsigned GPROdd3[] = { + static const uint16_t GPROdd3[] = { ARM::R1, ARM::R3, ARM::R5, ARM::R6, ARM::R9, ARM::R0, ARM::R2, ARM::R10,ARM::R12,ARM::LR, ARM::R4, ARM::R7, ARM::R8 }; // No FP, R9 is not available. - static const unsigned GPREven4[] = { + static const uint16_t GPREven4[] = { ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R10, ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R7, ARM::R8, ARM::R11 }; - static const unsigned GPROdd4[] = { + static const uint16_t GPROdd4[] = { ARM::R1, ARM::R3, ARM::R5, ARM::R7, ARM::R11, ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8, ARM::R10 }; // FP is R7, R9 is not available. - static const unsigned GPREven5[] = { + static const uint16_t GPREven5[] = { ARM::R0, ARM::R2, ARM::R4, ARM::R10, ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R6, ARM::R8, ARM::R11 }; - static const unsigned GPROdd5[] = { + static const uint16_t GPROdd5[] = { ARM::R1, ARM::R3, ARM::R5, ARM::R11, ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8, ARM::R10 }; // FP is R11, R9 is not available. - static const unsigned GPREven6[] = { + static const uint16_t GPREven6[] = { ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R1, ARM::R3, ARM::R10,ARM::R12,ARM::LR, ARM::R5, ARM::R7, ARM::R8 }; - static const unsigned GPROdd6[] = { + static const uint16_t GPROdd6[] = { ARM::R1, ARM::R3, ARM::R5, ARM::R7, ARM::R0, ARM::R2, ARM::R10,ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8 }; @@ -610,11 +484,15 @@ ARMBaseRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const { bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); if (!EnableBasePointer) return false; - if (needsStackRealignment(MF) && MFI->hasVarSizedObjects()) + // When outgoing call frames are so large that we adjust the stack pointer + // around the call, we can no longer use the stack pointer to reach the + // emergency spill slot. + if (needsStackRealignment(MF) && !TFI->hasReservedCallFrame(MF)) return true; // Thumb has trouble with negative offsets from the FP. 
Thumb2 has a limited @@ -638,14 +516,29 @@ bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const { } bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); + const MachineRegisterInfo *MRI = &MF.getRegInfo(); const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); // We can't realign the stack if: // 1. Dynamic stack realignment is explicitly disabled, // 2. This is a Thumb1 function (it's not useful, so we don't bother), or // 3. There are VLAs in the function and the base pointer is disabled. - return (RealignStack && !AFI->isThumb1OnlyFunction() && - (!MFI->hasVarSizedObjects() || EnableBasePointer)); + if (!MF.getTarget().Options.RealignStack) + return false; + if (AFI->isThumb1OnlyFunction()) + return false; + // Stack realignment requires a frame pointer. If we already started + // register allocation with frame pointer elimination, it is too late now. + if (!MRI->canReserveReg(FramePtr)) + return false; + // We may also need a base pointer if there are dynamic allocas or stack + // pointer adjustments around calls. + if (MF.getTarget().getFrameLowering()->hasReservedCallFrame(MF)) + return true; + if (!EnableBasePointer) + return false; + // A base pointer is required and allowed. Check that it isn't too late to + // reserve it. + return MRI->canReserveReg(BasePtr); } bool ARMBaseRegisterInfo:: @@ -653,7 +546,7 @@ needsStackRealignment(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); const Function *F = MF.getFunction(); unsigned StackAlign = MF.getTarget().getFrameLowering()->getStackAlignment(); - bool requiresRealignment = ((MFI->getLocalFrameMaxAlign() > StackAlign) || + bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) || F->hasFnAttr(Attribute::StackAlignment)); return requiresRealignment && canRealignStack(MF); @@ -662,7 +555,7 @@ needsStackRealignment(const MachineFunction &MF) const { bool ARMBaseRegisterInfo:: cannotEliminateFrame(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - if (DisableFramePointerElim(MF) && MFI->adjustsStack()) + if (MF.getTarget().Options.DisableFramePointerElim(MF) && MFI->adjustsStack()) return true; return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken() || needsStackRealignment(MF); @@ -679,12 +572,10 @@ ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const { unsigned ARMBaseRegisterInfo::getEHExceptionRegister() const { llvm_unreachable("What is the exception register"); - return 0; } unsigned ARMBaseRegisterInfo::getEHHandlerRegister() const { llvm_unreachable("What is the exception handler register"); - return 0; } unsigned ARMBaseRegisterInfo::getRegisterPairEven(unsigned Reg, @@ -892,7 +783,7 @@ int64_t ARMBaseRegisterInfo:: getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const { const MCInstrDesc &Desc = MI->getDesc(); unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); - int64_t InstrOffs = 0;; + int64_t InstrOffs = 0; int Scale = 1; unsigned ImmIdx = 0; switch (AddrMode) { @@ -933,7 +824,6 @@ getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const { } default: llvm_unreachable("Unsupported addressing mode!"); - break; } return InstrOffs * Scale; @@ -1129,7 +1019,6 @@ bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, break; default: llvm_unreachable("Unsupported addressing mode!"); - break; } Offset += getFrameIndexInstrOffset(MI, i); @@ -1171,6 +1060,21 @@ 
ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int Offset = TFI->ResolveFrameIndexReference(MF, FrameIndex, FrameReg, SPAdj); + // PEI::scavengeFrameVirtualRegs() cannot accurately track SPAdj because the + // call frame setup/destroy instructions have already been eliminated. That + // means the stack pointer cannot be used to access the emergency spill slot + // when !hasReservedCallFrame(). +#ifndef NDEBUG + if (RS && FrameReg == ARM::SP && FrameIndex == RS->getScavengingFrameIndex()){ + assert(TFI->hasReservedCallFrame(MF) && + "Cannot use SP to access the emergency spill slot in " + "functions without a reserved call frame"); + assert(!MF.getFrameInfo()->hasVarSizedObjects() && + "Cannot use SP to access the emergency spill slot in " + "functions with variable sized frame objects"); + } +#endif // NDEBUG + // Special handling of dbg_value instructions. if (MI.isDebugValue()) { MI.getOperand(i). ChangeToRegister(FrameReg, false /*isDef*/); diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h index fee17ff..af79351 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -1,4 +1,4 @@ -//===- ARMBaseRegisterInfo.h - ARM Register Information Impl ----*- C++ -*-===// +//===-- ARMBaseRegisterInfo.h - ARM Register Information Impl ---*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -35,7 +35,7 @@ namespace ARMRI { /// isARMArea1Register - Returns true if the register is a low register (r0-r7) /// or a stack/pc register that we should push/pop. -static inline bool isARMArea1Register(unsigned Reg, bool isDarwin) { +static inline bool isARMArea1Register(unsigned Reg, bool isIOS) { using namespace ARM; switch (Reg) { case R0: case R1: case R2: case R3: @@ -43,25 +43,25 @@ static inline bool isARMArea1Register(unsigned Reg, bool isDarwin) { case LR: case SP: case PC: return true; case R8: case R9: case R10: case R11: - // For darwin we want r7 and lr to be next to each other. - return !isDarwin; + // For iOS we want r7 and lr to be next to each other. + return !isIOS; default: return false; } } -static inline bool isARMArea2Register(unsigned Reg, bool isDarwin) { +static inline bool isARMArea2Register(unsigned Reg, bool isIOS) { using namespace ARM; switch (Reg) { case R8: case R9: case R10: case R11: - // Darwin has this second area. - return isDarwin; + // iOS has this second area. + return isIOS; default: return false; } } -static inline bool isARMArea3Register(unsigned Reg, bool isDarwin) { +static inline bool isARMArea3Register(unsigned Reg, bool isIOS) { using namespace ARM; switch (Reg) { case D15: case D14: case D13: case D12: @@ -94,17 +94,11 @@ protected: public: /// Code Generation virtual methods... - const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const; + const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const; + const uint32_t *getCallPreservedMask(CallingConv::ID) const; BitVector getReservedRegs(const MachineFunction &MF) const; - /// getMatchingSuperRegClass - Return a subclass of the specified register - /// class A so that each register in it has a sub-register of the - /// specified sub-register index which is in the specified register class B. 
- virtual const TargetRegisterClass * - getMatchingSuperRegClass(const TargetRegisterClass *A, - const TargetRegisterClass *B, unsigned Idx) const; - /// canCombineSubRegIndices - Given a register class and a list of /// subregister indices, return true if it's possible to combine the /// subregister indices into one that corresponds to a larger @@ -125,7 +119,7 @@ public: unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const; - ArrayRef<unsigned> getRawAllocationOrder(const TargetRegisterClass *RC, + ArrayRef<uint16_t> getRawAllocationOrder(const TargetRegisterClass *RC, unsigned HintType, unsigned HintReg, const MachineFunction &MF) const; diff --git a/contrib/llvm/lib/Target/ARM/ARMBuildAttrs.h b/contrib/llvm/lib/Target/ARM/ARMBuildAttrs.h index 69eddf0..11bd6a4 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBuildAttrs.h +++ b/contrib/llvm/lib/Target/ARM/ARMBuildAttrs.h @@ -1,4 +1,4 @@ -//===-------- ARMBuildAttrs.h - ARM Build Attributes ------------*- C++ -*-===// +//===-- ARMBuildAttrs.h - ARM Build Attributes ------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/contrib/llvm/lib/Target/ARM/ARMCallingConv.h b/contrib/llvm/lib/Target/ARM/ARMCallingConv.h index ff7db1f..0bd1c3e 100644 --- a/contrib/llvm/lib/Target/ARM/ARMCallingConv.h +++ b/contrib/llvm/lib/Target/ARM/ARMCallingConv.h @@ -1,4 +1,4 @@ -//===-- ARMCallingConv.h - ARM Custom Calling Convention Routines ---------===// +//=== ARMCallingConv.h - ARM Custom Calling Convention Routines -*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -15,13 +15,12 @@ #ifndef ARMCALLINGCONV_H #define ARMCALLINGCONV_H +#include "ARM.h" +#include "ARMBaseInstrInfo.h" +#include "ARMSubtarget.h" #include "llvm/CallingConv.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/Target/TargetInstrInfo.h" -#include "ARMBaseInstrInfo.h" -#include "ARMRegisterInfo.h" -#include "ARMSubtarget.h" -#include "ARM.h" namespace llvm { @@ -29,7 +28,7 @@ namespace llvm { static bool f64AssignAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, CCState &State, bool CanFail) { - static const unsigned RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; + static const uint16_t RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; // Try to get the first register. 
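// (A hedged sketch of the assignment below, with invented values: the first
// f64 argument takes its two halves in r0 and r1, a second takes r2 and r3,
// and once the list is exhausted CanFail selects between reporting failure
// to the caller and falling back to a stack slot.)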
if (unsigned Reg = State.AllocateReg(RegList, 4)) @@ -72,9 +71,9 @@ static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, static bool f64AssignAAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, CCState &State, bool CanFail) { - static const unsigned HiRegList[] = { ARM::R0, ARM::R2 }; - static const unsigned LoRegList[] = { ARM::R1, ARM::R3 }; - static const unsigned ShadowRegList[] = { ARM::R0, ARM::R1 }; + static const uint16_t HiRegList[] = { ARM::R0, ARM::R2 }; + static const uint16_t LoRegList[] = { ARM::R1, ARM::R3 }; + static const uint16_t ShadowRegList[] = { ARM::R0, ARM::R1 }; unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList, 2); if (Reg == 0) { @@ -118,8 +117,8 @@ static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, static bool f64RetAssign(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, CCState &State) { - static const unsigned HiRegList[] = { ARM::R0, ARM::R2 }; - static const unsigned LoRegList[] = { ARM::R1, ARM::R3 }; + static const uint16_t HiRegList[] = { ARM::R0, ARM::R2 }; + static const uint16_t LoRegList[] = { ARM::R1, ARM::R3 }; unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2); if (Reg == 0) diff --git a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td index 47b2e98..d33364b 100644 --- a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td +++ b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td @@ -1,4 +1,4 @@ -//===- ARMCallingConv.td - Calling Conventions for ARM -----*- tablegen -*-===// +//===-- ARMCallingConv.td - Calling Conventions for ARM ----*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -25,7 +25,7 @@ def CC_ARM_APCS : CallingConv<[ // Handles byval parameters. CCIfByVal<CCPassByVal<4, 4>>, - CCIfType<[i8, i16], CCPromoteToType<i32>>, + CCIfType<[i1, i8, i16], CCPromoteToType<i32>>, // Handle all vector types as either f64 or v2f64. CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>, @@ -43,6 +43,7 @@ def CC_ARM_APCS : CallingConv<[ ]>; def RetCC_ARM_APCS : CallingConv<[ + CCIfType<[i1, i8, i16], CCPromoteToType<i32>>, CCIfType<[f32], CCBitConvertToType<i32>>, // Handle all vector types as either f64 or v2f64. @@ -82,25 +83,6 @@ def RetFastCC_ARM_APCS : CallingConv<[ CCDelegateTo<RetCC_ARM_APCS> ]>; -//===----------------------------------------------------------------------===// -// ARM APCS Calling Convention for GHC -//===----------------------------------------------------------------------===// - -def CC_ARM_APCS_GHC : CallingConv<[ - // Handle all vector types as either f64 or v2f64. - CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>, - CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>, - - CCIfType<[v2f64], CCAssignToReg<[Q4, Q5]>>, - CCIfType<[f64], CCAssignToReg<[D8, D9, D10, D11]>>, - CCIfType<[f32], CCAssignToReg<[S16, S17, S18, S19, S20, S21, S22, S23]>>, - - // Promote i8/i16 arguments to i32. 
- CCIfType<[i8, i16], CCPromoteToType<i32>>, - - // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, SpLim - CCIfType<[i32], CCAssignToReg<[R4, R5, R6, R7, R8, R9, R10, R11]>> -]>; //===----------------------------------------------------------------------===// // ARM AAPCS (EABI) Calling Convention, common parts @@ -108,7 +90,7 @@ def CC_ARM_APCS_GHC : CallingConv<[ def CC_ARM_AAPCS_Common : CallingConv<[ - CCIfType<[i8, i16], CCPromoteToType<i32>>, + CCIfType<[i1, i8, i16], CCPromoteToType<i32>>, // i64/f64 is passed in even pairs of GPRs // i64 is 8-aligned i32 here, so we may need to eat R1 as a pad register @@ -125,6 +107,7 @@ def CC_ARM_AAPCS_Common : CallingConv<[ ]>; def RetCC_ARM_AAPCS_Common : CallingConv<[ + CCIfType<[i1, i8, i16], CCPromoteToType<i32>>, CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>, CCIfType<[i64], CCAssignToRegWithShadow<[R0, R2], [R1, R3]>> ]>; @@ -181,3 +164,14 @@ def RetCC_ARM_AAPCS_VFP : CallingConv<[ S9, S10, S11, S12, S13, S14, S15]>>, CCDelegateTo<RetCC_ARM_AAPCS_Common> ]>; + +//===----------------------------------------------------------------------===// +// Callee-saved register lists. +//===----------------------------------------------------------------------===// + +def CSR_AAPCS : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6, R5, R4, + (sequence "D%u", 15, 8))>; + +// iOS ABI deviates from ARM standard ABI. R9 is not a callee-saved register. +// Also save R7-R4 first to match the stack frame fixed spill areas. +def CSR_iOS : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS, R9))>; diff --git a/contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp b/contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp index 4148d4a..32ef345 100644 --- a/contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp @@ -15,7 +15,7 @@ #define DEBUG_TYPE "jit" #include "ARM.h" #include "ARMConstantPoolValue.h" -#include "ARMInstrInfo.h" +#include "ARMBaseInstrInfo.h" #include "ARMRelocations.h" #include "ARMSubtarget.h" #include "ARMTargetMachine.h" @@ -46,7 +46,7 @@ namespace { class ARMCodeEmitter : public MachineFunctionPass { ARMJITInfo *JTI; - const ARMInstrInfo *II; + const ARMBaseInstrInfo *II; const TargetData *TD; const ARMSubtarget *Subtarget; TargetMachine &TM; @@ -66,7 +66,7 @@ namespace { public: ARMCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce) : MachineFunctionPass(ID), JTI(0), - II((const ARMInstrInfo *)tm.getInstrInfo()), + II((const ARMBaseInstrInfo *)tm.getInstrInfo()), TD(tm.getTargetData()), TM(tm), MCE(mce), MCPEs(0), MJTEs(0), IsPIC(TM.getRelocationModel() == Reloc::PIC_), IsThumb(false) {} @@ -74,7 +74,7 @@ namespace { /// getBinaryCodeForInstr - This function, generated by the /// CodeEmitterGenerator using TableGen, produces the binary encoding for /// machine instructions. 
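// (The widening to uint64_t below presumably tracks the uint64_t return type
// of the TableGen-generated encoder; a hedged inference, not something the
// patch itself states.)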
- unsigned getBinaryCodeForInstr(const MachineInstr &MI) const; + uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const; bool runOnMachineFunction(MachineFunction &MF); @@ -189,6 +189,8 @@ namespace { unsigned Op) const { return 0; } unsigned getARMBranchTargetOpValue(const MachineInstr &MI, unsigned Op) const { return 0; } + unsigned getARMBLTargetOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } unsigned getARMBLXTargetOpValue(const MachineInstr &MI, unsigned Op) const { return 0; } unsigned getCCOutOpValue(const MachineInstr &MI, unsigned Op) @@ -366,9 +368,9 @@ bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) { assert((MF.getTarget().getRelocationModel() == Reloc::Default || MF.getTarget().getRelocationModel() == Reloc::Static) && "JIT relocation model must be set to static or default!"); - JTI = ((ARMTargetMachine &)MF.getTarget()).getJITInfo(); - II = ((const ARMTargetMachine &)MF.getTarget()).getInstrInfo(); - TD = ((const ARMTargetMachine &)MF.getTarget()).getTargetData(); + JTI = ((ARMBaseTargetMachine &)MF.getTarget()).getJITInfo(); + II = (const ARMBaseInstrInfo *)MF.getTarget().getInstrInfo(); + TD = MF.getTarget().getTargetData(); Subtarget = &TM.getSubtarget<ARMSubtarget>(); MCPEs = &MF.getConstantPool()->getConstants(); MJTEs = 0; @@ -386,7 +388,7 @@ bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) { for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); MBB != E; ++MBB) { MCE.StartMachineBasicBlock(MBB); - for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end(); + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ++I) emitInstruction(*I); } @@ -406,7 +408,6 @@ unsigned ARMCodeEmitter::getShiftOp(unsigned Imm) const { case ARM_AM::ror: case ARM_AM::rrx: return 3; } - return 0; } /// getMovi32Value - Return binary encoding of operand for movw/movt.
If the @@ -532,7 +533,6 @@ void ARMCodeEmitter::emitInstruction(const MachineInstr &MI) { switch (MI.getDesc().TSFlags & ARMII::FormMask) { default: { llvm_unreachable("Unhandled instruction encoding format!"); - break; } case ARMII::MiscFrm: if (MI.getOpcode() == ARM::LEApcrelJT) { @@ -541,7 +541,6 @@ void ARMCodeEmitter::emitInstruction(const MachineInstr &MI) { break; } llvm_unreachable("Unhandled instruction encoding!"); - break; case ARMII::Pseudo: emitPseudoInstruction(MI); break; @@ -837,9 +836,7 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) { default: llvm_unreachable("ARMCodeEmitter::emitPseudoInstruction"); case ARM::BX_CALL: - case ARM::BMOVPCRX_CALL: - case ARM::BXr9_CALL: - case ARM::BMOVPCRXr9_CALL: { + case ARM::BMOVPCRX_CALL: { // First emit mov lr, pc unsigned Binary = 0x01a0e00f; Binary |= II->getPredicate(&MI) << ARMII::CondShift; diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp index 3e3a413..fc35c7c 100644 --- a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -16,16 +16,17 @@ #define DEBUG_TYPE "arm-cp-islands" #include "ARM.h" #include "ARMMachineFunctionInfo.h" -#include "ARMInstrInfo.h" #include "Thumb2InstrInfo.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" @@ -51,6 +52,44 @@ static cl::opt<bool> AdjustJumpTableBlocks("arm-adjust-jump-tables", cl::Hidden, cl::init(true), cl::desc("Adjust basic block layout to better use TB[BH]")); +// FIXME: This option should be removed once it has received sufficient testing. +static cl::opt<bool> +AlignConstantIslands("arm-align-constant-islands", cl::Hidden, cl::init(true), + cl::desc("Align constant islands in code")); + +/// UnknownPadding - Return the worst case padding that could result from +/// unknown offset bits. This does not include alignment padding caused by +/// known offset bits. +/// +/// @param LogAlign log2(alignment) +/// @param KnownBits Number of known low offset bits. +static inline unsigned UnknownPadding(unsigned LogAlign, unsigned KnownBits) { + if (KnownBits < LogAlign) + return (1u << LogAlign) - (1u << KnownBits); + return 0; +} + +/// WorstCaseAlign - Assuming only the low KnownBits bits in Offset are exact, +/// add padding such that: +/// +/// 1. The result is aligned to 1 << LogAlign. +/// +/// 2. No other value of the unknown bits would require more padding. +/// +/// This may add more padding than is required to satisfy just one of the +/// constraints. It is necessary to compute alignment this way to guarantee +/// that we don't underestimate the padding before an aligned block. If the +/// real padding before a block is larger than we think, constant pool entries +/// may go out of range. +static inline unsigned WorstCaseAlign(unsigned Offset, unsigned LogAlign, + unsigned KnownBits) { + // Add the worst possible padding that the unknown bits could cause. + Offset += UnknownPadding(LogAlign, KnownBits); + + // Then align the result. 
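+ // (Hedged worked example with invented numbers: Offset=0x104, LogAlign=3,
+ // KnownBits=1 adds UnknownPadding = (1<<3)-(1<<1) = 6 to reach 0x10a, and
+ // RoundUpToAlignment(0x10a, 8) then returns 0x110.)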
+ return RoundUpToAlignment(Offset, 1u << LogAlign); +} + namespace { /// ARMConstantIslands - Due to limited PC-relative displacements, ARM /// requires constant pool entries to be scattered among the instructions @@ -64,16 +103,70 @@ namespace { /// CPE - A constant pool entry that has been placed somewhere, which /// tracks a list of users. class ARMConstantIslands : public MachineFunctionPass { - /// BBSizes - The size of each MachineBasicBlock in bytes of code, indexed - /// by MBB Number. The two-byte pads required for Thumb alignment are - /// counted as part of the following block (i.e., the offset and size for - /// a padded block will both be ==2 mod 4). - std::vector<unsigned> BBSizes; + /// BasicBlockInfo - Information about the offset and size of a single + /// basic block. + struct BasicBlockInfo { + /// Offset - Distance from the beginning of the function to the beginning + /// of this basic block. + /// + /// The offset is always aligned as required by the basic block. + unsigned Offset; + + /// Size - Size of the basic block in bytes. If the block contains + /// inline assembly, this is a worst case estimate. + /// + /// The size does not include any alignment padding whether from the + /// beginning of the block, or from an aligned jump table at the end. + unsigned Size; + + /// KnownBits - The number of low bits in Offset that are known to be + /// exact. The remaining bits of Offset are an upper bound. + uint8_t KnownBits; + + /// Unalign - When non-zero, the block contains instructions (inline asm) + /// of unknown size. The real size may be smaller than Size bytes by a + /// multiple of 1 << Unalign. + uint8_t Unalign; + + /// PostAlign - When non-zero, the block terminator contains a .align + /// directive, so the end of the block is aligned to 1 << PostAlign + /// bytes. + uint8_t PostAlign; + + BasicBlockInfo() : Offset(0), Size(0), KnownBits(0), Unalign(0), + PostAlign(0) {} + + /// Compute the number of known offset bits internally to this block. + /// This number should be used to predict worst case padding when + /// splitting the block. + unsigned internalKnownBits() const { + return Unalign ? Unalign : KnownBits; + } + + /// Compute the offset immediately following this block. If LogAlign is + /// specified, return the offset the successor block will get if it has + /// this alignment. + unsigned postOffset(unsigned LogAlign = 0) const { + unsigned PO = Offset + Size; + unsigned LA = std::max(unsigned(PostAlign), LogAlign); + if (!LA) + return PO; + // Add alignment padding from the terminator. + return WorstCaseAlign(PO, LA, internalKnownBits()); + } + + /// Compute the number of known low bits of postOffset. If this block + /// contains inline asm, the number of known bits drops to the + /// instruction alignment. An aligned terminator may increase the number + /// of know bits. + /// If LogAlign is given, also consider the alignment of the next block. + unsigned postKnownBits(unsigned LogAlign = 0) const { + return std::max(std::max(unsigned(PostAlign), LogAlign), + internalKnownBits()); + } + }; - /// BBOffsets - the offset of each MBB in bytes, starting from 0. - /// The two-byte pads required for Thumb alignment are counted as part of - /// the following block. - std::vector<unsigned> BBOffsets; + std::vector<BasicBlockInfo> BBInfo; /// WaterList - A sorted list of basic blocks where islands could be placed /// (i.e. 
blocks that don't fall through to the following block, due @@ -102,14 +195,24 @@ namespace { MachineInstr *MI; MachineInstr *CPEMI; MachineBasicBlock *HighWaterMark; + private: unsigned MaxDisp; + public: bool NegOk; bool IsSoImm; + bool KnownAlignment; CPUser(MachineInstr *mi, MachineInstr *cpemi, unsigned maxdisp, bool neg, bool soimm) - : MI(mi), CPEMI(cpemi), MaxDisp(maxdisp), NegOk(neg), IsSoImm(soimm) { + : MI(mi), CPEMI(cpemi), MaxDisp(maxdisp), NegOk(neg), IsSoImm(soimm), + KnownAlignment(false) { HighWaterMark = CPEMI->getParent(); } + /// getMaxDisp - Returns the maximum displacement supported by MI. + /// Correct for unknown alignment. + /// Conservatively subtract 2 bytes to handle weird alignment effects. + unsigned getMaxDisp() const { + return (KnownAlignment ? MaxDisp : MaxDisp - 2) - 2; + } }; /// CPUsers - Keep track of all of the machine instructions that use various @@ -162,10 +265,9 @@ namespace { /// the branch fix up pass. bool HasFarJump; - /// HasInlineAsm - True if the function contains inline assembly. - bool HasInlineAsm; - - const ARMInstrInfo *TII; + MachineFunction *MF; + MachineConstantPool *MCP; + const ARMBaseInstrInfo *TII; const ARMSubtarget *STI; ARMFunctionInfo *AFI; bool isThumb; @@ -182,85 +284,100 @@ namespace { } private: - void DoInitialPlacement(MachineFunction &MF, - std::vector<MachineInstr*> &CPEMIs); + void doInitialPlacement(std::vector<MachineInstr*> &CPEMIs); CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI); - void JumpTableFunctionScan(MachineFunction &MF); - void InitialFunctionScan(MachineFunction &MF, - const std::vector<MachineInstr*> &CPEMIs); - MachineBasicBlock *SplitBlockBeforeInstr(MachineInstr *MI); - void UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB); - void AdjustBBOffsetsAfter(MachineBasicBlock *BB, int delta); - bool DecrementOldEntry(unsigned CPI, MachineInstr* CPEMI); - int LookForExistingCPEntry(CPUser& U, unsigned UserOffset); - bool LookForWater(CPUser&U, unsigned UserOffset, water_iterator &WaterIter); - void CreateNewWater(unsigned CPUserIndex, unsigned UserOffset, + unsigned getCPELogAlign(const MachineInstr *CPEMI); + void scanFunctionJumpTables(); + void initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs); + MachineBasicBlock *splitBlockBeforeInstr(MachineInstr *MI); + void updateForInsertedWaterBlock(MachineBasicBlock *NewBB); + void adjustBBOffsetsAfter(MachineBasicBlock *BB); + bool decrementCPEReferenceCount(unsigned CPI, MachineInstr* CPEMI); + int findInRangeCPEntry(CPUser& U, unsigned UserOffset); + bool findAvailableWater(CPUser&U, unsigned UserOffset, + water_iterator &WaterIter); + void createNewWater(unsigned CPUserIndex, unsigned UserOffset, MachineBasicBlock *&NewMBB); - bool HandleConstantPoolUser(MachineFunction &MF, unsigned CPUserIndex); - void RemoveDeadCPEMI(MachineInstr *CPEMI); - bool RemoveUnusedCPEntries(); - bool CPEIsInRange(MachineInstr *MI, unsigned UserOffset, - MachineInstr *CPEMI, unsigned Disp, bool NegOk, - bool DoDump = false); - bool WaterIsInRange(unsigned UserOffset, MachineBasicBlock *Water, - CPUser &U); - bool OffsetIsInRange(unsigned UserOffset, unsigned TrialOffset, - unsigned Disp, bool NegativeOK, bool IsSoImm = false); - bool BBIsInRange(MachineInstr *MI, MachineBasicBlock *BB, unsigned Disp); - bool FixUpImmediateBr(MachineFunction &MF, ImmBranch &Br); - bool FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br); - bool FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br); - bool UndoLRSpillRestore(); - bool 
OptimizeThumb2Instructions(MachineFunction &MF); - bool OptimizeThumb2Branches(MachineFunction &MF); - bool ReorderThumb2JumpTables(MachineFunction &MF); - bool OptimizeThumb2JumpTables(MachineFunction &MF); - MachineBasicBlock *AdjustJTTargetBlockForward(MachineBasicBlock *BB, + bool handleConstantPoolUser(unsigned CPUserIndex); + void removeDeadCPEMI(MachineInstr *CPEMI); + bool removeUnusedCPEntries(); + bool isCPEntryInRange(MachineInstr *MI, unsigned UserOffset, + MachineInstr *CPEMI, unsigned Disp, bool NegOk, + bool DoDump = false); + bool isWaterInRange(unsigned UserOffset, MachineBasicBlock *Water, + CPUser &U, unsigned &Growth); + bool isBBInRange(MachineInstr *MI, MachineBasicBlock *BB, unsigned Disp); + bool fixupImmediateBr(ImmBranch &Br); + bool fixupConditionalBr(ImmBranch &Br); + bool fixupUnconditionalBr(ImmBranch &Br); + bool undoLRSpillRestore(); + bool mayOptimizeThumb2Instruction(const MachineInstr *MI) const; + bool optimizeThumb2Instructions(); + bool optimizeThumb2Branches(); + bool reorderThumb2JumpTables(); + bool optimizeThumb2JumpTables(); + MachineBasicBlock *adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB); - unsigned GetOffsetOf(MachineInstr *MI) const; + void computeBlockSize(MachineBasicBlock *MBB); + unsigned getOffsetOf(MachineInstr *MI) const; + unsigned getUserOffset(CPUser&) const; void dumpBBs(); - void verify(MachineFunction &MF); + void verify(); + + bool isOffsetInRange(unsigned UserOffset, unsigned TrialOffset, + unsigned Disp, bool NegativeOK, bool IsSoImm = false); + bool isOffsetInRange(unsigned UserOffset, unsigned TrialOffset, + const CPUser &U) { + return isOffsetInRange(UserOffset, TrialOffset, + U.getMaxDisp(), U.NegOk, U.IsSoImm); + } }; char ARMConstantIslands::ID = 0; } /// verify - check BBOffsets, BBSizes, alignment of islands -void ARMConstantIslands::verify(MachineFunction &MF) { - assert(BBOffsets.size() == BBSizes.size()); - for (unsigned i = 1, e = BBOffsets.size(); i != e; ++i) - assert(BBOffsets[i-1]+BBSizes[i-1] == BBOffsets[i]); - if (!isThumb) - return; +void ARMConstantIslands::verify() { #ifndef NDEBUG - for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end(); + for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); MBBI != E; ++MBBI) { MachineBasicBlock *MBB = MBBI; - if (!MBB->empty() && - MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) { - unsigned MBBId = MBB->getNumber(); - assert(HasInlineAsm || - (BBOffsets[MBBId]%4 == 0 && BBSizes[MBBId]%4 == 0) || - (BBOffsets[MBBId]%4 != 0 && BBSizes[MBBId]%4 != 0)); - } + unsigned Align = MBB->getAlignment(); + unsigned MBBId = MBB->getNumber(); + assert(BBInfo[MBBId].Offset % (1u << Align) == 0); + assert(!MBBId || BBInfo[MBBId - 1].postOffset() <= BBInfo[MBBId].Offset); } + DEBUG(dbgs() << "Verifying " << CPUsers.size() << " CP users.\n"); for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) { CPUser &U = CPUsers[i]; - unsigned UserOffset = GetOffsetOf(U.MI) + (isThumb ? 4 : 8); - unsigned CPEOffset = GetOffsetOf(U.CPEMI); - unsigned Disp = UserOffset < CPEOffset ? CPEOffset - UserOffset : - UserOffset - CPEOffset; - assert(Disp <= U.MaxDisp || "Constant pool entry out of range!"); + unsigned UserOffset = getUserOffset(U); + // Verify offset using the real max displacement without the safety + // adjustment. 
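+ // (Hedged worked example: an encoding whose nominal reach is 1020 bytes
+ // stores MaxDisp=1020, so getMaxDisp() reports 1018 with known alignment
+ // and 1016 without; the check below re-adds only the safety 2.)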
+ if (isCPEntryInRange(U.MI, UserOffset, U.CPEMI, U.getMaxDisp()+2, U.NegOk, + /* DoDump = */ true)) { + DEBUG(dbgs() << "OK\n"); + continue; + } + DEBUG(dbgs() << "Out of range.\n"); + dumpBBs(); + DEBUG(MF->dump()); + llvm_unreachable("Constant pool entry out of range!"); } #endif } /// print block size and offset information - debugging void ARMConstantIslands::dumpBBs() { - for (unsigned J = 0, E = BBOffsets.size(); J !=E; ++J) { - DEBUG(errs() << "block " << J << " offset " << BBOffsets[J] - << " size " << BBSizes[J] << "\n"); - } + DEBUG({ + for (unsigned J = 0, E = BBInfo.size(); J !=E; ++J) { + const BasicBlockInfo &BBI = BBInfo[J]; + dbgs() << format("%08x BB#%u\t", BBI.Offset, J) + << " kb=" << unsigned(BBI.KnownBits) + << " ua=" << unsigned(BBI.Unalign) + << " pa=" << unsigned(BBI.PostAlign) + << format(" size=%#x\n", BBInfo[J].Size); + } + }); } /// createARMConstantIslandPass - returns an instance of the constpool @@ -269,34 +386,41 @@ FunctionPass *llvm::createARMConstantIslandPass() { return new ARMConstantIslands(); } -bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) { - MachineConstantPool &MCP = *MF.getConstantPool(); +bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { + MF = &mf; + MCP = mf.getConstantPool(); - TII = (const ARMInstrInfo*)MF.getTarget().getInstrInfo(); - AFI = MF.getInfo<ARMFunctionInfo>(); - STI = &MF.getTarget().getSubtarget<ARMSubtarget>(); + DEBUG(dbgs() << "***** ARMConstantIslands: " + << MCP->getConstants().size() << " CP entries, aligned to " + << MCP->getConstantPoolAlignment() << " bytes *****\n"); + + TII = (const ARMBaseInstrInfo*)MF->getTarget().getInstrInfo(); + AFI = MF->getInfo<ARMFunctionInfo>(); + STI = &MF->getTarget().getSubtarget<ARMSubtarget>(); isThumb = AFI->isThumbFunction(); isThumb1 = AFI->isThumb1OnlyFunction(); isThumb2 = AFI->isThumb2Function(); HasFarJump = false; - HasInlineAsm = false; + + // This pass invalidates liveness information when it splits basic blocks. + MF->getRegInfo().invalidateLiveness(); // Renumber all of the machine basic blocks in the function, guaranteeing that // the numbers agree with the position of the block in the function. - MF.RenumberBlocks(); + MF->RenumberBlocks(); // Try to reorder and otherwise adjust the block layout to make good use // of the TB[BH] instructions. bool MadeChange = false; if (isThumb2 && AdjustJumpTableBlocks) { - JumpTableFunctionScan(MF); - MadeChange |= ReorderThumb2JumpTables(MF); + scanFunctionJumpTables(); + MadeChange |= reorderThumb2JumpTables(); // Data is out of date, so clear it. It'll be re-computed later. T2JumpTables.clear(); // Blocks may have shifted around. Keep the numbering up to date. - MF.RenumberBlocks(); + MF->RenumberBlocks(); } // Thumb1 functions containing constant pools get 4-byte alignment. @@ -304,16 +428,13 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) { // ARM and Thumb2 functions need to be 4-byte aligned. if (!isThumb1) - MF.EnsureAlignment(2); // 2 = log2(4) + MF->EnsureAlignment(2); // 2 = log2(4) // Perform the initial placement of the constant pool entries. To start with, // we put them all at the end of the function. std::vector<MachineInstr*> CPEMIs; - if (!MCP.isEmpty()) { - DoInitialPlacement(MF, CPEMIs); - if (isThumb1) - MF.EnsureAlignment(2); // 2 = log2(4) - } + if (!MCP->isEmpty()) + doInitialPlacement(CPEMIs); /// The next UID to take is the first unused one. 
AFI->initPICLabelUId(CPEMIs.size()); @@ -321,34 +442,36 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) { // Do the initial scan of the function, building up information about the // sizes of each block, the location of all the water, and finding all of the // constant pool users. - InitialFunctionScan(MF, CPEMIs); + initializeFunctionInfo(CPEMIs); CPEMIs.clear(); DEBUG(dumpBBs()); /// Remove dead constant pool entries. - MadeChange |= RemoveUnusedCPEntries(); + MadeChange |= removeUnusedCPEntries(); // Iteratively place constant pool entries and fix up branches until there // is no change. unsigned NoCPIters = 0, NoBRIters = 0; while (true) { + DEBUG(dbgs() << "Beginning CP iteration #" << NoCPIters << '\n'); bool CPChange = false; for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) - CPChange |= HandleConstantPoolUser(MF, i); + CPChange |= handleConstantPoolUser(i); if (CPChange && ++NoCPIters > 30) - llvm_unreachable("Constant Island pass failed to converge!"); + report_fatal_error("Constant Island pass failed to converge!"); DEBUG(dumpBBs()); // Clear NewWaterList now. If we split a block for branches, it should // appear as "new water" for the next iteration of constant pool placement. NewWaterList.clear(); + DEBUG(dbgs() << "Beginning BR iteration #" << NoBRIters << '\n'); bool BRChange = false; for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i) - BRChange |= FixUpImmediateBr(MF, ImmBranches[i]); + BRChange |= fixupImmediateBr(ImmBranches[i]); if (BRChange && ++NoBRIters > 30) - llvm_unreachable("Branch Fix Up pass failed to converge!"); + report_fatal_error("Branch Fix Up pass failed to converge!"); DEBUG(dumpBBs()); if (!CPChange && !BRChange) @@ -358,15 +481,15 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) { // Shrink 32-bit Thumb2 branch, load, and store instructions. if (isThumb2 && !STI->prefers32BitThumb()) - MadeChange |= OptimizeThumb2Instructions(MF); + MadeChange |= optimizeThumb2Instructions(); // After a while, this might be made debug-only, but it is not expensive. - verify(MF); + verify(); // If LR has been forced spilled and no far jump (i.e. BL) has been issued, // undo the spill / restore of LR if possible. if (isThumb && !HasFarJump && AFI->isLRSpilledForFarJump()) - MadeChange |= UndoLRSpillRestore(); + MadeChange |= undoLRSpillRestore(); // Save the mapping between original and cloned constpool entries. for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) { @@ -376,10 +499,9 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) { } } - DEBUG(errs() << '\n'; dumpBBs()); + DEBUG(dbgs() << '\n'; dumpBBs()); - BBSizes.clear(); - BBOffsets.clear(); + BBInfo.clear(); WaterList.clear(); CPUsers.clear(); CPEntries.clear(); @@ -390,39 +512,68 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) { return MadeChange; } -/// DoInitialPlacement - Perform the initial placement of the constant pool +/// doInitialPlacement - Perform the initial placement of the constant pool /// entries. To start with, we put them all at the end of the function. -void ARMConstantIslands::DoInitialPlacement(MachineFunction &MF, - std::vector<MachineInstr*> &CPEMIs) { +void +ARMConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) { // Create the basic block to hold the CPE's. - MachineBasicBlock *BB = MF.CreateMachineBasicBlock(); - MF.push_back(BB); + MachineBasicBlock *BB = MF->CreateMachineBasicBlock(); + MF->push_back(BB); + + // MachineConstantPool measures alignment in bytes. 
We measure in log2(bytes). + unsigned MaxAlign = Log2_32(MCP->getConstantPoolAlignment()); + + // Mark the basic block as required by the const-pool. + // If AlignConstantIslands isn't set, use 4-byte alignment for everything. + BB->setAlignment(AlignConstantIslands ? MaxAlign : 2); + + // The function needs to be as aligned as the basic blocks. The linker may + // move functions around based on their alignment. + MF->EnsureAlignment(BB->getAlignment()); + + // Order the entries in BB by descending alignment. That ensures correct + // alignment of all entries as long as BB is sufficiently aligned. Keep + // track of the insertion point for each alignment. We are going to bucket + // sort the entries as they are created. + SmallVector<MachineBasicBlock::iterator, 8> InsPoint(MaxAlign + 1, BB->end()); // Add all of the constants from the constant pool to the end block, use an // identity mapping of CPI's to CPE's. - const std::vector<MachineConstantPoolEntry> &CPs = - MF.getConstantPool()->getConstants(); + const std::vector<MachineConstantPoolEntry> &CPs = MCP->getConstants(); - const TargetData &TD = *MF.getTarget().getTargetData(); + const TargetData &TD = *MF->getTarget().getTargetData(); for (unsigned i = 0, e = CPs.size(); i != e; ++i) { unsigned Size = TD.getTypeAllocSize(CPs[i].getType()); - // Verify that all constant pool entries are a multiple of 4 bytes. If not, - // we would have to pad them out or something so that instructions stay - // aligned. - assert((Size & 3) == 0 && "CP Entry not multiple of 4 bytes!"); + assert(Size >= 4 && "Too small constant pool entry"); + unsigned Align = CPs[i].getAlignment(); + assert(isPowerOf2_32(Align) && "Invalid alignment"); + // Verify that all constant pool entries are a multiple of their alignment. + // If not, we would have to pad them out so that instructions stay aligned. + assert((Size % Align) == 0 && "CP Entry not multiple of its alignment!"); + + // Insert CONSTPOOL_ENTRY before entries with a smaller alignment. + unsigned LogAlign = Log2_32(Align); + MachineBasicBlock::iterator InsAt = InsPoint[LogAlign]; MachineInstr *CPEMI = - BuildMI(BB, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY)) + BuildMI(*BB, InsAt, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY)) .addImm(i).addConstantPoolIndex(i).addImm(Size); CPEMIs.push_back(CPEMI); + // Ensure that future entries with higher alignment get inserted before + // CPEMI. This is bucket sort with iterators. + for (unsigned a = LogAlign + 1; a <= MaxAlign; ++a) + if (InsPoint[a] == InsAt) + InsPoint[a] = CPEMI; + // Add a new CPEntry, but no corresponding CPUser yet. std::vector<CPEntry> CPEs; CPEs.push_back(CPEntry(CPEMI, i)); CPEntries.push_back(CPEs); ++NumCPEs; - DEBUG(errs() << "Moved CPI#" << i << " to end of function as #" << i - << "\n"); + DEBUG(dbgs() << "Moved CPI#" << i << " to end of function, size = " + << Size << ", align = " << Align <<'\n'); } + DEBUG(BB->dump()); } /// BBHasFallthrough - Return true if the specified basic block can fallthrough @@ -458,41 +609,61 @@ ARMConstantIslands::CPEntry return NULL; } -/// JumpTableFunctionScan - Do a scan of the function, building up +/// getCPELogAlign - Returns the required alignment of the constant pool entry +/// represented by CPEMI. Alignment is measured in log2(bytes) units. +unsigned ARMConstantIslands::getCPELogAlign(const MachineInstr *CPEMI) { + assert(CPEMI && CPEMI->getOpcode() == ARM::CONSTPOOL_ENTRY); + + // Everything is 4-byte aligned unless AlignConstantIslands is set.
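+ // (For instance, a 16-byte aligned v2f64 entry would yield Log2_32(16) == 4
+ // from the lookup further down; invented values, offered only as an
+ // illustration.)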
+ if (!AlignConstantIslands) + return 2; + + unsigned CPI = CPEMI->getOperand(1).getIndex(); + assert(CPI < MCP->getConstants().size() && "Invalid constant pool index."); + unsigned Align = MCP->getConstants()[CPI].getAlignment(); + assert(isPowerOf2_32(Align) && "Invalid CPE alignment"); + return Log2_32(Align); +} + +/// scanFunctionJumpTables - Do a scan of the function, building up /// information about the sizes of each block and the locations of all /// the jump tables. -void ARMConstantIslands::JumpTableFunctionScan(MachineFunction &MF) { - for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end(); +void ARMConstantIslands::scanFunctionJumpTables() { + for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); MBBI != E; ++MBBI) { MachineBasicBlock &MBB = *MBBI; for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I) - if (I->getDesc().isBranch() && I->getOpcode() == ARM::t2BR_JT) + if (I->isBranch() && I->getOpcode() == ARM::t2BR_JT) T2JumpTables.push_back(I); } } -/// InitialFunctionScan - Do the initial scan of the function, building up +/// initializeFunctionInfo - Do the initial scan of the function, building up /// information about the sizes of each block, the location of all the water, /// and finding all of the constant pool users. -void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF, - const std::vector<MachineInstr*> &CPEMIs) { - // First thing, see if the function has any inline assembly in it. If so, - // we have to be conservative about alignment assumptions, as we don't - // know for sure the size of any instructions in the inline assembly. - for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end(); - MBBI != E; ++MBBI) { - MachineBasicBlock &MBB = *MBBI; - for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); - I != E; ++I) - if (I->getOpcode() == ARM::INLINEASM) - HasInlineAsm = true; - } +void ARMConstantIslands:: +initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) { + BBInfo.clear(); + BBInfo.resize(MF->getNumBlockIDs()); + + // First thing, compute the size of all basic blocks, and see if the function + // has any inline assembly in it. If so, we have to be conservative about + // alignment assumptions, as we don't know for sure the size of any + // instructions in the inline assembly. + for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) + computeBlockSize(I); + + // The known bits of the entry block offset are determined by the function + // alignment. + BBInfo.front().KnownBits = MF->getAlignment(); + + // Compute block offsets and known bits. + adjustBBOffsetsAfter(MF->begin()); // Now go back through the instructions and build up our data structures. - unsigned Offset = 0; - for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end(); + for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); MBBI != E; ++MBBI) { MachineBasicBlock &MBB = *MBBI; @@ -501,16 +672,13 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF, if (!BBHasFallthrough(&MBB)) WaterList.push_back(&MBB); - unsigned MBBSize = 0; for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I) { if (I->isDebugValue()) continue; - // Add instruction size to MBBSize. 
- MBBSize += TII->GetInstSizeInBytes(I); int Opc = I->getOpcode(); - if (I->getDesc().isBranch()) { + if (I->isBranch()) { bool isCond = false; unsigned Bits = 0; unsigned Scale = 1; @@ -518,18 +686,6 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF, switch (Opc) { default: continue; // Ignore other JT branches - case ARM::tBR_JTr: - // A Thumb1 table jump may involve padding; for the offsets to - // be right, functions containing these must be 4-byte aligned. - // tBR_JTr expands to a mov pc followed by .align 2 and then the jump - // table entries. So this code checks whether offset of tBR_JTr + 2 - // is aligned. That is held in Offset+MBBSize, which already has - // 2 added in for the size of the mov pc instruction. - MF.EnsureAlignment(2U); - if ((Offset+MBBSize)%4 != 0 || HasInlineAsm) - // FIXME: Add a pseudo ALIGN instruction instead. - MBBSize += 2; // padding - continue; // Does not get an entry in ImmBranches case ARM::t2BR_JT: T2JumpTables.push_back(I); continue; // Does not get an entry in ImmBranches @@ -589,7 +745,6 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF, switch (Opc) { default: llvm_unreachable("Unknown addressing mode for CP reference!"); - break; // Taking the address of a CP entry. case ARM::LEApcrel: @@ -647,45 +802,53 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF, break; } } + } +} + +/// computeBlockSize - Compute the size and some alignment information for MBB. +/// This function updates BBInfo directly. +void ARMConstantIslands::computeBlockSize(MachineBasicBlock *MBB) { + BasicBlockInfo &BBI = BBInfo[MBB->getNumber()]; + BBI.Size = 0; + BBI.Unalign = 0; + BBI.PostAlign = 0; + + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; + ++I) { + BBI.Size += TII->GetInstSizeInBytes(I); + // For inline asm, GetInstSizeInBytes returns a conservative estimate. + // The actual size may be smaller, but still a multiple of the instr size. + if (I->isInlineAsm()) + BBI.Unalign = isThumb ? 1 : 2; + // Also consider instructions that may be shrunk later. + else if (isThumb && mayOptimizeThumb2Instruction(I)) + BBI.Unalign = 1; + } - // In thumb mode, if this block is a constpool island, we may need padding - // so it's aligned on 4 byte boundary. - if (isThumb && - !MBB.empty() && - MBB.begin()->getOpcode() == ARM::CONSTPOOL_ENTRY && - ((Offset%4) != 0 || HasInlineAsm)) - MBBSize += 2; - - BBSizes.push_back(MBBSize); - BBOffsets.push_back(Offset); - Offset += MBBSize; + // tBR_JTr contains a .align 2 directive. + if (!MBB->empty() && MBB->back().getOpcode() == ARM::tBR_JTr) { + BBI.PostAlign = 2; + MBB->getParent()->EnsureAlignment(2); } } -/// GetOffsetOf - Return the current offset of the specified machine instruction +/// getOffsetOf - Return the current offset of the specified machine instruction /// from the start of the function. This offset changes as stuff is moved /// around inside the function. -unsigned ARMConstantIslands::GetOffsetOf(MachineInstr *MI) const { +unsigned ARMConstantIslands::getOffsetOf(MachineInstr *MI) const { MachineBasicBlock *MBB = MI->getParent(); // The offset is composed of two things: the sum of the sizes of all MBB's // before this instruction's block, and the offset from the start of the block // it is in. - unsigned Offset = BBOffsets[MBB->getNumber()]; - - // If we're looking for a CONSTPOOL_ENTRY in Thumb, see if this block has - // alignment padding, and compensate if so. 
- if (isThumb && - MI->getOpcode() == ARM::CONSTPOOL_ENTRY && - (Offset%4 != 0 || HasInlineAsm)) - Offset += 2; + unsigned Offset = BBInfo[MBB->getNumber()].Offset; // Sum instructions before MI in MBB. - for (MachineBasicBlock::iterator I = MBB->begin(); ; ++I) { + for (MachineBasicBlock::iterator I = MBB->begin(); &*I != MI; ++I) { assert(I != MBB->end() && "Didn't find MI in its own basic block?"); - if (&*I == MI) return Offset; Offset += TII->GetInstSizeInBytes(I); } + return Offset; } /// CompareMBBNumbers - Little predicate function to sort the WaterList by MBB @@ -695,19 +858,16 @@ static bool CompareMBBNumbers(const MachineBasicBlock *LHS, return LHS->getNumber() < RHS->getNumber(); } -/// UpdateForInsertedWaterBlock - When a block is newly inserted into the +/// updateForInsertedWaterBlock - When a block is newly inserted into the /// machine function, it upsets all of the block numbers. Renumber the blocks /// and update the arrays that parallel this numbering. -void ARMConstantIslands::UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB) { +void ARMConstantIslands::updateForInsertedWaterBlock(MachineBasicBlock *NewBB) { // Renumber the MBB's to keep them consecutive. NewBB->getParent()->RenumberBlocks(NewBB); - // Insert a size into BBSizes to align it properly with the (newly + // Insert an entry into BBInfo to align it properly with the (newly // renumbered) block numbers. - BBSizes.insert(BBSizes.begin()+NewBB->getNumber(), 0); - - // Likewise for BBOffsets. - BBOffsets.insert(BBOffsets.begin()+NewBB->getNumber(), 0); + BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo()); // Next, update WaterList. Specifically, we need to add NewMBB as having // available water after it. @@ -721,15 +881,14 @@ void ARMConstantIslands::UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB) { /// Split the basic block containing MI into two blocks, which are joined by /// an unconditional branch. Update data structures and renumber blocks to /// account for this change and returns the newly created block. -MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) { +MachineBasicBlock *ARMConstantIslands::splitBlockBeforeInstr(MachineInstr *MI) { MachineBasicBlock *OrigBB = MI->getParent(); - MachineFunction &MF = *OrigBB->getParent(); // Create a new MBB for the code after the OrigBB. MachineBasicBlock *NewBB = - MF.CreateMachineBasicBlock(OrigBB->getBasicBlock()); + MF->CreateMachineBasicBlock(OrigBB->getBasicBlock()); MachineFunction::iterator MBBI = OrigBB; ++MBBI; - MF.insert(MBBI, NewBB); + MF->insert(MBBI, NewBB); // Splice the instructions starting with MI over to NewBB. NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end()); @@ -747,31 +906,19 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) { ++NumSplit; // Update the CFG. All succs of OrigBB are now succs of NewBB. - while (!OrigBB->succ_empty()) { - MachineBasicBlock *Succ = *OrigBB->succ_begin(); - OrigBB->removeSuccessor(Succ); - NewBB->addSuccessor(Succ); - - // This pass should be run after register allocation, so there should be no - // PHI nodes to update. - assert((Succ->empty() || !Succ->begin()->isPHI()) - && "PHI nodes should be eliminated by now!"); - } + NewBB->transferSuccessors(OrigBB); // OrigBB branches to NewBB. OrigBB->addSuccessor(NewBB); // Update internal data structures to account for the newly inserted MBB. 
- // This is almost the same as UpdateForInsertedWaterBlock, except that + // This is almost the same as updateForInsertedWaterBlock, except that // the Water goes after OrigBB, not NewBB. - MF.RenumberBlocks(NewBB); + MF->RenumberBlocks(NewBB); - // Insert a size into BBSizes to align it properly with the (newly + // Insert an entry into BBInfo to align it properly with the (newly // renumbered) block numbers. - BBSizes.insert(BBSizes.begin()+NewBB->getNumber(), 0); - - // Likewise for BBOffsets. - BBOffsets.insert(BBOffsets.begin()+NewBB->getNumber(), 0); + BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo()); // Next, update WaterList. Specifically, we need to add OrigMBB as having // available water after it (but not if it's already there, which happens @@ -787,86 +934,56 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) { WaterList.insert(IP, OrigBB); NewWaterList.insert(OrigBB); - unsigned OrigBBI = OrigBB->getNumber(); - unsigned NewBBI = NewBB->getNumber(); - - int delta = isThumb1 ? 2 : 4; - // Figure out how large the OrigBB is. As the first half of the original // block, it cannot contain a tablejump. The size includes // the new jump we added. (It should be possible to do this without // recounting everything, but it's very confusing, and this is rarely // executed.) - unsigned OrigBBSize = 0; - for (MachineBasicBlock::iterator I = OrigBB->begin(), E = OrigBB->end(); - I != E; ++I) - OrigBBSize += TII->GetInstSizeInBytes(I); - BBSizes[OrigBBI] = OrigBBSize; - - // ...and adjust BBOffsets for NewBB accordingly. - BBOffsets[NewBBI] = BBOffsets[OrigBBI] + BBSizes[OrigBBI]; + computeBlockSize(OrigBB); // Figure out how large the NewMBB is. As the second half of the original // block, it may contain a tablejump. - unsigned NewBBSize = 0; - for (MachineBasicBlock::iterator I = NewBB->begin(), E = NewBB->end(); - I != E; ++I) - NewBBSize += TII->GetInstSizeInBytes(I); - // Set the size of NewBB in BBSizes. It does not include any padding now. - BBSizes[NewBBI] = NewBBSize; - - MachineInstr* ThumbJTMI = prior(NewBB->end()); - if (ThumbJTMI->getOpcode() == ARM::tBR_JTr) { - // We've added another 2-byte instruction before this tablejump, which - // means we will always need padding if we didn't before, and vice versa. - - // The original offset of the jump instruction was: - unsigned OrigOffset = BBOffsets[OrigBBI] + BBSizes[OrigBBI] - delta; - if (OrigOffset%4 == 0) { - // We had padding before and now we don't. No net change in code size. - delta = 0; - } else { - // We didn't have padding before and now we do. - BBSizes[NewBBI] += 2; - delta = 4; - } - } + computeBlockSize(NewBB); // All BBOffsets following these blocks must be modified. - if (delta) - AdjustBBOffsetsAfter(NewBB, delta); + adjustBBOffsetsAfter(OrigBB); return NewBB; } -/// OffsetIsInRange - Checks whether UserOffset (the location of a constant pool +/// getUserOffset - Compute the offset of U.MI as seen by the hardware +/// displacement computation. Update U.KnownAlignment to match its current +/// basic block location. +unsigned ARMConstantIslands::getUserOffset(CPUser &U) const { + unsigned UserOffset = getOffsetOf(U.MI); + const BasicBlockInfo &BBI = BBInfo[U.MI->getParent()->getNumber()]; + unsigned KnownBits = BBI.internalKnownBits(); + + // The value read from PC is offset from the actual instruction address. + UserOffset += (isThumb ? 4 : 8); + + // Because of inline assembly, we may not know the alignment (mod 4) of U.MI. 
+ // Make sure U.getMaxDisp() returns a constrained range. + U.KnownAlignment = (KnownBits >= 2); + + // On Thumb, offsets==2 mod 4 are rounded down by the hardware for + // purposes of the displacement computation; compensate for that here. + // For unknown alignments, getMaxDisp() constrains the range instead. + if (isThumb && U.KnownAlignment) + UserOffset &= ~3u; + + return UserOffset; +} + +/// isOffsetInRange - Checks whether UserOffset (the location of a constant pool /// reference) is within MaxDisp of TrialOffset (a proposed location of a /// constant pool entry). -bool ARMConstantIslands::OffsetIsInRange(unsigned UserOffset, +/// UserOffset is computed by getUserOffset above to include PC adjustments. If +/// the mod 4 alignment of UserOffset is not known, the uncertainty must be +/// subtracted from MaxDisp instead. CPUser::getMaxDisp() does that. +bool ARMConstantIslands::isOffsetInRange(unsigned UserOffset, unsigned TrialOffset, unsigned MaxDisp, bool NegativeOK, bool IsSoImm) { - // On Thumb offsets==2 mod 4 are rounded down by the hardware for - // purposes of the displacement computation; compensate for that here. - // Effectively, the valid range of displacements is 2 bytes smaller for such - // references. - unsigned TotalAdj = 0; - if (isThumb && UserOffset%4 !=0) { - UserOffset -= 2; - TotalAdj = 2; - } - // CPEs will be rounded up to a multiple of 4. - if (isThumb && TrialOffset%4 != 0) { - TrialOffset += 2; - TotalAdj += 2; - } - - // In Thumb2 mode, later branch adjustments can shift instructions up and - // cause alignment change. In the worst case scenario this can cause the - // user's effective address to be subtracted by 2 and the CPE's address to - // be plus 2. - if (isThumb2 && TotalAdj != 4) - MaxDisp -= (4 - TotalAdj); - if (UserOffset <= TrialOffset) { // User before the Trial. if (TrialOffset - UserOffset <= MaxDisp) @@ -880,40 +997,71 @@ bool ARMConstantIslands::OffsetIsInRange(unsigned UserOffset, return false; } -/// WaterIsInRange - Returns true if a CPE placed after the specified +/// isWaterInRange - Returns true if a CPE placed after the specified /// Water (a basic block) will be in range for the specific MI. - -bool ARMConstantIslands::WaterIsInRange(unsigned UserOffset, - MachineBasicBlock* Water, CPUser &U) { - unsigned MaxDisp = U.MaxDisp; - unsigned CPEOffset = BBOffsets[Water->getNumber()] + - BBSizes[Water->getNumber()]; - - // If the CPE is to be inserted before the instruction, that will raise - // the offset of the instruction. - if (CPEOffset < UserOffset) - UserOffset += U.CPEMI->getOperand(2).getImm(); - - return OffsetIsInRange(UserOffset, CPEOffset, MaxDisp, U.NegOk, U.IsSoImm); +/// +/// Compute how much the function will grow by inserting a CPE after Water. +bool ARMConstantIslands::isWaterInRange(unsigned UserOffset, + MachineBasicBlock* Water, CPUser &U, + unsigned &Growth) { + unsigned CPELogAlign = getCPELogAlign(U.CPEMI); + unsigned CPEOffset = BBInfo[Water->getNumber()].postOffset(CPELogAlign); + unsigned NextBlockOffset, NextBlockAlignment; + MachineFunction::const_iterator NextBlock = Water; + if (++NextBlock == MF->end()) { + NextBlockOffset = BBInfo[Water->getNumber()].postOffset(); + NextBlockAlignment = 0; + } else { + NextBlockOffset = BBInfo[NextBlock->getNumber()].Offset; + NextBlockAlignment = NextBlock->getAlignment(); + } + unsigned Size = U.CPEMI->getOperand(2).getImm(); + unsigned CPEEnd = CPEOffset + Size; + + // The CPE may be able to hide in the alignment padding before the next + // block. 
It may also cause more padding to be required if it is more aligned + // than the next block. + if (CPEEnd > NextBlockOffset) { + Growth = CPEEnd - NextBlockOffset; + // Compute the padding that would go at the end of the CPE to align the next + // block. + Growth += OffsetToAlignment(CPEEnd, 1u << NextBlockAlignment); + + // If the CPE is to be inserted before the instruction, that will raise + // the offset of the instruction. Also account for unknown alignment padding + // in blocks between CPE and the user. + if (CPEOffset < UserOffset) + UserOffset += Growth + UnknownPadding(MF->getAlignment(), CPELogAlign); + } else + // CPE fits in existing padding. + Growth = 0; + + return isOffsetInRange(UserOffset, CPEOffset, U); } -/// CPEIsInRange - Returns true if the distance between specific MI and +/// isCPEntryInRange - Returns true if the distance between specific MI and /// specific ConstPool entry instruction can fit in MI's displacement field. -bool ARMConstantIslands::CPEIsInRange(MachineInstr *MI, unsigned UserOffset, +bool ARMConstantIslands::isCPEntryInRange(MachineInstr *MI, unsigned UserOffset, MachineInstr *CPEMI, unsigned MaxDisp, bool NegOk, bool DoDump) { - unsigned CPEOffset = GetOffsetOf(CPEMI); - assert((CPEOffset%4 == 0 || HasInlineAsm) && "Misaligned CPE"); + unsigned CPEOffset = getOffsetOf(CPEMI); + assert(CPEOffset % 4 == 0 && "Misaligned CPE"); if (DoDump) { - DEBUG(errs() << "User of CPE#" << CPEMI->getOperand(0).getImm() - << " max delta=" << MaxDisp - << " insn address=" << UserOffset - << " CPE address=" << CPEOffset - << " offset=" << int(CPEOffset-UserOffset) << "\t" << *MI); + DEBUG({ + unsigned Block = MI->getParent()->getNumber(); + const BasicBlockInfo &BBI = BBInfo[Block]; + dbgs() << "User of CPE#" << CPEMI->getOperand(0).getImm() + << " max delta=" << MaxDisp + << format(" insn address=%#x", UserOffset) + << " in BB#" << Block << ": " + << format("%#x-%x\t", BBI.Offset, BBI.postOffset()) << *MI + << format("CPE address=%#x offset=%+d: ", CPEOffset, + int(CPEOffset-UserOffset)); + }); } - return OffsetIsInRange(UserOffset, CPEOffset, MaxDisp, NegOk); + return isOffsetInRange(UserOffset, CPEOffset, MaxDisp, NegOk); } #ifndef NDEBUG @@ -933,69 +1081,40 @@ static bool BBIsJumpedOver(MachineBasicBlock *MBB) { } #endif // NDEBUG -void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB, - int delta) { - MachineFunction::iterator MBBI = BB; MBBI = llvm::next(MBBI); - for(unsigned i = BB->getNumber()+1, e = BB->getParent()->getNumBlockIDs(); - i < e; ++i) { - BBOffsets[i] += delta; - // If some existing blocks have padding, adjust the padding as needed, a - // bit tricky. delta can be negative so don't use % on that. - if (!isThumb) - continue; - MachineBasicBlock *MBB = MBBI; - if (!MBB->empty() && !HasInlineAsm) { - // Constant pool entries require padding. - if (MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) { - unsigned OldOffset = BBOffsets[i] - delta; - if ((OldOffset%4) == 0 && (BBOffsets[i]%4) != 0) { - // add new padding - BBSizes[i] += 2; - delta += 2; - } else if ((OldOffset%4) != 0 && (BBOffsets[i]%4) == 0) { - // remove existing padding - BBSizes[i] -= 2; - delta -= 2; - } - } - // Thumb1 jump tables require padding. They should be at the end; - // following unconditional branches are removed by AnalyzeBranch. - // tBR_JTr expands to a mov pc followed by .align 2 and then the jump - // table entries.
So this code checks whether offset of tBR_JTr - // is aligned; if it is, the offset of the jump table following the - // instruction will not be aligned, and we need padding. - MachineInstr *ThumbJTMI = prior(MBB->end()); - if (ThumbJTMI->getOpcode() == ARM::tBR_JTr) { - unsigned NewMIOffset = GetOffsetOf(ThumbJTMI); - unsigned OldMIOffset = NewMIOffset - delta; - if ((OldMIOffset%4) == 0 && (NewMIOffset%4) != 0) { - // remove existing padding - BBSizes[i] -= 2; - delta -= 2; - } else if ((OldMIOffset%4) != 0 && (NewMIOffset%4) == 0) { - // add new padding - BBSizes[i] += 2; - delta += 2; - } - } - if (delta==0) - return; - } - MBBI = llvm::next(MBBI); +void ARMConstantIslands::adjustBBOffsetsAfter(MachineBasicBlock *BB) { + unsigned BBNum = BB->getNumber(); + for(unsigned i = BBNum + 1, e = MF->getNumBlockIDs(); i < e; ++i) { + // Get the offset and known bits at the end of the layout predecessor. + // Include the alignment of the current block. + unsigned LogAlign = MF->getBlockNumbered(i)->getAlignment(); + unsigned Offset = BBInfo[i - 1].postOffset(LogAlign); + unsigned KnownBits = BBInfo[i - 1].postKnownBits(LogAlign); + + // This is where block i begins. Stop if the offset is already correct, + // and we have updated 2 blocks. This is the maximum number of blocks + // changed before calling this function. + if (i > BBNum + 2 && + BBInfo[i].Offset == Offset && + BBInfo[i].KnownBits == KnownBits) + break; + + BBInfo[i].Offset = Offset; + BBInfo[i].KnownBits = KnownBits; } } -/// DecrementOldEntry - find the constant pool entry with index CPI +/// decrementCPEReferenceCount - find the constant pool entry with index CPI /// and instruction CPEMI, and decrement its refcount. If the refcount /// becomes 0 remove the entry and instruction. Returns true if we removed /// the entry, false if we didn't. -bool ARMConstantIslands::DecrementOldEntry(unsigned CPI, MachineInstr *CPEMI) { +bool ARMConstantIslands::decrementCPEReferenceCount(unsigned CPI, + MachineInstr *CPEMI) { // Find the old entry. Eliminate it if it is no longer used. CPEntry *CPE = findConstPoolEntry(CPI, CPEMI); assert(CPE && "Unexpected!"); if (--CPE->RefCount == 0) { - RemoveDeadCPEMI(CPEMI); + removeDeadCPEMI(CPEMI); CPE->CPEMI = NULL; --NumCPEs; return true; @@ -1009,14 +1128,15 @@ bool ARMConstantIslands::DecrementOldEntry(unsigned CPI, MachineInstr *CPEMI) { /// 0 = no existing entry found /// 1 = entry found, and there were no code insertions or deletions /// 2 = entry found, and there were code insertions or deletions -int ARMConstantIslands::LookForExistingCPEntry(CPUser& U, unsigned UserOffset) +int ARMConstantIslands::findInRangeCPEntry(CPUser& U, unsigned UserOffset) { MachineInstr *UserMI = U.MI; MachineInstr *CPEMI = U.CPEMI; // Check to see if the CPE is already in-range. 
- if (CPEIsInRange(UserMI, UserOffset, CPEMI, U.MaxDisp, U.NegOk, true)) { - DEBUG(errs() << "In range\n"); + if (isCPEntryInRange(UserMI, UserOffset, CPEMI, U.getMaxDisp(), U.NegOk, + true)) { + DEBUG(dbgs() << "In range\n"); return 1; } @@ -1030,8 +1150,9 @@ int ARMConstantIslands::LookForExistingCPEntry(CPUser& U, unsigned UserOffset) // Removing CPEs can leave empty entries, skip if (CPEs[i].CPEMI == NULL) continue; - if (CPEIsInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.MaxDisp, U.NegOk)) { - DEBUG(errs() << "Replacing CPE#" << CPI << " with CPE#" + if (isCPEntryInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.getMaxDisp(), + U.NegOk)) { + DEBUG(dbgs() << "Replacing CPE#" << CPI << " with CPE#" << CPEs[i].CPI << "\n"); // Point the CPUser node to the replacement U.CPEMI = CPEs[i].CPEMI; @@ -1045,7 +1166,7 @@ int ARMConstantIslands::LookForExistingCPEntry(CPUser& U, unsigned UserOffset) CPEs[i].RefCount++; // ...and the original. If we didn't remove the old entry, none of the // addresses changed, so we don't need another pass. - return DecrementOldEntry(CPI, CPEMI) ? 2 : 1; + return decrementCPEReferenceCount(CPI, CPEMI) ? 2 : 1; } } return 0; @@ -1066,7 +1187,7 @@ static inline unsigned getUnconditionalBrDisp(int Opc) { return ((1<<23)-1)*4; } -/// LookForWater - Look for an existing entry in the WaterList in which +/// findAvailableWater - Look for an existing entry in the WaterList in which /// we can place the CPE referenced from U so it's within range of U's MI. /// Returns true if found, false if not. If it returns true, WaterIter /// is set to the WaterList entry. For Thumb, prefer water that will not @@ -1074,15 +1195,14 @@ static inline unsigned getUnconditionalBrDisp(int Opc) { /// terminates, the CPE location for a particular CPUser is only allowed to /// move to a lower address, so search backward from the end of the list and /// prefer the first water that is in range. -bool ARMConstantIslands::LookForWater(CPUser &U, unsigned UserOffset, +bool ARMConstantIslands::findAvailableWater(CPUser &U, unsigned UserOffset, water_iterator &WaterIter) { if (WaterList.empty()) return false; - bool FoundWaterThatWouldPad = false; - water_iterator IPThatWouldPad; - for (water_iterator IP = prior(WaterList.end()), - B = WaterList.begin();; --IP) { + unsigned BestGrowth = ~0u; + for (water_iterator IP = prior(WaterList.end()), B = WaterList.begin();; + --IP) { MachineBasicBlock* WaterBB = *IP; // Check if water is in range and is either at a lower address than the // current "high water mark" or a new water block that was created since @@ -1092,166 +1212,186 @@ bool ARMConstantIslands::LookForWater(CPUser &U, unsigned UserOffset, // should be relatively uncommon and when it does happen, we want to be // sure to take advantage of it for all the CPEs near that block, so that // we don't insert more branches than necessary. - if (WaterIsInRange(UserOffset, WaterBB, U) && + unsigned Growth; + if (isWaterInRange(UserOffset, WaterBB, U, Growth) && (WaterBB->getNumber() < U.HighWaterMark->getNumber() || - NewWaterList.count(WaterBB))) { - unsigned WBBId = WaterBB->getNumber(); - if (isThumb && - (BBOffsets[WBBId] + BBSizes[WBBId])%4 != 0) { - // This is valid Water, but would introduce padding. Remember - // it in case we don't find any Water that doesn't do this. 
- if (!FoundWaterThatWouldPad) { - FoundWaterThatWouldPad = true; - IPThatWouldPad = IP; - } - } else { - WaterIter = IP; + NewWaterList.count(WaterBB)) && Growth < BestGrowth) { + // This is the least amount of required padding seen so far. + BestGrowth = Growth; + WaterIter = IP; + DEBUG(dbgs() << "Found water after BB#" << WaterBB->getNumber() + << " Growth=" << Growth << '\n'); + + // Keep looking unless it is perfect. + if (BestGrowth == 0) return true; - } } if (IP == B) break; } - if (FoundWaterThatWouldPad) { - WaterIter = IPThatWouldPad; - return true; - } - return false; + return BestGrowth != ~0u; } -/// CreateNewWater - No existing WaterList entry will work for +/// createNewWater - No existing WaterList entry will work for /// CPUsers[CPUserIndex], so create a place to put the CPE. The end of the /// block is used if in range, and the conditional branch munged so control /// flow is correct. Otherwise the block is split to create a hole with an /// unconditional branch around it. In either case NewMBB is set to a /// block following which the new island can be inserted (the WaterList /// is not adjusted). -void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex, +void ARMConstantIslands::createNewWater(unsigned CPUserIndex, unsigned UserOffset, MachineBasicBlock *&NewMBB) { CPUser &U = CPUsers[CPUserIndex]; MachineInstr *UserMI = U.MI; MachineInstr *CPEMI = U.CPEMI; + unsigned CPELogAlign = getCPELogAlign(CPEMI); MachineBasicBlock *UserMBB = UserMI->getParent(); - unsigned OffsetOfNextBlock = BBOffsets[UserMBB->getNumber()] + - BBSizes[UserMBB->getNumber()]; - assert(OffsetOfNextBlock== BBOffsets[UserMBB->getNumber()+1]); + const BasicBlockInfo &UserBBI = BBInfo[UserMBB->getNumber()]; // If the block does not end in an unconditional branch already, and if the // end of the block is within range, make new water there. (The addition // below is for the unconditional branch we will be adding: 4 bytes on ARM + - // Thumb2, 2 on Thumb1. Possible Thumb1 alignment padding is allowed for - // inside OffsetIsInRange. - if (BBHasFallthrough(UserMBB) && - OffsetIsInRange(UserOffset, OffsetOfNextBlock + (isThumb1 ? 2: 4), - U.MaxDisp, U.NegOk, U.IsSoImm)) { - DEBUG(errs() << "Split at end of block\n"); - if (&UserMBB->back() == UserMI) - assert(BBHasFallthrough(UserMBB) && "Expected a fallthrough BB!"); - NewMBB = llvm::next(MachineFunction::iterator(UserMBB)); - // Add an unconditional branch from UserMBB to fallthrough block. - // Record it for branch lengthening; this new branch will not get out of - // range, but if the preceding conditional branch is out of range, the - // targets will be exchanged, and the altered branch may be out of - // range, so the machinery has to know about it. - int UncondBr = isThumb ? ((isThumb2) ? ARM::t2B : ARM::tB) : ARM::B; - if (!isThumb) - BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB); - else - BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB) - .addImm(ARMCC::AL).addReg(0); - unsigned MaxDisp = getUnconditionalBrDisp(UncondBr); - ImmBranches.push_back(ImmBranch(&UserMBB->back(), - MaxDisp, false, UncondBr)); - int delta = isThumb1 ? 2 : 4; - BBSizes[UserMBB->getNumber()] += delta; - AdjustBBOffsetsAfter(UserMBB, delta); - } else { - // What a big block. Find a place within the block to split it. 
- // This is a little tricky on Thumb1 since instructions are 2 bytes - // and constant pool entries are 4 bytes: if instruction I references - // island CPE, and instruction I+1 references CPE', it will - // not work well to put CPE as far forward as possible, since then - // CPE' cannot immediately follow it (that location is 2 bytes - // farther away from I+1 than CPE was from I) and we'd need to create - // a new island. So, we make a first guess, then walk through the - // instructions between the one currently being looked at and the - // possible insertion point, and make sure any other instructions - // that reference CPEs will be able to use the same island area; - // if not, we back up the insertion point. - - // The 4 in the following is for the unconditional branch we'll be - // inserting (allows for long branch on Thumb1). Alignment of the - // island is handled inside OffsetIsInRange. - unsigned BaseInsertOffset = UserOffset + U.MaxDisp -4; - // This could point off the end of the block if we've already got - // constant pool entries following this block; only the last one is - // in the water list. Back past any possible branches (allow for a - // conditional and a maximally long unconditional). - if (BaseInsertOffset >= BBOffsets[UserMBB->getNumber()+1]) - BaseInsertOffset = BBOffsets[UserMBB->getNumber()+1] - - (isThumb1 ? 6 : 8); - unsigned EndInsertOffset = BaseInsertOffset + - CPEMI->getOperand(2).getImm(); - MachineBasicBlock::iterator MI = UserMI; - ++MI; - unsigned CPUIndex = CPUserIndex+1; - unsigned NumCPUsers = CPUsers.size(); - MachineInstr *LastIT = 0; - for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI); - Offset < BaseInsertOffset; - Offset += TII->GetInstSizeInBytes(MI), - MI = llvm::next(MI)) { - if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == MI) { - CPUser &U = CPUsers[CPUIndex]; - if (!OffsetIsInRange(Offset, EndInsertOffset, - U.MaxDisp, U.NegOk, U.IsSoImm)) { - BaseInsertOffset -= (isThumb1 ? 2 : 4); - EndInsertOffset -= (isThumb1 ? 2 : 4); - } - // This is overly conservative, as we don't account for CPEMIs - // being reused within the block, but it doesn't matter much. - EndInsertOffset += CPUsers[CPUIndex].CPEMI->getOperand(2).getImm(); - CPUIndex++; - } + // Thumb2, 2 on Thumb1. + if (BBHasFallthrough(UserMBB)) { + // Size of branch to insert. + unsigned Delta = isThumb1 ? 2 : 4; + // End of UserBlock after adding a branch. + unsigned UserBlockEnd = UserBBI.postOffset() + Delta; + // Compute the offset where the CPE will begin. + unsigned CPEOffset = WorstCaseAlign(UserBlockEnd, CPELogAlign, + UserBBI.postKnownBits()); + + if (isOffsetInRange(UserOffset, CPEOffset, U)) { + DEBUG(dbgs() << "Split at end of BB#" << UserMBB->getNumber() + << format(", expected CPE offset %#x\n", CPEOffset)); + NewMBB = llvm::next(MachineFunction::iterator(UserMBB)); + // Add an unconditional branch from UserMBB to fallthrough block. Record + // it for branch lengthening; this new branch will not get out of range, + // but if the preceding conditional branch is out of range, the targets + // will be exchanged, and the altered branch may be out of range, so the + // machinery has to know about it. + int UncondBr = isThumb ? ((isThumb2) ? 
ARM::t2B : ARM::tB) : ARM::B; + if (!isThumb) + BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB); + else + BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB) + .addImm(ARMCC::AL).addReg(0); + unsigned MaxDisp = getUnconditionalBrDisp(UncondBr); + ImmBranches.push_back(ImmBranch(&UserMBB->back(), + MaxDisp, false, UncondBr)); + BBInfo[UserMBB->getNumber()].Size += Delta; + adjustBBOffsetsAfter(UserMBB); + return; + } + } - // Remember the last IT instruction. - if (MI->getOpcode() == ARM::t2IT) - LastIT = MI; + // What a big block. Find a place within the block to split it. This is a + // little tricky on Thumb1 since instructions are 2 bytes and constant pool + // entries are 4 bytes: if instruction I references island CPE, and + // instruction I+1 references CPE', it will not work well to put CPE as far + // forward as possible, since then CPE' cannot immediately follow it (that + // location is 2 bytes farther away from I+1 than CPE was from I) and we'd + // need to create a new island. So, we make a first guess, then walk through + // the instructions between the one currently being looked at and the + // possible insertion point, and make sure any other instructions that + // reference CPEs will be able to use the same island area; if not, we back + // up the insertion point. + + // Try to split the block so it's fully aligned. Compute the latest split + // point where we can add a 4-byte branch instruction, and then + // WorstCaseAlign to LogAlign. + unsigned LogAlign = MF->getAlignment(); + assert(LogAlign >= CPELogAlign && "Over-aligned constant pool entry"); + unsigned KnownBits = UserBBI.internalKnownBits(); + unsigned UPad = UnknownPadding(LogAlign, KnownBits); + unsigned BaseInsertOffset = UserOffset + U.getMaxDisp(); + DEBUG(dbgs() << format("Split in middle of big block before %#x", + BaseInsertOffset)); + + // Account for alignment and unknown padding. + BaseInsertOffset &= ~((1u << LogAlign) - 1); + BaseInsertOffset -= UPad; + + // The 4 in the following is for the unconditional branch we'll be inserting + // (allows for long branch on Thumb1). Alignment of the island is handled + // inside isOffsetInRange. + BaseInsertOffset -= 4; + + DEBUG(dbgs() << format(", adjusted to %#x", BaseInsertOffset) + << " la=" << LogAlign + << " kb=" << KnownBits + << " up=" << UPad << '\n'); + + // This could point off the end of the block if we've already got constant + // pool entries following this block; only the last one is in the water list. + // Back past any possible branches (allow for a conditional and a maximally + // long unconditional). + if (BaseInsertOffset >= BBInfo[UserMBB->getNumber()+1].Offset) + BaseInsertOffset = BBInfo[UserMBB->getNumber()+1].Offset - + (isThumb1 ? 6 : 8); + unsigned EndInsertOffset = + WorstCaseAlign(BaseInsertOffset + 4, LogAlign, KnownBits) + + CPEMI->getOperand(2).getImm(); + MachineBasicBlock::iterator MI = UserMI; + ++MI; + unsigned CPUIndex = CPUserIndex+1; + unsigned NumCPUsers = CPUsers.size(); + MachineInstr *LastIT = 0; + for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI); + Offset < BaseInsertOffset; + Offset += TII->GetInstSizeInBytes(MI), + MI = llvm::next(MI)) { + if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == MI) { + CPUser &U = CPUsers[CPUIndex]; + if (!isOffsetInRange(Offset, EndInsertOffset, U)) { + // Shift insertion point by one unit of alignment so it is within reach.
+ BaseInsertOffset -= 1u << LogAlign; + EndInsertOffset -= 1u << LogAlign; + } + // This is overly conservative, as we don't account for CPEMIs being + // reused within the block, but it doesn't matter much. Also assume CPEs + // are added in order with alignment padding. We may eventually be able + // to pack the aligned CPEs better. + EndInsertOffset = RoundUpToAlignment(EndInsertOffset, + 1u << getCPELogAlign(U.CPEMI)) + + U.CPEMI->getOperand(2).getImm(); + CPUIndex++; } - DEBUG(errs() << "Split in middle of big block\n"); - --MI; + // Remember the last IT instruction. + if (MI->getOpcode() == ARM::t2IT) + LastIT = MI; + } + + --MI; - // Avoid splitting an IT block. - if (LastIT) { - unsigned PredReg = 0; - ARMCC::CondCodes CC = llvm::getITInstrPredicate(MI, PredReg); - if (CC != ARMCC::AL) - MI = LastIT; - } - NewMBB = SplitBlockBeforeInstr(MI); + // Avoid splitting an IT block. + if (LastIT) { + unsigned PredReg = 0; + ARMCC::CondCodes CC = getITInstrPredicate(MI, PredReg); + if (CC != ARMCC::AL) + MI = LastIT; } + NewMBB = splitBlockBeforeInstr(MI); } -/// HandleConstantPoolUser - Analyze the specified user, checking to see if it +/// handleConstantPoolUser - Analyze the specified user, checking to see if it /// is out-of-range. If so, pick up the constant pool value and move it some /// place in-range. Return true if we changed any addresses (thus must run /// another pass of branch lengthening), false otherwise. -bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF, - unsigned CPUserIndex) { +bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) { CPUser &U = CPUsers[CPUserIndex]; MachineInstr *UserMI = U.MI; MachineInstr *CPEMI = U.CPEMI; unsigned CPI = CPEMI->getOperand(1).getIndex(); unsigned Size = CPEMI->getOperand(2).getImm(); - // Compute this only once, it's expensive. The 4 or 8 is the value the - // hardware keeps in the PC. - unsigned UserOffset = GetOffsetOf(UserMI) + (isThumb ? 4 : 8); + // Compute this only once, it's expensive. + unsigned UserOffset = getUserOffset(U); // See if the current entry is within range, or there is a clone of it // in range. - int result = LookForExistingCPEntry(U, UserOffset); + int result = findInRangeCPEntry(U, UserOffset); if (result==1) return false; else if (result==2) return true; @@ -1260,11 +1400,11 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF, unsigned ID = AFI->createPICLabelUId(); // Look for water where we can place this CPE. - MachineBasicBlock *NewIsland = MF.CreateMachineBasicBlock(); + MachineBasicBlock *NewIsland = MF->CreateMachineBasicBlock(); MachineBasicBlock *NewMBB; water_iterator IP; - if (LookForWater(U, UserOffset, IP)) { - DEBUG(errs() << "found water in range\n"); + if (findAvailableWater(U, UserOffset, IP)) { + DEBUG(dbgs() << "Found water in range\n"); MachineBasicBlock *WaterBB = *IP; // If the original WaterList entry was "new water" on this iteration, @@ -1279,10 +1419,10 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF, } else { // No water found. - DEBUG(errs() << "No water found\n"); - CreateNewWater(CPUserIndex, UserOffset, NewMBB); + DEBUG(dbgs() << "No water found\n"); + createNewWater(CPUserIndex, UserOffset, NewMBB); - // SplitBlockBeforeInstr adds to WaterList, which is important when it is + // splitBlockBeforeInstr adds to WaterList, which is important when it is // called while handling branches so that the water will be seen on the // next iteration for constant pools, but in this context, we don't want // it. 
Check for this so it will be removed from the WaterList. @@ -1304,13 +1444,13 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF, WaterList.erase(IP); // Okay, we know we can put an island before NewMBB now, do it! - MF.insert(NewMBB, NewIsland); + MF->insert(NewMBB, NewIsland); // Update internal data structures to account for the newly inserted MBB. - UpdateForInsertedWaterBlock(NewIsland); + updateForInsertedWaterBlock(NewIsland); // Decrement the old entry, and remove it if refcount becomes 0. - DecrementOldEntry(CPI, CPEMI); + decrementCPEReferenceCount(CPI, CPEMI); // Now that we have an island to add the CPE to, clone the original CPE and // add it to the island. @@ -1320,13 +1460,12 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF, CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1)); ++NumCPEs; - BBOffsets[NewIsland->getNumber()] = BBOffsets[NewMBB->getNumber()]; - // Compensate for .align 2 in thumb mode. - if (isThumb && (BBOffsets[NewIsland->getNumber()]%4 != 0 || HasInlineAsm)) - Size += 2; + // Mark the basic block as aligned as required by the const-pool entry. + NewIsland->setAlignment(getCPELogAlign(U.CPEMI)); + // Increase the size of the island block to account for the new entry. - BBSizes[NewIsland->getNumber()] += Size; - AdjustBBOffsetsAfter(NewIsland, Size); + BBInfo[NewIsland->getNumber()].Size += Size; + adjustBBOffsetsAfter(llvm::prior(MachineFunction::iterator(NewIsland))); // Finally, change the CPI in the instruction operand to be ID. for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i) @@ -1335,31 +1474,30 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF, break; } - DEBUG(errs() << " Moved CPE to #" << ID << " CPI=" << CPI - << '\t' << *UserMI); + DEBUG(dbgs() << " Moved CPE to #" << ID << " CPI=" << CPI + << format(" offset=%#x\n", BBInfo[NewIsland->getNumber()].Offset)); return true; } -/// RemoveDeadCPEMI - Remove a dead constant pool entry instruction. Update +/// removeDeadCPEMI - Remove a dead constant pool entry instruction. Update /// sizes and offsets of impacted basic blocks. -void ARMConstantIslands::RemoveDeadCPEMI(MachineInstr *CPEMI) { +void ARMConstantIslands::removeDeadCPEMI(MachineInstr *CPEMI) { MachineBasicBlock *CPEBB = CPEMI->getParent(); unsigned Size = CPEMI->getOperand(2).getImm(); CPEMI->eraseFromParent(); - BBSizes[CPEBB->getNumber()] -= Size; + BBInfo[CPEBB->getNumber()].Size -= Size; // All succeeding offsets have the current size value added in, fix this. if (CPEBB->empty()) { - // In thumb1 mode, the size of island may be padded by two to compensate for - // the alignment requirement. Then it will now be 2 when the block is - // empty, so fix this. - // All succeeding offsets have the current size value added in, fix this. - if (BBSizes[CPEBB->getNumber()] != 0) { - Size += BBSizes[CPEBB->getNumber()]; - BBSizes[CPEBB->getNumber()] = 0; - } - } - AdjustBBOffsetsAfter(CPEBB, -Size); + BBInfo[CPEBB->getNumber()].Size = 0; + + // This block no longer needs to be aligned. <rdar://problem/10534709>. + CPEBB->setAlignment(0); + } else + // Entries are sorted by descending alignment, so realign from the front. + CPEBB->setAlignment(getCPELogAlign(CPEBB->begin())); + + adjustBBOffsetsAfter(CPEBB); // An island has only one predecessor BB and one successor BB. Check if // this BB's predecessor jumps directly to this BB's successor. This // shouldn't happen currently. 
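The hunks above replace the pass's old ad-hoc 2-byte padding fixups with worst-case alignment arithmetic over per-block BasicBlockInfo records (an Offset, a Size, the low offset bits known to be zero, and a post-alignment requirement). Below is a minimal standalone C++ sketch of that arithmetic, reconstructed from the helpers the patch calls (UnknownPadding, WorstCaseAlign, internalKnownBits, postOffset); the bodies are assumptions inferred from the call sites, not a verbatim copy of this revision:

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    // Round Value up to the next multiple of Align (Align is a power of 2).
    static unsigned RoundUpToAlignment(unsigned Value, unsigned Align) {
      return (Value + Align - 1) & ~(Align - 1);
    }

    // Worst-case padding caused by the *unknown* low offset bits: if only the
    // low KnownBits bits are known to be zero, aligning to 1 << LogAlign may
    // require up to (1 << LogAlign) - (1 << KnownBits) extra bytes.
    static unsigned UnknownPadding(unsigned LogAlign, unsigned KnownBits) {
      if (KnownBits < LogAlign)
        return (1u << LogAlign) - (1u << KnownBits);
      return 0;
    }

    // Assuming only the low KnownBits bits of Offset are exact, return the
    // worst-case offset after aligning to 1 << LogAlign.
    static unsigned WorstCaseAlign(unsigned Offset, unsigned LogAlign,
                                   unsigned KnownBits) {
      Offset += UnknownPadding(LogAlign, KnownBits); // budget the unknown bits
      return RoundUpToAlignment(Offset, 1u << LogAlign);
    }

    // Per-block record in the same spirit as the BBInfo entries the patch
    // maintains: layout offset and size in bytes, plus alignment knowledge.
    struct BasicBlockInfo {
      unsigned Offset = 0;   // start of block, from the function start
      unsigned Size = 0;     // sum of instruction sizes
      uint8_t KnownBits = 0; // low bits of Offset known to be zero
      uint8_t Unalign = 0;   // nonzero if Size is only known modulo 2^Unalign
      uint8_t PostAlign = 0; // alignment required by the following block

      // Low bits of (Offset + Size) known zero from internal knowledge only
      // (e.g. inline asm makes the size, hence the end offset, uncertain).
      unsigned internalKnownBits() const {
        return Unalign ? Unalign : KnownBits;
      }

      // Conservative offset of the first byte after this block, once the
      // next block is aligned to max(PostAlign, LogAlign).
      unsigned postOffset(unsigned LogAlign = 0) const {
        unsigned PO = Offset + Size;
        unsigned LA = std::max(unsigned(PostAlign), LogAlign);
        if (!LA)
          return PO;
        return WorstCaseAlign(PO, LA, internalKnownBits());
      }
    };

    int main() {
      BasicBlockInfo BBI;
      BBI.Size = 10;    // block containing inline asm: exact size uncertain
      BBI.Unalign = 1;  // size known only modulo 2
      // Placing a 4-byte-aligned constant pool island next (LogAlign = 2)
      // budgets up to 2 bytes of padding: WorstCaseAlign(10, 2, 1) == 12.
      assert(BBI.postOffset(2) == 12);
      return 0;
    }

This is why the rewritten handleConstantPoolUser above only marks the new island with setAlignment(getCPELogAlign(...)) and bumps BBInfo Size; adjustBBOffsetsAfter can then rederive every later offset from postOffset/postKnownBits instead of threading an explicit delta through the padding special cases.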
@@ -1367,15 +1505,15 @@ void ARMConstantIslands::RemoveDeadCPEMI(MachineInstr *CPEMI) { // FIXME: remove the empty blocks after all the work is done? } -/// RemoveUnusedCPEntries - Remove constant pool entries whose refcounts +/// removeUnusedCPEntries - Remove constant pool entries whose refcounts /// are zero. -bool ARMConstantIslands::RemoveUnusedCPEntries() { +bool ARMConstantIslands::removeUnusedCPEntries() { unsigned MadeChange = false; for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) { std::vector<CPEntry> &CPEs = CPEntries[i]; for (unsigned j = 0, ee = CPEs.size(); j != ee; ++j) { if (CPEs[j].RefCount == 0 && CPEs[j].CPEMI) { - RemoveDeadCPEMI(CPEs[j].CPEMI); + removeDeadCPEMI(CPEs[j].CPEMI); CPEs[j].CPEMI = NULL; MadeChange = true; } @@ -1384,18 +1522,18 @@ bool ARMConstantIslands::RemoveUnusedCPEntries() { return MadeChange; } -/// BBIsInRange - Returns true if the distance between specific MI and +/// isBBInRange - Returns true if the distance between specific MI and /// specific BB can fit in MI's displacement field. -bool ARMConstantIslands::BBIsInRange(MachineInstr *MI,MachineBasicBlock *DestBB, +bool ARMConstantIslands::isBBInRange(MachineInstr *MI,MachineBasicBlock *DestBB, unsigned MaxDisp) { unsigned PCAdj = isThumb ? 4 : 8; - unsigned BrOffset = GetOffsetOf(MI) + PCAdj; - unsigned DestOffset = BBOffsets[DestBB->getNumber()]; + unsigned BrOffset = getOffsetOf(MI) + PCAdj; + unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset; - DEBUG(errs() << "Branch of destination BB#" << DestBB->getNumber() + DEBUG(dbgs() << "Branch of destination BB#" << DestBB->getNumber() << " from BB#" << MI->getParent()->getNumber() << " max delta=" << MaxDisp - << " from " << GetOffsetOf(MI) << " to " << DestOffset + << " from " << getOffsetOf(MI) << " to " << DestOffset << " offset " << int(DestOffset-BrOffset) << "\t" << *MI); if (BrOffset <= DestOffset) { @@ -1409,50 +1547,50 @@ bool ARMConstantIslands::BBIsInRange(MachineInstr *MI,MachineBasicBlock *DestBB, return false; } -/// FixUpImmediateBr - Fix up an immediate branch whose destination is too far +/// fixupImmediateBr - Fix up an immediate branch whose destination is too far /// away to fit in its displacement field. -bool ARMConstantIslands::FixUpImmediateBr(MachineFunction &MF, ImmBranch &Br) { +bool ARMConstantIslands::fixupImmediateBr(ImmBranch &Br) { MachineInstr *MI = Br.MI; MachineBasicBlock *DestBB = MI->getOperand(0).getMBB(); // Check to see if the DestBB is already in-range. - if (BBIsInRange(MI, DestBB, Br.MaxDisp)) + if (isBBInRange(MI, DestBB, Br.MaxDisp)) return false; if (!Br.isCond) - return FixUpUnconditionalBr(MF, Br); - return FixUpConditionalBr(MF, Br); + return fixupUnconditionalBr(Br); + return fixupConditionalBr(Br); } -/// FixUpUnconditionalBr - Fix up an unconditional branch whose destination is +/// fixupUnconditionalBr - Fix up an unconditional branch whose destination is /// too far away to fit in its displacement field. If the LR register has been /// spilled in the epilogue, then we can use BL to implement a far jump. /// Otherwise, add an intermediate branch instruction to a branch. bool -ARMConstantIslands::FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br) { +ARMConstantIslands::fixupUnconditionalBr(ImmBranch &Br) { MachineInstr *MI = Br.MI; MachineBasicBlock *MBB = MI->getParent(); if (!isThumb1) - llvm_unreachable("FixUpUnconditionalBr is Thumb1 only!"); + llvm_unreachable("fixupUnconditionalBr is Thumb1 only!"); // Use BL to implement far jump. 
Br.MaxDisp = (1 << 21) * 2; MI->setDesc(TII->get(ARM::tBfar)); - BBSizes[MBB->getNumber()] += 2; - AdjustBBOffsetsAfter(MBB, 2); + BBInfo[MBB->getNumber()].Size += 2; + adjustBBOffsetsAfter(MBB); HasFarJump = true; ++NumUBrFixed; - DEBUG(errs() << " Changed B to long jump " << *MI); + DEBUG(dbgs() << " Changed B to long jump " << *MI); return true; } -/// FixUpConditionalBr - Fix up a conditional branch whose destination is too +/// fixupConditionalBr - Fix up a conditional branch whose destination is too /// far away to fit in its displacement field. It is converted to an inverse /// conditional branch + an unconditional branch to the destination. bool -ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) { +ARMConstantIslands::fixupConditionalBr(ImmBranch &Br) { MachineInstr *MI = Br.MI; MachineBasicBlock *DestBB = MI->getOperand(0).getMBB(); @@ -1486,8 +1624,8 @@ ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) { // bne L2 // b L1 MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB(); - if (BBIsInRange(MI, NewDest, Br.MaxDisp)) { - DEBUG(errs() << " Invert Bcc condition and swap its destination with " + if (isBBInRange(MI, NewDest, Br.MaxDisp)) { + DEBUG(dbgs() << " Invert Bcc condition and swap its destination with " << *BMI); BMI->getOperand(0).setMBB(DestBB); MI->getOperand(0).setMBB(NewDest); @@ -1498,19 +1636,17 @@ ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) { } if (NeedSplit) { - SplitBlockBeforeInstr(MI); + splitBlockBeforeInstr(MI); // No need for the branch to the next block. We're adding an unconditional // branch to the destination. int delta = TII->GetInstSizeInBytes(&MBB->back()); - BBSizes[MBB->getNumber()] -= delta; - MachineBasicBlock* SplitBB = llvm::next(MachineFunction::iterator(MBB)); - AdjustBBOffsetsAfter(SplitBB, -delta); + BBInfo[MBB->getNumber()].Size -= delta; MBB->back().eraseFromParent(); - // BBOffsets[SplitBB] is wrong temporarily, fixed below + // BBInfo[SplitBB].Offset is wrong temporarily, fixed below } MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB)); - DEBUG(errs() << " Insert B to BB#" << DestBB->getNumber() + DEBUG(dbgs() << " Insert B to BB#" << DestBB->getNumber() << " also invert condition and change dest. to BB#" << NextBB->getNumber() << "\n"); @@ -1519,30 +1655,27 @@ ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) { BuildMI(MBB, DebugLoc(), TII->get(MI->getOpcode())) .addMBB(NextBB).addImm(CC).addReg(CCReg); Br.MI = &MBB->back(); - BBSizes[MBB->getNumber()] += TII->GetInstSizeInBytes(&MBB->back()); + BBInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back()); if (isThumb) BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB) .addImm(ARMCC::AL).addReg(0); else BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB); - BBSizes[MBB->getNumber()] += TII->GetInstSizeInBytes(&MBB->back()); + BBInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back()); unsigned MaxDisp = getUnconditionalBrDisp(Br.UncondBr); ImmBranches.push_back(ImmBranch(&MBB->back(), MaxDisp, false, Br.UncondBr)); // Remove the old conditional branch. It may or may not still be in MBB. - BBSizes[MI->getParent()->getNumber()] -= TII->GetInstSizeInBytes(MI); + BBInfo[MI->getParent()->getNumber()].Size -= TII->GetInstSizeInBytes(MI); MI->eraseFromParent(); - - // The net size change is an addition of one unconditional branch. 
- int delta = TII->GetInstSizeInBytes(&MBB->back()); - AdjustBBOffsetsAfter(MBB, delta); + adjustBBOffsetsAfter(MBB); return true; } -/// UndoLRSpillRestore - Remove Thumb push / pop instructions that only spill +/// undoLRSpillRestore - Remove Thumb push / pop instructions that only spill /// LR / restore LR to pc. FIXME: This is done here because it's only possible /// to do this if tBfar is not used. -bool ARMConstantIslands::UndoLRSpillRestore() { +bool ARMConstantIslands::undoLRSpillRestore() { bool MadeChange = false; for (unsigned i = 0, e = PushPopMIs.size(); i != e; ++i) { MachineInstr *MI = PushPopMIs[i]; @@ -1561,7 +1694,26 @@ bool ARMConstantIslands::UndoLRSpillRestore() { return MadeChange; } -bool ARMConstantIslands::OptimizeThumb2Instructions(MachineFunction &MF) { +// mayOptimizeThumb2Instruction - Returns true if optimizeThumb2Instructions +// below may shrink MI. +bool +ARMConstantIslands::mayOptimizeThumb2Instruction(const MachineInstr *MI) const { + switch(MI->getOpcode()) { + // optimizeThumb2Instructions. + case ARM::t2LEApcrel: + case ARM::t2LDRpci: + // optimizeThumb2Branches. + case ARM::t2B: + case ARM::t2Bcc: + case ARM::tBcc: + // optimizeThumb2JumpTables. + case ARM::t2BR_JT: + return true; + } + return false; +} + +bool ARMConstantIslands::optimizeThumb2Instructions() { bool MadeChange = false; // Shrink ADR and LDR from constantpool. @@ -1592,25 +1744,31 @@ bool ARMConstantIslands::OptimizeThumb2Instructions(MachineFunction &MF) { if (!NewOpc) continue; - unsigned UserOffset = GetOffsetOf(U.MI) + 4; + unsigned UserOffset = getUserOffset(U); unsigned MaxOffs = ((1 << Bits) - 1) * Scale; + + // Be conservative with inline asm. + if (!U.KnownAlignment) + MaxOffs -= 2; + // FIXME: Check if offset is multiple of scale if scale is not 4. - if (CPEIsInRange(U.MI, UserOffset, U.CPEMI, MaxOffs, false, true)) { + if (isCPEntryInRange(U.MI, UserOffset, U.CPEMI, MaxOffs, false, true)) { + DEBUG(dbgs() << "Shrink: " << *U.MI); U.MI->setDesc(TII->get(NewOpc)); MachineBasicBlock *MBB = U.MI->getParent(); - BBSizes[MBB->getNumber()] -= 2; - AdjustBBOffsetsAfter(MBB, -2); + BBInfo[MBB->getNumber()].Size -= 2; + adjustBBOffsetsAfter(MBB); ++NumT2CPShrunk; MadeChange = true; } } - MadeChange |= OptimizeThumb2Branches(MF); - MadeChange |= OptimizeThumb2JumpTables(MF); + MadeChange |= optimizeThumb2Branches(); + MadeChange |= optimizeThumb2JumpTables(); return MadeChange; } -bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) { +bool ARMConstantIslands::optimizeThumb2Branches() { bool MadeChange = false; for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i) { @@ -1636,11 +1794,12 @@ bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) { if (NewOpc) { unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale; MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB(); - if (BBIsInRange(Br.MI, DestBB, MaxOffs)) { + if (isBBInRange(Br.MI, DestBB, MaxOffs)) { + DEBUG(dbgs() << "Shrink branch: " << *Br.MI); Br.MI->setDesc(TII->get(NewOpc)); MachineBasicBlock *MBB = Br.MI->getParent(); - BBSizes[MBB->getNumber()] -= 2; - AdjustBBOffsetsAfter(MBB, -2); + BBInfo[MBB->getNumber()].Size -= 2; + adjustBBOffsetsAfter(MBB); ++NumT2BrShrunk; MadeChange = true; } @@ -1650,9 +1809,14 @@ if (Opcode != ARM::tBcc) continue; + // If the conditional branch doesn't kill CPSR, then CPSR can be liveout + // so this transformation is not safe.
+ if (!Br.MI->killsRegister(ARM::CPSR)) + continue; + NewOpc = 0; unsigned PredReg = 0; - ARMCC::CondCodes Pred = llvm::getInstrPredicate(Br.MI, PredReg); + ARMCC::CondCodes Pred = getInstrPredicate(Br.MI, PredReg); if (Pred == ARMCC::EQ) NewOpc = ARM::tCBZ; else if (Pred == ARMCC::NE) @@ -1662,27 +1826,28 @@ bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) { MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB(); // Check if the distance is within 126. Subtract starting offset by 2 // because the cmp will be eliminated. - unsigned BrOffset = GetOffsetOf(Br.MI) + 4 - 2; - unsigned DestOffset = BBOffsets[DestBB->getNumber()]; + unsigned BrOffset = getOffsetOf(Br.MI) + 4 - 2; + unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset; if (BrOffset < DestOffset && (DestOffset - BrOffset) <= 126) { MachineBasicBlock::iterator CmpMI = Br.MI; if (CmpMI != Br.MI->getParent()->begin()) { --CmpMI; if (CmpMI->getOpcode() == ARM::tCMPi8) { unsigned Reg = CmpMI->getOperand(0).getReg(); - Pred = llvm::getInstrPredicate(CmpMI, PredReg); + Pred = getInstrPredicate(CmpMI, PredReg); if (Pred == ARMCC::AL && CmpMI->getOperand(1).getImm() == 0 && isARMLowRegister(Reg)) { MachineBasicBlock *MBB = Br.MI->getParent(); + DEBUG(dbgs() << "Fold: " << *CmpMI << " and: " << *Br.MI); MachineInstr *NewBR = BuildMI(*MBB, CmpMI, Br.MI->getDebugLoc(), TII->get(NewOpc)) .addReg(Reg).addMBB(DestBB,Br.MI->getOperand(0).getTargetFlags()); CmpMI->eraseFromParent(); Br.MI->eraseFromParent(); Br.MI = NewBR; - BBSizes[MBB->getNumber()] -= 2; - AdjustBBOffsetsAfter(MBB, -2); + BBInfo[MBB->getNumber()].Size -= 2; + adjustBBOffsetsAfter(MBB); ++NumCBZ; MadeChange = true; } @@ -1694,14 +1859,14 @@ bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) { return MadeChange; } -/// OptimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller +/// optimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller /// jumptables when it's possible. -bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) { +bool ARMConstantIslands::optimizeThumb2JumpTables() { bool MadeChange = false; // FIXME: After the tables are shrunk, can we get rid some of the // constantpool tables? - MachineJumpTableInfo *MJTI = MF.getJumpTableInfo(); + MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); if (MJTI == 0) return false; const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); @@ -1709,18 +1874,18 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) { MachineInstr *MI = T2JumpTables[i]; const MCInstrDesc &MCID = MI->getDesc(); unsigned NumOps = MCID.getNumOperands(); - unsigned JTOpIdx = NumOps - (MCID.isPredicable() ? 3 : 2); + unsigned JTOpIdx = NumOps - (MI->isPredicable() ? 3 : 2); MachineOperand JTOP = MI->getOperand(JTOpIdx); unsigned JTI = JTOP.getIndex(); assert(JTI < JT.size()); bool ByteOk = true; bool HalfWordOk = true; - unsigned JTOffset = GetOffsetOf(MI) + 4; + unsigned JTOffset = getOffsetOf(MI) + 4; const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs; for (unsigned j = 0, ee = JTBBs.size(); j != ee; ++j) { MachineBasicBlock *MBB = JTBBs[j]; - unsigned DstOffset = BBOffsets[MBB->getNumber()]; + unsigned DstOffset = BBInfo[MBB->getNumber()].Offset; // Negative offset is not ok. FIXME: We should change BB layout to make // sure all the branches are forward. 
if (ByteOk && (DstOffset - JTOffset) > ((1<<8)-1)*2) @@ -1791,11 +1956,14 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) { if (!OptOk) continue; + DEBUG(dbgs() << "Shrink JT: " << *MI << " addr: " << *AddrMI + << " lea: " << *LeaMI); unsigned Opc = ByteOk ? ARM::t2TBB_JT : ARM::t2TBH_JT; MachineInstr *NewJTMI = BuildMI(MBB, MI->getDebugLoc(), TII->get(Opc)) .addReg(IdxReg, getKillRegState(IdxRegKill)) .addJumpTableIndex(JTI, JTOP.getTargetFlags()) .addImm(MI->getOperand(JTOpIdx+1).getImm()); + DEBUG(dbgs() << "BB#" << MBB->getNumber() << ": " << *NewJTMI); // FIXME: Insert an "ALIGN" instruction to ensure the next instruction // is 2-byte aligned. For now, asm printer will fix it up. unsigned NewSize = TII->GetInstSizeInBytes(NewJTMI); @@ -1808,8 +1976,8 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) { MI->eraseFromParent(); int delta = OrigSize - NewSize; - BBSizes[MBB->getNumber()] -= delta; - AdjustBBOffsetsAfter(MBB, -delta); + BBInfo[MBB->getNumber()].Size -= delta; + adjustBBOffsetsAfter(MBB); ++NumTBs; MadeChange = true; @@ -1819,12 +1987,12 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) { return MadeChange; } -/// ReorderThumb2JumpTables - Adjust the function's block layout to ensure that +/// reorderThumb2JumpTables - Adjust the function's block layout to ensure that /// jump tables always branch forwards, since that's what tbb and tbh need. -bool ARMConstantIslands::ReorderThumb2JumpTables(MachineFunction &MF) { +bool ARMConstantIslands::reorderThumb2JumpTables() { bool MadeChange = false; - MachineJumpTableInfo *MJTI = MF.getJumpTableInfo(); + MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); if (MJTI == 0) return false; const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); @@ -1832,7 +2000,7 @@ bool ARMConstantIslands::ReorderThumb2JumpTables(MachineFunction &MF) { MachineInstr *MI = T2JumpTables[i]; const MCInstrDesc &MCID = MI->getDesc(); unsigned NumOps = MCID.getNumOperands(); - unsigned JTOpIdx = NumOps - (MCID.isPredicable() ? 3 : 2); + unsigned JTOpIdx = NumOps - (MI->isPredicable() ? 3 : 2); MachineOperand JTOP = MI->getOperand(JTOpIdx); unsigned JTI = JTOP.getIndex(); assert(JTI < JT.size()); @@ -1850,7 +2018,7 @@ bool ARMConstantIslands::ReorderThumb2JumpTables(MachineFunction &MF) { // The destination precedes the switch. Try to move the block forward // so we have a positive offset. MachineBasicBlock *NewBB = - AdjustJTTargetBlockForward(MBB, MI->getParent()); + adjustJTTargetBlockForward(MBB, MI->getParent()); if (NewBB) MJTI->ReplaceMBBInJumpTable(JTI, JTBBs[j], NewBB); MadeChange = true; @@ -1862,10 +2030,7 @@ bool ARMConstantIslands::ReorderThumb2JumpTables(MachineFunction &MF) { } MachineBasicBlock *ARMConstantIslands:: -AdjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) -{ - MachineFunction &MF = *BB->getParent(); - +adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) { // If the destination block is terminated by an unconditional branch, // try to move it; otherwise, create a new block following the jump // table that branches back to the actual target. This is a very simple @@ -1882,22 +2047,22 @@ AdjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) // If the block ends in an unconditional branch, move it. The prior block // has to have an analyzable terminator for us to move this one. Be paranoid // and make sure we're not trying to move the entry block of the function. 
- if (!B && Cond.empty() && BB != MF.begin() && + if (!B && Cond.empty() && BB != MF->begin() && !TII->AnalyzeBranch(*OldPrior, TBB, FBB, CondPrior)) { BB->moveAfter(JTBB); OldPrior->updateTerminator(); BB->updateTerminator(); // Update numbering to account for the block being moved. - MF.RenumberBlocks(); + MF->RenumberBlocks(); ++NumJTMoved; return NULL; } // Create a new MBB for the code after the jump BB. MachineBasicBlock *NewBB = - MF.CreateMachineBasicBlock(JTBB->getBasicBlock()); + MF->CreateMachineBasicBlock(JTBB->getBasicBlock()); MachineFunction::iterator MBBI = JTBB; ++MBBI; - MF.insert(MBBI, NewBB); + MF->insert(MBBI, NewBB); // Add an unconditional branch from NewBB to BB. // There doesn't seem to be meaningful DebugInfo available; this doesn't @@ -1907,7 +2072,7 @@ AdjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) .addImm(ARMCC::AL).addReg(0); // Update internal data structures to account for the newly inserted MBB. - MF.RenumberBlocks(NewBB); + MF->RenumberBlocks(NewBB); // Update the CFG. NewBB->addSuccessor(BB); diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp index aadfd47..fa3226e 100644 --- a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp @@ -1,4 +1,4 @@ -//===- ARMConstantPoolValue.cpp - ARM constantpool value --------*- C++ -*-===// +//===-- ARMConstantPoolValue.cpp - ARM constantpool value -----------------===// // // The LLVM Compiler Infrastructure // @@ -48,7 +48,6 @@ ARMConstantPoolValue::~ARMConstantPoolValue() {} const char *ARMConstantPoolValue::getModifierText() const { switch (Modifier) { - default: llvm_unreachable("Unknown modifier!"); // FIXME: Are these case sensitive? It'd be nice to lower-case all the // strings if that's legal. 
case ARMCP::no_modifier: return "none"; @@ -58,12 +57,12 @@ const char *ARMConstantPoolValue::getModifierText() const { case ARMCP::GOTTPOFF: return "gottpoff"; case ARMCP::TPOFF: return "tpoff"; } + llvm_unreachable("Unknown modifier!"); } int ARMConstantPoolValue::getExistingMachineCPValue(MachineConstantPool *CP, unsigned Alignment) { - assert(false && "Shouldn't be calling this directly!"); - return -1; + llvm_unreachable("Shouldn't be calling this directly!"); } void @@ -315,5 +314,6 @@ void ARMConstantPoolMBB::addSelectionDAGCSEId(FoldingSetNodeID &ID) { } void ARMConstantPoolMBB::print(raw_ostream &O) const { + O << "BB#" << MBB->getNumber(); ARMConstantPoolValue::print(O); } diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h index 0d0def3..6b98d44 100644 --- a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h +++ b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h @@ -1,4 +1,4 @@ -//===- ARMConstantPoolValue.h - ARM constantpool value ----------*- C++ -*-===// +//===-- ARMConstantPoolValue.h - ARM constantpool value ---------*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/contrib/llvm/lib/Target/ARM/ARMELFWriterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMELFWriterInfo.cpp index 51e68b4..f671317 100644 --- a/contrib/llvm/lib/Target/ARM/ARMELFWriterInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMELFWriterInfo.cpp @@ -41,43 +41,38 @@ unsigned ARMELFWriterInfo::getRelocationType(unsigned MachineRelTy) const { case ARM::reloc_arm_machine_cp_entry: case ARM::reloc_arm_jt_base: case ARM::reloc_arm_pic_jt: - assert(0 && "unsupported ARM relocation type"); break; - - case ARM::reloc_arm_branch: return ELF::R_ARM_CALL; break; - case ARM::reloc_arm_movt: return ELF::R_ARM_MOVT_ABS; break; - case ARM::reloc_arm_movw: return ELF::R_ARM_MOVW_ABS_NC; break; + llvm_unreachable("unsupported ARM relocation type"); + + case ARM::reloc_arm_branch: return ELF::R_ARM_CALL; + case ARM::reloc_arm_movt: return ELF::R_ARM_MOVT_ABS; + case ARM::reloc_arm_movw: return ELF::R_ARM_MOVW_ABS_NC; default: - llvm_unreachable("unknown ARM relocation type"); break; + llvm_unreachable("unknown ARM relocation type"); } - return 0; } long int ARMELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy, long int Modifier) const { - assert(0 && "ARMELFWriterInfo::getDefaultAddendForRelTy() not implemented"); - return 0; + llvm_unreachable("ARMELFWriterInfo::getDefaultAddendForRelTy() not " + "implemented"); } unsigned ARMELFWriterInfo::getRelocationTySize(unsigned RelTy) const { - assert(0 && "ARMELFWriterInfo::getRelocationTySize() not implemented"); - return 0; + llvm_unreachable("ARMELFWriterInfo::getRelocationTySize() not implemented"); } bool ARMELFWriterInfo::isPCRelativeRel(unsigned RelTy) const { - assert(0 && "ARMELFWriterInfo::isPCRelativeRel() not implemented"); - return 1; + llvm_unreachable("ARMELFWriterInfo::isPCRelativeRel() not implemented"); } unsigned ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() const { - assert(0 && - "ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() not implemented"); - return 0; + llvm_unreachable("ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() not " + "implemented"); } long int ARMELFWriterInfo::computeRelocation(unsigned SymOffset, unsigned RelOffset, unsigned RelTy) const { - assert(0 && - "ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() not implemented"); - return 0; + llvm_unreachable("ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() not " + "implemented"); } diff --git 
a/contrib/llvm/lib/Target/ARM/ARMELFWriterInfo.h b/contrib/llvm/lib/Target/ARM/ARMELFWriterInfo.h index 1c4e532..6a84f8a 100644 --- a/contrib/llvm/lib/Target/ARM/ARMELFWriterInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMELFWriterInfo.h @@ -17,6 +17,7 @@ #include "llvm/Target/TargetELFWriterInfo.h" namespace llvm { + class TargetMachine; class ARMELFWriterInfo : public TargetELFWriterInfo { public: diff --git a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 7872cb9..5fc0360 100644 --- a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -1,4 +1,4 @@ -//===-- ARMExpandPseudoInsts.cpp - Expand pseudo instructions -----*- C++ -*-=// +//===-- ARMExpandPseudoInsts.cpp - Expand pseudo instructions -------------===// // // The LLVM Compiler Infrastructure // @@ -19,7 +19,6 @@ #include "ARMBaseInstrInfo.h" #include "ARMBaseRegisterInfo.h" #include "ARMMachineFunctionInfo.h" -#include "ARMRegisterInfo.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -61,7 +60,7 @@ namespace { void ExpandVST(MachineBasicBlock::iterator &MBBI); void ExpandLaneOp(MachineBasicBlock::iterator &MBBI); void ExpandVTBL(MachineBasicBlock::iterator &MBBI, - unsigned Opc, bool IsExt, unsigned NumRegs); + unsigned Opc, bool IsExt); void ExpandMOV32BitImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI); }; @@ -99,13 +98,20 @@ namespace { // Entries for NEON load/store information table. The table is sorted by // PseudoOpc for fast binary-search lookups. struct NEONLdStTableEntry { - unsigned PseudoOpc; - unsigned RealOpc; + uint16_t PseudoOpc; + uint16_t RealOpc; bool IsLoad; - bool HasWriteBack; + bool isUpdating; + bool hasWritebackOperand; NEONRegSpacing RegSpacing; unsigned char NumRegs; // D registers loaded or stored unsigned char RegElts; // elements per D register; used for lane ops + // FIXME: Temporary flag to denote whether the real instruction takes + // a single register (like the encoding) or all of the registers in + // the list (like the asm syntax and the isel DAG). When all definitions + // are converted to take only the single encoded register, this will + // go away. + bool copyAllListRegs; // Comparison methods for binary search of the table. 
bool operator<(const NEONLdStTableEntry &TE) const { @@ -122,243 +128,203 @@ namespace { } static const NEONLdStTableEntry NEONLdStTable[] = { -{ ARM::VLD1DUPq16Pseudo, ARM::VLD1DUPq16, true, false, SingleSpc, 2, 4}, -{ ARM::VLD1DUPq16Pseudo_UPD, ARM::VLD1DUPq16_UPD, true, true, SingleSpc, 2, 4}, -{ ARM::VLD1DUPq32Pseudo, ARM::VLD1DUPq32, true, false, SingleSpc, 2, 2}, -{ ARM::VLD1DUPq32Pseudo_UPD, ARM::VLD1DUPq32_UPD, true, true, SingleSpc, 2, 2}, -{ ARM::VLD1DUPq8Pseudo, ARM::VLD1DUPq8, true, false, SingleSpc, 2, 8}, -{ ARM::VLD1DUPq8Pseudo_UPD, ARM::VLD1DUPq8_UPD, true, true, SingleSpc, 2, 8}, - -{ ARM::VLD1LNq16Pseudo, ARM::VLD1LNd16, true, false, EvenDblSpc, 1, 4 }, -{ ARM::VLD1LNq16Pseudo_UPD, ARM::VLD1LNd16_UPD, true, true, EvenDblSpc, 1, 4 }, -{ ARM::VLD1LNq32Pseudo, ARM::VLD1LNd32, true, false, EvenDblSpc, 1, 2 }, -{ ARM::VLD1LNq32Pseudo_UPD, ARM::VLD1LNd32_UPD, true, true, EvenDblSpc, 1, 2 }, -{ ARM::VLD1LNq8Pseudo, ARM::VLD1LNd8, true, false, EvenDblSpc, 1, 8 }, -{ ARM::VLD1LNq8Pseudo_UPD, ARM::VLD1LNd8_UPD, true, true, EvenDblSpc, 1, 8 }, - -{ ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, SingleSpc, 4, 1 }, -{ ARM::VLD1d64QPseudo_UPD, ARM::VLD1d64Q_UPD, true, true, SingleSpc, 4, 1 }, -{ ARM::VLD1d64TPseudo, ARM::VLD1d64T, true, false, SingleSpc, 3, 1 }, -{ ARM::VLD1d64TPseudo_UPD, ARM::VLD1d64T_UPD, true, true, SingleSpc, 3, 1 }, - -{ ARM::VLD1q16Pseudo, ARM::VLD1q16, true, false, SingleSpc, 2, 4 }, -{ ARM::VLD1q16Pseudo_UPD, ARM::VLD1q16_UPD, true, true, SingleSpc, 2, 4 }, -{ ARM::VLD1q32Pseudo, ARM::VLD1q32, true, false, SingleSpc, 2, 2 }, -{ ARM::VLD1q32Pseudo_UPD, ARM::VLD1q32_UPD, true, true, SingleSpc, 2, 2 }, -{ ARM::VLD1q64Pseudo, ARM::VLD1q64, true, false, SingleSpc, 2, 1 }, -{ ARM::VLD1q64Pseudo_UPD, ARM::VLD1q64_UPD, true, true, SingleSpc, 2, 1 }, -{ ARM::VLD1q8Pseudo, ARM::VLD1q8, true, false, SingleSpc, 2, 8 }, -{ ARM::VLD1q8Pseudo_UPD, ARM::VLD1q8_UPD, true, true, SingleSpc, 2, 8 }, - -{ ARM::VLD2DUPd16Pseudo, ARM::VLD2DUPd16, true, false, SingleSpc, 2, 4}, -{ ARM::VLD2DUPd16Pseudo_UPD, ARM::VLD2DUPd16_UPD, true, true, SingleSpc, 2, 4}, -{ ARM::VLD2DUPd32Pseudo, ARM::VLD2DUPd32, true, false, SingleSpc, 2, 2}, -{ ARM::VLD2DUPd32Pseudo_UPD, ARM::VLD2DUPd32_UPD, true, true, SingleSpc, 2, 2}, -{ ARM::VLD2DUPd8Pseudo, ARM::VLD2DUPd8, true, false, SingleSpc, 2, 8}, -{ ARM::VLD2DUPd8Pseudo_UPD, ARM::VLD2DUPd8_UPD, true, true, SingleSpc, 2, 8}, - -{ ARM::VLD2LNd16Pseudo, ARM::VLD2LNd16, true, false, SingleSpc, 2, 4 }, -{ ARM::VLD2LNd16Pseudo_UPD, ARM::VLD2LNd16_UPD, true, true, SingleSpc, 2, 4 }, -{ ARM::VLD2LNd32Pseudo, ARM::VLD2LNd32, true, false, SingleSpc, 2, 2 }, -{ ARM::VLD2LNd32Pseudo_UPD, ARM::VLD2LNd32_UPD, true, true, SingleSpc, 2, 2 }, -{ ARM::VLD2LNd8Pseudo, ARM::VLD2LNd8, true, false, SingleSpc, 2, 8 }, -{ ARM::VLD2LNd8Pseudo_UPD, ARM::VLD2LNd8_UPD, true, true, SingleSpc, 2, 8 }, -{ ARM::VLD2LNq16Pseudo, ARM::VLD2LNq16, true, false, EvenDblSpc, 2, 4 }, -{ ARM::VLD2LNq16Pseudo_UPD, ARM::VLD2LNq16_UPD, true, true, EvenDblSpc, 2, 4 }, -{ ARM::VLD2LNq32Pseudo, ARM::VLD2LNq32, true, false, EvenDblSpc, 2, 2 }, -{ ARM::VLD2LNq32Pseudo_UPD, ARM::VLD2LNq32_UPD, true, true, EvenDblSpc, 2, 2 }, - -{ ARM::VLD2d16Pseudo, ARM::VLD2d16, true, false, SingleSpc, 2, 4 }, -{ ARM::VLD2d16Pseudo_UPD, ARM::VLD2d16_UPD, true, true, SingleSpc, 2, 4 }, -{ ARM::VLD2d32Pseudo, ARM::VLD2d32, true, false, SingleSpc, 2, 2 }, -{ ARM::VLD2d32Pseudo_UPD, ARM::VLD2d32_UPD, true, true, SingleSpc, 2, 2 }, -{ ARM::VLD2d8Pseudo, ARM::VLD2d8, true, false, SingleSpc, 2, 8 }, -{ 
ARM::VLD2d8Pseudo_UPD, ARM::VLD2d8_UPD, true, true, SingleSpc, 2, 8 }, - -{ ARM::VLD2q16Pseudo, ARM::VLD2q16, true, false, SingleSpc, 4, 4 }, -{ ARM::VLD2q16Pseudo_UPD, ARM::VLD2q16_UPD, true, true, SingleSpc, 4, 4 }, -{ ARM::VLD2q32Pseudo, ARM::VLD2q32, true, false, SingleSpc, 4, 2 }, -{ ARM::VLD2q32Pseudo_UPD, ARM::VLD2q32_UPD, true, true, SingleSpc, 4, 2 }, -{ ARM::VLD2q8Pseudo, ARM::VLD2q8, true, false, SingleSpc, 4, 8 }, -{ ARM::VLD2q8Pseudo_UPD, ARM::VLD2q8_UPD, true, true, SingleSpc, 4, 8 }, - -{ ARM::VLD3DUPd16Pseudo, ARM::VLD3DUPd16, true, false, SingleSpc, 3, 4}, -{ ARM::VLD3DUPd16Pseudo_UPD, ARM::VLD3DUPd16_UPD, true, true, SingleSpc, 3, 4}, -{ ARM::VLD3DUPd32Pseudo, ARM::VLD3DUPd32, true, false, SingleSpc, 3, 2}, -{ ARM::VLD3DUPd32Pseudo_UPD, ARM::VLD3DUPd32_UPD, true, true, SingleSpc, 3, 2}, -{ ARM::VLD3DUPd8Pseudo, ARM::VLD3DUPd8, true, false, SingleSpc, 3, 8}, -{ ARM::VLD3DUPd8Pseudo_UPD, ARM::VLD3DUPd8_UPD, true, true, SingleSpc, 3, 8}, - -{ ARM::VLD3LNd16Pseudo, ARM::VLD3LNd16, true, false, SingleSpc, 3, 4 }, -{ ARM::VLD3LNd16Pseudo_UPD, ARM::VLD3LNd16_UPD, true, true, SingleSpc, 3, 4 }, -{ ARM::VLD3LNd32Pseudo, ARM::VLD3LNd32, true, false, SingleSpc, 3, 2 }, -{ ARM::VLD3LNd32Pseudo_UPD, ARM::VLD3LNd32_UPD, true, true, SingleSpc, 3, 2 }, -{ ARM::VLD3LNd8Pseudo, ARM::VLD3LNd8, true, false, SingleSpc, 3, 8 }, -{ ARM::VLD3LNd8Pseudo_UPD, ARM::VLD3LNd8_UPD, true, true, SingleSpc, 3, 8 }, -{ ARM::VLD3LNq16Pseudo, ARM::VLD3LNq16, true, false, EvenDblSpc, 3, 4 }, -{ ARM::VLD3LNq16Pseudo_UPD, ARM::VLD3LNq16_UPD, true, true, EvenDblSpc, 3, 4 }, -{ ARM::VLD3LNq32Pseudo, ARM::VLD3LNq32, true, false, EvenDblSpc, 3, 2 }, -{ ARM::VLD3LNq32Pseudo_UPD, ARM::VLD3LNq32_UPD, true, true, EvenDblSpc, 3, 2 }, - -{ ARM::VLD3d16Pseudo, ARM::VLD3d16, true, false, SingleSpc, 3, 4 }, -{ ARM::VLD3d16Pseudo_UPD, ARM::VLD3d16_UPD, true, true, SingleSpc, 3, 4 }, -{ ARM::VLD3d32Pseudo, ARM::VLD3d32, true, false, SingleSpc, 3, 2 }, -{ ARM::VLD3d32Pseudo_UPD, ARM::VLD3d32_UPD, true, true, SingleSpc, 3, 2 }, -{ ARM::VLD3d8Pseudo, ARM::VLD3d8, true, false, SingleSpc, 3, 8 }, -{ ARM::VLD3d8Pseudo_UPD, ARM::VLD3d8_UPD, true, true, SingleSpc, 3, 8 }, - -{ ARM::VLD3q16Pseudo_UPD, ARM::VLD3q16_UPD, true, true, EvenDblSpc, 3, 4 }, -{ ARM::VLD3q16oddPseudo, ARM::VLD3q16, true, false, OddDblSpc, 3, 4 }, -{ ARM::VLD3q16oddPseudo_UPD, ARM::VLD3q16_UPD, true, true, OddDblSpc, 3, 4 }, -{ ARM::VLD3q32Pseudo_UPD, ARM::VLD3q32_UPD, true, true, EvenDblSpc, 3, 2 }, -{ ARM::VLD3q32oddPseudo, ARM::VLD3q32, true, false, OddDblSpc, 3, 2 }, -{ ARM::VLD3q32oddPseudo_UPD, ARM::VLD3q32_UPD, true, true, OddDblSpc, 3, 2 }, -{ ARM::VLD3q8Pseudo_UPD, ARM::VLD3q8_UPD, true, true, EvenDblSpc, 3, 8 }, -{ ARM::VLD3q8oddPseudo, ARM::VLD3q8, true, false, OddDblSpc, 3, 8 }, -{ ARM::VLD3q8oddPseudo_UPD, ARM::VLD3q8_UPD, true, true, OddDblSpc, 3, 8 }, - -{ ARM::VLD4DUPd16Pseudo, ARM::VLD4DUPd16, true, false, SingleSpc, 4, 4}, -{ ARM::VLD4DUPd16Pseudo_UPD, ARM::VLD4DUPd16_UPD, true, true, SingleSpc, 4, 4}, -{ ARM::VLD4DUPd32Pseudo, ARM::VLD4DUPd32, true, false, SingleSpc, 4, 2}, -{ ARM::VLD4DUPd32Pseudo_UPD, ARM::VLD4DUPd32_UPD, true, true, SingleSpc, 4, 2}, -{ ARM::VLD4DUPd8Pseudo, ARM::VLD4DUPd8, true, false, SingleSpc, 4, 8}, -{ ARM::VLD4DUPd8Pseudo_UPD, ARM::VLD4DUPd8_UPD, true, true, SingleSpc, 4, 8}, - -{ ARM::VLD4LNd16Pseudo, ARM::VLD4LNd16, true, false, SingleSpc, 4, 4 }, -{ ARM::VLD4LNd16Pseudo_UPD, ARM::VLD4LNd16_UPD, true, true, SingleSpc, 4, 4 }, -{ ARM::VLD4LNd32Pseudo, ARM::VLD4LNd32, true, false, SingleSpc, 4, 2 }, -{ 
ARM::VLD4LNd32Pseudo_UPD, ARM::VLD4LNd32_UPD, true, true, SingleSpc, 4, 2 }, -{ ARM::VLD4LNd8Pseudo, ARM::VLD4LNd8, true, false, SingleSpc, 4, 8 }, -{ ARM::VLD4LNd8Pseudo_UPD, ARM::VLD4LNd8_UPD, true, true, SingleSpc, 4, 8 }, -{ ARM::VLD4LNq16Pseudo, ARM::VLD4LNq16, true, false, EvenDblSpc, 4, 4 }, -{ ARM::VLD4LNq16Pseudo_UPD, ARM::VLD4LNq16_UPD, true, true, EvenDblSpc, 4, 4 }, -{ ARM::VLD4LNq32Pseudo, ARM::VLD4LNq32, true, false, EvenDblSpc, 4, 2 }, -{ ARM::VLD4LNq32Pseudo_UPD, ARM::VLD4LNq32_UPD, true, true, EvenDblSpc, 4, 2 }, - -{ ARM::VLD4d16Pseudo, ARM::VLD4d16, true, false, SingleSpc, 4, 4 }, -{ ARM::VLD4d16Pseudo_UPD, ARM::VLD4d16_UPD, true, true, SingleSpc, 4, 4 }, -{ ARM::VLD4d32Pseudo, ARM::VLD4d32, true, false, SingleSpc, 4, 2 }, -{ ARM::VLD4d32Pseudo_UPD, ARM::VLD4d32_UPD, true, true, SingleSpc, 4, 2 }, -{ ARM::VLD4d8Pseudo, ARM::VLD4d8, true, false, SingleSpc, 4, 8 }, -{ ARM::VLD4d8Pseudo_UPD, ARM::VLD4d8_UPD, true, true, SingleSpc, 4, 8 }, - -{ ARM::VLD4q16Pseudo_UPD, ARM::VLD4q16_UPD, true, true, EvenDblSpc, 4, 4 }, -{ ARM::VLD4q16oddPseudo, ARM::VLD4q16, true, false, OddDblSpc, 4, 4 }, -{ ARM::VLD4q16oddPseudo_UPD, ARM::VLD4q16_UPD, true, true, OddDblSpc, 4, 4 }, -{ ARM::VLD4q32Pseudo_UPD, ARM::VLD4q32_UPD, true, true, EvenDblSpc, 4, 2 }, -{ ARM::VLD4q32oddPseudo, ARM::VLD4q32, true, false, OddDblSpc, 4, 2 }, -{ ARM::VLD4q32oddPseudo_UPD, ARM::VLD4q32_UPD, true, true, OddDblSpc, 4, 2 }, -{ ARM::VLD4q8Pseudo_UPD, ARM::VLD4q8_UPD, true, true, EvenDblSpc, 4, 8 }, -{ ARM::VLD4q8oddPseudo, ARM::VLD4q8, true, false, OddDblSpc, 4, 8 }, -{ ARM::VLD4q8oddPseudo_UPD, ARM::VLD4q8_UPD, true, true, OddDblSpc, 4, 8 }, - -{ ARM::VST1LNq16Pseudo, ARM::VST1LNd16, false, false, EvenDblSpc, 1, 4 }, -{ ARM::VST1LNq16Pseudo_UPD, ARM::VST1LNd16_UPD,false, true, EvenDblSpc, 1, 4 }, -{ ARM::VST1LNq32Pseudo, ARM::VST1LNd32, false, false, EvenDblSpc, 1, 2 }, -{ ARM::VST1LNq32Pseudo_UPD, ARM::VST1LNd32_UPD,false, true, EvenDblSpc, 1, 2 }, -{ ARM::VST1LNq8Pseudo, ARM::VST1LNd8, false, false, EvenDblSpc, 1, 8 }, -{ ARM::VST1LNq8Pseudo_UPD, ARM::VST1LNd8_UPD, false, true, EvenDblSpc, 1, 8 }, - -{ ARM::VST1d64QPseudo, ARM::VST1d64Q, false, false, SingleSpc, 4, 1 }, -{ ARM::VST1d64QPseudo_UPD, ARM::VST1d64Q_UPD, false, true, SingleSpc, 4, 1 }, -{ ARM::VST1d64TPseudo, ARM::VST1d64T, false, false, SingleSpc, 3, 1 }, -{ ARM::VST1d64TPseudo_UPD, ARM::VST1d64T_UPD, false, true, SingleSpc, 3, 1 }, - -{ ARM::VST1q16Pseudo, ARM::VST1q16, false, false, SingleSpc, 2, 4 }, -{ ARM::VST1q16Pseudo_UPD, ARM::VST1q16_UPD, false, true, SingleSpc, 2, 4 }, -{ ARM::VST1q32Pseudo, ARM::VST1q32, false, false, SingleSpc, 2, 2 }, -{ ARM::VST1q32Pseudo_UPD, ARM::VST1q32_UPD, false, true, SingleSpc, 2, 2 }, -{ ARM::VST1q64Pseudo, ARM::VST1q64, false, false, SingleSpc, 2, 1 }, -{ ARM::VST1q64Pseudo_UPD, ARM::VST1q64_UPD, false, true, SingleSpc, 2, 1 }, -{ ARM::VST1q8Pseudo, ARM::VST1q8, false, false, SingleSpc, 2, 8 }, -{ ARM::VST1q8Pseudo_UPD, ARM::VST1q8_UPD, false, true, SingleSpc, 2, 8 }, - -{ ARM::VST2LNd16Pseudo, ARM::VST2LNd16, false, false, SingleSpc, 2, 4 }, -{ ARM::VST2LNd16Pseudo_UPD, ARM::VST2LNd16_UPD, false, true, SingleSpc, 2, 4 }, -{ ARM::VST2LNd32Pseudo, ARM::VST2LNd32, false, false, SingleSpc, 2, 2 }, -{ ARM::VST2LNd32Pseudo_UPD, ARM::VST2LNd32_UPD, false, true, SingleSpc, 2, 2 }, -{ ARM::VST2LNd8Pseudo, ARM::VST2LNd8, false, false, SingleSpc, 2, 8 }, -{ ARM::VST2LNd8Pseudo_UPD, ARM::VST2LNd8_UPD, false, true, SingleSpc, 2, 8 }, -{ ARM::VST2LNq16Pseudo, ARM::VST2LNq16, false, false, EvenDblSpc, 2, 4}, -{ 
ARM::VST2LNq16Pseudo_UPD, ARM::VST2LNq16_UPD, false, true, EvenDblSpc, 2, 4}, -{ ARM::VST2LNq32Pseudo, ARM::VST2LNq32, false, false, EvenDblSpc, 2, 2}, -{ ARM::VST2LNq32Pseudo_UPD, ARM::VST2LNq32_UPD, false, true, EvenDblSpc, 2, 2}, - -{ ARM::VST2d16Pseudo, ARM::VST2d16, false, false, SingleSpc, 2, 4 }, -{ ARM::VST2d16Pseudo_UPD, ARM::VST2d16_UPD, false, true, SingleSpc, 2, 4 }, -{ ARM::VST2d32Pseudo, ARM::VST2d32, false, false, SingleSpc, 2, 2 }, -{ ARM::VST2d32Pseudo_UPD, ARM::VST2d32_UPD, false, true, SingleSpc, 2, 2 }, -{ ARM::VST2d8Pseudo, ARM::VST2d8, false, false, SingleSpc, 2, 8 }, -{ ARM::VST2d8Pseudo_UPD, ARM::VST2d8_UPD, false, true, SingleSpc, 2, 8 }, - -{ ARM::VST2q16Pseudo, ARM::VST2q16, false, false, SingleSpc, 4, 4 }, -{ ARM::VST2q16Pseudo_UPD, ARM::VST2q16_UPD, false, true, SingleSpc, 4, 4 }, -{ ARM::VST2q32Pseudo, ARM::VST2q32, false, false, SingleSpc, 4, 2 }, -{ ARM::VST2q32Pseudo_UPD, ARM::VST2q32_UPD, false, true, SingleSpc, 4, 2 }, -{ ARM::VST2q8Pseudo, ARM::VST2q8, false, false, SingleSpc, 4, 8 }, -{ ARM::VST2q8Pseudo_UPD, ARM::VST2q8_UPD, false, true, SingleSpc, 4, 8 }, - -{ ARM::VST3LNd16Pseudo, ARM::VST3LNd16, false, false, SingleSpc, 3, 4 }, -{ ARM::VST3LNd16Pseudo_UPD, ARM::VST3LNd16_UPD, false, true, SingleSpc, 3, 4 }, -{ ARM::VST3LNd32Pseudo, ARM::VST3LNd32, false, false, SingleSpc, 3, 2 }, -{ ARM::VST3LNd32Pseudo_UPD, ARM::VST3LNd32_UPD, false, true, SingleSpc, 3, 2 }, -{ ARM::VST3LNd8Pseudo, ARM::VST3LNd8, false, false, SingleSpc, 3, 8 }, -{ ARM::VST3LNd8Pseudo_UPD, ARM::VST3LNd8_UPD, false, true, SingleSpc, 3, 8 }, -{ ARM::VST3LNq16Pseudo, ARM::VST3LNq16, false, false, EvenDblSpc, 3, 4}, -{ ARM::VST3LNq16Pseudo_UPD, ARM::VST3LNq16_UPD, false, true, EvenDblSpc, 3, 4}, -{ ARM::VST3LNq32Pseudo, ARM::VST3LNq32, false, false, EvenDblSpc, 3, 2}, -{ ARM::VST3LNq32Pseudo_UPD, ARM::VST3LNq32_UPD, false, true, EvenDblSpc, 3, 2}, - -{ ARM::VST3d16Pseudo, ARM::VST3d16, false, false, SingleSpc, 3, 4 }, -{ ARM::VST3d16Pseudo_UPD, ARM::VST3d16_UPD, false, true, SingleSpc, 3, 4 }, -{ ARM::VST3d32Pseudo, ARM::VST3d32, false, false, SingleSpc, 3, 2 }, -{ ARM::VST3d32Pseudo_UPD, ARM::VST3d32_UPD, false, true, SingleSpc, 3, 2 }, -{ ARM::VST3d8Pseudo, ARM::VST3d8, false, false, SingleSpc, 3, 8 }, -{ ARM::VST3d8Pseudo_UPD, ARM::VST3d8_UPD, false, true, SingleSpc, 3, 8 }, - -{ ARM::VST3q16Pseudo_UPD, ARM::VST3q16_UPD, false, true, EvenDblSpc, 3, 4 }, -{ ARM::VST3q16oddPseudo, ARM::VST3q16, false, false, OddDblSpc, 3, 4 }, -{ ARM::VST3q16oddPseudo_UPD, ARM::VST3q16_UPD, false, true, OddDblSpc, 3, 4 }, -{ ARM::VST3q32Pseudo_UPD, ARM::VST3q32_UPD, false, true, EvenDblSpc, 3, 2 }, -{ ARM::VST3q32oddPseudo, ARM::VST3q32, false, false, OddDblSpc, 3, 2 }, -{ ARM::VST3q32oddPseudo_UPD, ARM::VST3q32_UPD, false, true, OddDblSpc, 3, 2 }, -{ ARM::VST3q8Pseudo_UPD, ARM::VST3q8_UPD, false, true, EvenDblSpc, 3, 8 }, -{ ARM::VST3q8oddPseudo, ARM::VST3q8, false, false, OddDblSpc, 3, 8 }, -{ ARM::VST3q8oddPseudo_UPD, ARM::VST3q8_UPD, false, true, OddDblSpc, 3, 8 }, - -{ ARM::VST4LNd16Pseudo, ARM::VST4LNd16, false, false, SingleSpc, 4, 4 }, -{ ARM::VST4LNd16Pseudo_UPD, ARM::VST4LNd16_UPD, false, true, SingleSpc, 4, 4 }, -{ ARM::VST4LNd32Pseudo, ARM::VST4LNd32, false, false, SingleSpc, 4, 2 }, -{ ARM::VST4LNd32Pseudo_UPD, ARM::VST4LNd32_UPD, false, true, SingleSpc, 4, 2 }, -{ ARM::VST4LNd8Pseudo, ARM::VST4LNd8, false, false, SingleSpc, 4, 8 }, -{ ARM::VST4LNd8Pseudo_UPD, ARM::VST4LNd8_UPD, false, true, SingleSpc, 4, 8 }, -{ ARM::VST4LNq16Pseudo, ARM::VST4LNq16, false, false, EvenDblSpc, 4, 4}, -{ 
ARM::VST4LNq16Pseudo_UPD, ARM::VST4LNq16_UPD, false, true, EvenDblSpc, 4, 4}, -{ ARM::VST4LNq32Pseudo, ARM::VST4LNq32, false, false, EvenDblSpc, 4, 2}, -{ ARM::VST4LNq32Pseudo_UPD, ARM::VST4LNq32_UPD, false, true, EvenDblSpc, 4, 2}, - -{ ARM::VST4d16Pseudo, ARM::VST4d16, false, false, SingleSpc, 4, 4 }, -{ ARM::VST4d16Pseudo_UPD, ARM::VST4d16_UPD, false, true, SingleSpc, 4, 4 }, -{ ARM::VST4d32Pseudo, ARM::VST4d32, false, false, SingleSpc, 4, 2 }, -{ ARM::VST4d32Pseudo_UPD, ARM::VST4d32_UPD, false, true, SingleSpc, 4, 2 }, -{ ARM::VST4d8Pseudo, ARM::VST4d8, false, false, SingleSpc, 4, 8 }, -{ ARM::VST4d8Pseudo_UPD, ARM::VST4d8_UPD, false, true, SingleSpc, 4, 8 }, - -{ ARM::VST4q16Pseudo_UPD, ARM::VST4q16_UPD, false, true, EvenDblSpc, 4, 4 }, -{ ARM::VST4q16oddPseudo, ARM::VST4q16, false, false, OddDblSpc, 4, 4 }, -{ ARM::VST4q16oddPseudo_UPD, ARM::VST4q16_UPD, false, true, OddDblSpc, 4, 4 }, -{ ARM::VST4q32Pseudo_UPD, ARM::VST4q32_UPD, false, true, EvenDblSpc, 4, 2 }, -{ ARM::VST4q32oddPseudo, ARM::VST4q32, false, false, OddDblSpc, 4, 2 }, -{ ARM::VST4q32oddPseudo_UPD, ARM::VST4q32_UPD, false, true, OddDblSpc, 4, 2 }, -{ ARM::VST4q8Pseudo_UPD, ARM::VST4q8_UPD, false, true, EvenDblSpc, 4, 8 }, -{ ARM::VST4q8oddPseudo, ARM::VST4q8, false, false, OddDblSpc, 4, 8 }, -{ ARM::VST4q8oddPseudo_UPD, ARM::VST4q8_UPD, false, true, OddDblSpc, 4, 8 } +{ ARM::VLD1LNq16Pseudo, ARM::VLD1LNd16, true, false, false, EvenDblSpc, 1, 4 ,true}, +{ ARM::VLD1LNq16Pseudo_UPD, ARM::VLD1LNd16_UPD, true, true, true, EvenDblSpc, 1, 4 ,true}, +{ ARM::VLD1LNq32Pseudo, ARM::VLD1LNd32, true, false, false, EvenDblSpc, 1, 2 ,true}, +{ ARM::VLD1LNq32Pseudo_UPD, ARM::VLD1LNd32_UPD, true, true, true, EvenDblSpc, 1, 2 ,true}, +{ ARM::VLD1LNq8Pseudo, ARM::VLD1LNd8, true, false, false, EvenDblSpc, 1, 8 ,true}, +{ ARM::VLD1LNq8Pseudo_UPD, ARM::VLD1LNd8_UPD, true, true, true, EvenDblSpc, 1, 8 ,true}, + +{ ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, false, SingleSpc, 4, 1 ,false}, +{ ARM::VLD1d64TPseudo, ARM::VLD1d64T, true, false, false, SingleSpc, 3, 1 ,false}, + +{ ARM::VLD2LNd16Pseudo, ARM::VLD2LNd16, true, false, false, SingleSpc, 2, 4 ,true}, +{ ARM::VLD2LNd16Pseudo_UPD, ARM::VLD2LNd16_UPD, true, true, true, SingleSpc, 2, 4 ,true}, +{ ARM::VLD2LNd32Pseudo, ARM::VLD2LNd32, true, false, false, SingleSpc, 2, 2 ,true}, +{ ARM::VLD2LNd32Pseudo_UPD, ARM::VLD2LNd32_UPD, true, true, true, SingleSpc, 2, 2 ,true}, +{ ARM::VLD2LNd8Pseudo, ARM::VLD2LNd8, true, false, false, SingleSpc, 2, 8 ,true}, +{ ARM::VLD2LNd8Pseudo_UPD, ARM::VLD2LNd8_UPD, true, true, true, SingleSpc, 2, 8 ,true}, +{ ARM::VLD2LNq16Pseudo, ARM::VLD2LNq16, true, false, false, EvenDblSpc, 2, 4 ,true}, +{ ARM::VLD2LNq16Pseudo_UPD, ARM::VLD2LNq16_UPD, true, true, true, EvenDblSpc, 2, 4 ,true}, +{ ARM::VLD2LNq32Pseudo, ARM::VLD2LNq32, true, false, false, EvenDblSpc, 2, 2 ,true}, +{ ARM::VLD2LNq32Pseudo_UPD, ARM::VLD2LNq32_UPD, true, true, true, EvenDblSpc, 2, 2 ,true}, + +{ ARM::VLD2q16Pseudo, ARM::VLD2q16, true, false, false, SingleSpc, 4, 4 ,false}, +{ ARM::VLD2q16PseudoWB_fixed, ARM::VLD2q16wb_fixed, true, true, false, SingleSpc, 4, 4 ,false}, +{ ARM::VLD2q16PseudoWB_register, ARM::VLD2q16wb_register, true, true, true, SingleSpc, 4, 4 ,false}, +{ ARM::VLD2q32Pseudo, ARM::VLD2q32, true, false, false, SingleSpc, 4, 2 ,false}, +{ ARM::VLD2q32PseudoWB_fixed, ARM::VLD2q32wb_fixed, true, true, false, SingleSpc, 4, 2 ,false}, +{ ARM::VLD2q32PseudoWB_register, ARM::VLD2q32wb_register, true, true, true, SingleSpc, 4, 2 ,false}, +{ ARM::VLD2q8Pseudo, ARM::VLD2q8, true, false, 
false, SingleSpc, 4, 8 ,false}, +{ ARM::VLD2q8PseudoWB_fixed, ARM::VLD2q8wb_fixed, true, true, false, SingleSpc, 4, 8 ,false}, +{ ARM::VLD2q8PseudoWB_register, ARM::VLD2q8wb_register, true, true, true, SingleSpc, 4, 8 ,false}, + +{ ARM::VLD3DUPd16Pseudo, ARM::VLD3DUPd16, true, false, false, SingleSpc, 3, 4,true}, +{ ARM::VLD3DUPd16Pseudo_UPD, ARM::VLD3DUPd16_UPD, true, true, true, SingleSpc, 3, 4,true}, +{ ARM::VLD3DUPd32Pseudo, ARM::VLD3DUPd32, true, false, false, SingleSpc, 3, 2,true}, +{ ARM::VLD3DUPd32Pseudo_UPD, ARM::VLD3DUPd32_UPD, true, true, true, SingleSpc, 3, 2,true}, +{ ARM::VLD3DUPd8Pseudo, ARM::VLD3DUPd8, true, false, false, SingleSpc, 3, 8,true}, +{ ARM::VLD3DUPd8Pseudo_UPD, ARM::VLD3DUPd8_UPD, true, true, true, SingleSpc, 3, 8,true}, + +{ ARM::VLD3LNd16Pseudo, ARM::VLD3LNd16, true, false, false, SingleSpc, 3, 4 ,true}, +{ ARM::VLD3LNd16Pseudo_UPD, ARM::VLD3LNd16_UPD, true, true, true, SingleSpc, 3, 4 ,true}, +{ ARM::VLD3LNd32Pseudo, ARM::VLD3LNd32, true, false, false, SingleSpc, 3, 2 ,true}, +{ ARM::VLD3LNd32Pseudo_UPD, ARM::VLD3LNd32_UPD, true, true, true, SingleSpc, 3, 2 ,true}, +{ ARM::VLD3LNd8Pseudo, ARM::VLD3LNd8, true, false, false, SingleSpc, 3, 8 ,true}, +{ ARM::VLD3LNd8Pseudo_UPD, ARM::VLD3LNd8_UPD, true, true, true, SingleSpc, 3, 8 ,true}, +{ ARM::VLD3LNq16Pseudo, ARM::VLD3LNq16, true, false, false, EvenDblSpc, 3, 4 ,true}, +{ ARM::VLD3LNq16Pseudo_UPD, ARM::VLD3LNq16_UPD, true, true, true, EvenDblSpc, 3, 4 ,true}, +{ ARM::VLD3LNq32Pseudo, ARM::VLD3LNq32, true, false, false, EvenDblSpc, 3, 2 ,true}, +{ ARM::VLD3LNq32Pseudo_UPD, ARM::VLD3LNq32_UPD, true, true, true, EvenDblSpc, 3, 2 ,true}, + +{ ARM::VLD3d16Pseudo, ARM::VLD3d16, true, false, false, SingleSpc, 3, 4 ,true}, +{ ARM::VLD3d16Pseudo_UPD, ARM::VLD3d16_UPD, true, true, true, SingleSpc, 3, 4 ,true}, +{ ARM::VLD3d32Pseudo, ARM::VLD3d32, true, false, false, SingleSpc, 3, 2 ,true}, +{ ARM::VLD3d32Pseudo_UPD, ARM::VLD3d32_UPD, true, true, true, SingleSpc, 3, 2 ,true}, +{ ARM::VLD3d8Pseudo, ARM::VLD3d8, true, false, false, SingleSpc, 3, 8 ,true}, +{ ARM::VLD3d8Pseudo_UPD, ARM::VLD3d8_UPD, true, true, true, SingleSpc, 3, 8 ,true}, + +{ ARM::VLD3q16Pseudo_UPD, ARM::VLD3q16_UPD, true, true, true, EvenDblSpc, 3, 4 ,true}, +{ ARM::VLD3q16oddPseudo, ARM::VLD3q16, true, false, false, OddDblSpc, 3, 4 ,true}, +{ ARM::VLD3q16oddPseudo_UPD, ARM::VLD3q16_UPD, true, true, true, OddDblSpc, 3, 4 ,true}, +{ ARM::VLD3q32Pseudo_UPD, ARM::VLD3q32_UPD, true, true, true, EvenDblSpc, 3, 2 ,true}, +{ ARM::VLD3q32oddPseudo, ARM::VLD3q32, true, false, false, OddDblSpc, 3, 2 ,true}, +{ ARM::VLD3q32oddPseudo_UPD, ARM::VLD3q32_UPD, true, true, true, OddDblSpc, 3, 2 ,true}, +{ ARM::VLD3q8Pseudo_UPD, ARM::VLD3q8_UPD, true, true, true, EvenDblSpc, 3, 8 ,true}, +{ ARM::VLD3q8oddPseudo, ARM::VLD3q8, true, false, false, OddDblSpc, 3, 8 ,true}, +{ ARM::VLD3q8oddPseudo_UPD, ARM::VLD3q8_UPD, true, true, true, OddDblSpc, 3, 8 ,true}, + +{ ARM::VLD4DUPd16Pseudo, ARM::VLD4DUPd16, true, false, false, SingleSpc, 4, 4,true}, +{ ARM::VLD4DUPd16Pseudo_UPD, ARM::VLD4DUPd16_UPD, true, true, true, SingleSpc, 4, 4,true}, +{ ARM::VLD4DUPd32Pseudo, ARM::VLD4DUPd32, true, false, false, SingleSpc, 4, 2,true}, +{ ARM::VLD4DUPd32Pseudo_UPD, ARM::VLD4DUPd32_UPD, true, true, true, SingleSpc, 4, 2,true}, +{ ARM::VLD4DUPd8Pseudo, ARM::VLD4DUPd8, true, false, false, SingleSpc, 4, 8,true}, +{ ARM::VLD4DUPd8Pseudo_UPD, ARM::VLD4DUPd8_UPD, true, true, true, SingleSpc, 4, 8,true}, + +{ ARM::VLD4LNd16Pseudo, ARM::VLD4LNd16, true, false, false, SingleSpc, 4, 4 ,true}, +{ 
ARM::VLD4LNd16Pseudo_UPD, ARM::VLD4LNd16_UPD, true, true, true, SingleSpc, 4, 4 ,true}, +{ ARM::VLD4LNd32Pseudo, ARM::VLD4LNd32, true, false, false, SingleSpc, 4, 2 ,true}, +{ ARM::VLD4LNd32Pseudo_UPD, ARM::VLD4LNd32_UPD, true, true, true, SingleSpc, 4, 2 ,true}, +{ ARM::VLD4LNd8Pseudo, ARM::VLD4LNd8, true, false, false, SingleSpc, 4, 8 ,true}, +{ ARM::VLD4LNd8Pseudo_UPD, ARM::VLD4LNd8_UPD, true, true, true, SingleSpc, 4, 8 ,true}, +{ ARM::VLD4LNq16Pseudo, ARM::VLD4LNq16, true, false, false, EvenDblSpc, 4, 4 ,true}, +{ ARM::VLD4LNq16Pseudo_UPD, ARM::VLD4LNq16_UPD, true, true, true, EvenDblSpc, 4, 4 ,true}, +{ ARM::VLD4LNq32Pseudo, ARM::VLD4LNq32, true, false, false, EvenDblSpc, 4, 2 ,true}, +{ ARM::VLD4LNq32Pseudo_UPD, ARM::VLD4LNq32_UPD, true, true, true, EvenDblSpc, 4, 2 ,true}, + +{ ARM::VLD4d16Pseudo, ARM::VLD4d16, true, false, false, SingleSpc, 4, 4 ,true}, +{ ARM::VLD4d16Pseudo_UPD, ARM::VLD4d16_UPD, true, true, true, SingleSpc, 4, 4 ,true}, +{ ARM::VLD4d32Pseudo, ARM::VLD4d32, true, false, false, SingleSpc, 4, 2 ,true}, +{ ARM::VLD4d32Pseudo_UPD, ARM::VLD4d32_UPD, true, true, true, SingleSpc, 4, 2 ,true}, +{ ARM::VLD4d8Pseudo, ARM::VLD4d8, true, false, false, SingleSpc, 4, 8 ,true}, +{ ARM::VLD4d8Pseudo_UPD, ARM::VLD4d8_UPD, true, true, true, SingleSpc, 4, 8 ,true}, + +{ ARM::VLD4q16Pseudo_UPD, ARM::VLD4q16_UPD, true, true, true, EvenDblSpc, 4, 4 ,true}, +{ ARM::VLD4q16oddPseudo, ARM::VLD4q16, true, false, false, OddDblSpc, 4, 4 ,true}, +{ ARM::VLD4q16oddPseudo_UPD, ARM::VLD4q16_UPD, true, true, true, OddDblSpc, 4, 4 ,true}, +{ ARM::VLD4q32Pseudo_UPD, ARM::VLD4q32_UPD, true, true, true, EvenDblSpc, 4, 2 ,true}, +{ ARM::VLD4q32oddPseudo, ARM::VLD4q32, true, false, false, OddDblSpc, 4, 2 ,true}, +{ ARM::VLD4q32oddPseudo_UPD, ARM::VLD4q32_UPD, true, true, true, OddDblSpc, 4, 2 ,true}, +{ ARM::VLD4q8Pseudo_UPD, ARM::VLD4q8_UPD, true, true, true, EvenDblSpc, 4, 8 ,true}, +{ ARM::VLD4q8oddPseudo, ARM::VLD4q8, true, false, false, OddDblSpc, 4, 8 ,true}, +{ ARM::VLD4q8oddPseudo_UPD, ARM::VLD4q8_UPD, true, true, true, OddDblSpc, 4, 8 ,true}, + +{ ARM::VST1LNq16Pseudo, ARM::VST1LNd16, false, false, false, EvenDblSpc, 1, 4 ,true}, +{ ARM::VST1LNq16Pseudo_UPD, ARM::VST1LNd16_UPD, false, true, true, EvenDblSpc, 1, 4 ,true}, +{ ARM::VST1LNq32Pseudo, ARM::VST1LNd32, false, false, false, EvenDblSpc, 1, 2 ,true}, +{ ARM::VST1LNq32Pseudo_UPD, ARM::VST1LNd32_UPD, false, true, true, EvenDblSpc, 1, 2 ,true}, +{ ARM::VST1LNq8Pseudo, ARM::VST1LNd8, false, false, false, EvenDblSpc, 1, 8 ,true}, +{ ARM::VST1LNq8Pseudo_UPD, ARM::VST1LNd8_UPD, false, true, true, EvenDblSpc, 1, 8 ,true}, + +{ ARM::VST1d64QPseudo, ARM::VST1d64Q, false, false, false, SingleSpc, 4, 1 ,false}, +{ ARM::VST1d64QPseudoWB_fixed, ARM::VST1d64Qwb_fixed, false, true, false, SingleSpc, 4, 1 ,false}, +{ ARM::VST1d64QPseudoWB_register, ARM::VST1d64Qwb_register, false, true, true, SingleSpc, 4, 1 ,false}, +{ ARM::VST1d64TPseudo, ARM::VST1d64T, false, false, false, SingleSpc, 3, 1 ,false}, +{ ARM::VST1d64TPseudoWB_fixed, ARM::VST1d64Twb_fixed, false, true, false, SingleSpc, 3, 1 ,false}, +{ ARM::VST1d64TPseudoWB_register, ARM::VST1d64Twb_register, false, true, true, SingleSpc, 3, 1 ,false}, + +{ ARM::VST2LNd16Pseudo, ARM::VST2LNd16, false, false, false, SingleSpc, 2, 4 ,true}, +{ ARM::VST2LNd16Pseudo_UPD, ARM::VST2LNd16_UPD, false, true, true, SingleSpc, 2, 4 ,true}, +{ ARM::VST2LNd32Pseudo, ARM::VST2LNd32, false, false, false, SingleSpc, 2, 2 ,true}, +{ ARM::VST2LNd32Pseudo_UPD, ARM::VST2LNd32_UPD, false, true, true, SingleSpc, 2, 2 ,true}, 
+{ ARM::VST2LNd8Pseudo, ARM::VST2LNd8, false, false, false, SingleSpc, 2, 8 ,true}, +{ ARM::VST2LNd8Pseudo_UPD, ARM::VST2LNd8_UPD, false, true, true, SingleSpc, 2, 8 ,true}, +{ ARM::VST2LNq16Pseudo, ARM::VST2LNq16, false, false, false, EvenDblSpc, 2, 4,true}, +{ ARM::VST2LNq16Pseudo_UPD, ARM::VST2LNq16_UPD, false, true, true, EvenDblSpc, 2, 4,true}, +{ ARM::VST2LNq32Pseudo, ARM::VST2LNq32, false, false, false, EvenDblSpc, 2, 2,true}, +{ ARM::VST2LNq32Pseudo_UPD, ARM::VST2LNq32_UPD, false, true, true, EvenDblSpc, 2, 2,true}, + +{ ARM::VST2q16Pseudo, ARM::VST2q16, false, false, false, SingleSpc, 4, 4 ,false}, +{ ARM::VST2q16PseudoWB_fixed, ARM::VST2q16wb_fixed, false, true, false, SingleSpc, 4, 4 ,false}, +{ ARM::VST2q16PseudoWB_register, ARM::VST2q16wb_register, false, true, true, SingleSpc, 4, 4 ,false}, +{ ARM::VST2q32Pseudo, ARM::VST2q32, false, false, false, SingleSpc, 4, 2 ,false}, +{ ARM::VST2q32PseudoWB_fixed, ARM::VST2q32wb_fixed, false, true, false, SingleSpc, 4, 2 ,false}, +{ ARM::VST2q32PseudoWB_register, ARM::VST2q32wb_register, false, true, true, SingleSpc, 4, 2 ,false}, +{ ARM::VST2q8Pseudo, ARM::VST2q8, false, false, false, SingleSpc, 4, 8 ,false}, +{ ARM::VST2q8PseudoWB_fixed, ARM::VST2q8wb_fixed, false, true, false, SingleSpc, 4, 8 ,false}, +{ ARM::VST2q8PseudoWB_register, ARM::VST2q8wb_register, false, true, true, SingleSpc, 4, 8 ,false}, + +{ ARM::VST3LNd16Pseudo, ARM::VST3LNd16, false, false, false, SingleSpc, 3, 4 ,true}, +{ ARM::VST3LNd16Pseudo_UPD, ARM::VST3LNd16_UPD, false, true, true, SingleSpc, 3, 4 ,true}, +{ ARM::VST3LNd32Pseudo, ARM::VST3LNd32, false, false, false, SingleSpc, 3, 2 ,true}, +{ ARM::VST3LNd32Pseudo_UPD, ARM::VST3LNd32_UPD, false, true, true, SingleSpc, 3, 2 ,true}, +{ ARM::VST3LNd8Pseudo, ARM::VST3LNd8, false, false, false, SingleSpc, 3, 8 ,true}, +{ ARM::VST3LNd8Pseudo_UPD, ARM::VST3LNd8_UPD, false, true, true, SingleSpc, 3, 8 ,true}, +{ ARM::VST3LNq16Pseudo, ARM::VST3LNq16, false, false, false, EvenDblSpc, 3, 4,true}, +{ ARM::VST3LNq16Pseudo_UPD, ARM::VST3LNq16_UPD, false, true, true, EvenDblSpc, 3, 4,true}, +{ ARM::VST3LNq32Pseudo, ARM::VST3LNq32, false, false, false, EvenDblSpc, 3, 2,true}, +{ ARM::VST3LNq32Pseudo_UPD, ARM::VST3LNq32_UPD, false, true, true, EvenDblSpc, 3, 2,true}, + +{ ARM::VST3d16Pseudo, ARM::VST3d16, false, false, false, SingleSpc, 3, 4 ,true}, +{ ARM::VST3d16Pseudo_UPD, ARM::VST3d16_UPD, false, true, true, SingleSpc, 3, 4 ,true}, +{ ARM::VST3d32Pseudo, ARM::VST3d32, false, false, false, SingleSpc, 3, 2 ,true}, +{ ARM::VST3d32Pseudo_UPD, ARM::VST3d32_UPD, false, true, true, SingleSpc, 3, 2 ,true}, +{ ARM::VST3d8Pseudo, ARM::VST3d8, false, false, false, SingleSpc, 3, 8 ,true}, +{ ARM::VST3d8Pseudo_UPD, ARM::VST3d8_UPD, false, true, true, SingleSpc, 3, 8 ,true}, + +{ ARM::VST3q16Pseudo_UPD, ARM::VST3q16_UPD, false, true, true, EvenDblSpc, 3, 4 ,true}, +{ ARM::VST3q16oddPseudo, ARM::VST3q16, false, false, false, OddDblSpc, 3, 4 ,true}, +{ ARM::VST3q16oddPseudo_UPD, ARM::VST3q16_UPD, false, true, true, OddDblSpc, 3, 4 ,true}, +{ ARM::VST3q32Pseudo_UPD, ARM::VST3q32_UPD, false, true, true, EvenDblSpc, 3, 2 ,true}, +{ ARM::VST3q32oddPseudo, ARM::VST3q32, false, false, false, OddDblSpc, 3, 2 ,true}, +{ ARM::VST3q32oddPseudo_UPD, ARM::VST3q32_UPD, false, true, true, OddDblSpc, 3, 2 ,true}, +{ ARM::VST3q8Pseudo_UPD, ARM::VST3q8_UPD, false, true, true, EvenDblSpc, 3, 8 ,true}, +{ ARM::VST3q8oddPseudo, ARM::VST3q8, false, false, false, OddDblSpc, 3, 8 ,true}, +{ ARM::VST3q8oddPseudo_UPD, ARM::VST3q8_UPD, false, true, true, OddDblSpc, 3, 
8 ,true},
+
+{ ARM::VST4LNd16Pseudo, ARM::VST4LNd16, false, false, false, SingleSpc, 4, 4 ,true},
+{ ARM::VST4LNd16Pseudo_UPD, ARM::VST4LNd16_UPD, false, true, true, SingleSpc, 4, 4 ,true},
+{ ARM::VST4LNd32Pseudo, ARM::VST4LNd32, false, false, false, SingleSpc, 4, 2 ,true},
+{ ARM::VST4LNd32Pseudo_UPD, ARM::VST4LNd32_UPD, false, true, true, SingleSpc, 4, 2 ,true},
+{ ARM::VST4LNd8Pseudo, ARM::VST4LNd8, false, false, false, SingleSpc, 4, 8 ,true},
+{ ARM::VST4LNd8Pseudo_UPD, ARM::VST4LNd8_UPD, false, true, true, SingleSpc, 4, 8 ,true},
+{ ARM::VST4LNq16Pseudo, ARM::VST4LNq16, false, false, false, EvenDblSpc, 4, 4,true},
+{ ARM::VST4LNq16Pseudo_UPD, ARM::VST4LNq16_UPD, false, true, true, EvenDblSpc, 4, 4,true},
+{ ARM::VST4LNq32Pseudo, ARM::VST4LNq32, false, false, false, EvenDblSpc, 4, 2,true},
+{ ARM::VST4LNq32Pseudo_UPD, ARM::VST4LNq32_UPD, false, true, true, EvenDblSpc, 4, 2,true},
+
+{ ARM::VST4d16Pseudo, ARM::VST4d16, false, false, false, SingleSpc, 4, 4 ,true},
+{ ARM::VST4d16Pseudo_UPD, ARM::VST4d16_UPD, false, true, true, SingleSpc, 4, 4 ,true},
+{ ARM::VST4d32Pseudo, ARM::VST4d32, false, false, false, SingleSpc, 4, 2 ,true},
+{ ARM::VST4d32Pseudo_UPD, ARM::VST4d32_UPD, false, true, true, SingleSpc, 4, 2 ,true},
+{ ARM::VST4d8Pseudo, ARM::VST4d8, false, false, false, SingleSpc, 4, 8 ,true},
+{ ARM::VST4d8Pseudo_UPD, ARM::VST4d8_UPD, false, true, true, SingleSpc, 4, 8 ,true},
+
+{ ARM::VST4q16Pseudo_UPD, ARM::VST4q16_UPD, false, true, true, EvenDblSpc, 4, 4 ,true},
+{ ARM::VST4q16oddPseudo, ARM::VST4q16, false, false, false, OddDblSpc, 4, 4 ,true},
+{ ARM::VST4q16oddPseudo_UPD, ARM::VST4q16_UPD, false, true, true, OddDblSpc, 4, 4 ,true},
+{ ARM::VST4q32Pseudo_UPD, ARM::VST4q32_UPD, false, true, true, EvenDblSpc, 4, 2 ,true},
+{ ARM::VST4q32oddPseudo, ARM::VST4q32, false, false, false, OddDblSpc, 4, 2 ,true},
+{ ARM::VST4q32oddPseudo_UPD, ARM::VST4q32_UPD, false, true, true, OddDblSpc, 4, 2 ,true},
+{ ARM::VST4q8Pseudo_UPD, ARM::VST4q8_UPD, false, true, true, EvenDblSpc, 4, 8 ,true},
+{ ARM::VST4q8oddPseudo, ARM::VST4q8, false, false, false, OddDblSpc, 4, 8 ,true},
+{ ARM::VST4q8oddPseudo_UPD, ARM::VST4q8_UPD, false, true, true, OddDblSpc, 4, 8 ,true}
 };

 /// LookupNEONLdSt - Search the NEONLdStTable for information about a NEON
 /// load or store pseudo instruction.
 static const NEONLdStTableEntry *LookupNEONLdSt(unsigned Opcode) {
-  unsigned NumEntries = array_lengthof(NEONLdStTable);
+  const unsigned NumEntries = array_lengthof(NEONLdStTable);
 #ifndef NDEBUG
   // Make sure the table is sorted.
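Note: the struct comment above says the table is kept sorted by PseudoOpc precisely so this lookup can binary-search it; the function body itself is elided by the diff context. A self-contained sketch of that scheme, with a toy Entry type standing in for NEONLdStTableEntry:

```cpp
#include <algorithm>
#include <cstdint>
#include <iterator>

// Only the sorted-by-PseudoOpc invariant and the operator< overloads
// matter for the O(log n) binary search.
struct Entry {
  uint16_t PseudoOpc;
  uint16_t RealOpc;
  bool operator<(const Entry &E) const { return PseudoOpc < E.PseudoOpc; }
  bool operator<(unsigned Opc) const { return PseudoOpc < Opc; }
};

static const Entry Table[] = {{100, 1}, {200, 2}, {300, 3}}; // kept sorted

static const Entry *lookup(unsigned Opcode) {
  const Entry *I = std::lower_bound(std::begin(Table), std::end(Table), Opcode);
  if (I != std::end(Table) && I->PseudoOpc == Opcode)
    return I;      // exact match: Opcode is a tracked pseudo
  return nullptr;  // not in the table
}

int main() { return lookup(200) ? 0 : 1; }
```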
@@ -422,21 +388,22 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) {
   unsigned DstReg = MI.getOperand(OpIdx++).getReg();
   unsigned D0, D1, D2, D3;
   GetDSubRegs(DstReg, RegSpc, TRI, D0, D1, D2, D3);
-  MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead))
-    .addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
-  if (NumRegs > 2)
+  MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead));
+  if (NumRegs > 1 && TableEntry->copyAllListRegs)
+    MIB.addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
+  if (NumRegs > 2 && TableEntry->copyAllListRegs)
     MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead));
-  if (NumRegs > 3)
+  if (NumRegs > 3 && TableEntry->copyAllListRegs)
     MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead));

-  if (TableEntry->HasWriteBack)
+  if (TableEntry->isUpdating)
     MIB.addOperand(MI.getOperand(OpIdx++));

   // Copy the addrmode6 operands.
   MIB.addOperand(MI.getOperand(OpIdx++));
   MIB.addOperand(MI.getOperand(OpIdx++));
   // Copy the am6offset operand.
-  if (TableEntry->HasWriteBack)
+  if (TableEntry->hasWritebackOperand)
     MIB.addOperand(MI.getOperand(OpIdx++));

   // For an instruction writing double-spaced subregs, the pseudo instruction
@@ -481,24 +448,26 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) {
   MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
                                     TII->get(TableEntry->RealOpc));
   unsigned OpIdx = 0;
-  if (TableEntry->HasWriteBack)
+  if (TableEntry->isUpdating)
     MIB.addOperand(MI.getOperand(OpIdx++));

   // Copy the addrmode6 operands.
   MIB.addOperand(MI.getOperand(OpIdx++));
   MIB.addOperand(MI.getOperand(OpIdx++));
   // Copy the am6offset operand.
-  if (TableEntry->HasWriteBack)
+  if (TableEntry->hasWritebackOperand)
     MIB.addOperand(MI.getOperand(OpIdx++));

   bool SrcIsKill = MI.getOperand(OpIdx).isKill();
   unsigned SrcReg = MI.getOperand(OpIdx++).getReg();
   unsigned D0, D1, D2, D3;
   GetDSubRegs(SrcReg, RegSpc, TRI, D0, D1, D2, D3);
-  MIB.addReg(D0).addReg(D1);
-  if (NumRegs > 2)
+  MIB.addReg(D0);
+  if (NumRegs > 1 && TableEntry->copyAllListRegs)
+    MIB.addReg(D1);
+  if (NumRegs > 2 && TableEntry->copyAllListRegs)
     MIB.addReg(D2);
-  if (NumRegs > 3)
+  if (NumRegs > 3 && TableEntry->copyAllListRegs)
     MIB.addReg(D3);

   // Copy the predicate operands.
@@ -558,14 +527,14 @@ void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) {
     MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead));
   }

-  if (TableEntry->HasWriteBack)
+  if (TableEntry->isUpdating)
     MIB.addOperand(MI.getOperand(OpIdx++));

   // Copy the addrmode6 operands.
   MIB.addOperand(MI.getOperand(OpIdx++));
   MIB.addOperand(MI.getOperand(OpIdx++));
   // Copy the am6offset operand.
-  if (TableEntry->HasWriteBack)
+  if (TableEntry->hasWritebackOperand)
     MIB.addOperand(MI.getOperand(OpIdx++));

   // Grab the super-register source.
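Note: all of these expansion routines lean on GetDSubRegs to crack a Q or QQ pseudo operand into D registers. The helper itself is not shown in this diff; a hypothetical sketch of the register aliasing it relies on:

```cpp
#include <cstdio>

// Illustrative only (not the LLVM GetDSubRegs API): NEON Q<n> overlaps
// D<2n> and D<2n+1>, so rewriting a pseudo's Q operand into a real
// instruction's D operands is simple index arithmetic.
static void qToDPair(unsigned Q, unsigned &DLo, unsigned &DHi) {
  DLo = 2 * Q;      // low half of Q<n>
  DHi = 2 * Q + 1;  // high half of Q<n>
}

int main() {
  unsigned DLo, DHi;
  qToDPair(3, DLo, DHi);  // Q3 aliases D6:D7
  printf("q3 -> d%u, d%u\n", DLo, DHi);
  return 0;
}
```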
@@ -599,13 +568,15 @@ void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) {
   // Add an implicit def for the super-register.
   MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
   TransferImpOps(MI, MIB, MIB);
+  // Transfer memoperands.
+  MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
   MI.eraseFromParent();
 }

 /// ExpandVTBL - Translate VTBL and VTBX pseudo instructions with Q or QQ
 /// register operands to real instructions with D register operands.
 void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI,
-                                 unsigned Opc, bool IsExt, unsigned NumRegs) {
+                                 unsigned Opc, bool IsExt) {
   MachineInstr &MI = *MBBI;
   MachineBasicBlock &MBB = *MI.getParent();
@@ -621,11 +592,7 @@ void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI,
   unsigned SrcReg = MI.getOperand(OpIdx++).getReg();
   unsigned D0, D1, D2, D3;
   GetDSubRegs(SrcReg, SingleSpc, TRI, D0, D1, D2, D3);
-  MIB.addReg(D0).addReg(D1);
-  if (NumRegs > 2)
-    MIB.addReg(D2);
-  if (NumRegs > 3)
-    MIB.addReg(D3);
+  MIB.addReg(D0);

   // Copy the other source register operand.
   MIB.addOperand(MI.getOperand(OpIdx++));
@@ -645,7 +612,7 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
   MachineInstr &MI = *MBBI;
   unsigned Opcode = MI.getOpcode();
   unsigned PredReg = 0;
-  ARMCC::CondCodes Pred = llvm::getInstrPredicate(&MI, PredReg);
+  ARMCC::CondCodes Pred = getInstrPredicate(&MI, PredReg);
   unsigned DstReg = MI.getOperand(0).getReg();
   bool DstIsDead = MI.getOperand(0).isDead();
   bool isCC = Opcode == ARM::MOVCCi32imm || Opcode == ARM::t2MOVCCi32imm;
@@ -809,7 +776,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
     MI.eraseFromParent();
     return true;
   }
-  case ARM::Int_eh_sjlj_dispatchsetup: {
+  case ARM::Int_eh_sjlj_dispatchsetup:
+  case ARM::Int_eh_sjlj_dispatchsetup_nofp:
+  case ARM::tInt_eh_sjlj_dispatchsetup: {
    MachineFunction &MF = *MI.getParent()->getParent();
    const ARMBaseInstrInfo *AII =
      static_cast<const ARMBaseInstrInfo*>(TII);
@@ -824,15 +793,15 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
              "base pointer without frame pointer?");
       if (AFI->isThumb2Function()) {
-        llvm::emitT2RegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
-                                     FramePtr, -NumBytes, ARMCC::AL, 0, *TII);
+        emitT2RegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
+                               FramePtr, -NumBytes, ARMCC::AL, 0, *TII);
       } else if (AFI->isThumbFunction()) {
-        llvm::emitThumbRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
-                                        FramePtr, -NumBytes, *TII, RI);
+        emitThumbRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
+                                  FramePtr, -NumBytes, *TII, RI);
       } else {
-        llvm::emitARMRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
-                                      FramePtr, -NumBytes, ARMCC::AL, 0,
-                                      *TII);
+        emitARMRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
+                                FramePtr, -NumBytes, ARMCC::AL, 0,
+                                *TII);
       }
       // If there's dynamic realignment, adjust for it.
       if (RI.needsStackRealignment(MF)) {
@@ -996,6 +965,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
     // Add an implicit def for the super-register.
MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead)); TransferImpOps(MI, MIB, MIB); + MIB.setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); MI.eraseFromParent(); return true; } @@ -1026,6 +996,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, MIB->addRegisterKilled(SrcReg, TRI, true); TransferImpOps(MI, MIB, MIB); + MIB.setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); MI.eraseFromParent(); return true; } @@ -1057,26 +1028,15 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, return true; } - case ARM::VLD1q8Pseudo: - case ARM::VLD1q16Pseudo: - case ARM::VLD1q32Pseudo: - case ARM::VLD1q64Pseudo: - case ARM::VLD1q8Pseudo_UPD: - case ARM::VLD1q16Pseudo_UPD: - case ARM::VLD1q32Pseudo_UPD: - case ARM::VLD1q64Pseudo_UPD: - case ARM::VLD2d8Pseudo: - case ARM::VLD2d16Pseudo: - case ARM::VLD2d32Pseudo: case ARM::VLD2q8Pseudo: case ARM::VLD2q16Pseudo: case ARM::VLD2q32Pseudo: - case ARM::VLD2d8Pseudo_UPD: - case ARM::VLD2d16Pseudo_UPD: - case ARM::VLD2d32Pseudo_UPD: - case ARM::VLD2q8Pseudo_UPD: - case ARM::VLD2q16Pseudo_UPD: - case ARM::VLD2q32Pseudo_UPD: + case ARM::VLD2q8PseudoWB_fixed: + case ARM::VLD2q16PseudoWB_fixed: + case ARM::VLD2q32PseudoWB_fixed: + case ARM::VLD2q8PseudoWB_register: + case ARM::VLD2q16PseudoWB_register: + case ARM::VLD2q32PseudoWB_register: case ARM::VLD3d8Pseudo: case ARM::VLD3d16Pseudo: case ARM::VLD3d32Pseudo: @@ -1084,7 +1044,6 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::VLD3d8Pseudo_UPD: case ARM::VLD3d16Pseudo_UPD: case ARM::VLD3d32Pseudo_UPD: - case ARM::VLD1d64TPseudo_UPD: case ARM::VLD3q8Pseudo_UPD: case ARM::VLD3q16Pseudo_UPD: case ARM::VLD3q32Pseudo_UPD: @@ -1101,7 +1060,6 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::VLD4d8Pseudo_UPD: case ARM::VLD4d16Pseudo_UPD: case ARM::VLD4d32Pseudo_UPD: - case ARM::VLD1d64QPseudo_UPD: case ARM::VLD4q8Pseudo_UPD: case ARM::VLD4q16Pseudo_UPD: case ARM::VLD4q32Pseudo_UPD: @@ -1111,18 +1069,6 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::VLD4q8oddPseudo_UPD: case ARM::VLD4q16oddPseudo_UPD: case ARM::VLD4q32oddPseudo_UPD: - case ARM::VLD1DUPq8Pseudo: - case ARM::VLD1DUPq16Pseudo: - case ARM::VLD1DUPq32Pseudo: - case ARM::VLD1DUPq8Pseudo_UPD: - case ARM::VLD1DUPq16Pseudo_UPD: - case ARM::VLD1DUPq32Pseudo_UPD: - case ARM::VLD2DUPd8Pseudo: - case ARM::VLD2DUPd16Pseudo: - case ARM::VLD2DUPd32Pseudo: - case ARM::VLD2DUPd8Pseudo_UPD: - case ARM::VLD2DUPd16Pseudo_UPD: - case ARM::VLD2DUPd32Pseudo_UPD: case ARM::VLD3DUPd8Pseudo: case ARM::VLD3DUPd16Pseudo: case ARM::VLD3DUPd32Pseudo: @@ -1138,26 +1084,15 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, ExpandVLD(MBBI); return true; - case ARM::VST1q8Pseudo: - case ARM::VST1q16Pseudo: - case ARM::VST1q32Pseudo: - case ARM::VST1q64Pseudo: - case ARM::VST1q8Pseudo_UPD: - case ARM::VST1q16Pseudo_UPD: - case ARM::VST1q32Pseudo_UPD: - case ARM::VST1q64Pseudo_UPD: - case ARM::VST2d8Pseudo: - case ARM::VST2d16Pseudo: - case ARM::VST2d32Pseudo: case ARM::VST2q8Pseudo: case ARM::VST2q16Pseudo: case ARM::VST2q32Pseudo: - case ARM::VST2d8Pseudo_UPD: - case ARM::VST2d16Pseudo_UPD: - case ARM::VST2d32Pseudo_UPD: - case ARM::VST2q8Pseudo_UPD: - case ARM::VST2q16Pseudo_UPD: - case ARM::VST2q32Pseudo_UPD: + case ARM::VST2q8PseudoWB_fixed: + case ARM::VST2q16PseudoWB_fixed: + case ARM::VST2q32PseudoWB_fixed: + case ARM::VST2q8PseudoWB_register: + case ARM::VST2q16PseudoWB_register: + case ARM::VST2q32PseudoWB_register: case ARM::VST3d8Pseudo: case ARM::VST3d16Pseudo: case 
ARM::VST3d32Pseudo: @@ -1165,7 +1100,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::VST3d8Pseudo_UPD: case ARM::VST3d16Pseudo_UPD: case ARM::VST3d32Pseudo_UPD: - case ARM::VST1d64TPseudo_UPD: + case ARM::VST1d64TPseudoWB_fixed: + case ARM::VST1d64TPseudoWB_register: case ARM::VST3q8Pseudo_UPD: case ARM::VST3q16Pseudo_UPD: case ARM::VST3q32Pseudo_UPD: @@ -1182,7 +1118,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::VST4d8Pseudo_UPD: case ARM::VST4d16Pseudo_UPD: case ARM::VST4d32Pseudo_UPD: - case ARM::VST1d64QPseudo_UPD: + case ARM::VST1d64QPseudoWB_fixed: + case ARM::VST1d64QPseudoWB_register: case ARM::VST4q8Pseudo_UPD: case ARM::VST4q16Pseudo_UPD: case ARM::VST4q32Pseudo_UPD: @@ -1270,15 +1207,11 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, ExpandLaneOp(MBBI); return true; - case ARM::VTBL2Pseudo: ExpandVTBL(MBBI, ARM::VTBL2, false, 2); return true; - case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false, 3); return true; - case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false, 4); return true; - case ARM::VTBX2Pseudo: ExpandVTBL(MBBI, ARM::VTBX2, true, 2); return true; - case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true, 3); return true; - case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true, 4); return true; + case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false); return true; + case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false); return true; + case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true); return true; + case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true); return true; } - - return false; } bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { diff --git a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp index dc8e54d..2e1eaca 100644 --- a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp @@ -16,7 +16,6 @@ #include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMCallingConv.h" -#include "ARMRegisterInfo.h" #include "ARMTargetMachine.h" #include "ARMSubtarget.h" #include "ARMConstantPoolValue.h" @@ -37,7 +36,6 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" @@ -90,7 +88,7 @@ class ARMFastISel : public FastISel { ARMFunctionInfo *AFI; // Convenience variables to avoid some queries. 
- bool isThumb; + bool isThumb2; LLVMContext *Context; public: @@ -101,7 +99,7 @@ class ARMFastISel : public FastISel { TLI(*TM.getTargetLowering()) { Subtarget = &TM.getSubtarget<ARMSubtarget>(); AFI = funcInfo.MF->getInfo<ARMFunctionInfo>(); - isThumb = AFI->isThumbFunction(); + isThumb2 = AFI->isThumbFunction(); Context = &funcInfo.Fn->getContext(); } @@ -148,6 +146,8 @@ class ARMFastISel : public FastISel { virtual bool TargetSelectInstruction(const Instruction *I); virtual unsigned TargetMaterializeConstant(const Constant *C); virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI); + virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo, + const LoadInst *LI); #include "ARMGenFastISel.inc" @@ -156,27 +156,40 @@ class ARMFastISel : public FastISel { bool SelectLoad(const Instruction *I); bool SelectStore(const Instruction *I); bool SelectBranch(const Instruction *I); + bool SelectIndirectBr(const Instruction *I); bool SelectCmp(const Instruction *I); bool SelectFPExt(const Instruction *I); bool SelectFPTrunc(const Instruction *I); - bool SelectBinaryOp(const Instruction *I, unsigned ISDOpcode); - bool SelectSIToFP(const Instruction *I); - bool SelectFPToSI(const Instruction *I); - bool SelectSDiv(const Instruction *I); - bool SelectSRem(const Instruction *I); - bool SelectCall(const Instruction *I); + bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode); + bool SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode); + bool SelectIToFP(const Instruction *I, bool isSigned); + bool SelectFPToI(const Instruction *I, bool isSigned); + bool SelectDiv(const Instruction *I, bool isSigned); + bool SelectRem(const Instruction *I, bool isSigned); + bool SelectCall(const Instruction *I, const char *IntrMemName); + bool SelectIntrinsicCall(const IntrinsicInst &I); bool SelectSelect(const Instruction *I); bool SelectRet(const Instruction *I); - bool SelectIntCast(const Instruction *I); + bool SelectTrunc(const Instruction *I); + bool SelectIntExt(const Instruction *I); // Utility routines. private: bool isTypeLegal(Type *Ty, MVT &VT); bool isLoadTypeLegal(Type *Ty, MVT &VT); - bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr); - bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr); + bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, + bool isZExt); + bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr, + unsigned Alignment = 0, bool isZExt = true, + bool allocReg = true); + + bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr, + unsigned Alignment = 0); bool ARMComputeAddress(const Value *Obj, Address &Addr); - void ARMSimplifyAddress(Address &Addr, EVT VT); + void ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3); + bool ARMIsMemCpySmall(uint64_t Len); + bool ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len); + unsigned ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT, bool isZExt); unsigned ARMMaterializeFP(const ConstantFP *CFP, EVT VT); unsigned ARMMaterializeInt(const Constant *C, EVT VT); unsigned ARMMaterializeGV(const GlobalValue *GV, EVT VT); @@ -186,8 +199,6 @@ class ARMFastISel : public FastISel { // Call handling routines. 
private: - bool FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT, - unsigned &ResultReg); CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool Return); bool ProcessCallArgs(SmallVectorImpl<Value*> &Args, SmallVectorImpl<unsigned> &ArgRegs, @@ -208,7 +219,7 @@ class ARMFastISel : public FastISel { const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB); void AddLoadStoreOperands(EVT VT, Address &Addr, const MachineInstrBuilder &MIB, - unsigned Flags); + unsigned Flags, bool useAM3); }; } // end anonymous namespace @@ -219,8 +230,7 @@ class ARMFastISel : public FastISel { // we don't care about implicit defs here, just places we'll need to add a // default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR. bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) { - const MCInstrDesc &MCID = MI->getDesc(); - if (!MCID.hasOptionalDef()) + if (!MI->hasOptionalDef()) return false; // Look to see if our OptionalDef is defining CPSR or CCR. @@ -290,10 +300,10 @@ unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode, unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); - if (II.getNumDefs() >= 1) + if (II.getNumDefs() >= 1) { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill)); - else { + } else { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) .addReg(Op0, Op0IsKill * RegState::Kill)); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, @@ -310,11 +320,11 @@ unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode, unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); - if (II.getNumDefs() >= 1) + if (II.getNumDefs() >= 1) { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill)); - else { + } else { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill)); @@ -333,12 +343,12 @@ unsigned ARMFastISel::FastEmitInst_rrr(unsigned MachineInstOpcode, unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); - if (II.getNumDefs() >= 1) + if (II.getNumDefs() >= 1) { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill) .addReg(Op2, Op2IsKill * RegState::Kill)); - else { + } else { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill) @@ -357,11 +367,11 @@ unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode, unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); - if (II.getNumDefs() >= 1) + if (II.getNumDefs() >= 1) { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) .addImm(Imm)); - else { + } else { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) .addReg(Op0, Op0IsKill * RegState::Kill) .addImm(Imm)); @@ -379,11 +389,11 @@ unsigned ARMFastISel::FastEmitInst_rf(unsigned MachineInstOpcode, unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); - if (II.getNumDefs() >= 1) + if (II.getNumDefs() >= 1) { AddOptionalDefs(BuildMI(*FuncInfo.MBB, 
                                   FuncInfo.InsertPt, DL, II, ResultReg)
                   .addReg(Op0, Op0IsKill * RegState::Kill)
                   .addFPImm(FPImm));
-  else {
+  } else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                   .addReg(Op0, Op0IsKill * RegState::Kill)
                   .addFPImm(FPImm));
@@ -402,12 +412,12 @@ unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
   unsigned ResultReg = createResultReg(RC);
   const MCInstrDesc &II = TII.get(MachineInstOpcode);

-  if (II.getNumDefs() >= 1)
+  if (II.getNumDefs() >= 1) {
     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addReg(Op1, Op1IsKill * RegState::Kill)
                    .addImm(Imm));
-  else {
+  } else {
     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addReg(Op1, Op1IsKill * RegState::Kill)
@@ -425,10 +435,10 @@ unsigned ARMFastISel::FastEmitInst_i(unsigned MachineInstOpcode,
   unsigned ResultReg = createResultReg(RC);
   const MCInstrDesc &II = TII.get(MachineInstOpcode);

-  if (II.getNumDefs() >= 1)
+  if (II.getNumDefs() >= 1) {
     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addImm(Imm));
-  else {
+  } else {
     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addImm(Imm));
     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
@@ -444,10 +454,10 @@ unsigned ARMFastISel::FastEmitInst_ii(unsigned MachineInstOpcode,
   unsigned ResultReg = createResultReg(RC);
   const MCInstrDesc &II = TII.get(MachineInstOpcode);

-  if (II.getNumDefs() >= 1)
+  if (II.getNumDefs() >= 1) {
     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addImm(Imm1).addImm(Imm2));
-  else {
+  } else {
     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addImm(Imm1).addImm(Imm2));
     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
@@ -464,9 +474,10 @@ unsigned ARMFastISel::FastEmitInst_extractsubreg(MVT RetVT,
   unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
   assert(TargetRegisterInfo::isVirtualRegister(Op0) &&
          "Cannot yet extract from physregs");
+
   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
-          DL, TII.get(TargetOpcode::COPY), ResultReg)
-          .addReg(Op0, getKillRegState(Op0IsKill), Idx));
+                          DL, TII.get(TargetOpcode::COPY), ResultReg)
+                  .addReg(Op0, getKillRegState(Op0IsKill), Idx));
   return ResultReg;
 }

@@ -477,7 +488,7 @@ unsigned ARMFastISel::ARMMoveToFPReg(EVT VT, unsigned SrcReg) {
   unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
-                          TII.get(ARM::VMOVRS), MoveReg)
+                          TII.get(ARM::VMOVSR), MoveReg)
                   .addReg(SrcReg));
   return MoveReg;
 }

@@ -487,7 +498,7 @@ unsigned ARMFastISel::ARMMoveToIntReg(EVT VT, unsigned SrcReg) {
   unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
-                          TII.get(ARM::VMOVSR), MoveReg)
+                          TII.get(ARM::VMOVRS), MoveReg)
                   .addReg(SrcReg));
   return MoveReg;
 }

@@ -541,22 +552,42 @@ unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, EVT VT) {

 unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) {
-  // For now 32-bit only.
-  if (VT != MVT::i32) return false;
-
-  unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+  if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1)
+    return false;

   // If we can do this in a single instruction without a constant pool entry
   // do so now.
   const ConstantInt *CI = cast<ConstantInt>(C);
-  if (Subtarget->hasV6T2Ops() && isUInt<16>(CI->getSExtValue())) {
-    unsigned Opc = isThumb ? ARM::t2MOVi16 : ARM::MOVi16;
+  if (Subtarget->hasV6T2Ops() && isUInt<16>(CI->getZExtValue())) {
+    unsigned Opc = isThumb2 ? ARM::t2MOVi16 : ARM::MOVi16;
+    unsigned ImmReg = createResultReg(TLI.getRegClassFor(MVT::i32));
     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
-                            TII.get(Opc), DestReg)
-                    .addImm(CI->getSExtValue()));
-    return DestReg;
+                            TII.get(Opc), ImmReg)
+                    .addImm(CI->getZExtValue()));
+    return ImmReg;
   }

+  // Use MVN to emit negative constants.
+  if (VT == MVT::i32 && Subtarget->hasV6T2Ops() && CI->isNegative()) {
+    unsigned Imm = (unsigned)~(CI->getSExtValue());
+    bool UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
+      (ARM_AM::getSOImmVal(Imm) != -1);
+    if (UseImm) {
+      unsigned Opc = isThumb2 ? ARM::t2MVNi : ARM::MVNi;
+      unsigned ImmReg = createResultReg(TLI.getRegClassFor(MVT::i32));
+      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+                              TII.get(Opc), ImmReg)
+                      .addImm(Imm));
+      return ImmReg;
+    }
+  }
+
+  // Load from constant pool. For now 32-bit only.
+  if (VT != MVT::i32)
+    return false;
+
+  unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+
   // MachineConstantPool wants an explicit alignment.
   unsigned Align = TD.getPrefTypeAlignment(C->getType());
   if (Align == 0) {
@@ -565,7 +596,7 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) {
   }
   unsigned Idx = MCP.getConstantPoolIndex(C, Align);

-  if (isThumb)
+  if (isThumb2)
     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                             TII.get(ARM::t2LDRpci), DestReg)
                     .addConstantPoolIndex(Idx));
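Note: the new MVN path works because ARM can bitwise-NOT-move any immediate whose complement fits the so_imm encoding, an 8-bit payload rotated right by an even amount. A standalone sketch of that test; this is a simplification that only reports encodability, unlike ARM_AM::getSOImmVal, which also returns the packed encoding:

```cpp
#include <cstdint>
#include <cstdio>

// Does Imm have the shape "8-bit value rotated right by an even amount"?
static bool isSOImmEncodable(uint32_t Imm) {
  for (unsigned Rot = 0; Rot < 32; Rot += 2) {
    // Rotating left by Rot undoes a rotate-right-by-Rot encoding.
    uint32_t V = Rot ? ((Imm << Rot) | (Imm >> (32 - Rot))) : Imm;
    if (V <= 0xffu)
      return true;
  }
  return false;
}

int main() {
  int32_t C = -2;               // constant to materialize
  uint32_t Imm = ~(uint32_t)C;  // MVN operand: ~(-2) == 1
  printf("%d\n", isSOImmEncodable(Imm));  // 1 => a single MVNi suffices
  return 0;
}
```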
@@ -586,44 +617,69 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) {
   Reloc::Model RelocM = TM.getRelocationModel();

   // TODO: Need more magic for ARM PIC.
-  if (!isThumb && (RelocM == Reloc::PIC_)) return 0;
-
-  // MachineConstantPool wants an explicit alignment.
-  unsigned Align = TD.getPrefTypeAlignment(GV->getType());
-  if (Align == 0) {
-    // TODO: Figure out if this is correct.
-    Align = TD.getTypeAllocSize(GV->getType());
-  }
+  if (!isThumb2 && (RelocM == Reloc::PIC_)) return 0;

-  // Grab index.
-  unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb() ? 4 : 8);
-  unsigned Id = AFI->createPICLabelUId();
-  ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, Id,
-                                                              ARMCP::CPValue,
-                                                              PCAdj);
-  unsigned Idx = MCP.getConstantPoolIndex(CPV, Align);
-
-  // Load value.
-  MachineInstrBuilder MIB;
   unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
-  if (isThumb) {
-    unsigned Opc = (RelocM != Reloc::PIC_) ? ARM::t2LDRpci : ARM::t2LDRpci_pic;
-    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
-          .addConstantPoolIndex(Idx);
-    if (RelocM == Reloc::PIC_)
-      MIB.addImm(Id);
+
+  // Use movw+movt when possible, it avoids constant pool entries.
+  // Darwin targets don't support movt with Reloc::Static, see
+  // ARMTargetLowering::LowerGlobalAddressDarwin. Other targets only support
+  // static movt relocations.
+  if (Subtarget->useMovt() &&
+      Subtarget->isTargetDarwin() == (RelocM != Reloc::Static)) {
+    unsigned Opc;
+    switch (RelocM) {
+    case Reloc::PIC_:
+      Opc = isThumb2 ? ARM::t2MOV_ga_pcrel : ARM::MOV_ga_pcrel;
+      break;
+    case Reloc::DynamicNoPIC:
+      Opc = isThumb2 ? ARM::t2MOV_ga_dyn : ARM::MOV_ga_dyn;
+      break;
+    default:
+      Opc = isThumb2 ? ARM::t2MOVi32imm : ARM::MOVi32imm;
+      break;
+    }
+    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
+                            DestReg).addGlobalAddress(GV));
   } else {
-    // The extra immediate is for addrmode2.
-    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp),
-                  DestReg)
-          .addConstantPoolIndex(Idx)
-          .addImm(0);
+    // MachineConstantPool wants an explicit alignment.
+    unsigned Align = TD.getPrefTypeAlignment(GV->getType());
+    if (Align == 0) {
+      // TODO: Figure out if this is correct.
+      Align = TD.getTypeAllocSize(GV->getType());
+    }
+
+    // Grab index.
+    unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 :
+      (Subtarget->isThumb() ? 4 : 8);
+    unsigned Id = AFI->createPICLabelUId();
+    ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, Id,
+                                                                ARMCP::CPValue,
+                                                                PCAdj);
+    unsigned Idx = MCP.getConstantPoolIndex(CPV, Align);
+
+    // Load value.
+    MachineInstrBuilder MIB;
+    if (isThumb2) {
+      unsigned Opc = (RelocM!=Reloc::PIC_) ? ARM::t2LDRpci : ARM::t2LDRpci_pic;
+      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
+            .addConstantPoolIndex(Idx);
+      if (RelocM == Reloc::PIC_)
+        MIB.addImm(Id);
+    } else {
+      // The extra immediate is for addrmode2.
+      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp),
+                    DestReg)
+            .addConstantPoolIndex(Idx)
+            .addImm(0);
+    }
+    AddOptionalDefs(MIB);
   }
-  AddOptionalDefs(MIB);

   if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) {
+    MachineInstrBuilder MIB;
     unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));
-    if (isThumb)
+    if (isThumb2)
       MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                     TII.get(ARM::t2LDRi12), NewDestReg)
             .addReg(DestReg)
@@ -656,6 +712,8 @@ unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) {
   return 0;
 }

+// TODO: unsigned ARMFastISel::TargetMaterializeFloatZero(const ConstantFP *CF);
+
 unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
   // Don't handle dynamic allocas.
   if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;
@@ -669,10 +727,10 @@ unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
   // This will get lowered later into the correct offsets and registers
   // via rewriteXFrameIndex.
   if (SI != FuncInfo.StaticAllocaMap.end()) {
-    TargetRegisterClass* RC = TLI.getRegClassFor(VT);
+    const TargetRegisterClass* RC = TLI.getRegClassFor(VT);
     unsigned ResultReg = createResultReg(RC);
-    unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
-    AddOptionalDefs(BuildMI(*FuncInfo.MBB, *FuncInfo.InsertPt, DL,
+    unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
+    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                             TII.get(Opc), ResultReg)
                     .addFrameIndex(SI->second)
                     .addImm(0));
@@ -699,7 +757,7 @@ bool ARMFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {

   // If this is a type than can be sign or zero-extended to a basic operation
   // go ahead and accept it now.
-  if (VT == MVT::i8 || VT == MVT::i16)
+  if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
     return true;

   return false;
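Note: the movw+movt route selected above avoids a literal-pool load entirely: movw loads a 16-bit value zero-extended and movt overwrites the top halfword, so two instructions (with :lower16:/:upper16: fixups) reach any 32-bit address. A plain-arithmetic sketch of that composition, with a made-up address value:

```cpp
#include <cstdint>
#include <cstdio>

int main() {
  uint32_t SymAddr = 0x12345678;                   // hypothetical &GV
  uint32_t Reg = SymAddr & 0xffffu;                // movw rd, :lower16:GV
  Reg = (Reg & 0xffffu) | (SymAddr & 0xffff0000u); // movt rd, :upper16:GV
  printf("%#x\n", Reg);                            // 0x12345678
  return 0;
}
```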
if (Addr.Base.Reg == 0) Addr.Base.Reg = getRegForValue(Obj); return Addr.Base.Reg != 0; } -void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT) { +void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3) { assert(VT.isSimple() && "Non-simple types are invalid here!"); bool needsLowering = false; switch (VT.getSimpleVT().SimpleTy) { - default: - assert(false && "Unhandled load/store type!"); + default: llvm_unreachable("Unhandled load/store type!"); case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32: - // Integer loads/stores handle 12-bit offsets. - needsLowering = ((Addr.Offset & 0xfff) != Addr.Offset); + if (!useAM3) { + // Integer loads/stores handle 12-bit offsets. + needsLowering = ((Addr.Offset & 0xfff) != Addr.Offset); + // Handle negative offsets. + if (needsLowering && isThumb2) + needsLowering = !(Subtarget->hasV6T2Ops() && Addr.Offset < 0 && + Addr.Offset > -256); + } else { + // ARM halfword load/stores and signed byte loads use +/-imm8 offsets. + needsLowering = (Addr.Offset > 255 || Addr.Offset < -255); + } break; case MVT::f32: case MVT::f64: @@ -854,11 +910,11 @@ void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT) { // put the alloca address into a register, set the base type back to // register and continue. This should almost never happen. if (needsLowering && Addr.BaseType == Address::FrameIndexBase) { - TargetRegisterClass *RC = isThumb ? ARM::tGPRRegisterClass : - ARM::GPRRegisterClass; + const TargetRegisterClass *RC = isThumb2 ? ARM::tGPRRegisterClass + : ARM::GPRRegisterClass; unsigned ResultReg = createResultReg(RC); - unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri; - AddOptionalDefs(BuildMI(*FuncInfo.MBB, *FuncInfo.InsertPt, DL, + unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri; + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg) .addFrameIndex(Addr.Base.FI) .addImm(0)); @@ -877,7 +933,7 @@ void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT) { void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr, const MachineInstrBuilder &MIB, - unsigned Flags) { + unsigned Flags, bool useAM3) { // addrmode5 output depends on the selection dag addressing dividing the // offset by 4 that it then later multiplies. Do this here as well. if (VT.getSimpleVT().SimpleTy == MVT::f32 || @@ -897,60 +953,127 @@ void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr, // Now add the rest of the operands. MIB.addFrameIndex(FI); - // ARM halfword load/stores need an additional operand. - if (!isThumb && VT.getSimpleVT().SimpleTy == MVT::i16) MIB.addReg(0); - - MIB.addImm(Addr.Offset); + // ARM halfword load/stores and signed byte loads need an additional + // operand. + if (useAM3) { + signed Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset; + MIB.addReg(0); + MIB.addImm(Imm); + } else { + MIB.addImm(Addr.Offset); + } MIB.addMemOperand(MMO); } else { // Now add the rest of the operands. MIB.addReg(Addr.Base.Reg); - // ARM halfword load/stores need an additional operand. - if (!isThumb && VT.getSimpleVT().SimpleTy == MVT::i16) MIB.addReg(0); - - MIB.addImm(Addr.Offset); + // ARM halfword load/stores and signed byte loads need an additional + // operand. + if (useAM3) { + signed Imm = (Addr.Offset < 0) ? 
(0x100 | -Addr.Offset) : Addr.Offset;
+      MIB.addReg(0);
+      MIB.addImm(Imm);
+    } else {
+      MIB.addImm(Addr.Offset);
+    }
   }
   AddOptionalDefs(MIB);
 }
 
-bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr) {
-
+bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr,
+                              unsigned Alignment, bool isZExt, bool allocReg) {
   assert(VT.isSimple() && "Non-simple types are invalid here!");
   unsigned Opc;
-  TargetRegisterClass *RC;
+  bool useAM3 = false;
+  bool needVMOV = false;
+  const TargetRegisterClass *RC;
   switch (VT.getSimpleVT().SimpleTy) {
     // This is mostly going to be Neon/vector support.
     default: return false;
-    case MVT::i16:
-      Opc = isThumb ? ARM::t2LDRHi12 : ARM::LDRH;
+    case MVT::i1:
+    case MVT::i8:
+      if (isThumb2) {
+        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
+          Opc = isZExt ? ARM::t2LDRBi8 : ARM::t2LDRSBi8;
+        else
+          Opc = isZExt ? ARM::t2LDRBi12 : ARM::t2LDRSBi12;
+      } else {
+        if (isZExt) {
+          Opc = ARM::LDRBi12;
+        } else {
+          Opc = ARM::LDRSB;
+          useAM3 = true;
+        }
+      }
       RC = ARM::GPRRegisterClass;
       break;
-    case MVT::i8:
-      Opc = isThumb ? ARM::t2LDRBi12 : ARM::LDRBi12;
+    case MVT::i16:
+      if (isThumb2) {
+        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
+          Opc = isZExt ? ARM::t2LDRHi8 : ARM::t2LDRSHi8;
+        else
+          Opc = isZExt ? ARM::t2LDRHi12 : ARM::t2LDRSHi12;
+      } else {
+        Opc = isZExt ? ARM::LDRH : ARM::LDRSH;
+        useAM3 = true;
+      }
       RC = ARM::GPRRegisterClass;
       break;
     case MVT::i32:
-      Opc = isThumb ? ARM::t2LDRi12 : ARM::LDRi12;
+      if (isThumb2) {
+        if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
+          Opc = ARM::t2LDRi8;
+        else
+          Opc = ARM::t2LDRi12;
+      } else {
+        Opc = ARM::LDRi12;
+      }
       RC = ARM::GPRRegisterClass;
       break;
     case MVT::f32:
-      Opc = ARM::VLDRS;
-      RC = TLI.getRegClassFor(VT);
+      if (!Subtarget->hasVFP2()) return false;
+      // Unaligned loads need special handling. Floats require word-alignment.
+      if (Alignment && Alignment < 4) {
+        needVMOV = true;
+        VT = MVT::i32;
+        Opc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;
+        RC = ARM::GPRRegisterClass;
+      } else {
+        Opc = ARM::VLDRS;
+        RC = TLI.getRegClassFor(VT);
+      }
       break;
     case MVT::f64:
+      if (!Subtarget->hasVFP2()) return false;
+      // FIXME: Unaligned loads need special handling. Doublewords require
+      // word-alignment.
+      if (Alignment && Alignment < 4)
+        return false;
+
       Opc = ARM::VLDRD;
       RC = TLI.getRegClassFor(VT);
       break;
   }
   // Simplify this down to something we can handle.
-  ARMSimplifyAddress(Addr, VT);
+  ARMSimplifyAddress(Addr, VT, useAM3);
 
   // Create the base instruction, then add the operands.
-  ResultReg = createResultReg(RC);
+  if (allocReg)
+    ResultReg = createResultReg(RC);
+  assert (ResultReg > 255 && "Expected an allocated virtual register.");
   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                     TII.get(Opc), ResultReg);
-  AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOLoad);
+  AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOLoad, useAM3);
+
+  // If we had an unaligned load of a float we've converted it to a regular
+  // load. Now we must move from the GPR to the FP register.
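// Aside: a standalone model of the addrmode3 immediate packed above with
// "0x100 | -Addr.Offset". Bit 8 marks a subtracted offset and bits 7..0 hold
// the magnitude, which is why ARMSimplifyAddress caps useAM3 offsets at
// +/-255. encodeAM3Offset is hypothetical, for illustration only.
#include <cassert>

static unsigned encodeAM3Offset(int Offset) {
  assert(Offset > -256 && Offset < 256 && "AM3 offsets are +/-imm8");
  return (Offset < 0) ? (0x100u | (unsigned)-Offset) : (unsigned)Offset;
}

int main() {
  assert(encodeAM3Offset(12) == 12u);
  assert(encodeAM3Offset(-12) == (0x100u | 12u)); // sign bit set, magnitude 12
  return 0;
}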
+ if (needVMOV) { + unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::f32)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(ARM::VMOVSR), MoveReg) + .addReg(ResultReg)); + ResultReg = MoveReg; + } return true; } @@ -969,51 +1092,92 @@ bool ARMFastISel::SelectLoad(const Instruction *I) { if (!ARMComputeAddress(I->getOperand(0), Addr)) return false; unsigned ResultReg; - if (!ARMEmitLoad(VT, ResultReg, Addr)) return false; + if (!ARMEmitLoad(VT, ResultReg, Addr, cast<LoadInst>(I)->getAlignment())) + return false; UpdateValueMap(I, ResultReg); return true; } -bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr) { +bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr, + unsigned Alignment) { unsigned StrOpc; + bool useAM3 = false; switch (VT.getSimpleVT().SimpleTy) { // This is mostly going to be Neon/vector support. default: return false; case MVT::i1: { - unsigned Res = createResultReg(isThumb ? ARM::tGPRRegisterClass : + unsigned Res = createResultReg(isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass); - unsigned Opc = isThumb ? ARM::t2ANDri : ARM::ANDri; + unsigned Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri; AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), Res) .addReg(SrcReg).addImm(1)); SrcReg = Res; } // Fallthrough here. case MVT::i8: - StrOpc = isThumb ? ARM::t2STRBi12 : ARM::STRBi12; + if (isThumb2) { + if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops()) + StrOpc = ARM::t2STRBi8; + else + StrOpc = ARM::t2STRBi12; + } else { + StrOpc = ARM::STRBi12; + } break; case MVT::i16: - StrOpc = isThumb ? ARM::t2STRHi12 : ARM::STRH; + if (isThumb2) { + if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops()) + StrOpc = ARM::t2STRHi8; + else + StrOpc = ARM::t2STRHi12; + } else { + StrOpc = ARM::STRH; + useAM3 = true; + } break; case MVT::i32: - StrOpc = isThumb ? ARM::t2STRi12 : ARM::STRi12; + if (isThumb2) { + if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops()) + StrOpc = ARM::t2STRi8; + else + StrOpc = ARM::t2STRi12; + } else { + StrOpc = ARM::STRi12; + } break; case MVT::f32: if (!Subtarget->hasVFP2()) return false; - StrOpc = ARM::VSTRS; + // Unaligned stores need special handling. Floats require word-alignment. + if (Alignment && Alignment < 4) { + unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::i32)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(ARM::VMOVRS), MoveReg) + .addReg(SrcReg)); + SrcReg = MoveReg; + VT = MVT::i32; + StrOpc = isThumb2 ? ARM::t2STRi12 : ARM::STRi12; + } else { + StrOpc = ARM::VSTRS; + } break; case MVT::f64: if (!Subtarget->hasVFP2()) return false; + // FIXME: Unaligned stores need special handling. Doublewords require + // word-alignment. + if (Alignment && Alignment < 4) + return false; + StrOpc = ARM::VSTRD; break; } // Simplify this down to something we can handle. - ARMSimplifyAddress(Addr, VT); + ARMSimplifyAddress(Addr, VT, useAM3); // Create the base instruction, then add the operands. 
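// Aside: the unaligned-f32 fallbacks above (integer load then VMOVSR, or
// VMOVRS then integer store) work because those VFP moves copy raw bits
// between a GPR and an S-register, with no numeric conversion. A portable
// sketch of the load side, with memcpy standing in for the instructions:
#include <cassert>
#include <cstdint>
#include <cstring>

static float loadFloatUnaligned(const void *P) {
  uint32_t Bits;
  std::memcpy(&Bits, P, sizeof(Bits)); // the t2LDRi12/LDRi12 integer load
  float F;
  std::memcpy(&F, &Bits, sizeof(F));   // the VMOVSR: bits move, value unchanged
  return F;
}

int main() {
  unsigned char Buf[8] = {};
  float In = 3.5f;
  std::memcpy(Buf + 1, &In, sizeof(In)); // deliberately word-misaligned
  assert(loadFloatUnaligned(Buf + 1) == 3.5f);
  return 0;
}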
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(StrOpc)) - .addReg(SrcReg, getKillRegState(true)); - AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOStore); + .addReg(SrcReg); + AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOStore, useAM3); return true; } @@ -1039,7 +1203,8 @@ bool ARMFastISel::SelectStore(const Instruction *I) { if (!ARMComputeAddress(I->getOperand(1), Addr)) return false; - if (!ARMEmitStore(VT, SrcReg, Addr)) return false; + if (!ARMEmitStore(VT, SrcReg, Addr, cast<StoreInst>(I)->getAlignment())) + return false; return true; } @@ -1099,30 +1264,8 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { // If we can, avoid recomputing the compare - redoing it could lead to wonky // behavior. - // TODO: Factor this out. if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) { - MVT SourceVT; - Type *Ty = CI->getOperand(0)->getType(); - if (CI->hasOneUse() && (CI->getParent() == I->getParent()) - && isTypeLegal(Ty, SourceVT)) { - bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy()); - if (isFloat && !Subtarget->hasVFP2()) - return false; - - unsigned CmpOpc; - switch (SourceVT.SimpleTy) { - default: return false; - // TODO: Verify compares. - case MVT::f32: - CmpOpc = ARM::VCMPES; - break; - case MVT::f64: - CmpOpc = ARM::VCMPED; - break; - case MVT::i32: - CmpOpc = isThumb ? ARM::t2CMPrr : ARM::CMPrr; - break; - } + if (CI->hasOneUse() && (CI->getParent() == I->getParent())) { // Get the compare predicate. // Try to take advantage of fallthrough opportunities. @@ -1137,23 +1280,11 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { // We may not handle every CC for now. if (ARMPred == ARMCC::AL) return false; - unsigned Arg1 = getRegForValue(CI->getOperand(0)); - if (Arg1 == 0) return false; - - unsigned Arg2 = getRegForValue(CI->getOperand(1)); - if (Arg2 == 0) return false; - - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(CmpOpc)) - .addReg(Arg1).addReg(Arg2)); - - // For floating point we need to move the result to a comparison register - // that we can then use for branches. - if (isFloat) - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(ARM::FMSTAT))); + // Emit the compare. + if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) + return false; - unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc; + unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc)) .addMBB(TBB).addImm(ARMPred).addReg(ARM::CPSR); FastEmitBranch(FBB, DL); @@ -1164,7 +1295,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { MVT SourceVT; if (TI->hasOneUse() && TI->getParent() == I->getParent() && (isLoadTypeLegal(TI->getOperand(0)->getType(), SourceVT))) { - unsigned TstOpc = isThumb ? ARM::t2TSTri : ARM::TSTri; + unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri; unsigned OpReg = getRegForValue(TI->getOperand(0)); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TstOpc)) @@ -1176,7 +1307,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { CCMode = ARMCC::EQ; } - unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc; + unsigned BrOpc = isThumb2 ? 
ARM::t2Bcc : ARM::Bcc; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc)) .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR); @@ -1184,6 +1315,12 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { FuncInfo.MBB->addSuccessor(TBB); return true; } + } else if (const ConstantInt *CI = + dyn_cast<ConstantInt>(BI->getCondition())) { + uint64_t Imm = CI->getZExtValue(); + MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB; + FastEmitBranch(Target, DL); + return true; } unsigned CmpReg = getRegForValue(BI->getCondition()); @@ -1196,7 +1333,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { // Regardless, the compare has been done in the predecessor block, // and it left a value for us in a virtual register. Ergo, we test // the one-bit value left in the virtual register. - unsigned TstOpc = isThumb ? ARM::t2TSTri : ARM::TSTri; + unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri; AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TstOpc)) .addReg(CmpReg).addImm(1)); @@ -1206,7 +1343,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { CCMode = ARMCC::EQ; } - unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc; + unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc)) .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR); FastEmitBranch(FBB, DL); @@ -1214,70 +1351,155 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { return true; } -bool ARMFastISel::SelectCmp(const Instruction *I) { - const CmpInst *CI = cast<CmpInst>(I); +bool ARMFastISel::SelectIndirectBr(const Instruction *I) { + unsigned AddrReg = getRegForValue(I->getOperand(0)); + if (AddrReg == 0) return false; - MVT VT; - Type *Ty = CI->getOperand(0)->getType(); - if (!isTypeLegal(Ty, VT)) - return false; + unsigned Opc = isThumb2 ? ARM::tBRIND : ARM::BX; + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc)) + .addReg(AddrReg)); + return true; +} - bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy()); +bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, + bool isZExt) { + Type *Ty = Src1Value->getType(); + EVT SrcVT = TLI.getValueType(Ty, true); + if (!SrcVT.isSimple()) return false; + + bool isFloat = (Ty->isFloatTy() || Ty->isDoubleTy()); if (isFloat && !Subtarget->hasVFP2()) return false; + // Check to see if the 2nd operand is a constant that we can encode directly + // in the compare. + int Imm = 0; + bool UseImm = false; + bool isNegativeImm = false; + // FIXME: At -O0 we don't have anything that canonicalizes operand order. + // Thus, Src1Value may be a ConstantInt, but we're missing it. + if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Src2Value)) { + if (SrcVT == MVT::i32 || SrcVT == MVT::i16 || SrcVT == MVT::i8 || + SrcVT == MVT::i1) { + const APInt &CIVal = ConstInt->getValue(); + Imm = (isZExt) ? (int)CIVal.getZExtValue() : (int)CIVal.getSExtValue(); + // For INT_MIN/LONG_MIN (i.e., 0x80000000) we need to use a cmp, rather + // then a cmn, because there is no way to represent 2147483648 as a + // signed 32-bit int. + if (Imm < 0 && Imm != (int)0x80000000) { + isNegativeImm = true; + Imm = -Imm; + } + UseImm = isThumb2 ? 
(ARM_AM::getT2SOImmVal(Imm) != -1) : + (ARM_AM::getSOImmVal(Imm) != -1); + } + } else if (const ConstantFP *ConstFP = dyn_cast<ConstantFP>(Src2Value)) { + if (SrcVT == MVT::f32 || SrcVT == MVT::f64) + if (ConstFP->isZero() && !ConstFP->isNegative()) + UseImm = true; + } + unsigned CmpOpc; - unsigned CondReg; - switch (VT.SimpleTy) { + bool isICmp = true; + bool needsExt = false; + switch (SrcVT.getSimpleVT().SimpleTy) { default: return false; // TODO: Verify compares. case MVT::f32: - CmpOpc = ARM::VCMPES; - CondReg = ARM::FPSCR; + isICmp = false; + CmpOpc = UseImm ? ARM::VCMPEZS : ARM::VCMPES; break; case MVT::f64: - CmpOpc = ARM::VCMPED; - CondReg = ARM::FPSCR; + isICmp = false; + CmpOpc = UseImm ? ARM::VCMPEZD : ARM::VCMPED; break; + case MVT::i1: + case MVT::i8: + case MVT::i16: + needsExt = true; + // Intentional fall-through. case MVT::i32: - CmpOpc = isThumb ? ARM::t2CMPrr : ARM::CMPrr; - CondReg = ARM::CPSR; + if (isThumb2) { + if (!UseImm) + CmpOpc = ARM::t2CMPrr; + else + CmpOpc = isNegativeImm ? ARM::t2CMNzri : ARM::t2CMPri; + } else { + if (!UseImm) + CmpOpc = ARM::CMPrr; + else + CmpOpc = isNegativeImm ? ARM::CMNzri : ARM::CMPri; + } break; } - // Get the compare predicate. - ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate()); + unsigned SrcReg1 = getRegForValue(Src1Value); + if (SrcReg1 == 0) return false; - // We may not handle every CC for now. - if (ARMPred == ARMCC::AL) return false; + unsigned SrcReg2 = 0; + if (!UseImm) { + SrcReg2 = getRegForValue(Src2Value); + if (SrcReg2 == 0) return false; + } - unsigned Arg1 = getRegForValue(CI->getOperand(0)); - if (Arg1 == 0) return false; + // We have i1, i8, or i16, we need to either zero extend or sign extend. + if (needsExt) { + SrcReg1 = ARMEmitIntExt(SrcVT, SrcReg1, MVT::i32, isZExt); + if (SrcReg1 == 0) return false; + if (!UseImm) { + SrcReg2 = ARMEmitIntExt(SrcVT, SrcReg2, MVT::i32, isZExt); + if (SrcReg2 == 0) return false; + } + } - unsigned Arg2 = getRegForValue(CI->getOperand(1)); - if (Arg2 == 0) return false; + if (!UseImm) { + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(CmpOpc)) + .addReg(SrcReg1).addReg(SrcReg2)); + } else { + MachineInstrBuilder MIB; + MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc)) + .addReg(SrcReg1); - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc)) - .addReg(Arg1).addReg(Arg2)); + // Only add immediate for icmp as the immediate for fcmp is an implicit 0.0. + if (isICmp) + MIB.addImm(Imm); + AddOptionalDefs(MIB); + } // For floating point we need to move the result to a comparison register // that we can then use for branches. - if (isFloat) + if (Ty->isFloatTy() || Ty->isDoubleTy()) AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::FMSTAT))); + return true; +} + +bool ARMFastISel::SelectCmp(const Instruction *I) { + const CmpInst *CI = cast<CmpInst>(I); + + // Get the compare predicate. + ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate()); + + // We may not handle every CC for now. + if (ARMPred == ARMCC::AL) return false; + + // Emit the compare. + if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) + return false; // Now set a register based on the comparison. Explicitly set the predicates // here. - unsigned MovCCOpc = isThumb ? ARM::t2MOVCCi : ARM::MOVCCi; - TargetRegisterClass *RC = isThumb ? ARM::rGPRRegisterClass - : ARM::GPRRegisterClass; + unsigned MovCCOpc = isThumb2 ? 
ARM::t2MOVCCi : ARM::MOVCCi; + const TargetRegisterClass *RC = isThumb2 ? ARM::rGPRRegisterClass + : ARM::GPRRegisterClass; unsigned DestReg = createResultReg(RC); - Constant *Zero - = ConstantInt::get(Type::getInt32Ty(*Context), 0); + Constant *Zero = ConstantInt::get(Type::getInt32Ty(*Context), 0); unsigned ZeroReg = TargetMaterializeConstant(Zero); + // ARMEmitCmp emits a FMSTAT when necessary, so it's always safe to use CPSR. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), DestReg) .addReg(ZeroReg).addImm(1) - .addImm(ARMPred).addReg(CondReg); + .addImm(ARMPred).addReg(ARM::CPSR); UpdateValueMap(I, DestReg); return true; @@ -1321,7 +1543,7 @@ bool ARMFastISel::SelectFPTrunc(const Instruction *I) { return true; } -bool ARMFastISel::SelectSIToFP(const Instruction *I) { +bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) { // Make sure we have VFP. if (!Subtarget->hasVFP2()) return false; @@ -1330,21 +1552,30 @@ bool ARMFastISel::SelectSIToFP(const Instruction *I) { if (!isTypeLegal(Ty, DstVT)) return false; - // FIXME: Handle sign-extension where necessary. - if (!I->getOperand(0)->getType()->isIntegerTy(32)) + Value *Src = I->getOperand(0); + EVT SrcVT = TLI.getValueType(Src->getType(), true); + if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8) return false; - unsigned Op = getRegForValue(I->getOperand(0)); - if (Op == 0) return false; + unsigned SrcReg = getRegForValue(Src); + if (SrcReg == 0) return false; + + // Handle sign-extension. + if (SrcVT == MVT::i16 || SrcVT == MVT::i8) { + EVT DestVT = MVT::i32; + SrcReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT, + /*isZExt*/!isSigned); + if (SrcReg == 0) return false; + } // The conversion routine works on fp-reg to fp-reg and the operand above // was an integer, move it to the fp registers if possible. - unsigned FP = ARMMoveToFPReg(MVT::f32, Op); + unsigned FP = ARMMoveToFPReg(MVT::f32, SrcReg); if (FP == 0) return false; unsigned Opc; - if (Ty->isFloatTy()) Opc = ARM::VSITOS; - else if (Ty->isDoubleTy()) Opc = ARM::VSITOD; + if (Ty->isFloatTy()) Opc = isSigned ? ARM::VSITOS : ARM::VUITOS; + else if (Ty->isDoubleTy()) Opc = isSigned ? ARM::VSITOD : ARM::VUITOD; else return false; unsigned ResultReg = createResultReg(TLI.getRegClassFor(DstVT)); @@ -1355,7 +1586,7 @@ bool ARMFastISel::SelectSIToFP(const Instruction *I) { return true; } -bool ARMFastISel::SelectFPToSI(const Instruction *I) { +bool ARMFastISel::SelectFPToI(const Instruction *I, bool isSigned) { // Make sure we have VFP. if (!Subtarget->hasVFP2()) return false; @@ -1369,11 +1600,11 @@ bool ARMFastISel::SelectFPToSI(const Instruction *I) { unsigned Opc; Type *OpTy = I->getOperand(0)->getType(); - if (OpTy->isFloatTy()) Opc = ARM::VTOSIZS; - else if (OpTy->isDoubleTy()) Opc = ARM::VTOSIZD; + if (OpTy->isFloatTy()) Opc = isSigned ? ARM::VTOSIZS : ARM::VTOUIZS; + else if (OpTy->isDoubleTy()) Opc = isSigned ? ARM::VTOSIZD : ARM::VTOUIZD; else return false; - // f64->s32 or f32->s32 both need an intermediate f32 reg. + // f64->s32/u32 or f32->s32/u32 both need an intermediate f32 reg. 
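// Aside: the immediate handling in the ARMEmitCmp hunk above hinges on
// whether a value fits ARM's "modified immediate" form: an 8-bit constant
// rotated right by an even amount (ARM_AM::getSOImmVal in the real code).
// A standalone check plus the CMP/CMN choice, using hypothetical helpers:
#include <cassert>
#include <climits>
#include <cstdint>
#include <cstring>

static uint32_t rotl32(uint32_t V, unsigned N) {
  return N ? ((V << N) | (V >> (32 - N))) : V;
}

// Encodable iff some even left-rotation of V fits in 8 bits.
static bool isSOImm(uint32_t V) {
  for (unsigned R = 0; R < 32; R += 2)
    if (rotl32(V, R) <= 0xFFu)
      return true;
  return false;
}

static const char *chooseCompare(int Imm) {
  // CMN sets flags for Reg + Imm, so a negative compare becomes CMN of the
  // negation -- except INT_MIN, which cannot be negated in 32 bits.
  if (Imm < 0 && Imm != INT_MIN && isSOImm((uint32_t)-Imm))
    return "cmn";
  return isSOImm((uint32_t)Imm) ? "cmp" : "materialize, then cmp";
}

int main() {
  assert(isSOImm(0xFF000000u)); // 0xFF rotated right by 8
  assert(!isSOImm(0x101u));     // set bits 9 apart never fit an 8-bit window
  assert(std::strcmp(chooseCompare(-255), "cmn") == 0);
  // INT_MIN (0x80000000) is itself a rotated 8-bit value, so CMP handles it.
  assert(std::strcmp(chooseCompare(INT_MIN), "cmp") == 0);
  return 0;
}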
unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32)); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg) @@ -1401,22 +1632,54 @@ bool ARMFastISel::SelectSelect(const Instruction *I) { if (CondReg == 0) return false; unsigned Op1Reg = getRegForValue(I->getOperand(1)); if (Op1Reg == 0) return false; - unsigned Op2Reg = getRegForValue(I->getOperand(2)); - if (Op2Reg == 0) return false; - unsigned CmpOpc = isThumb ? ARM::t2TSTri : ARM::TSTri; + // Check to see if we can use an immediate in the conditional move. + int Imm = 0; + bool UseImm = false; + bool isNegativeImm = false; + if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(I->getOperand(2))) { + assert (VT == MVT::i32 && "Expecting an i32."); + Imm = (int)ConstInt->getValue().getZExtValue(); + if (Imm < 0) { + isNegativeImm = true; + Imm = ~Imm; + } + UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) : + (ARM_AM::getSOImmVal(Imm) != -1); + } + + unsigned Op2Reg = 0; + if (!UseImm) { + Op2Reg = getRegForValue(I->getOperand(2)); + if (Op2Reg == 0) return false; + } + + unsigned CmpOpc = isThumb2 ? ARM::t2CMPri : ARM::CMPri; AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc)) - .addReg(CondReg).addImm(1)); + .addReg(CondReg).addImm(0)); + + unsigned MovCCOpc; + if (!UseImm) { + MovCCOpc = isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr; + } else { + if (!isNegativeImm) { + MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi; + } else { + MovCCOpc = isThumb2 ? ARM::t2MVNCCi : ARM::MVNCCi; + } + } unsigned ResultReg = createResultReg(RC); - unsigned MovCCOpc = isThumb ? ARM::t2MOVCCr : ARM::MOVCCr; - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg) - .addReg(Op1Reg).addReg(Op2Reg) - .addImm(ARMCC::EQ).addReg(ARM::CPSR); + if (!UseImm) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg) + .addReg(Op2Reg).addReg(Op1Reg).addImm(ARMCC::NE).addReg(ARM::CPSR); + else + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg) + .addReg(Op1Reg).addImm(Imm).addImm(ARMCC::EQ).addReg(ARM::CPSR); UpdateValueMap(I, ResultReg); return true; } -bool ARMFastISel::SelectSDiv(const Instruction *I) { +bool ARMFastISel::SelectDiv(const Instruction *I, bool isSigned) { MVT VT; Type *Ty = I->getType(); if (!isTypeLegal(Ty, VT)) @@ -1430,21 +1693,21 @@ bool ARMFastISel::SelectSDiv(const Instruction *I) { // Otherwise emit a libcall. RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; if (VT == MVT::i8) - LC = RTLIB::SDIV_I8; + LC = isSigned ? RTLIB::SDIV_I8 : RTLIB::UDIV_I8; else if (VT == MVT::i16) - LC = RTLIB::SDIV_I16; + LC = isSigned ? RTLIB::SDIV_I16 : RTLIB::UDIV_I16; else if (VT == MVT::i32) - LC = RTLIB::SDIV_I32; + LC = isSigned ? RTLIB::SDIV_I32 : RTLIB::UDIV_I32; else if (VT == MVT::i64) - LC = RTLIB::SDIV_I64; + LC = isSigned ? RTLIB::SDIV_I64 : RTLIB::UDIV_I64; else if (VT == MVT::i128) - LC = RTLIB::SDIV_I128; + LC = isSigned ? RTLIB::SDIV_I128 : RTLIB::UDIV_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!"); return ARMEmitLibcall(I, LC); } -bool ARMFastISel::SelectSRem(const Instruction *I) { +bool ARMFastISel::SelectRem(const Instruction *I, bool isSigned) { MVT VT; Type *Ty = I->getType(); if (!isTypeLegal(Ty, VT)) @@ -1452,21 +1715,59 @@ bool ARMFastISel::SelectSRem(const Instruction *I) { RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; if (VT == MVT::i8) - LC = RTLIB::SREM_I8; + LC = isSigned ? RTLIB::SREM_I8 : RTLIB::UREM_I8; else if (VT == MVT::i16) - LC = RTLIB::SREM_I16; + LC = isSigned ? 
RTLIB::SREM_I16 : RTLIB::UREM_I16; else if (VT == MVT::i32) - LC = RTLIB::SREM_I32; + LC = isSigned ? RTLIB::SREM_I32 : RTLIB::UREM_I32; else if (VT == MVT::i64) - LC = RTLIB::SREM_I64; + LC = isSigned ? RTLIB::SREM_I64 : RTLIB::UREM_I64; else if (VT == MVT::i128) - LC = RTLIB::SREM_I128; + LC = isSigned ? RTLIB::SREM_I128 : RTLIB::UREM_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!"); return ARMEmitLibcall(I, LC); } -bool ARMFastISel::SelectBinaryOp(const Instruction *I, unsigned ISDOpcode) { +bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) { + EVT DestVT = TLI.getValueType(I->getType(), true); + + // We can get here in the case when we have a binary operation on a non-legal + // type and the target independent selector doesn't know how to handle it. + if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1) + return false; + + unsigned Opc; + switch (ISDOpcode) { + default: return false; + case ISD::ADD: + Opc = isThumb2 ? ARM::t2ADDrr : ARM::ADDrr; + break; + case ISD::OR: + Opc = isThumb2 ? ARM::t2ORRrr : ARM::ORRrr; + break; + case ISD::SUB: + Opc = isThumb2 ? ARM::t2SUBrr : ARM::SUBrr; + break; + } + + unsigned SrcReg1 = getRegForValue(I->getOperand(0)); + if (SrcReg1 == 0) return false; + + // TODO: Often the 2nd operand is an immediate, which can be encoded directly + // in the instruction, rather then materializing the value in a register. + unsigned SrcReg2 = getRegForValue(I->getOperand(1)); + if (SrcReg2 == 0) return false; + + unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::i32)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(Opc), ResultReg) + .addReg(SrcReg1).addReg(SrcReg2)); + UpdateValueMap(I, ResultReg); + return true; +} + +bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) { EVT VT = TLI.getValueType(I->getType(), true); // We can get here in the case when we want to use NEON for our fp @@ -1478,12 +1779,6 @@ bool ARMFastISel::SelectBinaryOp(const Instruction *I, unsigned ISDOpcode) { if (isFloat && !Subtarget->hasVFP2()) return false; - unsigned Op1 = getRegForValue(I->getOperand(0)); - if (Op1 == 0) return false; - - unsigned Op2 = getRegForValue(I->getOperand(1)); - if (Op2 == 0) return false; - unsigned Opc; bool is64bit = VT == MVT::f64 || VT == MVT::i64; switch (ISDOpcode) { @@ -1498,6 +1793,12 @@ bool ARMFastISel::SelectBinaryOp(const Instruction *I, unsigned ISDOpcode) { Opc = is64bit ? ARM::VMULD : ARM::VMULS; break; } + unsigned Op1 = getRegForValue(I->getOperand(0)); + if (Op1 == 0) return false; + + unsigned Op2 = getRegForValue(I->getOperand(1)); + if (Op2 == 0) return false; + unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg) @@ -1508,18 +1809,6 @@ bool ARMFastISel::SelectBinaryOp(const Instruction *I, unsigned ISDOpcode) { // Call Handling Code -bool ARMFastISel::FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, - EVT SrcVT, unsigned &ResultReg) { - unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc, - Src, /*TODO: Kill=*/false); - - if (RR != 0) { - ResultReg = RR; - return true; - } else - return false; -} - // This is largely taken directly from CCAssignFnForNode - we don't support // varargs in FastISel so that part has been removed. // TODO: We may not support all of this. 
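// Aside: SelectDiv/SelectRem above only pick an RTLIB enum; the routine
// itself comes from the runtime. A standalone sketch of that mapping using
// the classic libgcc names (ARM EABI targets typically use __aeabi_idiv and
// friends instead; the i8/i16/i128 variants are elided here):
#include <cstdio>

static const char *divRemLibcall(unsigned Bits, bool IsSigned, bool IsRem) {
  static const char *const Names[2][2][2] = {
      // [IsRem][IsSigned][Bits == 64]
      {{"__udivsi3", "__udivdi3"}, {"__divsi3", "__divdi3"}},
      {{"__umodsi3", "__umoddi3"}, {"__modsi3", "__moddi3"}},
  };
  if (Bits != 32 && Bits != 64)
    return nullptr; // widths this sketch does not cover
  return Names[IsRem][IsSigned][Bits == 64];
}

int main() {
  std::printf("%s\n", divRemLibcall(32, true, false)); // __divsi3
  std::printf("%s\n", divRemLibcall(64, false, true)); // __umoddi3
  return 0;
}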
@@ -1536,7 +1825,7 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, bool Return) { // Use target triple & subtarget features to do actual dispatch. if (Subtarget->isAAPCS_ABI()) { if (Subtarget->hasVFP2() && - FloatABIType == FloatABI::Hard) + TM.Options.FloatABIType == FloatABI::Hard) return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP); else return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS); @@ -1548,11 +1837,6 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, bool Return) { return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS); case CallingConv::ARM_APCS: return (Return ? RetCC_ARM_APCS: CC_ARM_APCS); - case CallingConv::GHC: - if (Return) - llvm_unreachable("Can't return in GHC call convention"); - else - return CC_ARM_APCS_GHC; } } @@ -1567,6 +1851,48 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args, CCState CCInfo(CC, false, *FuncInfo.MF, TM, ArgLocs, *Context); CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC, false)); + // Check that we can handle all of the arguments. If we can't, then bail out + // now before we add code to the MBB. + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + MVT ArgVT = ArgVTs[VA.getValNo()]; + + // We don't handle NEON/vector parameters yet. + if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64) + return false; + + // Now copy/store arg to correct locations. + if (VA.isRegLoc() && !VA.needsCustom()) { + continue; + } else if (VA.needsCustom()) { + // TODO: We need custom lowering for vector (v2f64) args. + if (VA.getLocVT() != MVT::f64 || + // TODO: Only handle register args for now. + !VA.isRegLoc() || !ArgLocs[++i].isRegLoc()) + return false; + } else { + switch (static_cast<EVT>(ArgVT).getSimpleVT().SimpleTy) { + default: + return false; + case MVT::i1: + case MVT::i8: + case MVT::i16: + case MVT::i32: + break; + case MVT::f32: + if (!Subtarget->hasVFP2()) + return false; + break; + case MVT::f64: + if (!Subtarget->hasVFP2()) + return false; + break; + } + } + } + + // At the point, we are able to handle the call's arguments in fast isel. + // Get a count of how many bytes are to be pushed on the stack. NumBytes = CCInfo.getNextStackOffset(); @@ -1582,41 +1908,26 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args, unsigned Arg = ArgRegs[VA.getValNo()]; MVT ArgVT = ArgVTs[VA.getValNo()]; - // We don't handle NEON/vector parameters yet. - if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64) - return false; + assert((!ArgVT.isVector() && ArgVT.getSizeInBits() <= 64) && + "We don't handle NEON/vector parameters yet."); // Handle arg promotion, etc. switch (VA.getLocInfo()) { case CCValAssign::Full: break; case CCValAssign::SExt: { - bool Emitted = FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), - Arg, ArgVT, Arg); - assert(Emitted && "Failed to emit a sext!"); (void)Emitted; - Emitted = true; - ArgVT = VA.getLocVT(); + MVT DestVT = VA.getLocVT(); + Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/false); + assert (Arg != 0 && "Failed to emit a sext"); + ArgVT = DestVT; break; } + case CCValAssign::AExt: + // Intentional fall-through. Handle AExt and ZExt. 
case CCValAssign::ZExt: { - bool Emitted = FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), - Arg, ArgVT, Arg); - assert(Emitted && "Failed to emit a zext!"); (void)Emitted; - Emitted = true; - ArgVT = VA.getLocVT(); - break; - } - case CCValAssign::AExt: { - bool Emitted = FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(), - Arg, ArgVT, Arg); - if (!Emitted) - Emitted = FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), - Arg, ArgVT, Arg); - if (!Emitted) - Emitted = FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), - Arg, ArgVT, Arg); - - assert(Emitted && "Failed to emit a aext!"); (void)Emitted; - ArgVT = VA.getLocVT(); + MVT DestVT = VA.getLocVT(); + Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/true); + assert (Arg != 0 && "Failed to emit a sext"); + ArgVT = DestVT; break; } case CCValAssign::BCvt: { @@ -1634,16 +1945,17 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args, if (VA.isRegLoc() && !VA.needsCustom()) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), VA.getLocReg()) - .addReg(Arg); + .addReg(Arg); RegArgs.push_back(VA.getLocReg()); } else if (VA.needsCustom()) { // TODO: We need custom lowering for vector (v2f64) args. - if (VA.getLocVT() != MVT::f64) return false; + assert(VA.getLocVT() == MVT::f64 && + "Custom lowering for v2f64 args not available"); CCValAssign &NextVA = ArgLocs[++i]; - // TODO: Only handle register args for now. - if(!(VA.isRegLoc() && NextVA.isRegLoc())) return false; + assert(VA.isRegLoc() && NextVA.isRegLoc() && + "We only handle register args!"); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::VMOVRRD), VA.getLocReg()) @@ -1659,9 +1971,11 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args, Addr.Base.Reg = ARM::SP; Addr.Offset = VA.getLocMemOffset(); - if (!ARMEmitStore(ArgVT, Arg, Addr)) return false; + bool EmitRet = ARMEmitStore(ArgVT, Arg, Addr); (void)EmitRet; + assert(EmitRet && "Could not emit a store for argument!"); } } + return true; } @@ -1685,7 +1999,7 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs, // For this move we copy into two registers and then move into the // double fp reg we want. EVT DestVT = RVLocs[0].getValVT(); - TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT); + const TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT); unsigned ResultReg = createResultReg(DstRC); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::VMOVDRR), ResultReg) @@ -1700,7 +2014,12 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs, } else { assert(RVLocs.size() == 1 &&"Can't handle non-double multi-reg retvals!"); EVT CopyVT = RVLocs[0].getValVT(); - TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT); + + // Special handling for extended integers. + if (RetVT == MVT::i1 || RetVT == MVT::i8 || RetVT == MVT::i16) + CopyVT = MVT::i32; + + const TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT); unsigned ResultReg = createResultReg(DstRC); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), @@ -1753,13 +2072,26 @@ bool ARMFastISel::SelectRet(const Instruction *I) { // Only handle register returns for now. if (!VA.isRegLoc()) return false; - // TODO: For now, don't try to handle cases where getLocInfo() - // says Full but the types don't match. - if (TLI.getValueType(RV->getType()) != VA.getValVT()) - return false; - // Make the copy. 
unsigned SrcReg = Reg + VA.getValNo(); + EVT RVVT = TLI.getValueType(RV->getType()); + EVT DestVT = VA.getValVT(); + // Special handling for extended integers. + if (RVVT != DestVT) { + if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16) + return false; + + assert(DestVT == MVT::i32 && "ARM should always ext to i32"); + + // Perform extension if flagged as either zext or sext. Otherwise, do + // nothing. + if (Outs[0].Flags.isZExt() || Outs[0].Flags.isSExt()) { + SrcReg = ARMEmitIntExt(RVVT, SrcReg, DestVT, Outs[0].Flags.isZExt()); + if (SrcReg == 0) return false; + } + } + + // Make the copy. unsigned DstReg = VA.getLocReg(); const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg); // Avoid a cross-class copy. This is very unlikely. @@ -1772,20 +2104,17 @@ bool ARMFastISel::SelectRet(const Instruction *I) { MRI.addLiveOut(VA.getLocReg()); } - unsigned RetOpc = isThumb ? ARM::tBX_RET : ARM::BX_RET; + unsigned RetOpc = isThumb2 ? ARM::tBX_RET : ARM::BX_RET; AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(RetOpc))); return true; } unsigned ARMFastISel::ARMSelectCallOp(const GlobalValue *GV) { - - // Darwin needs the r9 versions of the opcodes. - bool isDarwin = Subtarget->isTargetDarwin(); - if (isThumb) { - return isDarwin ? ARM::tBLr9 : ARM::tBL; + if (isThumb2) { + return ARM::tBL; } else { - return isDarwin ? ARM::BLr9 : ARM::BL; + return ARM::BL; } } @@ -1844,11 +2173,10 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes)) return false; - // Issue the call, BLr9 for darwin, BL otherwise. - // TODO: Turn this into the table of arm call ops. + // Issue the call. MachineInstrBuilder MIB; unsigned CallOpc = ARMSelectCallOp(NULL); - if(isThumb) + if (isThumb2) // Explicitly adding the predicate here. MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc))) @@ -1863,6 +2191,10 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { for (unsigned i = 0, e = RegArgs.size(); i != e; ++i) MIB.addReg(RegArgs[i]); + // Add a register mask with the call-preserved registers. + // Proper defs for return values will be added by setPhysRegsDeadExcept(). + MIB.addRegMask(TRI.getCallPreservedMask(CC)); + // Finish off the call including any return values. SmallVector<unsigned, 4> UsedRegs; if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false; @@ -1873,12 +2205,13 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { return true; } -bool ARMFastISel::SelectCall(const Instruction *I) { +bool ARMFastISel::SelectCall(const Instruction *I, + const char *IntrMemName = 0) { const CallInst *CI = cast<CallInst>(I); const Value *Callee = CI->getCalledValue(); - // Can't handle inline asm or worry about intrinsics yet. - if (isa<InlineAsm>(Callee) || isa<IntrinsicInst>(CI)) return false; + // Can't handle inline asm. + if (isa<InlineAsm>(Callee)) return false; // Only handle global variable Callees. const GlobalValue *GV = dyn_cast<GlobalValue>(Callee); @@ -1902,7 +2235,8 @@ bool ARMFastISel::SelectCall(const Instruction *I) { MVT RetVT; if (RetTy->isVoidTy()) RetVT = MVT::isVoid; - else if (!isTypeLegal(RetTy, RetVT)) + else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 && + RetVT != MVT::i8 && RetVT != MVT::i1) return false; // TODO: For now if we have long calls specified we don't handle the call. 
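// Aside: the custom f64 handling above is a pure bit split/join -- VMOVRRD
// scatters a D-register into two core registers for argument passing, and
// VMOVDRR rebuilds the double from the returned register pair. A standalone
// equivalent (register order shown as on little-endian targets):
#include <cassert>
#include <cstdint>
#include <cstring>

static void splitDouble(double D, uint32_t &Lo, uint32_t &Hi) {
  uint64_t Bits;
  std::memcpy(&Bits, &D, sizeof(Bits));
  Lo = (uint32_t)Bits;         // first GPR of the pair
  Hi = (uint32_t)(Bits >> 32); // second GPR of the pair
}

static double joinDouble(uint32_t Lo, uint32_t Hi) {
  uint64_t Bits = ((uint64_t)Hi << 32) | Lo;
  double D;
  std::memcpy(&D, &Bits, sizeof(D));
  return D; // round-trips exactly: no rounding is involved
}

int main() {
  uint32_t Lo, Hi;
  splitDouble(1.25, Lo, Hi);
  assert(joinDouble(Lo, Hi) == 1.25);
  return 0;
}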
@@ -1913,16 +2247,18 @@ bool ARMFastISel::SelectCall(const Instruction *I) { SmallVector<unsigned, 8> ArgRegs; SmallVector<MVT, 8> ArgVTs; SmallVector<ISD::ArgFlagsTy, 8> ArgFlags; - Args.reserve(CS.arg_size()); - ArgRegs.reserve(CS.arg_size()); - ArgVTs.reserve(CS.arg_size()); - ArgFlags.reserve(CS.arg_size()); + unsigned arg_size = CS.arg_size(); + Args.reserve(arg_size); + ArgRegs.reserve(arg_size); + ArgVTs.reserve(arg_size); + ArgFlags.reserve(arg_size); for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i) { - unsigned Arg = getRegForValue(*i); + // If we're lowering a memory intrinsic instead of a regular call, skip the + // last two arguments, which shouldn't be passed to the underlying function. + if (IntrMemName && e-i <= 2) + break; - if (Arg == 0) - return false; ISD::ArgFlagsTy Flags; unsigned AttrInd = i - CS.arg_begin() + 1; if (CS.paramHasAttr(AttrInd, Attribute::SExt)) @@ -1930,7 +2266,7 @@ bool ARMFastISel::SelectCall(const Instruction *I) { if (CS.paramHasAttr(AttrInd, Attribute::ZExt)) Flags.setZExt(); - // FIXME: Only handle *easy* calls for now. + // FIXME: Only handle *easy* calls for now. if (CS.paramHasAttr(AttrInd, Attribute::InReg) || CS.paramHasAttr(AttrInd, Attribute::StructRet) || CS.paramHasAttr(AttrInd, Attribute::Nest) || @@ -1939,8 +2275,14 @@ bool ARMFastISel::SelectCall(const Instruction *I) { Type *ArgTy = (*i)->getType(); MVT ArgVT; - if (!isTypeLegal(ArgTy, ArgVT)) + if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8 && + ArgVT != MVT::i1) return false; + + unsigned Arg = getRegForValue(*i); + if (Arg == 0) + return false; + unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy); Flags.setOrigAlign(OriginalAlignment); @@ -1956,26 +2298,38 @@ bool ARMFastISel::SelectCall(const Instruction *I) { if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes)) return false; - // Issue the call, BLr9 for darwin, BL otherwise. - // TODO: Turn this into the table of arm call ops. + // Issue the call. MachineInstrBuilder MIB; unsigned CallOpc = ARMSelectCallOp(GV); // Explicitly adding the predicate here. - if(isThumb) + if(isThumb2) { // Explicitly adding the predicate here. MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(CallOpc))) - .addGlobalAddress(GV, 0, 0); - else - // Explicitly adding the predicate here. - MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(CallOpc)) - .addGlobalAddress(GV, 0, 0)); - + TII.get(CallOpc))); + if (!IntrMemName) + MIB.addGlobalAddress(GV, 0, 0); + else + MIB.addExternalSymbol(IntrMemName, 0); + } else { + if (!IntrMemName) + // Explicitly adding the predicate here. + MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(CallOpc)) + .addGlobalAddress(GV, 0, 0)); + else + MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(CallOpc)) + .addExternalSymbol(IntrMemName, 0)); + } + // Add implicit physical register uses to the call. for (unsigned i = 0, e = RegArgs.size(); i != e; ++i) MIB.addReg(RegArgs[i]); + // Add a register mask with the call-preserved registers. + // Proper defs for return values will be added by setPhysRegsDeadExcept(). + MIB.addRegMask(TRI.getCallPreservedMask(CC)); + // Finish off the call including any return values. 
SmallVector<unsigned, 4> UsedRegs; if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false; @@ -1984,83 +2338,187 @@ bool ARMFastISel::SelectCall(const Instruction *I) { static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI); return true; +} +bool ARMFastISel::ARMIsMemCpySmall(uint64_t Len) { + return Len <= 16; } -bool ARMFastISel::SelectIntCast(const Instruction *I) { - // On ARM, in general, integer casts don't involve legal types; this code - // handles promotable integers. The high bits for a type smaller than - // the register size are assumed to be undefined. - Type *DestTy = I->getType(); - Value *Op = I->getOperand(0); - Type *SrcTy = Op->getType(); +bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src, + uint64_t Len) { + // Make sure we don't bloat code by inlining very large memcpy's. + if (!ARMIsMemCpySmall(Len)) + return false; - EVT SrcVT, DestVT; - SrcVT = TLI.getValueType(SrcTy, true); - DestVT = TLI.getValueType(DestTy, true); + // We don't care about alignment here since we just emit integer accesses. + while (Len) { + MVT VT; + if (Len >= 4) + VT = MVT::i32; + else if (Len >= 2) + VT = MVT::i16; + else { + assert(Len == 1); + VT = MVT::i8; + } - if (isa<TruncInst>(I)) { - if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8) - return false; - if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1) + bool RV; + unsigned ResultReg; + RV = ARMEmitLoad(VT, ResultReg, Src); + assert (RV == true && "Should be able to handle this load."); + RV = ARMEmitStore(VT, ResultReg, Dest); + assert (RV == true && "Should be able to handle this store."); + (void)RV; + + unsigned Size = VT.getSizeInBits()/8; + Len -= Size; + Dest.Offset += Size; + Src.Offset += Size; + } + + return true; +} + +bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) { + // FIXME: Handle more intrinsics. + switch (I.getIntrinsicID()) { + default: return false; + case Intrinsic::memcpy: + case Intrinsic::memmove: { + const MemTransferInst &MTI = cast<MemTransferInst>(I); + // Don't handle volatile. + if (MTI.isVolatile()) return false; - unsigned SrcReg = getRegForValue(Op); - if (!SrcReg) return false; + // Disable inlining for memmove before calls to ComputeAddress. Otherwise, + // we would emit dead code because we don't currently handle memmoves. + bool isMemCpy = (I.getIntrinsicID() == Intrinsic::memcpy); + if (isa<ConstantInt>(MTI.getLength()) && isMemCpy) { + // Small memcpy's are common enough that we want to do them without a call + // if possible. + uint64_t Len = cast<ConstantInt>(MTI.getLength())->getZExtValue(); + if (ARMIsMemCpySmall(Len)) { + Address Dest, Src; + if (!ARMComputeAddress(MTI.getRawDest(), Dest) || + !ARMComputeAddress(MTI.getRawSource(), Src)) + return false; + if (ARMTryEmitSmallMemCpy(Dest, Src, Len)) + return true; + } + } + + if (!MTI.getLength()->getType()->isIntegerTy(32)) + return false; + + if (MTI.getSourceAddressSpace() > 255 || MTI.getDestAddressSpace() > 255) + return false; - // Because the high bits are undefined, a truncate doesn't generate - // any code. - UpdateValueMap(I, SrcReg); - return true; + const char *IntrMemName = isa<MemCpyInst>(I) ? "memcpy" : "memmove"; + return SelectCall(&I, IntrMemName); } - if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8) + case Intrinsic::memset: { + const MemSetInst &MSI = cast<MemSetInst>(I); + // Don't handle volatile. 
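// Aside: ARMTryEmitSmallMemCpy above unrolls copies of at most 16 bytes
// into i32/i16/i8 load-store pairs instead of a libcall. The same greedy
// chunking in standalone form (each memcpy below stands in for one emitted
// load/store pair):
#include <cassert>
#include <cstdint>
#include <cstring>

static void smallMemCpy(unsigned char *Dst, const unsigned char *Src,
                        uint64_t Len) {
  assert(Len <= 16 && "inlining only pays off for tiny, known lengths");
  while (Len) {
    unsigned Size = Len >= 4 ? 4 : (Len >= 2 ? 2 : 1);
    std::memcpy(Dst, Src, Size);
    Dst += Size;
    Src += Size;
    Len -= Size;
  }
}

int main() {
  unsigned char A[7] = {1, 2, 3, 4, 5, 6, 7}, B[7] = {};
  smallMemCpy(B, A, sizeof(A)); // one word, one halfword, one byte
  assert(std::memcmp(A, B, sizeof(A)) == 0);
  return 0;
}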
+ if (MSI.isVolatile()) + return false; + + if (!MSI.getLength()->getType()->isIntegerTy(32)) + return false; + + if (MSI.getDestAddressSpace() > 255) + return false; + + return SelectCall(&I, "memset"); + } + } +} + +bool ARMFastISel::SelectTrunc(const Instruction *I) { + // The high bits for a type smaller than the register size are assumed to be + // undefined. + Value *Op = I->getOperand(0); + + EVT SrcVT, DestVT; + SrcVT = TLI.getValueType(Op->getType(), true); + DestVT = TLI.getValueType(I->getType(), true); + + if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8) + return false; + if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1) return false; + unsigned SrcReg = getRegForValue(Op); + if (!SrcReg) return false; + + // Because the high bits are undefined, a truncate doesn't generate + // any code. + UpdateValueMap(I, SrcReg); + return true; +} + +unsigned ARMFastISel::ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT, + bool isZExt) { + if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8) + return 0; + unsigned Opc; - bool isZext = isa<ZExtInst>(I); bool isBoolZext = false; - if (!SrcVT.isSimple()) - return false; + if (!SrcVT.isSimple()) return 0; switch (SrcVT.getSimpleVT().SimpleTy) { - default: return false; + default: return 0; case MVT::i16: - if (!Subtarget->hasV6Ops()) return false; - if (isZext) - Opc = isThumb ? ARM::t2UXTH : ARM::UXTH; + if (!Subtarget->hasV6Ops()) return 0; + if (isZExt) + Opc = isThumb2 ? ARM::t2UXTH : ARM::UXTH; else - Opc = isThumb ? ARM::t2SXTH : ARM::SXTH; + Opc = isThumb2 ? ARM::t2SXTH : ARM::SXTH; break; case MVT::i8: - if (!Subtarget->hasV6Ops()) return false; - if (isZext) - Opc = isThumb ? ARM::t2UXTB : ARM::UXTB; + if (!Subtarget->hasV6Ops()) return 0; + if (isZExt) + Opc = isThumb2 ? ARM::t2UXTB : ARM::UXTB; else - Opc = isThumb ? ARM::t2SXTB : ARM::SXTB; + Opc = isThumb2 ? ARM::t2SXTB : ARM::SXTB; break; case MVT::i1: - if (isZext) { - Opc = isThumb ? ARM::t2ANDri : ARM::ANDri; + if (isZExt) { + Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri; isBoolZext = true; break; } - return false; + return 0; } - // FIXME: We could save an instruction in many cases by special-casing - // load instructions. - unsigned SrcReg = getRegForValue(Op); - if (!SrcReg) return false; - - unsigned DestReg = createResultReg(TLI.getRegClassFor(MVT::i32)); + unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::i32)); MachineInstrBuilder MIB; - MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg) + MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg) .addReg(SrcReg); if (isBoolZext) MIB.addImm(1); else MIB.addImm(0); AddOptionalDefs(MIB); - UpdateValueMap(I, DestReg); + return ResultReg; +} + +bool ARMFastISel::SelectIntExt(const Instruction *I) { + // On ARM, in general, integer casts don't involve legal types; this code + // handles promotable integers. 
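// Aside: what the opcodes picked by ARMEmitIntExt above compute. UXTB/UXTH
// mask the low bits, SXTB/SXTH replicate the sign bit, and i1 zero-extends
// with AND #1 because only its low bit is defined:
#include <cassert>
#include <cstdint>

static uint32_t uxtb(uint32_t R) { return R & 0xFFu; }
static uint32_t uxth(uint32_t R) { return R & 0xFFFFu; }
static uint32_t sxtb(uint32_t R) { return (uint32_t)(int32_t)(int8_t)(R & 0xFFu); }
static uint32_t sxth(uint32_t R) { return (uint32_t)(int32_t)(int16_t)(R & 0xFFFFu); }
static uint32_t zextI1(uint32_t R) { return R & 1u; } // the ANDri #1 path

int main() {
  assert(uxtb(0x1F0u) == 0xF0u);
  assert(sxtb(0x80u) == 0xFFFFFF80u); // byte sign bit propagates
  assert(sxth(0x8000u) == 0xFFFF8000u);
  assert(zextI1(0xFFu) == 1u);
  return 0;
}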
+ Type *DestTy = I->getType(); + Value *Src = I->getOperand(0); + Type *SrcTy = Src->getType(); + + EVT SrcVT, DestVT; + SrcVT = TLI.getValueType(SrcTy, true); + DestVT = TLI.getValueType(DestTy, true); + + bool isZExt = isa<ZExtInst>(I); + unsigned SrcReg = getRegForValue(Src); + if (!SrcReg) return false; + + unsigned ResultReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT, isZExt); + if (ResultReg == 0) return false; + UpdateValueMap(I, ResultReg); return true; } @@ -2074,6 +2532,8 @@ bool ARMFastISel::TargetSelectInstruction(const Instruction *I) { return SelectStore(I); case Instruction::Br: return SelectBranch(I); + case Instruction::IndirectBr: + return SelectIndirectBr(I); case Instruction::ICmp: case Instruction::FCmp: return SelectCmp(I); @@ -2082,42 +2542,105 @@ bool ARMFastISel::TargetSelectInstruction(const Instruction *I) { case Instruction::FPTrunc: return SelectFPTrunc(I); case Instruction::SIToFP: - return SelectSIToFP(I); + return SelectIToFP(I, /*isSigned*/ true); + case Instruction::UIToFP: + return SelectIToFP(I, /*isSigned*/ false); case Instruction::FPToSI: - return SelectFPToSI(I); + return SelectFPToI(I, /*isSigned*/ true); + case Instruction::FPToUI: + return SelectFPToI(I, /*isSigned*/ false); + case Instruction::Add: + return SelectBinaryIntOp(I, ISD::ADD); + case Instruction::Or: + return SelectBinaryIntOp(I, ISD::OR); + case Instruction::Sub: + return SelectBinaryIntOp(I, ISD::SUB); case Instruction::FAdd: - return SelectBinaryOp(I, ISD::FADD); + return SelectBinaryFPOp(I, ISD::FADD); case Instruction::FSub: - return SelectBinaryOp(I, ISD::FSUB); + return SelectBinaryFPOp(I, ISD::FSUB); case Instruction::FMul: - return SelectBinaryOp(I, ISD::FMUL); + return SelectBinaryFPOp(I, ISD::FMUL); case Instruction::SDiv: - return SelectSDiv(I); + return SelectDiv(I, /*isSigned*/ true); + case Instruction::UDiv: + return SelectDiv(I, /*isSigned*/ false); case Instruction::SRem: - return SelectSRem(I); + return SelectRem(I, /*isSigned*/ true); + case Instruction::URem: + return SelectRem(I, /*isSigned*/ false); case Instruction::Call: + if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) + return SelectIntrinsicCall(*II); return SelectCall(I); case Instruction::Select: return SelectSelect(I); case Instruction::Ret: return SelectRet(I); case Instruction::Trunc: + return SelectTrunc(I); case Instruction::ZExt: case Instruction::SExt: - return SelectIntCast(I); + return SelectIntExt(I); default: break; } return false; } +/// TryToFoldLoad - The specified machine instr operand is a vreg, and that +/// vreg is being provided by the specified load instruction. If possible, +/// try to fold the load as an operand to the instruction, returning true if +/// successful. +bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo, + const LoadInst *LI) { + // Verify we have a legal type before going any further. + MVT VT; + if (!isLoadTypeLegal(LI->getType(), VT)) + return false; + + // Combine load followed by zero- or sign-extend. + // ldrb r1, [r0] ldrb r1, [r0] + // uxtb r2, r1 => + // mov r3, r2 mov r3, r1 + bool isZExt = true; + switch(MI->getOpcode()) { + default: return false; + case ARM::SXTH: + case ARM::t2SXTH: + isZExt = false; + case ARM::UXTH: + case ARM::t2UXTH: + if (VT != MVT::i16) + return false; + break; + case ARM::SXTB: + case ARM::t2SXTB: + isZExt = false; + case ARM::UXTB: + case ARM::t2UXTB: + if (VT != MVT::i8) + return false; + break; + } + // See if we can handle this address. 
+ Address Addr; + if (!ARMComputeAddress(LI->getOperand(0), Addr)) return false; + + unsigned ResultReg = MI->getOperand(0).getReg(); + if (!ARMEmitLoad(VT, ResultReg, Addr, LI->getAlignment(), isZExt, false)) + return false; + MI->eraseFromParent(); + return true; +} + namespace llvm { - llvm::FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo) { - // Completely untested on non-darwin. + FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo) { + // Completely untested on non-iOS. const TargetMachine &TM = funcInfo.MF->getTarget(); // Darwin and thumb1 only for now. const ARMSubtarget *Subtarget = &TM.getSubtarget<ARMSubtarget>(); - if (Subtarget->isTargetDarwin() && !Subtarget->isThumb1Only() && + if (Subtarget->isTargetIOS() && !Subtarget->isThumb1Only() && !DisableARMFastISel) return new ARMFastISel(funcInfo); return 0; diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp index 412751b..402ecb0 100644 --- a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp @@ -1,4 +1,4 @@ -//=======- ARMFrameLowering.cpp - ARM Frame Information --------*- C++ -*-====// +//===-- ARMFrameLowering.cpp - ARM Frame Information ----------------------===// // // The LLVM Compiler Infrastructure // @@ -15,31 +15,40 @@ #include "ARMBaseInstrInfo.h" #include "ARMBaseRegisterInfo.h" #include "ARMMachineFunctionInfo.h" -#include "llvm/CallingConv.h" -#include "llvm/Function.h" #include "MCTargetDesc/ARMAddressingModes.h" +#include "llvm/Function.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Support/CommandLine.h" using namespace llvm; +static cl::opt<bool> +SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true), + cl::desc("Align ARM NEON spills in prolog and epilog")); + +static MachineBasicBlock::iterator +skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI, + unsigned NumAlignedDPRCS2Regs); + /// hasFP - Return true if the specified function should have a dedicated frame /// pointer register. This is true if the function has variable sized allocas /// or if frame pointer elimination is disabled. bool ARMFrameLowering::hasFP(const MachineFunction &MF) const { const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); - // Mac OS X requires FP not to be clobbered for backtracing purpose. - if (STI.isTargetDarwin()) + // iOS requires FP not to be clobbered for backtracing purpose. + if (STI.isTargetIOS()) return true; const MachineFrameInfo *MFI = MF.getFrameInfo(); // Always eliminate non-leaf frame pointers. 
- return ((DisableFramePointerElim(MF) && MFI->hasCalls()) || + return ((MF.getTarget().Options.DisableFramePointerElim(MF) && + MFI->hasCalls()) || RegInfo->needsStackRealignment(MF) || MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken()); @@ -72,7 +81,7 @@ ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const { return hasReservedCallFrame(MF) || MF.getFrameInfo()->hasVarSizedObjects(); } -static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) { +static bool isCalleeSavedRegister(unsigned Reg, const uint16_t *CSRegs) { for (unsigned i = 0; CSRegs[i]; ++i) if (Reg == CSRegs[i]) return true; @@ -81,7 +90,7 @@ static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) { static bool isCSRestore(MachineInstr *MI, const ARMBaseInstrInfo &TII, - const unsigned *CSRegs) { + const uint16_t *CSRegs) { // Integer spill area is handled with "pop". if (MI->getOpcode() == ARM::LDMIA_RET || MI->getOpcode() == ARM::t2LDMIA_RET || @@ -140,10 +149,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { // belongs to which callee-save spill areas. unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0; int FramePtrSpillFI = 0; - - // All calls are tail calls in GHC calling conv, and functions have no prologue/epilogue. - if (MF.getFunction()->getCallingConv() == CallingConv::GHC) - return; + int D8SpillFI = 0; // Allocate the vararg register save area. This is not counted in NumBytes. if (VARegSaveSize) @@ -177,7 +183,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { case ARM::R11: if (Reg == FramePtr) FramePtrSpillFI = FI; - if (STI.isTargetDarwin()) { + if (STI.isTargetIOS()) { AFI->addGPRCalleeSavedArea2Frame(FI); GPRCS2Size += 4; } else { @@ -186,8 +192,13 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { } break; default: - AFI->addDPRCalleeSavedAreaFrame(FI); - DPRCSSize += 8; + // This is a DPR. Exclude the aligned DPRCS2 spills. + if (Reg == ARM::D8) + D8SpillFI = FI; + if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs()) { + AFI->addDPRCalleeSavedAreaFrame(FI); + DPRCSSize += 8; + } } } @@ -195,8 +206,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { if (GPRCS1Size > 0) MBBI++; // Set FP to point to the stack slot that contains the previous FP. - // For Darwin, FP is R7, which has now been stored in spill area 1. - // Otherwise, if this is not Darwin, all the callee-saved registers go + // For iOS, FP is R7, which has now been stored in spill area 1. + // Otherwise, if this is not iOS, all the callee-saved registers go // into spill area 1, including the FP in R11. In either case, it is // now safe to emit this assignment. bool HasFP = hasFP(MF); @@ -232,7 +243,17 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { MBBI++; } - NumBytes = DPRCSOffset; + // Move past the aligned DPRCS2 area. + if (AFI->getNumAlignedDPRCS2Regs() > 0) { + MBBI = skipAlignedDPRCS2Spills(MBBI, AFI->getNumAlignedDPRCS2Regs()); + // The code inserted by emitAlignedDPRCS2Spills realigns the stack, and + // leaves the stack pointer pointing to the DPRCS2 area. + // + // Adjust NumBytes to represent the stack slots below the DPRCS2 area. + NumBytes += MFI->getObjectOffset(D8SpillFI); + } else + NumBytes = DPRCSOffset; + if (NumBytes) { // Adjust SP after all the callee-save spills. 
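// Aside: the aligned DPRCS2 area above works because the prolog realigns SP
// before the D8-D15 spills (the code emitted by emitAlignedDPRCS2Spills uses
// the usual round-down idiom, e.g. BIC of Align-1). The address arithmetic,
// standalone:
#include <cassert>
#include <cstdint>

static uint64_t realignDown(uint64_t SP, uint64_t Align) {
  assert(Align != 0 && (Align & (Align - 1)) == 0 && "power-of-two alignment");
  return SP & ~(Align - 1); // e.g. bic sp, sp, #(Align - 1)
}

int main() {
  assert(realignDown(0x1004, 16) == 0x1000);
  assert(realignDown(0x1000, 16) == 0x1000); // already aligned: unchanged
  return 0;
}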
emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes, @@ -259,7 +280,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { // If we need dynamic stack realignment, do it here. Be paranoid and make // sure if we also have VLAs, we have a base pointer for frame access. - if (RegInfo->needsStackRealignment(MF)) { + // If aligned NEON registers were spilled, the stack has already been + // realigned. + if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->needsStackRealignment(MF)) { unsigned MaxAlign = MFI->getMaxAlignment(); assert (!AFI->isThumb1OnlyFunction()); if (!AFI->isThumbFunction()) { @@ -315,8 +338,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { void ARMFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); - assert(MBBI->getDesc().isReturn() && - "Can only insert epilog into returning blocks"); + assert(MBBI->isReturn() && "Can only insert epilog into returning blocks"); unsigned RetOpcode = MBBI->getOpcode(); DebugLoc dl = MBBI->getDebugLoc(); MachineFrameInfo *MFI = MF.getFrameInfo(); @@ -332,16 +354,12 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, int NumBytes = (int)MFI->getStackSize(); unsigned FramePtr = RegInfo->getFrameRegister(MF); - // All calls are tail calls in GHC calling conv, and functions have no prologue/epilogue. - if (MF.getFunction()->getCallingConv() == CallingConv::GHC) - return; - if (!AFI->hasStackFrame()) { if (NumBytes != 0) emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes); } else { // Unwind MBBI to point to first LDR / VLDRD. - const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(); + const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(); if (MBBI != MBB.begin()) { do --MBBI; @@ -365,7 +383,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, ARMCC::AL, 0, TII); else { // It's not possible to restore SP from FP in a single instruction. - // For Darwin, this looks like: + // For iOS, this looks like: // mov sp, r7 // sub sp, #24 // This is bad, if an interrupt is taken after the mov, sp is in an @@ -404,17 +422,16 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, if (AFI->getGPRCalleeSavedArea1Size()) MBBI++; } - if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNdiND || - RetOpcode == ARM::TCRETURNri || RetOpcode == ARM::TCRETURNriND) { + if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri) { // Tail call return: adjust the stack pointer and jump to callee. MBBI = MBB.getLastNonDebugInstr(); MachineOperand &JumpTarget = MBBI->getOperand(0); // Jump to label or value in register. - if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNdiND) { - unsigned TCOpcode = (RetOpcode == ARM::TCRETURNdi) - ? (STI.isThumb() ? ARM::tTAILJMPd : ARM::TAILJMPd) - : (STI.isThumb() ? ARM::tTAILJMPdND : ARM::TAILJMPdND); + if (RetOpcode == ARM::TCRETURNdi) { + unsigned TCOpcode = STI.isThumb() ? + (STI.isTargetIOS() ? ARM::tTAILJMPd : ARM::tTAILJMPdND) : + ARM::TAILJMPd; MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode)); if (JumpTarget.isGlobal()) MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), @@ -431,10 +448,6 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, BuildMI(MBB, MBBI, dl, TII.get(STI.isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr)). addReg(JumpTarget.getReg(), RegState::Kill); - } else if (RetOpcode == ARM::TCRETURNriND) { - BuildMI(MBB, MBBI, dl, - TII.get(STI.isThumb() ? ARM::tTAILJMPrND : ARM::TAILJMPrND)). 
- addReg(JumpTarget.getReg(), RegState::Kill); } MachineInstr *NewMI = prior(MBBI); @@ -481,6 +494,10 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF, else if (AFI->isDPRCalleeSavedAreaFrame(FI)) return Offset - AFI->getDPRCalleeSavedAreaOffset(); + // SP can move around if there are allocas. We may also lose track of SP + // when emergency spilling inside a non-reserved call frame setup. + bool hasMovingSP = !hasReservedCallFrame(MF); + // When dynamically realigning the stack, use the frame pointer for // parameters, and the stack/base pointer for locals. if (RegInfo->needsStackRealignment(MF)) { @@ -488,7 +505,7 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF, if (isFixed) { FrameReg = RegInfo->getFrameRegister(MF); Offset = FPOffset; - } else if (MFI->hasVarSizedObjects()) { + } else if (hasMovingSP) { assert(RegInfo->hasBasePointer(MF) && "VLAs and dynamic stack alignment, but missing base pointer!"); FrameReg = RegInfo->getBaseRegister(); @@ -500,11 +517,10 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF, if (hasFP(MF) && AFI->hasStackFrame()) { // Use frame pointer to reference fixed objects. Use it for locals if // there are VLAs (and thus the SP isn't reliable as a base). - if (isFixed || (MFI->hasVarSizedObjects() && - !RegInfo->hasBasePointer(MF))) { + if (isFixed || (hasMovingSP && !RegInfo->hasBasePointer(MF))) { FrameReg = RegInfo->getFrameRegister(MF); return FPOffset; - } else if (MFI->hasVarSizedObjects()) { + } else if (hasMovingSP) { assert(RegInfo->hasBasePointer(MF) && "missing base pointer!"); if (AFI->isThumb2Function()) { // Try to use the frame pointer if we can, else use the base pointer @@ -551,6 +567,7 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB, unsigned StmOpc, unsigned StrOpc, bool NoGap, bool(*Func)(unsigned, bool), + unsigned NumAlignedDPRCS2Regs, unsigned MIFlags) const { MachineFunction &MF = *MBB.getParent(); const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); @@ -564,7 +581,11 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB, unsigned LastReg = 0; for (; i != 0; --i) { unsigned Reg = CSI[i-1].getReg(); - if (!(Func)(Reg, STI.isTargetDarwin())) continue; + if (!(Func)(Reg, STI.isTargetIOS())) continue; + + // D-registers in the aligned area DPRCS2 are NOT spilled here. + if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs) + continue; // Add the callee-saved register as live-in unless it's LR and // @llvm.returnaddress is called. 
If LR is returned for @@ -614,16 +635,15 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, const std::vector<CalleeSavedInfo> &CSI, unsigned LdmOpc, unsigned LdrOpc, bool isVarArg, bool NoGap, - bool(*Func)(unsigned, bool)) const { + bool(*Func)(unsigned, bool), + unsigned NumAlignedDPRCS2Regs) const { MachineFunction &MF = *MBB.getParent(); const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); DebugLoc DL = MI->getDebugLoc(); unsigned RetOpcode = MI->getOpcode(); bool isTailCall = (RetOpcode == ARM::TCRETURNdi || - RetOpcode == ARM::TCRETURNdiND || - RetOpcode == ARM::TCRETURNri || - RetOpcode == ARM::TCRETURNriND); + RetOpcode == ARM::TCRETURNri); SmallVector<unsigned, 4> Regs; unsigned i = CSI.size(); @@ -632,7 +652,11 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, bool DeleteRet = false; for (; i != 0; --i) { unsigned Reg = CSI[i-1].getReg(); - if (!(Func)(Reg, STI.isTargetDarwin())) continue; + if (!(Func)(Reg, STI.isTargetIOS())) continue; + + // The aligned reloads from area DPRCS2 are not inserted here. + if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs) + continue; if (Reg == ARM::LR && !isTailCall && !isVarArg && STI.hasV5TOps()) { Reg = ARM::PC; @@ -686,6 +710,247 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, } } +/// Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers +/// starting from d8. Also insert stack realignment code and leave the stack +/// pointer pointing to the d8 spill slot. +static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned NumAlignedDPRCS2Regs, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) { + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + DebugLoc DL = MI->getDebugLoc(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + + // Mark the D-register spill slots as properly aligned. Since MFI computes + // stack slot layout backwards, this can actually mean that the d-reg stack + // slot offsets can be wrong. The offset for d8 will always be correct. + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + unsigned DNum = CSI[i].getReg() - ARM::D8; + if (DNum >= 8) + continue; + int FI = CSI[i].getFrameIdx(); + // The even-numbered registers will be 16-byte aligned, the odd-numbered + // registers will be 8-byte aligned. + MFI.setObjectAlignment(FI, DNum % 2 ? 8 : 16); + + // The stack slot for D8 needs to be maximally aligned because this is + // actually the point where we align the stack pointer. MachineFrameInfo + // computes all offsets relative to the incoming stack pointer which is a + // bit weird when realigning the stack. Any extra padding for this + // over-alignment is not realized because the code inserted below adjusts + // the stack pointer by numregs * 8 before aligning the stack pointer. + if (DNum == 0) + MFI.setObjectAlignment(FI, MFI.getMaxAlignment()); + } + + // Move the stack pointer to the d8 spill slot, and align it at the same + // time. Leave the stack slot address in the scratch register r4. 
+ // + // sub r4, sp, #numregs * 8 + // bic r4, r4, #align - 1 + // mov sp, r4 + // + bool isThumb = AFI->isThumbFunction(); + assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1"); + AFI->setShouldRestoreSPFromFP(true); + + // sub r4, sp, #numregs * 8 + // The immediate is <= 64, so it doesn't need any special encoding. + unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri; + AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4) + .addReg(ARM::SP) + .addImm(8 * NumAlignedDPRCS2Regs))); + + // bic r4, r4, #align-1 + Opc = isThumb ? ARM::t2BICri : ARM::BICri; + unsigned MaxAlign = MF.getFrameInfo()->getMaxAlignment(); + AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4) + .addReg(ARM::R4, RegState::Kill) + .addImm(MaxAlign - 1))); + + // mov sp, r4 + // The stack pointer must be adjusted before spilling anything, otherwise + // the stack slots could be clobbered by an interrupt handler. + // Leave r4 live, it is used below. + Opc = isThumb ? ARM::tMOVr : ARM::MOVr; + MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(Opc), ARM::SP) + .addReg(ARM::R4); + MIB = AddDefaultPred(MIB); + if (!isThumb) + AddDefaultCC(MIB); + + // Now spill NumAlignedDPRCS2Regs registers starting from d8. + // r4 holds the stack slot address. + unsigned NextReg = ARM::D8; + + // 16-byte aligned vst1.64 with 4 d-regs and address writeback. + // The writeback is only needed when emitting two vst1.64 instructions. + if (NumAlignedDPRCS2Regs >= 6) { + unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, + ARM::QQPRRegisterClass); + MBB.addLiveIn(SupReg); + AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed), + ARM::R4) + .addReg(ARM::R4, RegState::Kill).addImm(16) + .addReg(NextReg) + .addReg(SupReg, RegState::ImplicitKill)); + NextReg += 4; + NumAlignedDPRCS2Regs -= 4; + } + + // We won't modify r4 beyond this point. It currently points to the next + // register to be spilled. + unsigned R4BaseReg = NextReg; + + // 16-byte aligned vst1.64 with 4 d-regs, no writeback. + if (NumAlignedDPRCS2Regs >= 4) { + unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, + ARM::QQPRRegisterClass); + MBB.addLiveIn(SupReg); + AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q)) + .addReg(ARM::R4).addImm(16).addReg(NextReg) + .addReg(SupReg, RegState::ImplicitKill)); + NextReg += 4; + NumAlignedDPRCS2Regs -= 4; + } + + // 16-byte aligned vst1.64 with 2 d-regs. + if (NumAlignedDPRCS2Regs >= 2) { + unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, + ARM::QPRRegisterClass); + MBB.addLiveIn(SupReg); + AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64)) + .addReg(ARM::R4).addImm(16).addReg(SupReg)); + NextReg += 2; + NumAlignedDPRCS2Regs -= 2; + } + + // Finally, use a vanilla vstr.64 for the odd last register. + if (NumAlignedDPRCS2Regs) { + MBB.addLiveIn(NextReg); + // vstr.64 uses addrmode5 which has an offset scale of 4. + AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD)) + .addReg(NextReg) + .addReg(ARM::R4).addImm((NextReg-R4BaseReg)*2)); + } + + // The last spill instruction inserted should kill the scratch register r4. + llvm::prior(MI)->addRegisterKilled(ARM::R4, TRI); +} + +/// Skip past the code inserted by emitAlignedDPRCS2Spills, and return an +/// iterator to the following instruction. 
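// For reference, the three setup instructions this routine skips over compute
// the realigned base, and the number of stores that follow depends only on the
// register count. A standalone sketch of both (plain C++; assumes align is a
// power of two, as MaxAlignment always is; names are illustrative):
#include <cstdint>
#include <cstdio>

// Mirrors: sub r4, sp, #numRegs * 8 ; bic r4, r4, #align - 1 ; mov sp, r4
static uint32_t alignedSpillBase(uint32_t sp, unsigned numRegs, uint32_t align) {
  uint32_t r4 = sp - 8 * numRegs;  // room for numRegs D-registers
  r4 &= ~(align - 1);              // round down to the alignment boundary
  return r4;                       // becomes both the new SP and the d8 slot
}

// Store count emitted by emitAlignedDPRCS2Spills for n registers; the switch
// below steps over exactly this many instructions.
static unsigned numAlignedSpillInsts(unsigned n) {
  unsigned insts = 0;
  if (n >= 6) { ++insts; n -= 4; } // vst1.64 x4 with writeback
  if (n >= 4) { ++insts; n -= 4; } // vst1.64 x4, no writeback
  if (n >= 2) { ++insts; n -= 2; } // vst1.64 x2
  if (n)      { ++insts; }         // lone leftover register: plain vstr
  return insts;
}

int main() {
  // 7 D-regs, 16-byte alignment: 0x7fffffe4 - 56 = 0x7fffffac -> 0x7fffffa0
  std::printf("%#x\n", alignedSpillBase(0x7fffffe4u, 7, 16));
  std::printf("%u\n", numAlignedSpillInsts(7)); // 3 stores for 7 registers
}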
+static MachineBasicBlock::iterator
+skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
+ unsigned NumAlignedDPRCS2Regs) {
+ // sub r4, sp, #numregs * 8
+ // bic r4, r4, #align - 1
+ // mov sp, r4
+ ++MI; ++MI; ++MI;
+ assert(MI->mayStore() && "Expecting spill instruction");
+
+ // These switch cases all fall through.
+ switch(NumAlignedDPRCS2Regs) {
+ case 7:
+ ++MI;
+ assert(MI->mayStore() && "Expecting spill instruction");
+ default:
+ ++MI;
+ assert(MI->mayStore() && "Expecting spill instruction");
+ case 1:
+ case 2:
+ case 4:
+ assert(MI->killsRegister(ARM::R4) && "Missed kill flag");
+ ++MI;
+ }
+ return MI;
+}
+
+/// Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers
+/// starting from d8. These instructions are assumed to execute while the
+/// stack is still aligned, unlike the code inserted by emitPopInst.
+static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned NumAlignedDPRCS2Regs,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) {
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ DebugLoc DL = MI->getDebugLoc();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ // Find the frame index assigned to d8.
+ int D8SpillFI = 0;
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i)
+ if (CSI[i].getReg() == ARM::D8) {
+ D8SpillFI = CSI[i].getFrameIdx();
+ break;
+ }
+
+ // Materialize the address of the d8 spill slot into the scratch register r4.
+ // This can be fairly complicated if the stack frame is large, so just use
+ // the normal frame index elimination mechanism to do it. This code runs as
+ // the initial part of the epilog where the stack and base pointers haven't
+ // been changed yet.
+ bool isThumb = AFI->isThumbFunction();
+ assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
+
+ unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
+ .addFrameIndex(D8SpillFI).addImm(0)));
+
+ // Now restore NumAlignedDPRCS2Regs registers starting from d8.
+ unsigned NextReg = ARM::D8;
+
+ // 16-byte aligned vld1.64 with 4 d-regs and writeback.
+ if (NumAlignedDPRCS2Regs >= 6) {
+ unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
+ ARM::QQPRRegisterClass);
+ AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg)
+ .addReg(ARM::R4, RegState::Define)
+ .addReg(ARM::R4, RegState::Kill).addImm(16)
+ .addReg(SupReg, RegState::ImplicitDefine));
+ NextReg += 4;
+ NumAlignedDPRCS2Regs -= 4;
+ }
+
+ // We won't modify r4 beyond this point. It currently points to the next
+ // register to be reloaded.
+ unsigned R4BaseReg = NextReg;
+
+ // 16-byte aligned vld1.64 with 4 d-regs, no writeback.
+ if (NumAlignedDPRCS2Regs >= 4) {
+ unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
+ ARM::QQPRRegisterClass);
+ AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg)
+ .addReg(ARM::R4).addImm(16)
+ .addReg(SupReg, RegState::ImplicitDefine));
+ NextReg += 4;
+ NumAlignedDPRCS2Regs -= 4;
+ }
+
+ // 16-byte aligned vld1.64 with 2 d-regs.
+ if (NumAlignedDPRCS2Regs >= 2) {
+ unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
+ ARM::QPRRegisterClass);
+ AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg)
+ .addReg(ARM::R4).addImm(16));
+ NextReg += 2;
+ NumAlignedDPRCS2Regs -= 2;
+ }
+
+ // Finally, use a vanilla vldr.64 for the remaining odd register.
+ if (NumAlignedDPRCS2Regs)
+ AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg)
+ .addReg(ARM::R4).addImm(2*(NextReg-R4BaseReg)));
+
+ // The last reload kills r4.
+ llvm::prior(MI)->addRegisterKilled(ARM::R4, TRI);
+}
+
bool ARMFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
@@ -700,12 +965,19 @@ bool ARMFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
unsigned PushOneOpc = AFI->isThumbFunction() ?
ARM::t2STR_PRE : ARM::STR_PRE_IMM;
unsigned FltOpc = ARM::VSTMDDB_UPD;
- emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register,
+ unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
+ emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register, 0,
MachineInstr::FrameSetup);
- emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register,
+ emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register, 0,
MachineInstr::FrameSetup);
emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
- MachineInstr::FrameSetup);
+ NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
+
+ // The code above does not insert spill code for the aligned DPRCS2 registers.
+ // The stack realignment code will be inserted between the push instructions
+ // and these spills.
+ if (NumAlignedDPRCS2Regs)
+ emitAlignedDPRCS2Spills(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
return true;
}
@@ -720,15 +992,22 @@ bool ARMFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineFunction &MF = *MBB.getParent();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
bool isVarArg = AFI->getVarArgsRegSaveSize() > 0;
+ unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
+
+ // The emitPopInst calls below do not insert reloads for the aligned DPRCS2
+ // registers. Do that here instead.
+ if (NumAlignedDPRCS2Regs)
+ emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
unsigned LdrOpc = AFI->isThumbFunction() ? ARM::t2LDR_POST :ARM::LDR_POST_IMM;
unsigned FltOpc = ARM::VLDMDIA_UPD;
- emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register);
+ emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
+ NumAlignedDPRCS2Regs);
emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
- &isARMArea2Register);
+ &isARMArea2Register, 0);
emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
- &isARMArea1Register);
+ &isARMArea1Register, 0);
return true;
}
@@ -852,6 +1131,55 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
return Limit;
}
+// In functions that realign the stack, it can be an advantage to spill the
+// callee-saved vector registers after realigning the stack. The vst1 and vld1
+// instructions take alignment hints that can improve performance.
+//
+static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) {
+ MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0);
+ if (!SpillAlignedNEONRegs)
+ return;
+
+ // Naked functions don't spill callee-saved registers.
+ if (MF.getFunction()->hasFnAttr(Attribute::Naked))
+ return;
+
+ // We are planning to use NEON instructions vst1 / vld1.
+ if (!MF.getTarget().getSubtarget<ARMSubtarget>().hasNEON())
+ return;
+
+ // Don't bother if the default stack alignment is sufficiently high.
+ if (MF.getTarget().getFrameLowering()->getStackAlignment() >= 8)
+ return;
+
+ // Aligned spills require stack realignment.
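// The counting rule the code below implements, as a standalone sketch (plain
// C++; d8..d15 usage is modeled as booleans, which is illustrative, not the
// MachineRegisterInfo API):
#include <array>
#include <cstdio>

// The first gap in d8..d15 ends the contiguous run; a run of one register is
// not worth the realignment code.
static unsigned numAlignedDPRCS2Regs(const std::array<bool, 8> &usedD8toD15) {
  unsigned n = 0;
  while (n < 8 && usedD8toD15[n])
    ++n;
  return n < 2 ? 0 : n;
}

int main() {
  std::array<bool, 8> used = {true, true, true, false, true, false, false, false};
  // d8-d10 form the run; d12, above the hole, goes to the standard DPRCS area.
  std::printf("%u\n", numAlignedDPRCS2Regs(used)); // 3
}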
+ const ARMBaseRegisterInfo *RegInfo =
+ static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ if (!RegInfo->canRealignStack(MF))
+ return;
+
+ // We always spill contiguous d-registers starting from d8. Count how many
+ // need spilling. The register allocator will almost always use the
+ // callee-saved registers in order, but it can happen that there are holes in
+ // the range. Registers above the hole will be spilled to the standard DPRCS
+ // area.
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ unsigned NumSpills = 0;
+ for (; NumSpills < 8; ++NumSpills)
+ if (!MRI.isPhysRegOrOverlapUsed(ARM::D8 + NumSpills))
+ break;
+
+ // Don't do this for just one d-register. It's not worth it.
+ if (NumSpills < 2)
+ return;
+
+ // Spill the first NumSpills D-registers after realigning the stack.
+ MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(NumSpills);
+
+ // A scratch register is required for the vst1 / vld1 instructions.
+ MF.getRegInfo().setPhysRegUsed(ARM::R4);
+}
+
void ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const {
@@ -898,28 +1226,22 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
MF.getRegInfo().setPhysRegUsed(ARM::R4);
}
+ // See if we can spill vector registers to the aligned stack.
+ checkNumAlignedDPRCS2Regs(MF);
+
// Spill the BasePtr if it's used.
if (RegInfo->hasBasePointer(MF))
MF.getRegInfo().setPhysRegUsed(RegInfo->getBaseRegister());
// Don't spill FP if the frame can be eliminated. This is determined
// by scanning the callee-save registers to see if any is used.
- const unsigned *CSRegs = RegInfo->getCalleeSavedRegs();
+ const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs();
for (unsigned i = 0; CSRegs[i]; ++i) {
unsigned Reg = CSRegs[i];
bool Spilled = false;
- if (MF.getRegInfo().isPhysRegUsed(Reg)) {
+ if (MF.getRegInfo().isPhysRegOrOverlapUsed(Reg)) {
Spilled = true;
CanEliminateFrame = false;
- } else {
- // Check alias registers too.
- for (const unsigned *Aliases = - RegInfo->getAliasSet(Reg); *Aliases; ++Aliases) { - if (MF.getRegInfo().isPhysRegUsed(*Aliases)) { - Spilled = true; - CanEliminateFrame = false; - } - } } if (!ARM::GPRRegisterClass->contains(Reg)) @@ -928,7 +1250,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, if (Spilled) { NumGPRSpills++; - if (!STI.isTargetDarwin()) { + if (!STI.isTargetIOS()) { if (Reg == ARM::LR) LRSpilled = true; CS1Spilled = true; @@ -948,7 +1270,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, break; } } else { - if (!STI.isTargetDarwin()) { + if (!STI.isTargetIOS()) { UnspilledCS1GPRs.push_back(Reg); continue; } diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h index 61bb8af..a1c2b93 100644 --- a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h +++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h @@ -63,12 +63,13 @@ public: void emitPushInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, unsigned StmOpc, unsigned StrOpc, bool NoGap, - bool(*Func)(unsigned, bool), + bool(*Func)(unsigned, bool), unsigned NumAlignedDPRCS2Regs, unsigned MIFlags = 0) const; void emitPopInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, unsigned LdmOpc, unsigned LdrOpc, bool isVarArg, bool NoGap, - bool(*Func)(unsigned, bool)) const; + bool(*Func)(unsigned, bool), + unsigned NumAlignedDPRCS2Regs) const; }; } // End llvm namespace diff --git a/contrib/llvm/lib/Target/ARM/ARMGlobalMerge.cpp b/contrib/llvm/lib/Target/ARM/ARMGlobalMerge.cpp deleted file mode 100644 index 5f863ea..0000000 --- a/contrib/llvm/lib/Target/ARM/ARMGlobalMerge.cpp +++ /dev/null @@ -1,219 +0,0 @@ -//===-- ARMGlobalMerge.cpp - Internal globals merging --------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// This pass merges globals with internal linkage into one. This way all the -// globals which were merged into a biggest one can be addressed using offsets -// from the same base pointer (no need for separate base pointer for each of the -// global). Such a transformation can significantly reduce the register pressure -// when many globals are involved. -// -// For example, consider the code which touches several global variables at -// once: -// -// static int foo[N], bar[N], baz[N]; -// -// for (i = 0; i < N; ++i) { -// foo[i] = bar[i] * baz[i]; -// } -// -// On ARM the addresses of 3 arrays should be kept in the registers, thus -// this code has quite large register pressure (loop body): -// -// ldr r1, [r5], #4 -// ldr r2, [r6], #4 -// mul r1, r2, r1 -// str r1, [r0], #4 -// -// Pass converts the code to something like: -// -// static struct { -// int foo[N]; -// int bar[N]; -// int baz[N]; -// } merged; -// -// for (i = 0; i < N; ++i) { -// merged.foo[i] = merged.bar[i] * merged.baz[i]; -// } -// -// and in ARM code this becomes: -// -// ldr r0, [r5, #40] -// ldr r1, [r5, #80] -// mul r0, r1, r0 -// str r0, [r5], #4 -// -// note that we saved 2 registers here almostly "for free". 
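// The packing rule the removed doMerge used, as a standalone sketch: after a
// stable sort by size, globals are appended to the current merged struct until
// the next one would push an offset past MaxOffset (the largest reg+imm offset
// the loads can encode). Plain C++ with illustrative names; the
// oversized-global guard is an added safety assumption, not taken from the pass:
#include <cstdio>
#include <vector>

static unsigned countMergedGroups(const std::vector<unsigned> &sizes,
                                  unsigned maxOffset) {
  unsigned groups = 0;
  for (size_t i = 0; i < sizes.size();) {
    unsigned long long merged = 0;
    size_t j = i;
    while (j < sizes.size() && merged + sizes[j] <= maxOffset)
      merged += sizes[j++];
    if (j == i)
      ++j;                  // a single oversized global gets its own group
    ++groups;
    i = j;
  }
  return groups;
}

int main() {
  std::vector<unsigned> sizes = {100, 200, 4000, 4000}; // already sorted
  std::printf("%u\n", countMergedGroups(sizes, 4095));  // 3 merged structs
}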
-// ===---------------------------------------------------------------------===// - -#define DEBUG_TYPE "arm-global-merge" -#include "ARM.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/Attributes.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/GlobalVariable.h" -#include "llvm/Instructions.h" -#include "llvm/Intrinsics.h" -#include "llvm/Module.h" -#include "llvm/Pass.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetLoweringObjectFile.h" -using namespace llvm; - -namespace { - class ARMGlobalMerge : public FunctionPass { - /// TLI - Keep a pointer of a TargetLowering to consult for determining - /// target type sizes. - const TargetLowering *TLI; - - bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals, - Module &M, bool isConst) const; - - public: - static char ID; // Pass identification, replacement for typeid. - explicit ARMGlobalMerge(const TargetLowering *tli) - : FunctionPass(ID), TLI(tli) {} - - virtual bool doInitialization(Module &M); - virtual bool runOnFunction(Function &F); - - const char *getPassName() const { - return "Merge internal globals"; - } - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - FunctionPass::getAnalysisUsage(AU); - } - - struct GlobalCmp { - const TargetData *TD; - - GlobalCmp(const TargetData *td) : TD(td) { } - - bool operator()(const GlobalVariable *GV1, const GlobalVariable *GV2) { - Type *Ty1 = cast<PointerType>(GV1->getType())->getElementType(); - Type *Ty2 = cast<PointerType>(GV2->getType())->getElementType(); - - return (TD->getTypeAllocSize(Ty1) < TD->getTypeAllocSize(Ty2)); - } - }; - }; -} // end anonymous namespace - -char ARMGlobalMerge::ID = 0; - -bool ARMGlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals, - Module &M, bool isConst) const { - const TargetData *TD = TLI->getTargetData(); - - // FIXME: Infer the maximum possible offset depending on the actual users - // (these max offsets are different for the users inside Thumb or ARM - // functions) - unsigned MaxOffset = TLI->getMaximalGlobalOffset(); - - // FIXME: Find better heuristics - std::stable_sort(Globals.begin(), Globals.end(), GlobalCmp(TD)); - - Type *Int32Ty = Type::getInt32Ty(M.getContext()); - - for (size_t i = 0, e = Globals.size(); i != e; ) { - size_t j = 0; - uint64_t MergedSize = 0; - std::vector<Type*> Tys; - std::vector<Constant*> Inits; - for (j = i; j != e; ++j) { - Type *Ty = Globals[j]->getType()->getElementType(); - MergedSize += TD->getTypeAllocSize(Ty); - if (MergedSize > MaxOffset) { - break; - } - Tys.push_back(Ty); - Inits.push_back(Globals[j]->getInitializer()); - } - - StructType *MergedTy = StructType::get(M.getContext(), Tys); - Constant *MergedInit = ConstantStruct::get(MergedTy, Inits); - GlobalVariable *MergedGV = new GlobalVariable(M, MergedTy, isConst, - GlobalValue::InternalLinkage, - MergedInit, "_MergedGlobals"); - for (size_t k = i; k < j; ++k) { - Constant *Idx[2] = { - ConstantInt::get(Int32Ty, 0), - ConstantInt::get(Int32Ty, k-i) - }; - Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(MergedGV, Idx); - Globals[k]->replaceAllUsesWith(GEP); - Globals[k]->eraseFromParent(); - } - i = j; - } - - return true; -} - - -bool ARMGlobalMerge::doInitialization(Module &M) { - SmallVector<GlobalVariable*, 16> Globals, ConstGlobals, BSSGlobals; - const TargetData *TD = TLI->getTargetData(); - unsigned MaxOffset = TLI->getMaximalGlobalOffset(); - bool Changed = false; - - // Grab 
all non-const globals. - for (Module::global_iterator I = M.global_begin(), - E = M.global_end(); I != E; ++I) { - // Merge is safe for "normal" internal globals only - if (!I->hasLocalLinkage() || I->isThreadLocal() || I->hasSection()) - continue; - - // Ignore fancy-aligned globals for now. - unsigned Alignment = I->getAlignment(); - Type *Ty = I->getType()->getElementType(); - if (Alignment > TD->getABITypeAlignment(Ty)) - continue; - - // Ignore all 'special' globals. - if (I->getName().startswith("llvm.") || - I->getName().startswith(".llvm.")) - continue; - - if (TD->getTypeAllocSize(Ty) < MaxOffset) { - const TargetLoweringObjectFile &TLOF = TLI->getObjFileLowering(); - if (TLOF.getKindForGlobal(I, TLI->getTargetMachine()).isBSSLocal()) - BSSGlobals.push_back(I); - else if (I->isConstant()) - ConstGlobals.push_back(I); - else - Globals.push_back(I); - } - } - - if (Globals.size() > 1) - Changed |= doMerge(Globals, M, false); - if (BSSGlobals.size() > 1) - Changed |= doMerge(BSSGlobals, M, false); - - // FIXME: This currently breaks the EH processing due to way how the - // typeinfo detection works. We might want to detect the TIs and ignore - // them in the future. - // if (ConstGlobals.size() > 1) - // Changed |= doMerge(ConstGlobals, M, true); - - return Changed; -} - -bool ARMGlobalMerge::runOnFunction(Function &F) { - return false; -} - -FunctionPass *llvm::createARMGlobalMergePass(const TargetLowering *tli) { - return new ARMGlobalMerge(tli); -} diff --git a/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp b/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp index 787f6a2..a5fd15b 100644 --- a/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp @@ -21,7 +21,7 @@ static bool hasRAWHazard(MachineInstr *DefMI, MachineInstr *MI, // FIXME: Detect integer instructions properly. const MCInstrDesc &MCID = MI->getDesc(); unsigned Domain = MCID.TSFlags & ARMII::DomainMask; - if (MCID.mayStore()) + if (MI->mayStore()) return false; unsigned Opcode = MCID.getOpcode(); if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD) @@ -38,9 +38,6 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { MachineInstr *MI = SU->getInstr(); if (!MI->isDebugValue()) { - if (ITBlockSize && MI != ITBlockMIs[ITBlockSize-1]) - return Hazard; - // Look for special VMLA / VMLS hazards. A VMUL / VADD / VSUB following // a VMLA / VMLS will cause 4 cycle stall. const MCInstrDesc &MCID = MI->getDesc(); @@ -48,9 +45,9 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { MachineInstr *DefMI = LastMI; const MCInstrDesc &LastMCID = LastMI->getDesc(); // Skip over one non-VFP / NEON instruction. - if (!LastMCID.isBarrier() && + if (!LastMI->isBarrier() && // On A9, AGU and NEON/FPU are muxed. 
- !(STI.isCortexA9() && (LastMCID.mayLoad() || LastMCID.mayStore())) && + !(STI.isCortexA9() && (LastMI->mayLoad() || LastMI->mayStore())) && (LastMCID.TSFlags & ARMII::DomainMask) == ARMII::DomainGeneral) { MachineBasicBlock::iterator I = LastMI; if (I != LastMI->getParent()->begin()) { @@ -76,30 +73,11 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { void ARMHazardRecognizer::Reset() { LastMI = 0; FpMLxStalls = 0; - ITBlockSize = 0; ScoreboardHazardRecognizer::Reset(); } void ARMHazardRecognizer::EmitInstruction(SUnit *SU) { MachineInstr *MI = SU->getInstr(); - unsigned Opcode = MI->getOpcode(); - if (ITBlockSize) { - --ITBlockSize; - } else if (Opcode == ARM::t2IT) { - unsigned Mask = MI->getOperand(1).getImm(); - unsigned NumTZ = CountTrailingZeros_32(Mask); - assert(NumTZ <= 3 && "Invalid IT mask!"); - ITBlockSize = 4 - NumTZ; - MachineBasicBlock::iterator I = MI; - for (unsigned i = 0; i < ITBlockSize; ++i) { - // Advance to the next instruction, skipping any dbg_value instructions. - do { - ++I; - } while (I->isDebugValue()); - ITBlockMIs[ITBlockSize-1-i] = &*I; - } - } - if (!MI->isDebugValue()) { LastMI = MI; FpMLxStalls = 0; diff --git a/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.h b/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.h index 2bc218d..98bfc4c 100644 --- a/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.h +++ b/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.h @@ -23,6 +23,10 @@ class ARMBaseRegisterInfo; class ARMSubtarget; class MachineInstr; +/// ARMHazardRecognizer handles special constraints that are not expressed in +/// the scheduling itinerary. This is only used during postRA scheduling. The +/// ARM preRA scheduler uses an unspecialized instance of the +/// ScoreboardHazardRecognizer. class ARMHazardRecognizer : public ScoreboardHazardRecognizer { const ARMBaseInstrInfo &TII; const ARMBaseRegisterInfo &TRI; @@ -30,8 +34,6 @@ class ARMHazardRecognizer : public ScoreboardHazardRecognizer { MachineInstr *LastMI; unsigned FpMLxStalls; - unsigned ITBlockSize; // No. of MIs in current IT block yet to be scheduled. - MachineInstr *ITBlockMIs[4]; public: ARMHazardRecognizer(const InstrItineraryData *ItinData, @@ -40,7 +42,7 @@ public: const ARMSubtarget &sti, const ScheduleDAG *DAG) : ScoreboardHazardRecognizer(ItinData, DAG, "post-RA-sched"), TII(tii), - TRI(tri), STI(sti), LastMI(0), ITBlockSize(0) {} + TRI(tri), STI(sti), LastMI(0) {} virtual HazardType getHazardType(SUnit *SU, int Stalls); virtual void Reset(); diff --git a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index 5ee009c..1eafbbc 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -244,6 +244,7 @@ private: /// SelectCMOVOp - Select CMOV instructions for ARM. SDNode *SelectCMOVOp(SDNode *N); + SDNode *SelectConditionalOp(SDNode *N); SDNode *SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag); @@ -923,7 +924,7 @@ bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr, // The maximum alignment is equal to the memory size being referenced. unsigned LSNAlign = LSN->getAlignment(); unsigned MemSize = LSN->getMemoryVT().getSizeInBits() / 8; - if (LSNAlign > MemSize && MemSize > 1) + if (LSNAlign >= MemSize && MemSize > 1) Alignment = MemSize; } else { // All other uses of addrmode6 are for intrinsics. 
For now just record @@ -1549,6 +1550,52 @@ SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, unsigned NumVecs, return CurDAG->getTargetConstant(Alignment, MVT::i32); } +// Get the register stride update opcode of a VLD/VST instruction that +// is otherwise equivalent to the given fixed stride updating instruction. +static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) { + switch (Opc) { + default: break; + case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register; + case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register; + case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register; + case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register; + case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register; + case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register; + case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register; + case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register; + + case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register; + case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register; + case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register; + case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register; + case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register; + case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register; + case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register; + case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register; + case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register; + case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register; + + case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register; + case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register; + case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register; + case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register; + case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register; + case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register; + + case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register; + case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register; + case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register; + case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register; + case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register; + case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register; + + case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register; + case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register; + case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register; + } + return Opc; // If not one we handle, return it unchanged. +} + SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, unsigned *DOpcodes, unsigned *QOpcodes0, unsigned *QOpcodes1) { @@ -1612,7 +1659,15 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, Ops.push_back(Align); if (isUpdating) { SDValue Inc = N->getOperand(AddrOpIdx + 1); - Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc); + // FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0 + // case entirely when the rest are updated to that form, too. + if ((NumVecs == 1 || NumVecs == 2) && !isa<ConstantSDNode>(Inc.getNode())) + Opc = getVLDSTRegisterUpdateOpcode(Opc); + // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so + // check for that explicitly too. Horribly hacky, but temporary. 
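// The _fixed / _register suffixes used throughout these tables encode the two
// writeback flavors: "vld1 {...}, [rN]!" bumps the base by the access size,
// while "vld1 {...}, [rN], rM" bumps it by a register stride. Modeled on plain
// pointers (a sketch, not LLVM or NEON API):
#include <cstdint>

// vld1.64 {d0,d1}, [r0]!   -- fixed: base += size of the access
static void vld1_fixed(const uint64_t *&base, uint64_t *d, unsigned n) {
  for (unsigned i = 0; i != n; ++i)
    d[i] = base[i];
  base += n;               // writeback implied by the _fixed opcode
}

// vld1.64 {d0,d1}, [r0], r2   -- register: base += r2 (any byte stride)
static void vld1_register(const uint64_t *&base, uint64_t *d, unsigned n,
                          long strideBytes) {
  for (unsigned i = 0; i != n; ++i)
    d[i] = base[i];
  base = reinterpret_cast<const uint64_t *>(
      reinterpret_cast<const uint8_t *>(base) + strideBytes);
}

int main() {
  uint64_t mem[4] = {1, 2, 3, 4}, d[2];
  const uint64_t *p = mem;
  vld1_fixed(p, d, 2);          // p -> mem[2]
  vld1_register(p, d, 2, 16);   // p advanced 16 bytes -> one past mem[3]
  return p == mem + 4 ? 0 : 1;
}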
+ if ((NumVecs != 1 && NumVecs != 2 && Opc != ARM::VLD1q64wb_fixed) ||
+ !isa<ConstantSDNode>(Inc.getNode()))
+ Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
}
Ops.push_back(Pred);
Ops.push_back(Reg0);
@@ -1754,7 +1809,15 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
Ops.push_back(Align);
if (isUpdating) {
SDValue Inc = N->getOperand(AddrOpIdx + 1);
- Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
+ // FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0
+ // case entirely when the rest are updated to that form, too.
+ if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode()))
+ Opc = getVLDSTRegisterUpdateOpcode(Opc);
+ // We use a VST1 for v1i64 even if the pseudo says vst2/3/4, so
+ // check for that explicitly too. Horribly hacky, but temporary.
+ if ((NumVecs > 2 && Opc != ARM::VST1q64wb_fixed) ||
+ !isa<ConstantSDNode>(Inc.getNode()))
+ Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
}
Ops.push_back(SrcReg);
Ops.push_back(Pred);
Ops.push_back(Reg0);
@@ -1977,8 +2040,14 @@ SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
Ops.push_back(MemAddr);
Ops.push_back(Align);
if (isUpdating) {
+ // Fixed-stride update instructions don't have an explicit writeback
+ // operand. It's implicit in the opcode itself.
SDValue Inc = N->getOperand(2);
- Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
+ if (!isa<ConstantSDNode>(Inc.getNode()))
+ Ops.push_back(Inc);
+ // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
+ else if (NumVecs > 2)
+ Ops.push_back(Reg0);
}
Ops.push_back(Pred);
Ops.push_back(Reg0);
@@ -2116,7 +2185,6 @@ SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
case ARM_AM::ror: Opc = ARM::t2MOVCCror; break;
default: llvm_unreachable("Unknown so_reg opcode!");
- break;
}
SDValue SOShImm = CurDAG->getTargetConstant(ARM_AM::getSORegOffset(SOVal), MVT::i32);
@@ -2227,9 +2295,6 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) {
// Pattern: (ARMcmov:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc)
// Emits: (MOVCCs:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc)
// Pattern complexity = 18 cost = 1 size = 0
- SDValue CPTmp0;
- SDValue CPTmp1;
- SDValue CPTmp2;
if (Subtarget->isThumb()) {
SDNode *Res = SelectT2CMOVShiftOp(N, FalseVal, TrueVal, CCVal, CCR, InFlag);
@@ -2286,8 +2351,7 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) {
SDValue Ops[] = { FalseVal, TrueVal, Tmp2, CCR, InFlag };
unsigned Opc = 0;
switch (VT.getSimpleVT().SimpleTy) {
- default: assert(false && "Illegal conditional move type!");
- break;
+ default: llvm_unreachable("Illegal conditional move type!");
case MVT::i32:
Opc = Subtarget->isThumb() ? (Subtarget->hasThumb2() ?
ARM::t2MOVCCr : ARM::tMOVCCr_pseudo) @@ -2303,6 +2367,115 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) { return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 5); } +SDNode *ARMDAGToDAGISel::SelectConditionalOp(SDNode *N) { + SDValue FalseVal = N->getOperand(0); + SDValue TrueVal = N->getOperand(1); + ARMCC::CondCodes CCVal = + (ARMCC::CondCodes)cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(); + SDValue CCR = N->getOperand(3); + assert(CCR.getOpcode() == ISD::Register); + SDValue InFlag = N->getOperand(4); + SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32); + SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); + + if (Subtarget->isThumb()) { + SDValue CPTmp0; + SDValue CPTmp1; + if (SelectT2ShifterOperandReg(TrueVal, CPTmp0, CPTmp1)) { + unsigned Opc; + switch (N->getOpcode()) { + default: llvm_unreachable("Unexpected node"); + case ARMISD::CAND: Opc = ARM::t2ANDCCrs; break; + case ARMISD::COR: Opc = ARM::t2ORRCCrs; break; + case ARMISD::CXOR: Opc = ARM::t2EORCCrs; break; + } + SDValue Ops[] = { FalseVal, CPTmp0, CPTmp1, CC, CCR, Reg0, InFlag }; + return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 7); + } + + ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal); + if (T) { + unsigned TrueImm = T->getZExtValue(); + if (is_t2_so_imm(TrueImm)) { + unsigned Opc; + switch (N->getOpcode()) { + default: llvm_unreachable("Unexpected node"); + case ARMISD::CAND: Opc = ARM::t2ANDCCri; break; + case ARMISD::COR: Opc = ARM::t2ORRCCri; break; + case ARMISD::CXOR: Opc = ARM::t2EORCCri; break; + } + SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32); + SDValue Ops[] = { FalseVal, True, CC, CCR, Reg0, InFlag }; + return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 6); + } + } + + unsigned Opc; + switch (N->getOpcode()) { + default: llvm_unreachable("Unexpected node"); + case ARMISD::CAND: Opc = ARM::t2ANDCCrr; break; + case ARMISD::COR: Opc = ARM::t2ORRCCrr; break; + case ARMISD::CXOR: Opc = ARM::t2EORCCrr; break; + } + SDValue Ops[] = { FalseVal, TrueVal, CC, CCR, Reg0, InFlag }; + return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 6); + } + + SDValue CPTmp0; + SDValue CPTmp1; + SDValue CPTmp2; + if (SelectImmShifterOperand(TrueVal, CPTmp0, CPTmp2)) { + unsigned Opc; + switch (N->getOpcode()) { + default: llvm_unreachable("Unexpected node"); + case ARMISD::CAND: Opc = ARM::ANDCCrsi; break; + case ARMISD::COR: Opc = ARM::ORRCCrsi; break; + case ARMISD::CXOR: Opc = ARM::EORCCrsi; break; + } + SDValue Ops[] = { FalseVal, CPTmp0, CPTmp2, CC, CCR, Reg0, InFlag }; + return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 7); + } + + if (SelectRegShifterOperand(TrueVal, CPTmp0, CPTmp1, CPTmp2)) { + unsigned Opc; + switch (N->getOpcode()) { + default: llvm_unreachable("Unexpected node"); + case ARMISD::CAND: Opc = ARM::ANDCCrsr; break; + case ARMISD::COR: Opc = ARM::ORRCCrsr; break; + case ARMISD::CXOR: Opc = ARM::EORCCrsr; break; + } + SDValue Ops[] = { FalseVal, CPTmp0, CPTmp1, CPTmp2, CC, CCR, Reg0, InFlag }; + return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 8); + } + + ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal); + if (T) { + unsigned TrueImm = T->getZExtValue(); + if (is_so_imm(TrueImm)) { + unsigned Opc; + switch (N->getOpcode()) { + default: llvm_unreachable("Unexpected node"); + case ARMISD::CAND: Opc = ARM::ANDCCri; break; + case ARMISD::COR: Opc = ARM::ORRCCri; break; + case ARMISD::CXOR: Opc = ARM::EORCCri; break; + } + SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32); + SDValue Ops[] = { FalseVal, True, CC, CCR, Reg0, InFlag }; + return CurDAG->SelectNodeTo(N, 
Opc, MVT::i32, Ops, 6); + } + } + + unsigned Opc; + switch (N->getOpcode()) { + default: llvm_unreachable("Unexpected node"); + case ARMISD::CAND: Opc = ARM::ANDCCrr; break; + case ARMISD::COR: Opc = ARM::ORRCCrr; break; + case ARMISD::CXOR: Opc = ARM::EORCCrr; break; + } + SDValue Ops[] = { FalseVal, TrueVal, CC, CCR, Reg0, InFlag }; + return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 6); +} + /// Target-specific DAG combining for ISD::XOR. /// Target-independent combining lowers SELECT_CC nodes of the form /// select_cc setg[ge] X, 0, X, -X @@ -2316,7 +2489,6 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) { SDNode *ARMDAGToDAGISel::SelectABSOp(SDNode *N){ SDValue XORSrc0 = N->getOperand(0); SDValue XORSrc1 = N->getOperand(1); - DebugLoc DL = N->getDebugLoc(); EVT VT = N->getValueType(0); if (DisableARMIntABS) @@ -2641,6 +2813,10 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { } case ARMISD::CMOV: return SelectCMOVOp(N); + case ARMISD::CAND: + case ARMISD::COR: + case ARMISD::CXOR: + return SelectConditionalOp(N); case ARMISD::VZIP: { unsigned Opc = 0; EVT VT = N->getValueType(0); @@ -2649,7 +2825,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case MVT::v8i8: Opc = ARM::VZIPd8; break; case MVT::v4i16: Opc = ARM::VZIPd16; break; case MVT::v2f32: - case MVT::v2i32: Opc = ARM::VZIPd32; break; + // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. + case MVT::v2i32: Opc = ARM::VTRNd32; break; case MVT::v16i8: Opc = ARM::VZIPq8; break; case MVT::v8i16: Opc = ARM::VZIPq16; break; case MVT::v4f32: @@ -2668,7 +2845,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case MVT::v8i8: Opc = ARM::VUZPd8; break; case MVT::v4i16: Opc = ARM::VUZPd16; break; case MVT::v2f32: - case MVT::v2i32: Opc = ARM::VUZPd32; break; + // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. 
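// Why VTRNd32 can stand in for both: with only two 32-bit lanes per
// D-register, zip, unzip and transpose all reduce to the same lane swap.
// A standalone check (plain C++ modeling the lanes):
#include <cstdio>
#include <utility>

int main() {
  unsigned d0[2] = {0xA0, 0xA1}, d1[2] = {0xB0, 0xB1};
  std::swap(d0[1], d1[0]); // vtrn.32 d0, d1
  // vzip.32 interleaves to {a0,b0},{a1,b1}; vuzp.32 gathers even lanes then
  // odd lanes -- with two lanes, both give exactly this result:
  std::printf("{%x,%x} {%x,%x}\n", d0[0], d0[1], d1[0], d1[1]);
}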
+ case MVT::v2i32: Opc = ARM::VTRNd32; break; case MVT::v16i8: Opc = ARM::VUZPq8; break; case MVT::v8i16: Opc = ARM::VUZPq16; break; case MVT::v4f32: @@ -2715,8 +2893,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { } case ARMISD::VLD2DUP: { - unsigned Opcodes[] = { ARM::VLD2DUPd8Pseudo, ARM::VLD2DUPd16Pseudo, - ARM::VLD2DUPd32Pseudo }; + unsigned Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16, + ARM::VLD2DUPd32 }; return SelectVLDDup(N, false, 2, Opcodes); } @@ -2733,8 +2911,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { } case ARMISD::VLD2DUP_UPD: { - unsigned Opcodes[] = { ARM::VLD2DUPd8Pseudo_UPD, ARM::VLD2DUPd16Pseudo_UPD, - ARM::VLD2DUPd32Pseudo_UPD }; + unsigned Opcodes[] = { ARM::VLD2DUPd8wb_fixed, ARM::VLD2DUPd16wb_fixed, + ARM::VLD2DUPd32wb_fixed }; return SelectVLDDup(N, true, 2, Opcodes); } @@ -2751,24 +2929,29 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { } case ARMISD::VLD1_UPD: { - unsigned DOpcodes[] = { ARM::VLD1d8_UPD, ARM::VLD1d16_UPD, - ARM::VLD1d32_UPD, ARM::VLD1d64_UPD }; - unsigned QOpcodes[] = { ARM::VLD1q8Pseudo_UPD, ARM::VLD1q16Pseudo_UPD, - ARM::VLD1q32Pseudo_UPD, ARM::VLD1q64Pseudo_UPD }; + unsigned DOpcodes[] = { ARM::VLD1d8wb_fixed, ARM::VLD1d16wb_fixed, + ARM::VLD1d32wb_fixed, ARM::VLD1d64wb_fixed }; + unsigned QOpcodes[] = { ARM::VLD1q8wb_fixed, + ARM::VLD1q16wb_fixed, + ARM::VLD1q32wb_fixed, + ARM::VLD1q64wb_fixed }; return SelectVLD(N, true, 1, DOpcodes, QOpcodes, 0); } case ARMISD::VLD2_UPD: { - unsigned DOpcodes[] = { ARM::VLD2d8Pseudo_UPD, ARM::VLD2d16Pseudo_UPD, - ARM::VLD2d32Pseudo_UPD, ARM::VLD1q64Pseudo_UPD }; - unsigned QOpcodes[] = { ARM::VLD2q8Pseudo_UPD, ARM::VLD2q16Pseudo_UPD, - ARM::VLD2q32Pseudo_UPD }; + unsigned DOpcodes[] = { ARM::VLD2d8wb_fixed, + ARM::VLD2d16wb_fixed, + ARM::VLD2d32wb_fixed, + ARM::VLD1q64wb_fixed}; + unsigned QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed, + ARM::VLD2q16PseudoWB_fixed, + ARM::VLD2q32PseudoWB_fixed }; return SelectVLD(N, true, 2, DOpcodes, QOpcodes, 0); } case ARMISD::VLD3_UPD: { unsigned DOpcodes[] = { ARM::VLD3d8Pseudo_UPD, ARM::VLD3d16Pseudo_UPD, - ARM::VLD3d32Pseudo_UPD, ARM::VLD1d64TPseudo_UPD }; + ARM::VLD3d32Pseudo_UPD, ARM::VLD1q64wb_fixed}; unsigned QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD, ARM::VLD3q16Pseudo_UPD, ARM::VLD3q32Pseudo_UPD }; @@ -2780,7 +2963,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case ARMISD::VLD4_UPD: { unsigned DOpcodes[] = { ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD, - ARM::VLD4d32Pseudo_UPD, ARM::VLD1d64QPseudo_UPD }; + ARM::VLD4d32Pseudo_UPD, ARM::VLD1q64wb_fixed}; unsigned QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD, ARM::VLD4q16Pseudo_UPD, ARM::VLD4q32Pseudo_UPD }; @@ -2815,24 +2998,29 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { } case ARMISD::VST1_UPD: { - unsigned DOpcodes[] = { ARM::VST1d8_UPD, ARM::VST1d16_UPD, - ARM::VST1d32_UPD, ARM::VST1d64_UPD }; - unsigned QOpcodes[] = { ARM::VST1q8Pseudo_UPD, ARM::VST1q16Pseudo_UPD, - ARM::VST1q32Pseudo_UPD, ARM::VST1q64Pseudo_UPD }; + unsigned DOpcodes[] = { ARM::VST1d8wb_fixed, ARM::VST1d16wb_fixed, + ARM::VST1d32wb_fixed, ARM::VST1d64wb_fixed }; + unsigned QOpcodes[] = { ARM::VST1q8wb_fixed, + ARM::VST1q16wb_fixed, + ARM::VST1q32wb_fixed, + ARM::VST1q64wb_fixed }; return SelectVST(N, true, 1, DOpcodes, QOpcodes, 0); } case ARMISD::VST2_UPD: { - unsigned DOpcodes[] = { ARM::VST2d8Pseudo_UPD, ARM::VST2d16Pseudo_UPD, - ARM::VST2d32Pseudo_UPD, ARM::VST1q64Pseudo_UPD }; - unsigned QOpcodes[] = { ARM::VST2q8Pseudo_UPD, ARM::VST2q16Pseudo_UPD, - ARM::VST2q32Pseudo_UPD }; + unsigned DOpcodes[] = { ARM::VST2d8wb_fixed, + 
ARM::VST2d16wb_fixed, + ARM::VST2d32wb_fixed, + ARM::VST1q64wb_fixed}; + unsigned QOpcodes[] = { ARM::VST2q8PseudoWB_fixed, + ARM::VST2q16PseudoWB_fixed, + ARM::VST2q32PseudoWB_fixed }; return SelectVST(N, true, 2, DOpcodes, QOpcodes, 0); } case ARMISD::VST3_UPD: { unsigned DOpcodes[] = { ARM::VST3d8Pseudo_UPD, ARM::VST3d16Pseudo_UPD, - ARM::VST3d32Pseudo_UPD, ARM::VST1d64TPseudo_UPD }; + ARM::VST3d32Pseudo_UPD,ARM::VST1d64TPseudoWB_fixed}; unsigned QOpcodes0[] = { ARM::VST3q8Pseudo_UPD, ARM::VST3q16Pseudo_UPD, ARM::VST3q32Pseudo_UPD }; @@ -2844,7 +3032,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case ARMISD::VST4_UPD: { unsigned DOpcodes[] = { ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD, - ARM::VST4d32Pseudo_UPD, ARM::VST1d64QPseudo_UPD }; + ARM::VST4d32Pseudo_UPD,ARM::VST1d64QPseudoWB_fixed}; unsigned QOpcodes0[] = { ARM::VST4q8Pseudo_UPD, ARM::VST4q16Pseudo_UPD, ARM::VST4q32Pseudo_UPD }; @@ -2993,14 +3181,14 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case Intrinsic::arm_neon_vld1: { unsigned DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16, ARM::VLD1d32, ARM::VLD1d64 }; - unsigned QOpcodes[] = { ARM::VLD1q8Pseudo, ARM::VLD1q16Pseudo, - ARM::VLD1q32Pseudo, ARM::VLD1q64Pseudo }; + unsigned QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16, + ARM::VLD1q32, ARM::VLD1q64}; return SelectVLD(N, false, 1, DOpcodes, QOpcodes, 0); } case Intrinsic::arm_neon_vld2: { - unsigned DOpcodes[] = { ARM::VLD2d8Pseudo, ARM::VLD2d16Pseudo, - ARM::VLD2d32Pseudo, ARM::VLD1q64Pseudo }; + unsigned DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16, + ARM::VLD2d32, ARM::VLD1q64 }; unsigned QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo, ARM::VLD2q32Pseudo }; return SelectVLD(N, false, 2, DOpcodes, QOpcodes, 0); @@ -3054,14 +3242,14 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case Intrinsic::arm_neon_vst1: { unsigned DOpcodes[] = { ARM::VST1d8, ARM::VST1d16, ARM::VST1d32, ARM::VST1d64 }; - unsigned QOpcodes[] = { ARM::VST1q8Pseudo, ARM::VST1q16Pseudo, - ARM::VST1q32Pseudo, ARM::VST1q64Pseudo }; + unsigned QOpcodes[] = { ARM::VST1q8, ARM::VST1q16, + ARM::VST1q32, ARM::VST1q64 }; return SelectVST(N, false, 1, DOpcodes, QOpcodes, 0); } case Intrinsic::arm_neon_vst2: { - unsigned DOpcodes[] = { ARM::VST2d8Pseudo, ARM::VST2d16Pseudo, - ARM::VST2d32Pseudo, ARM::VST1q64Pseudo }; + unsigned DOpcodes[] = { ARM::VST2d8, ARM::VST2d16, + ARM::VST2d32, ARM::VST1q64 }; unsigned QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo, ARM::VST2q32Pseudo }; return SelectVST(N, false, 2, DOpcodes, QOpcodes, 0); @@ -3122,14 +3310,14 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { break; case Intrinsic::arm_neon_vtbl2: - return SelectVTBL(N, false, 2, ARM::VTBL2Pseudo); + return SelectVTBL(N, false, 2, ARM::VTBL2); case Intrinsic::arm_neon_vtbl3: return SelectVTBL(N, false, 3, ARM::VTBL3Pseudo); case Intrinsic::arm_neon_vtbl4: return SelectVTBL(N, false, 4, ARM::VTBL4Pseudo); case Intrinsic::arm_neon_vtbx2: - return SelectVTBL(N, true, 2, ARM::VTBX2Pseudo); + return SelectVTBL(N, true, 2, ARM::VTBX2); case Intrinsic::arm_neon_vtbx3: return SelectVTBL(N, true, 3, ARM::VTBX3Pseudo); case Intrinsic::arm_neon_vtbx4: @@ -3163,7 +3351,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { Ops.push_back(N->getOperand(2)); Ops.push_back(getAL(CurDAG)); // Predicate Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register - return CurDAG->getMachineNode(ARM::VTBL2Pseudo, dl, VT, + return CurDAG->getMachineNode(ARM::VTBL2, dl, VT, Ops.data(), Ops.size()); } diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp 
b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp index f60d177..a103c94 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -13,13 +13,12 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "arm-isel" +#include "ARMISelLowering.h" #include "ARM.h" #include "ARMCallingConv.h" #include "ARMConstantPoolValue.h" -#include "ARMISelLowering.h" #include "ARMMachineFunctionInfo.h" #include "ARMPerfectShuffle.h" -#include "ARMRegisterInfo.h" #include "ARMSubtarget.h" #include "ARMTargetMachine.h" #include "ARMTargetObjectFile.h" @@ -40,18 +39,15 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/ADT/VectorExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include <sstream> using namespace llvm; STATISTIC(NumTailCalls, "Number of tail calls"); @@ -73,7 +69,7 @@ ARMInterworking("arm-interworking", cl::Hidden, cl::desc("Enable / disable ARM interworking (for debugging only)"), cl::init(true)); -namespace llvm { +namespace { class ARMCCState : public CCState { public: ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF, @@ -89,7 +85,7 @@ namespace llvm { } // The APCS parameter registers. -static const unsigned GPRArgRegs[] = { +static const uint16_t GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; @@ -108,8 +104,14 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT, EVT ElemTy = VT.getVectorElementType(); if (ElemTy != MVT::i64 && ElemTy != MVT::f64) setOperationAction(ISD::SETCC, VT.getSimpleVT(), Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, VT.getSimpleVT(), Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom); - if (ElemTy != MVT::i32) { + if (ElemTy == MVT::i32) { + setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Custom); + setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Custom); + setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Custom); + setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Custom); + } else { setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand); setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Expand); setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Expand); @@ -121,18 +123,12 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT, setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Legal); setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand); setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, VT.getSimpleVT(), Expand); if (VT.isInteger()) { setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom); setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom); setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom); - setLoadExtAction(ISD::SEXTLOAD, VT.getSimpleVT(), Expand); - setLoadExtAction(ISD::ZEXTLOAD, VT.getSimpleVT(), Expand); - for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; - InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT) - setTruncStoreAction(VT.getSimpleVT(), - (MVT::SimpleValueType)InnerVT, Expand); } - 
setLoadExtAction(ISD::EXTLOAD, VT.getSimpleVT(), Expand); // Promote all bit-wise operations. if (VT.isInteger() && VT != PromotedBitwiseVT) { @@ -263,7 +259,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setLibcallName(RTLIB::SRL_I128, 0); setLibcallName(RTLIB::SRA_I128, 0); - if (Subtarget->isAAPCS_ABI()) { + if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetDarwin()) { // Double-precision floating-point arithmetic helper functions // RTABI chapter 4.1.2, Table 2 setLibcallName(RTLIB::ADD_F64, "__aeabi_dadd"); @@ -388,8 +384,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // Long long helper functions // RTABI chapter 4.2, Table 9 setLibcallName(RTLIB::MUL_I64, "__aeabi_lmul"); - setLibcallName(RTLIB::SDIV_I64, "__aeabi_ldivmod"); - setLibcallName(RTLIB::UDIV_I64, "__aeabi_uldivmod"); setLibcallName(RTLIB::SHL_I64, "__aeabi_llsl"); setLibcallName(RTLIB::SRL_I64, "__aeabi_llsr"); setLibcallName(RTLIB::SRA_I64, "__aeabi_lasr"); @@ -405,21 +399,28 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setLibcallName(RTLIB::SDIV_I8, "__aeabi_idiv"); setLibcallName(RTLIB::SDIV_I16, "__aeabi_idiv"); setLibcallName(RTLIB::SDIV_I32, "__aeabi_idiv"); + setLibcallName(RTLIB::SDIV_I64, "__aeabi_ldivmod"); setLibcallName(RTLIB::UDIV_I8, "__aeabi_uidiv"); setLibcallName(RTLIB::UDIV_I16, "__aeabi_uidiv"); setLibcallName(RTLIB::UDIV_I32, "__aeabi_uidiv"); + setLibcallName(RTLIB::UDIV_I64, "__aeabi_uldivmod"); setLibcallCallingConv(RTLIB::SDIV_I8, CallingConv::ARM_AAPCS); setLibcallCallingConv(RTLIB::SDIV_I16, CallingConv::ARM_AAPCS); setLibcallCallingConv(RTLIB::SDIV_I32, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS); setLibcallCallingConv(RTLIB::UDIV_I8, CallingConv::ARM_AAPCS); setLibcallCallingConv(RTLIB::UDIV_I16, CallingConv::ARM_AAPCS); setLibcallCallingConv(RTLIB::UDIV_I32, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS); // Memory operations // RTABI chapter 4.3.4 setLibcallName(RTLIB::MEMCPY, "__aeabi_memcpy"); setLibcallName(RTLIB::MEMMOVE, "__aeabi_memmove"); setLibcallName(RTLIB::MEMSET, "__aeabi_memset"); + setLibcallCallingConv(RTLIB::MEMCPY, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::MEMMOVE, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::MEMSET, CallingConv::ARM_AAPCS); } // Use divmod compiler-rt calls for iOS 5.0 and later. 
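All of the __aeabi_* registrations above follow one pattern: point the RTLIB entry at its RTABI helper, then pin it to the AAPCS calling convention, since the run-time ABI fixes the helper's convention regardless of the caller's own. A table-driven sketch of the same idea (illustrative only, not the constructor's actual shape):

  struct AEABILibcall {
    RTLIB::Libcall Op;
    const char *Name;
  };
  static const AEABILibcall MemOps[] = {
    { RTLIB::MEMCPY,  "__aeabi_memcpy"  },  // RTABI chapter 4.3.4
    { RTLIB::MEMMOVE, "__aeabi_memmove" },
    { RTLIB::MEMSET,  "__aeabi_memset"  },
  };
  // ... then, inside the ARMTargetLowering constructor:
  for (unsigned i = 0, e = sizeof(MemOps)/sizeof(MemOps[0]); i != e; ++i) {
    setLibcallName(MemOps[i].Op, MemOps[i].Name);
    setLibcallCallingConv(MemOps[i].Op, CallingConv::ARM_AAPCS);
  }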
@@ -433,7 +434,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) addRegisterClass(MVT::i32, ARM::tGPRRegisterClass); else addRegisterClass(MVT::i32, ARM::GPRRegisterClass); - if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) { + if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() && + !Subtarget->isThumb1Only()) { addRegisterClass(MVT::f32, ARM::SPRRegisterClass); if (!Subtarget->isFPOnlySP()) addRegisterClass(MVT::f64, ARM::DPRRegisterClass); @@ -441,6 +443,19 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setTruncStoreAction(MVT::f64, MVT::f32, Expand); } + for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; + VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) { + for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; + InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT) + setTruncStoreAction((MVT::SimpleValueType)VT, + (MVT::SimpleValueType)InnerVT, Expand); + setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand); + setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand); + setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand); + } + + setOperationAction(ISD::ConstantFP, MVT::f32, Custom); + if (Subtarget->hasNEON()) { addDRTypeForNEON(MVT::v2f32); addDRTypeForNEON(MVT::v8i8); @@ -457,13 +472,23 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // v2f64 is legal so that QR subregs can be extracted as f64 elements, but // neither Neon nor VFP support any arithmetic operations on it. + // The same applies to v4f32, though keep in mind that vadd, vsub, vmul are + // natively supported for v4f32. setOperationAction(ISD::FADD, MVT::v2f64, Expand); setOperationAction(ISD::FSUB, MVT::v2f64, Expand); setOperationAction(ISD::FMUL, MVT::v2f64, Expand); + // FIXME: Code duplication: FDIV and FREM are always expanded, see + // ARMTargetLowering::addTypeForNEON method for details. setOperationAction(ISD::FDIV, MVT::v2f64, Expand); setOperationAction(ISD::FREM, MVT::v2f64, Expand); + // FIXME: Create unittest. + // In other words, find a case where "copysign" appears in the DAG with + // vector operands. setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand); + // FIXME: Code duplication: SETCC has custom operation action, see + // ARMTargetLowering::addTypeForNEON method for details. setOperationAction(ISD::SETCC, MVT::v2f64, Expand); + // FIXME: Create unittest for FNEG and for FABS. setOperationAction(ISD::FNEG, MVT::v2f64, Expand); setOperationAction(ISD::FABS, MVT::v2f64, Expand); setOperationAction(ISD::FSQRT, MVT::v2f64, Expand); @@ -476,13 +501,23 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FLOG10, MVT::v2f64, Expand); setOperationAction(ISD::FEXP, MVT::v2f64, Expand); setOperationAction(ISD::FEXP2, MVT::v2f64, Expand); + // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
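The net effect of the v2f64 block above is a type that is legal for register allocation (it lives in a Q register) but illegal for most floating-point arithmetic. A quick sketch of how that split shows up through the public query API, assuming a TargetLowering reference TLI:

  // The *type* is legal, so v2f64 values can be kept in registers...
  bool TypeOK = TLI.isTypeLegal(MVT::v2f64);
  // ...but FADD on it is marked Expand, so the legalizer scalarizes it
  // into two f64 adds instead of selecting a vector instruction.
  bool Scalarized =
      TLI.getOperationAction(ISD::FADD, MVT::v2f64) == TargetLowering::Expand;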
setOperationAction(ISD::FCEIL, MVT::v2f64, Expand); setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand); setOperationAction(ISD::FRINT, MVT::v2f64, Expand); setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand); setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand); - setTruncStoreAction(MVT::v2f64, MVT::v2f32, Expand); + setOperationAction(ISD::FSQRT, MVT::v4f32, Expand); + setOperationAction(ISD::FSIN, MVT::v4f32, Expand); + setOperationAction(ISD::FCOS, MVT::v4f32, Expand); + setOperationAction(ISD::FPOWI, MVT::v4f32, Expand); + setOperationAction(ISD::FPOW, MVT::v4f32, Expand); + setOperationAction(ISD::FLOG, MVT::v4f32, Expand); + setOperationAction(ISD::FLOG2, MVT::v4f32, Expand); + setOperationAction(ISD::FLOG10, MVT::v4f32, Expand); + setOperationAction(ISD::FEXP, MVT::v4f32, Expand); + setOperationAction(ISD::FEXP2, MVT::v4f32, Expand); // Neon does not support some operations on v1i64 and v2i64 types. setOperationAction(ISD::MUL, MVT::v1i64, Expand); @@ -498,9 +533,13 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::SETCC, MVT::v1i64, Expand); setOperationAction(ISD::SETCC, MVT::v2i64, Expand); // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with - // a destination type that is wider than the source. + // a destination type that is wider than the source, nor does it have a + // FP_TO_[SU]INT instruction with a narrower destination than the source. setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom); setTargetDAGCombine(ISD::INTRINSIC_VOID); setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); @@ -519,6 +558,16 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setTargetDAGCombine(ISD::FP_TO_SINT); setTargetDAGCombine(ISD::FP_TO_UINT); setTargetDAGCombine(ISD::FDIV); + + // It is legal to extload from v4i8 to v4i16 or v4i32. + MVT Tys[6] = {MVT::v8i8, MVT::v4i8, MVT::v2i8, + MVT::v4i16, MVT::v2i16, + MVT::v2i32}; + for (unsigned i = 0; i < 6; ++i) { + setLoadExtAction(ISD::EXTLOAD, Tys[i], Legal); + setLoadExtAction(ISD::ZEXTLOAD, Tys[i], Legal); + setLoadExtAction(ISD::SEXTLOAD, Tys[i], Legal); + } } computeRegisterProperties(); @@ -576,6 +625,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) setOperationAction(ISD::CTLZ, MVT::i32, Expand); + // These just redirect to CTTZ and CTLZ on ARM. + setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i32 , Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i32 , Expand); + // Only ARMv6 has BSWAP. if (!Subtarget->hasV6Ops()) setOperationAction(ISD::BSWAP, MVT::i32, Expand); @@ -606,10 +659,15 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::VAEND, MVT::Other, Expand); setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); - setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); - setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); - setExceptionPointerRegister(ARM::R0); - setExceptionSelectorRegister(ARM::R1); + + if (!Subtarget->isTargetDarwin()) { + // Non-Darwin platforms may return values in these registers via the + // personality function.
+ setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); + setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); + setExceptionPointerRegister(ARM::R0); + setExceptionSelectorRegister(ARM::R1); + } setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use @@ -664,7 +722,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) } setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); - if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) { + if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() && + !Subtarget->isThumb1Only()) { // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR // iff target supports vfp2. setOperationAction(ISD::BITCAST, MVT::i64, Custom); @@ -676,7 +735,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) if (Subtarget->isTargetDarwin()) { setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); - setOperationAction(ISD::EH_SJLJ_DISPATCHSETUP, MVT::Other, Custom); setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume"); } @@ -703,18 +761,21 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FCOS, MVT::f64, Expand); setOperationAction(ISD::FREM, MVT::f64, Expand); setOperationAction(ISD::FREM, MVT::f32, Expand); - if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) { + if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() && + !Subtarget->isThumb1Only()) { setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); } setOperationAction(ISD::FPOW, MVT::f64, Expand); setOperationAction(ISD::FPOW, MVT::f32, Expand); - setOperationAction(ISD::FMA, MVT::f64, Expand); - setOperationAction(ISD::FMA, MVT::f32, Expand); + if (!Subtarget->hasVFP4()) { + setOperationAction(ISD::FMA, MVT::f64, Expand); + setOperationAction(ISD::FMA, MVT::f32, Expand); + } // Various VFP goodness - if (!UseSoftFloat && !Subtarget->isThumb1Only()) { + if (!TM.Options.UseSoftFloat && !Subtarget->isThumb1Only()) { // int <-> fp are custom expanded into bit_convert + ARMISD ops. if (Subtarget->hasVFP2()) { setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); @@ -735,20 +796,27 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setTargetDAGCombine(ISD::SUB); setTargetDAGCombine(ISD::MUL); - if (Subtarget->hasV6T2Ops() || Subtarget->hasNEON()) - setTargetDAGCombine(ISD::OR); - if (Subtarget->hasNEON()) + if (Subtarget->hasV6T2Ops() || Subtarget->hasNEON()) { setTargetDAGCombine(ISD::AND); + setTargetDAGCombine(ISD::OR); + setTargetDAGCombine(ISD::XOR); + } + + if (Subtarget->hasV6Ops()) + setTargetDAGCombine(ISD::SRL); setStackPointerRegisterToSaveRestore(ARM::SP); - if (UseSoftFloat || Subtarget->isThumb1Only() || !Subtarget->hasVFP2()) + if (TM.Options.UseSoftFloat || Subtarget->isThumb1Only() || + !Subtarget->hasVFP2()) setSchedulingPreference(Sched::RegPressure); else setSchedulingPreference(Sched::Hybrid); //// temporary - rewrite interface to use type maxStoresPerMemcpy = maxStoresPerMemcpyOptSize = 1; + maxStoresPerMemset = 16; + maxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4; // On ARM arguments smaller than 4 bytes are extended, so all arguments // are at least 4 bytes aligned. 
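maxStoresPerMemset and its OptSize twin, set at the end of the hunk above, bound how many discrete stores SelectionDAG may emit before giving up and calling memset (here __aeabi_memset). A sketch of the resulting policy, not the legalizer's actual code:

  // 16 four-byte stores keep a 64-byte memset inline at -O2; under optsize
  // the cap drops to 8 on Darwin and 4 elsewhere, so larger memsets become
  // libcalls to save code size.
  unsigned inlineMemsetStoreLimit(bool OptForSize, bool IsDarwin) {
    if (!OptForSize) return 16;
    return IsDarwin ? 8 : 4;
  }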
@@ -828,7 +896,11 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0"; case ARMISD::BCC_i64: return "ARMISD::BCC_i64"; case ARMISD::FMSTAT: return "ARMISD::FMSTAT"; + case ARMISD::CMOV: return "ARMISD::CMOV"; + case ARMISD::CAND: return "ARMISD::CAND"; + case ARMISD::COR: return "ARMISD::COR"; + case ARMISD::CXOR: return "ARMISD::CXOR"; case ARMISD::RBIT: return "ARMISD::RBIT"; @@ -851,7 +923,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP"; case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP"; - case ARMISD::EH_SJLJ_DISPATCHSETUP:return "ARMISD::EH_SJLJ_DISPATCHSETUP"; case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN"; @@ -899,6 +970,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs"; case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM"; case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM"; + case ARMISD::VMOVFPIMM: return "ARMISD::VMOVFPIMM"; case ARMISD::VDUP: return "ARMISD::VDUP"; case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE"; case ARMISD::VEXT: return "ARMISD::VEXT"; @@ -949,7 +1021,7 @@ EVT ARMTargetLowering::getSetCCResultType(EVT VT) const { /// getRegClassFor - Return the register class that should be used for the /// specified value type. -TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const { +const TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const { // Map v4i64 to QQ registers but do not make the type legal. Similarly map // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to // load / store 4 to 8 consecutive D registers. @@ -984,7 +1056,7 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const { if (VT == MVT::Glue || VT == MVT::Other) continue; if (VT.isFloatingPoint() || VT.isVector()) - return Sched::Latency; + return Sched::ILP; } if (!N->isMachineOpcode()) @@ -999,7 +1071,7 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const { return Sched::RegPressure; if (!Itins->isEmpty() && Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2) - return Sched::Latency; + return Sched::ILP; return Sched::RegPressure; } @@ -1081,18 +1153,19 @@ CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC, if (!Subtarget->isAAPCS_ABI()) return (Return ? RetCC_ARM_APCS : CC_ARM_APCS); else if (Subtarget->hasVFP2() && - FloatABIType == FloatABI::Hard && !isVarArg) + getTargetMachine().Options.FloatABIType == FloatABI::Hard && + !isVarArg) return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); } case CallingConv::ARM_AAPCS_VFP: - return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); + if (!isVarArg) + return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); + // Fallthrough case CallingConv::ARM_AAPCS: return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); case CallingConv::ARM_APCS: return (Return ? RetCC_ARM_APCS : CC_ARM_APCS); - case CallingConv::GHC: - return (Return ? 
RetCC_ARM_APCS : CC_ARM_APCS_GHC); } } @@ -1215,7 +1288,7 @@ void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG, SDValue ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, - bool &isTailCall, + bool doesNotRet, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, @@ -1334,7 +1407,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(j, Load)); } @@ -1350,12 +1423,10 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset); SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, MVT::i32); - // TODO: Disable AlwaysInline when it becomes possible - // to emit a nested call sequence. MemOpChains.push_back(DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), /*isVolatile=*/false, - /*AlwaysInline=*/true, + /*AlwaysInline=*/false, MachinePointerInfo(0), MachinePointerInfo(0))); @@ -1429,7 +1500,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), CPAddr, MachinePointerInfo::getConstantPool(), - false, false, 0); + false, false, false, 0); } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) { const char *Sym = S->getSymbol(); @@ -1444,7 +1515,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), CPAddr, MachinePointerInfo::getConstantPool(), - false, false, 0); + false, false, false, 0); } } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { const GlobalValue *GV = G->getGlobal(); @@ -1465,7 +1536,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), CPAddr, MachinePointerInfo::getConstantPool(), - false, false, 0); + false, false, false, 0); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); Callee = DAG.getNode(ARMISD::PIC_ADD, dl, getPointerTy(), Callee, PICLabel); @@ -1494,7 +1565,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), CPAddr, MachinePointerInfo::getConstantPool(), - false, false, 0); + false, false, false, 0); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); Callee = DAG.getNode(ARMISD::PIC_ADD, dl, getPointerTy(), Callee, PICLabel); @@ -1513,12 +1584,20 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (Subtarget->isThumb()) { if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps()) CallOpc = ARMISD::CALL_NOLINK; + else if (doesNotRet && isDirect && !isARMFunc && + Subtarget->hasRAS() && !Subtarget->isThumb1Only()) + // "mov lr, pc; b _foo" to avoid confusing the RSP + CallOpc = ARMISD::CALL_NOLINK; else CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL; } else { - CallOpc = (isDirect || Subtarget->hasV5TOps()) - ? (isLocalARMFunc ? 
ARMISD::CALL_PRED : ARMISD::CALL) - : ARMISD::CALL_NOLINK; + if (!isDirect && !Subtarget->hasV5TOps()) { + CallOpc = ARMISD::CALL_NOLINK; + } else if (doesNotRet && isDirect && Subtarget->hasRAS()) + // "mov lr, pc; b _foo" to avoid confusing the RSP + CallOpc = ARMISD::CALL_NOLINK; + else + CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL; } std::vector<SDValue> Ops; @@ -1531,6 +1610,12 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, Ops.push_back(DAG.getRegister(RegsToPass[i].first, RegsToPass[i].second.getValueType())); + // Add a register mask operand representing the call-preserved registers. + const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); + const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); + assert(Mask && "Missing call preserved mask for calling convention"); + Ops.push_back(DAG.getRegisterMask(Mask)); + if (InFlag.getNode()) Ops.push_back(InFlag); @@ -1558,7 +1643,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, /// and then confiscate the rest of the parameter registers to insure /// this. void -llvm::ARMTargetLowering::HandleByVal(CCState *State, unsigned &size) const { +ARMTargetLowering::HandleByVal(CCState *State, unsigned &size) const { unsigned reg = State->AllocateReg(GPRArgRegs, 4); assert((State->getCallOrPrologue() == Prologue || State->getCallOrPrologue() == Call) && @@ -1588,7 +1673,7 @@ llvm::ARMTargetLowering::HandleByVal(CCState *State, unsigned &size) const { static bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, MachineFrameInfo *MFI, const MachineRegisterInfo *MRI, - const ARMInstrInfo *TII) { + const TargetInstrInfo *TII) { unsigned Bytes = Arg.getValueType().getSizeInBits() / 8; int FI = INT_MAX; if (Arg.getOpcode() == ISD::CopyFromReg) { @@ -1723,8 +1808,7 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, // the caller's fixed stack objects. MachineFrameInfo *MFI = MF.getFrameInfo(); const MachineRegisterInfo *MRI = &MF.getRegInfo(); - const ARMInstrInfo *TII = - ((ARMTargetMachine&)getTargetMachine()).getInstrInfo(); + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); i != e; ++i, ++realArgIdx) { @@ -1852,63 +1936,72 @@ ARMTargetLowering::LowerReturn(SDValue Chain, return result; } -bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N) const { +bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { if (N->getNumValues() != 1) return false; if (!N->hasNUsesOfValue(1, 0)) return false; - unsigned NumCopies = 0; - SDNode* Copies[2]; - SDNode *Use = *N->use_begin(); - if (Use->getOpcode() == ISD::CopyToReg) { - Copies[NumCopies++] = Use; - } else if (Use->getOpcode() == ARMISD::VMOVRRD) { + SDValue TCChain = Chain; + SDNode *Copy = *N->use_begin(); + if (Copy->getOpcode() == ISD::CopyToReg) { + // If the copy has a glue operand, we conservatively assume it isn't safe to + // perform a tail call. + if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue) + return false; + TCChain = Copy->getOperand(0); + } else if (Copy->getOpcode() == ARMISD::VMOVRRD) { + SDNode *VMov = Copy; // f64 returned in a pair of GPRs. 
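An aside on the register-mask operand added to the call node earlier in this hunk: rather than listing an implicit-def for every register the call might clobber, the node carries a bit vector in which a set bit means preserved across the call. A sketch of reading one, assuming Mask is what getCallPreservedMask returned and Reg is a physical register number:

  bool isPreservedAcrossCall(const uint32_t *Mask, unsigned Reg) {
    // 32 registers per mask word; bit set == preserved under this CC.
    return (Mask[Reg / 32] >> (Reg % 32)) & 1;
  }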
- for (SDNode::use_iterator UI = Use->use_begin(), UE = Use->use_end(); + SmallPtrSet<SDNode*, 2> Copies; + for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end(); UI != UE; ++UI) { if (UI->getOpcode() != ISD::CopyToReg) return false; - Copies[UI.getUse().getResNo()] = *UI; - ++NumCopies; + Copies.insert(*UI); } - } else if (Use->getOpcode() == ISD::BITCAST) { + if (Copies.size() > 2) + return false; + + for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end(); + UI != UE; ++UI) { + SDValue UseChain = UI->getOperand(0); + if (Copies.count(UseChain.getNode())) + // Second CopyToReg + Copy = *UI; + else + // First CopyToReg + TCChain = UseChain; + } + } else if (Copy->getOpcode() == ISD::BITCAST) { // f32 returned in a single GPR. - if (!Use->hasNUsesOfValue(1, 0)) + if (!Copy->hasOneUse()) return false; - Use = *Use->use_begin(); - if (Use->getOpcode() != ISD::CopyToReg || !Use->hasNUsesOfValue(1, 0)) + Copy = *Copy->use_begin(); + if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0)) return false; - Copies[NumCopies++] = Use; + Chain = Copy->getOperand(0); } else { return false; } - if (NumCopies != 1 && NumCopies != 2) - return false; - bool HasRet = false; - for (unsigned i = 0; i < NumCopies; ++i) { - SDNode *Copy = Copies[i]; - for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end(); - UI != UE; ++UI) { - if (UI->getOpcode() == ISD::CopyToReg) { - SDNode *Use = *UI; - if (Use == Copies[0] || Use == Copies[1]) - continue; - return false; - } - if (UI->getOpcode() != ARMISD::RET_FLAG) - return false; - HasRet = true; - } + for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end(); + UI != UE; ++UI) { + if (UI->getOpcode() != ARMISD::RET_FLAG) + return false; + HasRet = true; } - return HasRet; + if (!HasRet) + return false; + + Chain = TCChain; + return true; } bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { - if (!EnableARMTailCalls) + if (!EnableARMTailCalls && !Subtarget->supportsTailCall()) return false; if (!CI->isTailCall()) @@ -1965,7 +2058,7 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr); SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr, MachinePointerInfo::getConstantPool(), - false, false, 0); + false, false, false, 0); if (RelocM == Reloc::Static) return Result; SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); @@ -1989,7 +2082,7 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument); Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument, MachinePointerInfo::getConstantPool(), - false, false, 0); + false, false, false, 0); SDValue Chain = Argument.getValue(1); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); @@ -2005,7 +2098,8 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, std::pair<SDValue, SDValue> CallResult = LowerCallTo(Chain, (Type *) Type::getInt32Ty(*DAG.getContext()), false, false, false, false, - 0, CallingConv::C, false, /*isReturnValueUsed=*/true, + 0, CallingConv::C, /*isTailCall=*/false, + /*doesNotRet=*/false, /*isReturnValueUsed=*/true, DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl); return CallResult.first; } @@ -2037,7 +2131,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, 
MachinePointerInfo::getConstantPool(), - false, false, 0); + false, false, false, 0); Chain = Offset.getValue(1); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); @@ -2045,7 +2139,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, MachinePointerInfo::getConstantPool(), - false, false, 0); + false, false, false, 0); } else { // local exec model ARMConstantPoolValue *CPV = @@ -2054,7 +2148,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, MachinePointerInfo::getConstantPool(), - false, false, 0); + false, false, false, 0); } // The address of the thread local variable is the add of the thread @@ -2092,13 +2186,14 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, MachinePointerInfo::getConstantPool(), - false, false, 0); + false, false, false, 0); SDValue Chain = Result.getValue(1); SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT); Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT); if (!UseGOTOFF) Result = DAG.getLoad(PtrVT, dl, Chain, Result, - MachinePointerInfo::getGOT(), false, false, 0); + MachinePointerInfo::getGOT(), + false, false, false, 0); return Result; } @@ -2115,7 +2210,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, MachinePointerInfo::getConstantPool(), - false, false, 0); + false, false, false, 0); } } @@ -2128,7 +2223,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - // FIXME: Enable this for static codegen when tool issues are fixed. + // FIXME: Enable this for static codegen when tool issues are fixed. Also + // update ARMFastISel::ARMMaterializeGV. 
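For the useMovt() path that follows, a global's address is built from two halved immediates instead of being loaded from a constant pool. The arithmetic, sketched with a fixed stand-in value (the real code emits relocations, not constants):

  uint32_t Addr = 0x12345678;              // stand-in for the symbol address
  uint16_t Lo = Addr & 0xFFFF;             // movw rd, #Lo (zero-extends rd)
  uint16_t Hi = (Addr >> 16) & 0xFFFF;     // movt rd, #Hi (writes top half)
  uint32_t Rebuilt = ((uint32_t)Hi << 16) | Lo;  // rd now equals Addr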
if (Subtarget->useMovt() && RelocM != Reloc::Static) { ++NumMovwMovt; // FIXME: Once remat is capable of dealing with instructions with register @@ -2143,7 +2239,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, DAG.getTargetGlobalAddress(GV, dl, PtrVT)); if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result, - MachinePointerInfo::getGOT(), false, false, 0); + MachinePointerInfo::getGOT(), + false, false, false, 0); return Result; } @@ -2163,7 +2260,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, MachinePointerInfo::getConstantPool(), - false, false, 0); + false, false, false, 0); SDValue Chain = Result.getValue(1); if (RelocM == Reloc::PIC_) { @@ -2173,7 +2270,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) Result = DAG.getLoad(PtrVT, dl, Chain, Result, MachinePointerInfo::getGOT(), - false, false, 0); + false, false, false, 0); return Result; } @@ -2195,20 +2292,12 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, MachinePointerInfo::getConstantPool(), - false, false, 0); + false, false, false, 0); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); } SDValue -ARMTargetLowering::LowerEH_SJLJ_DISPATCHSETUP(SDValue Op, SelectionDAG &DAG) - const { - DebugLoc dl = Op.getDebugLoc(); - return DAG.getNode(ARMISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other, - Op.getOperand(0), Op.getOperand(1)); -} - -SDValue ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); SDValue Val = DAG.getConstant(0, MVT::i32); @@ -2253,7 +2342,7 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, MachinePointerInfo::getConstantPool(), - false, false, 0); + false, false, false, 0); if (RelocM == Reloc::PIC_) { SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); @@ -2366,7 +2455,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - TargetRegisterClass *RC; + const TargetRegisterClass *RC; if (AFI->isThumb1OnlyFunction()) RC = ARM::tGPRRegisterClass; else @@ -2385,7 +2474,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN, MachinePointerInfo::getFixedStack(FI), - false, false, 0); + false, false, false, 0); } else { Reg = MF.addLiveIn(NextVA.getLocReg(), RC); ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); @@ -2452,7 +2541,7 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, SmallVector<SDValue, 4> MemOps; for (; firstRegToSaveIndex < 4; ++firstRegToSaveIndex) { - TargetRegisterClass *RC; + const TargetRegisterClass *RC; if (AFI->isThumb1OnlyFunction()) RC = ARM::tGPRRegisterClass; else @@ -2521,7 +2610,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN, MachinePointerInfo::getFixedStack(FI), - false, false, 0); + 
false, false, false, 0); } else { ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl); @@ -2535,7 +2624,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl); } else { - TargetRegisterClass *RC; + const TargetRegisterClass *RC; if (RegVT == MVT::f32) RC = ARM::SPRRegisterClass; @@ -2612,7 +2701,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo::getFixedStack(FI), - false, false, 0)); + false, false, false, 0)); } lastInsIndex = index; } @@ -2777,6 +2866,11 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { } } + // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the + // undefined bits before doing a full-word comparison with zero. + Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond, + DAG.getConstant(1, Cond.getValueType())); + return DAG.getSelectCC(dl, Cond, DAG.getConstant(0, Cond.getValueType()), SelectTrue, SelectFalse, ISD::SETNE); @@ -2847,7 +2941,7 @@ static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) { return DAG.getLoad(MVT::i32, Op.getDebugLoc(), Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(), Ld->isVolatile(), Ld->isNonTemporal(), - Ld->getAlignment()); + Ld->isInvariant(), Ld->getAlignment()); llvm_unreachable("Unknown VFP cmp argument!"); } @@ -2866,7 +2960,7 @@ static void expandf64Toi32(SDValue Op, SelectionDAG &DAG, Ld->getChain(), Ptr, Ld->getPointerInfo(), Ld->isVolatile(), Ld->isNonTemporal(), - Ld->getAlignment()); + Ld->isInvariant(), Ld->getAlignment()); EVT PtrType = Ptr.getValueType(); unsigned NewAlign = MinAlign(Ld->getAlignment(), 4); @@ -2876,7 +2970,7 @@ static void expandf64Toi32(SDValue Op, SelectionDAG &DAG, Ld->getChain(), NewPtr, Ld->getPointerInfo().getWithOffset(4), Ld->isVolatile(), Ld->isNonTemporal(), - NewAlign); + Ld->isInvariant(), NewAlign); return; } @@ -2894,12 +2988,11 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const { SDValue Dest = Op.getOperand(4); DebugLoc dl = Op.getDebugLoc(); - bool SeenZero = false; - if (canChangeToInt(LHS, SeenZero, Subtarget) && - canChangeToInt(RHS, SeenZero, Subtarget) && - // If one of the operand is zero, it's safe to ignore the NaN case since - // we only care about equality comparisons. - (SeenZero || (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS)))) { + bool LHSSeenZero = false; + bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget); + bool RHSSeenZero = false; + bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget); + if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) { // If unsafe fp math optimization is enabled and there are no other uses of // the CMP operands, and the condition code is EQ or NE, we can optimize it // to an integer comparison. 
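The comment above describes the trick that the sign-bit masking in the hunk that follows makes safe: with one operand known to be zero and only EQ/NE at stake, an IEEE-754 bit compare is equivalent to the FP compare, provided the sign bit is cleared so that -0.0 equals +0.0. A scalar sketch:

  bool fpEqZeroViaInt(uint32_t Bits) {
    // 0x7fffffff clears the sign, so -0.0 (0x80000000) compares equal to
    // +0.0; NaN patterns keep a nonzero exponent/mantissa and stay unequal.
    return (Bits & 0x7fffffff) == 0;
  }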
@@ -2908,10 +3001,13 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const { else if (CC == ISD::SETUNE) CC = ISD::SETNE; + SDValue Mask = DAG.getConstant(0x7fffffff, MVT::i32); SDValue ARMcc; if (LHS.getValueType() == MVT::f32) { - LHS = bitcastf32Toi32(LHS, DAG); - RHS = bitcastf32Toi32(RHS, DAG); + LHS = DAG.getNode(ISD::AND, dl, MVT::i32, + bitcastf32Toi32(LHS, DAG), Mask); + RHS = DAG.getNode(ISD::AND, dl, MVT::i32, + bitcastf32Toi32(RHS, DAG), Mask); SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, @@ -2922,6 +3018,8 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const { SDValue RHS1, RHS2; expandf64Toi32(LHS, DAG, LHS1, LHS2); expandf64Toi32(RHS, DAG, RHS1, RHS2); + LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask); + RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask); ARMCC::CondCodes CondCode = IntCCToARMCC(CC); ARMcc = DAG.getConstant(CondCode, MVT::i32); SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); @@ -2950,7 +3048,7 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64); - if (UnsafeFPMath && + if (getTargetMachine().Options.UnsafeFPMath && (CC == ISD::SETEQ || CC == ISD::SETOEQ || CC == ISD::SETNE || CC == ISD::SETUNE)) { SDValue Result = OptimizeVFPBrcond(Op, DAG); @@ -3000,25 +3098,48 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr, MachinePointerInfo::getJumpTable(), - false, false, 0); + false, false, false, 0); Chain = Addr.getValue(1); Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table); return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); } else { Addr = DAG.getLoad(PTy, dl, Chain, Addr, - MachinePointerInfo::getJumpTable(), false, false, 0); + MachinePointerInfo::getJumpTable(), + false, false, false, 0); Chain = Addr.getValue(1); return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); } } +static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) { + EVT VT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + + if (Op.getValueType().getVectorElementType() == MVT::i32) { + if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32) + return Op; + return DAG.UnrollVectorOp(Op.getNode()); + } + + assert(Op.getOperand(0).getValueType() == MVT::v4f32 && + "Invalid type for custom lowering!"); + if (VT != MVT::v4i16) + return DAG.UnrollVectorOp(Op.getNode()); + + Op = DAG.getNode(Op.getOpcode(), dl, MVT::v4i32, Op.getOperand(0)); + return DAG.getNode(ISD::TRUNCATE, dl, VT, Op); +} + static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) { + EVT VT = Op.getValueType(); + if (VT.isVector()) + return LowerVectorFP_TO_INT(Op, DAG); + DebugLoc dl = Op.getDebugLoc(); unsigned Opc; switch (Op.getOpcode()) { - default: - assert(0 && "Invalid opcode!"); + default: llvm_unreachable("Invalid opcode!"); case ISD::FP_TO_SINT: Opc = ARMISD::FTOSI; break; @@ -3034,6 +3155,12 @@ static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) { EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); + if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) { + if (VT.getVectorElementType() == MVT::f32) + return Op; + return DAG.UnrollVectorOp(Op.getNode()); + } + 
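LowerVectorFP_TO_INT above handles v4f32 -> v4i16 in two steps because NEON's VCVT only produces 32-bit integer lanes. The same sequence per lane, as a scalar sketch:

  int16_t fpToI16Lane(float F) {
    int32_t Wide = (int32_t)F;  // vcvt.s32.f32: FP_TO_SINT into an i32 lane
    return (int16_t)Wide;       // ISD::TRUNCATE: v4i32 narrowed to v4i16
  }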
assert(Op.getOperand(0).getValueType() == MVT::v4i16 && "Invalid type for custom lowering!"); if (VT != MVT::v4f32) @@ -3042,8 +3169,7 @@ static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) { unsigned CastOpc; unsigned Opc; switch (Op.getOpcode()) { - default: - assert(0 && "Invalid opcode!"); + default: llvm_unreachable("Invalid opcode!"); case ISD::SINT_TO_FP: CastOpc = ISD::SIGN_EXTEND; Opc = ISD::SINT_TO_FP; @@ -3067,8 +3193,7 @@ static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) { unsigned Opc; switch (Op.getOpcode()) { - default: - assert(0 && "Invalid opcode!"); + default: llvm_unreachable("Invalid opcode!"); case ISD::SINT_TO_FP: Opc = ARMISD::SITOF; break; @@ -3176,7 +3301,7 @@ SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{ SDValue Offset = DAG.getConstant(4, MVT::i32); return DAG.getLoad(VT, dl, DAG.getEntryNode(), DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset), - MachinePointerInfo(), false, false, 0); + MachinePointerInfo(), false, false, false, 0); } // Return LR, which contains the return address. Mark it an implicit live-in. @@ -3197,7 +3322,7 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { while (Depth--) FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); return FrameAddr; } @@ -3442,7 +3567,7 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { if (Op.getOperand(1).getValueType().isFloatingPoint()) { switch (SetCCOpcode) { - default: llvm_unreachable("Illegal FP comparison"); break; + default: llvm_unreachable("Illegal FP comparison"); case ISD::SETUNE: case ISD::SETNE: Invert = true; // Fallthrough case ISD::SETOEQ: @@ -3481,7 +3606,7 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { } else { // Integer comparisons. switch (SetCCOpcode) { - default: llvm_unreachable("Illegal integer comparison"); break; + default: llvm_unreachable("Illegal integer comparison"); case ISD::SETNE: Invert = true; case ISD::SETEQ: Opc = ARMISD::VCEQ; break; case ISD::SETLT: Swap = true; @@ -3688,14 +3813,65 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, default: llvm_unreachable("unexpected size for isNEONModifiedImm"); - return SDValue(); } unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm); return DAG.getTargetConstant(EncodedVal, MVT::i32); } -static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT, +SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *ST) const { + if (!ST->useNEONForSinglePrecisionFP() || !ST->hasVFP3() || ST->hasD16()) + return SDValue(); + + ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op); + assert(Op.getValueType() == MVT::f32 && + "ConstantFP custom lowering should only occur for f32."); + + // Try splatting with a VMOV.f32... 
+ APFloat FPVal = CFP->getValueAPF(); + int ImmVal = ARM_AM::getFP32Imm(FPVal); + if (ImmVal != -1) { + DebugLoc DL = Op.getDebugLoc(); + SDValue NewVal = DAG.getTargetConstant(ImmVal, MVT::i32); + SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32, + NewVal); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant, + DAG.getConstant(0, MVT::i32)); + } + + // If that fails, try a VMOV.i32 + EVT VMovVT; + unsigned iVal = FPVal.bitcastToAPInt().getZExtValue(); + SDValue NewVal = isNEONModifiedImm(iVal, 0, 32, DAG, VMovVT, false, + VMOVModImm); + if (NewVal != SDValue()) { + DebugLoc DL = Op.getDebugLoc(); + SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT, + NewVal); + SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32, + VecConstant); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant, + DAG.getConstant(0, MVT::i32)); + } + + // Finally, try a VMVN.i32 + NewVal = isNEONModifiedImm(~iVal & 0xffffffff, 0, 32, DAG, VMovVT, false, + VMVNModImm); + if (NewVal != SDValue()) { + DebugLoc DL = Op.getDebugLoc(); + SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal); + SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32, + VecConstant); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant, + DAG.getConstant(0, MVT::i32)); + } + + return SDValue(); +} + + +static bool isVEXTMask(ArrayRef<int> M, EVT VT, bool &ReverseVEXT, unsigned &Imm) { unsigned NumElts = VT.getVectorNumElements(); ReverseVEXT = false; @@ -3734,8 +3910,7 @@ static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT, /// isVREVMask - Check if a vector shuffle corresponds to a VREV /// instruction with the specified blocksize. (The order of the elements /// within each block of the vector is reversed.) -static bool isVREVMask(const SmallVectorImpl<int> &M, EVT VT, - unsigned BlockSize) { +static bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) { assert((BlockSize==16 || BlockSize==32 || BlockSize==64) && "Only possible block sizes for VREV are: 16, 32, 64"); @@ -3761,15 +3936,14 @@ static bool isVREVMask(const SmallVectorImpl<int> &M, EVT VT, return true; } -static bool isVTBLMask(const SmallVectorImpl<int> &M, EVT VT) { +static bool isVTBLMask(ArrayRef<int> M, EVT VT) { // We can handle <8 x i8> vector shuffles. If the index in the mask is out of // range, then 0 is placed into the resulting vector. So pretty much any mask // of 8 elements can work here. return VT == MVT::v8i8 && M.size() == 8; } -static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT, - unsigned &WhichResult) { +static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { unsigned EltSz = VT.getVectorElementType().getSizeInBits(); if (EltSz == 64) return false; @@ -3787,8 +3961,7 @@ static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT, /// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". /// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>. 
-static bool isVTRN_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, - unsigned &WhichResult) { +static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){ unsigned EltSz = VT.getVectorElementType().getSizeInBits(); if (EltSz == 64) return false; @@ -3803,8 +3976,7 @@ static bool isVTRN_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, return true; } -static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT, - unsigned &WhichResult) { +static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { unsigned EltSz = VT.getVectorElementType().getSizeInBits(); if (EltSz == 64) return false; @@ -3827,8 +3999,7 @@ static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT, /// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". /// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>, -static bool isVUZP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, - unsigned &WhichResult) { +static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){ unsigned EltSz = VT.getVectorElementType().getSizeInBits(); if (EltSz == 64) return false; @@ -3852,8 +4023,7 @@ static bool isVUZP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, return true; } -static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT, - unsigned &WhichResult) { +static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { unsigned EltSz = VT.getVectorElementType().getSizeInBits(); if (EltSz == 64) return false; @@ -3878,8 +4048,7 @@ static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT, /// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". /// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>. -static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, - unsigned &WhichResult) { +static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){ unsigned EltSz = VT.getVectorElementType().getSizeInBits(); if (EltSz == 64) return false; @@ -3955,6 +4124,15 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val); return DAG.getNode(ISD::BITCAST, dl, VT, Vmov); } + + // Use vmov.f32 to materialize other v2f32 and v4f32 splats. + if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) { + int ImmVal = ARM_AM::getFP32Imm(SplatBits); + if (ImmVal != -1) { + SDValue Val = DAG.getTargetConstant(ImmVal, MVT::i32); + return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val); + } + } } } @@ -4302,7 +4480,7 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, } static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op, - SmallVectorImpl<int> &ShuffleMask, + ArrayRef<int> ShuffleMask, SelectionDAG &DAG) { // Check to see if we can use the VTBL instruction. 
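VTBL's defined out-of-range behavior is what lets isVTBLMask above accept almost any 8-entry mask, and it is what the v8i8 shuffle lowering continuing below relies on. Its per-byte semantics, sketched:

  uint8_t vtblLane(const uint8_t Table[8], uint8_t Index) {
    // One-register VTBL: indexes 0..7 pick a table byte; anything larger
    // yields 0 instead of trapping or wrapping.
    return Index < 8 ? Table[Index] : 0;
  }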
SDValue V1 = Op.getOperand(0); @@ -4310,7 +4488,7 @@ static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op, DebugLoc DL = Op.getDebugLoc(); SmallVector<SDValue, 8> VTBLMask; - for (SmallVectorImpl<int>::iterator + for (ArrayRef<int>::iterator I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I) VTBLMask.push_back(DAG.getConstant(*I, MVT::i32)); @@ -4330,7 +4508,6 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { DebugLoc dl = Op.getDebugLoc(); EVT VT = Op.getValueType(); ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode()); - SmallVector<int, 8> ShuffleMask; // Convert shuffles that are directly supported on NEON to target-specific // DAG nodes, instead of keeping them as shuffles and matching them again @@ -4338,7 +4515,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { // of inconsistencies between legalization and selection. // FIXME: floating-point vectors should be canonicalized to integer vectors // of the same time so that they get CSEd properly. - SVN->getMask(ShuffleMask); + ArrayRef<int> ShuffleMask = SVN->getMask(); unsigned EltSize = VT.getVectorElementType().getSizeInBits(); if (EltSize <= 32) { @@ -4347,9 +4524,24 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { // If this is undef splat, generate it via "just" vdup, if possible. if (Lane == -1) Lane = 0; + // Test if V1 is a SCALAR_TO_VECTOR. if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) { return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0)); } + // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR + // (and probably will turn into a SCALAR_TO_VECTOR once legalization + // reaches it). + if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR && + !isa<ConstantSDNode>(V1.getOperand(0))) { + bool IsScalarToVector = true; + for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i) + if (V1.getOperand(i).getOpcode() != ISD::UNDEF) { + IsScalarToVector = false; + break; + } + if (IsScalarToVector) + return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0)); + } return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1, DAG.getConstant(Lane, MVT::i32)); } @@ -4450,6 +4642,15 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { return SDValue(); } +static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { + // INSERT_VECTOR_ELT is legal only for immediate indexes. + SDValue Lane = Op.getOperand(2); + if (!isa<ConstantSDNode>(Lane)) + return SDValue(); + + return Op; +} + static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { // EXTRACT_VECTOR_ELT is legal only for immediate indexes. 
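LowerINSERT_VECTOR_ELT above and the EXTRACT lowering continuing below enforce the same rule: NEON lane moves encode the lane number in the instruction itself, so only constant indexes stay legal, and a variable index falls back to the default expansion through memory. Sketched per element:

  float extractLane(const float V[4], unsigned I) {
    // What the default expansion amounts to: spill the vector and index it
    // in memory. A constant I would instead select a single lane move.
    return V[I];
  }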
SDValue Lane = Op.getOperand(1); @@ -4526,11 +4727,10 @@ static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG, unsigned EltSize = VT.getVectorElementType().getSizeInBits(); unsigned HalfSize = EltSize / 2; if (isSigned) { - int64_t SExtVal = C->getSExtValue(); - if ((SExtVal >> HalfSize) != (SExtVal >> EltSize)) + if (!isIntN(HalfSize, C->getSExtValue())) return false; } else { - if ((C->getZExtValue() >> HalfSize) != 0) + if (!isUIntN(HalfSize, C->getZExtValue())) return false; } continue; @@ -4569,7 +4769,8 @@ static SDValue SkipExtension(SDNode *N, SelectionDAG &DAG) { if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) return DAG.getLoad(LD->getMemoryVT(), N->getDebugLoc(), LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(), - LD->isNonTemporal(), LD->getAlignment()); + LD->isNonTemporal(), LD->isInvariant(), + LD->getAlignment()); // Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will // have been legalized as a BITCAST from v4i32. if (N->getOpcode() == ISD::BITCAST) { @@ -4874,7 +5075,7 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) { unsigned Opc; bool ExtraOp = false; switch (Op.getOpcode()) { - default: assert(0 && "Invalid code"); + default: llvm_unreachable("Invalid code"); case ISD::ADDC: Opc = ARMISD::ADDC; break; case ISD::ADDE: Opc = ARMISD::ADDE; ExtraOp = true; break; case ISD::SUBC: Opc = ARMISD::SUBC; break; @@ -4959,7 +5160,6 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG); case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG); case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG); - case ISD::EH_SJLJ_DISPATCHSETUP: return LowerEH_SJLJ_DISPATCHSETUP(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG, Subtarget); case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG); @@ -4971,8 +5171,10 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG); case ISD::CTTZ: return LowerCTTZ(Op.getNode(), DAG, Subtarget); case ISD::SETCC: return LowerVSETCC(Op, DAG); + case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget); case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget); case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); + case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); @@ -4986,7 +5188,6 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::ATOMIC_LOAD: case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG); } - return SDValue(); } /// ReplaceNodeResults - Replace the results of node with an illegal result @@ -4998,7 +5199,6 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, switch (N->getOpcode()) { default: llvm_unreachable("Don't know how to custom expand this!"); - break; case ISD::BITCAST: Res = ExpandBITCAST(N, DAG); break; @@ -5194,7 +5394,7 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); - TargetRegisterClass *TRC = + const TargetRegisterClass *TRC = isThumb2 ? 
ARM::tGPRRegisterClass : ARM::GPRRegisterClass; unsigned scratch = MRI.createVirtualRegister(TRC); unsigned scratch2 = (!BinOpcode) ? incr : MRI.createVirtualRegister(TRC); @@ -5304,7 +5504,7 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI, BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); - TargetRegisterClass *TRC = + const TargetRegisterClass *TRC = isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass; unsigned scratch = MRI.createVirtualRegister(TRC); unsigned scratch2 = MRI.createVirtualRegister(TRC); @@ -5414,7 +5614,7 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); - TargetRegisterClass *TRC = + const TargetRegisterClass *TRC = isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass; unsigned storesuccess = MRI.createVirtualRegister(TRC); @@ -5500,52 +5700,6 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, return BB; } -/// EmitBasePointerRecalculation - For functions using a base pointer, we -/// rematerialize it (via the frame pointer). -void ARMTargetLowering:: -EmitBasePointerRecalculation(MachineInstr *MI, MachineBasicBlock *MBB, - MachineBasicBlock *DispatchBB) const { - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII); - MachineFunction &MF = *MI->getParent()->getParent(); - ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - const ARMBaseRegisterInfo &RI = AII->getRegisterInfo(); - - if (!RI.hasBasePointer(MF)) return; - - MachineBasicBlock::iterator MBBI = MI; - - int32_t NumBytes = AFI->getFramePtrSpillOffset(); - unsigned FramePtr = RI.getFrameRegister(MF); - assert(MF.getTarget().getFrameLowering()->hasFP(MF) && - "Base pointer without frame pointer?"); - - if (AFI->isThumb2Function()) - llvm::emitT2RegPlusImmediate(*MBB, MBBI, MI->getDebugLoc(), ARM::R6, - FramePtr, -NumBytes, ARMCC::AL, 0, *AII); - else if (AFI->isThumbFunction()) - llvm::emitThumbRegPlusImmediate(*MBB, MBBI, MI->getDebugLoc(), ARM::R6, - FramePtr, -NumBytes, *AII, RI); - else - llvm::emitARMRegPlusImmediate(*MBB, MBBI, MI->getDebugLoc(), ARM::R6, - FramePtr, -NumBytes, ARMCC::AL, 0, *AII); - - if (!RI.needsStackRealignment(MF)) return; - - // If there's dynamic realignment, adjust for it. - MachineFrameInfo *MFI = MF.getFrameInfo(); - unsigned MaxAlign = MFI->getMaxAlignment(); - assert(!AFI->isThumb1OnlyFunction()); - - // Emit bic r6, r6, MaxAlign - unsigned bicOpc = AFI->isThumbFunction() ? ARM::t2BICri : ARM::BICri; - AddDefaultCC( - AddDefaultPred( - BuildMI(*MBB, MBBI, MI->getDebugLoc(), TII->get(bicOpc), ARM::R6) - .addReg(ARM::R6, RegState::Kill) - .addImm(MaxAlign - 1))); -} - /// SetupEntryBlockForSjLj - Insert code into the entry block that creates and /// registers the function context. void ARMTargetLowering:: @@ -5580,8 +5734,6 @@ SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB, MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), MachineMemOperand::MOStore, 4, 4); - EmitBasePointerRecalculation(MI, MBB, DispatchBB); - // Load the address of the dispatch MBB into the jump buffer. 
if (isThumb2) { // Incoming value: jbuf @@ -5683,7 +5835,8 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2> > CallSiteNumToLPad; unsigned MaxCSNum = 0; MachineModuleInfo &MMI = MF->getMMI(); - for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E; ++BB) { + for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E; + ++BB) { if (!BB->isLandingPad()) continue; // FIXME: We should assert that the EH_LABEL is the first MI in the landing @@ -5741,12 +5894,10 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock(); DispatchBB->addSuccessor(DispContBB); - // Insert and renumber MBBs. - MachineBasicBlock *Last = &MF->back(); + // Insert the new MBBs. MF->insert(MF->end(), DispatchBB); MF->insert(MF->end(), DispContBB); MF->insert(MF->end(), TrapBB); - MF->RenumberBlocks(Last); // Insert code into the entry block that creates and registers the function // context. @@ -5757,35 +5908,63 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile, 4, 4); + if (AFI->isThumb1OnlyFunction()) + BuildMI(DispatchBB, dl, TII->get(ARM::tInt_eh_sjlj_dispatchsetup)); + else if (!Subtarget->hasVFP2()) + BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup_nofp)); + else + BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup)); + + unsigned NumLPads = LPadList.size(); if (Subtarget->isThumb2()) { unsigned NewVReg1 = MRI->createVirtualRegister(TRC); AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1) .addFrameIndex(FI) .addImm(4) .addMemOperand(FIMMOLd)); - AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri)) - .addReg(NewVReg1) - .addImm(LPadList.size())); + + if (NumLPads < 256) { + AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri)) + .addReg(NewVReg1) + .addImm(LPadList.size())); + } else { + unsigned VReg1 = MRI->createVirtualRegister(TRC); + AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1) + .addImm(NumLPads & 0xFFFF)); + + unsigned VReg2 = VReg1; + if ((NumLPads & 0xFFFF0000) != 0) { + VReg2 = MRI->createVirtualRegister(TRC); + AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2) + .addReg(VReg1) + .addImm(NumLPads >> 16)); + } + + AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr)) + .addReg(NewVReg1) + .addReg(VReg2)); + } + BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc)) .addMBB(TrapBB) .addImm(ARMCC::HI) .addReg(ARM::CPSR); - unsigned NewVReg2 = MRI->createVirtualRegister(TRC); - AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT),NewVReg2) + unsigned NewVReg3 = MRI->createVirtualRegister(TRC); + AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT),NewVReg3) .addJumpTableIndex(MJTI) .addImm(UId)); - unsigned NewVReg3 = MRI->createVirtualRegister(TRC); + unsigned NewVReg4 = MRI->createVirtualRegister(TRC); AddDefaultCC( AddDefaultPred( - BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg3) - .addReg(NewVReg2, RegState::Kill) + BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4) + .addReg(NewVReg3, RegState::Kill) .addReg(NewVReg1) .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2)))); BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT)) - .addReg(NewVReg3, RegState::Kill) + .addReg(NewVReg4, RegState::Kill) .addReg(NewVReg1) .addJumpTableIndex(MJTI) .addImm(UId); @@ -5796,9 +5975,30 @@ 
EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { .addImm(1) .addMemOperand(FIMMOLd)); - AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8)) - .addReg(NewVReg1) - .addImm(LPadList.size())); + if (NumLPads < 256) { + AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8)) + .addReg(NewVReg1) + .addImm(NumLPads)); + } else { + MachineConstantPool *ConstantPool = MF->getConstantPool(); + Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext()); + const Constant *C = ConstantInt::get(Int32Ty, NumLPads); + + // MachineConstantPool wants an explicit alignment. + unsigned Align = getTargetData()->getPrefTypeAlignment(Int32Ty); + if (Align == 0) + Align = getTargetData()->getTypeAllocSize(C->getType()); + unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align); + + unsigned VReg1 = MRI->createVirtualRegister(TRC); + AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci)) + .addReg(VReg1, RegState::Define) + .addConstantPoolIndex(Idx)); + AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr)) + .addReg(NewVReg1) + .addReg(VReg1)); + } + BuildMI(DispatchBB, dl, TII->get(ARM::tBcc)) .addMBB(TrapBB) .addImm(ARMCC::HI) @@ -5847,38 +6047,77 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { .addFrameIndex(FI) .addImm(4) .addMemOperand(FIMMOLd)); - AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPri)) - .addReg(NewVReg1) - .addImm(LPadList.size())); + + if (NumLPads < 256) { + AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPri)) + .addReg(NewVReg1) + .addImm(NumLPads)); + } else if (Subtarget->hasV6T2Ops() && isUInt<16>(NumLPads)) { + unsigned VReg1 = MRI->createVirtualRegister(TRC); + AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1) + .addImm(NumLPads & 0xFFFF)); + + unsigned VReg2 = VReg1; + if ((NumLPads & 0xFFFF0000) != 0) { + VReg2 = MRI->createVirtualRegister(TRC); + AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2) + .addReg(VReg1) + .addImm(NumLPads >> 16)); + } + + AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr)) + .addReg(NewVReg1) + .addReg(VReg2)); + } else { + MachineConstantPool *ConstantPool = MF->getConstantPool(); + Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext()); + const Constant *C = ConstantInt::get(Int32Ty, NumLPads); + + // MachineConstantPool wants an explicit alignment. 
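For orientation: this function emits the NumLPads bounds check three different ways, and the hunks above and below choose between them per subtarget. A condensed sketch of the selection logic in plain C++ (an editor's illustration, simplified from the three code paths in this function, not part of the patch):

#include <cstdint>

enum class CmpStrategy {
  Imm,       // CMPri / tCMPi8 / t2CMPri against #NumLPads
  MovwMovt,  // materialize with MOVi16 (+ MOVTi16 above 64K), then CMPrr
  ConstPool  // load the count with LDRcp / tLDRpci, then CMPrr / tCMPr
};

CmpStrategy pickCmpStrategy(uint64_t NumLPads, bool IsThumb1, bool IsThumb2,
                            bool HasV6T2) {
  if (NumLPads < 256)
    return CmpStrategy::Imm;       // an 8-bit compare immediate exists in every mode
  if (IsThumb2)
    return CmpStrategy::MovwMovt;  // t2MOVi16 is always available in Thumb2
  if (IsThumb1)
    return CmpStrategy::ConstPool; // Thumb1 has neither MOVW nor wide CMP immediates
  return (HasV6T2 && NumLPads < (1u << 16)) ? CmpStrategy::MovwMovt
                                            : CmpStrategy::ConstPool;
}

The constant-pool variant is the one that needs the explicit alignment mentioned just above; the lines that follow compute it.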
+ unsigned Align = getTargetData()->getPrefTypeAlignment(Int32Ty); + if (Align == 0) + Align = getTargetData()->getTypeAllocSize(C->getType()); + unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align); + + unsigned VReg1 = MRI->createVirtualRegister(TRC); + AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp)) + .addReg(VReg1, RegState::Define) + .addConstantPoolIndex(Idx) + .addImm(0)); + AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr)) + .addReg(NewVReg1) + .addReg(VReg1, RegState::Kill)); + } + BuildMI(DispatchBB, dl, TII->get(ARM::Bcc)) .addMBB(TrapBB) .addImm(ARMCC::HI) .addReg(ARM::CPSR); - unsigned NewVReg2 = MRI->createVirtualRegister(TRC); + unsigned NewVReg3 = MRI->createVirtualRegister(TRC); AddDefaultCC( - AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg2) + AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3) .addReg(NewVReg1) .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2)))); - unsigned NewVReg3 = MRI->createVirtualRegister(TRC); - AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg3) + unsigned NewVReg4 = MRI->createVirtualRegister(TRC); + AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4) .addJumpTableIndex(MJTI) .addImm(UId)); MachineMemOperand *JTMMOLd = MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(), MachineMemOperand::MOLoad, 4, 4); - unsigned NewVReg4 = MRI->createVirtualRegister(TRC); + unsigned NewVReg5 = MRI->createVirtualRegister(TRC); AddDefaultPred( - BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg4) - .addReg(NewVReg2, RegState::Kill) - .addReg(NewVReg3) + BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5) + .addReg(NewVReg3, RegState::Kill) + .addReg(NewVReg4) .addImm(0) .addMemOperand(JTMMOLd)); BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd)) - .addReg(NewVReg4, RegState::Kill) - .addReg(NewVReg3) + .addReg(NewVReg5, RegState::Kill) + .addReg(NewVReg4) .addJumpTableIndex(MJTI) .addImm(UId); } @@ -5893,21 +6132,24 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { PrevMBB = CurMBB; } + // N.B. the order the invoke BBs are processed in doesn't matter here. const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII); const ARMBaseRegisterInfo &RI = AII->getRegisterInfo(); - const unsigned *SavedRegs = RI.getCalleeSavedRegs(MF); + const uint16_t *SavedRegs = RI.getCalleeSavedRegs(MF); + SmallVector<MachineBasicBlock*, 64> MBBLPads; for (SmallPtrSet<MachineBasicBlock*, 64>::iterator I = InvokeBBs.begin(), E = InvokeBBs.end(); I != E; ++I) { MachineBasicBlock *BB = *I; // Remove the landing pad successor from the invoke block and replace it // with the new dispatch block. - for (MachineBasicBlock::succ_iterator - SI = BB->succ_begin(), SE = BB->succ_end(); SI != SE; ++SI) { - MachineBasicBlock *SMBB = *SI; + SmallVector<MachineBasicBlock*, 4> Successors(BB->succ_begin(), + BB->succ_end()); + while (!Successors.empty()) { + MachineBasicBlock *SMBB = Successors.pop_back_val(); if (SMBB->isLandingPad()) { BB->removeSuccessor(SMBB); - SMBB->setIsLandingPad(false); + MBBLPads.push_back(SMBB); } } @@ -5919,7 +6161,7 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { // executed. 
for (MachineBasicBlock::reverse_iterator II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) { - if (!II->getDesc().isCall()) continue; + if (!II->isCall()) continue; DenseMap<unsigned, bool> DefRegs; for (MachineInstr::mop_iterator @@ -5932,15 +6174,31 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { MachineInstrBuilder MIB(&*II); for (unsigned i = 0; SavedRegs[i] != 0; ++i) { - if (!TRC->contains(SavedRegs[i])) continue; - if (!DefRegs[SavedRegs[i]]) - MIB.addReg(SavedRegs[i], RegState::ImplicitDefine | RegState::Dead); + unsigned Reg = SavedRegs[i]; + if (Subtarget->isThumb2() && + !ARM::tGPRRegisterClass->contains(Reg) && + !ARM::hGPRRegisterClass->contains(Reg)) + continue; + else if (Subtarget->isThumb1Only() && + !ARM::tGPRRegisterClass->contains(Reg)) + continue; + else if (!Subtarget->isThumb() && + !ARM::GPRRegisterClass->contains(Reg)) + continue; + if (!DefRegs[Reg]) + MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead); } break; } } + // Mark all former landing pads as non-landing pads. The dispatch is the only + // landing pad now. + for (SmallVectorImpl<MachineBasicBlock*>::iterator + I = MBBLPads.begin(), E = MBBLPads.end(); I != E; ++I) + (*I)->setIsLandingPad(false); + // The instruction is gone now. MI->eraseFromParent(); @@ -6222,20 +6480,28 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, return BB; } + case ARM::Int_eh_sjlj_setjmp: + case ARM::Int_eh_sjlj_setjmp_nofp: + case ARM::tInt_eh_sjlj_setjmp: + case ARM::t2Int_eh_sjlj_setjmp: + case ARM::t2Int_eh_sjlj_setjmp_nofp: + EmitSjLjDispatchBlock(MI, BB); + return BB; + case ARM::ABS: case ARM::t2ABS: { // To insert an ABS instruction, we have to insert the // diamond control-flow pattern. The incoming instruction knows the // source vreg to test against 0, the destination vreg to set, // the condition code register to branch on, the - // true/false values to select between, and a branch opcode to use. + // true/false values to select between, and a branch opcode to use. // It transforms // V1 = ABS V0 // into // V2 = MOVS V0 // BCC (branch to SinkBB if V0 >= 0) // RSBBB: V3 = RSBri V2, 0 (compute ABS if V2 < 0) - // SinkBB: V1 = PHI(V2, V3) + // SinkBB: V1 = PHI(V2, V3) const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction::iterator BBI = BB; ++BBI; @@ -6276,19 +6542,19 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, .addReg(ARM::CPSR, RegState::Define); // insert a bcc with opposite CC to ARMCC::MI at the end of BB - BuildMI(BB, dl, + BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB) .addImm(ARMCC::getOppositeCondition(ARMCC::MI)).addReg(ARM::CPSR); // insert rsbri in RSBBB // Note: BCC and rsbri will be converted into predicated rsbmi // by if-conversion pass - BuildMI(*RSBBB, RSBBB->begin(), dl, + BuildMI(*RSBBB, RSBBB->begin(), dl, TII->get(isThumb2 ? 
ARM::t2RSBri : ARM::RSBri), NewRsbDstReg) .addReg(NewMovDstReg, RegState::Kill) .addImm(0).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); - // insert PHI in SinkBB, + // insert PHI in SinkBB, // reuse ABSDstReg to not change uses of ABS instruction BuildMI(*SinkBB, SinkBB->begin(), dl, TII->get(ARM::PHI), ABSDstReg) @@ -6296,7 +6562,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, .addReg(NewMovDstReg).addMBB(BB); // remove ABS instruction - MI->eraseFromParent(); + MI->eraseFromParent(); // return last added BB return SinkBB; @@ -6306,32 +6572,40 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, SDNode *Node) const { - const MCInstrDesc &MCID = MI->getDesc(); - if (!MCID.hasPostISelHook()) { + if (!MI->hasPostISelHook()) { assert(!convertAddSubFlagsOpcode(MI->getOpcode()) && "Pseudo flag-setting opcodes must be marked with 'hasPostISelHook'"); return; } + const MCInstrDesc *MCID = &MI->getDesc(); // Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB, // RSC. Coming out of isel, they have an implicit CPSR def, but the optional // operand is still set to noreg. If needed, set the optional operand's // register to CPSR, and remove the redundant implicit def. // - // e.g. ADCS (...opt:%noreg, CPSR<imp-def>) -> ADC (... opt:CPSR<def>). + // e.g. ADCS (..., CPSR<imp-def>) -> ADC (... opt:CPSR<def>). // Rename pseudo opcodes. unsigned NewOpc = convertAddSubFlagsOpcode(MI->getOpcode()); if (NewOpc) { const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo*>(getTargetMachine().getInstrInfo()); - MI->setDesc(TII->get(NewOpc)); + MCID = &TII->get(NewOpc); + + assert(MCID->getNumOperands() == MI->getDesc().getNumOperands() + 1 && + "converted opcode should be the same except for cc_out"); + + MI->setDesc(*MCID); + + // Add the optional cc_out operand + MI->addOperand(MachineOperand::CreateReg(0, /*isDef=*/true)); } - unsigned ccOutIdx = MCID.getNumOperands() - 1; + unsigned ccOutIdx = MCID->getNumOperands() - 1; // Any ARM instruction that sets the 's' bit should specify an optional // "cc_out" operand in the last operand position. - if (!MCID.hasOptionalDef() || !MCID.OpInfo[ccOutIdx].isOptionalDef()) { + if (!MI->hasOptionalDef() || !MCID->OpInfo[ccOutIdx].isOptionalDef()) { assert(!NewOpc && "Optional cc_out operand required"); return; } @@ -6339,7 +6613,7 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, // since we already have an optional CPSR def. 
bool definesCPSR = false; bool deadCPSR = false; - for (unsigned i = MCID.getNumOperands(), e = MI->getNumOperands(); + for (unsigned i = MCID->getNumOperands(), e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) { @@ -6513,7 +6787,7 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1, case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break; case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break; default: - assert(0 && "Invalid vector element type for padd optimization."); + llvm_unreachable("Invalid vector element type for padd optimization."); } SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(), @@ -6632,41 +6906,115 @@ static SDValue PerformMULCombine(SDNode *N, if (!C) return SDValue(); - uint64_t MulAmt = C->getZExtValue(); + int64_t MulAmt = C->getSExtValue(); unsigned ShiftAmt = CountTrailingZeros_64(MulAmt); + ShiftAmt = ShiftAmt & (32 - 1); SDValue V = N->getOperand(0); DebugLoc DL = N->getDebugLoc(); SDValue Res; MulAmt >>= ShiftAmt; - if (isPowerOf2_32(MulAmt - 1)) { - // (mul x, 2^N + 1) => (add (shl x, N), x) - Res = DAG.getNode(ISD::ADD, DL, VT, - V, DAG.getNode(ISD::SHL, DL, VT, - V, DAG.getConstant(Log2_32(MulAmt-1), - MVT::i32))); - } else if (isPowerOf2_32(MulAmt + 1)) { - // (mul x, 2^N - 1) => (sub (shl x, N), x) - Res = DAG.getNode(ISD::SUB, DL, VT, - DAG.getNode(ISD::SHL, DL, VT, - V, DAG.getConstant(Log2_32(MulAmt+1), - MVT::i32)), - V); - } else - return SDValue(); + + if (MulAmt >= 0) { + if (isPowerOf2_32(MulAmt - 1)) { + // (mul x, 2^N + 1) => (add (shl x, N), x) + Res = DAG.getNode(ISD::ADD, DL, VT, + V, + DAG.getNode(ISD::SHL, DL, VT, + V, + DAG.getConstant(Log2_32(MulAmt - 1), + MVT::i32))); + } else if (isPowerOf2_32(MulAmt + 1)) { + // (mul x, 2^N - 1) => (sub (shl x, N), x) + Res = DAG.getNode(ISD::SUB, DL, VT, + DAG.getNode(ISD::SHL, DL, VT, + V, + DAG.getConstant(Log2_32(MulAmt + 1), + MVT::i32)), + V); + } else + return SDValue(); + } else { + uint64_t MulAmtAbs = -MulAmt; + if (isPowerOf2_32(MulAmtAbs + 1)) { + // (mul x, -(2^N - 1)) => (sub x, (shl x, N)) + Res = DAG.getNode(ISD::SUB, DL, VT, + V, + DAG.getNode(ISD::SHL, DL, VT, + V, + DAG.getConstant(Log2_32(MulAmtAbs + 1), + MVT::i32))); + } else if (isPowerOf2_32(MulAmtAbs - 1)) { + // (mul x, -(2^N + 1)) => - (add (shl x, N), x) + Res = DAG.getNode(ISD::ADD, DL, VT, + V, + DAG.getNode(ISD::SHL, DL, VT, + V, + DAG.getConstant(Log2_32(MulAmtAbs-1), + MVT::i32))); + Res = DAG.getNode(ISD::SUB, DL, VT, + DAG.getConstant(0, MVT::i32),Res); + + } else + return SDValue(); + } if (ShiftAmt != 0) - Res = DAG.getNode(ISD::SHL, DL, VT, Res, - DAG.getConstant(ShiftAmt, MVT::i32)); + Res = DAG.getNode(ISD::SHL, DL, VT, + Res, DAG.getConstant(ShiftAmt, MVT::i32)); // Do not add new nodes to DAG combiner worklist. DCI.CombineTo(N, Res, false); return SDValue(); } +static bool isCMOVWithZeroOrAllOnesLHS(SDValue N, bool AllOnes) { + if (N.getOpcode() != ARMISD::CMOV || !N.getNode()->hasOneUse()) + return false; + + SDValue FalseVal = N.getOperand(0); + ConstantSDNode *C = dyn_cast<ConstantSDNode>(FalseVal); + if (!C) + return false; + if (AllOnes) + return C->isAllOnesValue(); + return C->isNullValue(); +} + +/// formConditionalOp - Combine an operation with a conditional move operand +/// to form a conditional op. e.g. 
(or x, (cmov 0, y, cond)) => (or.cond x, y) +/// (and x, (cmov -1, y, cond)) => (and.cond x, y) +static SDValue formConditionalOp(SDNode *N, SelectionDAG &DAG, + bool Commutable) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + + bool isAND = N->getOpcode() == ISD::AND; + bool isCand = isCMOVWithZeroOrAllOnesLHS(N1, isAND); + if (!isCand && Commutable) { + isCand = isCMOVWithZeroOrAllOnesLHS(N0, isAND); + if (isCand) + std::swap(N0, N1); + } + if (!isCand) + return SDValue(); + + unsigned Opc = 0; + switch (N->getOpcode()) { + default: llvm_unreachable("Unexpected node"); + case ISD::AND: Opc = ARMISD::CAND; break; + case ISD::OR: Opc = ARMISD::COR; break; + case ISD::XOR: Opc = ARMISD::CXOR; break; + } + return DAG.getNode(Opc, N->getDebugLoc(), N->getValueType(0), N0, + N1.getOperand(1), N1.getOperand(2), N1.getOperand(3), + N1.getOperand(4)); +} + static SDValue PerformANDCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI) { + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { // Attempt to use immediate-form VBIC BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1)); @@ -6697,6 +7045,13 @@ static SDValue PerformANDCombine(SDNode *N, } } + if (!Subtarget->isThumb1Only()) { + // (and x, (cmov -1, y, cond)) => (and.cond x, y) + SDValue CAND = formConditionalOp(N, DAG, true); + if (CAND.getNode()) + return CAND; + } + return SDValue(); } @@ -6733,6 +7088,13 @@ static SDValue PerformORCombine(SDNode *N, } } + if (!Subtarget->isThumb1Only()) { + // (or x, (cmov 0, y, cond)) => (or.cond x, y) + SDValue COR = formConditionalOp(N, DAG, true); + if (COR.getNode()) + return COR; + } + SDValue N0 = N->getOperand(0); if (N0.getOpcode() != ISD::AND) return SDValue(); @@ -6881,6 +7243,25 @@ static SDValue PerformORCombine(SDNode *N, return SDValue(); } +static SDValue PerformXORCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { + EVT VT = N->getValueType(0); + SelectionDAG &DAG = DCI.DAG; + + if (!DAG.getTargetLoweringInfo().isTypeLegal(VT)) + return SDValue(); + + if (!Subtarget->isThumb1Only()) { + // (xor x, (cmov 0, y, cond)) => (xor.cond x, y) + SDValue CXOR = formConditionalOp(N, DAG, true); + if (CXOR.getNode()) + return CXOR; + } + + return SDValue(); +} + /// PerformBFICombine - (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff /// the bits being cleared by the AND are not demanded by the BFI. static SDValue PerformBFICombine(SDNode *N, @@ -6926,13 +7307,14 @@ static SDValue PerformVMOVRRDCombine(SDNode *N, SDValue BasePtr = LD->getBasePtr(); SDValue NewLD1 = DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr, LD->getPointerInfo(), LD->isVolatile(), - LD->isNonTemporal(), LD->getAlignment()); + LD->isNonTemporal(), LD->isInvariant(), + LD->getAlignment()); SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, DAG.getConstant(4, MVT::i32)); SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, NewLD1.getValue(1), OffsetPtr, LD->getPointerInfo(), LD->isVolatile(), - LD->isNonTemporal(), + LD->isNonTemporal(), LD->isInvariant(), std::min(4U, LD->getAlignment() / 2)); DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1)); @@ -6967,15 +7349,99 @@ static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) { /// ISD::STORE. static SDValue PerformSTORECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { - // Bitcast an i64 store extracted from a vector to f64. - // Otherwise, the i64 value will be legalized to a pair of i32 values.
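Stepping back to the PerformMULCombine hunk above: the combine now also strength-reduces negative constant multipliers, and a multiplier with trailing zero bits is first shifted out and the shift reapplied at the end. The underlying identities are easy to check in plain C++ (an editor's sketch, not part of the patch; wrap-around uint32_t arithmetic mirrors the i32 DAG node):

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x : {0u, 1u, 7u, 0x89abcdefu}) {
    assert((x << 3) + x == x * 9u);                  // (mul x, 2^N + 1)    -> (add (shl x, N), x)
    assert((x << 3) - x == x * 7u);                  // (mul x, 2^N - 1)    -> (sub (shl x, N), x)
    assert(x - (x << 3) == x * uint32_t(-7));        // (mul x, -(2^N - 1)) -> (sub x, (shl x, N))
    assert(0u - ((x << 3) + x) == x * uint32_t(-9)); // (mul x, -(2^N + 1)) -> -(add (shl x, N), x)
  }
  return 0;
}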
StoreSDNode *St = cast<StoreSDNode>(N); + if (St->isVolatile()) + return SDValue(); + + // Optimize trunc store (of multiple scalars) to shuffle and store. First, + // pack all of the elements in one place. Next, store to memory in fewer + // chunks. SDValue StVal = St->getValue(); - if (!ISD::isNormalStore(St) || St->isVolatile()) + EVT VT = StVal.getValueType(); + if (St->isTruncatingStore() && VT.isVector()) { + SelectionDAG &DAG = DCI.DAG; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT StVT = St->getMemoryVT(); + unsigned NumElems = VT.getVectorNumElements(); + assert(StVT != VT && "Cannot truncate to the same type"); + unsigned FromEltSz = VT.getVectorElementType().getSizeInBits(); + unsigned ToEltSz = StVT.getVectorElementType().getSizeInBits(); + + // From, To sizes and ElemCount must be pow of two + if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz)) return SDValue(); + + // We are going to use the original vector elt for storing. + // Accumulated smaller vector elements must be a multiple of the store size. + if (0 != (NumElems * FromEltSz) % ToEltSz) return SDValue(); + + unsigned SizeRatio = FromEltSz / ToEltSz; + assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits()); + + // Create a type on which we perform the shuffle. + EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(), + NumElems*SizeRatio); + assert(WideVecVT.getSizeInBits() == VT.getSizeInBits()); + + DebugLoc DL = St->getDebugLoc(); + SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal); + SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1); + for (unsigned i = 0; i < NumElems; ++i) ShuffleVec[i] = i * SizeRatio; + + // Can't shuffle using an illegal type. + if (!TLI.isTypeLegal(WideVecVT)) return SDValue(); + + SDValue Shuff = DAG.getVectorShuffle(WideVecVT, DL, WideVec, + DAG.getUNDEF(WideVec.getValueType()), + ShuffleVec.data()); + // At this point all of the data is stored at the bottom of the + // register. We now need to save it to mem. + + // Find the largest store unit + MVT StoreType = MVT::i8; + for (unsigned tp = MVT::FIRST_INTEGER_VALUETYPE; + tp < MVT::LAST_INTEGER_VALUETYPE; ++tp) { + MVT Tp = (MVT::SimpleValueType)tp; + if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz) + StoreType = Tp; + } + // Didn't find a legal store type. + if (!TLI.isTypeLegal(StoreType)) + return SDValue(); + + // Bitcast the original vector into a vector of store-size units + EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(), + StoreType, VT.getSizeInBits()/EVT(StoreType).getSizeInBits()); + assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits()); + SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff); + SmallVector<SDValue, 8> Chains; + SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8, + TLI.getPointerTy()); + SDValue BasePtr = St->getBasePtr(); + + // Perform one or more big stores into memory. 
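Before the store loop that follows, the net effect of this truncating-store rewrite may help on a concrete case: for a <4 x i16> -> <4 x i8> truncstore (SizeRatio = 2), the shuffle gathers every SizeRatio-th element to the bottom of the register and one 32-bit store replaces four byte stores. A scalar sketch (an editor's illustration, not part of the patch; assumes the little-endian lane layout that the index choice i * SizeRatio also assumes):

#include <cstdint>
#include <cstring>

void truncStore_4xi16_to_4xi8(const uint16_t v[4], uint8_t *mem) {
  uint32_t packed = 0;
  for (int i = 0; i < 4; ++i)
    packed |= uint32_t(v[i] & 0xff) << (8 * i); // low byte of each 16-bit lane
  std::memcpy(mem, &packed, sizeof packed);     // one wide store, not four
}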
+ unsigned E = (ToEltSz*NumElems)/StoreType.getSizeInBits(); + for (unsigned I = 0; I < E; I++) { + SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, + StoreType, ShuffWide, + DAG.getIntPtrConstant(I)); + SDValue Ch = DAG.getStore(St->getChain(), DL, SubVec, BasePtr, + St->getPointerInfo(), St->isVolatile(), + St->isNonTemporal(), St->getAlignment()); + BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, + Increment); + Chains.push_back(Ch); + } + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &Chains[0], + Chains.size()); + } + + if (!ISD::isNormalStore(St)) return SDValue(); + // Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and + // ARM stores of arguments in the same cache line. if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR && - StVal.getNode()->hasOneUse() && !St->isVolatile()) { + StVal.getNode()->hasOneUse()) { SelectionDAG &DAG = DCI.DAG; DebugLoc DL = St->getDebugLoc(); SDValue BasePtr = St->getBasePtr(); @@ -6996,6 +7462,8 @@ static SDValue PerformSTORECombine(SDNode *N, StVal.getNode()->getOpcode() != ISD::EXTRACT_VECTOR_ELT) return SDValue(); + // Bitcast an i64 store extracted from a vector to f64. + // Otherwise, the i64 value will be legalized to a pair of i32 values. SelectionDAG &DAG = DCI.DAG; DebugLoc dl = StVal.getDebugLoc(); SDValue IntVec = StVal.getOperand(0); @@ -7177,7 +7645,7 @@ static SDValue CombineBaseUpdate(SDNode *N, if (isIntrinsic) { unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); switch (IntNo) { - default: assert(0 && "unexpected intrinsic for Neon base update"); + default: llvm_unreachable("unexpected intrinsic for Neon base update"); case Intrinsic::arm_neon_vld1: NewOpc = ARMISD::VLD1_UPD; NumVecs = 1; break; case Intrinsic::arm_neon_vld2: NewOpc = ARMISD::VLD2_UPD; @@ -7210,7 +7678,7 @@ static SDValue CombineBaseUpdate(SDNode *N, } else { isLaneOp = true; switch (N->getOpcode()) { - default: assert(0 && "unexpected opcode for Neon base update"); + default: llvm_unreachable("unexpected opcode for Neon base update"); case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break; case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break; case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break; @@ -7703,6 +8171,18 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) { static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST) { EVT VT = N->getValueType(0); + if (N->getOpcode() == ISD::SRL && VT == MVT::i32 && ST->hasV6Ops()) { + // Canonicalize (srl (bswap x), 16) to (rotr (bswap x), 16) if the high + // 16-bits of x is zero. This optimizes rev + lsr 16 to rev16. + SDValue N1 = N->getOperand(1); + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) { + SDValue N0 = N->getOperand(0); + if (C->getZExtValue() == 16 && N0.getOpcode() == ISD::BSWAP && + DAG.MaskedValueIsZero(N0.getOperand(0), + APInt::getHighBitsSet(32, 16))) + return DAG.getNode(ISD::ROTR, N->getDebugLoc(), VT, N0, N1); + } + } // Nothing to be done for scalar shifts. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -7824,7 +8304,7 @@ static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG, // will return -0, so vmin can only be used for unsafe math or if one of // the operands is known to be nonzero. 
if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) && - !UnsafeFPMath && + !DAG.getTarget().Options.UnsafeFPMath && !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) break; Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN; @@ -7846,7 +8326,7 @@ static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG, // will return +0, so vmax can only be used for unsafe math or if one of // the operands is known to be nonzero. if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) && - !UnsafeFPMath && + !DAG.getTarget().Options.UnsafeFPMath && !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) break; Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX; @@ -7906,8 +8386,7 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const { if (Res.getNode()) { APInt KnownZero, KnownOne; - APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()); - DAG.ComputeMaskedBits(SDValue(N,0), Mask, KnownZero, KnownOne); + DAG.ComputeMaskedBits(SDValue(N,0), KnownZero, KnownOne); // Capture demanded bits information that would be otherwise lost. if (KnownZero == 0xfffffffe) Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res, @@ -7931,7 +8410,8 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, case ISD::SUB: return PerformSUBCombine(N, DCI); case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget); case ISD::OR: return PerformORCombine(N, DCI, Subtarget); - case ISD::AND: return PerformANDCombine(N, DCI); + case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget); + case ISD::AND: return PerformANDCombine(N, DCI, Subtarget); case ARMISD::BFI: return PerformBFICombine(N, DCI); case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI); case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG); @@ -8001,6 +8481,41 @@ bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const { } } +static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign, + unsigned AlignCheck) { + return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) && + (DstAlign == 0 || DstAlign % AlignCheck == 0)); +} + +EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size, + unsigned DstAlign, unsigned SrcAlign, + bool IsZeroVal, + bool MemcpyStrSrc, + MachineFunction &MF) const { + const Function *F = MF.getFunction(); + + // See if we can use NEON instructions for this... + if (IsZeroVal && + !F->hasFnAttr(Attribute::NoImplicitFloat) && + Subtarget->hasNEON()) { + if (memOpAlign(SrcAlign, DstAlign, 16) && Size >= 16) { + return MVT::v4i32; + } else if (memOpAlign(SrcAlign, DstAlign, 8) && Size >= 8) { + return MVT::v2i32; + } + } + + // Lowering to i32/i16 if the size permits. + if (Size >= 4) { + return MVT::i32; + } else if (Size >= 2) { + return MVT::i16; + } + + // Let the target-independent logic figure it out. + return MVT::Other; +} + static bool isLegalT1AddressImmediate(int64_t V, EVT VT) { if (V < 0) return false; @@ -8188,7 +8703,6 @@ bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM, if (Scale & 1) return false; return isPowerOf2_32(Scale); } - break; } return true; } @@ -8198,10 +8712,12 @@ bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM, /// a register against the immediate without having to materialize the /// immediate into a register. bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const { + // Thumb2 and ARM modes can use cmn for negative immediates. 
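On the cmn comment just above: CMN r, #imm computes r + imm and sets the same NZCV flags that CMP r, #-imm would, so a compare immediate is usable whenever its absolute value is encodable. The hunk below encodes exactly that rule; a restatement for clarity (assuming the LLVM helpers the patch itself uses):

// cmp r0, #-42  is not encodable as a modified immediate, but
// cmn r0, #42   computes r0 + 42 and yields identical flags.
bool isLegalARMCmpImmediate(int64_t Imm) {
  return ARM_AM::getSOImmVal(llvm::abs64(Imm)) != -1; // only |Imm| must encode
}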
if (!Subtarget->isThumb()) - return ARM_AM::getSOImmVal(Imm) != -1; + return ARM_AM::getSOImmVal(llvm::abs64(Imm)) != -1; if (Subtarget->isThumb2()) - return ARM_AM::getT2SOImmVal(Imm) != -1; + return ARM_AM::getT2SOImmVal(llvm::abs64(Imm)) != -1; + // Thumb1 doesn't have cmn, and only 8-bit immediates. return Imm >= 0 && Imm <= 255; } @@ -8388,22 +8904,20 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, } void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, - const APInt &Mask, APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, unsigned Depth) const { - KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); + KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); switch (Op.getOpcode()) { default: break; case ARMISD::CMOV: { // Bits are known zero/one if known on the LHS and RHS. - DAG.ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1); + DAG.ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); if (KnownZero == 0 && KnownOne == 0) return; APInt KnownZeroRHS, KnownOneRHS; - DAG.ComputeMaskedBits(Op.getOperand(1), Mask, - KnownZeroRHS, KnownOneRHS, Depth+1); + DAG.ComputeMaskedBits(Op.getOperand(1), KnownZeroRHS, KnownOneRHS, Depth+1); KnownZero &= KnownZeroRHS; KnownOne &= KnownOneRHS; return; diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h index 5da9b27..352d980 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h @@ -15,6 +15,7 @@ #ifndef ARMISELLOWERING_H #define ARMISELLOWERING_H +#include "ARM.h" #include "ARMSubtarget.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -56,7 +57,11 @@ namespace llvm { CMPFP, // ARM VFP compare instruction, sets FPSCR. CMPFPw0, // ARM VFP compare against zero instruction, sets FPSCR. FMSTAT, // ARM fmstat instruction. + CMOV, // ARM conditional move instructions. + CAND, // ARM conditional and instructions. + COR, // ARM conditional or instructions. + CXOR, // ARM conditional xor instructions. BCC_i64, @@ -81,7 +86,6 @@ namespace llvm { EH_SJLJ_SETJMP, // SjLj exception handling setjmp. EH_SJLJ_LONGJMP, // SjLj exception handling longjmp. - EH_SJLJ_DISPATCHSETUP, // SjLj exception handling dispatch setup. TC_RETURN, // Tail call return pseudo. @@ -146,6 +150,9 @@ namespace llvm { VMOVIMM, VMVNIMM, + // Vector move f32 immediate: + VMOVFPIMM, + // Vector duplicate: VDUP, VDUPLANE, @@ -266,9 +273,14 @@ namespace llvm { /// allowsUnalignedMemoryAccesses - Returns true if the target allows /// unaligned memory accesses. of the specified type. - /// FIXME: Add getOptimalMemOpType to implement memcpy with NEON? virtual bool allowsUnalignedMemoryAccesses(EVT VT) const; + virtual EVT getOptimalMemOpType(uint64_t Size, + unsigned DstAlign, unsigned SrcAlign, + bool IsZeroVal, + bool MemcpyStrSrc, + MachineFunction &MF) const; + /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty)const; @@ -303,7 +315,6 @@ namespace llvm { SelectionDAG &DAG) const; virtual void computeMaskedBitsForTargetNode(const SDValue Op, - const APInt &Mask, APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, @@ -338,7 +349,7 @@ namespace llvm { /// getRegClassFor - Return the register class that should be used for the /// specified value type. 
- virtual TargetRegisterClass *getRegClassFor(EVT VT) const; + virtual const TargetRegisterClass *getRegClassFor(EVT VT) const; /// getMaximalGlobalOffset - Returns the maximal possible offset which can /// be used for loads / stores from the global. @@ -402,7 +413,6 @@ namespace llvm { ISD::ArgFlagsTy Flags) const; SDValue LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerEH_SJLJ_DISPATCHSETUP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) const; SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; @@ -424,6 +434,8 @@ namespace llvm { SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const; SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerConstantFP(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *ST) const; SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST) const; @@ -452,7 +464,7 @@ namespace llvm { virtual SDValue LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, - bool &isTailCall, + bool doesNotRet, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, @@ -481,7 +493,7 @@ namespace llvm { const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const; - virtual bool isUsedByReturnOnly(SDNode *N) const; + virtual bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const; virtual bool mayBeEmittedAsTailCall(CallInst *CI) const; @@ -512,9 +524,6 @@ namespace llvm { bool signExtend, ARMCC::CondCodes Cond) const; - void EmitBasePointerRecalculation(MachineInstr *MI, MachineBasicBlock *MBB, - MachineBasicBlock *DispatchBB) const; - void SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB, MachineBasicBlock *DispatchBB, int FI) const; diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td b/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td index 7cbc911..1d38bcf 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td @@ -1,4 +1,4 @@ -//===- ARMInstrFormats.td - ARM Instruction Formats ----------*- tablegen -*-=// +//===-- ARMInstrFormats.td - ARM Instruction Formats -------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -174,7 +174,7 @@ def s_cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 CPSR))> { // ARM special operands for disassembly only. 
// -def SetEndAsmOperand : AsmOperandClass { +def SetEndAsmOperand : ImmAsmOperand { let Name = "SetEndImm"; let ParserMethod = "parseSetEndImm"; } @@ -201,21 +201,29 @@ def msr_mask : Operand<i32> { // 16 imm6<5:4> = '01', 16 - <imm> is encoded in imm6<3:0> // 32 imm6<5> = '1', 32 - <imm> is encoded in imm6<4:0> // 64 64 - <imm> is encoded in imm6<5:0> +def shr_imm8_asm_operand : ImmAsmOperand { let Name = "ShrImm8"; } def shr_imm8 : Operand<i32> { let EncoderMethod = "getShiftRight8Imm"; let DecoderMethod = "DecodeShiftRight8Imm"; + let ParserMatchClass = shr_imm8_asm_operand; } +def shr_imm16_asm_operand : ImmAsmOperand { let Name = "ShrImm16"; } def shr_imm16 : Operand<i32> { let EncoderMethod = "getShiftRight16Imm"; let DecoderMethod = "DecodeShiftRight16Imm"; + let ParserMatchClass = shr_imm16_asm_operand; } +def shr_imm32_asm_operand : ImmAsmOperand { let Name = "ShrImm32"; } def shr_imm32 : Operand<i32> { let EncoderMethod = "getShiftRight32Imm"; let DecoderMethod = "DecodeShiftRight32Imm"; + let ParserMatchClass = shr_imm32_asm_operand; } +def shr_imm64_asm_operand : ImmAsmOperand { let Name = "ShrImm64"; } def shr_imm64 : Operand<i32> { let EncoderMethod = "getShiftRight64Imm"; let DecoderMethod = "DecodeShiftRight64Imm"; + let ParserMatchClass = shr_imm64_asm_operand; } //===----------------------------------------------------------------------===// @@ -231,6 +239,14 @@ class VFP2InstAlias<string Asm, dag Result, bit Emit = 0b1> : InstAlias<Asm, Result, Emit>, Requires<[HasVFP2]>; class VFP3InstAlias<string Asm, dag Result, bit Emit = 0b1> : InstAlias<Asm, Result, Emit>, Requires<[HasVFP3]>; +class NEONInstAlias<string Asm, dag Result, bit Emit = 0b1> + : InstAlias<Asm, Result, Emit>, Requires<[HasNEON]>; + + +class VFP2MnemonicAlias<string src, string dst> : MnemonicAlias<src, dst>, + Requires<[HasVFP2]>; +class NEONMnemonicAlias<string src, string dst> : MnemonicAlias<src, dst>, + Requires<[HasNEON]>; //===----------------------------------------------------------------------===// // ARM Instruction templates. @@ -274,6 +290,14 @@ class InstTemplate<AddrMode am, int sz, IndexMode im, class Encoding { field bits<32> Inst; + // Mask of bits that cause an encoding to be UNPREDICTABLE. + // If a bit is set, then if the corresponding bit in the + // target encoding differs from its value in the "Inst" field, + // the instruction is UNPREDICTABLE (SoftFail in abstract parlance). + field bits<32> Unpredictable = 0; + // SoftFail is the generic name for this field, but we alias it so + // as to make it more obvious what it means in ARM-land. + field bits<32> SoftFail = Unpredictable; } class InstARM<AddrMode am, int sz, IndexMode im, @@ -290,6 +314,32 @@ class InstThumb<AddrMode am, int sz, IndexMode im, let DecoderNamespace = "Thumb"; } +// Pseudo-instructions for alternate assembly syntax (never used by codegen). +// These are aliases that require C++ handling to convert to the target +// instruction, while InstAliases can be handled directly by tblgen. +class AsmPseudoInst<string asm, dag iops> + : InstTemplate<AddrModeNone, 0, IndexModeNone, Pseudo, GenericDomain, + "", NoItinerary> { + let OutOperandList = (outs); + let InOperandList = iops; + let Pattern = []; + let isCodeGenOnly = 0; // So we get asm matcher for it. 
+ let AsmString = asm; + let isPseudo = 1; +} + +class ARMAsmPseudo<string asm, dag iops> : AsmPseudoInst<asm, iops>, + Requires<[IsARM]>; +class tAsmPseudo<string asm, dag iops> : AsmPseudoInst<asm, iops>, + Requires<[IsThumb]>; +class t2AsmPseudo<string asm, dag iops> : AsmPseudoInst<asm, iops>, + Requires<[IsThumb2]>; +class VFP2AsmPseudo<string asm, dag iops> : AsmPseudoInst<asm, iops>, + Requires<[HasVFP2]>; +class NEONAsmPseudo<string asm, dag iops> : AsmPseudoInst<asm, iops>, + Requires<[HasNEON]>; + +// Pseudo instructions for the code generator. class PseudoInst<dag oops, dag iops, InstrItinClass itin, list<dag> pattern> : InstTemplate<AddrModeNone, 0, IndexModeNone, Pseudo, GenericDomain, "", itin> { @@ -481,6 +531,8 @@ class AIswp<bit b, dag oops, dag iops, string opc, list<dag> pattern> let Inst{15-12} = Rt; let Inst{11-4} = 0b00001001; let Inst{3-0} = Rt2; + + let DecoderMethod = "DecodeSwap"; } // addrmode1 instructions @@ -792,7 +844,7 @@ class AMiscA1I<bits<8> opcod, bits<4> opc7_4, dag oops, dag iops, } // PKH instructions -def PKHLSLAsmOperand : AsmOperandClass { +def PKHLSLAsmOperand : ImmAsmOperand { let Name = "PKHLSLImm"; let ParserMethod = "parsePKHLSLImm"; } @@ -1550,8 +1602,11 @@ class AVConv1XI<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4, bit op5, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> : AVConv1I<op1, op2, op3, op4, oops, iops, itin, opc, asm, pattern> { + bits<5> fbits; // size (fixed-point number): sx == 0 ? 16 : 32 let Inst{7} = op5; // sx + let Inst{5} = fbits{0}; + let Inst{3-0} = fbits{4-1}; } // VFP conversion instructions, if no NEON @@ -1963,3 +2018,54 @@ class NVDupLane<bits<4> op19_16, bit op6, dag oops, dag iops, class NEONFPPat<dag pattern, dag result> : Pat<pattern, result> { list<Predicate> Predicates = [HasNEON,UseNEONForFP]; } + +// VFP/NEON Instruction aliases for type suffices. +class VFPDataTypeInstAlias<string opc, string dt, string asm, dag Result> : + InstAlias<!strconcat(opc, dt, "\t", asm), Result>, Requires<[HasVFP2]>; + +multiclass VFPDTAnyInstAlias<string opc, string asm, dag Result> { + def : VFPDataTypeInstAlias<opc, ".8", asm, Result>; + def : VFPDataTypeInstAlias<opc, ".16", asm, Result>; + def : VFPDataTypeInstAlias<opc, ".32", asm, Result>; + def : VFPDataTypeInstAlias<opc, ".64", asm, Result>; +} + +multiclass NEONDTAnyInstAlias<string opc, string asm, dag Result> { + let Predicates = [HasNEON] in { + def : VFPDataTypeInstAlias<opc, ".8", asm, Result>; + def : VFPDataTypeInstAlias<opc, ".16", asm, Result>; + def : VFPDataTypeInstAlias<opc, ".32", asm, Result>; + def : VFPDataTypeInstAlias<opc, ".64", asm, Result>; +} +} + +// The same alias classes using AsmPseudo instead, for the more complex +// stuff in NEON that InstAlias can't quite handle. +// Note that we can't use anonymous defm references here like we can +// above, as we care about the ultimate instruction enum names generated, unlike +// for instalias defs. +class NEONDataTypeAsmPseudoInst<string opc, string dt, string asm, dag iops> : + AsmPseudoInst<!strconcat(opc, dt, "\t", asm), iops>, Requires<[HasNEON]>; + +// Data type suffix token aliases. Implements Table A7-3 in the ARM ARM. 
+def : TokenAlias<".s8", ".i8">; +def : TokenAlias<".u8", ".i8">; +def : TokenAlias<".s16", ".i16">; +def : TokenAlias<".u16", ".i16">; +def : TokenAlias<".s32", ".i32">; +def : TokenAlias<".u32", ".i32">; +def : TokenAlias<".s64", ".i64">; +def : TokenAlias<".u64", ".i64">; + +def : TokenAlias<".i8", ".8">; +def : TokenAlias<".i16", ".16">; +def : TokenAlias<".i32", ".32">; +def : TokenAlias<".i64", ".64">; + +def : TokenAlias<".p8", ".8">; +def : TokenAlias<".p16", ".16">; + +def : TokenAlias<".f32", ".32">; +def : TokenAlias<".f64", ".64">; +def : TokenAlias<".f", ".f32">; +def : TokenAlias<".d", ".f64">; diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp index 48da03f..b8f607e 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp @@ -1,4 +1,4 @@ -//===- ARMInstrInfo.cpp - ARM Instruction Information -----------*- C++ -*-===// +//===-- ARMInstrInfo.cpp - ARM Instruction Information --------------------===// // // The LLVM Compiler Infrastructure // @@ -21,12 +21,29 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCInst.h" using namespace llvm; ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI) : ARMBaseInstrInfo(STI), RI(*this, STI) { } +/// getNoopForMachoTarget - Return the noop instruction to use for a noop. +void ARMInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { + if (hasNOP()) { + NopInst.setOpcode(ARM::NOP); + NopInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + NopInst.addOperand(MCOperand::CreateReg(0)); + } else { + NopInst.setOpcode(ARM::MOVr); + NopInst.addOperand(MCOperand::CreateReg(ARM::R0)); + NopInst.addOperand(MCOperand::CreateReg(ARM::R0)); + NopInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + NopInst.addOperand(MCOperand::CreateReg(0)); + NopInst.addOperand(MCOperand::CreateReg(0)); + } +} + unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const { switch (Opc) { default: break; diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h index f2c7bdc..5d3e059 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.h @@ -1,4 +1,4 @@ -//===- ARMInstrInfo.h - ARM Instruction Information -------------*- C++ -*-===// +//===-- ARMInstrInfo.h - ARM Instruction Information ------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -14,11 +14,10 @@ #ifndef ARMINSTRUCTIONINFO_H #define ARMINSTRUCTIONINFO_H -#include "llvm/Target/TargetInstrInfo.h" +#include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMRegisterInfo.h" #include "ARMSubtarget.h" -#include "ARM.h" namespace llvm { class ARMSubtarget; @@ -28,6 +27,9 @@ class ARMInstrInfo : public ARMBaseInstrInfo { public: explicit ARMInstrInfo(const ARMSubtarget &STI); + /// getNoopForMachoTarget - Return the noop instruction to use for a noop. + void getNoopForMachoTarget(MCInst &NopInst) const; + // Return the non-pre/post incrementing version of 'Opc'. Return 0 // if there is not such an opcode. 
unsigned getUnindexedOpcode(unsigned Opc) const; diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td index 2cf0f09..3caaa23 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -58,8 +58,6 @@ def SDT_ARMEH_SJLJ_Setjmp : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisInt<2>]>; def SDT_ARMEH_SJLJ_Longjmp: SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisInt<1>]>; -def SDT_ARMEH_SJLJ_DispatchSetup: SDTypeProfile<0, 1, [SDTCisInt<0>]>; - def SDT_ARMMEMBARRIER : SDTypeProfile<0, 1, [SDTCisInt<0>]>; def SDT_ARMPREFETCH : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisSameAs<1, 2>, @@ -143,9 +141,6 @@ def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP", SDT_ARMEH_SJLJ_Setjmp, [SDNPHasChain]>; def ARMeh_sjlj_longjmp: SDNode<"ARMISD::EH_SJLJ_LONGJMP", SDT_ARMEH_SJLJ_Longjmp, [SDNPHasChain]>; -def ARMeh_sjlj_dispatchsetup: SDNode<"ARMISD::EH_SJLJ_DISPATCHSETUP", - SDT_ARMEH_SJLJ_DispatchSetup, [SDNPHasChain]>; - def ARMMemBarrier : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIER, [SDNPHasChain]>; @@ -184,6 +179,8 @@ def HasVFP2 : Predicate<"Subtarget->hasVFP2()">, AssemblerPredicate<"FeatureVFP2">; def HasVFP3 : Predicate<"Subtarget->hasVFP3()">, AssemblerPredicate<"FeatureVFP3">; +def HasVFP4 : Predicate<"Subtarget->hasVFP4()">, + AssemblerPredicate<"FeatureVFP4">; def HasNEON : Predicate<"Subtarget->hasNEON()">, AssemblerPredicate<"FeatureNEON">; def HasFP16 : Predicate<"Subtarget->hasFP16()">, @@ -211,16 +208,20 @@ def IsARClass : Predicate<"!Subtarget->isMClass()">, AssemblerPredicate<"!FeatureMClass">; def IsARM : Predicate<"!Subtarget->isThumb()">, AssemblerPredicate<"!ModeThumb">; -def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">; -def IsNotDarwin : Predicate<"!Subtarget->isTargetDarwin()">; -def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">, - AssemblerPredicate<"ModeNaCl">; +def IsIOS : Predicate<"Subtarget->isTargetIOS()">; +def IsNotIOS : Predicate<"!Subtarget->isTargetIOS()">; +def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">; // FIXME: Eventually this will be just "hasV6T2Ops". def UseMovt : Predicate<"Subtarget->useMovt()">; def DontUseMovt : Predicate<"!Subtarget->useMovt()">; def UseFPVMLx : Predicate<"Subtarget->useFPVMLx()">; +// Prefer fused MAC for fp mul + add over fp VMLA / VMLS if they are available. +// But only select them if more precision in FP computation is allowed. +def UseFusedMAC : Predicate<"!TM.Options.NoExcessFPPrecision">; +def DontUseFusedMAC : Predicate<"!Subtarget->hasVFP4()">; + //===----------------------------------------------------------------------===// // ARM Flag Definitions. @@ -244,25 +245,28 @@ def so_imm_not_XFORM : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(~(int)N->getZExtValue(), MVT::i32); }]>; -/// imm1_15 predicate - True if the 32-bit immediate is in the range [1,15]. -def imm1_15 : ImmLeaf<i32, [{ - return (int32_t)Imm >= 1 && (int32_t)Imm < 16; -}]>; - /// imm16_31 predicate - True if the 32-bit immediate is in the range [16,31]. 
def imm16_31 : ImmLeaf<i32, [{ return (int32_t)Imm >= 16 && (int32_t)Imm < 32; }]>; -def so_imm_neg : - PatLeaf<(imm), [{ - return ARM_AM::getSOImmVal(-(uint32_t)N->getZExtValue()) != -1; - }], so_imm_neg_XFORM>; +def so_imm_neg_asmoperand : AsmOperandClass { let Name = "ARMSOImmNeg"; } +def so_imm_neg : Operand<i32>, PatLeaf<(imm), [{ + int64_t Value = -(int)N->getZExtValue(); + return Value && ARM_AM::getSOImmVal(Value) != -1; + }], so_imm_neg_XFORM> { + let ParserMatchClass = so_imm_neg_asmoperand; +} -def so_imm_not : - PatLeaf<(imm), [{ +// Note: this pattern doesn't require an encoder method and such, as it's +// only used on aliases (Pat<> and InstAlias<>). The actual encoding +// is handled by the destination instructions, which use so_imm. +def so_imm_not_asmoperand : AsmOperandClass { let Name = "ARMSOImmNot"; } +def so_imm_not : Operand<i32>, PatLeaf<(imm), [{ return ARM_AM::getSOImmVal(~(uint32_t)N->getZExtValue()) != -1; - }], so_imm_not_XFORM>; + }], so_imm_not_XFORM> { + let ParserMatchClass = so_imm_not_asmoperand; +} // sext_16_node predicate - True if the SDNode is sign-extended 16 or more bits. def sext_16_node : PatLeaf<(i32 GPR:$a), [{ @@ -279,14 +283,6 @@ def lo16AllZero : PatLeaf<(i32 imm), [{ return (((uint32_t)N->getZExtValue()) & 0xFFFFUL) == 0; }], hi16>; -/// imm0_65535 - An immediate is in the range [0.65535]. -def Imm0_65535AsmOperand: AsmOperandClass { let Name = "Imm0_65535"; } -def imm0_65535 : Operand<i32>, ImmLeaf<i32, [{ - return Imm >= 0 && Imm < 65536; -}]> { - let ParserMatchClass = Imm0_65535AsmOperand; -} - class BinOpWithFlagFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS, node:$FLAG), res>; class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>; @@ -321,6 +317,9 @@ def fsub_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fsub node:$lhs, node:$rhs),[{ // Operand Definitions. // +// Immediate operands with a shared generic asm render method. +class ImmAsmOperand : AsmOperandClass { let RenderMethod = "addImmOperands"; } + // Branch target. // FIXME: rename brtarget to t2_brtarget def brtarget : Operand<OtherVT> { @@ -352,13 +351,11 @@ def bltarget : Operand<i32> { // Call target for ARM. Handles conditional/unconditional // FIXME: rename bl_target to t2_bltarget? def bl_target : Operand<i32> { - // Encoded the same as branch targets. - let EncoderMethod = "getARMBranchTargetOpValue"; + let EncoderMethod = "getARMBLTargetOpValue"; let OperandType = "OPERAND_PCREL"; } def blx_target : Operand<i32> { - // Encoded the same as branch targets. let EncoderMethod = "getARMBLXTargetOpValue"; let OperandType = "OPERAND_PCREL"; } @@ -475,6 +472,7 @@ def shift_so_reg_reg : Operand<i32>, // reg reg imm let EncoderMethod = "getSORegRegOpValue"; let PrintMethod = "printSORegRegOperand"; let DecoderMethod = "DecodeSORegRegOperand"; + let ParserMatchClass = ShiftedRegAsmOperand; let MIOperandInfo = (ops GPR, GPR, i32imm); } @@ -485,13 +483,14 @@ def shift_so_reg_imm : Operand<i32>, // reg reg imm let EncoderMethod = "getSORegImmOpValue"; let PrintMethod = "printSORegImmOperand"; let DecoderMethod = "DecodeSORegImmOperand"; + let ParserMatchClass = ShiftedImmAsmOperand; let MIOperandInfo = (ops GPR, i32imm); } // so_imm - Match a 32-bit shifter_operand immediate operand, which is an // 8-bit immediate rotated by an arbitrary number of bits. 
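As a reminder of what "8-bit immediate rotated by an arbitrary number of bits" means operationally, here is a standalone checker (an editor's sketch, not part of the patch; the real test is ARM_AM::getSOImmVal):

#include <cstdint>

bool isARMModifiedImm(uint32_t V) {
  for (unsigned Rot = 0; Rot < 32; Rot += 2) {                 // rotations are even only
    uint32_t Unrot = Rot ? (V << Rot) | (V >> (32 - Rot)) : V; // rotate left undoes ROR
    if (Unrot <= 0xffu)
      return true;
  }
  return false;
}
// isARMModifiedImm(0xff000000) == true (0xff ror 8); isARMModifiedImm(0x101) == false

The defs that follow attach an ImmAsmOperand parser class to this operand.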
-def SOImmAsmOperand: AsmOperandClass { let Name = "ARMSOImm"; } +def SOImmAsmOperand: ImmAsmOperand { let Name = "ARMSOImm"; } def so_imm : Operand<i32>, ImmLeaf<i32, [{ return ARM_AM::getSOImmVal(Imm) != -1; }]> { @@ -515,16 +514,60 @@ def arm_i32imm : PatLeaf<(imm), [{ return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue()); }]>; +/// imm0_1 predicate - Immediate in the range [0,1]. +def Imm0_1AsmOperand: ImmAsmOperand { let Name = "Imm0_1"; } +def imm0_1 : Operand<i32> { let ParserMatchClass = Imm0_1AsmOperand; } + +/// imm0_3 predicate - Immediate in the range [0,3]. +def Imm0_3AsmOperand: ImmAsmOperand { let Name = "Imm0_3"; } +def imm0_3 : Operand<i32> { let ParserMatchClass = Imm0_3AsmOperand; } + /// imm0_7 predicate - Immediate in the range [0,7]. -def Imm0_7AsmOperand: AsmOperandClass { let Name = "Imm0_7"; } +def Imm0_7AsmOperand: ImmAsmOperand { let Name = "Imm0_7"; } def imm0_7 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 8; }]> { let ParserMatchClass = Imm0_7AsmOperand; } +/// imm8 predicate - Immediate is exactly 8. +def Imm8AsmOperand: ImmAsmOperand { let Name = "Imm8"; } +def imm8 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 8; }]> { + let ParserMatchClass = Imm8AsmOperand; +} + +/// imm16 predicate - Immediate is exactly 16. +def Imm16AsmOperand: ImmAsmOperand { let Name = "Imm16"; } +def imm16 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 16; }]> { + let ParserMatchClass = Imm16AsmOperand; +} + +/// imm32 predicate - Immediate is exactly 32. +def Imm32AsmOperand: ImmAsmOperand { let Name = "Imm32"; } +def imm32 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 32; }]> { + let ParserMatchClass = Imm32AsmOperand; +} + +/// imm1_7 predicate - Immediate in the range [1,7]. +def Imm1_7AsmOperand: ImmAsmOperand { let Name = "Imm1_7"; } +def imm1_7 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 8; }]> { + let ParserMatchClass = Imm1_7AsmOperand; +} + +/// imm1_15 predicate - Immediate in the range [1,15]. +def Imm1_15AsmOperand: ImmAsmOperand { let Name = "Imm1_15"; } +def imm1_15 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 16; }]> { + let ParserMatchClass = Imm1_15AsmOperand; +} + +/// imm1_31 predicate - Immediate in the range [1,31]. +def Imm1_31AsmOperand: ImmAsmOperand { let Name = "Imm1_31"; } +def imm1_31 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 32; }]> { + let ParserMatchClass = Imm1_31AsmOperand; +} + /// imm0_15 predicate - Immediate in the range [0,15]. -def Imm0_15AsmOperand: AsmOperandClass { let Name = "Imm0_15"; } +def Imm0_15AsmOperand: ImmAsmOperand { let Name = "Imm0_15"; } def imm0_15 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 16; }]> { @@ -532,33 +575,57 @@ def imm0_15 : Operand<i32>, ImmLeaf<i32, [{ } /// imm0_31 predicate - True if the 32-bit immediate is in the range [0,31]. -def Imm0_31AsmOperand: AsmOperandClass { let Name = "Imm0_31"; } +def Imm0_31AsmOperand: ImmAsmOperand { let Name = "Imm0_31"; } def imm0_31 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 32; }]> { let ParserMatchClass = Imm0_31AsmOperand; } +/// imm0_32 predicate - True if the 32-bit immediate is in the range [0,32]. +def Imm0_32AsmOperand: ImmAsmOperand { let Name = "Imm0_32"; } +def imm0_32 : Operand<i32>, ImmLeaf<i32, [{ + return Imm >= 0 && Imm < 32; +}]> { + let ParserMatchClass = Imm0_32AsmOperand; +} + +/// imm0_63 predicate - True if the 32-bit immediate is in the range [0,63]. 
+def Imm0_63AsmOperand: ImmAsmOperand { let Name = "Imm0_63"; } +def imm0_63 : Operand<i32>, ImmLeaf<i32, [{ + return Imm >= 0 && Imm < 64; +}]> { + let ParserMatchClass = Imm0_63AsmOperand; +} + /// imm0_255 predicate - Immediate in the range [0,255]. -def Imm0_255AsmOperand : AsmOperandClass { let Name = "Imm0_255"; } +def Imm0_255AsmOperand : ImmAsmOperand { let Name = "Imm0_255"; } def imm0_255 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 256; }]> { let ParserMatchClass = Imm0_255AsmOperand; } +/// imm0_65535 - An immediate is in the range [0,65535]. +def Imm0_65535AsmOperand: ImmAsmOperand { let Name = "Imm0_65535"; } +def imm0_65535 : Operand<i32>, ImmLeaf<i32, [{ + return Imm >= 0 && Imm < 65536; +}]> { + let ParserMatchClass = Imm0_65535AsmOperand; +} + // imm0_65535_expr - For movt/movw - 16-bit immediate that can also reference // a relocatable expression. // // FIXME: This really needs a Thumb version separate from the ARM version. // While the range is the same, and can thus use the same match class, // the encoding is different so it should have a different encoder method. -def Imm0_65535ExprAsmOperand: AsmOperandClass { let Name = "Imm0_65535Expr"; } +def Imm0_65535ExprAsmOperand: ImmAsmOperand { let Name = "Imm0_65535Expr"; } def imm0_65535_expr : Operand<i32> { let EncoderMethod = "getHiLo16ImmOpValue"; let ParserMatchClass = Imm0_65535ExprAsmOperand; } /// imm24b - True if the 32-bit immediate is encodable in 24 bits. -def Imm24bitAsmOperand: AsmOperandClass { let Name = "Imm24bit"; } +def Imm24bitAsmOperand: ImmAsmOperand { let Name = "Imm24bit"; } def imm24b : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm <= 0xffffff; }]> { @@ -572,6 +639,7 @@ def BitfieldAsmOperand : AsmOperandClass { let Name = "Bitfield"; let ParserMethod = "parseBitfield"; } + def bf_inv_mask_imm : Operand<i32>, PatLeaf<(imm), [{ return ARM::isBitFieldInvertedMask(N->getZExtValue()); @@ -670,7 +738,7 @@ def postidx_reg : Operand<i32> { let DecoderMethod = "DecodePostIdxReg"; let PrintMethod = "printPostIdxRegOperand"; let ParserMatchClass = PostIdxRegAsmOperand; - let MIOperandInfo = (ops GPR, i32imm); + let MIOperandInfo = (ops GPRnopc, i32imm); } @@ -699,7 +767,7 @@ def am2offset_reg : Operand<i32>, let PrintMethod = "printAddrMode2OffsetOperand"; // When using this for assembly, it's always as a post-index offset. let ParserMatchClass = PostIdxRegShiftedAsmOperand; - let MIOperandInfo = (ops GPR, i32imm); + let MIOperandInfo = (ops GPRnopc, i32imm); } // FIXME: am2offset_imm should only need the immediate, not the GPR. Having @@ -711,7 +779,7 @@ def am2offset_imm : Operand<i32>, let EncoderMethod = "getAddrMode2OffsetOpValue"; let PrintMethod = "printAddrMode2OffsetOperand"; let ParserMatchClass = AM2OffsetImmAsmOperand; - let MIOperandInfo = (ops GPR, i32imm); + let MIOperandInfo = (ops GPRnopc, i32imm); } @@ -799,6 +867,9 @@ def addrmode6dup : Operand<i32>, let PrintMethod = "printAddrMode6Operand"; let MIOperandInfo = (ops GPR:$addr, i32imm); let EncoderMethod = "getAddrMode6DupAddressOpValue"; + // FIXME: This is close, but not quite right. The alignment specifier is + // different. + let ParserMatchClass = AddrMode6AsmOperand; } // addrmodepc := pc + reg @@ -1041,69 +1112,58 @@ multiclass AsI1_rbin_irs<bits<4> opcod, string opc, } -/// AsI1_rbin_s_is - Same as AsI1_rbin_s_is except it sets 's' bit by default. +/// AsI1_bin_s_irs - Same as AsI1_bin_irs except it sets the 's' bit by default.
/// /// These opcodes will be converted to the real non-S opcodes by -/// AdjustInstrPostInstrSelection after giving then an optional CPSR operand. -let hasPostISelHook = 1, isCodeGenOnly = 1, isPseudo = 1, Defs = [CPSR] in { -multiclass AsI1_rbin_s_is<bits<4> opcod, string opc, - InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, - PatFrag opnode, bit Commutable = 0> { - def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm, - iii, opc, "\t$Rd, $Rn, $imm", - [(set GPR:$Rd, CPSR, (opnode so_imm:$imm, GPR:$Rn))]>; - - def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, - iir, opc, "\t$Rd, $Rn, $Rm", - [/* pattern left blank */]>; - - def rsi : AsI1<opcod, (outs GPR:$Rd), - (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm, - iis, opc, "\t$Rd, $Rn, $shift", - [(set GPR:$Rd, CPSR, (opnode so_reg_imm:$shift, GPR:$Rn))]>; - - def rsr : AsI1<opcod, (outs GPR:$Rd), - (ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm, - iis, opc, "\t$Rd, $Rn, $shift", - [(set GPR:$Rd, CPSR, (opnode so_reg_reg:$shift, GPR:$Rn))]> { - bits<4> Rd; - bits<4> Rn; - bits<12> shift; - let Inst{25} = 0; - let Inst{19-16} = Rn; - let Inst{15-12} = Rd; - let Inst{11-8} = shift{11-8}; - let Inst{7} = 0; - let Inst{6-5} = shift{6-5}; - let Inst{4} = 1; - let Inst{3-0} = shift{3-0}; +/// AdjustInstrPostInstrSelection after giving them an optional CPSR operand. +let hasPostISelHook = 1, Defs = [CPSR] in { +multiclass AsI1_bin_s_irs<InstrItinClass iii, InstrItinClass iir, + InstrItinClass iis, PatFrag opnode, + bit Commutable = 0> { + def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm, pred:$p), + 4, iii, + [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_imm:$imm))]>; + + def rr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, pred:$p), + 4, iir, + [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, GPR:$Rm))]> { + let isCommutable = Commutable; } + def rsi : ARMPseudoInst<(outs GPR:$Rd), + (ins GPR:$Rn, so_reg_imm:$shift, pred:$p), + 4, iis, + [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, + so_reg_imm:$shift))]>; + + def rsr : ARMPseudoInst<(outs GPR:$Rd), + (ins GPR:$Rn, so_reg_reg:$shift, pred:$p), + 4, iis, + [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, + so_reg_reg:$shift))]>; } } -/// AsI1_bin_s_irs - Same as AsI1_bin_irs except it sets the 's' bit by default. -/// -/// These opcodes will be converted to the real non-S opcodes by -/// AdjustInstrPostInstrSelection after giving then an optional CPSR operand. -let hasPostISelHook = 1, isCodeGenOnly = 1, isPseudo = 1, Defs = [CPSR] in { -multiclass AsI1_bin_s_irs<bits<4> opcod, string opc, - InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, - PatFrag opnode, bit Commutable = 0> { - def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm, - iii, opc, "\t$Rd, $Rn, $imm", - [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_imm:$imm))]>; - def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, - iir, opc, "\t$Rd, $Rn, $Rm", - [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, GPR:$Rm))]>; - def rsi : AsI1<opcod, (outs GPR:$Rd), - (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm, - iis, opc, "\t$Rd, $Rn, $shift", - [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_reg_imm:$shift))]>; +/// AsI1_rbin_s_is - Same as AsI1_bin_s_irs, except selection DAG +/// operands are reversed. 
+let hasPostISelHook = 1, Defs = [CPSR] in { +multiclass AsI1_rbin_s_is<InstrItinClass iii, InstrItinClass iir, + InstrItinClass iis, PatFrag opnode, + bit Commutable = 0> { + def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm, pred:$p), + 4, iii, + [(set GPR:$Rd, CPSR, (opnode so_imm:$imm, GPR:$Rn))]>; - def rsr : AsI1<opcod, (outs GPR:$Rd), - (ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm, - iis, opc, "\t$Rd, $Rn, $shift", - [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_reg_reg:$shift))]>; + def rsi : ARMPseudoInst<(outs GPR:$Rd), + (ins GPR:$Rn, so_reg_imm:$shift, pred:$p), + 4, iis, + [(set GPR:$Rd, CPSR, (opnode so_reg_imm:$shift, + GPR:$Rn))]>; + + def rsr : ARMPseudoInst<(outs GPR:$Rd), + (ins GPR:$Rn, so_reg_reg:$shift, pred:$p), + 4, iis, + [(set GPR:$Rd, CPSR, (opnode so_reg_reg:$shift, + GPR:$Rn))]>; } } @@ -1272,10 +1332,10 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, let Inst{4} = 0; let Inst{3-0} = shift{3-0}; } - def rsr : AsI1<opcod, (outs GPR:$Rd), - (ins GPR:$Rn, so_reg_reg:$shift), + def rsr : AsI1<opcod, (outs GPRnopc:$Rd), + (ins GPRnopc:$Rn, so_reg_reg:$shift), DPSoRegRegFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift", - [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_reg_reg:$shift, CPSR))]>, + [(set GPRnopc:$Rd, CPSR, (opnode GPRnopc:$Rn, so_reg_reg:$shift, CPSR))]>, Requires<[IsARM]> { bits<4> Rd; bits<4> Rn; @@ -1309,7 +1369,7 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, cc_out:$s)>, Requires<[IsARM]>; def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"), - (!cast<Instruction>(!strconcat(baseOpc, "rsr")) GPR:$Rdn, GPR:$Rdn, + (!cast<Instruction>(!strconcat(baseOpc, "rsr")) GPRnopc:$Rdn, GPRnopc:$Rdn, so_reg_reg:$shift, pred:$p, cc_out:$s)>, Requires<[IsARM]>; @@ -1550,7 +1610,7 @@ PseudoInst<(outs), (ins i32imm:$amt, pred:$p), NoItinerary, } // Atomic pseudo-insts which will be lowered to ldrexd/strexd loops. -// (These psuedos use a hand-written selection code). +// (These pseudos use a hand-written selection code). let usesCustomInserter = 1, Defs = [CPSR], mayLoad = 1, mayStore = 1 in { def ATOMOR6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), (ins GPR:$addr, GPR:$src1, GPR:$src2), @@ -1652,7 +1712,7 @@ class CPS<dag iops, string asm_ops> let Inst{27-20} = 0b00010000; let Inst{19-18} = imod; let Inst{17} = M; // Enabled if mode is set; - let Inst{16} = 0; + let Inst{16-9} = 0b00000000; let Inst{8-6} = iflags; let Inst{5} = 0; let Inst{4-0} = mode; @@ -1839,20 +1899,17 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { } } -// All calls clobber the non-callee saved registers. SP is marked as -// a use to prevent stack-pointer assignments that appear immediately -// before calls from potentially appearing dead. +// SP is marked as a use to prevent stack-pointer assignments that appear +// immediately before calls from potentially appearing dead. let isCall = 1, - // On non-Darwin platforms R9 is callee-saved. // FIXME: Do we really need a non-predicated version? If so, it should // at least be a pseudo instruction expanding to the predicated version // at MC lowering time. 
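  // The form below lists only LR as a fixed Def; the remaining call-clobbered
  // registers are assumed to be conveyed by the call site's register mask
  // operand rather than by fixed Defs on the instruction itself.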
- Defs = [R0, R1, R2, R3, R12, LR, QQQQ0, QQQQ2, QQQQ3, CPSR, FPSCR], - Uses = [SP] in { + Defs = [LR], Uses = [SP] in { def BL : ABXI<0b1011, (outs), (ins bl_target:$func, variable_ops), IIC_Br, "bl\t$func", [(ARMcall tglobaladdr:$func)]>, - Requires<[IsARM, IsNotDarwin]> { + Requires<[IsARM]> { let Inst{31-28} = 0b1110; bits<24> func; let Inst{23-0} = func; @@ -1862,7 +1919,7 @@ let isCall = 1, def BL_pred : ABI<0b1011, (outs), (ins bl_target:$func, variable_ops), IIC_Br, "bl", "\t$func", [(ARMcall_pred tglobaladdr:$func)]>, - Requires<[IsARM, IsNotDarwin]> { + Requires<[IsARM]> { bits<24> func; let Inst{23-0} = func; let DecoderMethod = "DecodeBranchImmInstruction"; @@ -1872,7 +1929,7 @@ let isCall = 1, def BLX : AXI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm, IIC_Br, "blx\t$func", [(ARMcall GPR:$func)]>, - Requires<[IsARM, HasV5T, IsNotDarwin]> { + Requires<[IsARM, HasV5T]> { bits<4> func; let Inst{31-4} = 0b1110000100101111111111110011; let Inst{3-0} = func; @@ -1881,7 +1938,7 @@ let isCall = 1, def BLX_pred : AI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm, IIC_Br, "blx", "\t$func", [(ARMcall_pred GPR:$func)]>, - Requires<[IsARM, HasV5T, IsNotDarwin]> { + Requires<[IsARM, HasV5T]> { bits<4> func; let Inst{27-4} = 0b000100101111111111110011; let Inst{3-0} = func; @@ -1891,55 +1948,19 @@ let isCall = 1, // Note: Restrict $func to the tGPR regclass to prevent it being in LR. def BX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops), 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, - Requires<[IsARM, HasV4T, IsNotDarwin]>; + Requires<[IsARM, HasV4T]>; // ARMv4 def BMOVPCRX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops), 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, - Requires<[IsARM, NoV4T, IsNotDarwin]>; -} - -let isCall = 1, - // On Darwin R9 is call-clobbered. - // R7 is marked as a use to prevent frame-pointer assignments from being - // moved above / below calls. - Defs = [R0, R1, R2, R3, R9, R12, LR, QQQQ0, QQQQ2, QQQQ3, CPSR, FPSCR], - Uses = [R7, SP] in { - def BLr9 : ARMPseudoExpand<(outs), (ins bl_target:$func, variable_ops), - 4, IIC_Br, - [(ARMcall tglobaladdr:$func)], (BL bl_target:$func)>, - Requires<[IsARM, IsDarwin]>; - - def BLr9_pred : ARMPseudoExpand<(outs), - (ins bl_target:$func, pred:$p, variable_ops), - 4, IIC_Br, - [(ARMcall_pred tglobaladdr:$func)], - (BL_pred bl_target:$func, pred:$p)>, - Requires<[IsARM, IsDarwin]>; - - // ARMv5T and above - def BLXr9 : ARMPseudoExpand<(outs), (ins GPR:$func, variable_ops), - 4, IIC_Br, - [(ARMcall GPR:$func)], - (BLX GPR:$func)>, - Requires<[IsARM, HasV5T, IsDarwin]>; - - def BLXr9_pred: ARMPseudoExpand<(outs), (ins GPR:$func, pred:$p,variable_ops), - 4, IIC_Br, - [(ARMcall_pred GPR:$func)], - (BLX_pred GPR:$func, pred:$p)>, - Requires<[IsARM, HasV5T, IsDarwin]>; + Requires<[IsARM, NoV4T]>; - // ARMv4T - // Note: Restrict $func to the tGPR regclass to prevent it being in LR. - def BXr9_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops), - 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, - Requires<[IsARM, HasV4T, IsDarwin]>; - - // ARMv4 - def BMOVPCRXr9_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops), - 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, - Requires<[IsARM, NoV4T, IsDarwin]>; + // mov lr, pc; b if callee is marked noreturn to avoid confusing the + // return stack predictor. 
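+  // For example, for a noreturn callee "foo" (name purely illustrative),
+  // this expands to
+  //   mov lr, pc
+  //   b   foo
+  // rather than "bl foo", so no return address is pushed onto the return
+  // stack for a call that never returns.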
+ def BMOVPCB_CALL : ARMPseudoInst<(outs), + (ins bl_target:$func, variable_ops), + 8, IIC_Br, [(ARMcall_nolink tglobaladdr:$func)]>, + Requires<[IsARM]>; } let isBranch = 1, isTerminator = 1 in { @@ -2006,47 +2027,22 @@ def BXJ : ABI<0b0001, (outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func", // Tail calls. -let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { - // Darwin versions. - let Defs = [R0, R1, R2, R3, R9, R12, QQQQ0, QQQQ2, QQQQ3, PC], - Uses = [SP] in { - def TCRETURNdi : PseudoInst<(outs), (ins i32imm:$dst, variable_ops), - IIC_Br, []>, Requires<[IsDarwin]>; - - def TCRETURNri : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops), - IIC_Br, []>, Requires<[IsDarwin]>; - - def TAILJMPd : ARMPseudoExpand<(outs), (ins br_target:$dst, variable_ops), - 4, IIC_Br, [], - (Bcc br_target:$dst, (ops 14, zero_reg))>, - Requires<[IsARM, IsDarwin]>; - - def TAILJMPr : ARMPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops), - 4, IIC_Br, [], - (BX GPR:$dst)>, - Requires<[IsARM, IsDarwin]>; - - } - - // Non-Darwin versions (the difference is R9). - let Defs = [R0, R1, R2, R3, R12, QQQQ0, QQQQ2, QQQQ3, PC], - Uses = [SP] in { - def TCRETURNdiND : PseudoInst<(outs), (ins i32imm:$dst, variable_ops), - IIC_Br, []>, Requires<[IsNotDarwin]>; +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in { + def TCRETURNdi : PseudoInst<(outs), (ins i32imm:$dst, variable_ops), + IIC_Br, []>; - def TCRETURNriND : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops), - IIC_Br, []>, Requires<[IsNotDarwin]>; + def TCRETURNri : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops), + IIC_Br, []>; - def TAILJMPdND : ARMPseudoExpand<(outs), (ins brtarget:$dst, variable_ops), - 4, IIC_Br, [], - (Bcc br_target:$dst, (ops 14, zero_reg))>, - Requires<[IsARM, IsNotDarwin]>; + def TAILJMPd : ARMPseudoExpand<(outs), (ins br_target:$dst, variable_ops), + 4, IIC_Br, [], + (Bcc br_target:$dst, (ops 14, zero_reg))>, + Requires<[IsARM]>; - def TAILJMPrND : ARMPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops), - 4, IIC_Br, [], - (BX GPR:$dst)>, - Requires<[IsARM, IsNotDarwin]>; - } + def TAILJMPr : ARMPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops), + 4, IIC_Br, [], + (BX GPR:$dst)>, + Requires<[IsARM]>; } // Secure Monitor Call is a system instruction. @@ -2145,7 +2141,7 @@ def RFEIB_UPD : RFEI<1, "rfeib\t$Rn!"> { } //===----------------------------------------------------------------------===// -// Load / store Instructions. +// Load / Store Instructions. 
//
// Load
@@ -2197,9 +2193,10 @@ def LDRD : AI3ld<0b1101, 0, (outs GPR:$Rd, GPR:$dst2),
 }
 
 // Indexed loads
-multiclass AI2_ldridx<bit isByte, string opc, InstrItinClass itin> {
+multiclass AI2_ldridx<bit isByte, string opc,
+                      InstrItinClass iii, InstrItinClass iir> {
   def _PRE_IMM : AI2ldstidx<1, isByte, 1, (outs GPR:$Rt, GPR:$Rn_wb),
-                      (ins addrmode_imm12:$addr), IndexModePre, LdFrm, itin,
+                      (ins addrmode_imm12:$addr), IndexModePre, LdFrm, iii,
                       opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
     bits<17> addr;
     let Inst{25} = 0;
@@ -2211,7 +2208,7 @@ multiclass AI2_ldridx<bit isByte, string opc, InstrItinClass itin> {
   }
 
   def _PRE_REG  : AI2ldstidx<1, isByte, 1, (outs GPR:$Rt, GPR:$Rn_wb),
-                      (ins ldst_so_reg:$addr), IndexModePre, LdFrm, itin,
+                      (ins ldst_so_reg:$addr), IndexModePre, LdFrm, iir,
                       opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
     bits<17> addr;
     let Inst{25} = 1;
@@ -2225,7 +2222,7 @@ multiclass AI2_ldridx<bit isByte, string opc, InstrItinClass itin> {
   def _POST_REG : AI2ldstidx<1, isByte, 0, (outs GPR:$Rt, GPR:$Rn_wb),
                (ins addr_offset_none:$addr, am2offset_reg:$offset),
-               IndexModePost, LdFrm, itin,
+               IndexModePost, LdFrm, iir,
                opc, "\t$Rt, $addr, $offset", "$addr.base = $Rn_wb", []> {
     // {12}     isAdd
@@ -2242,7 +2239,7 @@ multiclass AI2_ldridx<bit isByte, string opc, InstrItinClass itin> {
   def _POST_IMM : AI2ldstidx<1, isByte, 0, (outs GPR:$Rt, GPR:$Rn_wb),
                (ins addr_offset_none:$addr, am2offset_imm:$offset),
-               IndexModePost, LdFrm, itin,
+               IndexModePost, LdFrm, iii,
                opc, "\t$Rt, $addr, $offset", "$addr.base = $Rn_wb", []> {
     // {12}     isAdd
@@ -2260,8 +2257,10 @@ multiclass AI2_ldridx<bit isByte, string opc, InstrItinClass itin> {
 }
 
 let mayLoad = 1, neverHasSideEffects = 1 in {
-defm LDR  : AI2_ldridx<0, "ldr",  IIC_iLoad_ru>;
-defm LDRB : AI2_ldridx<1, "ldrb", IIC_iLoad_bh_ru>;
+// FIXME: for LDR_PRE_REG etc. the itinerary should be either IIC_iLoad_ru or
+// IIC_iLoad_siu depending on whether the offset register is shifted.
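+// As instantiated below, the immediate forms (e.g. "ldr r0, [r1, #4]!") use
+// IIC_iLoad_iu and the register forms (e.g. "ldr r0, [r1, r2]!") use
+// IIC_iLoad_ru; see the FIXME above for the shifted-register caveat.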
+defm LDR  : AI2_ldridx<0, "ldr",  IIC_iLoad_iu, IIC_iLoad_ru>;
+defm LDRB : AI2_ldridx<1, "ldrb", IIC_iLoad_bh_iu, IIC_iLoad_bh_ru>;
 }
 
 multiclass AI3_ldridx<bits<4> op, string opc, InstrItinClass itin> {
@@ -2416,7 +2415,7 @@ multiclass AI3ldrT<bits<4> op, string opc> {
     let Inst{3-0} = offset{3-0};
     let AsmMatchConverter = "cvtLdExtTWriteBackImm";
   }
-  def r : AI3ldstidxT<op, 1, (outs GPR:$Rt, GPR:$base_wb),
+  def r : AI3ldstidxT<op, 1, (outs GPRnopc:$Rt, GPRnopc:$base_wb),
                       (ins addr_offset_none:$addr, postidx_reg:$Rm),
                       IndexModePost, LdMiscFrm, IIC_iLoad_bh_ru,
                       opc, "\t$Rt, $addr, $Rm", "$addr.base = $base_wb", []> {
@@ -2424,8 +2423,10 @@ multiclass AI3ldrT<bits<4> op, string opc> {
     let Inst{23} = Rm{4};
     let Inst{22} = 0;
     let Inst{11-8} = 0;
+    let Unpredictable{11-8} = 0b1111;
    let Inst{3-0} = Rm{3-0};
     let AsmMatchConverter = "cvtLdExtTWriteBackReg";
+    let DecoderMethod = "DecodeLDR";
   }
 }
@@ -2451,10 +2452,11 @@ def STRD : AI3str<0b1111, (outs), (ins GPR:$Rt, GPR:$src2, addrmode3:$addr),
 }
 
 // Indexed stores
-multiclass AI2_stridx<bit isByte, string opc, InstrItinClass itin> {
+multiclass AI2_stridx<bit isByte, string opc,
+                      InstrItinClass iii, InstrItinClass iir> {
   def _PRE_IMM : AI2ldstidx<0, isByte, 1, (outs GPR:$Rn_wb),
                             (ins GPR:$Rt, addrmode_imm12:$addr), IndexModePre,
-                            StFrm, itin,
+                            StFrm, iii,
                             opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
     bits<17> addr;
     let Inst{25} = 0;
@@ -2467,7 +2469,7 @@ multiclass AI2_stridx<bit isByte, string opc, InstrItinClass itin> {
 
   def _PRE_REG : AI2ldstidx<0, isByte, 1, (outs GPR:$Rn_wb),
                             (ins GPR:$Rt, ldst_so_reg:$addr),
-                            IndexModePre, StFrm, itin,
+                            IndexModePre, StFrm, iir,
                             opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
     bits<17> addr;
     let Inst{25} = 1;
@@ -2480,7 +2482,7 @@ multiclass AI2_stridx<bit isByte, string opc, InstrItinClass itin> {
   }
   def _POST_REG : AI2ldstidx<0, isByte, 0, (outs GPR:$Rn_wb),
                (ins GPR:$Rt, addr_offset_none:$addr, am2offset_reg:$offset),
-               IndexModePost, StFrm, itin,
+               IndexModePost, StFrm, iir,
                opc, "\t$Rt, $addr, $offset", "$addr.base = $Rn_wb", []> {
     // {12}     isAdd
@@ -2497,7 +2499,7 @@ multiclass AI2_stridx<bit isByte, string opc, InstrItinClass itin> {
   def _POST_IMM : AI2ldstidx<0, isByte, 0, (outs GPR:$Rn_wb),
                (ins GPR:$Rt, addr_offset_none:$addr, am2offset_imm:$offset),
-               IndexModePost, StFrm, itin,
+               IndexModePost, StFrm, iii,
                opc, "\t$Rt, $addr, $offset", "$addr.base = $Rn_wb", []> {
     // {12}     isAdd
@@ -2514,8 +2516,10 @@ multiclass AI2_stridx<bit isByte, string opc, InstrItinClass itin> {
 }
 
 let mayStore = 1, neverHasSideEffects = 1 in {
-defm STR  : AI2_stridx<0, "str", IIC_iStore_ru>;
-defm STRB : AI2_stridx<1, "strb", IIC_iStore_bh_ru>;
+// FIXME: for STR_PRE_REG etc. the itinerary should be either IIC_iStore_ru or
+// IIC_iStore_siu depending on whether the offset register is shifted.
+defm STR  : AI2_stridx<0, "str", IIC_iStore_iu, IIC_iStore_ru>;
+defm STRB : AI2_stridx<1, "strb", IIC_iStore_bh_iu, IIC_iStore_bh_ru>;
 }
 
 def : ARMPat<(post_store GPR:$Rt, addr_offset_none:$addr,
@@ -2745,23 +2749,25 @@ defm STRHT : AI3strT<0b1011, "strht">;
 // Load / store multiple Instructions.
 //
 
-multiclass arm_ldst_mult<string asm, bit L_bit, Format f,
+multiclass arm_ldst_mult<string asm, string sfx, bit L_bit, bit P_bit, Format f,
                          InstrItinClass itin, InstrItinClass itin_upd> {
   // IA is the default, so no need for an explicit suffix on the
   // mnemonic here. The form without it is the canonical spelling.
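  // For example, "ldm r0!, {r1, r2}" and "ldmia r0!, {r1, r2}" denote the
  // same instruction, with the suffix-less form treated as canonical.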
def IA : AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), IndexModeNone, f, itin, - !strconcat(asm, "${p}\t$Rn, $regs"), "", []> { + !strconcat(asm, "${p}\t$Rn, $regs", sfx), "", []> { let Inst{24-23} = 0b01; // Increment After + let Inst{22} = P_bit; let Inst{21} = 0; // No writeback let Inst{20} = L_bit; } def IA_UPD : AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), IndexModeUpd, f, itin_upd, - !strconcat(asm, "${p}\t$Rn!, $regs"), "$Rn = $wb", []> { + !strconcat(asm, "${p}\t$Rn!, $regs", sfx), "$Rn = $wb", []> { let Inst{24-23} = 0b01; // Increment After + let Inst{22} = P_bit; let Inst{21} = 1; // Writeback let Inst{20} = L_bit; @@ -2770,16 +2776,18 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f, def DA : AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), IndexModeNone, f, itin, - !strconcat(asm, "da${p}\t$Rn, $regs"), "", []> { + !strconcat(asm, "da${p}\t$Rn, $regs", sfx), "", []> { let Inst{24-23} = 0b00; // Decrement After + let Inst{22} = P_bit; let Inst{21} = 0; // No writeback let Inst{20} = L_bit; } def DA_UPD : AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), IndexModeUpd, f, itin_upd, - !strconcat(asm, "da${p}\t$Rn!, $regs"), "$Rn = $wb", []> { + !strconcat(asm, "da${p}\t$Rn!, $regs", sfx), "$Rn = $wb", []> { let Inst{24-23} = 0b00; // Decrement After + let Inst{22} = P_bit; let Inst{21} = 1; // Writeback let Inst{20} = L_bit; @@ -2788,16 +2796,18 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f, def DB : AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), IndexModeNone, f, itin, - !strconcat(asm, "db${p}\t$Rn, $regs"), "", []> { + !strconcat(asm, "db${p}\t$Rn, $regs", sfx), "", []> { let Inst{24-23} = 0b10; // Decrement Before + let Inst{22} = P_bit; let Inst{21} = 0; // No writeback let Inst{20} = L_bit; } def DB_UPD : AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), IndexModeUpd, f, itin_upd, - !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> { + !strconcat(asm, "db${p}\t$Rn!, $regs", sfx), "$Rn = $wb", []> { let Inst{24-23} = 0b10; // Decrement Before + let Inst{22} = P_bit; let Inst{21} = 1; // Writeback let Inst{20} = L_bit; @@ -2806,16 +2816,18 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f, def IB : AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), IndexModeNone, f, itin, - !strconcat(asm, "ib${p}\t$Rn, $regs"), "", []> { + !strconcat(asm, "ib${p}\t$Rn, $regs", sfx), "", []> { let Inst{24-23} = 0b11; // Increment Before + let Inst{22} = P_bit; let Inst{21} = 0; // No writeback let Inst{20} = L_bit; } def IB_UPD : AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops), IndexModeUpd, f, itin_upd, - !strconcat(asm, "ib${p}\t$Rn!, $regs"), "$Rn = $wb", []> { + !strconcat(asm, "ib${p}\t$Rn!, $regs", sfx), "$Rn = $wb", []> { let Inst{24-23} = 0b11; // Increment Before + let Inst{22} = P_bit; let Inst{21} = 1; // Writeback let Inst{20} = L_bit; @@ -2826,10 +2838,12 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f, let neverHasSideEffects = 1 in { let mayLoad = 1, hasExtraDefRegAllocReq = 1 in -defm LDM : arm_ldst_mult<"ldm", 1, LdStMulFrm, IIC_iLoad_m, IIC_iLoad_mu>; +defm LDM : arm_ldst_mult<"ldm", "", 1, 0, LdStMulFrm, IIC_iLoad_m, + IIC_iLoad_mu>; let mayStore = 1, hasExtraSrcRegAllocReq = 1 in -defm STM : arm_ldst_mult<"stm", 0, LdStMulFrm, IIC_iStore_m, IIC_iStore_mu>; +defm STM : arm_ldst_mult<"stm", "", 0, 0, LdStMulFrm, IIC_iStore_m, + IIC_iStore_mu>; } // 
neverHasSideEffects
@@ -2843,6 +2857,16 @@ def LDMIA_RET : ARMPseudoExpand<(outs GPR:$wb), (ins GPR:$Rn, pred:$p,
                  (LDMIA_UPD GPR:$wb, GPR:$Rn, pred:$p, reglist:$regs)>,
       RegConstraint<"$Rn = $wb">;
 
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
+defm sysLDM : arm_ldst_mult<"ldm", " ^", 1, 1, LdStMulFrm, IIC_iLoad_m,
+                            IIC_iLoad_mu>;
+
+let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
+defm sysSTM : arm_ldst_mult<"stm", " ^", 0, 1, LdStMulFrm, IIC_iStore_m,
+                            IIC_iStore_mu>;
+
+
+
 //===----------------------------------------------------------------------===//
 // Move Instructions.
 //
@@ -2860,7 +2884,7 @@ def MOVr : AsI1<0b1101, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMOVr,
   let Inst{15-12} = Rd;
 }
 
-def : ARMInstAlias<"movs${p} $Rd, $Rm",
+def : ARMInstAlias<"movs${p} $Rd, $Rm",
                    (MOVr GPR:$Rd, GPR:$Rm, pred:$p, CPSR)>;
 
 // A version for the smaller set of tail call registers.
@@ -3080,20 +3104,18 @@ defm SUB : AsI1_bin_irs<0b0010, "sub",
 
 // ADD and SUB with 's' bit set.
 //
-// Currently, t2ADDS/t2SUBS are pseudo opcodes that exist only in the
-// selection DAG. They are "lowered" to real t2ADD/t2SUB opcodes by
+// Currently, ADDS/SUBS are pseudo opcodes that exist only in the
+// selection DAG. They are "lowered" to real ADD/SUB opcodes by
 // AdjustInstrPostInstrSelection where we determine whether or not to
 // set the "s" bit based on CPSR liveness.
 //
-// FIXME: Eliminate t2ADDS/t2SUBS pseudo opcodes after adding tablegen
+// FIXME: Eliminate ADDS/SUBS pseudo opcodes after adding tablegen
 // support for an optional CPSR definition that corresponds to the DAG
 // node's second value. We can then eliminate the implicit def of CPSR.
-defm ADDS : AsI1_bin_s_irs<0b0100, "add",
-                           IIC_iALUi, IIC_iALUr, IIC_iALUsr,
-                           BinOpFrag<(ARMaddc node:$LHS, node:$RHS)>, 1>;
-defm SUBS : AsI1_bin_s_irs<0b0010, "sub",
-                           IIC_iALUi, IIC_iALUr, IIC_iALUsr,
-                           BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>;
+defm ADDS : AsI1_bin_s_irs<IIC_iALUi, IIC_iALUr, IIC_iALUsr,
+                           BinOpFrag<(ARMaddc node:$LHS, node:$RHS)>, 1>;
+defm SUBS : AsI1_bin_s_irs<IIC_iALUi, IIC_iALUr, IIC_iALUsr,
+                           BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>;
 
 defm ADC : AI1_adde_sube_irs<0b0101, "adc",
               BinOpWithFlagFrag<(ARMadde node:$LHS, node:$RHS, node:$FLAG)>,
@@ -3108,9 +3130,8 @@ defm RSB  : AsI1_rbin_irs <0b0011, "rsb",
 
 // FIXME: Eliminate them if we can write def : Pat patterns which define
 // CPSR and the implicit def of CPSR is not needed.
-defm RSBS : AsI1_rbin_s_is<0b0011, "rsb",
-                           IIC_iALUi, IIC_iALUr, IIC_iALUsr,
-                           BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>;
+defm RSBS : AsI1_rbin_s_is<IIC_iALUi, IIC_iALUr, IIC_iALUsr,
+                           BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>;
 
 defm RSC : AI1_rsc_irs<0b0111, "rsc",
               BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>,
@@ -3153,6 +3174,8 @@ class AAI<bits<8> op27_20, bits<8> op11_4, string opc,
   let Inst{19-16} = Rn;
   let Inst{15-12} = Rd;
   let Inst{3-0} = Rm;
+
+  let Unpredictable{11-8} = 0b1111;
 }
 
 // Saturating add/subtract
@@ -3445,19 +3468,20 @@ class AsMul1I64<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
 // property. Remove them when it's possible to add those properties
 // on an individual MachineInstr, not just an instruction description.
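// Note that MUL below takes GPRnopc operands: using PC in any operand of a
// multiply (e.g. "mul pc, r0, r1") is unpredictable, and the encoding's
// must-be-zero bits {15-12} are likewise marked Unpredictable below.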
let isCommutable = 1 in { -def MUL : AsMul1I32<0b0000000, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), +def MUL : AsMul1I32<0b0000000, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm), IIC_iMUL32, "mul", "\t$Rd, $Rn, $Rm", - [(set GPR:$Rd, (mul GPR:$Rn, GPR:$Rm))]>, + [(set GPRnopc:$Rd, (mul GPRnopc:$Rn, GPRnopc:$Rm))]>, Requires<[IsARM, HasV6]> { let Inst{15-12} = 0b0000; + let Unpredictable{15-12} = 0b1111; } let Constraints = "@earlyclobber $Rd" in -def MULv5: ARMPseudoExpand<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, +def MULv5: ARMPseudoExpand<(outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, cc_out:$s), 4, IIC_iMUL32, - [(set GPR:$Rd, (mul GPR:$Rn, GPR:$Rm))], - (MUL GPR:$Rd, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>, + [(set GPRnopc:$Rd, (mul GPRnopc:$Rn, GPRnopc:$Rm))], + (MUL GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, cc_out:$s)>, Requires<[IsARM, NoV6]>; } @@ -3952,10 +3976,13 @@ def BCCZi64 : PseudoInst<(outs), // FIXME: should be able to write a pattern for ARMcmov, but can't use // a two-value operand where a dag node expects two operands. :( let neverHasSideEffects = 1 in { + +let isCommutable = 1 in def MOVCCr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$false, GPR:$Rm, pred:$p), 4, IIC_iCMOVr, [/*(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm, imm:$cc, CCR:$ccr))*/]>, RegConstraint<"$false = $Rd">; + def MOVCCsi : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$false, so_reg_imm:$shift, pred:$p), 4, IIC_iCMOVsr, @@ -3996,8 +4023,44 @@ def MVNCCi : ARMPseudoInst<(outs GPR:$Rd), 4, IIC_iCMOVi, [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm_not:$imm, imm:$cc, CCR:$ccr))*/]>, RegConstraint<"$false = $Rd">; + +// Conditional instructions +multiclass AsI1_bincc_irs<Instruction iri, Instruction irr, Instruction irsi, + Instruction irsr, + InstrItinClass iii, InstrItinClass iir, + InstrItinClass iis> { + def ri : ARMPseudoExpand<(outs GPR:$Rd), + (ins GPR:$Rn, so_imm:$imm, pred:$p, cc_out:$s), + 4, iii, [], + (iri GPR:$Rd, GPR:$Rn, so_imm:$imm, pred:$p, cc_out:$s)>, + RegConstraint<"$Rn = $Rd">; + def rr : ARMPseudoExpand<(outs GPR:$Rd), + (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), + 4, iir, [], + (irr GPR:$Rd, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>, + RegConstraint<"$Rn = $Rd">; + def rsi : ARMPseudoExpand<(outs GPR:$Rd), + (ins GPR:$Rn, so_reg_imm:$shift, pred:$p, cc_out:$s), + 4, iis, [], + (irsi GPR:$Rd, GPR:$Rn, so_reg_imm:$shift, pred:$p, cc_out:$s)>, + RegConstraint<"$Rn = $Rd">; + def rsr : ARMPseudoExpand<(outs GPRnopc:$Rd), + (ins GPRnopc:$Rn, so_reg_reg:$shift, pred:$p, cc_out:$s), + 4, iis, [], + (irsr GPR:$Rd, GPR:$Rn, so_reg_reg:$shift, pred:$p, cc_out:$s)>, + RegConstraint<"$Rn = $Rd">; +} + +defm ANDCC : AsI1_bincc_irs<ANDri, ANDrr, ANDrsi, ANDrsr, + IIC_iBITi, IIC_iBITr, IIC_iBITsr>; +defm ORRCC : AsI1_bincc_irs<ORRri, ORRrr, ORRrsi, ORRrsr, + IIC_iBITi, IIC_iBITr, IIC_iBITsr>; +defm EORCC : AsI1_bincc_irs<EORri, EORrr, EORrsi, EORrsr, + IIC_iBITi, IIC_iBITr, IIC_iBITsr>; + } // neverHasSideEffects + //===----------------------------------------------------------------------===// // Atomic operations intrinsics // @@ -4076,10 +4139,10 @@ let usesCustomInserter = 1 in { [(set GPR:$dst, (atomic_load_max_8 GPR:$ptr, GPR:$val))]>; def ATOMIC_LOAD_UMIN_I8 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, - [(set GPR:$dst, (atomic_load_min_8 GPR:$ptr, GPR:$val))]>; + [(set GPR:$dst, (atomic_load_umin_8 GPR:$ptr, GPR:$val))]>; def ATOMIC_LOAD_UMAX_I8 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, - [(set GPR:$dst, (atomic_load_max_8 
GPR:$ptr, GPR:$val))]>;
+      [(set GPR:$dst, (atomic_load_umax_8 GPR:$ptr, GPR:$val))]>;
   def ATOMIC_LOAD_ADD_I16 : PseudoInst<
     (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
     [(set GPR:$dst, (atomic_load_add_16 GPR:$ptr, GPR:$incr))]>;
@@ -4106,10 +4169,10 @@ let usesCustomInserter = 1 in {
     [(set GPR:$dst, (atomic_load_max_16 GPR:$ptr, GPR:$val))]>;
   def ATOMIC_LOAD_UMIN_I16 : PseudoInst<
     (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
-    [(set GPR:$dst, (atomic_load_min_16 GPR:$ptr, GPR:$val))]>;
+    [(set GPR:$dst, (atomic_load_umin_16 GPR:$ptr, GPR:$val))]>;
   def ATOMIC_LOAD_UMAX_I16 : PseudoInst<
     (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
-    [(set GPR:$dst, (atomic_load_max_16 GPR:$ptr, GPR:$val))]>;
+    [(set GPR:$dst, (atomic_load_umax_16 GPR:$ptr, GPR:$val))]>;
   def ATOMIC_LOAD_ADD_I32 : PseudoInst<
     (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
     [(set GPR:$dst, (atomic_load_add_32 GPR:$ptr, GPR:$incr))]>;
@@ -4136,10 +4199,10 @@ let usesCustomInserter = 1 in {
     [(set GPR:$dst, (atomic_load_max_32 GPR:$ptr, GPR:$val))]>;
   def ATOMIC_LOAD_UMIN_I32 : PseudoInst<
     (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
-    [(set GPR:$dst, (atomic_load_min_32 GPR:$ptr, GPR:$val))]>;
+    [(set GPR:$dst, (atomic_load_umin_32 GPR:$ptr, GPR:$val))]>;
   def ATOMIC_LOAD_UMAX_I32 : PseudoInst<
     (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
-    [(set GPR:$dst, (atomic_load_max_32 GPR:$ptr, GPR:$val))]>;
+    [(set GPR:$dst, (atomic_load_umax_32 GPR:$ptr, GPR:$val))]>;
 
   def ATOMIC_SWAP_I8 : PseudoInst<
     (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,
@@ -4185,14 +4248,14 @@ def STREXH: AIstrex<0b11, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr),
                     NoItinerary, "strexh", "\t$Rd, $Rt, $addr", []>;
 def STREX  : AIstrex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr),
                     NoItinerary, "strex", "\t$Rd, $Rt, $addr", []>;
-}
-
-let hasExtraSrcRegAllocReq = 1, Constraints = "@earlyclobber $Rd" in
+let hasExtraSrcRegAllocReq = 1 in
 def STREXD : AIstrex<0b01, (outs GPR:$Rd),
                      (ins GPR:$Rt, GPR:$Rt2, addr_offset_none:$addr),
                      NoItinerary, "strexd", "\t$Rd, $Rt, $Rt2, $addr", []> {
   let DecoderMethod = "DecodeDoubleRegStore";
 }
+}
+
 def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex", []>,
             Requires<[IsARM, HasV7]> {
@@ -4451,10 +4514,16 @@ def MCR : MovRCopro<"mcr", 0 /* from ARM core register to coprocessor */,
                               c_imm:$CRm, imm0_7:$opc2),
                     [(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
                                   imm:$CRm, imm:$opc2)]>;
+def : ARMInstAlias<"mcr${p} $cop, $opc1, $Rt, $CRn, $CRm",
+                   (MCR p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+                        c_imm:$CRm, 0, pred:$p)>;
 def MRC : MovRCopro<"mrc", 1 /* from coprocessor to ARM core register */,
                     (outs GPR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
                                          c_imm:$CRm, imm0_7:$opc2), []>;
+def : ARMInstAlias<"mrc${p} $cop, $opc1, $Rt, $CRn, $CRm",
+                   (MRC GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
+                        c_imm:$CRm, 0, pred:$p)>;
 
 def : ARMPat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2),
              (MRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
@@ -4488,10 +4557,16 @@ def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */,
                                 c_imm:$CRm, imm0_7:$opc2),
                       [(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
                                      imm:$CRm, imm:$opc2)]>;
+def : ARMInstAlias<"mcr2 $cop, $opc1, $Rt, $CRn, $CRm",
+                   (MCR2 p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+                         c_imm:$CRm, 0)>;
 def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */,
                       (outs GPR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
c_imm:$CRm, imm0_7:$opc2), []>;
+def : ARMInstAlias<"mrc2 $cop, $opc1, $Rt, $CRn, $CRm",
+                   (MRC2 GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
+                         c_imm:$CRm, 0)>;
 
 def : ARMV5TPat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn,
                               imm:$CRm, imm:$opc2),
@@ -4635,7 +4710,8 @@ let isCall = 1,
 // no encoding information is necessary.
 let Defs =
   [ R0,  R1,  R2,  R3,  R4,  R5,  R6,  R7,  R8,  R9,  R10, R11, R12, LR, CPSR,
-    QQQQ0, QQQQ1, QQQQ2, QQQQ3 ], hasSideEffects = 1, isBarrier = 1 in {
+    Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15 ],
+  hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
   def Int_eh_sjlj_setjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$val),
                                NoItinerary,
                          [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>,
@@ -4644,31 +4720,37 @@ let Defs =
 
 let Defs =
   [ R0,  R1,  R2,  R3,  R4,  R5,  R6,  R7,  R8,  R9,  R10, R11, R12, LR, CPSR ],
-  hasSideEffects = 1, isBarrier = 1 in {
+  hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
   def Int_eh_sjlj_setjmp_nofp : PseudoInst<(outs), (ins GPR:$src, GPR:$val),
                                NoItinerary,
                          [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>,
                            Requires<[IsARM, NoVFP]>;
 }
 
-// FIXME: Non-Darwin version(s)
+// FIXME: Non-IOS version(s)
 let isBarrier = 1, hasSideEffects = 1, isTerminator = 1,
     Defs = [ R7, LR, SP ] in {
 def Int_eh_sjlj_longjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$scratch),
                              NoItinerary,
                          [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>,
-                                Requires<[IsARM, IsDarwin]>;
+                                Requires<[IsARM, IsIOS]>;
 }
 
-// eh.sjlj.dispatchsetup pseudo-instruction.
-// This pseudo is used for ARM, Thumb1 and Thumb2. Any differences are
+// eh.sjlj.dispatchsetup pseudo-instructions.
+// These pseudos are used for both ARM and Thumb2. Any differences are
 // handled when the pseudo is expanded (which happens before any passes
 // that need the instruction size).
-let isBarrier = 1, hasSideEffects = 1 in
-def Int_eh_sjlj_dispatchsetup :
- PseudoInst<(outs), (ins GPR:$src), NoItinerary,
            [(ARMeh_sjlj_dispatchsetup GPR:$src)]>,
-              Requires<[IsDarwin]>;
+let Defs =
+  [ R0,  R1,  R2,  R3,  R4,  R5,  R6,  R7,  R8,  R9,  R10, R11, R12, LR, CPSR,
+    Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15 ],
+  isBarrier = 1 in
+def Int_eh_sjlj_dispatchsetup : PseudoInst<(outs), (ins), NoItinerary, []>;
+
+let Defs =
+  [ R0,  R1,  R2,  R3,  R4,  R5,  R6,  R7,  R8,  R9,  R10, R11, R12, LR, CPSR ],
+  isBarrier = 1 in
+def Int_eh_sjlj_dispatchsetup_nofp : PseudoInst<(outs), (ins), NoItinerary, []>;
+
 
 //===----------------------------------------------------------------------===//
 // Non-Instruction Patterns
@@ -4725,30 +4807,15 @@ def : ARMPat<(ARMWrapperJT tjumptable:$dst, imm:$id),
 
 // TODO: add,sub,and, 3-instr forms?
 
-// Tail calls
-def : ARMPat<(ARMtcret tcGPR:$dst),
-             (TCRETURNri tcGPR:$dst)>, Requires<[IsDarwin]>;
-
-def : ARMPat<(ARMtcret (i32 tglobaladdr:$dst)),
-             (TCRETURNdi texternalsym:$dst)>, Requires<[IsDarwin]>;
-
-def : ARMPat<(ARMtcret (i32 texternalsym:$dst)),
-             (TCRETURNdi texternalsym:$dst)>, Requires<[IsDarwin]>;
-
-def : ARMPat<(ARMtcret tcGPR:$dst),
-             (TCRETURNriND tcGPR:$dst)>, Requires<[IsNotDarwin]>;
-
-def : ARMPat<(ARMtcret (i32 tglobaladdr:$dst)),
-             (TCRETURNdiND texternalsym:$dst)>, Requires<[IsNotDarwin]>;
-
-def : ARMPat<(ARMtcret (i32 texternalsym:$dst)),
-             (TCRETURNdiND texternalsym:$dst)>, Requires<[IsNotDarwin]>;
+// Tail calls. These patterns also apply to Thumb mode.
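+// (ARMtcret ...) selects to the TCRETURNri/TCRETURNdi pseudos below, which
+// are presumably rewritten to the TAILJMPr/TAILJMPd branches defined above
+// once the epilogue has been laid out.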
+def : Pat<(ARMtcret tcGPR:$dst), (TCRETURNri tcGPR:$dst)>; +def : Pat<(ARMtcret (i32 tglobaladdr:$dst)), (TCRETURNdi texternalsym:$dst)>; +def : Pat<(ARMtcret (i32 texternalsym:$dst)), (TCRETURNdi texternalsym:$dst)>; // Direct calls -def : ARMPat<(ARMcall texternalsym:$func), (BL texternalsym:$func)>, - Requires<[IsARM, IsNotDarwin]>; -def : ARMPat<(ARMcall texternalsym:$func), (BLr9 texternalsym:$func)>, - Requires<[IsARM, IsDarwin]>; +def : ARMPat<(ARMcall texternalsym:$func), (BL texternalsym:$func)>; +def : ARMPat<(ARMcall_nolink texternalsym:$func), + (BMOVPCB_CALL texternalsym:$func)>; // zextload i1 -> zextload i8 def : ARMPat<(zextloadi1 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>; @@ -4992,13 +5059,113 @@ def : MnemonicAlias<"uqsubaddx", "uqsax">; // USAX == USUBADDX def : MnemonicAlias<"usubaddx", "usax">; -// LDRSBT/LDRHT/LDRSHT post-index offset if optional. -// Note that the write-back output register is a dummy operand for MC (it's -// only meaningful for codegen), so we just pass zero here. -// FIXME: tblgen not cooperating with argument conversions. -//def : InstAlias<"ldrsbt${p} $Rt, $addr", -// (LDRSBTi GPR:$Rt, GPR:$Rt, addr_offset_none:$addr, 0,pred:$p)>; -//def : InstAlias<"ldrht${p} $Rt, $addr", -// (LDRHTi GPR:$Rt, GPR:$Rt, addr_offset_none:$addr, 0, pred:$p)>; -//def : InstAlias<"ldrsht${p} $Rt, $addr", -// (LDRSHTi GPR:$Rt, GPR:$Rt, addr_offset_none:$addr, 0, pred:$p)>; +// "mov Rd, so_imm_not" can be handled via "mvn" in assembly, just like +// for isel. +def : ARMInstAlias<"mov${s}${p} $Rd, $imm", + (MVNi rGPR:$Rd, so_imm_not:$imm, pred:$p, cc_out:$s)>; +def : ARMInstAlias<"mvn${s}${p} $Rd, $imm", + (MOVi rGPR:$Rd, so_imm_not:$imm, pred:$p, cc_out:$s)>; +// Same for AND <--> BIC +def : ARMInstAlias<"bic${s}${p} $Rd, $Rn, $imm", + (ANDri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm, + pred:$p, cc_out:$s)>; +def : ARMInstAlias<"bic${s}${p} $Rdn, $imm", + (ANDri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm, + pred:$p, cc_out:$s)>; +def : ARMInstAlias<"and${s}${p} $Rd, $Rn, $imm", + (BICri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm, + pred:$p, cc_out:$s)>; +def : ARMInstAlias<"and${s}${p} $Rdn, $imm", + (BICri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm, + pred:$p, cc_out:$s)>; + +// Likewise, "add Rd, so_imm_neg" -> sub +def : ARMInstAlias<"add${s}${p} $Rd, $Rn, $imm", + (SUBri GPR:$Rd, GPR:$Rn, so_imm_neg:$imm, pred:$p, cc_out:$s)>; +def : ARMInstAlias<"add${s}${p} $Rd, $imm", + (SUBri GPR:$Rd, GPR:$Rd, so_imm_neg:$imm, pred:$p, cc_out:$s)>; +// Same for CMP <--> CMN via so_imm_neg +def : ARMInstAlias<"cmp${p} $Rd, $imm", + (CMNzri rGPR:$Rd, so_imm_neg:$imm, pred:$p)>; +def : ARMInstAlias<"cmn${p} $Rd, $imm", + (CMPri rGPR:$Rd, so_imm_neg:$imm, pred:$p)>; + +// The shifter forms of the MOV instruction are aliased to the ASR, LSL, +// LSR, ROR, and RRX instructions. +// FIXME: We need C++ parser hooks to map the alias to the MOV +// encoding. It seems we should be able to do that sort of thing +// in tblgen, but it could get ugly. 
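+// For example, "asr r0, r1, #2" is another spelling of "mov r0, r1, asr #2",
+// and "rrx r0, r1" of "mov r0, r1, rrx".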
+def ASRi : ARMAsmPseudo<"asr${s}${p} $Rd, $Rm, $imm",
+                        (ins GPR:$Rd, GPR:$Rm, imm0_32:$imm, pred:$p,
+                         cc_out:$s)>;
+def LSRi : ARMAsmPseudo<"lsr${s}${p} $Rd, $Rm, $imm",
+                        (ins GPR:$Rd, GPR:$Rm, imm0_32:$imm, pred:$p,
+                         cc_out:$s)>;
+def LSLi : ARMAsmPseudo<"lsl${s}${p} $Rd, $Rm, $imm",
+                        (ins GPR:$Rd, GPR:$Rm, imm0_31:$imm, pred:$p,
+                         cc_out:$s)>;
+def RORi : ARMAsmPseudo<"ror${s}${p} $Rd, $Rm, $imm",
+                        (ins GPR:$Rd, GPR:$Rm, imm0_31:$imm, pred:$p,
+                         cc_out:$s)>;
+def RRXi : ARMAsmPseudo<"rrx${s}${p} $Rd, $Rm",
+                        (ins GPRnopc:$Rd, GPRnopc:$Rm, pred:$p, cc_out:$s)>;
+def ASRr : ARMAsmPseudo<"asr${s}${p} $Rd, $Rn, $Rm",
+                        (ins GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
+                         cc_out:$s)>;
+def LSRr : ARMAsmPseudo<"lsr${s}${p} $Rd, $Rn, $Rm",
+                        (ins GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
+                         cc_out:$s)>;
+def LSLr : ARMAsmPseudo<"lsl${s}${p} $Rd, $Rn, $Rm",
+                        (ins GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
+                         cc_out:$s)>;
+def RORr : ARMAsmPseudo<"ror${s}${p} $Rd, $Rn, $Rm",
+                        (ins GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
+                         cc_out:$s)>;
+// Shifter instructions also support a two-operand form.
+def : ARMInstAlias<"asr${s}${p} $Rm, $imm",
+                   (ASRi GPR:$Rm, GPR:$Rm, imm0_32:$imm, pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"lsr${s}${p} $Rm, $imm",
+                   (LSRi GPR:$Rm, GPR:$Rm, imm0_32:$imm, pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"lsl${s}${p} $Rm, $imm",
+                   (LSLi GPR:$Rm, GPR:$Rm, imm0_31:$imm, pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"ror${s}${p} $Rm, $imm",
+                   (RORi GPR:$Rm, GPR:$Rm, imm0_31:$imm, pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"asr${s}${p} $Rn, $Rm",
+                   (ASRr GPRnopc:$Rn, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
+                    cc_out:$s)>;
+def : ARMInstAlias<"lsr${s}${p} $Rn, $Rm",
+                   (LSRr GPRnopc:$Rn, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
+                    cc_out:$s)>;
+def : ARMInstAlias<"lsl${s}${p} $Rn, $Rm",
+                   (LSLr GPRnopc:$Rn, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
+                    cc_out:$s)>;
+def : ARMInstAlias<"ror${s}${p} $Rn, $Rm",
+                   (RORr GPRnopc:$Rn, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
+                    cc_out:$s)>;
+
+
+// 'mul' instruction can be specified with only two operands.
+def : ARMInstAlias<"mul${s}${p} $Rn, $Rm",
+                   (MUL rGPR:$Rn, rGPR:$Rm, rGPR:$Rn, pred:$p, cc_out:$s)>;
+
+// "neg" is an alias for "rsb rd, rn, #0"
+def : ARMInstAlias<"neg${s}${p} $Rd, $Rm",
+                   (RSBri GPR:$Rd, GPR:$Rm, 0, pred:$p, cc_out:$s)>;
+
+// Pre-v6, 'mov r0, r0' was used as a NOP encoding.
+def : InstAlias<"nop${p}", (MOVr R0, R0, pred:$p, zero_reg)>,
+      Requires<[IsARM, NoV6]>;
+
+// UMULL/SMULL are available on all arches, but the instruction definitions
+// need different constraints pre-v6. Use these aliases for assembly parsing
+// on pre-v6.
+def : InstAlias<"smull${s}${p} $RdLo, $RdHi, $Rn, $Rm",
+                (SMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+      Requires<[IsARM, NoV6]>;
+def : InstAlias<"umull${s}${p} $RdLo, $RdHi, $Rn, $Rm",
+                (UMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+      Requires<[IsARM, NoV6]>;
+
+// 'it' blocks in ARM mode just validate the predicates. The IT itself
+// is discarded.
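+// For example, in ARM-mode assembly the pair
+//   it eq
+//   moveq r0, #1
+// encodes only the predicated MOV; the "it eq" is parsed, checked against
+// the following predicate, and then dropped.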
+def ITasm : ARMAsmPseudo<"it$mask $cc", (ins it_pred:$cc, it_mask:$mask)>; diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td index 7aad186..c7219a6 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td @@ -1,4 +1,4 @@ -//===- ARMInstrNEON.td - NEON support for ARM -----------------------------===// +//===-- ARMInstrNEON.td - NEON support for ARM -------------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -15,6 +15,45 @@ //===----------------------------------------------------------------------===// // NEON-specific Operands. //===----------------------------------------------------------------------===// +def nModImm : Operand<i32> { + let PrintMethod = "printNEONModImmOperand"; +} + +def nImmSplatI8AsmOperand : AsmOperandClass { let Name = "NEONi8splat"; } +def nImmSplatI8 : Operand<i32> { + let PrintMethod = "printNEONModImmOperand"; + let ParserMatchClass = nImmSplatI8AsmOperand; +} +def nImmSplatI16AsmOperand : AsmOperandClass { let Name = "NEONi16splat"; } +def nImmSplatI16 : Operand<i32> { + let PrintMethod = "printNEONModImmOperand"; + let ParserMatchClass = nImmSplatI16AsmOperand; +} +def nImmSplatI32AsmOperand : AsmOperandClass { let Name = "NEONi32splat"; } +def nImmSplatI32 : Operand<i32> { + let PrintMethod = "printNEONModImmOperand"; + let ParserMatchClass = nImmSplatI32AsmOperand; +} +def nImmVMOVI32AsmOperand : AsmOperandClass { let Name = "NEONi32vmov"; } +def nImmVMOVI32 : Operand<i32> { + let PrintMethod = "printNEONModImmOperand"; + let ParserMatchClass = nImmVMOVI32AsmOperand; +} +def nImmVMOVI32NegAsmOperand : AsmOperandClass { let Name = "NEONi32vmovNeg"; } +def nImmVMOVI32Neg : Operand<i32> { + let PrintMethod = "printNEONModImmOperand"; + let ParserMatchClass = nImmVMOVI32NegAsmOperand; +} +def nImmVMOVF32 : Operand<i32> { + let PrintMethod = "printFPImmOperand"; + let ParserMatchClass = FPImmOperand; +} +def nImmSplatI64AsmOperand : AsmOperandClass { let Name = "NEONi64splat"; } +def nImmSplatI64 : Operand<i32> { + let PrintMethod = "printNEONModImmOperand"; + let ParserMatchClass = nImmSplatI64AsmOperand; +} + def VectorIndex8Operand : AsmOperandClass { let Name = "VectorIndex8"; } def VectorIndex16Operand : AsmOperandClass { let Name = "VectorIndex16"; } def VectorIndex32Operand : AsmOperandClass { let Name = "VectorIndex32"; } @@ -40,6 +79,326 @@ def VectorIndex32 : Operand<i32>, ImmLeaf<i32, [{ let MIOperandInfo = (ops i32imm); } +// Register list of one D register. +def VecListOneDAsmOperand : AsmOperandClass { + let Name = "VecListOneD"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> { + let ParserMatchClass = VecListOneDAsmOperand; +} +// Register list of two sequential D registers. +def VecListDPairAsmOperand : AsmOperandClass { + let Name = "VecListDPair"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListDPair : RegisterOperand<DPair, "printVectorListTwo"> { + let ParserMatchClass = VecListDPairAsmOperand; +} +// Register list of three sequential D registers. 
+def VecListThreeDAsmOperand : AsmOperandClass {
+  let Name = "VecListThreeD";
+  let ParserMethod = "parseVectorList";
+  let RenderMethod = "addVecListOperands";
+}
+def VecListThreeD : RegisterOperand<DPR, "printVectorListThree"> {
+  let ParserMatchClass = VecListThreeDAsmOperand;
+}
+// Register list of four sequential D registers.
+def VecListFourDAsmOperand : AsmOperandClass {
+  let Name = "VecListFourD";
+  let ParserMethod = "parseVectorList";
+  let RenderMethod = "addVecListOperands";
+}
+def VecListFourD : RegisterOperand<DPR, "printVectorListFour"> {
+  let ParserMatchClass = VecListFourDAsmOperand;
+}
+// Register list of two D registers spaced by 2 (two sequential Q registers).
+def VecListDPairSpacedAsmOperand : AsmOperandClass {
+  let Name = "VecListDPairSpaced";
+  let ParserMethod = "parseVectorList";
+  let RenderMethod = "addVecListOperands";
+}
+def VecListDPairSpaced : RegisterOperand<DPair, "printVectorListTwoSpaced"> {
+  let ParserMatchClass = VecListDPairSpacedAsmOperand;
+}
+// Register list of three D registers spaced by 2 (three Q registers).
+def VecListThreeQAsmOperand : AsmOperandClass {
+  let Name = "VecListThreeQ";
+  let ParserMethod = "parseVectorList";
+  let RenderMethod = "addVecListOperands";
+}
+def VecListThreeQ : RegisterOperand<DPR, "printVectorListThreeSpaced"> {
+  let ParserMatchClass = VecListThreeQAsmOperand;
+}
+// Register list of four D registers spaced by 2 (four Q registers).
+def VecListFourQAsmOperand : AsmOperandClass {
+  let Name = "VecListFourQ";
+  let ParserMethod = "parseVectorList";
+  let RenderMethod = "addVecListOperands";
+}
+def VecListFourQ : RegisterOperand<DPR, "printVectorListFourSpaced"> {
+  let ParserMatchClass = VecListFourQAsmOperand;
+}
+
+// Register list of one D register, with "all lanes" subscripting.
+def VecListOneDAllLanesAsmOperand : AsmOperandClass {
+  let Name = "VecListOneDAllLanes";
+  let ParserMethod = "parseVectorList";
+  let RenderMethod = "addVecListOperands";
+}
+def VecListOneDAllLanes : RegisterOperand<DPR, "printVectorListOneAllLanes"> {
+  let ParserMatchClass = VecListOneDAllLanesAsmOperand;
+}
+// Register list of two D registers, with "all lanes" subscripting.
+def VecListDPairAllLanesAsmOperand : AsmOperandClass {
+  let Name = "VecListDPairAllLanes";
+  let ParserMethod = "parseVectorList";
+  let RenderMethod = "addVecListOperands";
+}
+def VecListDPairAllLanes : RegisterOperand<DPair,
+                                           "printVectorListTwoAllLanes"> {
+  let ParserMatchClass = VecListDPairAllLanesAsmOperand;
+}
+// Register list of two D registers spaced by 2 (two sequential Q registers).
+def VecListDPairSpacedAllLanesAsmOperand : AsmOperandClass {
+  let Name = "VecListDPairSpacedAllLanes";
+  let ParserMethod = "parseVectorList";
+  let RenderMethod = "addVecListOperands";
+}
+def VecListDPairSpacedAllLanes : RegisterOperand<DPair,
+                                            "printVectorListTwoSpacedAllLanes"> {
+  let ParserMatchClass = VecListDPairSpacedAllLanesAsmOperand;
+}
+// Register list of three D registers, with "all lanes" subscripting.
+def VecListThreeDAllLanesAsmOperand : AsmOperandClass {
+  let Name = "VecListThreeDAllLanes";
+  let ParserMethod = "parseVectorList";
+  let RenderMethod = "addVecListOperands";
+}
+def VecListThreeDAllLanes : RegisterOperand<DPR,
+                                            "printVectorListThreeAllLanes"> {
+  let ParserMatchClass = VecListThreeDAllLanesAsmOperand;
+}
+// Register list of three D registers spaced by 2 (three sequential Q regs).
+def VecListThreeQAllLanesAsmOperand : AsmOperandClass { + let Name = "VecListThreeQAllLanes"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListThreeQAllLanes : RegisterOperand<DPR, + "printVectorListThreeSpacedAllLanes"> { + let ParserMatchClass = VecListThreeQAllLanesAsmOperand; +} +// Register list of four D registers, with "all lanes" subscripting. +def VecListFourDAllLanesAsmOperand : AsmOperandClass { + let Name = "VecListFourDAllLanes"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListFourDAllLanes : RegisterOperand<DPR, "printVectorListFourAllLanes"> { + let ParserMatchClass = VecListFourDAllLanesAsmOperand; +} +// Register list of four D registers spaced by 2 (four sequential Q regs). +def VecListFourQAllLanesAsmOperand : AsmOperandClass { + let Name = "VecListFourQAllLanes"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListFourQAllLanes : RegisterOperand<DPR, + "printVectorListFourSpacedAllLanes"> { + let ParserMatchClass = VecListFourQAllLanesAsmOperand; +} + + +// Register list of one D register, with byte lane subscripting. +def VecListOneDByteIndexAsmOperand : AsmOperandClass { + let Name = "VecListOneDByteIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListOneDByteIndexed : Operand<i32> { + let ParserMatchClass = VecListOneDByteIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with half-word lane subscripting. +def VecListOneDHWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListOneDHWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListOneDHWordIndexed : Operand<i32> { + let ParserMatchClass = VecListOneDHWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with word lane subscripting. +def VecListOneDWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListOneDWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListOneDWordIndexed : Operand<i32> { + let ParserMatchClass = VecListOneDWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} + +// Register list of two D registers with byte lane subscripting. +def VecListTwoDByteIndexAsmOperand : AsmOperandClass { + let Name = "VecListTwoDByteIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListTwoDByteIndexed : Operand<i32> { + let ParserMatchClass = VecListTwoDByteIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with half-word lane subscripting. +def VecListTwoDHWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListTwoDHWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListTwoDHWordIndexed : Operand<i32> { + let ParserMatchClass = VecListTwoDHWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with word lane subscripting. 
+def VecListTwoDWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListTwoDWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListTwoDWordIndexed : Operand<i32> { + let ParserMatchClass = VecListTwoDWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// Register list of two Q registers with half-word lane subscripting. +def VecListTwoQHWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListTwoQHWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListTwoQHWordIndexed : Operand<i32> { + let ParserMatchClass = VecListTwoQHWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with word lane subscripting. +def VecListTwoQWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListTwoQWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListTwoQWordIndexed : Operand<i32> { + let ParserMatchClass = VecListTwoQWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} + + +// Register list of three D registers with byte lane subscripting. +def VecListThreeDByteIndexAsmOperand : AsmOperandClass { + let Name = "VecListThreeDByteIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListThreeDByteIndexed : Operand<i32> { + let ParserMatchClass = VecListThreeDByteIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with half-word lane subscripting. +def VecListThreeDHWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListThreeDHWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListThreeDHWordIndexed : Operand<i32> { + let ParserMatchClass = VecListThreeDHWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with word lane subscripting. +def VecListThreeDWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListThreeDWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListThreeDWordIndexed : Operand<i32> { + let ParserMatchClass = VecListThreeDWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// Register list of three Q registers with half-word lane subscripting. +def VecListThreeQHWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListThreeQHWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListThreeQHWordIndexed : Operand<i32> { + let ParserMatchClass = VecListThreeQHWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with word lane subscripting. +def VecListThreeQWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListThreeQWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListThreeQWordIndexed : Operand<i32> { + let ParserMatchClass = VecListThreeQWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} + +// Register list of four D registers with byte lane subscripting. 
+def VecListFourDByteIndexAsmOperand : AsmOperandClass { + let Name = "VecListFourDByteIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListFourDByteIndexed : Operand<i32> { + let ParserMatchClass = VecListFourDByteIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with half-word lane subscripting. +def VecListFourDHWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListFourDHWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListFourDHWordIndexed : Operand<i32> { + let ParserMatchClass = VecListFourDHWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with word lane subscripting. +def VecListFourDWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListFourDWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListFourDWordIndexed : Operand<i32> { + let ParserMatchClass = VecListFourDWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// Register list of four Q registers with half-word lane subscripting. +def VecListFourQHWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListFourQHWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListFourQHWordIndexed : Operand<i32> { + let ParserMatchClass = VecListFourQHWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with word lane subscripting. +def VecListFourQWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListFourQWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListFourQWordIndexed : Operand<i32> { + let ParserMatchClass = VecListFourQWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} + + //===----------------------------------------------------------------------===// // NEON-specific DAG Nodes. //===----------------------------------------------------------------------===// @@ -103,6 +462,7 @@ def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>; def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>; def NEONvmovImm : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>; def NEONvmvnImm : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>; +def NEONvmovFPImm : SDNode<"ARMISD::VMOVFPIMM", SDTARMVMOVIMM>; def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]>; @@ -164,30 +524,22 @@ def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{ }]>; //===----------------------------------------------------------------------===// -// NEON operand definitions -//===----------------------------------------------------------------------===// - -def nModImm : Operand<i32> { - let PrintMethod = "printNEONModImmOperand"; -} - -//===----------------------------------------------------------------------===// // NEON load / store instructions //===----------------------------------------------------------------------===// // Use VLDM to load a Q register as a D register pair. // This is a pseudo instruction that is expanded to VLDMD after reg alloc. def VLDMQIA - : PseudoVFPLdStM<(outs QPR:$dst), (ins GPR:$Rn), + : PseudoVFPLdStM<(outs DPair:$dst), (ins GPR:$Rn), IIC_fpLoad_m, "", - [(set QPR:$dst, (v2f64 (load GPR:$Rn)))]>; + [(set DPair:$dst, (v2f64 (load GPR:$Rn)))]>; // Use VSTM to store a Q register as a D register pair. 
// This is a pseudo instruction that is expanded to VSTMD after reg alloc. def VSTMQIA - : PseudoVFPLdStM<(outs), (ins QPR:$src, GPR:$Rn), + : PseudoVFPLdStM<(outs), (ins DPair:$src, GPR:$Rn), IIC_fpStore_m, "", - [(store (v2f64 QPR:$src), GPR:$Rn)]>; + [(store (v2f64 DPair:$src), GPR:$Rn)]>; // Classes for VLD* pseudo-instructions with multi-register operands. // These are expanded to real instructions after register allocation. @@ -197,12 +549,31 @@ class VLDQWBPseudo<InstrItinClass itin> : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), (ins addrmode6:$addr, am6offset:$offset), itin, "$addr.addr = $wb">; +class VLDQWBfixedPseudo<InstrItinClass itin> + : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), + (ins addrmode6:$addr), itin, + "$addr.addr = $wb">; +class VLDQWBregisterPseudo<InstrItinClass itin> + : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), + (ins addrmode6:$addr, rGPR:$offset), itin, + "$addr.addr = $wb">; + class VLDQQPseudo<InstrItinClass itin> : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">; class VLDQQWBPseudo<InstrItinClass itin> : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), (ins addrmode6:$addr, am6offset:$offset), itin, "$addr.addr = $wb">; +class VLDQQWBfixedPseudo<InstrItinClass itin> + : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), + (ins addrmode6:$addr), itin, + "$addr.addr = $wb">; +class VLDQQWBregisterPseudo<InstrItinClass itin> + : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), + (ins addrmode6:$addr, rGPR:$offset), itin, + "$addr.addr = $wb">; + + class VLDQQQQPseudo<InstrItinClass itin> : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin, "$src = $dst">; @@ -215,17 +586,17 @@ let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { // VLD1 : Vector Load (multiple single elements) class VLD1D<bits<4> op7_4, string Dt> - : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$Vd), + : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd), (ins addrmode6:$Rn), IIC_VLD1, - "vld1", Dt, "\\{$Vd\\}, $Rn", "", []> { + "vld1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDInstruction"; } class VLD1Q<bits<4> op7_4, string Dt> - : NLdSt<0,0b10,0b1010,op7_4, (outs DPR:$Vd, DPR:$dst2), + : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd), (ins addrmode6:$Rn), IIC_VLD1x2, - "vld1", Dt, "\\{$Vd, $dst2\\}, $Rn", "", []> { + "vld1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDInstruction"; @@ -241,59 +612,82 @@ def VLD1q16 : VLD1Q<{0,1,?,?}, "16">; def VLD1q32 : VLD1Q<{1,0,?,?}, "32">; def VLD1q64 : VLD1Q<{1,1,?,?}, "64">; -def VLD1q8Pseudo : VLDQPseudo<IIC_VLD1x2>; -def VLD1q16Pseudo : VLDQPseudo<IIC_VLD1x2>; -def VLD1q32Pseudo : VLDQPseudo<IIC_VLD1x2>; -def VLD1q64Pseudo : VLDQPseudo<IIC_VLD1x2>; - // ...with address register writeback: -class VLD1DWB<bits<4> op7_4, string Dt> - : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$Vd, GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1u, - "vld1", Dt, "\\{$Vd\\}, $Rn$Rm", - "$Rn.addr = $wb", []> { - let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDInstruction"; +multiclass VLD1DWB<bits<4> op7_4, string Dt> { + def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb), + (ins addrmode6:$Rn), IIC_VLD1u, + "vld1", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
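+    // Per the ARM ARM encoding, Rm == 0b1101 selects writeback by the
+    // transfer size (the "$Rn!" form); Rm == 0b1111 means no writeback, and
+    // any other value gives the post-index-by-register "_register" form.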
+ let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLDInstruction"; + let AsmMatchConverter = "cvtVLDwbFixed"; + } + def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb), + (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1u, + "vld1", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLDInstruction"; + let AsmMatchConverter = "cvtVLDwbRegister"; + } } -class VLD1QWB<bits<4> op7_4, string Dt> - : NLdSt<0,0b10,0b1010,op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1x2u, - "vld1", Dt, "\\{$Vd, $dst2\\}, $Rn$Rm", - "$Rn.addr = $wb", []> { - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; +multiclass VLD1QWB<bits<4> op7_4, string Dt> { + def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb), + (ins addrmode6:$Rn), IIC_VLD1x2u, + "vld1", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDInstruction"; + let AsmMatchConverter = "cvtVLDwbFixed"; + } + def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb), + (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u, + "vld1", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDInstruction"; + let AsmMatchConverter = "cvtVLDwbRegister"; + } } -def VLD1d8_UPD : VLD1DWB<{0,0,0,?}, "8">; -def VLD1d16_UPD : VLD1DWB<{0,1,0,?}, "16">; -def VLD1d32_UPD : VLD1DWB<{1,0,0,?}, "32">; -def VLD1d64_UPD : VLD1DWB<{1,1,0,?}, "64">; - -def VLD1q8_UPD : VLD1QWB<{0,0,?,?}, "8">; -def VLD1q16_UPD : VLD1QWB<{0,1,?,?}, "16">; -def VLD1q32_UPD : VLD1QWB<{1,0,?,?}, "32">; -def VLD1q64_UPD : VLD1QWB<{1,1,?,?}, "64">; - -def VLD1q8Pseudo_UPD : VLDQWBPseudo<IIC_VLD1x2u>; -def VLD1q16Pseudo_UPD : VLDQWBPseudo<IIC_VLD1x2u>; -def VLD1q32Pseudo_UPD : VLDQWBPseudo<IIC_VLD1x2u>; -def VLD1q64Pseudo_UPD : VLDQWBPseudo<IIC_VLD1x2u>; +defm VLD1d8wb : VLD1DWB<{0,0,0,?}, "8">; +defm VLD1d16wb : VLD1DWB<{0,1,0,?}, "16">; +defm VLD1d32wb : VLD1DWB<{1,0,0,?}, "32">; +defm VLD1d64wb : VLD1DWB<{1,1,0,?}, "64">; +defm VLD1q8wb : VLD1QWB<{0,0,?,?}, "8">; +defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16">; +defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32">; +defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64">; -// ...with 3 registers (some of these are only for the disassembler): +// ...with 3 registers class VLD1D3<bits<4> op7_4, string Dt> - : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3), + : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd), (ins addrmode6:$Rn), IIC_VLD1x3, "vld1", Dt, - "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> { + "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDInstruction"; } -class VLD1D3WB<bits<4> op7_4, string Dt> - : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1x3u, "vld1", Dt, - "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm", "$Rn.addr = $wb", []> { - let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDInstruction"; +multiclass VLD1D3WB<bits<4> op7_4, string Dt> { + def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb), + (ins addrmode6:$Rn), IIC_VLD1x2u, + "vld1", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
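    // Illustrative only (arbitrary registers): the three-register list form
    // covered here, in both writeback flavors:
    //
    //   vld1.8 {d16, d17, d18}, [r0]!      @ _fixed: r0 += 24
    //   vld1.8 {d16, d17, d18}, [r0], r2   @ _register: r0 += r2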
+ let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLDInstruction"; + let AsmMatchConverter = "cvtVLDwbFixed"; + } + def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb), + (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u, + "vld1", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLDInstruction"; + let AsmMatchConverter = "cvtVLDwbRegister"; + } } def VLD1d8T : VLD1D3<{0,0,0,?}, "8">; @@ -301,31 +695,40 @@ def VLD1d16T : VLD1D3<{0,1,0,?}, "16">; def VLD1d32T : VLD1D3<{1,0,0,?}, "32">; def VLD1d64T : VLD1D3<{1,1,0,?}, "64">; -def VLD1d8T_UPD : VLD1D3WB<{0,0,0,?}, "8">; -def VLD1d16T_UPD : VLD1D3WB<{0,1,0,?}, "16">; -def VLD1d32T_UPD : VLD1D3WB<{1,0,0,?}, "32">; -def VLD1d64T_UPD : VLD1D3WB<{1,1,0,?}, "64">; +defm VLD1d8Twb : VLD1D3WB<{0,0,0,?}, "8">; +defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16">; +defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32">; +defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64">; -def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>; -def VLD1d64TPseudo_UPD : VLDQQWBPseudo<IIC_VLD1x3u>; +def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>; -// ...with 4 registers (some of these are only for the disassembler): +// ...with 4 registers class VLD1D4<bits<4> op7_4, string Dt> - : NLdSt<0,0b10,0b0010,op7_4,(outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4), + : NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd), (ins addrmode6:$Rn), IIC_VLD1x4, "vld1", Dt, - "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> { + "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDInstruction"; } -class VLD1D4WB<bits<4> op7_4, string Dt> - : NLdSt<0,0b10,0b0010,op7_4, - (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1x4u, "vld1", Dt, - "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm", "$Rn.addr = $wb", - []> { - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; +multiclass VLD1D4WB<bits<4> op7_4, string Dt> { + def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb), + (ins addrmode6:$Rn), IIC_VLD1x2u, + "vld1", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
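    // Illustrative only (arbitrary registers): the four-register list form,
    // which fills two Q registers' worth of data, e.g.
    //
    //   vld1.32 {d16, d17, d18, d19}, [r0]!    @ _fixed: r0 += 32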
+ let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDInstruction"; + let AsmMatchConverter = "cvtVLDwbFixed"; + } + def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb), + (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u, + "vld1", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDInstruction"; + let AsmMatchConverter = "cvtVLDwbRegister"; + } } def VLD1d8Q : VLD1D4<{0,0,?,?}, "8">; @@ -333,91 +736,80 @@ def VLD1d16Q : VLD1D4<{0,1,?,?}, "16">; def VLD1d32Q : VLD1D4<{1,0,?,?}, "32">; def VLD1d64Q : VLD1D4<{1,1,?,?}, "64">; -def VLD1d8Q_UPD : VLD1D4WB<{0,0,?,?}, "8">; -def VLD1d16Q_UPD : VLD1D4WB<{0,1,?,?}, "16">; -def VLD1d32Q_UPD : VLD1D4WB<{1,0,?,?}, "32">; -def VLD1d64Q_UPD : VLD1D4WB<{1,1,?,?}, "64">; +defm VLD1d8Qwb : VLD1D4WB<{0,0,?,?}, "8">; +defm VLD1d16Qwb : VLD1D4WB<{0,1,?,?}, "16">; +defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32">; +defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64">; -def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>; -def VLD1d64QPseudo_UPD : VLDQQWBPseudo<IIC_VLD1x4u>; +def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>; // VLD2 : Vector Load (multiple 2-element structures) -class VLD2D<bits<4> op11_8, bits<4> op7_4, string Dt> - : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2), - (ins addrmode6:$Rn), IIC_VLD2, - "vld2", Dt, "\\{$Vd, $dst2\\}, $Rn", "", []> { - let Rm = 0b1111; - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; -} -class VLD2Q<bits<4> op7_4, string Dt> - : NLdSt<0, 0b10, 0b0011, op7_4, - (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4), - (ins addrmode6:$Rn), IIC_VLD2x2, - "vld2", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> { +class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, + InstrItinClass itin> + : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd), + (ins addrmode6:$Rn), itin, + "vld2", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDInstruction"; } -def VLD2d8 : VLD2D<0b1000, {0,0,?,?}, "8">; -def VLD2d16 : VLD2D<0b1000, {0,1,?,?}, "16">; -def VLD2d32 : VLD2D<0b1000, {1,0,?,?}, "32">; +def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2>; +def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2>; +def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2>; -def VLD2q8 : VLD2Q<{0,0,?,?}, "8">; -def VLD2q16 : VLD2Q<{0,1,?,?}, "16">; -def VLD2q32 : VLD2Q<{1,0,?,?}, "32">; - -def VLD2d8Pseudo : VLDQPseudo<IIC_VLD2>; -def VLD2d16Pseudo : VLDQPseudo<IIC_VLD2>; -def VLD2d32Pseudo : VLDQPseudo<IIC_VLD2>; +def VLD2q8 : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2>; +def VLD2q16 : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2>; +def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2>; def VLD2q8Pseudo : VLDQQPseudo<IIC_VLD2x2>; def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>; def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>; // ...with address register writeback: -class VLD2DWB<bits<4> op11_8, bits<4> op7_4, string Dt> - : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD2u, - "vld2", Dt, "\\{$Vd, $dst2\\}, $Rn$Rm", - "$Rn.addr = $wb", []> { - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; -} -class VLD2QWB<bits<4> op7_4, string Dt> - : NLdSt<0, 0b10, 0b0011, op7_4, - (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD2x2u, - "vld2", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm", - "$Rn.addr = 
$wb", []> { - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; +multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt, + RegisterOperand VdTy, InstrItinClass itin> { + def _fixed : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb), + (ins addrmode6:$Rn), itin, + "vld2", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDInstruction"; + let AsmMatchConverter = "cvtVLDwbFixed"; + } + def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb), + (ins addrmode6:$Rn, rGPR:$Rm), itin, + "vld2", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDInstruction"; + let AsmMatchConverter = "cvtVLDwbRegister"; + } } -def VLD2d8_UPD : VLD2DWB<0b1000, {0,0,?,?}, "8">; -def VLD2d16_UPD : VLD2DWB<0b1000, {0,1,?,?}, "16">; -def VLD2d32_UPD : VLD2DWB<0b1000, {1,0,?,?}, "32">; +defm VLD2d8wb : VLD2WB<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2u>; +defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u>; +defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u>; -def VLD2q8_UPD : VLD2QWB<{0,0,?,?}, "8">; -def VLD2q16_UPD : VLD2QWB<{0,1,?,?}, "16">; -def VLD2q32_UPD : VLD2QWB<{1,0,?,?}, "32">; +defm VLD2q8wb : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u>; +defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u>; +defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u>; -def VLD2d8Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>; -def VLD2d16Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>; -def VLD2d32Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>; +def VLD2q8PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>; +def VLD2q16PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>; +def VLD2q32PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>; +def VLD2q8PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>; +def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>; +def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>; -def VLD2q8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>; -def VLD2q16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>; -def VLD2q32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>; - -// ...with double-spaced registers (for disassembly only): -def VLD2b8 : VLD2D<0b1001, {0,0,?,?}, "8">; -def VLD2b16 : VLD2D<0b1001, {0,1,?,?}, "16">; -def VLD2b32 : VLD2D<0b1001, {1,0,?,?}, "32">; -def VLD2b8_UPD : VLD2DWB<0b1001, {0,0,?,?}, "8">; -def VLD2b16_UPD : VLD2DWB<0b1001, {0,1,?,?}, "16">; -def VLD2b32_UPD : VLD2DWB<0b1001, {1,0,?,?}, "32">; +// ...with double-spaced registers +def VLD2b8 : VLD2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2>; +def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2>; +def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2>; +defm VLD2b8wb : VLD2WB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2u>; +defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u>; +defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2u>; // VLD3 : Vector Load (multiple 3-element structures) class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt> @@ -601,12 +993,11 @@ def VLD1LNd8 : VLD1LN<0b0000, {?,?,?,0}, "8", v8i8, extloadi8> { } def VLD1LNd16 : VLD1LN<0b0100, {?,?,0,?}, "16", v4i16, extloadi16> { let Inst{7-6} = lane{1-0}; - let Inst{4} = Rn{4}; + let Inst{5-4} = Rn{5-4}; } def VLD1LNd32 : VLD1LN32<0b1000, {?,0,?,?}, "32", v2i32, load> { let 
Inst{7} = lane{0}; - let Inst{5} = Rn{4}; - let Inst{4} = Rn{4}; + let Inst{5-4} = Rn{5-4}; } def VLD1LNq8Pseudo : VLD1QLNPseudo<v16i8, extloadi8>; @@ -776,7 +1167,7 @@ def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16"> { let Inst{7-6} = lane{1-0}; } def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> { - let Inst{7} = lane{0}; + let Inst{7} = lane{0}; } def VLD3LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>; @@ -787,7 +1178,7 @@ def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> { let Inst{7-6} = lane{1-0}; } def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> { - let Inst{7} = lane{0}; + let Inst{7} = lane{0}; } def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>; @@ -802,7 +1193,7 @@ class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt> "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn", "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []> { let Rm = 0b1111; - let Inst{4} = Rn{4}; + let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD4LN"; } @@ -813,7 +1204,7 @@ def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16"> { let Inst{7-6} = lane{1-0}; } def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> { - let Inst{7} = lane{0}; + let Inst{7} = lane{0}; let Inst{5} = Rn{5}; } @@ -826,7 +1217,7 @@ def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> { let Inst{7-6} = lane{1-0}; } def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> { - let Inst{7} = lane{0}; + let Inst{7} = lane{0}; let Inst{5} = Rn{5}; } @@ -854,7 +1245,7 @@ def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16"> { let Inst{7-6} = lane{1-0}; } def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> { - let Inst{7} = lane{0}; + let Inst{7} = lane{0}; let Inst{5} = Rn{5}; } @@ -866,7 +1257,7 @@ def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> { let Inst{7-6} = lane{1-0}; } def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> { - let Inst{7} = lane{0}; + let Inst{7} = lane{0}; let Inst{5} = Rn{5}; } @@ -877,117 +1268,142 @@ def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>; // VLD1DUP : Vector Load (single element to all lanes) class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp> - : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd), (ins addrmode6dup:$Rn), - IIC_VLD1dup, "vld1", Dt, "\\{$Vd[]\\}, $Rn", "", - [(set DPR:$Vd, (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> { + : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd), + (ins addrmode6dup:$Rn), + IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "", + [(set VecListOneDAllLanes:$Vd, + (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD1DupInstruction"; } -class VLD1QDUPPseudo<ValueType Ty, PatFrag LoadOp> : VLDQPseudo<IIC_VLD1dup> { - let Pattern = [(set QPR:$dst, - (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$addr)))))]; -} - def VLD1DUPd8 : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8>; def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16>; def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load>; -def VLD1DUPq8Pseudo : VLD1QDUPPseudo<v16i8, extloadi8>; -def VLD1DUPq16Pseudo : VLD1QDUPPseudo<v8i16, extloadi16>; -def VLD1DUPq32Pseudo : VLD1QDUPPseudo<v4i32, load>; - def : Pat<(v2f32 (NEONvdup (f32 (load addrmode6dup:$addr)))), (VLD1DUPd32 addrmode6:$addr)>; -def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))), - (VLD1DUPq32Pseudo addrmode6:$addr)>; - -let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { -class VLD1QDUP<bits<4> op7_4, string Dt> - : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, DPR:$dst2), +class VLD1QDUP<bits<4> op7_4, 
string Dt, ValueType Ty, PatFrag LoadOp> + : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListDPairAllLanes:$Vd), (ins addrmode6dup:$Rn), IIC_VLD1dup, - "vld1", Dt, "\\{$Vd[], $dst2[]\\}, $Rn", "", []> { + "vld1", Dt, "$Vd, $Rn", "", + [(set VecListDPairAllLanes:$Vd, + (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD1DupInstruction"; } -def VLD1DUPq8 : VLD1QDUP<{0,0,1,0}, "8">; -def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16">; -def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32">; +def VLD1DUPq8 : VLD1QDUP<{0,0,1,0}, "8", v16i8, extloadi8>; +def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16>; +def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load>; +def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))), + (VLD1DUPq32 addrmode6:$addr)>; + +let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { // ...with address register writeback: -class VLD1DUPWB<bits<4> op7_4, string Dt> - : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, GPR:$wb), - (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD1dupu, - "vld1", Dt, "\\{$Vd[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> { - let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLD1DupInstruction"; +multiclass VLD1DUPWB<bits<4> op7_4, string Dt> { + def _fixed : NLdSt<1, 0b10, 0b1100, op7_4, + (outs VecListOneDAllLanes:$Vd, GPR:$wb), + (ins addrmode6dup:$Rn), IIC_VLD1dupu, + "vld1", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD1DupInstruction"; + let AsmMatchConverter = "cvtVLDwbFixed"; + } + def _register : NLdSt<1, 0b10, 0b1100, op7_4, + (outs VecListOneDAllLanes:$Vd, GPR:$wb), + (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu, + "vld1", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD1DupInstruction"; + let AsmMatchConverter = "cvtVLDwbRegister"; + } } -class VLD1QDUPWB<bits<4> op7_4, string Dt> - : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb), - (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD1dupu, - "vld1", Dt, "\\{$Vd[], $dst2[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> { - let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLD1DupInstruction"; +multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> { + def _fixed : NLdSt<1, 0b10, 0b1100, op7_4, + (outs VecListDPairAllLanes:$Vd, GPR:$wb), + (ins addrmode6dup:$Rn), IIC_VLD1dupu, + "vld1", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
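    // Illustrative only (arbitrary registers): the all-lanes forms use the
    // bracketed-list syntax, and the fixed writeback increment is the size
    // of the single element loaded, e.g.
    //
    //   vld1.32 {d16[]}, [r0]!         @ VecListOneDAllLanes; r0 += 4
    //   vld1.32 {d16[], d17[]}, [r0]!  @ VecListDPairAllLanes; still r0 += 4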
+ let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD1DupInstruction"; + let AsmMatchConverter = "cvtVLDwbFixed"; + } + def _register : NLdSt<1, 0b10, 0b1100, op7_4, + (outs VecListDPairAllLanes:$Vd, GPR:$wb), + (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu, + "vld1", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD1DupInstruction"; + let AsmMatchConverter = "cvtVLDwbRegister"; + } } -def VLD1DUPd8_UPD : VLD1DUPWB<{0,0,0,0}, "8">; -def VLD1DUPd16_UPD : VLD1DUPWB<{0,1,0,?}, "16">; -def VLD1DUPd32_UPD : VLD1DUPWB<{1,0,0,?}, "32">; - -def VLD1DUPq8_UPD : VLD1QDUPWB<{0,0,1,0}, "8">; -def VLD1DUPq16_UPD : VLD1QDUPWB<{0,1,1,?}, "16">; -def VLD1DUPq32_UPD : VLD1QDUPWB<{1,0,1,?}, "32">; +defm VLD1DUPd8wb : VLD1DUPWB<{0,0,0,0}, "8">; +defm VLD1DUPd16wb : VLD1DUPWB<{0,1,0,?}, "16">; +defm VLD1DUPd32wb : VLD1DUPWB<{1,0,0,?}, "32">; -def VLD1DUPq8Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>; -def VLD1DUPq16Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>; -def VLD1DUPq32Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>; +defm VLD1DUPq8wb : VLD1QDUPWB<{0,0,1,0}, "8">; +defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16">; +defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32">; // VLD2DUP : Vector Load (single 2-element structure to all lanes) -class VLD2DUP<bits<4> op7_4, string Dt> - : NLdSt<1, 0b10, 0b1101, op7_4, (outs DPR:$Vd, DPR:$dst2), +class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy> + : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd), (ins addrmode6dup:$Rn), IIC_VLD2dup, - "vld2", Dt, "\\{$Vd[], $dst2[]\\}, $Rn", "", []> { + "vld2", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD2DupInstruction"; } -def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8">; -def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16">; -def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32">; - -def VLD2DUPd8Pseudo : VLDQPseudo<IIC_VLD2dup>; -def VLD2DUPd16Pseudo : VLDQPseudo<IIC_VLD2dup>; -def VLD2DUPd32Pseudo : VLDQPseudo<IIC_VLD2dup>; +def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8", VecListDPairAllLanes>; +def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListDPairAllLanes>; +def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListDPairAllLanes>; -// ...with double-spaced registers (not used for codegen): -def VLD2DUPd8x2 : VLD2DUP<{0,0,1,?}, "8">; -def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16">; -def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32">; +// ...with double-spaced registers +def VLD2DUPd8x2 : VLD2DUP<{0,0,1,?}, "8", VecListDPairSpacedAllLanes>; +def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes>; +def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes>; // ...with address register writeback: -class VLD2DUPWB<bits<4> op7_4, string Dt> - : NLdSt<1, 0b10, 0b1101, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb), - (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD2dupu, - "vld2", Dt, "\\{$Vd[], $dst2[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> { - let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLD2DupInstruction"; +multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy> { + def _fixed : NLdSt<1, 0b10, 0b1101, op7_4, + (outs VdTy:$Vd, GPR:$wb), + (ins addrmode6dup:$Rn), IIC_VLD2dupu, + "vld2", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
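    // Illustrative only (arbitrary registers): vld2 all-lanes forms, single-
    // and double-spaced; two elements are loaded, so fixed writeback adds
    // twice the element size, e.g.
    //
    //   vld2.8  {d16[], d17[]}, [r0]!   @ VecListDPairAllLanes; r0 += 2
    //   vld2.16 {d16[], d18[]}, [r0]!   @ VecListDPairSpacedAllLanes; r0 += 4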
+ let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD2DupInstruction"; + let AsmMatchConverter = "cvtVLDwbFixed"; + } + def _register : NLdSt<1, 0b10, 0b1101, op7_4, + (outs VdTy:$Vd, GPR:$wb), + (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD2dupu, + "vld2", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD2DupInstruction"; + let AsmMatchConverter = "cvtVLDwbRegister"; + } } -def VLD2DUPd8_UPD : VLD2DUPWB<{0,0,0,0}, "8">; -def VLD2DUPd16_UPD : VLD2DUPWB<{0,1,0,?}, "16">; -def VLD2DUPd32_UPD : VLD2DUPWB<{1,0,0,?}, "32">; - -def VLD2DUPd8x2_UPD : VLD2DUPWB<{0,0,1,0}, "8">; -def VLD2DUPd16x2_UPD : VLD2DUPWB<{0,1,1,?}, "16">; -def VLD2DUPd32x2_UPD : VLD2DUPWB<{1,0,1,?}, "32">; +defm VLD2DUPd8wb : VLD2DUPWB<{0,0,0,0}, "8", VecListDPairAllLanes>; +defm VLD2DUPd16wb : VLD2DUPWB<{0,1,0,?}, "16", VecListDPairAllLanes>; +defm VLD2DUPd32wb : VLD2DUPWB<{1,0,0,?}, "32", VecListDPairAllLanes>; -def VLD2DUPd8Pseudo_UPD : VLDQWBPseudo<IIC_VLD2dupu>; -def VLD2DUPd16Pseudo_UPD : VLDQWBPseudo<IIC_VLD2dupu>; -def VLD2DUPd32Pseudo_UPD : VLDQWBPseudo<IIC_VLD2dupu>; +defm VLD2DUPd8x2wb : VLD2DUPWB<{0,0,1,0}, "8", VecListDPairSpacedAllLanes>; +defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes>; +defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes>; // VLD3DUP : Vector Load (single 3-element structure to all lanes) class VLD3DUP<bits<4> op7_4, string Dt> @@ -1008,9 +1424,9 @@ def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>; def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>; // ...with double-spaced registers (not used for codegen): -def VLD3DUPd8x2 : VLD3DUP<{0,0,1,?}, "8">; -def VLD3DUPd16x2 : VLD3DUP<{0,1,1,?}, "16">; -def VLD3DUPd32x2 : VLD3DUP<{1,0,1,?}, "32">; +def VLD3DUPq8 : VLD3DUP<{0,0,1,?}, "8">; +def VLD3DUPq16 : VLD3DUP<{0,1,1,?}, "16">; +def VLD3DUPq32 : VLD3DUP<{1,0,1,?}, "32">; // ...with address register writeback: class VLD3DUPWB<bits<4> op7_4, string Dt> @@ -1026,9 +1442,9 @@ def VLD3DUPd8_UPD : VLD3DUPWB<{0,0,0,0}, "8">; def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16">; def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32">; -def VLD3DUPd8x2_UPD : VLD3DUPWB<{0,0,1,0}, "8">; -def VLD3DUPd16x2_UPD : VLD3DUPWB<{0,1,1,?}, "16">; -def VLD3DUPd32x2_UPD : VLD3DUPWB<{1,0,1,?}, "32">; +def VLD3DUPq8_UPD : VLD3DUPWB<{0,0,1,0}, "8">; +def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16">; +def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32">; def VLD3DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>; def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>; @@ -1054,9 +1470,9 @@ def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>; def VLD4DUPd32Pseudo : VLDQQPseudo<IIC_VLD4dup>; // ...with double-spaced registers (not used for codegen): -def VLD4DUPd8x2 : VLD4DUP<{0,0,1,?}, "8">; -def VLD4DUPd16x2 : VLD4DUP<{0,1,1,?}, "16">; -def VLD4DUPd32x2 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; } +def VLD4DUPq8 : VLD4DUP<{0,0,1,?}, "8">; +def VLD4DUPq16 : VLD4DUP<{0,1,1,?}, "16">; +def VLD4DUPq32 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; } // ...with address register writeback: class VLD4DUPWB<bits<4> op7_4, string Dt> @@ -1073,9 +1489,9 @@ def VLD4DUPd8_UPD : VLD4DUPWB<{0,0,0,0}, "8">; def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">; def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; } -def VLD4DUPd8x2_UPD : VLD4DUPWB<{0,0,1,0}, "8">; -def VLD4DUPd16x2_UPD : VLD4DUPWB<{0,1,1,?}, "16">; -def VLD4DUPd32x2_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; } +def VLD4DUPq8_UPD : VLD4DUPWB<{0,0,1,0}, "8">; +def 
VLD4DUPq16_UPD : VLD4DUPWB<{0,1,1,?}, "16">; +def VLD4DUPq32_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; } def VLD4DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>; def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>; @@ -1093,12 +1509,29 @@ class VSTQWBPseudo<InstrItinClass itin> : PseudoNLdSt<(outs GPR:$wb), (ins addrmode6:$addr, am6offset:$offset, QPR:$src), itin, "$addr.addr = $wb">; +class VSTQWBfixedPseudo<InstrItinClass itin> + : PseudoNLdSt<(outs GPR:$wb), + (ins addrmode6:$addr, QPR:$src), itin, + "$addr.addr = $wb">; +class VSTQWBregisterPseudo<InstrItinClass itin> + : PseudoNLdSt<(outs GPR:$wb), + (ins addrmode6:$addr, rGPR:$offset, QPR:$src), itin, + "$addr.addr = $wb">; class VSTQQPseudo<InstrItinClass itin> : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), itin, "">; class VSTQQWBPseudo<InstrItinClass itin> : PseudoNLdSt<(outs GPR:$wb), (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), itin, "$addr.addr = $wb">; +class VSTQQWBfixedPseudo<InstrItinClass itin> + : PseudoNLdSt<(outs GPR:$wb), + (ins addrmode6:$addr, QQPR:$src), itin, + "$addr.addr = $wb">; +class VSTQQWBregisterPseudo<InstrItinClass itin> + : PseudoNLdSt<(outs GPR:$wb), + (ins addrmode6:$addr, rGPR:$offset, QQPR:$src), itin, + "$addr.addr = $wb">; + class VSTQQQQPseudo<InstrItinClass itin> : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src), itin, "">; class VSTQQQQWBPseudo<InstrItinClass itin> @@ -1108,16 +1541,15 @@ class VSTQQQQWBPseudo<InstrItinClass itin> // VST1 : Vector Store (multiple single elements) class VST1D<bits<4> op7_4, string Dt> - : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$Rn, DPR:$Vd), - IIC_VST1, "vst1", Dt, "\\{$Vd\\}, $Rn", "", []> { + : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$Rn, VecListOneD:$Vd), + IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVSTInstruction"; } class VST1Q<bits<4> op7_4, string Dt> - : NLdSt<0,0b00,0b1010,op7_4, (outs), - (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2), IIC_VST1x2, - "vst1", Dt, "\\{$Vd, $src2\\}, $Rn", "", []> { + : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$Rn, VecListDPair:$Vd), + IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVSTInstruction"; @@ -1133,185 +1565,233 @@ def VST1q16 : VST1Q<{0,1,?,?}, "16">; def VST1q32 : VST1Q<{1,0,?,?}, "32">; def VST1q64 : VST1Q<{1,1,?,?}, "64">; -def VST1q8Pseudo : VSTQPseudo<IIC_VST1x2>; -def VST1q16Pseudo : VSTQPseudo<IIC_VST1x2>; -def VST1q32Pseudo : VSTQPseudo<IIC_VST1x2>; -def VST1q64Pseudo : VSTQPseudo<IIC_VST1x2>; - // ...with address register writeback: -class VST1DWB<bits<4> op7_4, string Dt> - : NLdSt<0, 0b00, 0b0111, op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd), IIC_VST1u, - "vst1", Dt, "\\{$Vd\\}, $Rn$Rm", "$Rn.addr = $wb", []> { - let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVSTInstruction"; +multiclass VST1DWB<bits<4> op7_4, string Dt> { + def _fixed : NLdSt<0,0b00, 0b0111,op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, VecListOneD:$Vd), IIC_VLD1u, + "vst1", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
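    // Illustrative only (arbitrary registers): the store side mirrors the
    // vld1 fixed/register writeback split, e.g.
    //
    //   vst1.8 {d16}, [r0]!       @ _fixed: r0 += 8
    //   vst1.8 {d16}, [r0], r2    @ _register: r0 += r2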
+ let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVSTInstruction"; + let AsmMatchConverter = "cvtVSTwbFixed"; + } + def _register : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, rGPR:$Rm, VecListOneD:$Vd), + IIC_VLD1u, + "vst1", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVSTInstruction"; + let AsmMatchConverter = "cvtVSTwbRegister"; + } } -class VST1QWB<bits<4> op7_4, string Dt> - : NLdSt<0, 0b00, 0b1010, op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd, DPR:$src2), - IIC_VST1x2u, "vst1", Dt, "\\{$Vd, $src2\\}, $Rn$Rm", - "$Rn.addr = $wb", []> { - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; +multiclass VST1QWB<bits<4> op7_4, string Dt> { + def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, VecListDPair:$Vd), IIC_VLD1x2u, + "vst1", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; + let AsmMatchConverter = "cvtVSTwbFixed"; + } + def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, rGPR:$Rm, VecListDPair:$Vd), + IIC_VLD1x2u, + "vst1", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; + let AsmMatchConverter = "cvtVSTwbRegister"; + } } -def VST1d8_UPD : VST1DWB<{0,0,0,?}, "8">; -def VST1d16_UPD : VST1DWB<{0,1,0,?}, "16">; -def VST1d32_UPD : VST1DWB<{1,0,0,?}, "32">; -def VST1d64_UPD : VST1DWB<{1,1,0,?}, "64">; - -def VST1q8_UPD : VST1QWB<{0,0,?,?}, "8">; -def VST1q16_UPD : VST1QWB<{0,1,?,?}, "16">; -def VST1q32_UPD : VST1QWB<{1,0,?,?}, "32">; -def VST1q64_UPD : VST1QWB<{1,1,?,?}, "64">; +defm VST1d8wb : VST1DWB<{0,0,0,?}, "8">; +defm VST1d16wb : VST1DWB<{0,1,0,?}, "16">; +defm VST1d32wb : VST1DWB<{1,0,0,?}, "32">; +defm VST1d64wb : VST1DWB<{1,1,0,?}, "64">; -def VST1q8Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>; -def VST1q16Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>; -def VST1q32Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>; -def VST1q64Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>; +defm VST1q8wb : VST1QWB<{0,0,?,?}, "8">; +defm VST1q16wb : VST1QWB<{0,1,?,?}, "16">; +defm VST1q32wb : VST1QWB<{1,0,?,?}, "32">; +defm VST1q64wb : VST1QWB<{1,1,?,?}, "64">; -// ...with 3 registers (some of these are only for the disassembler): +// ...with 3 registers class VST1D3<bits<4> op7_4, string Dt> : NLdSt<0, 0b00, 0b0110, op7_4, (outs), - (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), - IIC_VST1x3, "vst1", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> { + (ins addrmode6:$Rn, VecListThreeD:$Vd), + IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVSTInstruction"; } -class VST1D3WB<bits<4> op7_4, string Dt> - : NLdSt<0, 0b00, 0b0110, op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm, - DPR:$Vd, DPR:$src2, DPR:$src3), - IIC_VST1x3u, "vst1", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm", - "$Rn.addr = $wb", []> { - let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVSTInstruction"; +multiclass VST1D3WB<bits<4> op7_4, string Dt> { + def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, VecListThreeD:$Vd), IIC_VLD1x3u, + "vst1", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
+ let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; + let AsmMatchConverter = "cvtVSTwbFixed"; + } + def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, rGPR:$Rm, VecListThreeD:$Vd), + IIC_VLD1x3u, + "vst1", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; + let AsmMatchConverter = "cvtVSTwbRegister"; + } } -def VST1d8T : VST1D3<{0,0,0,?}, "8">; -def VST1d16T : VST1D3<{0,1,0,?}, "16">; -def VST1d32T : VST1D3<{1,0,0,?}, "32">; -def VST1d64T : VST1D3<{1,1,0,?}, "64">; +def VST1d8T : VST1D3<{0,0,0,?}, "8">; +def VST1d16T : VST1D3<{0,1,0,?}, "16">; +def VST1d32T : VST1D3<{1,0,0,?}, "32">; +def VST1d64T : VST1D3<{1,1,0,?}, "64">; -def VST1d8T_UPD : VST1D3WB<{0,0,0,?}, "8">; -def VST1d16T_UPD : VST1D3WB<{0,1,0,?}, "16">; -def VST1d32T_UPD : VST1D3WB<{1,0,0,?}, "32">; -def VST1d64T_UPD : VST1D3WB<{1,1,0,?}, "64">; +defm VST1d8Twb : VST1D3WB<{0,0,0,?}, "8">; +defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16">; +defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32">; +defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64">; -def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>; -def VST1d64TPseudo_UPD : VSTQQWBPseudo<IIC_VST1x3u>; +def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>; +def VST1d64TPseudoWB_fixed : VSTQQWBPseudo<IIC_VST1x3u>; +def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>; -// ...with 4 registers (some of these are only for the disassembler): +// ...with 4 registers class VST1D4<bits<4> op7_4, string Dt> : NLdSt<0, 0b00, 0b0010, op7_4, (outs), - (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), - IIC_VST1x4, "vst1", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn", "", + (ins addrmode6:$Rn, VecListFourD:$Vd), + IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVSTInstruction"; } -class VST1D4WB<bits<4> op7_4, string Dt> - : NLdSt<0, 0b00, 0b0010, op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm, - DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST1x4u, - "vst1", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm", - "$Rn.addr = $wb", []> { - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; +multiclass VST1D4WB<bits<4> op7_4, string Dt> { + def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VLD1x4u, + "vst1", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
+ let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; + let AsmMatchConverter = "cvtVSTwbFixed"; + } + def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd), + IIC_VLD1x4u, + "vst1", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; + let AsmMatchConverter = "cvtVSTwbRegister"; + } } -def VST1d8Q : VST1D4<{0,0,?,?}, "8">; -def VST1d16Q : VST1D4<{0,1,?,?}, "16">; -def VST1d32Q : VST1D4<{1,0,?,?}, "32">; -def VST1d64Q : VST1D4<{1,1,?,?}, "64">; +def VST1d8Q : VST1D4<{0,0,?,?}, "8">; +def VST1d16Q : VST1D4<{0,1,?,?}, "16">; +def VST1d32Q : VST1D4<{1,0,?,?}, "32">; +def VST1d64Q : VST1D4<{1,1,?,?}, "64">; -def VST1d8Q_UPD : VST1D4WB<{0,0,?,?}, "8">; -def VST1d16Q_UPD : VST1D4WB<{0,1,?,?}, "16">; -def VST1d32Q_UPD : VST1D4WB<{1,0,?,?}, "32">; -def VST1d64Q_UPD : VST1D4WB<{1,1,?,?}, "64">; +defm VST1d8Qwb : VST1D4WB<{0,0,?,?}, "8">; +defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16">; +defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32">; +defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64">; -def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>; -def VST1d64QPseudo_UPD : VSTQQWBPseudo<IIC_VST1x4u>; +def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>; +def VST1d64QPseudoWB_fixed : VSTQQWBPseudo<IIC_VST1x4u>; +def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>; // VST2 : Vector Store (multiple 2-element structures) -class VST2D<bits<4> op11_8, bits<4> op7_4, string Dt> - : NLdSt<0, 0b00, op11_8, op7_4, (outs), - (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2), - IIC_VST2, "vst2", Dt, "\\{$Vd, $src2\\}, $Rn", "", []> { - let Rm = 0b1111; - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; -} -class VST2Q<bits<4> op7_4, string Dt> - : NLdSt<0, 0b00, 0b0011, op7_4, (outs), - (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), - IIC_VST2x2, "vst2", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn", - "", []> { +class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, + InstrItinClass itin> + : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$Rn, VdTy:$Vd), + itin, "vst2", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVSTInstruction"; } -def VST2d8 : VST2D<0b1000, {0,0,?,?}, "8">; -def VST2d16 : VST2D<0b1000, {0,1,?,?}, "16">; -def VST2d32 : VST2D<0b1000, {1,0,?,?}, "32">; +def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VST2>; +def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2>; +def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2>; -def VST2q8 : VST2Q<{0,0,?,?}, "8">; -def VST2q16 : VST2Q<{0,1,?,?}, "16">; -def VST2q32 : VST2Q<{1,0,?,?}, "32">; - -def VST2d8Pseudo : VSTQPseudo<IIC_VST2>; -def VST2d16Pseudo : VSTQPseudo<IIC_VST2>; -def VST2d32Pseudo : VSTQPseudo<IIC_VST2>; +def VST2q8 : VST2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VST2x2>; +def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2>; +def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2>; def VST2q8Pseudo : VSTQQPseudo<IIC_VST2x2>; def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>; def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>; // ...with address register writeback: -class VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt> - : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd, DPR:$src2), - IIC_VST2u, "vst2", Dt, "\\{$Vd, $src2\\}, $Rn$Rm", - "$Rn.addr = $wb", []> { - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = 
"DecodeVSTInstruction"; +multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt, + RegisterOperand VdTy> { + def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, VdTy:$Vd), IIC_VLD1u, + "vst2", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; + let AsmMatchConverter = "cvtVSTwbFixed"; + } + def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u, + "vst2", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; + let AsmMatchConverter = "cvtVSTwbRegister"; + } } -class VST2QWB<bits<4> op7_4, string Dt> - : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm, - DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST2x2u, - "vst2", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm", - "$Rn.addr = $wb", []> { - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; +multiclass VST2QWB<bits<4> op7_4, string Dt> { + def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VLD1u, + "vst2", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; + let AsmMatchConverter = "cvtVSTwbFixed"; + } + def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd), + IIC_VLD1u, + "vst2", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; + let AsmMatchConverter = "cvtVSTwbRegister"; + } } -def VST2d8_UPD : VST2DWB<0b1000, {0,0,?,?}, "8">; -def VST2d16_UPD : VST2DWB<0b1000, {0,1,?,?}, "16">; -def VST2d32_UPD : VST2DWB<0b1000, {1,0,?,?}, "32">; +defm VST2d8wb : VST2DWB<0b1000, {0,0,?,?}, "8", VecListDPair>; +defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListDPair>; +defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListDPair>; -def VST2q8_UPD : VST2QWB<{0,0,?,?}, "8">; -def VST2q16_UPD : VST2QWB<{0,1,?,?}, "16">; -def VST2q32_UPD : VST2QWB<{1,0,?,?}, "32">; +defm VST2q8wb : VST2QWB<{0,0,?,?}, "8">; +defm VST2q16wb : VST2QWB<{0,1,?,?}, "16">; +defm VST2q32wb : VST2QWB<{1,0,?,?}, "32">; -def VST2d8Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>; -def VST2d16Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>; -def VST2d32Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>; +def VST2q8PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>; +def VST2q16PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>; +def VST2q32PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>; +def VST2q8PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>; +def VST2q16PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>; +def VST2q32PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>; -def VST2q8Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>; -def VST2q16Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>; -def VST2q32Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>; - -// ...with double-spaced registers (for disassembly only): -def VST2b8 : VST2D<0b1001, {0,0,?,?}, "8">; -def VST2b16 : VST2D<0b1001, {0,1,?,?}, "16">; -def VST2b32 : VST2D<0b1001, {1,0,?,?}, "32">; -def VST2b8_UPD : VST2DWB<0b1001, {0,0,?,?}, "8">; -def VST2b16_UPD : VST2DWB<0b1001, {0,1,?,?}, "16">; -def VST2b32_UPD : VST2DWB<0b1001, {1,0,?,?}, "32">; +// ...with double-spaced registers +def VST2b8 : VST2<0b1001, {0,0,?,?}, 
"8", VecListDPairSpaced, IIC_VST2>; +def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VST2>; +def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VST2>; +defm VST2b8wb : VST2DWB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced>; +defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced>; +defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced>; // VST3 : Vector Store (multiple 3-element structures) class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt> @@ -1458,20 +1938,11 @@ class VSTQQQQLNWBPseudo<InstrItinClass itin> // VST1LN : Vector Store (single element from one lane) class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty, - PatFrag StoreOp, SDNode ExtractOp> - : NLdStLn<1, 0b00, op11_8, op7_4, (outs), - (ins addrmode6:$Rn, DPR:$Vd, nohash_imm:$lane), - IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "", - [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), addrmode6:$Rn)]> { - let Rm = 0b1111; - let DecoderMethod = "DecodeVST1LN"; -} -class VST1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty, - PatFrag StoreOp, SDNode ExtractOp> + PatFrag StoreOp, SDNode ExtractOp, Operand AddrMode> : NLdStLn<1, 0b00, op11_8, op7_4, (outs), - (ins addrmode6oneL32:$Rn, DPR:$Vd, nohash_imm:$lane), + (ins AddrMode:$Rn, DPR:$Vd, nohash_imm:$lane), IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "", - [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), addrmode6oneL32:$Rn)]>{ + [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), AddrMode:$Rn)]> { let Rm = 0b1111; let DecoderMethod = "DecodeVST1LN"; } @@ -1482,16 +1953,17 @@ class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp> } def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8, - NEONvgetlaneu> { + NEONvgetlaneu, addrmode6> { let Inst{7-5} = lane{2-0}; } def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16, - NEONvgetlaneu> { + NEONvgetlaneu, addrmode6> { let Inst{7-6} = lane{1-0}; let Inst{4} = Rn{5}; } -def VST1LNd32 : VST1LN32<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt> { +def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt, + addrmode6oneL32> { let Inst{7} = lane{0}; let Inst{5-4} = Rn{5-4}; } @@ -1507,14 +1979,14 @@ def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr), // ...with address register writeback: class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty, - PatFrag StoreOp, SDNode ExtractOp> + PatFrag StoreOp, SDNode ExtractOp, Operand AdrMode> : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm, + (ins AdrMode:$Rn, am6offset:$Rm, DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn$Rm", "$Rn.addr = $wb", [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), - addrmode6:$Rn, am6offset:$Rm))]> { + AdrMode:$Rn, am6offset:$Rm))]> { let DecoderMethod = "DecodeVST1LN"; } class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp> @@ -1524,16 +1996,16 @@ class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp> } def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8, - NEONvgetlaneu> { + NEONvgetlaneu, addrmode6> { let Inst{7-5} = lane{2-0}; } def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16, - NEONvgetlaneu> { + NEONvgetlaneu, addrmode6> { let Inst{7-6} = lane{1-0}; let Inst{4} = Rn{5}; } def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store, - extractelt> { + extractelt, addrmode6oneL32> { let Inst{7} = lane{0}; let 
Inst{5-4} = Rn{5-4}; } @@ -1585,10 +2057,10 @@ def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>; // ...with address register writeback: class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb), - (ins addrmode6:$addr, am6offset:$offset, - DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt, - "\\{$src1[$lane], $src2[$lane]\\}, $addr$offset", - "$addr.addr = $wb", []> { + (ins addrmode6:$Rn, am6offset:$Rm, + DPR:$Vd, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt, + "\\{$Vd[$lane], $src2[$lane]\\}, $Rn$Rm", + "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVST2LN"; } @@ -1914,8 +2386,8 @@ class N3VDSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp> : N3VLane32<0, 1, op21_20, op11_8, 1, 0, - (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane), - NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "", + (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), + NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "", [(set (Ty DPR:$Vd), (Ty (ShOp (Ty DPR:$Vn), (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),imm:$lane)))))]> { @@ -1924,8 +2396,8 @@ class N3VDSL<bits<2> op21_20, bits<4> op11_8, class N3VDSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp> : N3VLane16<0, 1, op21_20, op11_8, 1, 0, - (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane), - NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$Vd, $Vn, $Vm[$lane]","", + (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane), + NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane","", [(set (Ty DPR:$Vd), (Ty (ShOp (Ty DPR:$Vn), (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> { @@ -1954,8 +2426,8 @@ class N3VQSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDNode ShOp> : N3VLane32<1, 1, op21_20, op11_8, 1, 0, - (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane), - NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "", + (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), + NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "", [(set (ResTy QPR:$Vd), (ResTy (ShOp (ResTy QPR:$Vn), (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm), @@ -1965,8 +2437,8 @@ class N3VQSL<bits<2> op21_20, bits<4> op11_8, class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDNode ShOp> : N3VLane16<1, 1, op21_20, op11_8, 1, 0, - (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, nohash_imm:$lane), - NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$Vd, $Vn, $Vm[$lane]","", + (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane), + NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane", "", [(set (ResTy QPR:$Vd), (ResTy (ShOp (ResTy QPR:$Vn), (ResTy (NEONvduplane (OpTy DPR_8:$Vm), @@ -1987,8 +2459,8 @@ class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp> : N3VLane32<0, 1, op21_20, op11_8, 1, 0, - (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane), - NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "", + (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), + NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "", [(set (Ty DPR:$Vd), (Ty (IntOp (Ty DPR:$Vn), (Ty (NEONvduplane (Ty DPR_VFP2:$Vm), @@ -1998,8 +2470,8 @@ 
class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp> : N3VLane16<0, 1, op21_20, op11_8, 1, 0, - (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane), - NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "", + (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane), + NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "", [(set (Ty DPR:$Vd), (Ty (IntOp (Ty DPR:$Vn), (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> { @@ -2028,8 +2500,8 @@ class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3VLane32<1, 1, op21_20, op11_8, 1, 0, - (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane), - NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "", + (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), + NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "", [(set (ResTy QPR:$Vd), (ResTy (IntOp (ResTy QPR:$Vn), (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm), @@ -2040,8 +2512,8 @@ class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3VLane16<1, 1, op21_20, op11_8, 1, 0, - (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, nohash_imm:$lane), - NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "", + (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane), + NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "", [(set (ResTy QPR:$Vd), (ResTy (IntOp (ResTy QPR:$Vn), (ResTy (NEONvduplane (OpTy DPR_8:$Vm), @@ -2073,9 +2545,9 @@ class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp> : N3VLane32<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$Vd), - (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane), + (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), NVMulSLFrm, itin, - OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd", + OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd", [(set (Ty DPR:$Vd), (Ty (ShOp (Ty DPR:$src1), (Ty (MulOp DPR:$Vn, @@ -2086,9 +2558,9 @@ class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, ValueType Ty, SDNode MulOp, SDNode ShOp> : N3VLane16<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$Vd), - (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane), + (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane), NVMulSLFrm, itin, - OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd", + OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd", [(set (Ty DPR:$Vd), (Ty (ShOp (Ty DPR:$src1), (Ty (MulOp DPR:$Vn, @@ -2108,9 +2580,9 @@ class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, SDPatternOperator MulOp, SDPatternOperator ShOp> : N3VLane32<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd), - (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane), + (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), NVMulSLFrm, itin, - OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd", + OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd", [(set (ResTy QPR:$Vd), (ResTy (ShOp (ResTy QPR:$src1), (ResTy (MulOp QPR:$Vn, @@ -2122,9 +2594,9 @@ class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, SDNode MulOp, SDNode ShOp> : N3VLane16<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd), - (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, nohash_imm:$lane), + (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, 
VectorIndex16:$lane), NVMulSLFrm, itin, - OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd", + OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd", [(set (ResTy QPR:$Vd), (ResTy (ShOp (ResTy QPR:$src1), (ResTy (MulOp QPR:$Vn, @@ -2182,9 +2654,9 @@ class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode> : N3VLane32<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd), - (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane), + (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), NVMulSLFrm, itin, - OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd", + OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd", [(set QPR:$Vd, (OpNode (TyQ QPR:$src1), (TyQ (MulOp (TyD DPR:$Vn), @@ -2194,9 +2666,9 @@ class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode> : N3VLane16<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd), - (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane), + (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane), NVMulSLFrm, itin, - OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd", + OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd", [(set QPR:$Vd, (OpNode (TyQ QPR:$src1), (TyQ (MulOp (TyD DPR:$Vn), @@ -2230,9 +2702,9 @@ class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3VLane32<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd), - (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane), + (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), NVMulSLFrm, itin, - OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd", + OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd", [(set (ResTy QPR:$Vd), (ResTy (IntOp (ResTy QPR:$src1), (OpTy DPR:$Vn), @@ -2243,9 +2715,9 @@ class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3VLane16<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd), - (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane), + (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane), NVMulSLFrm, itin, - OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd", + OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd", [(set (ResTy QPR:$Vd), (ResTy (IntOp (ResTy QPR:$src1), (OpTy DPR:$Vn), @@ -2277,8 +2749,8 @@ class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD, SDNode OpNode> : N3VLane32<op24, 1, op21_20, op11_8, 1, 0, - (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane), - NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "", + (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), + NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "", [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vn), (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),imm:$lane)))))]>; @@ -2286,8 +2758,8 @@ class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD, SDNode OpNode> : N3VLane16<op24, 1, op21_20, op11_8, 1, 0, - (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane), - NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "", + (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane), + NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "", [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vn), (TyD (NEONvduplane (TyD DPR_8:$Vm), imm:$lane)))))]>; @@ 
-2332,8 +2804,8 @@ class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3VLane32<op24, 1, op21_20, op11_8, 1, 0, - (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane), - NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "", + (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), + NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "", [(set (ResTy QPR:$Vd), (ResTy (IntOp (OpTy DPR:$Vn), (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm), @@ -2342,8 +2814,8 @@ class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3VLane16<op24, 1, op21_20, op11_8, 1, 0, - (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane), - NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "", + (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane), + NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "", [(set (ResTy QPR:$Vd), (ResTy (IntOp (OpTy DPR:$Vn), (OpTy (NEONvduplane (OpTy DPR_8:$Vm), @@ -2417,9 +2889,9 @@ class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, // Long shift by immediate. class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, SDNode OpNode> + ValueType ResTy, ValueType OpTy, Operand ImmTy, SDNode OpNode> : N2VImm<op24, op23, op11_8, op7, op6, op4, - (outs QPR:$Vd), (ins DPR:$Vm, i32imm:$SIMM), N2RegVShLFrm, + (outs QPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), N2RegVShLFrm, IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "", [(set QPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm), (i32 imm:$SIMM))))]>; @@ -2649,14 +3121,11 @@ multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, v4i32, v4i32, OpNode, Commutable>; } -multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, string Dt, SDNode ShOp> { - def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, !strconcat(Dt, "16"), - v4i16, ShOp>; - def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, !strconcat(Dt,"32"), - v2i32, ShOp>; - def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, !strconcat(Dt, "16"), - v8i16, v4i16, ShOp>; - def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, !strconcat(Dt,"32"), +multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, SDNode ShOp> { + def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, "i16", v4i16, ShOp>; + def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, "i32", v2i32, ShOp>; + def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, "i16", v8i16, v4i16, ShOp>; + def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, "i32", v4i32, v2i32, ShOp>; } @@ -3165,7 +3634,7 @@ multiclass N2VShL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, } multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - SDNode OpNode> { + string baseOpc, SDNode OpNode> { // 64-bit vector types. def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8, OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> { @@ -3199,6 +3668,33 @@ multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64, OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>; // imm6 = xxxxxx + + // Aliases for two-operand forms (source and dest regs the same). 
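  // Illustrative only: for a right shift such as vshr, these aliases let the
  // assembler accept the two-operand spelling and expand it to the canonical
  // three-operand instruction, e.g.
  //
  //   vshr.s8  d16, #7    @ matched as: vshr.s8  d16, d16, #7
  //   vshr.u32 q8,  #3    @ matched as: vshr.u32 q8,  q8,  #3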
+ def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "8 $Vdn, $imm"), + (!cast<Instruction>(!strconcat(baseOpc, "v8i8")) + DPR:$Vdn, DPR:$Vdn, shr_imm8:$imm, pred:$p)>; + def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "16 $Vdn, $imm"), + (!cast<Instruction>(!strconcat(baseOpc, "v4i16")) + DPR:$Vdn, DPR:$Vdn, shr_imm16:$imm, pred:$p)>; + def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "32 $Vdn, $imm"), + (!cast<Instruction>(!strconcat(baseOpc, "v2i32")) + DPR:$Vdn, DPR:$Vdn, shr_imm32:$imm, pred:$p)>; + def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "64 $Vdn, $imm"), + (!cast<Instruction>(!strconcat(baseOpc, "v1i64")) + DPR:$Vdn, DPR:$Vdn, shr_imm64:$imm, pred:$p)>; + + def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "8 $Vdn, $imm"), + (!cast<Instruction>(!strconcat(baseOpc, "v16i8")) + QPR:$Vdn, QPR:$Vdn, shr_imm8:$imm, pred:$p)>; + def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "16 $Vdn, $imm"), + (!cast<Instruction>(!strconcat(baseOpc, "v8i16")) + QPR:$Vdn, QPR:$Vdn, shr_imm16:$imm, pred:$p)>; + def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "32 $Vdn, $imm"), + (!cast<Instruction>(!strconcat(baseOpc, "v4i32")) + QPR:$Vdn, QPR:$Vdn, shr_imm32:$imm, pred:$p)>; + def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "64 $Vdn, $imm"), + (!cast<Instruction>(!strconcat(baseOpc, "v2i64")) + QPR:$Vdn, QPR:$Vdn, shr_imm64:$imm, pred:$p)>; } // Neon Shift-Accumulate vector operations, @@ -3321,15 +3817,15 @@ multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4, string OpcodeStr, string Dt, SDNode OpNode> { def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4, - OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode> { + OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, imm1_7, OpNode> { let Inst{21-19} = 0b001; // imm6 = 001xxx } def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4, - OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode> { + OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, imm1_15, OpNode> { let Inst{21-20} = 0b01; // imm6 = 01xxxx } def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4, - OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode> { + OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, imm1_31, OpNode> { let Inst{21} = 0b1; // imm6 = 1xxxxx } } @@ -3418,7 +3914,7 @@ def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32", v2f32, v2f32, fmul, 1>; def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32", v4f32, v4f32, fmul, 1>; -defm VMULsl : N3VSL_HS<0b1000, "vmul", "i", mul>; +defm VMULsl : N3VSL_HS<0b1000, "vmul", mul>; def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>; def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32, v2f32, fmul>; @@ -3509,10 +4005,10 @@ defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>; def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32", v2f32, fmul_su, fadd_mlx>, - Requires<[HasNEON, UseFPVMLx]>; + Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>; def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32", v4f32, fmul_su, fadd_mlx>, - Requires<[HasNEON, UseFPVMLx]>; + Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>; defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D, IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>; def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32", @@ -3567,10 +4063,10 @@ defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 
0, IIC_VMACi16D, IIC_VMACi32D,
                             IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
 def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
                        v2f32, fmul_su, fsub_mlx>,
-             Requires<[HasNEON, UseFPVMLx]>;
+             Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
 def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
                        v4f32, fmul_su, fsub_mlx>,
-             Requires<[HasNEON, UseFPVMLx]>;
+             Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
 defm VMLSsl  : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
                              IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
 def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
@@ -3619,6 +4115,37 @@ defm VQDMLSL  : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
                      "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
 defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
 
+// Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations.
+def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32",
+                       v2f32, fmul_su, fadd_mlx>,
+             Requires<[HasVFP4,UseFusedMAC]>;
+
+def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32",
+                       v4f32, fmul_su, fadd_mlx>,
+             Requires<[HasVFP4,UseFusedMAC]>;
+
+// Fused Vector Multiply Subtract (floating-point)
+def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32",
+                       v2f32, fmul_su, fsub_mlx>,
+             Requires<[HasVFP4,UseFusedMAC]>;
+def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32",
+                       v4f32, fmul_su, fsub_mlx>,
+             Requires<[HasVFP4,UseFusedMAC]>;
+
+// Match @llvm.fma.* intrinsics. Note the operand order: fma(a, b, c)
+// computes a * b + c, so the addend binds to the tied accumulator $src1.
+def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
+          (VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
+      Requires<[HasVFP4]>;
+def : Pat<(v4f32 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)),
+          (VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
+      Requires<[HasVFP4]>;
+def : Pat<(v2f32 (fma (fneg DPR:$Vn), DPR:$Vm, DPR:$src1)),
+          (VFMSfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
+      Requires<[HasVFP4]>;
+def : Pat<(v4f32 (fma (fneg QPR:$Vn), QPR:$Vm, QPR:$src1)),
+          (VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
+      Requires<[HasVFP4]>;
+
 // Vector Subtract Operations.
// VSUB : Vector Subtract (integer and floating-point) @@ -3741,7 +4268,7 @@ def VORRq : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr", v4i32, v4i32, or, 1>; def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1, - (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src), + (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src), IIC_VMOVImm, "vorr", "i16", "$Vd, $SIMM", "$src = $Vd", [(set DPR:$Vd, @@ -3750,7 +4277,7 @@ def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1, } def VORRiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 0, 1, - (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src), + (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src), IIC_VMOVImm, "vorr", "i32", "$Vd, $SIMM", "$src = $Vd", [(set DPR:$Vd, @@ -3759,7 +4286,7 @@ def VORRiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 0, 1, } def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1, - (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src), + (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src), IIC_VMOVImm, "vorr", "i16", "$Vd, $SIMM", "$src = $Vd", [(set QPR:$Vd, @@ -3768,7 +4295,7 @@ def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1, } def VORRiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 0, 1, - (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src), + (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src), IIC_VMOVImm, "vorr", "i32", "$Vd, $SIMM", "$src = $Vd", [(set QPR:$Vd, @@ -3790,7 +4317,7 @@ def VBICq : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd), (vnotq QPR:$Vm))))]>; def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1, - (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src), + (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src), IIC_VMOVImm, "vbic", "i16", "$Vd, $SIMM", "$src = $Vd", [(set DPR:$Vd, @@ -3799,7 +4326,7 @@ def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1, } def VBICiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 1, 1, - (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src), + (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src), IIC_VMOVImm, "vbic", "i32", "$Vd, $SIMM", "$src = $Vd", [(set DPR:$Vd, @@ -3808,7 +4335,7 @@ def VBICiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 1, 1, } def VBICiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 1, 1, - (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src), + (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src), IIC_VMOVImm, "vbic", "i16", "$Vd, $SIMM", "$src = $Vd", [(set QPR:$Vd, @@ -3817,7 +4344,7 @@ def VBICiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 1, 1, } def VBICiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 1, 1, - (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src), + (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src), IIC_VMOVImm, "vbic", "i32", "$Vd, $SIMM", "$src = $Vd", [(set QPR:$Vd, @@ -3842,28 +4369,28 @@ def VORNq : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$Vd), let isReMaterializable = 1 in { def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$Vd), - (ins nModImm:$SIMM), IIC_VMOVImm, + (ins nImmSplatI16:$SIMM), IIC_VMOVImm, "vmvn", "i16", "$Vd, $SIMM", "", [(set DPR:$Vd, (v4i16 (NEONvmvnImm timm:$SIMM)))]> { let Inst{9} = SIMM{9}; } def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1, (outs QPR:$Vd), - (ins nModImm:$SIMM), IIC_VMOVImm, + (ins nImmSplatI16:$SIMM), IIC_VMOVImm, "vmvn", "i16", "$Vd, $SIMM", "", [(set QPR:$Vd, (v8i16 (NEONvmvnImm timm:$SIMM)))]> { let Inst{9} = SIMM{9}; } def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$Vd), - (ins nModImm:$SIMM), IIC_VMOVImm, + (ins nImmVMOVI32:$SIMM), IIC_VMOVImm, "vmvn", "i32", "$Vd, $SIMM", "", [(set DPR:$Vd, (v2i32 (NEONvmvnImm timm:$SIMM)))]> { let Inst{11-8} = SIMM{11-8}; } def VMVNv4i32 : 
N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$Vd), - (ins nModImm:$SIMM), IIC_VMOVImm, + (ins nImmVMOVI32:$SIMM), IIC_VMOVImm, "vmvn", "i32", "$Vd, $SIMM", "", [(set QPR:$Vd, (v4i32 (NEONvmvnImm timm:$SIMM)))]> { let Inst{11-8} = SIMM{11-8}; @@ -3912,12 +4439,12 @@ def VBIFd : N3VX<1, 0, 0b11, 0b0001, 0, 1, (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD, "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd", - [/* For disassembly only; pattern left blank */]>; + []>; def VBIFq : N3VX<1, 0, 0b11, 0b0001, 1, 1, (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ, "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd", - [/* For disassembly only; pattern left blank */]>; + []>; // VBIT : Vector Bitwise Insert if True // like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst", @@ -3926,12 +4453,12 @@ def VBITd : N3VX<1, 0, 0b10, 0b0001, 0, 1, (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD, "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd", - [/* For disassembly only; pattern left blank */]>; + []>; def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1, (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ, "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd", - [/* For disassembly only; pattern left blank */]>; + []>; // VBIT/VBIF are not yet implemented. The TwoAddress pass will not go looking // for equivalent operations with different register constraints; it just @@ -4119,8 +4646,10 @@ defm VSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm, defm VSHLi : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl>; // VSHR : Vector Shift Right (Immediate) -defm VSHRs : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s",NEONvshrs>; -defm VSHRu : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u",NEONvshru>; +defm VSHRs : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", "VSHRs", + NEONvshrs>; +defm VSHRu : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", "VSHRu", + NEONvshru>; // VSHLL : Vector Shift Left Long defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s", NEONvshlls>; @@ -4129,18 +4658,18 @@ defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u", NEONvshllu>; // VSHLL : Vector Shift Left Long (with maximum shift count) class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7, bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy, - ValueType OpTy, SDNode OpNode> + ValueType OpTy, Operand ImmTy, SDNode OpNode> : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt, - ResTy, OpTy, OpNode> { + ResTy, OpTy, ImmTy, OpNode> { let Inst{21-16} = op21_16; let DecoderMethod = "DecodeVSHLMaxInstruction"; } def VSHLLi8 : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8", - v8i16, v8i8, NEONvshlli>; + v8i16, v8i8, imm8, NEONvshlli>; def VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16", - v4i32, v4i16, NEONvshlli>; + v4i32, v4i16, imm16, NEONvshlli>; def VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32", - v2i64, v2i32, NEONvshlli>; + v2i64, v2i32, imm32, NEONvshlli>; // VSHRN : Vector Shift Right and Narrow defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i", @@ -4154,8 +4683,10 @@ defm VRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, "vrshl", "u", int_arm_neon_vrshiftu>; // VRSHR : Vector Rounding Shift Right -defm VRSHRs : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s",NEONvrshrs>; -defm VRSHRu : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u",NEONvrshru>; +defm VRSHRs : 
N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", "VRSHRs", + NEONvrshrs>; +defm VRSHRu : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", "VRSHRu", + NEONvrshru>; // VRSHRN : Vector Rounding Shift Right and Narrow defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i", @@ -4298,13 +4829,15 @@ def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, IIC_VCNTiQ, "vcnt", "8", v16i8, v16i8, int_arm_neon_vcnt>; -// Vector Swap -- for disassembly only. +// Vector Swap def VSWPd : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0, - (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary, - "vswp", "$Vd, $Vm", "", []>; + (outs DPR:$Vd, DPR:$Vm), (ins DPR:$in1, DPR:$in2), + NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm", + []>; def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0, - (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary, - "vswp", "$Vd, $Vm", "", []>; + (outs QPR:$Vd, QPR:$Vm), (ins QPR:$in1, QPR:$in2), + NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm", + []>; // Vector Move Operations. @@ -4318,89 +4851,98 @@ def : InstAlias<"vmov${p} $Vd, $Vm", let isReMaterializable = 1 in { def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$Vd), - (ins nModImm:$SIMM), IIC_VMOVImm, + (ins nImmSplatI8:$SIMM), IIC_VMOVImm, "vmov", "i8", "$Vd, $SIMM", "", [(set DPR:$Vd, (v8i8 (NEONvmovImm timm:$SIMM)))]>; def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$Vd), - (ins nModImm:$SIMM), IIC_VMOVImm, + (ins nImmSplatI8:$SIMM), IIC_VMOVImm, "vmov", "i8", "$Vd, $SIMM", "", [(set QPR:$Vd, (v16i8 (NEONvmovImm timm:$SIMM)))]>; def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$Vd), - (ins nModImm:$SIMM), IIC_VMOVImm, + (ins nImmSplatI16:$SIMM), IIC_VMOVImm, "vmov", "i16", "$Vd, $SIMM", "", [(set DPR:$Vd, (v4i16 (NEONvmovImm timm:$SIMM)))]> { let Inst{9} = SIMM{9}; } def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$Vd), - (ins nModImm:$SIMM), IIC_VMOVImm, + (ins nImmSplatI16:$SIMM), IIC_VMOVImm, "vmov", "i16", "$Vd, $SIMM", "", [(set QPR:$Vd, (v8i16 (NEONvmovImm timm:$SIMM)))]> { let Inst{9} = SIMM{9}; } def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$Vd), - (ins nModImm:$SIMM), IIC_VMOVImm, + (ins nImmVMOVI32:$SIMM), IIC_VMOVImm, "vmov", "i32", "$Vd, $SIMM", "", [(set DPR:$Vd, (v2i32 (NEONvmovImm timm:$SIMM)))]> { let Inst{11-8} = SIMM{11-8}; } def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$Vd), - (ins nModImm:$SIMM), IIC_VMOVImm, + (ins nImmVMOVI32:$SIMM), IIC_VMOVImm, "vmov", "i32", "$Vd, $SIMM", "", [(set QPR:$Vd, (v4i32 (NEONvmovImm timm:$SIMM)))]> { let Inst{11-8} = SIMM{11-8}; } def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$Vd), - (ins nModImm:$SIMM), IIC_VMOVImm, + (ins nImmSplatI64:$SIMM), IIC_VMOVImm, "vmov", "i64", "$Vd, $SIMM", "", [(set DPR:$Vd, (v1i64 (NEONvmovImm timm:$SIMM)))]>; def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$Vd), - (ins nModImm:$SIMM), IIC_VMOVImm, + (ins nImmSplatI64:$SIMM), IIC_VMOVImm, "vmov", "i64", "$Vd, $SIMM", "", [(set QPR:$Vd, (v2i64 (NEONvmovImm timm:$SIMM)))]>; + +def VMOVv2f32 : N1ModImm<1, 0b000, 0b1111, 0, 0, 0, 1, (outs DPR:$Vd), + (ins nImmVMOVF32:$SIMM), IIC_VMOVImm, + "vmov", "f32", "$Vd, $SIMM", "", + [(set DPR:$Vd, (v2f32 (NEONvmovFPImm timm:$SIMM)))]>; +def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd), + (ins nImmVMOVF32:$SIMM), IIC_VMOVImm, + "vmov", "f32", "$Vd, $SIMM", "", + [(set QPR:$Vd, (v4f32 (NEONvmovFPImm timm:$SIMM)))]>; } // isReMaterializable // VMOV : Vector 
Get Lane (move scalar to ARM core register) def VGETLNs8 : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?}, - (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane), - IIC_VMOVSI, "vmov", "s8", "$R, $V[$lane]", + (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane), + IIC_VMOVSI, "vmov", "s8", "$R, $V$lane", [(set GPR:$R, (NEONvgetlanes (v8i8 DPR:$V), imm:$lane))]> { let Inst{21} = lane{2}; let Inst{6-5} = lane{1-0}; } def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1}, - (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane), - IIC_VMOVSI, "vmov", "s16", "$R, $V[$lane]", + (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane), + IIC_VMOVSI, "vmov", "s16", "$R, $V$lane", [(set GPR:$R, (NEONvgetlanes (v4i16 DPR:$V), imm:$lane))]> { let Inst{21} = lane{1}; let Inst{6} = lane{0}; } def VGETLNu8 : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?}, - (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane), - IIC_VMOVSI, "vmov", "u8", "$R, $V[$lane]", + (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane), + IIC_VMOVSI, "vmov", "u8", "$R, $V$lane", [(set GPR:$R, (NEONvgetlaneu (v8i8 DPR:$V), imm:$lane))]> { let Inst{21} = lane{2}; let Inst{6-5} = lane{1-0}; } def VGETLNu16 : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1}, - (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane), - IIC_VMOVSI, "vmov", "u16", "$R, $V[$lane]", + (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane), + IIC_VMOVSI, "vmov", "u16", "$R, $V$lane", [(set GPR:$R, (NEONvgetlaneu (v4i16 DPR:$V), imm:$lane))]> { let Inst{21} = lane{1}; let Inst{6} = lane{0}; } def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00, - (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane), - IIC_VMOVSI, "vmov", "32", "$R, $V[$lane]", + (outs GPR:$R), (ins DPR:$V, VectorIndex32:$lane), + IIC_VMOVSI, "vmov", "32", "$R, $V$lane", [(set GPR:$R, (extractelt (v2i32 DPR:$V), imm:$lane))]> { let Inst{21} = lane{0}; @@ -4442,24 +4984,24 @@ def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2), let Constraints = "$src1 = $V" in { def VSETLNi8 : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$V), - (ins DPR:$src1, GPR:$R, nohash_imm:$lane), - IIC_VMOVISL, "vmov", "8", "$V[$lane], $R", + (ins DPR:$src1, GPR:$R, VectorIndex8:$lane), + IIC_VMOVISL, "vmov", "8", "$V$lane, $R", [(set DPR:$V, (vector_insert (v8i8 DPR:$src1), GPR:$R, imm:$lane))]> { let Inst{21} = lane{2}; let Inst{6-5} = lane{1-0}; } def VSETLNi16 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$V), - (ins DPR:$src1, GPR:$R, nohash_imm:$lane), - IIC_VMOVISL, "vmov", "16", "$V[$lane], $R", + (ins DPR:$src1, GPR:$R, VectorIndex16:$lane), + IIC_VMOVISL, "vmov", "16", "$V$lane, $R", [(set DPR:$V, (vector_insert (v4i16 DPR:$src1), GPR:$R, imm:$lane))]> { let Inst{21} = lane{1}; let Inst{6} = lane{0}; } def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V), - (ins DPR:$src1, GPR:$R, nohash_imm:$lane), - IIC_VMOVISL, "vmov", "32", "$V[$lane], $R", + (ins DPR:$src1, GPR:$R, VectorIndex32:$lane), + IIC_VMOVISL, "vmov", "32", "$V$lane, $R", [(set DPR:$V, (insertelt (v2i32 DPR:$src1), GPR:$R, imm:$lane))]> { let Inst{21} = lane{0}; @@ -4627,6 +5169,9 @@ defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD, // VMOVL : Vector Lengthening Move defm VMOVLs : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>; defm VMOVLu : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>; +def : Pat<(v8i16 (anyext (v8i8 DPR:$Vm))), (VMOVLuv8i16 DPR:$Vm)>; +def : Pat<(v4i32 (anyext (v4i16 DPR:$Vm))), (VMOVLuv4i32 DPR:$Vm)>; +def : Pat<(v2i64 (anyext (v2i32 DPR:$Vm))), (VMOVLuv2i64 DPR:$Vm)>; // Vector Conversions. 
@@ -4650,6 +5195,7 @@ def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
                      v4f32, v4i32, uint_to_fp>;
 
 // VCVT : Vector Convert Between Floating-Point and Fixed-Point.
+let DecoderMethod = "DecodeVCVTD" in {
 def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
                         v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
 def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
@@ -4658,7 +5204,9 @@ def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
                         v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
 def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
                         v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;
+}
 
+let DecoderMethod = "DecodeVCVTQ" in {
 def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
                         v4i32, v4f32, int_arm_neon_vcvtfp2fxs>;
 def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
@@ -4667,6 +5215,7 @@ def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
                         v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
 def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
                         v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
+}
 
 // VCVT : Vector Convert Between Half-Precision and Single-Precision.
 def VCVTf2h : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0,
@@ -4759,34 +5308,34 @@ def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>;
 
 // VEXT : Vector Extract
 
-class VEXTd<string OpcodeStr, string Dt, ValueType Ty>
+class VEXTd<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
   : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd),
-        (ins DPR:$Vn, DPR:$Vm, i32imm:$index), NVExtFrm,
+        (ins DPR:$Vn, DPR:$Vm, immTy:$index), NVExtFrm,
         IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
         [(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn),
-                                     (Ty DPR:$Vm), imm:$index)))]> {
+                                      (Ty DPR:$Vm), imm:$index)))]> {
   bits<4> index;
   let Inst{11-8} = index{3-0};
 }
 
-class VEXTq<string OpcodeStr, string Dt, ValueType Ty>
+class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
   : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$Vd),
-        (ins QPR:$Vn, QPR:$Vm, i32imm:$index), NVExtFrm,
+        (ins QPR:$Vn, QPR:$Vm, immTy:$index), NVExtFrm,
         IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
         [(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn),
-                                     (Ty QPR:$Vm), imm:$index)))]> {
+                                      (Ty QPR:$Vm), imm:$index)))]> {
  bits<4> index;
  let Inst{11-8} = index{3-0};
 }
 
-def VEXTd8 : VEXTd<"vext", "8", v8i8> {
+def VEXTd8 : VEXTd<"vext", "8", v8i8, imm0_7> {
   let Inst{11-8} = index{3-0};
 }
-def VEXTd16 : VEXTd<"vext", "16", v4i16> {
+def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> {
   let Inst{11-9} = index{2-0};
   let Inst{8} = 0b0;
 }
-def VEXTd32 : VEXTd<"vext", "32", v2i32> {
+def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> {
   let Inst{11-10} = index{1-0};
   let Inst{9-8} = 0b00;
 }
@@ -4795,17 +5344,21 @@ def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn),
                              (i32 imm:$index))),
           (VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>;
 
-def VEXTq8 : VEXTq<"vext", "8", v16i8> {
+def VEXTq8 : VEXTq<"vext", "8", v16i8, imm0_15> {
   let Inst{11-8} = index{3-0};
 }
-def VEXTq16 : VEXTq<"vext", "16", v8i16> {
+def VEXTq16 : VEXTq<"vext", "16", v8i16, imm0_7> {
   let Inst{11-9} = index{2-0};
   let Inst{8} = 0b0;
 }
-def VEXTq32 : VEXTq<"vext", "32", v4i32> {
+def VEXTq32 : VEXTq<"vext", "32", v4i32, imm0_3> {
   let Inst{11-10} = index{1-0};
   let Inst{9-8} = 0b00;
 }
+def VEXTq64 : VEXTq<"vext", "64", v2i64, imm0_1> {
+  let Inst{11} = index{0};
+  let Inst{10-8} = 0b000;
+}
 def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn),
                            (v4f32 QPR:$Vm),
                            (i32 imm:$index))),
@@ -4825,7 +5378,9 @@ def VTRNq32 : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">;
 
 def VUZPd8  : N2VDShuffle<0b00, 0b00010,
"vuzp", "8">; def VUZPd16 : N2VDShuffle<0b01, 0b00010, "vuzp", "16">; -def VUZPd32 : N2VDShuffle<0b10, 0b00010, "vuzp", "32">; +// vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. +def : NEONInstAlias<"vuzp${p}.32 $Dd, $Dm", + (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>; def VUZPq8 : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">; def VUZPq16 : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">; @@ -4835,7 +5390,9 @@ def VUZPq32 : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">; def VZIPd8 : N2VDShuffle<0b00, 0b00011, "vzip", "8">; def VZIPd16 : N2VDShuffle<0b01, 0b00011, "vzip", "16">; -def VZIPd32 : N2VDShuffle<0b10, 0b00011, "vzip", "32">; +// vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. +def : NEONInstAlias<"vzip${p}.32 $Dd, $Dm", + (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>; def VZIPq8 : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">; def VZIPq16 : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">; @@ -4847,27 +5404,25 @@ def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">; let DecoderMethod = "DecodeTBLInstruction" in { def VTBL1 : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd), - (ins DPR:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1, - "vtbl", "8", "$Vd, \\{$Vn\\}, $Vm", "", - [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbl1 DPR:$Vn, DPR:$Vm)))]>; + (ins VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1, + "vtbl", "8", "$Vd, $Vn, $Vm", "", + [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbl1 VecListOneD:$Vn, DPR:$Vm)))]>; let hasExtraSrcRegAllocReq = 1 in { def VTBL2 : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd), - (ins DPR:$Vn, DPR:$tbl2, DPR:$Vm), NVTBLFrm, IIC_VTB2, - "vtbl", "8", "$Vd, \\{$Vn, $tbl2\\}, $Vm", "", []>; + (ins VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB2, + "vtbl", "8", "$Vd, $Vn, $Vm", "", []>; def VTBL3 : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd), - (ins DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$Vm), NVTBLFrm, IIC_VTB3, - "vtbl", "8", "$Vd, \\{$Vn, $tbl2, $tbl3\\}, $Vm", "", []>; + (ins VecListThreeD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB3, + "vtbl", "8", "$Vd, $Vn, $Vm", "", []>; def VTBL4 : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$Vd), - (ins DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$Vm), + (ins VecListFourD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB4, - "vtbl", "8", "$Vd, \\{$Vn, $tbl2, $tbl3, $tbl4\\}, $Vm", "", []>; + "vtbl", "8", "$Vd, $Vn, $Vm", "", []>; } // hasExtraSrcRegAllocReq = 1 -def VTBL2Pseudo - : PseudoNeonI<(outs DPR:$dst), (ins QPR:$tbl, DPR:$src), IIC_VTB2, "", []>; def VTBL3Pseudo : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB3, "", []>; def VTBL4Pseudo @@ -4876,31 +5431,28 @@ def VTBL4Pseudo // VTBX : Vector Table Extension def VTBX1 : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$Vd), - (ins DPR:$orig, DPR:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX1, - "vtbx", "8", "$Vd, \\{$Vn\\}, $Vm", "$orig = $Vd", + (ins DPR:$orig, VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX1, + "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbx1 - DPR:$orig, DPR:$Vn, DPR:$Vm)))]>; + DPR:$orig, VecListOneD:$Vn, DPR:$Vm)))]>; let hasExtraSrcRegAllocReq = 1 in { def VTBX2 : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd), - (ins DPR:$orig, DPR:$Vn, DPR:$tbl2, DPR:$Vm), NVTBLFrm, IIC_VTBX2, - "vtbx", "8", "$Vd, \\{$Vn, $tbl2\\}, $Vm", "$orig = $Vd", []>; + (ins DPR:$orig, VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX2, + "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>; def VTBX3 : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd), - (ins DPR:$orig, DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$Vm), + (ins DPR:$orig, VecListThreeD:$Vn, DPR:$Vm), NVTBLFrm, 
IIC_VTBX3, - "vtbx", "8", "$Vd, \\{$Vn, $tbl2, $tbl3\\}, $Vm", + "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>; def VTBX4 - : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd), (ins DPR:$orig, DPR:$Vn, - DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$Vm), NVTBLFrm, IIC_VTBX4, - "vtbx", "8", "$Vd, \\{$Vn, $tbl2, $tbl3, $tbl4\\}, $Vm", + : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd), + (ins DPR:$orig, VecListFourD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX4, + "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>; } // hasExtraSrcRegAllocReq = 1 -def VTBX2Pseudo - : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QPR:$tbl, DPR:$src), - IIC_VTBX2, "$orig = $dst", []>; def VTBX3Pseudo : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src), IIC_VTBX3, "$orig = $dst", []>; @@ -4950,9 +5502,13 @@ def : N3VSPat<fadd, VADDfd>; def : N3VSPat<fsub, VSUBfd>; def : N3VSPat<fmul, VMULfd>; def : N3VSMulOpPat<fmul, fadd, VMLAfd>, - Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>; + Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>; def : N3VSMulOpPat<fmul, fsub, VMLSfd>, - Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>; + Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>; +def : N3VSMulOpPat<fmul, fadd, VFMAfd>, + Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>; +def : N3VSMulOpPat<fmul, fsub, VFMSfd>, + Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>; def : N2VSPat<fabs, VABSfd>; def : N2VSPat<fneg, VNEGfd>; def : N3VSPat<NEONfmax, VMAXfd>; @@ -5028,3 +5584,1448 @@ def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>; def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>; def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>; def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>; + +// Vector lengthening move with load, matching extending loads. + +// extload, zextload and sextload for a standard lengthening load. Example: +// Lengthen_Single<"8", "i16", "i8"> = Pat<(v8i16 (extloadvi8 addrmode5:$addr)) +// (VMOVLuv8i16 (VLDRD addrmode5:$addr))>; +multiclass Lengthen_Single<string DestLanes, string DestTy, string SrcTy> { + def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) + (!cast<PatFrag>("extloadv" # SrcTy) addrmode5:$addr)), + (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy) + (VLDRD addrmode5:$addr))>; + def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) + (!cast<PatFrag>("zextloadv" # SrcTy) addrmode5:$addr)), + (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy) + (VLDRD addrmode5:$addr))>; + def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) + (!cast<PatFrag>("sextloadv" # SrcTy) addrmode5:$addr)), + (!cast<Instruction>("VMOVLsv" # DestLanes # DestTy) + (VLDRD addrmode5:$addr))>; +} + +// extload, zextload and sextload for a lengthening load which only uses +// half the lanes available. 
Example:
+// Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16"> =
+//   Pat<(v4i16 (extloadvi8 addrmode5:$addr))
+//       (EXTRACT_SUBREG (VMOVLuv8i16 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
+//                                                   (VLDRS addrmode5:$addr),
+//                                                   ssub_0)),
+//                       dsub_0)>;
+multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy,
+                               string InsnLanes, string InsnTy> {
+  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+                    (!cast<PatFrag>("extloadv" # SrcTy) addrmode5:$addr)),
+        (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
+          (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
+          dsub_0)>;
+  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+                  (!cast<PatFrag>("zextloadv" # SrcTy) addrmode5:$addr)),
+        (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
+          (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
+          dsub_0)>;
+  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+                  (!cast<PatFrag>("sextloadv" # SrcTy) addrmode5:$addr)),
+        (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
+          (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
+          dsub_0)>;
+}
+
+// extload, zextload and sextload for a lengthening load followed by another
+// lengthening load, to quadruple the initial length.
+// Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32", qsub_0> =
+//   Pat<(v4i32 (extloadvi8 addrmode5:$addr))
+//       (EXTRACT_SUBREG (VMOVLuv4i32
+//         (EXTRACT_SUBREG (VMOVLuv8i16 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
+//                                                     (VLDRS addrmode5:$addr),
+//                                                     ssub_0)),
+//                         dsub_0)),
+//         qsub_0)>;
+multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy,
+                           string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
+                           string Insn2Ty, SubRegIndex RegType> {
+  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+                    (!cast<PatFrag>("extloadv" # SrcTy) addrmode5:$addr)),
+         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
+           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
+             (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr),
+             ssub_0)), dsub_0)),
+          RegType)>;
+  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+                  (!cast<PatFrag>("zextloadv" # SrcTy) addrmode5:$addr)),
+         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
+           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
+             (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr),
+             ssub_0)), dsub_0)),
+          RegType)>;
+  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+                  (!cast<PatFrag>("sextloadv" # SrcTy) addrmode5:$addr)),
+         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
+           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
+             (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr),
+             ssub_0)), dsub_0)),
+          RegType)>;
+}
+
+defm : Lengthen_Single<"8", "i16", "i8">; // v8i8 -> v8i16
+defm : Lengthen_Single<"4", "i32", "i16">; // v4i16 -> v4i32
+defm : Lengthen_Single<"2", "i64", "i32">; // v2i32 -> v2i64
+
+defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16
+defm : Lengthen_HalfSingle<"2", "i16", "i8", "8", "i16">; // v2i8 -> v2i16
+defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32
+
+// Double lengthening - v4i8 -> v4i16 -> v4i32
+defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32", qsub_0>;
+// v2i8 -> v2i16 -> v2i32
+defm : Lengthen_Double<"2", "i32", "i8", "8", "i16", "4", "i32", dsub_0>;
+// v2i16 -> v2i32 -> v2i64
+defm :
Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64", qsub_0>; + +// Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64 +def : Pat<(v2i64 (extloadvi8 addrmode5:$addr)), + (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16 + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)), + dsub_0)), dsub_0))>; +def : Pat<(v2i64 (zextloadvi8 addrmode5:$addr)), + (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16 + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)), + dsub_0)), dsub_0))>; +def : Pat<(v2i64 (sextloadvi8 addrmode5:$addr)), + (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16 + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)), + dsub_0)), dsub_0))>; + +//===----------------------------------------------------------------------===// +// Assembler aliases +// + +def : VFP2InstAlias<"fmdhr${p} $Dd, $Rn", + (VSETLNi32 DPR:$Dd, GPR:$Rn, 1, pred:$p)>; +def : VFP2InstAlias<"fmdlr${p} $Dd, $Rn", + (VSETLNi32 DPR:$Dd, GPR:$Rn, 0, pred:$p)>; + + +// VADD two-operand aliases. +def : NEONInstAlias<"vadd${p}.i8 $Vdn, $Vm", + (VADDv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vadd${p}.i16 $Vdn, $Vm", + (VADDv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vadd${p}.i32 $Vdn, $Vm", + (VADDv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vadd${p}.i64 $Vdn, $Vm", + (VADDv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + +def : NEONInstAlias<"vadd${p}.i8 $Vdn, $Vm", + (VADDv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vadd${p}.i16 $Vdn, $Vm", + (VADDv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vadd${p}.i32 $Vdn, $Vm", + (VADDv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vadd${p}.i64 $Vdn, $Vm", + (VADDv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; + +def : NEONInstAlias<"vadd${p}.f32 $Vdn, $Vm", + (VADDfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vadd${p}.f32 $Vdn, $Vm", + (VADDfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + +// VSUB two-operand aliases. +def : NEONInstAlias<"vsub${p}.i8 $Vdn, $Vm", + (VSUBv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vsub${p}.i16 $Vdn, $Vm", + (VSUBv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vsub${p}.i32 $Vdn, $Vm", + (VSUBv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vsub${p}.i64 $Vdn, $Vm", + (VSUBv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + +def : NEONInstAlias<"vsub${p}.i8 $Vdn, $Vm", + (VSUBv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vsub${p}.i16 $Vdn, $Vm", + (VSUBv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vsub${p}.i32 $Vdn, $Vm", + (VSUBv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vsub${p}.i64 $Vdn, $Vm", + (VSUBv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; + +def : NEONInstAlias<"vsub${p}.f32 $Vdn, $Vm", + (VSUBfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vsub${p}.f32 $Vdn, $Vm", + (VSUBfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + +// VADDW two-operand aliases. 
+def : NEONInstAlias<"vaddw${p}.s8 $Vdn, $Vm", + (VADDWsv8i16 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vaddw${p}.s16 $Vdn, $Vm", + (VADDWsv4i32 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vaddw${p}.s32 $Vdn, $Vm", + (VADDWsv2i64 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vaddw${p}.u8 $Vdn, $Vm", + (VADDWuv8i16 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vaddw${p}.u16 $Vdn, $Vm", + (VADDWuv4i32 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vaddw${p}.u32 $Vdn, $Vm", + (VADDWuv2i64 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>; + +// VAND/VBIC/VEOR/VORR accept but do not require a type suffix. +defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm", + (VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm", + (VANDq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm", + (VBICd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm", + (VBICq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm", + (VEORd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm", + (VEORq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm", + (VORRd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm", + (VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; +// ... two-operand aliases +def : NEONInstAlias<"vand${p} $Vdn, $Vm", + (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vand${p} $Vdn, $Vm", + (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vbic${p} $Vdn, $Vm", + (VBICd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vbic${p} $Vdn, $Vm", + (VBICq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"veor${p} $Vdn, $Vm", + (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"veor${p} $Vdn, $Vm", + (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vorr${p} $Vdn, $Vm", + (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vorr${p} $Vdn, $Vm", + (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + +defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm", + (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm", + (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm", + (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm", + (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm", + (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm", + (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + +// VMUL two-operand aliases. 
+def : NEONInstAlias<"vmul${p}.p8 $Qdn, $Qm", + (VMULpq QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>; +def : NEONInstAlias<"vmul${p}.i8 $Qdn, $Qm", + (VMULv16i8 QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>; +def : NEONInstAlias<"vmul${p}.i16 $Qdn, $Qm", + (VMULv8i16 QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>; +def : NEONInstAlias<"vmul${p}.i32 $Qdn, $Qm", + (VMULv4i32 QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>; + +def : NEONInstAlias<"vmul${p}.p8 $Ddn, $Dm", + (VMULpd DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>; +def : NEONInstAlias<"vmul${p}.i8 $Ddn, $Dm", + (VMULv8i8 DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>; +def : NEONInstAlias<"vmul${p}.i16 $Ddn, $Dm", + (VMULv4i16 DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>; +def : NEONInstAlias<"vmul${p}.i32 $Ddn, $Dm", + (VMULv2i32 DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>; + +def : NEONInstAlias<"vmul${p}.f32 $Qdn, $Qm", + (VMULfq QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>; +def : NEONInstAlias<"vmul${p}.f32 $Ddn, $Dm", + (VMULfd DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>; + +def : NEONInstAlias<"vmul${p}.i16 $Ddn, $Dm$lane", + (VMULslv4i16 DPR:$Ddn, DPR:$Ddn, DPR_8:$Dm, + VectorIndex16:$lane, pred:$p)>; +def : NEONInstAlias<"vmul${p}.i16 $Qdn, $Dm$lane", + (VMULslv8i16 QPR:$Qdn, QPR:$Qdn, DPR_8:$Dm, + VectorIndex16:$lane, pred:$p)>; + +def : NEONInstAlias<"vmul${p}.i32 $Ddn, $Dm$lane", + (VMULslv2i32 DPR:$Ddn, DPR:$Ddn, DPR_VFP2:$Dm, + VectorIndex32:$lane, pred:$p)>; +def : NEONInstAlias<"vmul${p}.i32 $Qdn, $Dm$lane", + (VMULslv4i32 QPR:$Qdn, QPR:$Qdn, DPR_VFP2:$Dm, + VectorIndex32:$lane, pred:$p)>; + +def : NEONInstAlias<"vmul${p}.f32 $Ddn, $Dm$lane", + (VMULslfd DPR:$Ddn, DPR:$Ddn, DPR_VFP2:$Dm, + VectorIndex32:$lane, pred:$p)>; +def : NEONInstAlias<"vmul${p}.f32 $Qdn, $Dm$lane", + (VMULslfq QPR:$Qdn, QPR:$Qdn, DPR_VFP2:$Dm, + VectorIndex32:$lane, pred:$p)>; + +// VQADD (register) two-operand aliases. +def : NEONInstAlias<"vqadd${p}.s8 $Vdn, $Vm", + (VQADDsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.s16 $Vdn, $Vm", + (VQADDsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.s32 $Vdn, $Vm", + (VQADDsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.s64 $Vdn, $Vm", + (VQADDsv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.u8 $Vdn, $Vm", + (VQADDuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.u16 $Vdn, $Vm", + (VQADDuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.u32 $Vdn, $Vm", + (VQADDuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.u64 $Vdn, $Vm", + (VQADDuv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; + +def : NEONInstAlias<"vqadd${p}.s8 $Vdn, $Vm", + (VQADDsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.s16 $Vdn, $Vm", + (VQADDsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.s32 $Vdn, $Vm", + (VQADDsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.s64 $Vdn, $Vm", + (VQADDsv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.u8 $Vdn, $Vm", + (VQADDuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.u16 $Vdn, $Vm", + (VQADDuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.u32 $Vdn, $Vm", + (VQADDuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.u64 $Vdn, $Vm", + (VQADDuv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + +// VSHL (immediate) two-operand aliases. 
+def : NEONInstAlias<"vshl${p}.i8 $Vdn, $imm", + (VSHLiv8i8 DPR:$Vdn, DPR:$Vdn, imm0_7:$imm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.i16 $Vdn, $imm", + (VSHLiv4i16 DPR:$Vdn, DPR:$Vdn, imm0_15:$imm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.i32 $Vdn, $imm", + (VSHLiv2i32 DPR:$Vdn, DPR:$Vdn, imm0_31:$imm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.i64 $Vdn, $imm", + (VSHLiv1i64 DPR:$Vdn, DPR:$Vdn, imm0_63:$imm, pred:$p)>; + +def : NEONInstAlias<"vshl${p}.i8 $Vdn, $imm", + (VSHLiv16i8 QPR:$Vdn, QPR:$Vdn, imm0_7:$imm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.i16 $Vdn, $imm", + (VSHLiv8i16 QPR:$Vdn, QPR:$Vdn, imm0_15:$imm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.i32 $Vdn, $imm", + (VSHLiv4i32 QPR:$Vdn, QPR:$Vdn, imm0_31:$imm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.i64 $Vdn, $imm", + (VSHLiv2i64 QPR:$Vdn, QPR:$Vdn, imm0_63:$imm, pred:$p)>; + +// VSHL (register) two-operand aliases. +def : NEONInstAlias<"vshl${p}.s8 $Vdn, $Vm", + (VSHLsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.s16 $Vdn, $Vm", + (VSHLsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.s32 $Vdn, $Vm", + (VSHLsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.s64 $Vdn, $Vm", + (VSHLsv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.u8 $Vdn, $Vm", + (VSHLuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.u16 $Vdn, $Vm", + (VSHLuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.u32 $Vdn, $Vm", + (VSHLuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.u64 $Vdn, $Vm", + (VSHLuv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; + +def : NEONInstAlias<"vshl${p}.s8 $Vdn, $Vm", + (VSHLsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.s16 $Vdn, $Vm", + (VSHLsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.s32 $Vdn, $Vm", + (VSHLsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.s64 $Vdn, $Vm", + (VSHLsv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.u8 $Vdn, $Vm", + (VSHLuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.u16 $Vdn, $Vm", + (VSHLuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.u32 $Vdn, $Vm", + (VSHLuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.u64 $Vdn, $Vm", + (VSHLuv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + +// VSHL (immediate) two-operand aliases. 
+def : NEONInstAlias<"vshr${p}.s8 $Vdn, $imm", + (VSHRsv8i8 DPR:$Vdn, DPR:$Vdn, shr_imm8:$imm, pred:$p)>; +def : NEONInstAlias<"vshr${p}.s16 $Vdn, $imm", + (VSHRsv4i16 DPR:$Vdn, DPR:$Vdn, shr_imm16:$imm, pred:$p)>; +def : NEONInstAlias<"vshr${p}.s32 $Vdn, $imm", + (VSHRsv2i32 DPR:$Vdn, DPR:$Vdn, shr_imm32:$imm, pred:$p)>; +def : NEONInstAlias<"vshr${p}.s64 $Vdn, $imm", + (VSHRsv1i64 DPR:$Vdn, DPR:$Vdn, shr_imm64:$imm, pred:$p)>; + +def : NEONInstAlias<"vshr${p}.s8 $Vdn, $imm", + (VSHRsv16i8 QPR:$Vdn, QPR:$Vdn, shr_imm8:$imm, pred:$p)>; +def : NEONInstAlias<"vshr${p}.s16 $Vdn, $imm", + (VSHRsv8i16 QPR:$Vdn, QPR:$Vdn, shr_imm16:$imm, pred:$p)>; +def : NEONInstAlias<"vshr${p}.s32 $Vdn, $imm", + (VSHRsv4i32 QPR:$Vdn, QPR:$Vdn, shr_imm32:$imm, pred:$p)>; +def : NEONInstAlias<"vshr${p}.s64 $Vdn, $imm", + (VSHRsv2i64 QPR:$Vdn, QPR:$Vdn, shr_imm64:$imm, pred:$p)>; + +def : NEONInstAlias<"vshr${p}.u8 $Vdn, $imm", + (VSHRuv8i8 DPR:$Vdn, DPR:$Vdn, shr_imm8:$imm, pred:$p)>; +def : NEONInstAlias<"vshr${p}.u16 $Vdn, $imm", + (VSHRuv4i16 DPR:$Vdn, DPR:$Vdn, shr_imm16:$imm, pred:$p)>; +def : NEONInstAlias<"vshr${p}.u32 $Vdn, $imm", + (VSHRuv2i32 DPR:$Vdn, DPR:$Vdn, shr_imm32:$imm, pred:$p)>; +def : NEONInstAlias<"vshr${p}.u64 $Vdn, $imm", + (VSHRuv1i64 DPR:$Vdn, DPR:$Vdn, shr_imm64:$imm, pred:$p)>; + +def : NEONInstAlias<"vshr${p}.u8 $Vdn, $imm", + (VSHRuv16i8 QPR:$Vdn, QPR:$Vdn, shr_imm8:$imm, pred:$p)>; +def : NEONInstAlias<"vshr${p}.u16 $Vdn, $imm", + (VSHRuv8i16 QPR:$Vdn, QPR:$Vdn, shr_imm16:$imm, pred:$p)>; +def : NEONInstAlias<"vshr${p}.u32 $Vdn, $imm", + (VSHRuv4i32 QPR:$Vdn, QPR:$Vdn, shr_imm32:$imm, pred:$p)>; +def : NEONInstAlias<"vshr${p}.u64 $Vdn, $imm", + (VSHRuv2i64 QPR:$Vdn, QPR:$Vdn, shr_imm64:$imm, pred:$p)>; + +// VLD1 single-lane pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. +def VLD1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr", + (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr", + (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr", + (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + +def VLD1LNdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr!", + (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD1LNdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr!", + (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD1LNdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr!", + (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD1LNdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr, $Rm", + (ins VecListOneDByteIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD1LNdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr, $Rm", + (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD1LNdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr, $Rm", + (ins VecListOneDWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + + +// VST1 single-lane pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. 
+def VST1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr", + (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr", + (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr", + (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + +def VST1LNdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr!", + (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST1LNdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr!", + (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST1LNdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr!", + (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST1LNdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr, $Rm", + (ins VecListOneDByteIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST1LNdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr, $Rm", + (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST1LNdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr, $Rm", + (ins VecListOneDWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + +// VLD2 single-lane pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. +def VLD2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr", + (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr", + (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr", + (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr", + (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr", + (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>; + +def VLD2LNdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr!", + (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD2LNdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!", + (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD2LNdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!", + (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD2LNqWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!", + (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD2LNqWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!", + (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD2LNdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr, $Rm", + (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD2LNdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm", + (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD2LNdWB_register_Asm_32 : + 
NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm", + (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD2LNqWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm", + (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD2LNqWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm", + (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + + +// VST2 single-lane pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. +def VST2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr", + (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr", + (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr", + (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr", + (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr", + (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>; + +def VST2LNdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr!", + (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST2LNdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!", + (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST2LNdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!", + (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST2LNqWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!", + (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST2LNqWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!", + (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST2LNdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr, $Rm", + (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST2LNdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst2${p}", ".16","$list, $addr, $Rm", + (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST2LNdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm", + (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST2LNqWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst2${p}", ".16","$list, $addr, $Rm", + (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST2LNqWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm", + (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + +// VLD3 all-lanes pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. 
+def VLD3DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", + (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPdAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", + (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPdAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", + (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", + (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPqAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", + (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPqAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", + (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; + +def VLD3DUPdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", + (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", + (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", + (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPqWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", + (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPqWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", + (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPqWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", + (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD3DUPdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", + (ins VecListThreeDAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3DUPdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeDAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3DUPdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", + (ins VecListThreeDAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3DUPqWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", + (ins VecListThreeQAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3DUPqWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeQAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3DUPqWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", + (ins VecListThreeQAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + + +// VLD3 single-lane pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. 
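+// e.g. "vld3.16 {d0[2], d1[2], d2[2]}, [r0]" (illustrative registers and
+// lane) matches VLD3LNdAsm_16 below.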
+def VLD3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", + (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", + (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", + (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", + (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", + (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>; + +def VLD3LNdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", + (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD3LNdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", + (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD3LNdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", + (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD3LNqWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", + (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD3LNqWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", + (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD3LNdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", + (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3LNdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3LNdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", + (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3LNqWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3LNqWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", + (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + +// VLD3 multiple structure pseudo-instructions. These need special handling for +// the vector operands that the normal instructions don't yet model. +// FIXME: Remove these when the register classes and instructions are updated. 
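+// e.g. (illustrative):
+//   vld3.8  {d0, d1, d2}, [r0]!      @ fixed writeback form
+//   vld3.8  {d0, d1, d2}, [r0], r2   @ register writeback form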
+def VLD3dAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", + (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; +def VLD3dAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", + (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; +def VLD3dAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", + (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; +def VLD3qAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", + (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; +def VLD3qAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", + (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; +def VLD3qAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", + (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; + +def VLD3dWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", + (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; +def VLD3dWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", + (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; +def VLD3dWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", + (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; +def VLD3qWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", + (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; +def VLD3qWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", + (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; +def VLD3qWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", + (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; +def VLD3dWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", + (ins VecListThreeD:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3dWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeD:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3dWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", + (ins VecListThreeD:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3qWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", + (ins VecListThreeQ:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3qWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeQ:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD3qWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", + (ins VecListThreeQ:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + +// VST3 single-lane pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. 
+def VST3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr", + (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr", + (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr", + (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr", + (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr", + (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>; + +def VST3LNdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!", + (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST3LNdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!", + (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST3LNdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!", + (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST3LNqWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!", + (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST3LNqWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!", + (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST3LNdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm", + (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST3LNdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST3LNdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm", + (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST3LNqWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST3LNqWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm", + (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + + +// VST3 multiple structure pseudo-instructions. These need special handling for +// the vector operands that the normal instructions don't yet model. +// FIXME: Remove these when the register classes and instructions are updated. 
+def VST3dAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr", + (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; +def VST3dAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr", + (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; +def VST3dAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr", + (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; +def VST3qAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr", + (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; +def VST3qAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr", + (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; +def VST3qAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr", + (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; + +def VST3dWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!", + (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; +def VST3dWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!", + (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; +def VST3dWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!", + (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; +def VST3qWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!", + (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; +def VST3qWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!", + (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; +def VST3qWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!", + (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; +def VST3dWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm", + (ins VecListThreeD:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST3dWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeD:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST3dWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm", + (ins VecListThreeD:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST3qWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm", + (ins VecListThreeQ:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST3qWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm", + (ins VecListThreeQ:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST3qWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm", + (ins VecListThreeQ:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + +// VLD4 all-lanes pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. 
+def VLD4DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", + (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPdAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", + (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPdAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", + (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", + (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPqAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", + (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPqAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", + (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; + +def VLD4DUPdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", + (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", + (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", + (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPqWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", + (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPqWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", + (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPqWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", + (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; +def VLD4DUPdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", + (ins VecListFourDAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4DUPdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", + (ins VecListFourDAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4DUPdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", + (ins VecListFourDAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4DUPqWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", + (ins VecListFourQAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4DUPqWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", + (ins VecListFourQAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4DUPqWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", + (ins VecListFourQAllLanes:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + + +// VLD4 single-lane pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. 
+def VLD4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", + (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", + (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", + (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", + (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", + (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>; + +def VLD4LNdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", + (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD4LNdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", + (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD4LNdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", + (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD4LNqWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", + (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD4LNqWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", + (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VLD4LNdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", + (ins VecListFourDByteIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4LNdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", + (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4LNdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", + (ins VecListFourDWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4LNqWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", + (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4LNqWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", + (ins VecListFourQWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + + + +// VLD4 multiple structure pseudo-instructions. These need special handling for +// the vector operands that the normal instructions don't yet model. +// FIXME: Remove these when the register classes and instructions are updated. 
+def VLD4dAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", + (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; +def VLD4dAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", + (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; +def VLD4dAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", + (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; +def VLD4qAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", + (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; +def VLD4qAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", + (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; +def VLD4qAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", + (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; + +def VLD4dWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", + (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; +def VLD4dWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", + (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; +def VLD4dWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", + (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; +def VLD4qWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", + (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; +def VLD4qWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", + (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; +def VLD4qWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", + (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; +def VLD4dWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", + (ins VecListFourD:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4dWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", + (ins VecListFourD:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4dWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", + (ins VecListFourD:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4qWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", + (ins VecListFourQ:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4qWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", + (ins VecListFourQ:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VLD4qWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", + (ins VecListFourQ:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + +// VST4 single-lane pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. 
+def VST4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr", + (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr", + (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr", + (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr", + (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr", + (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>; + +def VST4LNdWB_fixed_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!", + (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST4LNdWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!", + (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST4LNdWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!", + (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST4LNqWB_fixed_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!", + (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST4LNqWB_fixed_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!", + (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>; +def VST4LNdWB_register_Asm_8 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm", + (ins VecListFourDByteIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST4LNdWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm", + (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST4LNdWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm", + (ins VecListFourDWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST4LNqWB_register_Asm_16 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm", + (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +def VST4LNqWB_register_Asm_32 : + NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm", + (ins VecListFourQWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + + +// VST4 multiple structure pseudo-instructions. These need special handling for +// the vector operands that the normal instructions don't yet model. +// FIXME: Remove these when the register classes and instructions are updated. 
+def VST4dAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
+ (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+def VST4dAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
+ (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+def VST4dAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
+ (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+def VST4qAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
+ (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+def VST4qAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
+ (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+def VST4qAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
+ (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+
+def VST4dWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
+ (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+def VST4dWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
+ (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+def VST4dWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
+ (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+def VST4qWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
+ (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+def VST4qWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
+ (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+def VST4qWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
+ (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+def VST4dWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListFourD:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST4dWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListFourD:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST4dWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListFourD:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST4qWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListFourQ:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST4qWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListFourQ:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST4qWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListFourQ:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+// VMOV takes an optional datatype suffix
+defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
+ (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
+ (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
+
+// VCLE (register) is an assembler alias for VCGE w/ the operands reversed.
+// D-register versions.
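+// (Illustrative: "vcle.s8 d0, d1, d2" is accepted here and encoded exactly
+// as "vcge.s8 d0, d2, d1" would be; only the source-level operand order
+// differs.)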
+def : NEONInstAlias<"vcle${p}.s8 $Dd, $Dn, $Dm", + (VCGEsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.s16 $Dd, $Dn, $Dm", + (VCGEsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.s32 $Dd, $Dn, $Dm", + (VCGEsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.u8 $Dd, $Dn, $Dm", + (VCGEuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.u16 $Dd, $Dn, $Dm", + (VCGEuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.u32 $Dd, $Dn, $Dm", + (VCGEuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.f32 $Dd, $Dn, $Dm", + (VCGEfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +// Q-register versions. +def : NEONInstAlias<"vcle${p}.s8 $Qd, $Qn, $Qm", + (VCGEsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.s16 $Qd, $Qn, $Qm", + (VCGEsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.s32 $Qd, $Qn, $Qm", + (VCGEsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.u8 $Qd, $Qn, $Qm", + (VCGEuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.u16 $Qd, $Qn, $Qm", + (VCGEuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.u32 $Qd, $Qn, $Qm", + (VCGEuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.f32 $Qd, $Qn, $Qm", + (VCGEfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; + +// VCLT (register) is an assembler alias for VCGT w/ the operands reversed. +// D-register versions. +def : NEONInstAlias<"vclt${p}.s8 $Dd, $Dn, $Dm", + (VCGTsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.s16 $Dd, $Dn, $Dm", + (VCGTsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.s32 $Dd, $Dn, $Dm", + (VCGTsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.u8 $Dd, $Dn, $Dm", + (VCGTuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.u16 $Dd, $Dn, $Dm", + (VCGTuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.u32 $Dd, $Dn, $Dm", + (VCGTuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.f32 $Dd, $Dn, $Dm", + (VCGTfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +// Q-register versions. 
+def : NEONInstAlias<"vclt${p}.s8 $Qd, $Qn, $Qm", + (VCGTsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.s16 $Qd, $Qn, $Qm", + (VCGTsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.s32 $Qd, $Qn, $Qm", + (VCGTsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.u8 $Qd, $Qn, $Qm", + (VCGTuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.u16 $Qd, $Qn, $Qm", + (VCGTuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.u32 $Qd, $Qn, $Qm", + (VCGTuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.f32 $Qd, $Qn, $Qm", + (VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; + +// Two-operand variants for VEXT +def : NEONInstAlias<"vext${p}.8 $Vdn, $Vm, $imm", + (VEXTd8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, imm0_7:$imm, pred:$p)>; +def : NEONInstAlias<"vext${p}.16 $Vdn, $Vm, $imm", + (VEXTd16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, imm0_3:$imm, pred:$p)>; +def : NEONInstAlias<"vext${p}.32 $Vdn, $Vm, $imm", + (VEXTd32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, imm0_1:$imm, pred:$p)>; + +def : NEONInstAlias<"vext${p}.8 $Vdn, $Vm, $imm", + (VEXTq8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_15:$imm, pred:$p)>; +def : NEONInstAlias<"vext${p}.16 $Vdn, $Vm, $imm", + (VEXTq16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_7:$imm, pred:$p)>; +def : NEONInstAlias<"vext${p}.32 $Vdn, $Vm, $imm", + (VEXTq32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_3:$imm, pred:$p)>; +def : NEONInstAlias<"vext${p}.64 $Vdn, $Vm, $imm", + (VEXTq64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_1:$imm, pred:$p)>; + +// Two-operand variants for VQDMULH +def : NEONInstAlias<"vqdmulh${p}.s16 $Vdn, $Vm", + (VQDMULHv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqdmulh${p}.s32 $Vdn, $Vm", + (VQDMULHv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; + +def : NEONInstAlias<"vqdmulh${p}.s16 $Vdn, $Vm", + (VQDMULHv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqdmulh${p}.s32 $Vdn, $Vm", + (VQDMULHv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + +// Two-operand variants for VMAX. +def : NEONInstAlias<"vmax${p}.s8 $Vdn, $Vm", + (VMAXsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmax${p}.s16 $Vdn, $Vm", + (VMAXsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmax${p}.s32 $Vdn, $Vm", + (VMAXsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmax${p}.u8 $Vdn, $Vm", + (VMAXuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmax${p}.u16 $Vdn, $Vm", + (VMAXuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmax${p}.u32 $Vdn, $Vm", + (VMAXuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmax${p}.f32 $Vdn, $Vm", + (VMAXfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; + +def : NEONInstAlias<"vmax${p}.s8 $Vdn, $Vm", + (VMAXsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmax${p}.s16 $Vdn, $Vm", + (VMAXsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmax${p}.s32 $Vdn, $Vm", + (VMAXsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmax${p}.u8 $Vdn, $Vm", + (VMAXuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmax${p}.u16 $Vdn, $Vm", + (VMAXuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmax${p}.u32 $Vdn, $Vm", + (VMAXuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmax${p}.f32 $Vdn, $Vm", + (VMAXfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + +// Two-operand variants for VMIN. 
+def : NEONInstAlias<"vmin${p}.s8 $Vdn, $Vm", + (VMINsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmin${p}.s16 $Vdn, $Vm", + (VMINsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmin${p}.s32 $Vdn, $Vm", + (VMINsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmin${p}.u8 $Vdn, $Vm", + (VMINuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmin${p}.u16 $Vdn, $Vm", + (VMINuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmin${p}.u32 $Vdn, $Vm", + (VMINuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmin${p}.f32 $Vdn, $Vm", + (VMINfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; + +def : NEONInstAlias<"vmin${p}.s8 $Vdn, $Vm", + (VMINsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmin${p}.s16 $Vdn, $Vm", + (VMINsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmin${p}.s32 $Vdn, $Vm", + (VMINsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmin${p}.u8 $Vdn, $Vm", + (VMINuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmin${p}.u16 $Vdn, $Vm", + (VMINuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmin${p}.u32 $Vdn, $Vm", + (VMINuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmin${p}.f32 $Vdn, $Vm", + (VMINfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + +// Two-operand variants for VPADD. +def : NEONInstAlias<"vpadd${p}.i8 $Vdn, $Vm", + (VPADDi8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vpadd${p}.i16 $Vdn, $Vm", + (VPADDi16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vpadd${p}.i32 $Vdn, $Vm", + (VPADDi32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vpadd${p}.f32 $Vdn, $Vm", + (VPADDf DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; + +// Two-operand variants for VSRA. + // Signed. +def : NEONInstAlias<"vsra${p}.s8 $Vdm, $imm", + (VSRAsv8i8 DPR:$Vdm, DPR:$Vdm, shr_imm8:$imm, pred:$p)>; +def : NEONInstAlias<"vsra${p}.s16 $Vdm, $imm", + (VSRAsv4i16 DPR:$Vdm, DPR:$Vdm, shr_imm16:$imm, pred:$p)>; +def : NEONInstAlias<"vsra${p}.s32 $Vdm, $imm", + (VSRAsv2i32 DPR:$Vdm, DPR:$Vdm, shr_imm32:$imm, pred:$p)>; +def : NEONInstAlias<"vsra${p}.s64 $Vdm, $imm", + (VSRAsv1i64 DPR:$Vdm, DPR:$Vdm, shr_imm64:$imm, pred:$p)>; + +def : NEONInstAlias<"vsra${p}.s8 $Vdm, $imm", + (VSRAsv16i8 QPR:$Vdm, QPR:$Vdm, shr_imm8:$imm, pred:$p)>; +def : NEONInstAlias<"vsra${p}.s16 $Vdm, $imm", + (VSRAsv8i16 QPR:$Vdm, QPR:$Vdm, shr_imm16:$imm, pred:$p)>; +def : NEONInstAlias<"vsra${p}.s32 $Vdm, $imm", + (VSRAsv4i32 QPR:$Vdm, QPR:$Vdm, shr_imm32:$imm, pred:$p)>; +def : NEONInstAlias<"vsra${p}.s64 $Vdm, $imm", + (VSRAsv2i64 QPR:$Vdm, QPR:$Vdm, shr_imm64:$imm, pred:$p)>; + + // Unsigned. 
+def : NEONInstAlias<"vsra${p}.u8 $Vdm, $imm", + (VSRAuv8i8 DPR:$Vdm, DPR:$Vdm, shr_imm8:$imm, pred:$p)>; +def : NEONInstAlias<"vsra${p}.u16 $Vdm, $imm", + (VSRAuv4i16 DPR:$Vdm, DPR:$Vdm, shr_imm16:$imm, pred:$p)>; +def : NEONInstAlias<"vsra${p}.u32 $Vdm, $imm", + (VSRAuv2i32 DPR:$Vdm, DPR:$Vdm, shr_imm32:$imm, pred:$p)>; +def : NEONInstAlias<"vsra${p}.u64 $Vdm, $imm", + (VSRAuv1i64 DPR:$Vdm, DPR:$Vdm, shr_imm64:$imm, pred:$p)>; + +def : NEONInstAlias<"vsra${p}.u8 $Vdm, $imm", + (VSRAuv16i8 QPR:$Vdm, QPR:$Vdm, shr_imm8:$imm, pred:$p)>; +def : NEONInstAlias<"vsra${p}.u16 $Vdm, $imm", + (VSRAuv8i16 QPR:$Vdm, QPR:$Vdm, shr_imm16:$imm, pred:$p)>; +def : NEONInstAlias<"vsra${p}.u32 $Vdm, $imm", + (VSRAuv4i32 QPR:$Vdm, QPR:$Vdm, shr_imm32:$imm, pred:$p)>; +def : NEONInstAlias<"vsra${p}.u64 $Vdm, $imm", + (VSRAuv2i64 QPR:$Vdm, QPR:$Vdm, shr_imm64:$imm, pred:$p)>; + +// Two-operand variants for VSRI. +def : NEONInstAlias<"vsri${p}.8 $Vdm, $imm", + (VSRIv8i8 DPR:$Vdm, DPR:$Vdm, shr_imm8:$imm, pred:$p)>; +def : NEONInstAlias<"vsri${p}.16 $Vdm, $imm", + (VSRIv4i16 DPR:$Vdm, DPR:$Vdm, shr_imm16:$imm, pred:$p)>; +def : NEONInstAlias<"vsri${p}.32 $Vdm, $imm", + (VSRIv2i32 DPR:$Vdm, DPR:$Vdm, shr_imm32:$imm, pred:$p)>; +def : NEONInstAlias<"vsri${p}.64 $Vdm, $imm", + (VSRIv1i64 DPR:$Vdm, DPR:$Vdm, shr_imm64:$imm, pred:$p)>; + +def : NEONInstAlias<"vsri${p}.8 $Vdm, $imm", + (VSRIv16i8 QPR:$Vdm, QPR:$Vdm, shr_imm8:$imm, pred:$p)>; +def : NEONInstAlias<"vsri${p}.16 $Vdm, $imm", + (VSRIv8i16 QPR:$Vdm, QPR:$Vdm, shr_imm16:$imm, pred:$p)>; +def : NEONInstAlias<"vsri${p}.32 $Vdm, $imm", + (VSRIv4i32 QPR:$Vdm, QPR:$Vdm, shr_imm32:$imm, pred:$p)>; +def : NEONInstAlias<"vsri${p}.64 $Vdm, $imm", + (VSRIv2i64 QPR:$Vdm, QPR:$Vdm, shr_imm64:$imm, pred:$p)>; + +// Two-operand variants for VSLI. +def : NEONInstAlias<"vsli${p}.8 $Vdm, $imm", + (VSLIv8i8 DPR:$Vdm, DPR:$Vdm, shr_imm8:$imm, pred:$p)>; +def : NEONInstAlias<"vsli${p}.16 $Vdm, $imm", + (VSLIv4i16 DPR:$Vdm, DPR:$Vdm, shr_imm16:$imm, pred:$p)>; +def : NEONInstAlias<"vsli${p}.32 $Vdm, $imm", + (VSLIv2i32 DPR:$Vdm, DPR:$Vdm, shr_imm32:$imm, pred:$p)>; +def : NEONInstAlias<"vsli${p}.64 $Vdm, $imm", + (VSLIv1i64 DPR:$Vdm, DPR:$Vdm, shr_imm64:$imm, pred:$p)>; + +def : NEONInstAlias<"vsli${p}.8 $Vdm, $imm", + (VSLIv16i8 QPR:$Vdm, QPR:$Vdm, shr_imm8:$imm, pred:$p)>; +def : NEONInstAlias<"vsli${p}.16 $Vdm, $imm", + (VSLIv8i16 QPR:$Vdm, QPR:$Vdm, shr_imm16:$imm, pred:$p)>; +def : NEONInstAlias<"vsli${p}.32 $Vdm, $imm", + (VSLIv4i32 QPR:$Vdm, QPR:$Vdm, shr_imm32:$imm, pred:$p)>; +def : NEONInstAlias<"vsli${p}.64 $Vdm, $imm", + (VSLIv2i64 QPR:$Vdm, QPR:$Vdm, shr_imm64:$imm, pred:$p)>; + +// VSWP allows, but does not require, a type suffix. +defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm", + (VSWPd DPR:$Vd, DPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm", + (VSWPq QPR:$Vd, QPR:$Vm, pred:$p)>; + +// VBIF, VBIT, and VBSL allow, but do not require, a type suffix. 
+defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm", + (VBIFd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm", + (VBITd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm", + (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm", + (VBIFq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm", + (VBITq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm", + (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; + +// "vmov Rd, #-imm" can be handled via "vmvn". +def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm", + (VMVNv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>; +def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm", + (VMVNv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>; +def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm", + (VMOVv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>; +def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm", + (VMOVv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>; + +// 'gas' compatibility aliases for quad-word instructions. Strictly speaking, +// these should restrict to just the Q register variants, but the register +// classes are enough to match correctly regardless, so we keep it simple +// and just use MnemonicAlias. +def : NEONMnemonicAlias<"vbicq", "vbic">; +def : NEONMnemonicAlias<"vandq", "vand">; +def : NEONMnemonicAlias<"veorq", "veor">; +def : NEONMnemonicAlias<"vorrq", "vorr">; + +def : NEONMnemonicAlias<"vmovq", "vmov">; +def : NEONMnemonicAlias<"vmvnq", "vmvn">; +// Explicit versions for floating point so that the FPImm variants get +// handled early. The parser gets confused otherwise. +def : NEONMnemonicAlias<"vmovq.f32", "vmov.f32">; +def : NEONMnemonicAlias<"vmovq.f64", "vmov.f64">; + +def : NEONMnemonicAlias<"vaddq", "vadd">; +def : NEONMnemonicAlias<"vsubq", "vsub">; + +def : NEONMnemonicAlias<"vminq", "vmin">; +def : NEONMnemonicAlias<"vmaxq", "vmax">; + +def : NEONMnemonicAlias<"vmulq", "vmul">; + +def : NEONMnemonicAlias<"vabsq", "vabs">; + +def : NEONMnemonicAlias<"vshlq", "vshl">; +def : NEONMnemonicAlias<"vshrq", "vshr">; + +def : NEONMnemonicAlias<"vcvtq", "vcvt">; + +def : NEONMnemonicAlias<"vcleq", "vcle">; +def : NEONMnemonicAlias<"vceqq", "vceq">; + +def : NEONMnemonicAlias<"vzipq", "vzip">; +def : NEONMnemonicAlias<"vswpq", "vswp">; + +def : NEONMnemonicAlias<"vrecpeq.f32", "vrecpe.f32">; +def : NEONMnemonicAlias<"vrecpeq.u32", "vrecpe.u32">; + + +// Alias for loading floating point immediates that aren't representable +// using the vmov.f32 encoding but the bitpattern is representable using +// the .i32 encoding. +def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm", + (VMOVv4i32 QPR:$Vd, nImmVMOVI32:$imm, pred:$p)>; +def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm", + (VMOVv2i32 DPR:$Vd, nImmVMOVI32:$imm, pred:$p)>; diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td index cedb547..6335229 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td @@ -1,4 +1,4 @@ -//===- ARMInstrThumb.td - Thumb support for ARM ------------*- tablegen -*-===// +//===-- ARMInstrThumb.td - Thumb support for ARM -----------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -91,6 +91,12 @@ def t_imm0_508s4 : Operand<i32> { let ParserMatchClass = t_imm0_508s4_asmoperand; let OperandType = "OPERAND_IMMEDIATE"; } +// Alias use only, so no printer is necessary. 
+def t_imm0_508s4_neg_asmoperand: AsmOperandClass { let Name = "Imm0_508s4Neg"; } +def t_imm0_508s4_neg : Operand<i32> { + let ParserMatchClass = t_imm0_508s4_neg_asmoperand; + let OperandType = "OPERAND_IMMEDIATE"; +} // Define Thumb specific addressing modes. @@ -345,6 +351,11 @@ def tSUBspi : T1pIt<(outs GPRsp:$Rdn), (ins GPRsp:$Rn, t_imm0_508s4:$imm), let DecoderMethod = "DecodeThumbAddSPImm"; } +def : tInstAlias<"add${p} sp, $imm", + (tSUBspi SP, t_imm0_508s4_neg:$imm, pred:$p)>; +def : tInstAlias<"add${p} sp, sp, $imm", + (tSUBspi SP, t_imm0_508s4_neg:$imm, pred:$p)>; + // Can optionally specify SP as a three operand instruction. def : tInstAlias<"add${p} sp, sp, $imm", (tADDspi SP, t_imm0_508s4:$imm, pred:$p)>; @@ -387,6 +398,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { bits<4> Rm; let Inst{6-3} = Rm; let Inst{2-0} = 0b000; + let Unpredictable{2-0} = 0b111; } } @@ -404,15 +416,13 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1 in { // prevent stack-pointer assignments that appear immediately before calls from // potentially appearing dead. let isCall = 1, - // On non-Darwin platforms R9 is callee-saved. - Defs = [R0, R1, R2, R3, R12, LR, QQQQ0, QQQQ2, QQQQ3, CPSR, FPSCR], - Uses = [SP] in { + Defs = [LR], Uses = [SP] in { // Also used for Thumb2 def tBL : TIx2<0b11110, 0b11, 1, (outs), (ins pred:$p, t_bltarget:$func, variable_ops), IIC_Br, "bl${p}\t$func", [(ARMtcall tglobaladdr:$func)]>, - Requires<[IsThumb, IsNotDarwin]> { + Requires<[IsThumb]> { bits<22> func; let Inst{26} = func{21}; let Inst{25-16} = func{20-11}; @@ -426,7 +436,7 @@ let isCall = 1, (outs), (ins pred:$p, t_blxtarget:$func, variable_ops), IIC_Br, "blx${p}\t$func", [(ARMcall tglobaladdr:$func)]>, - Requires<[IsThumb, HasV5T, IsNotDarwin]> { + Requires<[IsThumb, HasV5T]> { bits<21> func; let Inst{25-16} = func{20-11}; let Inst{13} = 1; @@ -439,7 +449,7 @@ let isCall = 1, def tBLXr : TI<(outs), (ins pred:$p, GPR:$func, variable_ops), IIC_Br, "blx${p}\t$func", [(ARMtcall GPR:$func)]>, - Requires<[IsThumb, HasV5T, IsNotDarwin]>, + Requires<[IsThumb, HasV5T]>, T1Special<{1,1,1,?}> { // A6.2.3 & A8.6.24; bits<4> func; let Inst{6-3} = func; @@ -450,38 +460,7 @@ let isCall = 1, def tBX_CALL : tPseudoInst<(outs), (ins tGPR:$func, variable_ops), 4, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, - Requires<[IsThumb, IsThumb1Only, IsNotDarwin]>; -} - -let isCall = 1, - // On Darwin R9 is call-clobbered. - // R7 is marked as a use to prevent frame-pointer assignments from being - // moved above / below calls. 
- Defs = [R0, R1, R2, R3, R9, R12, LR, QQQQ0, QQQQ2, QQQQ3, CPSR, FPSCR], - Uses = [R7, SP] in { - // Also used for Thumb2 - def tBLr9 : tPseudoExpand<(outs), (ins pred:$p, t_bltarget:$func, variable_ops), - 4, IIC_Br, [(ARMtcall tglobaladdr:$func)], - (tBL pred:$p, t_bltarget:$func)>, - Requires<[IsThumb, IsDarwin]>; - - // ARMv5T and above, also used for Thumb2 - def tBLXi_r9 : tPseudoExpand<(outs), (ins pred:$p, t_blxtarget:$func, variable_ops), - 4, IIC_Br, [(ARMcall tglobaladdr:$func)], - (tBLXi pred:$p, t_blxtarget:$func)>, - Requires<[IsThumb, HasV5T, IsDarwin]>; - - // Also used for Thumb2 - def tBLXr_r9 : tPseudoExpand<(outs), (ins pred:$p, GPR:$func, variable_ops), - 2, IIC_Br, [(ARMtcall GPR:$func)], - (tBLXr pred:$p, GPR:$func)>, - Requires<[IsThumb, HasV5T, IsDarwin]>; - - // ARMv4T - def tBXr9_CALL : tPseudoInst<(outs), (ins tGPR:$func, variable_ops), - 4, IIC_Br, - [(ARMcall_nolink tGPR:$func)]>, - Requires<[IsThumb, IsThumb1Only, IsDarwin]>; + Requires<[IsThumb, IsThumb1Only]>; } let isBranch = 1, isTerminator = 1, isBarrier = 1 in { @@ -523,28 +502,22 @@ let isBranch = 1, isTerminator = 1 in // Tail calls let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { - // Darwin versions. - let Defs = [R0, R1, R2, R3, R9, R12, QQQQ0, QQQQ2, QQQQ3, PC], - Uses = [SP] in { - // tTAILJMPd: Darwin version uses a Thumb2 branch (no Thumb1 tail calls - // on Darwin), so it's in ARMInstrThumb2.td. + // IOS versions. + let Uses = [SP] in { def tTAILJMPr : tPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops), 4, IIC_Br, [], (tBX GPR:$dst, (ops 14, zero_reg))>, - Requires<[IsThumb, IsDarwin]>; + Requires<[IsThumb]>; } - // Non-Darwin versions (the difference is R9). - let Defs = [R0, R1, R2, R3, R12, QQQQ0, QQQQ2, QQQQ3, PC], - Uses = [SP] in { + // tTAILJMPd: IOS version uses a Thumb2 branch (no Thumb1 tail calls + // on IOS), so it's in ARMInstrThumb2.td. + // Non-IOS version: + let Uses = [SP] in { def tTAILJMPdND : tPseudoExpand<(outs), (ins t_brtarget:$dst, pred:$p, variable_ops), 4, IIC_Br, [], (tB t_brtarget:$dst, pred:$p)>, - Requires<[IsThumb, IsNotDarwin]>; - def tTAILJMPrND : tPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops), - 4, IIC_Br, [], - (tBX GPR:$dst, (ops 14, zero_reg))>, - Requires<[IsThumb, IsNotDarwin]>; + Requires<[IsThumb, IsNotIOS]>; } } @@ -652,7 +625,7 @@ def tLDRspi : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_sp:$addr), IIC_iLoad_i, } // Load tconstpool -// FIXME: Use ldr.n to work around a Darwin assembler bug. +// FIXME: Use ldr.n to work around a darwin assembler bug. let canFoldAsLoad = 1, isReMaterializable = 1, isCodeGenOnly = 1 in def tLDRpci : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i, "ldr", ".n\t$Rt, $addr", @@ -666,10 +639,9 @@ def tLDRpci : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i, } // FIXME: Remove this entry when the above ldr.n workaround is fixed. -// For disassembly use only. -def tLDRpciDIS : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i, - "ldr", "\t$Rt, $addr", - [/* disassembly only */]>, +// For assembly/disassembly use only. 
+def tLDRpciASM : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i,
+ "ldr", "\t$Rt, $addr", []>,
T1Encoding<{0,1,0,0,1,?}> {
// A6.2 & A8.6.59
bits<3> Rt;
@@ -1131,9 +1103,6 @@ def tRSB : // A8.6.141
"rsb", "\t$Rd, $Rn, #0",
[(set tGPR:$Rd, (ineg tGPR:$Rn))]>;
-def : tInstAlias<"neg${s}${p} $Rd, $Rm",
- (tRSB tGPR:$Rd, s_cc_out:$s, tGPR:$Rm, pred:$p)>;
-
// Subtract with carry register
let Uses = [CPSR] in
def tSBC : // A8.6.151
@@ -1259,19 +1228,24 @@ def tTPsoft : tPseudoInst<(outs), (ins), 4, IIC_Br,
// preserve all of the callee-saved registers, which is exactly what we want.
// $val is a scratch register for our use.
let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R12, CPSR ],
- hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1 in
+ hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1,
+ usesCustomInserter = 1 in
def tInt_eh_sjlj_setjmp : ThumbXI<(outs),(ins tGPR:$src, tGPR:$val),
AddrModeNone, 0, NoItinerary, "","",
[(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>;
-// FIXME: Non-Darwin version(s)
+// FIXME: Non-IOS version(s)
let isBarrier = 1, hasSideEffects = 1, isTerminator = 1, isCodeGenOnly = 1,
Defs = [ R7, LR, SP ] in
def tInt_eh_sjlj_longjmp : XI<(outs), (ins GPR:$src, GPR:$scratch),
AddrModeNone, 0, IndexModeNone,
Pseudo, NoItinerary, "", "",
[(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>,
- Requires<[IsThumb, IsDarwin]>;
+ Requires<[IsThumb, IsIOS]>;
+
+let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R12, CPSR ],
+ isBarrier = 1 in
+def tInt_eh_sjlj_dispatchsetup : PseudoInst<(outs), (ins), NoItinerary, []>;
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
@@ -1309,20 +1283,14 @@ def : T1Pat<(ARMWrapperJT tjumptable:$dst, imm:$id),
// Direct calls
def : T1Pat<(ARMtcall texternalsym:$func), (tBL texternalsym:$func)>,
- Requires<[IsThumb, IsNotDarwin]>;
-def : T1Pat<(ARMtcall texternalsym:$func), (tBLr9 texternalsym:$func)>,
- Requires<[IsThumb, IsDarwin]>;
+ Requires<[IsThumb]>;
def : Tv5Pat<(ARMcall texternalsym:$func), (tBLXi texternalsym:$func)>,
- Requires<[IsThumb, HasV5T, IsNotDarwin]>;
-def : Tv5Pat<(ARMcall texternalsym:$func), (tBLXi_r9 texternalsym:$func)>,
- Requires<[IsThumb, HasV5T, IsDarwin]>;
+ Requires<[IsThumb, HasV5T]>;
// Indirect calls to ARM routines
def : Tv5Pat<(ARMcall GPR:$dst), (tBLXr GPR:$dst)>,
- Requires<[IsThumb, HasV5T, IsNotDarwin]>;
-def : Tv5Pat<(ARMcall GPR:$dst), (tBLXr_r9 GPR:$dst)>,
- Requires<[IsThumb, HasV5T, IsDarwin]>;
+ Requires<[IsThumb, HasV5T]>;
// zextload i1 -> zextload i8
def : T1Pat<(zextloadi1 t_addrmode_rrs1:$addr),
@@ -1434,3 +1402,16 @@ def : InstAlias<"nop", (tMOVr R8, R8, 14, 0)>,
Requires<[IsThumb, IsThumb1Only]>;
// nothing).
def : tInstAlias<"cps$imod", (tCPS imod_op:$imod, 0)>;
def : tInstAlias<"cps$imod", (tCPS imod_op:$imod, 0)>;
+
+// "neg" is an alias for "rsb rd, rn, #0"
+def : tInstAlias<"neg${s}${p} $Rd, $Rm",
+ (tRSB tGPR:$Rd, s_cc_out:$s, tGPR:$Rm, pred:$p)>;
+
+
+// Implied destination operand forms for shifts.
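+// (Illustrative: "lsls r0, #2" is shorthand for "lsls r0, r0, #2"; the
+// alias reuses $Rdm as both the destination and the shifted source.)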
+def : tInstAlias<"lsl${s}${p} $Rdm, $imm", + (tLSLri tGPR:$Rdm, cc_out:$s, tGPR:$Rdm, imm0_31:$imm, pred:$p)>; +def : tInstAlias<"lsr${s}${p} $Rdm, $imm", + (tLSRri tGPR:$Rdm, cc_out:$s, tGPR:$Rdm, imm_sr:$imm, pred:$p)>; +def : tInstAlias<"asr${s}${p} $Rdm, $imm", + (tASRri tGPR:$Rdm, cc_out:$s, tGPR:$Rdm, imm_sr:$imm, pred:$p)>; diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td index 05dcc89..e6fb9d5 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td @@ -1,4 +1,4 @@ -//===- ARMInstrThumb2.td - Thumb2 support for ARM -------------------------===// +//===-- ARMInstrThumb2.td - Thumb2 support for ARM ---------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -65,7 +65,7 @@ def t2_so_imm_neg_XFORM : SDNodeXForm<imm, [{ // t2_so_imm - Match a 32-bit immediate operand, which is an // 8-bit immediate rotated by an arbitrary number of bits, or an 8-bit // immediate splatted into multiple bytes of the word. -def t2_so_imm_asmoperand : AsmOperandClass { let Name = "T2SOImm"; } +def t2_so_imm_asmoperand : ImmAsmOperand { let Name = "T2SOImm"; } def t2_so_imm : Operand<i32>, ImmLeaf<i32, [{ return ARM_AM::getT2SOImmVal(Imm) != -1; }]> { @@ -76,26 +76,39 @@ def t2_so_imm : Operand<i32>, ImmLeaf<i32, [{ // t2_so_imm_not - Match an immediate that is a complement // of a t2_so_imm. -def t2_so_imm_not : Operand<i32>, - PatLeaf<(imm), [{ +// Note: this pattern doesn't require an encoder method and such, as it's +// only used on aliases (Pat<> and InstAlias<>). The actual encoding +// is handled by the destination instructions, which use t2_so_imm. +def t2_so_imm_not_asmoperand : AsmOperandClass { let Name = "T2SOImmNot"; } +def t2_so_imm_not : Operand<i32>, PatLeaf<(imm), [{ return ARM_AM::getT2SOImmVal(~((uint32_t)N->getZExtValue())) != -1; -}], t2_so_imm_not_XFORM>; +}], t2_so_imm_not_XFORM> { + let ParserMatchClass = t2_so_imm_not_asmoperand; +} // t2_so_imm_neg - Match an immediate that is a negation of a t2_so_imm. -def t2_so_imm_neg : Operand<i32>, - PatLeaf<(imm), [{ - return ARM_AM::getT2SOImmVal(-((uint32_t)N->getZExtValue())) != -1; -}], t2_so_imm_neg_XFORM>; +def t2_so_imm_neg_asmoperand : AsmOperandClass { let Name = "T2SOImmNeg"; } +def t2_so_imm_neg : Operand<i32>, PatLeaf<(imm), [{ + int64_t Value = -(int)N->getZExtValue(); + return Value && ARM_AM::getT2SOImmVal(Value) != -1; +}], t2_so_imm_neg_XFORM> { + let ParserMatchClass = t2_so_imm_neg_asmoperand; +} /// imm0_4095 predicate - True if the 32-bit immediate is in the range [0.4095]. 
-def imm0_4095 : Operand<i32>,
- ImmLeaf<i32, [{
+def imm0_4095_asmoperand: ImmAsmOperand { let Name = "Imm0_4095"; }
+def imm0_4095 : Operand<i32>, ImmLeaf<i32, [{
return Imm >= 0 && Imm < 4096;
-}]>;
+}]> {
+ let ParserMatchClass = imm0_4095_asmoperand;
+}
-def imm0_4095_neg : PatLeaf<(i32 imm), [{
+def imm0_4095_neg_asmoperand: AsmOperandClass { let Name = "Imm0_4095Neg"; }
+def imm0_4095_neg : Operand<i32>, PatLeaf<(i32 imm), [{
return (uint32_t)(-N->getZExtValue()) < 4096;
-}], imm_neg_XFORM>;
+}], imm_neg_XFORM> {
+ let ParserMatchClass = imm0_4095_neg_asmoperand;
+}
def imm0_255_neg : PatLeaf<(i32 imm), [{
return (uint32_t)(-N->getZExtValue()) < 255;
@@ -129,6 +142,12 @@ def t2ldrlabel : Operand<i32> {
let PrintMethod = "printT2LdrLabelOperand";
}
+def t2ldr_pcrel_imm12_asmoperand : AsmOperandClass {let Name = "MemPCRelImm12";}
+def t2ldr_pcrel_imm12 : Operand<i32> {
+ let ParserMatchClass = t2ldr_pcrel_imm12_asmoperand;
+ // used for assembler pseudo instruction and maps to t2ldrlabel, so
+ // doesn't need encoder or print methods of its own.
+}
// ADR instruction labels.
def t2adrlabel : Operand<i32> {
@@ -545,6 +564,11 @@ multiclass T2I_bin_w_irs<bits<4> opcod, string opc,
InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
PatFrag opnode, string baseOpc, bit Commutable = 0> :
T2I_bin_irs<opcod, opc, iii, iir, iis, opnode, baseOpc, Commutable, ".w"> {
+ // Assembler aliases w/ the ".w" suffix.
+ def : t2InstAlias<!strconcat(opc, "${s}${p}.w", " $Rd, $Rn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rd, rGPR:$Rn,
+ t2_so_imm:$imm, pred:$p,
+ cc_out:$s)>;
// Assembler aliases w/o the ".w" suffix.
def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $Rm"),
(!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rd, rGPR:$Rn,
@@ -556,6 +580,10 @@ multiclass T2I_bin_w_irs<bits<4> opcod, string opc,
cc_out:$s)>;
// and with the optional destination operand, too.
+ def : t2InstAlias<!strconcat(opc, "${s}${p}.w", " $Rdn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn,
+ t2_so_imm:$imm, pred:$p,
+ cc_out:$s)>;
def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $Rm"),
(!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn,
rGPR:$Rm, pred:$p,
@@ -608,25 +636,48 @@ multiclass T2I_rbin_irs<bits<4> opcod, string opc, PatFrag opnode> {
///
/// These opcodes will be converted to the real non-S opcodes by
/// AdjustInstrPostInstrSelection after giving them an optional CPSR operand.
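/// (For example, a t2ADDSri pseudo produced here is expected to become a
/// real t2ADDri whose optional cc_out operand is bound to CPSR once that
/// post-selection hook runs; illustrative summary only.)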
-let hasPostISelHook = 1, isCodeGenOnly = 1, isPseudo = 1, Defs = [CPSR] in { -multiclass T2I_bin_s_irs<bits<4> opcod, string opc, - InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, - PatFrag opnode, bit Commutable = 0> { +let hasPostISelHook = 1, Defs = [CPSR] in { +multiclass T2I_bin_s_irs<InstrItinClass iii, InstrItinClass iir, + InstrItinClass iis, PatFrag opnode, + bit Commutable = 0> { // shifted imm - def ri : T2sTwoRegImm< - (outs rGPR:$Rd), (ins GPR:$Rn, t2_so_imm:$imm), iii, - opc, ".w\t$Rd, $Rn, $imm", - [(set rGPR:$Rd, CPSR, (opnode GPR:$Rn, t2_so_imm:$imm))]>; + def ri : t2PseudoInst<(outs rGPR:$Rd), + (ins GPRnopc:$Rn, t2_so_imm:$imm, pred:$p), + 4, iii, + [(set rGPR:$Rd, CPSR, (opnode GPRnopc:$Rn, + t2_so_imm:$imm))]>; // register - def rr : T2sThreeReg< - (outs rGPR:$Rd), (ins GPR:$Rn, rGPR:$Rm), iir, - opc, ".w\t$Rd, $Rn, $Rm", - [(set rGPR:$Rd, CPSR, (opnode GPR:$Rn, rGPR:$Rm))]>; + def rr : t2PseudoInst<(outs rGPR:$Rd), (ins GPRnopc:$Rn, rGPR:$Rm, pred:$p), + 4, iir, + [(set rGPR:$Rd, CPSR, (opnode GPRnopc:$Rn, + rGPR:$Rm))]> { + let isCommutable = Commutable; + } // shifted register - def rs : T2sTwoRegShiftedReg< - (outs rGPR:$Rd), (ins GPR:$Rn, t2_so_reg:$ShiftedRm), iis, - opc, ".w\t$Rd, $Rn, $ShiftedRm", - [(set rGPR:$Rd, CPSR, (opnode GPR:$Rn, t2_so_reg:$ShiftedRm))]>; + def rs : t2PseudoInst<(outs rGPR:$Rd), + (ins GPRnopc:$Rn, t2_so_reg:$ShiftedRm, pred:$p), + 4, iis, + [(set rGPR:$Rd, CPSR, (opnode GPRnopc:$Rn, + t2_so_reg:$ShiftedRm))]>; +} +} + +/// T2I_rbin_s_is - Same as T2I_bin_s_irs, except selection DAG +/// operands are reversed. +let hasPostISelHook = 1, Defs = [CPSR] in { +multiclass T2I_rbin_s_is<PatFrag opnode> { + // shifted imm + def ri : t2PseudoInst<(outs rGPR:$Rd), + (ins GPRnopc:$Rn, t2_so_imm:$imm, pred:$p), + 4, IIC_iALUi, + [(set rGPR:$Rd, CPSR, (opnode t2_so_imm:$imm, + GPRnopc:$Rn))]>; + // shifted register + def rs : t2PseudoInst<(outs rGPR:$Rd), + (ins GPRnopc:$Rn, t2_so_reg:$ShiftedRm, pred:$p), + 4, IIC_iALUsi, + [(set rGPR:$Rd, CPSR, (opnode t2_so_reg:$ShiftedRm, + GPRnopc:$Rn))]>; } } @@ -735,26 +786,6 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, } } -/// T2I_rbin_s_is - Same as T2I_rbin_irs except sets 's' bit and the register -/// version is not needed since this is only for codegen. -/// -/// These opcodes will be converted to the real non-S opcodes by -/// AdjustInstrPostInstrSelection after giving then an optional CPSR operand. -let hasPostISelHook = 1, isCodeGenOnly = 1, isPseudo = 1, Defs = [CPSR] in { -multiclass T2I_rbin_s_is<bits<4> opcod, string opc, PatFrag opnode> { - // shifted imm - def ri : T2sTwoRegImm< - (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), IIC_iALUi, - opc, ".w\t$Rd, $Rn, $imm", - [(set rGPR:$Rd, CPSR, (opnode t2_so_imm:$imm, rGPR:$Rn))]>; - // shifted register - def rs : T2sTwoRegShiftedReg< - (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm), - IIC_iALUsi, opc, "\t$Rd, $Rn, $ShiftedRm", - [(set rGPR:$Rd, CPSR, (opnode t2_so_reg:$ShiftedRm, rGPR:$Rn))]>; -} -} - /// T2I_sh_ir - Defines a set of (op reg, {so_imm|r}) patterns for a shift / // rotate operation that produces a value. multiclass T2I_sh_ir<bits<2> opcod, string opc, Operand ty, PatFrag opnode, @@ -930,7 +961,8 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc, let DecoderMethod = "DecodeT2LoadShift"; } - // FIXME: Is the pci variant actually needed? + // pci variant is very similar to i12, but supports negative offsets + // from the PC. 
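+ // (Illustrative: a load from a literal that sits before the instruction,
+ // e.g. "ldr.w r0, [pc, #-8]", needs a negative offset, which the unsigned
+ // i12 encoding cannot represent.)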
def pci : T2Ipc <(outs target:$Rt), (ins t2ldrlabel:$addr), iii, opc, ".w\t$Rt, $addr", [(set target:$Rt, (opnode (ARMWrapper tconstpool:$addr)))]> { @@ -1315,14 +1347,16 @@ defm t2STRH:T2I_st<0b01,"strh", IIC_iStore_bh_i, IIC_iStore_bh_si, rGPR, BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>; // Store doubleword -let mayLoad = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in +let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs), (ins GPR:$Rt, GPR:$Rt2, t2addrmode_imm8s4:$addr), IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", "", []>; // Indexed stores + +let mayStore = 1, neverHasSideEffects = 1 in { def t2STR_PRE : T2Ipreldst<0, 0b10, 0, 1, (outs GPRnopc:$Rn_wb), - (ins rGPR:$Rt, t2addrmode_imm8:$addr), + (ins GPRnopc:$Rt, t2addrmode_imm8:$addr), AddrModeT2_i8, IndexModePre, IIC_iStore_iu, "str", "\t$Rt, $addr!", "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> { @@ -1343,15 +1377,16 @@ def t2STRB_PRE : T2Ipreldst<0, 0b00, 0, 1, (outs GPRnopc:$Rn_wb), "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> { let AsmMatchConverter = "cvtStWriteBackRegT2AddrModeImm8"; } +} // mayStore = 1, neverHasSideEffects = 1 def t2STR_POST : T2Ipostldst<0, 0b10, 0, 0, (outs GPRnopc:$Rn_wb), - (ins rGPR:$Rt, addr_offset_none:$Rn, + (ins GPRnopc:$Rt, addr_offset_none:$Rn, t2am_imm8_offset:$offset), AddrModeT2_i8, IndexModePost, IIC_iStore_iu, "str", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb,@earlyclobber $Rn_wb", [(set GPRnopc:$Rn_wb, - (post_store rGPR:$Rt, addr_offset_none:$Rn, + (post_store GPRnopc:$Rt, addr_offset_none:$Rn, t2am_imm8_offset:$offset))]>; def t2STRH_POST : T2Ipostldst<0, 0b01, 0, 0, (outs GPRnopc:$Rn_wb), @@ -1398,7 +1433,6 @@ def t2STRH_preidx: t2PseudoInst<(outs GPRnopc:$Rn_wb), (pre_truncsti16 rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset))]>; } - // STRT, STRBT, STRHT all have offset mode (PUW=0b110) and are for disassembly // only. // Ref: A8.6.193 STR (immediate, Thumb) Encoding T4 @@ -1455,7 +1489,7 @@ def t2STRD_POST : T2Ii8s4post<0, 1, 0, (outs GPR:$wb), "$addr.base = $wb", []>; // T2Ipl (Preload Data/Instruction) signals the memory system of possible future -// data/instruction access. These are for disassembly only. +// data/instruction access. // instr_write is inverted for Thumb mode: (prefetch 3) -> (preload 0), // (prefetch 1) -> (preload 2), (prefetch 2) -> (preload 1). multiclass T2Ipl<bits<1> write, bits<1> instr, string opc> { @@ -1513,6 +1547,10 @@ multiclass T2Ipl<bits<1> write, bits<1> instr, string opc> { let DecoderMethod = "DecodeT2LoadShift"; } + // FIXME: We should have a separate 'pci' variant here. As-is we represent + // it via the i12 variant, which it's related to, but that means we can + // represent negative immediates, which aren't legal for anything except + // the 'pci' case (Rn == 15). } defm t2PLD : T2Ipl<0, 0, "pld">, Requires<[IsThumb2]>; @@ -1689,6 +1727,8 @@ def t2MOVr : T2sTwoReg<(outs GPRnopc:$Rd), (ins GPR:$Rm), IIC_iMOVr, let Inst{14-12} = 0b000; let Inst{7-4} = 0b0000; } +def : t2InstAlias<"mov${p}.w $Rd, $Rm", (t2MOVr GPRnopc:$Rd, GPR:$Rm, + pred:$p, zero_reg)>; def : t2InstAlias<"movs${p}.w $Rd, $Rm", (t2MOVr GPRnopc:$Rd, GPR:$Rm, pred:$p, CPSR)>; def : t2InstAlias<"movs${p} $Rd, $Rm", (t2MOVr GPRnopc:$Rd, GPR:$Rm, @@ -1837,11 +1877,9 @@ defm t2SUB : T2I_bin_ii12rs<0b101, "sub", // FIXME: Eliminate t2ADDS/t2SUBS pseudo opcodes after adding tablegen // support for an optional CPSR definition that corresponds to the DAG // node's second value. 
We can then eliminate the implicit def of CPSR. -defm t2ADDS : T2I_bin_s_irs <0b1000, "add", - IIC_iALUi, IIC_iALUr, IIC_iALUsi, +defm t2ADDS : T2I_bin_s_irs <IIC_iALUi, IIC_iALUr, IIC_iALUsi, BinOpFrag<(ARMaddc node:$LHS, node:$RHS)>, 1>; -defm t2SUBS : T2I_bin_s_irs <0b1101, "sub", - IIC_iALUi, IIC_iALUr, IIC_iALUsi, +defm t2SUBS : T2I_bin_s_irs <IIC_iALUi, IIC_iALUr, IIC_iALUsi, BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>; let hasPostISelHook = 1 in { @@ -1857,8 +1895,7 @@ defm t2RSB : T2I_rbin_irs <0b1110, "rsb", // FIXME: Eliminate them if we can write def : Pat patterns which defines // CPSR and the implicit def of CPSR is not needed. -defm t2RSBS : T2I_rbin_s_is <0b1110, "rsb", - BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>; +defm t2RSBS : T2I_rbin_s_is <BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>; // (sub X, imm) gets canonicalized to (add X, -imm). Match this form. // The assume-no-carry-in form uses the negation of the input since add/sub @@ -2840,6 +2877,8 @@ defm t2TEQ : T2I_cmp_irs<0b0100, "teq", // FIXME: should be able to write a pattern for ARMcmov, but can't use // a two-value operand where a dag node expects two operands. :( let neverHasSideEffects = 1 in { + +let isCommutable = 1 in def t2MOVCCr : t2PseudoInst<(outs rGPR:$Rd), (ins rGPR:$false, rGPR:$Rm, pred:$p), 4, IIC_iCMOVr, @@ -2883,7 +2922,7 @@ def t2MOVCCi32imm : PseudoInst<(outs rGPR:$dst), let isMoveImm = 1 in def t2MVNCCi : T2OneRegImm<(outs rGPR:$Rd), (ins rGPR:$false, t2_so_imm:$imm), - IIC_iCMOVi, "mvn", ".w\t$Rd, $imm", + IIC_iCMOVi, "mvn", "\t$Rd, $imm", [/*(set rGPR:$Rd,(ARMcmov rGPR:$false,t2_so_imm_not:$imm, imm:$cc, CCR:$ccr))*/]>, RegConstraint<"$false = $Rd"> { @@ -2922,6 +2961,35 @@ def t2MOVCCror : T2I_movcc_sh<0b11, (outs rGPR:$Rd), IIC_iCMOVsi, "ror", ".w\t$Rd, $Rm, $imm", []>, RegConstraint<"$false = $Rd">; } // isCodeGenOnly = 1 + +multiclass T2I_bincc_irs<Instruction iri, Instruction irr, Instruction irs, + InstrItinClass iii, InstrItinClass iir, InstrItinClass iis> { + // shifted imm + def ri : t2PseudoExpand<(outs rGPR:$Rd), + (ins rGPR:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s), + 4, iii, [], + (iri rGPR:$Rd, rGPR:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>, + RegConstraint<"$Rn = $Rd">; + // register + def rr : t2PseudoExpand<(outs rGPR:$Rd), + (ins rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s), + 4, iir, [], + (irr rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>, + RegConstraint<"$Rn = $Rd">; + // shifted register + def rs : t2PseudoExpand<(outs rGPR:$Rd), + (ins rGPR:$Rn, t2_so_reg:$ShiftedRm, pred:$p, cc_out:$s), + 4, iis, [], + (irs rGPR:$Rd, rGPR:$Rn, t2_so_reg:$ShiftedRm, pred:$p, cc_out:$s)>, + RegConstraint<"$Rn = $Rd">; +} // T2I_bincc_irs + +defm t2ANDCC : T2I_bincc_irs<t2ANDri, t2ANDrr, t2ANDrs, + IIC_iBITi, IIC_iBITr, IIC_iBITsi>; +defm t2ORRCC : T2I_bincc_irs<t2ORRri, t2ORRrr, t2ORRrs, + IIC_iBITi, IIC_iBITr, IIC_iBITsi>; +defm t2EORCC : T2I_bincc_irs<t2EORri, t2EORrr, t2EORrs, + IIC_iBITi, IIC_iBITr, IIC_iBITsi>; } // neverHasSideEffects //===----------------------------------------------------------------------===// @@ -3043,9 +3111,7 @@ def t2STREX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt, let Inst{11-8} = Rd; let Inst{7-0} = addr{7-0}; } -} - -let hasExtraSrcRegAllocReq = 1, Constraints = "@earlyclobber $Rd" in +let hasExtraSrcRegAllocReq = 1 in def t2STREXD : T2I_strex<0b11, (outs rGPR:$Rd), (ins rGPR:$Rt, rGPR:$Rt2, addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, @@ -3054,6 +3120,7 @@ def t2STREXD : T2I_strex<0b11, (outs rGPR:$Rd), bits<4> Rt2; let Inst{11-8} = Rt2; } +} def 
t2CLREX : T2I<(outs), (ins), NoItinerary, "clrex", "", []>, Requires<[IsThumb2, HasV7]> { @@ -3081,8 +3148,9 @@ def t2CLREX : T2I<(outs), (ins), NoItinerary, "clrex", "", []>, // $val is a scratch register for our use. let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR, - QQQQ0, QQQQ1, QQQQ2, QQQQ3 ], - hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1 in { + Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15], + hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1, + usesCustomInserter = 1 in { def t2Int_eh_sjlj_setjmp : Thumb2XI<(outs), (ins tGPR:$src, tGPR:$val), AddrModeNone, 0, NoItinerary, "", "", [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>, @@ -3091,7 +3159,8 @@ let Defs = let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR ], - hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1 in { + hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1, + usesCustomInserter = 1 in { def t2Int_eh_sjlj_setjmp_nofp : Thumb2XI<(outs), (ins tGPR:$src, tGPR:$val), AddrModeNone, 0, NoItinerary, "", "", [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>, @@ -3128,6 +3197,7 @@ def t2B : T2I<(outs), (ins uncondbrtarget:$target), IIC_Br, let Inst{13} = target{17}; let Inst{21-16} = target{16-11}; let Inst{10-0} = target{10-0}; + let DecoderMethod = "DecodeT2BInstruction"; } let isNotDuplicable = 1, isIndirectBranch = 1 in { @@ -3195,19 +3265,32 @@ def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br, let DecoderMethod = "DecodeThumb2BCCInstruction"; } -// Tail calls. The Darwin version of thumb tail calls uses a t2 branch, so +// Tail calls. The IOS version of thumb tail calls uses a t2 branch, so // it goes here. let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { - // Darwin version. - let Defs = [R0, R1, R2, R3, R9, R12, QQQQ0, QQQQ2, QQQQ3, PC], - Uses = [SP] in + // IOS version. + let Uses = [SP] in def tTAILJMPd: tPseudoExpand<(outs), (ins uncondbrtarget:$dst, pred:$p, variable_ops), 4, IIC_Br, [], (t2B uncondbrtarget:$dst, pred:$p)>, - Requires<[IsThumb2, IsDarwin]>; + Requires<[IsThumb2, IsIOS]>; } +let isCall = 1, Defs = [LR], Uses = [SP] in { + // mov lr, pc; b if callee is marked noreturn to avoid confusing the + // return stack predictor. 
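[Editor's note: to make the return-stack-predictor comment concrete: a BL pushes its return address onto the hardware return address stack, and that entry is only consumed by a matching return. Calling a noreturn function with BL therefore leaves a stale entry that misdirects later return predictions, which is exactly what the mov-lr-pc-plus-branch pseudo defined next avoids. A toy model of the effect, for intuition only:

#include <vector>

// Toy return address stack: bl pushes, bx lr pops and predicts.
struct ReturnAddressStack {
  std::vector<unsigned> Stack;
  void onBL(unsigned RetAddr) { Stack.push_back(RetAddr); }
  unsigned onReturn() {            // prediction for bx lr
    if (Stack.empty()) return 0;   // no prediction available
    unsigned Predicted = Stack.back();
    Stack.pop_back();
    return Predicted;
  }
};
// A bl to a noreturn callee calls onBL() with an address that is never
// popped, so every later onReturn() yields a stale, wrong target.
// "mov lr, pc; b callee" sets up LR without touching the stack at all.
]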
+ def t2BMOVPCB_CALL : tPseudoInst<(outs), + (ins t_bltarget:$func, variable_ops), + 6, IIC_Br, [(ARMcall_nolink tglobaladdr:$func)]>, + Requires<[IsThumb]>; +} + +// Direct calls +def : T2Pat<(ARMcall_nolink texternalsym:$func), + (t2BMOVPCB_CALL texternalsym:$func)>, + Requires<[IsThumb]>; + // IT block let Defs = [ITSTATE] in def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask), @@ -3430,7 +3513,7 @@ def t2LDRpci_pic : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr, pclabel:$cp), imm:$cp))]>, Requires<[IsThumb2]>; -// Pseudo isntruction that combines movs + predicated rsbmi +// Pseudo isntruction that combines movs + predicated rsbmi // to implement integer ABS let usesCustomInserter = 1, Defs = [CPSR] in { def t2ABS : PseudoInst<(outs rGPR:$dst), (ins rGPR:$src), @@ -3667,20 +3750,32 @@ def t2MCR : t2MovRCopro<0b1110, "mcr", 0, c_imm:$CRm, imm0_7:$opc2), [(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn, imm:$CRm, imm:$opc2)]>; +def : t2InstAlias<"mcr $cop, $opc1, $Rt, $CRn, $CRm", + (t2MCR p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, + c_imm:$CRm, 0)>; def t2MCR2 : t2MovRCopro<0b1111, "mcr2", 0, (outs), (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), [(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn, imm:$CRm, imm:$opc2)]>; +def : t2InstAlias<"mcr2 $cop, $opc1, $Rt, $CRn, $CRm", + (t2MCR2 p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, + c_imm:$CRm, 0)>; /* from coprocessor to ARM core register */ def t2MRC : t2MovRCopro<0b1110, "mrc", 1, (outs GPR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), []>; +def : t2InstAlias<"mrc $cop, $opc1, $Rt, $CRn, $CRm", + (t2MRC GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, + c_imm:$CRm, 0)>; def t2MRC2 : t2MovRCopro<0b1111, "mrc2", 1, (outs GPR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), []>; +def : t2InstAlias<"mrc2 $cop, $opc1, $Rt, $CRn, $CRm", + (t2MRC2 GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, + c_imm:$CRm, 0)>; def : T2v6Pat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2), (t2MRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>; @@ -3851,6 +3946,29 @@ def : t2InstAlias<"add${s}${p} $Rd, $Rn, $Rm", def : t2InstAlias<"add${s}${p} $Rd, $Rn, $ShiftedRm", (t2ADDrs GPRnopc:$Rd, GPRnopc:$Rn, t2_so_reg:$ShiftedRm, pred:$p, cc_out:$s)>; +// ... and with the destination and source register combined. +def : t2InstAlias<"add${s}${p} $Rdn, $imm", + (t2ADDri GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_imm:$imm, pred:$p, cc_out:$s)>; +def : t2InstAlias<"add${p} $Rdn, $imm", + (t2ADDri12 GPRnopc:$Rdn, GPRnopc:$Rdn, imm0_4095:$imm, pred:$p)>; +def : t2InstAlias<"add${s}${p} $Rdn, $Rm", + (t2ADDrr GPRnopc:$Rdn, GPRnopc:$Rdn, rGPR:$Rm, pred:$p, cc_out:$s)>; +def : t2InstAlias<"add${s}${p} $Rdn, $ShiftedRm", + (t2ADDrs GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_reg:$ShiftedRm, + pred:$p, cc_out:$s)>; + +// add w/ negative immediates is just a sub. +def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm", + (t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm, pred:$p, + cc_out:$s)>; +def : t2InstAlias<"add${p} $Rd, $Rn, $imm", + (t2SUBri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095_neg:$imm, pred:$p)>; +def : t2InstAlias<"add${s}${p} $Rdn, $imm", + (t2SUBri GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_imm_neg:$imm, pred:$p, + cc_out:$s)>; +def : t2InstAlias<"add${p} $Rdn, $imm", + (t2SUBri12 GPRnopc:$Rdn, GPRnopc:$Rdn, imm0_4095_neg:$imm, pred:$p)>; + // Aliases for SUB without the ".w" optional width specifier. 
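[Editor's note, before the SUB aliases that follow: the t2ABS pseudo above combines a flag-setting move with a predicated reverse-subtract, letting the N flag decide whether to negate. Reduced to C, the expansion computes:

#include <cstdint>

// Semantics of the t2ABS expansion (movs rD, rS ; it mi ; rsbmi rD, rD, #0):
int32_t absLikeT2ABS(int32_t X) {
  bool Negative = X < 0;    // movs: copies X and sets the N flag
  return Negative ? -X : X; // rsbmi: rD = 0 - rD, only when N was set
}
// As with the hardware sequence, INT32_MIN maps to itself.
]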
def : t2InstAlias<"sub${s}${p} $Rd, $Rn, $imm", @@ -3862,6 +3980,18 @@ def : t2InstAlias<"sub${s}${p} $Rd, $Rn, $Rm", def : t2InstAlias<"sub${s}${p} $Rd, $Rn, $ShiftedRm", (t2SUBrs GPRnopc:$Rd, GPRnopc:$Rn, t2_so_reg:$ShiftedRm, pred:$p, cc_out:$s)>; +// ... and with the destination and source register combined. +def : t2InstAlias<"sub${s}${p} $Rdn, $imm", + (t2SUBri GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_imm:$imm, pred:$p, cc_out:$s)>; +def : t2InstAlias<"sub${p} $Rdn, $imm", + (t2SUBri12 GPRnopc:$Rdn, GPRnopc:$Rdn, imm0_4095:$imm, pred:$p)>; +def : t2InstAlias<"sub${s}${p}.w $Rdn, $Rm", + (t2SUBrr GPRnopc:$Rdn, GPRnopc:$Rdn, rGPR:$Rm, pred:$p, cc_out:$s)>; +def : t2InstAlias<"sub${s}${p} $Rdn, $Rm", + (t2SUBrr GPRnopc:$Rdn, GPRnopc:$Rdn, rGPR:$Rm, pred:$p, cc_out:$s)>; +def : t2InstAlias<"sub${s}${p} $Rdn, $ShiftedRm", + (t2SUBrs GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_reg:$ShiftedRm, + pred:$p, cc_out:$s)>; // Alias for compares without the ".w" optional width specifier. def : t2InstAlias<"cmn${p} $Rn, $Rm", @@ -3900,7 +4030,20 @@ def : t2InstAlias<"ldrsb${p} $Rt, $addr", def : t2InstAlias<"ldrsh${p} $Rt, $addr", (t2LDRSHs rGPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>; -// Alias for MVN without the ".w" optional width specifier. +def : t2InstAlias<"ldr${p} $Rt, $addr", + (t2LDRpci GPR:$Rt, t2ldrlabel:$addr, pred:$p)>; +def : t2InstAlias<"ldrb${p} $Rt, $addr", + (t2LDRBpci rGPR:$Rt, t2ldrlabel:$addr, pred:$p)>; +def : t2InstAlias<"ldrh${p} $Rt, $addr", + (t2LDRHpci rGPR:$Rt, t2ldrlabel:$addr, pred:$p)>; +def : t2InstAlias<"ldrsb${p} $Rt, $addr", + (t2LDRSBpci rGPR:$Rt, t2ldrlabel:$addr, pred:$p)>; +def : t2InstAlias<"ldrsh${p} $Rt, $addr", + (t2LDRSHpci rGPR:$Rt, t2ldrlabel:$addr, pred:$p)>; + +// Alias for MVN with(out) the ".w" optional width specifier. +def : t2InstAlias<"mvn${s}${p}.w $Rd, $imm", + (t2MVNi rGPR:$Rd, t2_so_imm:$imm, pred:$p, cc_out:$s)>; def : t2InstAlias<"mvn${s}${p} $Rd, $Rm", (t2MVNr rGPR:$Rd, rGPR:$Rm, pred:$p, cc_out:$s)>; def : t2InstAlias<"mvn${s}${p} $Rd, $ShiftedRm", @@ -3921,6 +4064,30 @@ def : t2InstAlias<"push${p} $regs", (t2STMDB_UPD SP, pred:$p, reglist:$regs)>; def : t2InstAlias<"pop${p}.w $regs", (t2LDMIA_UPD SP, pred:$p, reglist:$regs)>; def : t2InstAlias<"pop${p} $regs", (t2LDMIA_UPD SP, pred:$p, reglist:$regs)>; +// STMIA/STMIA_UPD aliases w/o the optional .w suffix +def : t2InstAlias<"stm${p} $Rn, $regs", + (t2STMIA GPR:$Rn, pred:$p, reglist:$regs)>; +def : t2InstAlias<"stm${p} $Rn!, $regs", + (t2STMIA_UPD GPR:$Rn, pred:$p, reglist:$regs)>; + +// LDMIA/LDMIA_UPD aliases w/o the optional .w suffix +def : t2InstAlias<"ldm${p} $Rn, $regs", + (t2LDMIA GPR:$Rn, pred:$p, reglist:$regs)>; +def : t2InstAlias<"ldm${p} $Rn!, $regs", + (t2LDMIA_UPD GPR:$Rn, pred:$p, reglist:$regs)>; + +// STMDB/STMDB_UPD aliases w/ the optional .w suffix +def : t2InstAlias<"stmdb${p}.w $Rn, $regs", + (t2STMDB GPR:$Rn, pred:$p, reglist:$regs)>; +def : t2InstAlias<"stmdb${p}.w $Rn!, $regs", + (t2STMDB_UPD GPR:$Rn, pred:$p, reglist:$regs)>; + +// LDMDB/LDMDB_UPD aliases w/ the optional .w suffix +def : t2InstAlias<"ldmdb${p}.w $Rn, $regs", + (t2LDMDB GPR:$Rn, pred:$p, reglist:$regs)>; +def : t2InstAlias<"ldmdb${p}.w $Rn!, $regs", + (t2LDMDB_UPD GPR:$Rn, pred:$p, reglist:$regs)>; + // Alias for REV/REV16/REVSH without the ".w" optional width specifier. 
def : t2InstAlias<"rev${p} $Rd, $Rm", (t2REV rGPR:$Rd, rGPR:$Rm, pred:$p)>; def : t2InstAlias<"rev16${p} $Rd, $Rm", (t2REV16 rGPR:$Rd, rGPR:$Rm, pred:$p)>; @@ -4016,3 +4183,87 @@ def : t2InstAlias<"sxtb16${p} $Rd, $Rm$rot", (t2SXTB16 rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>; def : t2InstAlias<"sxth${p} $Rd, $Rm$rot", (t2SXTH rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>; + + +// "mov Rd, t2_so_imm_not" can be handled via "mvn" in assembly, just like +// for isel. +def : t2InstAlias<"mov${p} $Rd, $imm", + (t2MVNi rGPR:$Rd, t2_so_imm_not:$imm, pred:$p, zero_reg)>; +def : t2InstAlias<"mvn${p} $Rd, $imm", + (t2MOVi rGPR:$Rd, t2_so_imm_not:$imm, pred:$p, zero_reg)>; +// Same for AND <--> BIC +def : t2InstAlias<"bic${s}${p} $Rd, $Rn, $imm", + (t2ANDri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm, + pred:$p, cc_out:$s)>; +def : t2InstAlias<"bic${s}${p} $Rdn, $imm", + (t2ANDri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm, + pred:$p, cc_out:$s)>; +def : t2InstAlias<"and${s}${p} $Rd, $Rn, $imm", + (t2BICri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm, + pred:$p, cc_out:$s)>; +def : t2InstAlias<"and${s}${p} $Rdn, $imm", + (t2BICri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm, + pred:$p, cc_out:$s)>; +// Likewise, "add Rd, t2_so_imm_neg" -> sub +def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm", + (t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm, + pred:$p, cc_out:$s)>; +def : t2InstAlias<"add${s}${p} $Rd, $imm", + (t2SUBri GPRnopc:$Rd, GPRnopc:$Rd, t2_so_imm_neg:$imm, + pred:$p, cc_out:$s)>; +// Same for CMP <--> CMN via t2_so_imm_neg +def : t2InstAlias<"cmp${p} $Rd, $imm", + (t2CMNzri rGPR:$Rd, t2_so_imm_neg:$imm, pred:$p)>; +def : t2InstAlias<"cmn${p} $Rd, $imm", + (t2CMPri rGPR:$Rd, t2_so_imm_neg:$imm, pred:$p)>; + + +// Wide 'mul' encoding can be specified with only two operands. +def : t2InstAlias<"mul${p} $Rn, $Rm", + (t2MUL rGPR:$Rn, rGPR:$Rm, rGPR:$Rn, pred:$p)>; + +// "neg" is and alias for "rsb rd, rn, #0" +def : t2InstAlias<"neg${s}${p} $Rd, $Rm", + (t2RSBri rGPR:$Rd, rGPR:$Rm, 0, pred:$p, cc_out:$s)>; + +// MOV so_reg assembler pseudos. InstAlias isn't expressive enough for +// these, unfortunately. +def t2MOVsi: t2AsmPseudo<"mov${p} $Rd, $shift", + (ins rGPR:$Rd, t2_so_reg:$shift, pred:$p)>; +def t2MOVSsi: t2AsmPseudo<"movs${p} $Rd, $shift", + (ins rGPR:$Rd, t2_so_reg:$shift, pred:$p)>; + +def t2MOVsr: t2AsmPseudo<"mov${p} $Rd, $shift", + (ins rGPR:$Rd, so_reg_reg:$shift, pred:$p)>; +def t2MOVSsr: t2AsmPseudo<"movs${p} $Rd, $shift", + (ins rGPR:$Rd, so_reg_reg:$shift, pred:$p)>; + +// ADR w/o the .w suffix +def : t2InstAlias<"adr${p} $Rd, $addr", + (t2ADR rGPR:$Rd, t2adrlabel:$addr, pred:$p)>; + +// LDR(literal) w/ alternate [pc, #imm] syntax. +def t2LDRpcrel : t2AsmPseudo<"ldr${p} $Rt, $addr", + (ins GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>; +def t2LDRBpcrel : t2AsmPseudo<"ldrb${p} $Rt, $addr", + (ins GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>; +def t2LDRHpcrel : t2AsmPseudo<"ldrh${p} $Rt, $addr", + (ins GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>; +def t2LDRSBpcrel : t2AsmPseudo<"ldrsb${p} $Rt, $addr", + (ins GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>; +def t2LDRSHpcrel : t2AsmPseudo<"ldrsh${p} $Rt, $addr", + (ins GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>; + // Version w/ the .w suffix. 
+def : t2InstAlias<"ldr${p}.w $Rt, $addr", + (t2LDRpcrel GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>; +def : t2InstAlias<"ldrb${p}.w $Rt, $addr", + (t2LDRBpcrel GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>; +def : t2InstAlias<"ldrh${p}.w $Rt, $addr", + (t2LDRHpcrel GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>; +def : t2InstAlias<"ldrsb${p}.w $Rt, $addr", + (t2LDRSBpcrel GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>; +def : t2InstAlias<"ldrsh${p}.w $Rt, $addr", + (t2LDRSHpcrel GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>; + +def : t2InstAlias<"add${p} $Rd, pc, $imm", + (t2ADR rGPR:$Rd, imm0_4095:$imm, pred:$p)>; diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td index e746cf2..3600b88 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td @@ -1,4 +1,4 @@ -//===- ARMInstrVFP.td - VFP support for ARM ----------------*- tablegen -*-===// +//===-- ARMInstrVFP.td - VFP support for ARM ---------------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -61,6 +61,22 @@ def vfp_f64imm : Operand<f64>, let ParserMatchClass = FPImmOperand; } +// The VCVT to/from fixed-point instructions encode the 'fbits' operand +// (the number of fixed bits) differently than it appears in the assembly +// source. It's encoded as "Size - fbits" where Size is the size of the +// fixed-point representation (32 or 16) and fbits is the value appearing +// in the assembly source, an integer in [0,16] or (0,32], depending on size. +def fbits32_asm_operand : AsmOperandClass { let Name = "FBits32"; } +def fbits32 : Operand<i32> { + let PrintMethod = "printFBits32"; + let ParserMatchClass = fbits32_asm_operand; +} + +def fbits16_asm_operand : AsmOperandClass { let Name = "FBits16"; } +def fbits16 : Operand<i32> { + let PrintMethod = "printFBits16"; + let ParserMatchClass = fbits16_asm_operand; +} //===----------------------------------------------------------------------===// // Load / store Instructions. @@ -69,11 +85,11 @@ def vfp_f64imm : Operand<f64>, let canFoldAsLoad = 1, isReMaterializable = 1 in { def VLDRD : ADI5<0b1101, 0b01, (outs DPR:$Dd), (ins addrmode5:$addr), - IIC_fpLoad64, "vldr", ".64\t$Dd, $addr", + IIC_fpLoad64, "vldr", "\t$Dd, $addr", [(set DPR:$Dd, (f64 (load addrmode5:$addr)))]>; def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr), - IIC_fpLoad32, "vldr", ".32\t$Sd, $addr", + IIC_fpLoad32, "vldr", "\t$Sd, $addr", [(set SPR:$Sd, (load addrmode5:$addr))]> { // Some single precision VFP instructions may be executed on both NEON and VFP // pipelines. @@ -83,11 +99,11 @@ def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr), } // End of 'let canFoldAsLoad = 1, isReMaterializable = 1 in' def VSTRD : ADI5<0b1101, 0b00, (outs), (ins DPR:$Dd, addrmode5:$addr), - IIC_fpStore64, "vstr", ".64\t$Dd, $addr", + IIC_fpStore64, "vstr", "\t$Dd, $addr", [(store (f64 DPR:$Dd), addrmode5:$addr)]>; def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$Sd, addrmode5:$addr), - IIC_fpStore32, "vstr", ".32\t$Sd, $addr", + IIC_fpStore32, "vstr", "\t$Sd, $addr", [(store SPR:$Sd, addrmode5:$addr)]> { // Some single precision VFP instructions may be executed on both NEON and VFP // pipelines. 
@@ -190,6 +206,14 @@ def : InstAlias<"vpop${p} $r", (VLDMDIA_UPD SP, pred:$p, dpr_reglist:$r)>, Requires<[HasVFP2]>; def : InstAlias<"vpop${p} $r", (VLDMSIA_UPD SP, pred:$p, spr_reglist:$r)>, Requires<[HasVFP2]>; +defm : VFPDTAnyInstAlias<"vpush${p}", "$r", + (VSTMSDB_UPD SP, pred:$p, spr_reglist:$r)>; +defm : VFPDTAnyInstAlias<"vpush${p}", "$r", + (VSTMDDB_UPD SP, pred:$p, dpr_reglist:$r)>; +defm : VFPDTAnyInstAlias<"vpop${p}", "$r", + (VLDMSIA_UPD SP, pred:$p, spr_reglist:$r)>; +defm : VFPDTAnyInstAlias<"vpop${p}", "$r", + (VLDMDIA_UPD SP, pred:$p, dpr_reglist:$r)>; // FLDMX, FSTMX - mixing S/D registers for pre-armv6 cores @@ -270,7 +294,7 @@ def : Pat<(fmul (fneg SPR:$a), SPR:$b), (VNMULS SPR:$a, SPR:$b)>, Requires<[NoHonorSignDependentRounding]>; // These are encoded as unary instructions. -let Defs = [FPSCR] in { +let Defs = [FPSCR_NZCV] in { def VCMPED : ADuI<0b11101, 0b11, 0b0100, 0b11, 0, (outs), (ins DPR:$Dd, DPR:$Dm), IIC_fpCMP64, "vcmpe", ".f64\t$Dd, $Dm", @@ -299,7 +323,7 @@ def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0, // VFP pipelines on A8. let D = VFPNeonA8Domain; } -} // Defs = [FPSCR] +} // Defs = [FPSCR_NZCV] //===----------------------------------------------------------------------===// // FP Unary Operations. @@ -319,7 +343,7 @@ def VABSS : ASuIn<0b11101, 0b11, 0b0000, 0b11, 0, let D = VFPNeonA8Domain; } -let Defs = [FPSCR] in { +let Defs = [FPSCR_NZCV] in { def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0, (outs), (ins DPR:$Dd), IIC_fpCMP64, "vcmpe", ".f64\t$Dd, #0", @@ -360,7 +384,7 @@ def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0, // VFP pipelines on A8. let D = VFPNeonA8Domain; } -} // Defs = [FPSCR] +} // Defs = [FPSCR_NZCV] def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0, (outs DPR:$Dd), (ins SPR:$Sm), @@ -790,127 +814,131 @@ def VTOUIRS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010, // S32 (U=0, sx=1) -> SL // U32 (U=1, sx=1) -> UL -// FIXME: Marking these as codegen only seems wrong. They are real -// instructions(?) -let Constraints = "$a = $dst", isCodeGenOnly = 1 in { +let Constraints = "$a = $dst" in { // FP to Fixed-Point: -def VTOSHS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 0, - (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), - IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]> { +// Single Precision register +class AVConv1XInsS_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4, bit op5, + dag oops, dag iops, InstrItinClass itin, string opc, string asm, + list<dag> pattern> + : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern> { + bits<5> dst; + // if dp_operation then UInt(D:Vd) else UInt(Vd:D); + let Inst{22} = dst{0}; + let Inst{15-12} = dst{4-1}; +} + +// Double Precision register +class AVConv1XInsD_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4, bit op5, + dag oops, dag iops, InstrItinClass itin, string opc, string asm, + list<dag> pattern> + : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern> { + bits<5> dst; + // if dp_operation then UInt(D:Vd) else UInt(Vd:D); + let Inst{22} = dst{4}; + let Inst{15-12} = dst{3-0}; +} + +def VTOSHS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1010, 0, + (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits), + IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits", []> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. 
let D = VFPNeonA8Domain; } -def VTOUHS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 0, - (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), - IIC_fpCVTSI, "vcvt", ".u16.f32\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]> { +def VTOUHS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1010, 0, + (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits), + IIC_fpCVTSI, "vcvt", ".u16.f32\t$dst, $a, $fbits", []> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; } -def VTOSLS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 1, - (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), - IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]> { +def VTOSLS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1010, 1, + (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits), + IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a, $fbits", []> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; } -def VTOULS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 1, - (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), - IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]> { +def VTOULS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1010, 1, + (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits), + IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a, $fbits", []> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; } -def VTOSHD : AVConv1XI<0b11101, 0b11, 0b1110, 0b1011, 0, - (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), - IIC_fpCVTDI, "vcvt", ".s16.f64\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]>; +def VTOSHD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1110, 0b1011, 0, + (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits), + IIC_fpCVTDI, "vcvt", ".s16.f64\t$dst, $a, $fbits", []>; -def VTOUHD : AVConv1XI<0b11101, 0b11, 0b1111, 0b1011, 0, - (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), - IIC_fpCVTDI, "vcvt", ".u16.f64\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]>; +def VTOUHD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1111, 0b1011, 0, + (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits), + IIC_fpCVTDI, "vcvt", ".u16.f64\t$dst, $a, $fbits", []>; -def VTOSLD : AVConv1XI<0b11101, 0b11, 0b1110, 0b1011, 1, - (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), - IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]>; +def VTOSLD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1110, 0b1011, 1, + (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits), + IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a, $fbits", []>; -def VTOULD : AVConv1XI<0b11101, 0b11, 0b1111, 0b1011, 1, - (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), - IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]>; +def VTOULD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1111, 0b1011, 1, + (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits), + IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a, $fbits", []>; // Fixed-Point to FP: -def VSHTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 0, - (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), - IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]> { +def VSHTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 0, + (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits), + IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits", []> { // Some single 
precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; } -def VUHTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 0, - (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), - IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]> { +def VUHTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1010, 0, + (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits), + IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits", []> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; } -def VSLTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 1, - (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), - IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]> { +def VSLTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 1, + (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits), + IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits", []> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; } -def VULTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 1, - (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), - IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]> { +def VULTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1010, 1, + (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits), + IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits", []> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; } -def VSHTOD : AVConv1XI<0b11101, 0b11, 0b1010, 0b1011, 0, - (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), - IIC_fpCVTID, "vcvt", ".f64.s16\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]>; +def VSHTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1010, 0b1011, 0, + (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits), + IIC_fpCVTID, "vcvt", ".f64.s16\t$dst, $a, $fbits", []>; -def VUHTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 0, - (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), - IIC_fpCVTID, "vcvt", ".f64.u16\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]>; +def VUHTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1011, 0b1011, 0, + (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits), + IIC_fpCVTID, "vcvt", ".f64.u16\t$dst, $a, $fbits", []>; -def VSLTOD : AVConv1XI<0b11101, 0b11, 0b1010, 0b1011, 1, - (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), - IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]>; +def VSLTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1010, 0b1011, 1, + (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits), + IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a, $fbits", []>; -def VULTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 1, - (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), - IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a, $fbits", - [/* For disassembly only; pattern left blank */]>; +def VULTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1011, 0b1011, 1, + (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits), + IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a, $fbits", []>; -} // End of 'let Constraints = "$a = $dst", isCodeGenOnly = 1 in' +} // End of 'let Constraints = "$a = $dst" in' //===----------------------------------------------------------------------===// // FP Multiply-Accumulate Operations. 
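[Editor's note, before the multiply-accumulate hunks: the AVConv1XInsS_Encode/AVConv1XInsD_Encode classes above implement the ARM ARM's "if dp_operation then UInt(D:Vd) else UInt(Vd:D)" rule — the D bit is the low bit of a single-precision register number but the high bit of a double-precision one. In plain C++:

// Field placement per the encode classes above: Inst{22} is D,
// Inst{15-12} is Vd.
struct VdD { unsigned D, Vd; };

VdD encodeSReg(unsigned N) { // S<N>: register number is Vd:D
  return { N & 1u, N >> 1 }; // Inst{22}=dst{0}, Inst{15-12}=dst{4-1}
}
VdD encodeDReg(unsigned N) {   // D<N>: register number is D:Vd
  return { N >> 4, N & 0xFu }; // Inst{22}=dst{4}, Inst{15-12}=dst{3-0}
}
]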
@@ -922,7 +950,7 @@ def VMLAD : ADbI<0b11100, 0b00, 0, 0, [(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP2,UseFPVMLx]>; + Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; def VMLAS : ASbIn<0b11100, 0b00, 0, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -930,7 +958,7 @@ def VMLAS : ASbIn<0b11100, 0b00, 0, 0, [(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> { + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; @@ -938,10 +966,10 @@ def VMLAS : ASbIn<0b11100, 0b00, 0, 0, def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), (VMLAD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP2,UseFPVMLx]>; + Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), (VMLAS SPR:$dstin, SPR:$a, SPR:$b)>, - Requires<[HasVFP2,DontUseNEONForFP, UseFPVMLx]>; + Requires<[HasVFP2,DontUseNEONForFP, UseFPVMLx,DontUseFusedMAC]>; def VMLSD : ADbI<0b11100, 0b00, 1, 0, (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), @@ -949,7 +977,7 @@ def VMLSD : ADbI<0b11100, 0b00, 1, 0, [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP2,UseFPVMLx]>; + Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; def VMLSS : ASbIn<0b11100, 0b00, 1, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -957,7 +985,7 @@ def VMLSS : ASbIn<0b11100, 0b00, 1, 0, [(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> { + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; @@ -965,10 +993,10 @@ def VMLSS : ASbIn<0b11100, 0b00, 1, 0, def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), (VMLSD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP2,UseFPVMLx]>; + Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), (VMLSS SPR:$dstin, SPR:$a, SPR:$b)>, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>; + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>; def VNMLAD : ADbI<0b11100, 0b01, 1, 0, (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), @@ -976,7 +1004,7 @@ def VNMLAD : ADbI<0b11100, 0b01, 1, 0, [(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP2,UseFPVMLx]>; + Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; def VNMLAS : ASbI<0b11100, 0b01, 1, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -984,7 +1012,7 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0, [(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> { + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. 
let D = VFPNeonA8Domain; @@ -992,10 +1020,10 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0, def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin), (VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP2,UseFPVMLx]>; + Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin), (VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>; + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>; def VNMLSD : ADbI<0b11100, 0b01, 0, 0, (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), @@ -1003,14 +1031,14 @@ def VNMLSD : ADbI<0b11100, 0b01, 0, 0, [(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP2,UseFPVMLx]>; + Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; def VNMLSS : ASbI<0b11100, 0b01, 0, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), IIC_fpMAC32, "vnmls", ".f32\t$Sd, $Sn, $Sm", [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> { + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; @@ -1018,11 +1046,172 @@ def VNMLSS : ASbI<0b11100, 0b01, 0, 0, def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin), (VNMLSD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP2,UseFPVMLx]>; + Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin), (VNMLSS SPR:$dstin, SPR:$a, SPR:$b)>, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>; + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>; + +//===----------------------------------------------------------------------===// +// Fused FP Multiply-Accumulate Operations. +// +def VFMAD : ADbI<0b11101, 0b10, 0, 0, + (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), + IIC_fpFMAC64, "vfma", ".f64\t$Dd, $Dn, $Dm", + [(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm), + (f64 DPR:$Ddin)))]>, + RegConstraint<"$Ddin = $Dd">, + Requires<[HasVFP4,UseFusedMAC]>; + +def VFMAS : ASbIn<0b11101, 0b10, 0, 0, + (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), + IIC_fpFMAC32, "vfma", ".f32\t$Sd, $Sn, $Sm", + [(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm), + SPR:$Sdin))]>, + RegConstraint<"$Sdin = $Sd">, + Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> { + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines. 
+} + +def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), + (VFMAD DPR:$dstin, DPR:$a, DPR:$b)>, + Requires<[HasVFP4,UseFusedMAC]>; +def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), + (VFMAS SPR:$dstin, SPR:$a, SPR:$b)>, + Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>; + +// Match @llvm.fma.* intrinsics +def : Pat<(f64 (fma DPR:$Ddin, DPR:$Dn, DPR:$Dm)), + (VFMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, + Requires<[HasVFP4]>; +def : Pat<(f32 (fma SPR:$Sdin, SPR:$Sn, SPR:$Sm)), + (VFMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, + Requires<[HasVFP4]>; + +def VFMSD : ADbI<0b11101, 0b10, 1, 0, + (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), + IIC_fpFMAC64, "vfms", ".f64\t$Dd, $Dn, $Dm", + [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), + (f64 DPR:$Ddin)))]>, + RegConstraint<"$Ddin = $Dd">, + Requires<[HasVFP4,UseFusedMAC]>; + +def VFMSS : ASbIn<0b11101, 0b10, 1, 0, + (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), + IIC_fpFMAC32, "vfms", ".f32\t$Sd, $Sn, $Sm", + [(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)), + SPR:$Sdin))]>, + RegConstraint<"$Sdin = $Sd">, + Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> { + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines. +} + +def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), + (VFMSD DPR:$dstin, DPR:$a, DPR:$b)>, + Requires<[HasVFP4,UseFusedMAC]>; +def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), + (VFMSS SPR:$dstin, SPR:$a, SPR:$b)>, + Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>; + +// Match @llvm.fma.* intrinsics +// (fma (fneg x), y, z) -> (vfms x, y, z) +def : Pat<(f64 (fma (fneg DPR:$Ddin), DPR:$Dn, DPR:$Dm)), + (VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, + Requires<[HasVFP4]>; +def : Pat<(f32 (fma (fneg SPR:$Sdin), SPR:$Sn, SPR:$Sm)), + (VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, + Requires<[HasVFP4]>; +// (fneg (fma x, (fneg y), z) -> (vfms x, y, z) +def : Pat<(fneg (f64 (fma DPR:$Ddin, (fneg DPR:$Dn), DPR:$Dm))), + (VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, + Requires<[HasVFP4]>; +def : Pat<(fneg (f32 (fma SPR:$Sdin, (fneg SPR:$Sn), SPR:$Sm))), + (VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, + Requires<[HasVFP4]>; + +def VFNMAD : ADbI<0b11101, 0b01, 1, 0, + (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), + IIC_fpFMAC64, "vfnma", ".f64\t$Dd, $Dn, $Dm", + [(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), + (f64 DPR:$Ddin)))]>, + RegConstraint<"$Ddin = $Dd">, + Requires<[HasVFP4,UseFusedMAC]>; + +def VFNMAS : ASbI<0b11101, 0b01, 1, 0, + (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), + IIC_fpFMAC32, "vfnma", ".f32\t$Sd, $Sn, $Sm", + [(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)), + SPR:$Sdin))]>, + RegConstraint<"$Sdin = $Sd">, + Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> { + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines. 
+} +def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin), + (VFNMAD DPR:$dstin, DPR:$a, DPR:$b)>, + Requires<[HasVFP4,UseFusedMAC]>; +def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin), + (VFNMAS SPR:$dstin, SPR:$a, SPR:$b)>, + Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>; + +// Match @llvm.fma.* intrinsics +// (fneg (fma x, y, z)) -> (vfnma x, y, z) +def : Pat<(fneg (fma (f64 DPR:$Ddin), (f64 DPR:$Dn), (f64 DPR:$Dm))), + (VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, + Requires<[HasVFP4]>; +def : Pat<(fneg (fma (f32 SPR:$Sdin), (f32 SPR:$Sn), (f32 SPR:$Sm))), + (VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, + Requires<[HasVFP4]>; +// (fma (fneg x), y, (fneg z)) -> (vfnma x, y, z) +def : Pat<(f64 (fma (fneg DPR:$Ddin), DPR:$Dn, (fneg DPR:$Dm))), + (VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, + Requires<[HasVFP4]>; +def : Pat<(f32 (fma (fneg SPR:$Sdin), SPR:$Sn, (fneg SPR:$Sm))), + (VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, + Requires<[HasVFP4]>; + +def VFNMSD : ADbI<0b11101, 0b01, 0, 0, + (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), + IIC_fpFMAC64, "vfnms", ".f64\t$Dd, $Dn, $Dm", + [(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm), + (f64 DPR:$Ddin)))]>, + RegConstraint<"$Ddin = $Dd">, + Requires<[HasVFP4,UseFusedMAC]>; + +def VFNMSS : ASbI<0b11101, 0b01, 0, 0, + (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), + IIC_fpFMAC32, "vfnms", ".f32\t$Sd, $Sn, $Sm", + [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>, + RegConstraint<"$Sdin = $Sd">, + Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> { + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines. +} + +def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin), + (VFNMSD DPR:$dstin, DPR:$a, DPR:$b)>, + Requires<[HasVFP4,UseFusedMAC]>; +def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin), + (VFNMSS SPR:$dstin, SPR:$a, SPR:$b)>, + Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>; + +// Match @llvm.fma.* intrinsics +// (fneg (fma (fneg x), y, z)) -> (vnfms x, y, z) +def : Pat<(fneg (f64 (fma (fneg DPR:$Ddin), DPR:$Dn, DPR:$Dm))), + (VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, + Requires<[HasVFP4]>; +def : Pat<(fneg (f32 (fma (fneg SPR:$Sdin), SPR:$Sn, SPR:$Sm))), + (VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, + Requires<[HasVFP4]>; +// (fma x, (fneg y), z) -> (vnfms x, y, z) +def : Pat<(f64 (fma DPR:$Ddin, (fneg DPR:$Dn), DPR:$Dm)), + (VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, + Requires<[HasVFP4]>; +def : Pat<(f32 (fma SPR:$Sdin, (fneg SPR:$Sn), SPR:$Sm)), + (VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, + Requires<[HasVFP4]>; //===----------------------------------------------------------------------===// // FP Conditional moves. @@ -1063,9 +1252,9 @@ class MovFromVFP<bits<4> opc19_16, dag oops, dag iops, string opc, string asm, // APSR is the application level alias of CPSR. This FPSCR N, Z, C, V flags // to APSR. 
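[Editor's note, stepping back to the fused multiply-accumulate block that just ended (the FMSTAT definition resumes below): the four VFP4 ops differ only in which inputs are negated, and the intrinsic patterns above fall out mechanically from these identities, written here with std::fma as a hedged reference model:

#include <cmath>

// d is the accumulator input (the tied $Ddin/$Sdin operand).
double vfma (double d, double a, double b) { return std::fma( a, b,  d); } //  d + a*b
double vfms (double d, double a, double b) { return std::fma(-a, b,  d); } //  d - a*b
double vfnma(double d, double a, double b) { return std::fma(-a, b, -d); } // -d - a*b
double vfnms(double d, double a, double b) { return std::fma( a, b, -d); } // -d + a*b
]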
-let Defs = [CPSR], Uses = [FPSCR], Rt = 0b1111 /* apsr_nzcv */ in +let Defs = [CPSR], Uses = [FPSCR_NZCV], Rt = 0b1111 /* apsr_nzcv */ in def FMSTAT : MovFromVFP<0b0001 /* fpscr */, (outs), (ins), - "vmrs", "\tapsr_nzcv, fpscr", [(arm_fmstat)]>; + "vmrs", "\tAPSR_nzcv, fpscr", [(arm_fmstat)]>; // Application level FPSCR -> GPR let hasSideEffects = 1, Uses = [FPSCR] in @@ -1079,6 +1268,10 @@ let Uses = [FPSCR] in { "vmrs", "\t$Rt, fpexc", []>; def VMRS_FPSID : MovFromVFP<0b0000 /* fpsid */, (outs GPR:$Rt), (ins), "vmrs", "\t$Rt, fpsid", []>; + def VMRS_MVFR0 : MovFromVFP<0b0111 /* mvfr0 */, (outs GPR:$Rt), (ins), + "vmrs", "\t$Rt, mvfr0", []>; + def VMRS_MVFR1 : MovFromVFP<0b0110 /* mvfr1 */, (outs GPR:$Rt), (ins), + "vmrs", "\t$Rt, mvfr1", []>; } //===----------------------------------------------------------------------===// @@ -1160,6 +1353,115 @@ def FCONSTS : VFPAI<(outs SPR:$Sd), (ins vfp_f32imm:$imm), //===----------------------------------------------------------------------===// // Assembler aliases. // +// A few mnemnoic aliases for pre-unifixed syntax. We don't guarantee to +// support them all, but supporting at least some of the basics is +// good to be friendly. +def : VFP2MnemonicAlias<"flds", "vldr">; +def : VFP2MnemonicAlias<"fldd", "vldr">; +def : VFP2MnemonicAlias<"fmrs", "vmov">; +def : VFP2MnemonicAlias<"fmsr", "vmov">; +def : VFP2MnemonicAlias<"fsqrts", "vsqrt">; +def : VFP2MnemonicAlias<"fsqrtd", "vsqrt">; +def : VFP2MnemonicAlias<"fadds", "vadd.f32">; +def : VFP2MnemonicAlias<"faddd", "vadd.f64">; +def : VFP2MnemonicAlias<"fmrdd", "vmov">; +def : VFP2MnemonicAlias<"fmrds", "vmov">; +def : VFP2MnemonicAlias<"fmrrd", "vmov">; +def : VFP2MnemonicAlias<"fmdrr", "vmov">; +def : VFP2MnemonicAlias<"fmuls", "vmul.f32">; +def : VFP2MnemonicAlias<"fmuld", "vmul.f64">; +def : VFP2MnemonicAlias<"fnegs", "vneg.f32">; +def : VFP2MnemonicAlias<"fnegd", "vneg.f64">; +def : VFP2MnemonicAlias<"ftosizd", "vcvt.s32.f64">; +def : VFP2MnemonicAlias<"ftosid", "vcvtr.s32.f64">; +def : VFP2MnemonicAlias<"ftosizs", "vcvt.s32.f32">; +def : VFP2MnemonicAlias<"ftosis", "vcvtr.s32.f32">; +def : VFP2MnemonicAlias<"ftouizd", "vcvt.u32.f64">; +def : VFP2MnemonicAlias<"ftouid", "vcvtr.u32.f64">; +def : VFP2MnemonicAlias<"ftouizs", "vcvt.u32.f32">; +def : VFP2MnemonicAlias<"ftouis", "vcvtr.u32.f32">; +def : VFP2MnemonicAlias<"fsitod", "vcvt.f64.s32">; +def : VFP2MnemonicAlias<"fsitos", "vcvt.f32.s32">; +def : VFP2MnemonicAlias<"fuitod", "vcvt.f64.u32">; +def : VFP2MnemonicAlias<"fuitos", "vcvt.f32.u32">; +def : VFP2MnemonicAlias<"fsts", "vstr">; +def : VFP2MnemonicAlias<"fstd", "vstr">; +def : VFP2MnemonicAlias<"fmacd", "vmla.f64">; +def : VFP2MnemonicAlias<"fmacs", "vmla.f32">; +def : VFP2MnemonicAlias<"fcpys", "vmov.f32">; +def : VFP2MnemonicAlias<"fcpyd", "vmov.f64">; +def : VFP2MnemonicAlias<"fcmps", "vcmp.f32">; +def : VFP2MnemonicAlias<"fcmpd", "vcmp.f64">; +def : VFP2MnemonicAlias<"fdivs", "vdiv.f32">; +def : VFP2MnemonicAlias<"fdivd", "vdiv.f64">; +def : VFP2MnemonicAlias<"fmrx", "vmrs">; +def : VFP2MnemonicAlias<"fmxr", "vmsr">; + +// Be friendly and accept the old form of zero-compare +def : VFP2InstAlias<"fcmpzd${p} $val", (VCMPZD DPR:$val, pred:$p)>; +def : VFP2InstAlias<"fcmpzs${p} $val", (VCMPZS SPR:$val, pred:$p)>; -def : VFP2InstAlias<"fmstat${p}", (FMSTAT pred:$p)>; +def : VFP2InstAlias<"fmstat${p}", (FMSTAT pred:$p)>; +def : VFP2InstAlias<"fadds${p} $Sd, $Sn, $Sm", + (VADDS SPR:$Sd, SPR:$Sn, SPR:$Sm, pred:$p)>; +def : VFP2InstAlias<"faddd${p} $Dd, $Dn, $Dm", + (VADDD DPR:$Dd, 
DPR:$Dn, DPR:$Dm, pred:$p)>; +def : VFP2InstAlias<"fsubs${p} $Sd, $Sn, $Sm", + (VSUBS SPR:$Sd, SPR:$Sn, SPR:$Sm, pred:$p)>; +def : VFP2InstAlias<"fsubd${p} $Dd, $Dn, $Dm", + (VSUBD DPR:$Dd, DPR:$Dn, DPR:$Dm, pred:$p)>; + +// No need for the size suffix on VSQRT. It's implied by the register classes. +def : VFP2InstAlias<"vsqrt${p} $Sd, $Sm", (VSQRTS SPR:$Sd, SPR:$Sm, pred:$p)>; +def : VFP2InstAlias<"vsqrt${p} $Dd, $Dm", (VSQRTD DPR:$Dd, DPR:$Dm, pred:$p)>; + +// VLDR/VSTR accept an optional type suffix. +def : VFP2InstAlias<"vldr${p}.32 $Sd, $addr", + (VLDRS SPR:$Sd, addrmode5:$addr, pred:$p)>; +def : VFP2InstAlias<"vstr${p}.32 $Sd, $addr", + (VSTRS SPR:$Sd, addrmode5:$addr, pred:$p)>; +def : VFP2InstAlias<"vldr${p}.64 $Dd, $addr", + (VLDRD DPR:$Dd, addrmode5:$addr, pred:$p)>; +def : VFP2InstAlias<"vstr${p}.64 $Dd, $addr", + (VSTRD DPR:$Dd, addrmode5:$addr, pred:$p)>; + +// VMUL has a two-operand form (implied destination operand) +def : VFP2InstAlias<"vmul${p}.f64 $Dn, $Dm", + (VMULD DPR:$Dn, DPR:$Dn, DPR:$Dm, pred:$p)>; +def : VFP2InstAlias<"vmul${p}.f32 $Sn, $Sm", + (VMULS SPR:$Sn, SPR:$Sn, SPR:$Sm, pred:$p)>; +// VADD has a two-operand form (implied destination operand) +def : VFP2InstAlias<"vadd${p}.f64 $Dn, $Dm", + (VADDD DPR:$Dn, DPR:$Dn, DPR:$Dm, pred:$p)>; +def : VFP2InstAlias<"vadd${p}.f32 $Sn, $Sm", + (VADDS SPR:$Sn, SPR:$Sn, SPR:$Sm, pred:$p)>; +// VSUB has a two-operand form (implied destination operand) +def : VFP2InstAlias<"vsub${p}.f64 $Dn, $Dm", + (VSUBD DPR:$Dn, DPR:$Dn, DPR:$Dm, pred:$p)>; +def : VFP2InstAlias<"vsub${p}.f32 $Sn, $Sm", + (VSUBS SPR:$Sn, SPR:$Sn, SPR:$Sm, pred:$p)>; + +// VMOV can accept optional 32-bit or less data type suffix suffix. +def : VFP2InstAlias<"vmov${p}.8 $Rt, $Sn", + (VMOVRS GPR:$Rt, SPR:$Sn, pred:$p)>; +def : VFP2InstAlias<"vmov${p}.16 $Rt, $Sn", + (VMOVRS GPR:$Rt, SPR:$Sn, pred:$p)>; +def : VFP2InstAlias<"vmov${p}.32 $Rt, $Sn", + (VMOVRS GPR:$Rt, SPR:$Sn, pred:$p)>; +def : VFP2InstAlias<"vmov${p}.8 $Sn, $Rt", + (VMOVSR SPR:$Sn, GPR:$Rt, pred:$p)>; +def : VFP2InstAlias<"vmov${p}.16 $Sn, $Rt", + (VMOVSR SPR:$Sn, GPR:$Rt, pred:$p)>; +def : VFP2InstAlias<"vmov${p}.32 $Sn, $Rt", + (VMOVSR SPR:$Sn, GPR:$Rt, pred:$p)>; + +def : VFP2InstAlias<"vmov${p}.f64 $Rt, $Rt2, $Dn", + (VMOVRRD GPR:$Rt, GPR:$Rt2, DPR:$Dn, pred:$p)>; +def : VFP2InstAlias<"vmov${p}.f64 $Dn, $Rt, $Rt2", + (VMOVDRR DPR:$Dn, GPR:$Rt, GPR:$Rt2, pred:$p)>; + +// VMOVS doesn't need the .f32 to disambiguate from the NEON encoding the way +// VMOVD does. +def : VFP2InstAlias<"vmov${p} $Sd, $Sm", + (VMOVS SPR:$Sd, SPR:$Sm, pred:$p)>; diff --git a/contrib/llvm/lib/Target/ARM/ARMJITInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMJITInfo.cpp index 45b7e48..98930cc 100644 --- a/contrib/llvm/lib/Target/ARM/ARMJITInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMJITInfo.cpp @@ -13,7 +13,7 @@ #define DEBUG_TYPE "jit" #include "ARMJITInfo.h" -#include "ARMInstrInfo.h" +#include "ARM.h" #include "ARMConstantPoolValue.h" #include "ARMRelocations.h" #include "ARMSubtarget.h" @@ -61,7 +61,7 @@ extern "C" { // concerned, so we can't just preserve the callee saved regs. "stmdb sp!, {r0, r1, r2, r3, lr}\n" #if (defined(__VFP_FP__) && !defined(__SOFTFP__)) - "fstmfdd sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n" + "vstmdb sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n" #endif // The LR contains the address of the stub function on entry. // pass it as the argument to the C part of the callback @@ -85,7 +85,7 @@ extern "C" { // #if (defined(__VFP_FP__) && !defined(__SOFTFP__)) // Restore VFP caller-saved registers. 
- "fldmfdd sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n" + "vldmia sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n" #endif // // We need to exchange the values in slots 0 and 1 so we can diff --git a/contrib/llvm/lib/Target/ARM/ARMJITInfo.h b/contrib/llvm/lib/Target/ARM/ARMJITInfo.h index 2f97928..7928184 100644 --- a/contrib/llvm/lib/Target/ARM/ARMJITInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMJITInfo.h @@ -1,4 +1,4 @@ -//===- ARMJITInfo.h - ARM implementation of the JIT interface --*- C++ -*-===// +//===-- ARMJITInfo.h - ARM implementation of the JIT interface -*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index faa8ba7..9ef2ace 100644 --- a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -1,4 +1,4 @@ -//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=// +//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===// // // The LLVM Compiler Infrastructure // @@ -15,8 +15,8 @@ #define DEBUG_TYPE "arm-ldst-opt" #include "ARM.h" #include "ARMBaseInstrInfo.h" +#include "ARMBaseRegisterInfo.h" #include "ARMMachineFunctionInfo.h" -#include "ARMRegisterInfo.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" @@ -32,6 +32,8 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" @@ -62,6 +64,7 @@ namespace { const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; + const ARMSubtarget *STI; ARMFunctionInfo *AFI; RegScavenger *RS; bool isThumb2; @@ -90,7 +93,9 @@ namespace { bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, int Offset, unsigned Base, bool BaseKill, int Opcode, ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch, - DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs); + DebugLoc dl, + ArrayRef<std::pair<unsigned, bool> > Regs, + ArrayRef<unsigned> ImpDefs); void MergeOpsUpdate(MachineBasicBlock &MBB, MemOpQueue &MemOps, unsigned memOpsBegin, @@ -141,7 +146,6 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) { case ARM_AM::db: return ARM::LDMDB; case ARM_AM::ib: return ARM::LDMIB; } - break; case ARM::STRi12: ++NumSTMGened; switch (Mode) { @@ -151,7 +155,6 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) { case ARM_AM::db: return ARM::STMDB; case ARM_AM::ib: return ARM::STMIB; } - break; case ARM::t2LDRi8: case ARM::t2LDRi12: ++NumLDMGened; @@ -160,7 +163,6 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) { case ARM_AM::ia: return ARM::t2LDMIA; case ARM_AM::db: return ARM::t2LDMDB; } - break; case ARM::t2STRi8: case ARM::t2STRi12: ++NumSTMGened; @@ -169,7 +171,6 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) { case ARM_AM::ia: return ARM::t2STMIA; case ARM_AM::db: return ARM::t2STMDB; } - break; case ARM::VLDRS: ++NumVLDMGened; switch (Mode) { @@ -177,7 +178,6 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) { case ARM_AM::ia: return ARM::VLDMSIA; case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists. 
} - break; case ARM::VSTRS: ++NumVSTMGened; switch (Mode) { @@ -185,7 +185,6 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) { case ARM_AM::ia: return ARM::VSTMSIA; case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists. } - break; case ARM::VLDRD: ++NumVLDMGened; switch (Mode) { @@ -193,7 +192,6 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) { case ARM_AM::ia: return ARM::VLDMDIA; case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists. } - break; case ARM::VSTRD: ++NumVSTMGened; switch (Mode) { @@ -201,10 +199,7 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) { case ARM_AM::ia: return ARM::VSTMDIA; case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists. } - break; } - - return 0; } namespace llvm { @@ -259,8 +254,6 @@ AMSubMode getLoadStoreMultipleSubMode(int Opcode) { case ARM::STMIB_UPD: return ARM_AM::ib; } - - return ARM_AM::bad_am_submode; } } // end namespace ARM_AM @@ -291,7 +284,8 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, int Offset, unsigned Base, bool BaseKill, int Opcode, ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch, DebugLoc dl, - SmallVector<std::pair<unsigned, bool>, 8> &Regs) { + ArrayRef<std::pair<unsigned, bool> > Regs, + ArrayRef<unsigned> ImpDefs) { // Only a single register to load / store. Don't bother. unsigned NumRegs = Regs.size(); if (NumRegs <= 1) @@ -359,6 +353,10 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef) | getKillRegState(Regs[i].second)); + // Add implicit defs for super-registers. + for (unsigned i = 0, e = ImpDefs.size(); i != e; ++i) + MIB.addReg(ImpDefs[i], RegState::ImplicitDefine); + return true; } @@ -393,19 +391,29 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, } SmallVector<std::pair<unsigned, bool>, 8> Regs; + SmallVector<unsigned, 8> ImpDefs; for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) { unsigned Reg = memOps[i].Reg; // If we are inserting the merged operation after an operation that // uses the same register, make sure to transfer any kill flag. bool isKill = memOps[i].isKill || KilledRegs.count(Reg); Regs.push_back(std::make_pair(Reg, isKill)); + + // Collect any implicit defs of super-registers. They must be preserved. + for (MIOperands MO(memOps[i].MBBI); MO.isValid(); ++MO) { + if (!MO->isReg() || !MO->isDef() || !MO->isImplicit() || MO->isDead()) + continue; + unsigned DefReg = MO->getReg(); + if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end()) + ImpDefs.push_back(DefReg); + } } // Try to do the merge. MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI; ++Loc; if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode, - Pred, PredReg, Scratch, dl, Regs)) + Pred, PredReg, Scratch, dl, Regs, ImpDefs)) return; // Merge succeeded, update records. @@ -506,50 +514,84 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, return; } -static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base, - unsigned Bytes, unsigned Limit, - ARMCC::CondCodes Pred, unsigned PredReg){ +static bool definesCPSR(MachineInstr *MI) { + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) + continue; + if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead()) + // If the instruction has live CPSR def, then it's not safe to fold it + // into load / store. 
+ return true; + } + + return false; +} + +static bool isMatchingDecrement(MachineInstr *MI, unsigned Base, + unsigned Bytes, unsigned Limit, + ARMCC::CondCodes Pred, unsigned PredReg) { unsigned MyPredReg = 0; if (!MI) return false; - if (MI->getOpcode() != ARM::t2SUBri && - MI->getOpcode() != ARM::tSUBspi && - MI->getOpcode() != ARM::SUBri) - return false; + + bool CheckCPSRDef = false; + switch (MI->getOpcode()) { + default: return false; + case ARM::t2SUBri: + case ARM::SUBri: + CheckCPSRDef = true; + // fallthrough + case ARM::tSUBspi: + break; + } // Make sure the offset fits in 8 bits. if (Bytes == 0 || (Limit && Bytes >= Limit)) return false; unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME - return (MI->getOperand(0).getReg() == Base && - MI->getOperand(1).getReg() == Base && - (MI->getOperand(2).getImm()*Scale) == Bytes && - llvm::getInstrPredicate(MI, MyPredReg) == Pred && - MyPredReg == PredReg); + if (!(MI->getOperand(0).getReg() == Base && + MI->getOperand(1).getReg() == Base && + (MI->getOperand(2).getImm()*Scale) == Bytes && + getInstrPredicate(MI, MyPredReg) == Pred && + MyPredReg == PredReg)) + return false; + + return CheckCPSRDef ? !definesCPSR(MI) : true; } -static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base, - unsigned Bytes, unsigned Limit, - ARMCC::CondCodes Pred, unsigned PredReg){ +static bool isMatchingIncrement(MachineInstr *MI, unsigned Base, + unsigned Bytes, unsigned Limit, + ARMCC::CondCodes Pred, unsigned PredReg) { unsigned MyPredReg = 0; if (!MI) return false; - if (MI->getOpcode() != ARM::t2ADDri && - MI->getOpcode() != ARM::tADDspi && - MI->getOpcode() != ARM::ADDri) - return false; + + bool CheckCPSRDef = false; + switch (MI->getOpcode()) { + default: return false; + case ARM::t2ADDri: + case ARM::ADDri: + CheckCPSRDef = true; + // fallthrough + case ARM::tADDspi: + break; + } if (Bytes == 0 || (Limit && Bytes >= Limit)) // Make sure the offset fits in 8 bits. return false; unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME - return (MI->getOperand(0).getReg() == Base && - MI->getOperand(1).getReg() == Base && - (MI->getOperand(2).getImm()*Scale) == Bytes && - llvm::getInstrPredicate(MI, MyPredReg) == Pred && - MyPredReg == PredReg); + if (!(MI->getOperand(0).getReg() == Base && + MI->getOperand(1).getReg() == Base && + (MI->getOperand(2).getImm()*Scale) == Bytes && + getInstrPredicate(MI, MyPredReg) == Pred && + MyPredReg == PredReg)) + return false; + + return CheckCPSRDef ? 
!definesCPSR(MI) : true; } static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) { @@ -603,7 +645,6 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, case ARM_AM::da: return ARM::LDMDA_UPD; case ARM_AM::db: return ARM::LDMDB_UPD; } - break; case ARM::STMIA: case ARM::STMDA: case ARM::STMDB: @@ -615,7 +656,6 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, case ARM_AM::da: return ARM::STMDA_UPD; case ARM_AM::db: return ARM::STMDB_UPD; } - break; case ARM::t2LDMIA: case ARM::t2LDMDB: switch (Mode) { @@ -623,7 +663,6 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, case ARM_AM::ia: return ARM::t2LDMIA_UPD; case ARM_AM::db: return ARM::t2LDMDB_UPD; } - break; case ARM::t2STMIA: case ARM::t2STMDB: switch (Mode) { @@ -631,38 +670,31 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, case ARM_AM::ia: return ARM::t2STMIA_UPD; case ARM_AM::db: return ARM::t2STMDB_UPD; } - break; case ARM::VLDMSIA: switch (Mode) { default: llvm_unreachable("Unhandled submode!"); case ARM_AM::ia: return ARM::VLDMSIA_UPD; case ARM_AM::db: return ARM::VLDMSDB_UPD; } - break; case ARM::VLDMDIA: switch (Mode) { default: llvm_unreachable("Unhandled submode!"); case ARM_AM::ia: return ARM::VLDMDIA_UPD; case ARM_AM::db: return ARM::VLDMDDB_UPD; } - break; case ARM::VSTMSIA: switch (Mode) { default: llvm_unreachable("Unhandled submode!"); case ARM_AM::ia: return ARM::VSTMSIA_UPD; case ARM_AM::db: return ARM::VSTMSDB_UPD; } - break; case ARM::VSTMDIA: switch (Mode) { default: llvm_unreachable("Unhandled submode!"); case ARM_AM::ia: return ARM::VSTMDIA_UPD; case ARM_AM::db: return ARM::VSTMDDB_UPD; } - break; } - - return 0; } /// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base @@ -686,7 +718,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, bool BaseKill = MI->getOperand(0).isKill(); unsigned Bytes = getLSMultipleTransferSize(MI); unsigned PredReg = 0; - ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg); + ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); int Opcode = MI->getOpcode(); DebugLoc dl = MI->getDebugLoc(); @@ -783,7 +815,6 @@ static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc, return ARM::t2STR_PRE; default: llvm_unreachable("Unhandled opcode!"); } - return 0; } static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc, @@ -809,7 +840,6 @@ static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc, return ARM::t2STR_POST; default: llvm_unreachable("Unhandled opcode!"); } - return 0; } /// MergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base @@ -841,7 +871,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, return false; unsigned PredReg = 0; - ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg); + ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); bool DoMerge = false; ARM_AM::AddrOpc AddSub = ARM_AM::add; unsigned NewOpc = 0; @@ -1071,11 +1101,17 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, unsigned Opcode = MI->getOpcode(); if (Opcode == ARM::LDRD || Opcode == ARM::STRD || Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) { + const MachineOperand &BaseOp = MI->getOperand(2); + unsigned BaseReg = BaseOp.getReg(); unsigned EvenReg = MI->getOperand(0).getReg(); unsigned OddReg = MI->getOperand(1).getReg(); unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false); unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false); - if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum) + // ARM errata 602117: LDRD 
with base in list may result in incorrect base + // register when interrupted or faulted. + bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3(); + if (!Errata602117 && + ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)) return false; MachineBasicBlock::iterator NewBBI = MBBI; @@ -1087,15 +1123,13 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, bool OddDeadKill = isLd ? MI->getOperand(1).isDead() : MI->getOperand(1).isKill(); bool OddUndef = MI->getOperand(1).isUndef(); - const MachineOperand &BaseOp = MI->getOperand(2); - unsigned BaseReg = BaseOp.getReg(); bool BaseKill = BaseOp.isKill(); bool BaseUndef = BaseOp.isUndef(); bool OffKill = isT2 ? false : MI->getOperand(3).isKill(); bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef(); int OffImm = getMemoryOpOffset(MI); unsigned PredReg = 0; - ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg); + ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); if (OddRegNum > EvenRegNum && OffImm == 0) { // Ascending register numbers and no offset. It's safe to change it to a @@ -1126,6 +1160,11 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, unsigned NewOpc = (isLd) ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12) : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12); + // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset, + // so adjust and use t2LDRi12 here for that. + unsigned NewOpc2 = (isLd) + ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12) + : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12); DebugLoc dl = MBBI->getDebugLoc(); // If this is a load and base register is killed, it may have been // re-defed by the load, make sure the first load does not clobber it. @@ -1133,11 +1172,13 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, (BaseKill || OffKill) && (TRI->regsOverlap(EvenReg, BaseReg))) { assert(!TRI->regsOverlap(OddReg, BaseReg)); - InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc, + InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2, OddReg, OddDeadKill, false, BaseReg, false, BaseUndef, false, OffUndef, Pred, PredReg, TII, isT2); NewBBI = llvm::prior(MBBI); + if (isT2 && NewOpc == ARM::t2LDRi8 && OffImm+4 >= 0) + NewOpc = ARM::t2LDRi12; InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, EvenReg, EvenDeadKill, false, BaseReg, BaseKill, BaseUndef, OffKill, OffUndef, @@ -1150,12 +1191,16 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, EvenDeadKill = false; OddDeadKill = true; } + // Never kill the base register in the first instruction. + // <rdar://problem/11101911> + if (EvenReg == BaseReg) + EvenDeadKill = false; InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, EvenReg, EvenDeadKill, EvenUndef, BaseReg, false, BaseUndef, false, OffUndef, Pred, PredReg, TII, isT2); NewBBI = llvm::prior(MBBI); - InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc, + InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2, OddReg, OddDeadKill, OddUndef, BaseReg, BaseKill, BaseUndef, OffKill, OffUndef, Pred, PredReg, TII, isT2); @@ -1206,7 +1251,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { bool isKill = MO.isDef() ? 
false : MO.isKill(); unsigned Base = MBBI->getOperand(1).getReg(); unsigned PredReg = 0; - ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg); + ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg); int Offset = getMemoryOpOffset(MBBI); // Watch out for: // r4 := ldr [r5] @@ -1380,6 +1425,7 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { AFI = Fn.getInfo<ARMFunctionInfo>(); TII = TM.getInstrInfo(); TRI = TM.getRegisterInfo(); + STI = &TM.getSubtarget<ARMSubtarget>(); RS = new RegScavenger(); isThumb2 = AFI->isThumb2Function(); @@ -1464,19 +1510,18 @@ static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base, while (++I != E) { if (I->isDebugValue() || MemOps.count(&*I)) continue; - const MCInstrDesc &MCID = I->getDesc(); - if (MCID.isCall() || MCID.isTerminator() || I->hasUnmodeledSideEffects()) + if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects()) return false; - if (isLd && MCID.mayStore()) + if (isLd && I->mayStore()) return false; if (!isLd) { - if (MCID.mayLoad()) + if (I->mayLoad()) return false; // It's not safe to move the first 'str' down. // str r1, [r0] // strh r5, [r0] // str r4, [r0, #+4] - if (MCID.mayStore()) + if (I->mayStore()) return false; } for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) { @@ -1498,6 +1543,23 @@ static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base, return AddedRegPressure.size() <= MemRegs.size() * 2; } + +/// Copy Op0 and Op1 operands into a new array assigned to MI. +static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0, + MachineInstr *Op1) { + assert(MI->memoperands_empty() && "expected a new machineinstr"); + size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin()) + + (Op1->memoperands_end() - Op1->memoperands_begin()); + + MachineFunction *MF = MI->getParent()->getParent(); + MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs); + MachineSDNode::mmo_iterator MemEnd = + std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin); + MemEnd = + std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd); + MI->setMemRefs(MemBegin, MemEnd); +} + bool ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl, @@ -1565,7 +1627,7 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, if (EvenReg == OddReg) return false; BaseReg = Op0->getOperand(1).getReg(); - Pred = llvm::getInstrPredicate(Op0, PredReg); + Pred = getInstrPredicate(Op0, PredReg); dl = Op0->getDebugLoc(); return true; } @@ -1615,8 +1677,9 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, LastOp = Op; } - unsigned Opcode = Op->getOpcode(); - if (LastOpcode && Opcode != LastOpcode) + unsigned LSMOpcode + = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia); + if (LastOpcode && LSMOpcode != LastOpcode) break; int Offset = getMemoryOpOffset(Op); @@ -1627,7 +1690,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, } LastOffset = Offset; LastBytes = Bytes; - LastOpcode = Opcode; + LastOpcode = LSMOpcode; if (++NumMove == 8) // FIXME: Tune this limit. 
break; } @@ -1692,6 +1755,8 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, if (!isT2) MIB.addReg(0); MIB.addImm(Offset).addImm(Pred).addReg(PredReg); + concatenateMemOperands(MIB, Op0, Op1); + DEBUG(dbgs() << "Formed " << *MIB << "\n"); ++NumLDRDFormed; } else { MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID) @@ -1704,6 +1769,8 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, if (!isT2) MIB.addReg(0); MIB.addImm(Offset).addImm(Pred).addReg(PredReg); + concatenateMemOperands(MIB, Op0, Op1); + DEBUG(dbgs() << "Formed " << *MIB << "\n"); ++NumSTRDFormed; } MBB->erase(Op0); @@ -1745,8 +1812,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { while (MBBI != E) { for (; MBBI != E; ++MBBI) { MachineInstr *MI = MBBI; - const MCInstrDesc &MCID = MI->getDesc(); - if (MCID.isCall() || MCID.isTerminator()) { + if (MI->isCall() || MI->isTerminator()) { // Stop at barriers. ++MBBI; break; @@ -1758,7 +1824,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { if (!isMemoryOp(MI)) continue; unsigned PredReg = 0; - if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL) + if (getInstrPredicate(MI, PredReg) != ARMCC::AL) continue; int Opc = MI->getOpcode(); diff --git a/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp b/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp index daa126d..e2ac9a4 100644 --- a/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp @@ -31,8 +31,7 @@ MCOperand ARMAsmPrinter::GetSymbolRef(const MachineOperand &MO, Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, OutContext); switch (MO.getTargetFlags()) { - default: - assert(0 && "Unknown target flag on symbol operand"); + default: llvm_unreachable("Unknown target flag on symbol operand"); case 0: break; case ARMII::MO_LO16: @@ -67,9 +66,7 @@ MCOperand ARMAsmPrinter::GetSymbolRef(const MachineOperand &MO, bool ARMAsmPrinter::lowerOperand(const MachineOperand &MO, MCOperand &MCOp) { switch (MO.getType()) { - default: - assert(0 && "unknown operand type"); - return false; + default: llvm_unreachable("unknown operand type"); case MachineOperand::MO_Register: // Ignore all non-CPSR implicit register operands. if (MO.isImplicit() && MO.getReg() != ARM::CPSR) @@ -107,6 +104,9 @@ bool ARMAsmPrinter::lowerOperand(const MachineOperand &MO, MCOp = MCOperand::CreateFPImm(Val.convertToDouble()); break; } + case MachineOperand::MO_RegisterMask: + // Ignore call clobbers. + return false; } return true; } diff --git a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp new file mode 100644 index 0000000..af445e2 --- /dev/null +++ b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp @@ -0,0 +1,14 @@ +//===-- ARMMachineFuctionInfo.cpp - ARM machine function info -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "ARMMachineFunctionInfo.h" + +using namespace llvm; + +void ARMFunctionInfo::anchor() { } diff --git a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h index 138f0c2..f1c8fc8 100644 --- a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -1,4 +1,4 @@ -//====- ARMMachineFuctionInfo.h - ARM machine function info -----*- C++ -*-===// +//===-- ARMMachineFuctionInfo.h - ARM machine function info -----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -25,6 +25,7 @@ namespace llvm { /// ARMFunctionInfo - This class is derived from MachineFunctionInfo and /// contains private ARM-specific information for each MachineFunction. class ARMFunctionInfo : public MachineFunctionInfo { + virtual void anchor(); /// isThumb - True if this function is compiled under Thumb mode. /// Used to initialized Align, so must precede it. @@ -63,6 +64,9 @@ class ARMFunctionInfo : public MachineFunctionInfo { /// GPR callee-saved (2) : r8, r10, r11 /// -------------------------------------------- /// DPR callee-saved : d8 - d15 + /// + /// Also see AlignedDPRCSRegs below. Not all D-regs need to go in area 3. + /// Some may be spilled after the stack has been realigned. unsigned GPRCS1Offset; unsigned GPRCS2Offset; unsigned DPRCSOffset; @@ -79,6 +83,15 @@ class ARMFunctionInfo : public MachineFunctionInfo { BitVector GPRCS2Frames; BitVector DPRCSFrames; + /// NumAlignedDPRCS2Regs - The number of callee-saved DPRs that are saved in + /// the aligned portion of the stack frame. This is always a contiguous + /// sequence of D-registers starting from d8. + /// + /// We do not keep track of the frame indices used for these registers - they + /// behave like any other frame index in the aligned stack frame. These + /// registers also aren't included in DPRCSSize above. + unsigned NumAlignedDPRCS2Regs; + /// JumpTableUId - Unique id for jumptables. 
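ARMFunctionInfo::anchor() above is the first of several anchor methods this patch introduces (ARMRegisterInfo, ARMTargetMachine and ThumbTargetMachine follow the same pattern). A minimal sketch of the idiom, using a hypothetical class:

    // One out-of-line virtual method "anchors" the class: its vtable and
    // type info are emitted only in the translation unit that defines
    // anchor(), instead of as weak copies in every user of the header.
    struct Widget {             // hypothetical class, for illustration only
      virtual void anchor();    // declared in the header
      virtual ~Widget() {}
    };
    void Widget::anchor() {}    // defined in exactly one .cpp file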
/// unsigned JumpTableUId; @@ -104,6 +117,7 @@ public: FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), GPRCS1Frames(0), GPRCS2Frames(0), DPRCSFrames(0), + NumAlignedDPRCS2Regs(0), JumpTableUId(0), PICLabelUId(0), VarArgsFrameIndex(0), HasITBlocks(false) {} @@ -137,6 +151,9 @@ public: unsigned getFramePtrSpillOffset() const { return FramePtrSpillOffset; } void setFramePtrSpillOffset(unsigned o) { FramePtrSpillOffset = o; } + unsigned getNumAlignedDPRCS2Regs() const { return NumAlignedDPRCS2Regs; } + void setNumAlignedDPRCS2Regs(unsigned n) { NumAlignedDPRCS2Regs = n; } + unsigned getGPRCalleeSavedArea1Offset() const { return GPRCS1Offset; } unsigned getGPRCalleeSavedArea2Offset() const { return GPRCS2Offset; } unsigned getDPRCalleeSavedAreaOffset() const { return DPRCSOffset; } diff --git a/contrib/llvm/lib/Target/ARM/ARMPerfectShuffle.h b/contrib/llvm/lib/Target/ARM/ARMPerfectShuffle.h index 18e1620..efa22fb 100644 --- a/contrib/llvm/lib/Target/ARM/ARMPerfectShuffle.h +++ b/contrib/llvm/lib/Target/ARM/ARMPerfectShuffle.h @@ -1,4 +1,4 @@ -//===-- ARMPerfectShuffle.h - NEON Perfect Shuffle Table ------------------===// +//===-- ARMPerfectShuffle.h - NEON Perfect Shuffle Table --------*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.cpp index 1cba1ba..6f3819a 100644 --- a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.cpp @@ -1,4 +1,4 @@ -//===- ARMRegisterInfo.cpp - ARM Register Information -----------*- C++ -*-===// +//===-- ARMRegisterInfo.cpp - ARM Register Information --------------------===// // // The LLVM Compiler Infrastructure // @@ -11,11 +11,13 @@ // //===----------------------------------------------------------------------===// +#include "ARMRegisterInfo.h" #include "ARM.h" #include "ARMBaseInstrInfo.h" -#include "ARMRegisterInfo.h" using namespace llvm; +void ARMRegisterInfo::anchor() { } + ARMRegisterInfo::ARMRegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &sti) : ARMBaseRegisterInfo(tii, sti) { diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.h b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.h index 8edfb9a..8a24842 100644 --- a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.h @@ -1,4 +1,4 @@ -//===- ARMRegisterInfo.h - ARM Register Information Impl --------*- C++ -*-===// +//===-- ARMRegisterInfo.h - ARM Register Information Impl -------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -15,15 +15,15 @@ #define ARMREGISTERINFO_H #include "ARM.h" -#include "llvm/Target/TargetRegisterInfo.h" #include "ARMBaseRegisterInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" namespace llvm { class ARMSubtarget; class ARMBaseInstrInfo; - class Type; struct ARMRegisterInfo : public ARMBaseRegisterInfo { + virtual void anchor(); public: ARMRegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &STI); }; diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td index 036822d..1466e98 100644 --- a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td +++ b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td @@ -1,4 +1,4 @@ -//===- ARMRegisterInfo.td - ARM Register defs --------------*- tablegen -*-===// +//===-- ARMRegisterInfo.td - ARM Register defs -------------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -16,6 
+16,8 @@ class ARMReg<bits<4> num, string n, list<Register> subregs = []> : Register<n> { field bits<4> Num; let Namespace = "ARM"; let SubRegs = subregs; + // All bits of ARM registers with sub-registers are covered by sub-registers. + let CoveredBySubRegs = 1; } class ARMFReg<bits<6> num, string n> : Register<n> { @@ -25,28 +27,30 @@ class ARMFReg<bits<6> num, string n> : Register<n> { // Subregister indices. let Namespace = "ARM" in { +def qqsub_0 : SubRegIndex; +def qqsub_1 : SubRegIndex; + // Note: Code depends on these having consecutive numbers. -def ssub_0 : SubRegIndex; -def ssub_1 : SubRegIndex; -def ssub_2 : SubRegIndex; // In a Q reg. -def ssub_3 : SubRegIndex; +def qsub_0 : SubRegIndex; +def qsub_1 : SubRegIndex; +def qsub_2 : SubRegIndex<[qqsub_1, qsub_0]>; +def qsub_3 : SubRegIndex<[qqsub_1, qsub_1]>; def dsub_0 : SubRegIndex; def dsub_1 : SubRegIndex; -def dsub_2 : SubRegIndex; -def dsub_3 : SubRegIndex; -def dsub_4 : SubRegIndex; -def dsub_5 : SubRegIndex; -def dsub_6 : SubRegIndex; -def dsub_7 : SubRegIndex; +def dsub_2 : SubRegIndex<[qsub_1, dsub_0]>; +def dsub_3 : SubRegIndex<[qsub_1, dsub_1]>; +def dsub_4 : SubRegIndex<[qsub_2, dsub_0]>; +def dsub_5 : SubRegIndex<[qsub_2, dsub_1]>; +def dsub_6 : SubRegIndex<[qsub_3, dsub_0]>; +def dsub_7 : SubRegIndex<[qsub_3, dsub_1]>; -def qsub_0 : SubRegIndex; -def qsub_1 : SubRegIndex; -def qsub_2 : SubRegIndex; -def qsub_3 : SubRegIndex; - -def qqsub_0 : SubRegIndex; -def qqsub_1 : SubRegIndex; +def ssub_0 : SubRegIndex; +def ssub_1 : SubRegIndex; +def ssub_2 : SubRegIndex<[dsub_1, ssub_0]>; +def ssub_3 : SubRegIndex<[dsub_1, ssub_1]>; +// Let TableGen synthesize the remaining 12 ssub_* indices. +// We don't need to name them. } // Integer registers @@ -127,9 +131,7 @@ def D30 : ARMFReg<30, "d30">, DwarfRegNum<[286]>; def D31 : ARMFReg<31, "d31">, DwarfRegNum<[287]>; // Advanced SIMD (NEON) defines 16 quad-word aliases -let SubRegIndices = [dsub_0, dsub_1], - CompositeIndices = [(ssub_2 dsub_1, ssub_0), - (ssub_3 dsub_1, ssub_1)] in { +let SubRegIndices = [dsub_0, dsub_1] in { def Q0 : ARMReg< 0, "q0", [D0, D1]>; def Q1 : ARMReg< 1, "q1", [D2, D3]>; def Q2 : ARMReg< 2, "q2", [D4, D5]>; @@ -150,45 +152,22 @@ def Q14 : ARMReg<14, "q14", [D28, D29]>; def Q15 : ARMReg<15, "q15", [D30, D31]>; } -// Pseudo 256-bit registers to represent pairs of Q registers. These should -// never be present in the emitted code. -// These are used for NEON load / store instructions, e.g., vld4, vst3. -// NOTE: It's possible to define more QQ registers since technically the -// starting D register number doesn't have to be multiple of 4, e.g., -// D1, D2, D3, D4 would be a legal quad, but that would make the subregister -// stuff very messy. -let SubRegIndices = [qsub_0, qsub_1], - CompositeIndices = [(dsub_2 qsub_1, dsub_0), (dsub_3 qsub_1, dsub_1)] in { -def QQ0 : ARMReg<0, "qq0", [Q0, Q1]>; -def QQ1 : ARMReg<1, "qq1", [Q2, Q3]>; -def QQ2 : ARMReg<2, "qq2", [Q4, Q5]>; -def QQ3 : ARMReg<3, "qq3", [Q6, Q7]>; -def QQ4 : ARMReg<4, "qq4", [Q8, Q9]>; -def QQ5 : ARMReg<5, "qq5", [Q10, Q11]>; -def QQ6 : ARMReg<6, "qq6", [Q12, Q13]>; -def QQ7 : ARMReg<7, "qq7", [Q14, Q15]>; -} - -// Pseudo 512-bit registers to represent four consecutive Q registers. 
-let SubRegIndices = [qqsub_0, qqsub_1], - CompositeIndices = [(qsub_2 qqsub_1, qsub_0), (qsub_3 qqsub_1, qsub_1), - (dsub_4 qqsub_1, dsub_0), (dsub_5 qqsub_1, dsub_1), - (dsub_6 qqsub_1, dsub_2), (dsub_7 qqsub_1, dsub_3)] in { -def QQQQ0 : ARMReg<0, "qqqq0", [QQ0, QQ1]>; -def QQQQ1 : ARMReg<1, "qqqq1", [QQ2, QQ3]>; -def QQQQ2 : ARMReg<2, "qqqq2", [QQ4, QQ5]>; -def QQQQ3 : ARMReg<3, "qqqq3", [QQ6, QQ7]>; -} - // Current Program Status Register. -def CPSR : ARMReg<0, "cpsr">; -def APSR : ARMReg<1, "apsr">; -def SPSR : ARMReg<2, "spsr">; -def FPSCR : ARMReg<3, "fpscr">; -def ITSTATE : ARMReg<4, "itstate">; +// We model fpscr with two registers: FPSCR models the control bits and will be +// reserved. FPSCR_NZCV models the flag bits and will be unreserved. +def CPSR : ARMReg<0, "cpsr">; +def APSR : ARMReg<1, "apsr">; +def SPSR : ARMReg<2, "spsr">; +def FPSCR : ARMReg<3, "fpscr">; +def FPSCR_NZCV : ARMReg<3, "fpscr_nzcv"> { + let Aliases = [FPSCR]; +} +def ITSTATE : ARMReg<4, "itstate">; // Special Registers - only available in privileged mode. def FPSID : ARMReg<0, "fpsid">; +def MVFR1 : ARMReg<6, "mvfr1">; +def MVFR0 : ARMReg<7, "mvfr0">; def FPEXC : ARMReg<8, "fpexc">; // Register classes. @@ -261,6 +240,12 @@ def tcGPR : RegisterClass<"ARM", [i32], 32, (add R0, R1, R2, R3, R9, R12)> { }]; } +// Condition code registers. +def CCR : RegisterClass<"ARM", [i32], 32, (add CPSR)> { + let CopyCost = -1; // Don't allow copying of status registers. + let isAllocatable = 0; +} + // Scalar single precision floating point register class.. def SPR : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 31)>; @@ -316,37 +301,100 @@ def QPR_8 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], (DPR_8 dsub_0, dsub_1)]; } +// Pseudo-registers representing odd-even pairs of D registers. The even-odd +// pairs are already represented by the Q registers. +// These are needed by NEON instructions requiring two consecutive D registers. +// There is no D31_D0 register as that is always an UNPREDICTABLE encoding. +def TuplesOE2D : RegisterTuples<[dsub_0, dsub_1], + [(decimate (shl DPR, 1), 2), + (decimate (shl DPR, 2), 2)]>; + +// Register class representing a pair of consecutive D registers. +// Use the Q registers for the even-odd pairs. +def DPair : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], + 128, (interleave QPR, TuplesOE2D)> { + // Allocate starting at non-VFP2 registers D16-D31 first. + // Prefer even-odd pairs as they are easier to copy. + let AltOrders = [(add (rotl QPR, 8), (rotl DPair, 16))]; + let AltOrderSelect = [{ return 1; }]; +} + +// Pseudo-registers representing 3 consecutive D registers. +def Tuples3D : RegisterTuples<[dsub_0, dsub_1, dsub_2], + [(shl DPR, 0), + (shl DPR, 1), + (shl DPR, 2)]>; + +// 3 consecutive D registers. +def DTriple : RegisterClass<"ARM", [untyped], 64, (add Tuples3D)> { + let Size = 192; // 3 x 64 bits, we have no predefined type of that size. +} + +// Pseudo 256-bit registers to represent pairs of Q registers. These should +// never be present in the emitted code. +// These are used for NEON load / store instructions, e.g., vld4, vst3. +def Tuples2Q : RegisterTuples<[qsub_0, qsub_1], [(shl QPR, 0), (shl QPR, 1)]>; + // Pseudo 256-bit vector register class to model pairs of Q registers // (4 consecutive D registers). 
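The RegisterTuples set expressions above are dense; a hand-written C++ model of what (decimate (shl DPR, 1), 2) and its sibling enumerate may help (assuming DPR is D0..D31):

    #include <cstdio>

    // (shl L, N) drops the first N elements; (decimate L, 2) keeps every
    // second element starting from the first. So the two operand lists of
    // TuplesOE2D are D1,D3,...,D31 and D2,D4,...,D30, and zipping them
    // yields the odd-even pairs (D1,D2), (D3,D4), ... that the Q registers
    // do not already cover; 'interleave' then merges both kinds into DPair.
    int main() {
      for (int d = 1; d + 1 <= 30; d += 2) // no D31_D0 pair exists
        std::printf("odd-even pair: (d%d, d%d)\n", d, d + 1);
      return 0;
    }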
-def QQPR : RegisterClass<"ARM", [v4i64], 256, (sequence "QQ%u", 0, 7)> { +def QQPR : RegisterClass<"ARM", [v4i64], 256, (add Tuples2Q)> { let SubRegClasses = [(DPR dsub_0, dsub_1, dsub_2, dsub_3), (QPR qsub_0, qsub_1)]; // Allocate non-VFP2 aliases first. - let AltOrders = [(rotl QQPR, 4)]; + let AltOrders = [(rotl QQPR, 8)]; let AltOrderSelect = [{ return 1; }]; } -// Subset of QQPR that have 32-bit SPR subregs. -def QQPR_VFP2 : RegisterClass<"ARM", [v4i64], 256, (trunc QQPR, 4)> { - let SubRegClasses = [(SPR ssub_0, ssub_1, ssub_2, ssub_3), - (DPR_VFP2 dsub_0, dsub_1, dsub_2, dsub_3), - (QPR_VFP2 qsub_0, qsub_1)]; +// Tuples of 4 D regs that isn't also a pair of Q regs. +def TuplesOE4D : RegisterTuples<[dsub_0, dsub_1, dsub_2, dsub_3], + [(decimate (shl DPR, 1), 2), + (decimate (shl DPR, 2), 2), + (decimate (shl DPR, 3), 2), + (decimate (shl DPR, 4), 2)]>; -} +// 4 consecutive D registers. +def DQuad : RegisterClass<"ARM", [v4i64], 256, + (interleave Tuples2Q, TuplesOE4D)>; + +// Pseudo 512-bit registers to represent four consecutive Q registers. +def Tuples2QQ : RegisterTuples<[qqsub_0, qqsub_1], + [(shl QQPR, 0), (shl QQPR, 2)]>; // Pseudo 512-bit vector register class to model 4 consecutive Q registers // (8 consecutive D registers). -def QQQQPR : RegisterClass<"ARM", [v8i64], 256, (sequence "QQQQ%u", 0, 3)> { +def QQQQPR : RegisterClass<"ARM", [v8i64], 256, (add Tuples2QQ)> { let SubRegClasses = [(DPR dsub_0, dsub_1, dsub_2, dsub_3, dsub_4, dsub_5, dsub_6, dsub_7), (QPR qsub_0, qsub_1, qsub_2, qsub_3)]; // Allocate non-VFP2 aliases first. - let AltOrders = [(rotl QQQQPR, 2)]; + let AltOrders = [(rotl QQQQPR, 8)]; let AltOrderSelect = [{ return 1; }]; } -// Condition code registers. -def CCR : RegisterClass<"ARM", [i32], 32, (add CPSR)> { - let CopyCost = -1; // Don't allow copying of status registers. - let isAllocatable = 0; + +// Pseudo-registers representing 2-spaced consecutive D registers. +def Tuples2DSpc : RegisterTuples<[dsub_0, dsub_2], + [(shl DPR, 0), + (shl DPR, 2)]>; + +// Spaced pairs of D registers. +def DPairSpc : RegisterClass<"ARM", [v2i64], 64, (add Tuples2DSpc)>; + +def Tuples3DSpc : RegisterTuples<[dsub_0, dsub_2, dsub_4], + [(shl DPR, 0), + (shl DPR, 2), + (shl DPR, 4)]>; + +// Spaced triples of D registers. +def DTripleSpc : RegisterClass<"ARM", [untyped], 64, (add Tuples3DSpc)> { + let Size = 192; // 3 x 64 bits, we have no predefined type of that size. } + +def Tuples4DSpc : RegisterTuples<[dsub_0, dsub_2, dsub_4, dsub_6], + [(shl DPR, 0), + (shl DPR, 2), + (shl DPR, 4), + (shl DPR, 6)]>; + +// Spaced quads of D registers. 
+def DQuadSpc : RegisterClass<"ARM", [v4i64], 64, (add Tuples3DSpc)>; diff --git a/contrib/llvm/lib/Target/ARM/ARMRelocations.h b/contrib/llvm/lib/Target/ARM/ARMRelocations.h index 86e7206..21877fd 100644 --- a/contrib/llvm/lib/Target/ARM/ARMRelocations.h +++ b/contrib/llvm/lib/Target/ARM/ARMRelocations.h @@ -1,4 +1,4 @@ -//===- ARMRelocations.h - ARM Code Relocations ------------------*- C++ -*-===// +//===-- ARMRelocations.h - ARM Code Relocations -----------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/contrib/llvm/lib/Target/ARM/ARMSchedule.td b/contrib/llvm/lib/Target/ARM/ARMSchedule.td index 958c5c6..45486fd 100644 --- a/contrib/llvm/lib/Target/ARM/ARMSchedule.td +++ b/contrib/llvm/lib/Target/ARM/ARMSchedule.td @@ -1,10 +1,10 @@ -//===- ARMSchedule.td - ARM Scheduling Definitions ---------*- tablegen -*-===// -// +//===-- ARMSchedule.td - ARM Scheduling Definitions --------*- tablegen -*-===// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// @@ -118,6 +118,8 @@ def IIC_fpMUL32 : InstrItinClass; def IIC_fpMUL64 : InstrItinClass; def IIC_fpMAC32 : InstrItinClass; def IIC_fpMAC64 : InstrItinClass; +def IIC_fpFMAC32 : InstrItinClass; +def IIC_fpFMAC64 : InstrItinClass; def IIC_fpDIV32 : InstrItinClass; def IIC_fpDIV64 : InstrItinClass; def IIC_fpSQRT32 : InstrItinClass; @@ -208,6 +210,8 @@ def IIC_VPERMQ : InstrItinClass; def IIC_VPERMQ3 : InstrItinClass; def IIC_VMACD : InstrItinClass; def IIC_VMACQ : InstrItinClass; +def IIC_VFMACD : InstrItinClass; +def IIC_VFMACQ : InstrItinClass; def IIC_VRECSD : InstrItinClass; def IIC_VRECSQ : InstrItinClass; def IIC_VCNTiD : InstrItinClass; diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleA8.td b/contrib/llvm/lib/Target/ARM/ARMScheduleA8.td index 8d86c01..8b1fb93 100644 --- a/contrib/llvm/lib/Target/ARM/ARMScheduleA8.td +++ b/contrib/llvm/lib/Target/ARM/ARMScheduleA8.td @@ -324,6 +324,15 @@ def CortexA8Itineraries : ProcessorItineraries< InstrStage<19, [A8_NPipe], 0>, InstrStage<19, [A8_NLSPipe]>], [19, 2, 1, 1]>, // + // Single-precision Fused FP MAC + InstrItinData<IIC_fpFMAC32, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<1, [A8_NPipe]>], [7, 2, 1, 1]>, + // + // Double-precision Fused FP MAC + InstrItinData<IIC_fpFMAC64, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<19, [A8_NPipe], 0>, + InstrStage<19, [A8_NLSPipe]>], [19, 2, 1, 1]>, + // // Single-precision FP DIV InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<20, [A8_NPipe], 0>, @@ -860,6 +869,16 @@ def CortexA8Itineraries : ProcessorItineraries< InstrItinData<IIC_VMACQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<2, [A8_NPipe]>], [10, 3, 2, 2]>, // + // Double-register Fused FP Multiple-Accumulate + InstrItinData<IIC_VFMACD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<1, [A8_NPipe]>], [9, 3, 2, 2]>, + // + // Quad-register Fused FP Multiple-Accumulate + // Result written in N9, but that is relative to the last cycle of multicycle, + // so we use 10 for those cases + InstrItinData<IIC_VFMACQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<2, [A8_NPipe]>], [10, 3, 2, 2]>, + // // Double-register Reciprical Step InstrItinData<IIC_VRECSD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<1, [A8_NPipe]>], [9, 2, 
2]>, diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td b/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td index 49fedf6..0d710cc 100644 --- a/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td +++ b/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td @@ -604,6 +604,22 @@ def CortexA9Itineraries : ProcessorItineraries< InstrStage<2, [A9_NPipe]>], [9, 1, 1, 1]>, // + // Single-precision Fused FP MAC + InstrItinData<IIC_fpFMAC32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<9, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_NPipe]>], + [8, 1, 1, 1]>, + // + // Double-precision Fused FP MAC + InstrItinData<IIC_fpFMAC64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<10, [A9_DRegsN], 0, Reserved>, + InstrStage<2, [A9_NPipe]>], + [9, 1, 1, 1]>, + // // Single-precision FP DIV InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, @@ -1697,6 +1713,26 @@ def CortexA9Itineraries : ProcessorItineraries< InstrStage<4, [A9_NPipe]>], [8, 4, 2, 1]>, // + // Double-register Fused FP Multiple-Accumulate + InstrItinData<IIC_VFMACD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe]>], + [6, 3, 2, 1]>, + // + // Quad-register Fused FP Multiple-Accumulate + // Result written in N9, but that is relative to the last cycle of multicycle, + // so we use 10 for those cases + InstrItinData<IIC_VFMACQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 9 cycles + InstrStage<10, [A9_DRegsVFP], 0, Reserved>, + InstrStage<4, [A9_NPipe]>], + [8, 4, 2, 1]>, + // // Double-register Reciprical Step InstrItinData<IIC_VRECSD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleV6.td b/contrib/llvm/lib/Target/ARM/ARMScheduleV6.td index c1880a7..0ace9bc 100644 --- a/contrib/llvm/lib/Target/ARM/ARMScheduleV6.td +++ b/contrib/llvm/lib/Target/ARM/ARMScheduleV6.td @@ -1,10 +1,10 @@ -//===- ARMScheduleV6.td - ARM v6 Scheduling Definitions ----*- tablegen -*-===// -// +//===-- ARMScheduleV6.td - ARM v6 Scheduling Definitions ---*- tablegen -*-===// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // // This file defines the itinerary class data for the ARM v6 processors. 
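For readers decoding the itinerary entries above and below: the InstrStage list models pipeline occupancy, while the trailing bracketed list gives per-operand cycles. A small sketch of how that table is queried, assuming the standard itinerary API (simplified):

    #include "llvm/MC/MCInstrItineraries.h"

    // Entry 0 of the bracketed list is the def operand: the cycle at which
    // the result is ready. Later entries are when each use is read. For
    // IIC_fpFMAC32 on Cortex-A9, [8, 1, 1, 1] means the result appears in
    // cycle 8 and all three sources are read in cycle 1.
    static int resultReadyCycle(const llvm::InstrItineraryData &Itins,
                                unsigned ItinClassIdx) {
      return Itins.getOperandCycle(ItinClassIdx, 0); // -1 if unknown
    }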
@@ -243,6 +243,12 @@ def ARMV6Itineraries : ProcessorItineraries< // Double-precision FP MAC InstrItinData<IIC_fpMAC64 , [InstrStage<2, [V6_Pipe]>], [9, 2, 2, 2]>, // + // Single-precision Fused FP MAC + InstrItinData<IIC_fpFMAC32, [InstrStage<1, [V6_Pipe]>], [9, 2, 2, 2]>, + // + // Double-precision Fused FP MAC + InstrItinData<IIC_fpFMAC64, [InstrStage<2, [V6_Pipe]>], [9, 2, 2, 2]>, + // // Single-precision FP DIV InstrItinData<IIC_fpDIV32 , [InstrStage<15, [V6_Pipe]>], [20, 2, 2]>, // diff --git a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp index a3a3d58..e2530d0 100644 --- a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp @@ -67,7 +67,7 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, DAG.getNode(ISD::ADD, dl, MVT::i32, Src, DAG.getConstant(SrcOff, MVT::i32)), SrcPtrInfo.getWithOffset(SrcOff), isVolatile, - false, 0); + false, false, 0); TFOps[i] = Loads[i].getValue(1); SrcOff += VTSize; } @@ -105,7 +105,8 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, Loads[i] = DAG.getLoad(VT, dl, Chain, DAG.getNode(ISD::ADD, dl, MVT::i32, Src, DAG.getConstant(SrcOff, MVT::i32)), - SrcPtrInfo.getWithOffset(SrcOff), false, false, 0); + SrcPtrInfo.getWithOffset(SrcOff), + false, false, false, 0); TFOps[i] = Loads[i].getValue(1); ++i; SrcOff += VTSize; @@ -144,8 +145,8 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, MachinePointerInfo DstPtrInfo) const { - // Use default for non AAPCS subtargets - if (!Subtarget->isAAPCS_ABI()) + // Use default for non AAPCS (or Darwin) subtargets + if (!Subtarget->isAAPCS_ABI() || Subtarget->isTargetDarwin()) return SDValue(); const ARMTargetLowering &TLI = @@ -188,6 +189,7 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, 0, // number of fixed arguments TLI.getLibcallCallingConv(RTLIB::MEMSET), // call conv false, // is tail call + false, // does not return false, // is return val used DAG.getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET), TLI.getPointerTy()), // callee diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp index 247d6be..e247b76 100644 --- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp @@ -1,4 +1,4 @@ -//===-- ARMSubtarget.cpp - ARM Subtarget Information ------------*- C++ -*-===// +//===-- ARMSubtarget.cpp - ARM Subtarget Information ----------------------===// // // The LLVM Compiler Infrastructure // @@ -16,7 +16,6 @@ #include "llvm/GlobalValue.h" #include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Support/CommandLine.h" -#include "llvm/ADT/SmallVector.h" #define GET_SUBTARGETINFO_TARGET_DESC #define GET_SUBTARGETINFO_CTOR @@ -47,13 +46,13 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, , HasV7Ops(false) , HasVFPv2(false) , HasVFPv3(false) + , HasVFPv4(false) , HasNEON(false) , UseNEONForSinglePrecisionFP(false) , SlowFPVMLx(false) , HasVMLxForwarding(false) , SlowFPBrcc(false) , InThumbMode(false) - , InNaClMode(false) , HasThumb2(false) , IsMClass(false) , NoARM(false) @@ -104,18 +103,19 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, computeIssueWidth(); if (TT.find("eabi") != std::string::npos) + // FIXME: We might want to separate AAPCS and EABI. Some systems, e.g. 
+ // Darwin-EABI conforms to AAPCS but not the rest of EABI. TargetABI = ARM_ABI_AAPCS; if (isAAPCS_ABI()) stackAlignment = 8; - if (!isTargetDarwin()) + if (!isTargetIOS()) UseMovt = hasV6T2Ops(); else { IsR9Reserved = ReserveR9 | !HasV6Ops; UseMovt = DarwinUseMOVT && hasV6T2Ops(); - const Triple &T = getTargetTriple(); - SupportsTailCall = T.getOS() == Triple::IOS && !T.isOSVersionLT(5, 0); + SupportsTailCall = !getTargetTriple().isOSVersionLT(5, 0); } if (!isThumb() || hasThumb2()) diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h index b63e108..e72b06f 100644 --- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h @@ -1,4 +1,4 @@ -//=====---- ARMSubtarget.h - Define Subtarget for the ARM -----*- C++ -*--====// +//===-- ARMSubtarget.h - Define Subtarget for the ARM ----------*- C++ -*--===// // // The LLVM Compiler Infrastructure // @@ -45,10 +45,11 @@ protected: bool HasV6T2Ops; bool HasV7Ops; - /// HasVFPv2, HasVFPv3, HasNEON - Specify what floating point ISAs are - /// supported. + /// HasVFPv2, HasVFPv3, HasVFPv4, HasNEON - Specify what + /// floating point ISAs are supported. bool HasVFPv2; bool HasVFPv3; + bool HasVFPv4; bool HasNEON; /// UseNEONForSinglePrecisionFP - if the NEONFP attribute has been @@ -70,9 +71,6 @@ protected: /// InThumbMode - True if compiling for Thumb, false for ARM. bool InThumbMode; - /// InNaClMode - True if targeting Native Client - bool InNaClMode; - /// HasThumb2 - True if Thumb2 instructions are supported. bool HasThumb2; @@ -126,6 +124,10 @@ protected: /// CPSR setting instruction. bool AvoidCPSRPartialUpdate; + /// HasRAS - Some processors perform return stack prediction. CodeGen should + /// avoid issuing "normal" call instructions to callees which do not return. + bool HasRAS; + /// HasMPExtension - True if the subtarget supports Multiprocessing /// extension (ARMv7 only).
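The new HasVFPv4 flag (set from FeatureVFP4 in the target description) is what gates the fused multiply-accumulate patterns whose itineraries appear earlier in this patch. A hand-written sketch of the kind of guard a lowering would use; canFormVFMA is illustrative, not an actual LLVM function:

    #include "ARMSubtarget.h"

    // FeatureVFP4 implies VFP3 (and hence VFP2), so one query suffices.
    static bool canFormVFMA(const llvm::ARMSubtarget &ST) {
      return ST.hasVFP4(); // vfma/vfms/vfnma/vfnms require VFPv4
    }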
bool HasMPExtension; @@ -194,11 +196,13 @@ protected: bool isCortexA8() const { return ARMProcFamily == CortexA8; } bool isCortexA9() const { return ARMProcFamily == CortexA9; } + bool isCortexM3() const { return CPUString == "cortex-m3"; } bool hasARMOps() const { return !NoARM; } bool hasVFP2() const { return HasVFPv2; } bool hasVFP3() const { return HasVFPv3; } + bool hasVFP4() const { return HasVFPv4; } bool hasNEON() const { return HasNEON; } bool useNEONForSinglePrecisionFP() const { return hasNEON() && UseNEONForSinglePrecisionFP; } @@ -212,6 +216,7 @@ protected: bool isFPOnlySP() const { return FPOnlySP; } bool prefers32BitThumb() const { return Pref32BitThumb; } bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; } + bool hasRAS() const { return HasRAS; } bool hasMPExtension() const { return HasMPExtension; } bool hasThumb2DSP() const { return Thumb2DSP; } @@ -220,6 +225,7 @@ protected: const Triple &getTargetTriple() const { return TargetTriple; } + bool isTargetIOS() const { return TargetTriple.getOS() == Triple::IOS; } bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); } bool isTargetNaCl() const { return TargetTriple.getOS() == Triple::NativeClient; diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp index 96b1e89..047efc2 100644 --- a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp @@ -20,6 +20,7 @@ #include "llvm/Support/FormattedStream.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Transforms/Scalar.h" using namespace llvm; static cl::opt<bool> @@ -33,24 +34,32 @@ extern "C" void LLVMInitializeARMTarget() { RegisterTargetMachine<ThumbTargetMachine> Y(TheThumbTarget); } + /// TargetMachine ctor - Create an ARM architecture model. /// ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM) - : LLVMTargetMachine(T, TT, CPU, FS, RM, CM), + const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) + : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), Subtarget(TT, CPU, FS), JITInfo(), InstrItins(Subtarget.getInstrItineraryData()) { // Default to soft float ABI - if (FloatABIType == FloatABI::Default) - FloatABIType = FloatABI::Soft; + if (Options.FloatABIType == FloatABI::Default) + this->Options.FloatABIType = FloatABI::Soft; } +void ARMTargetMachine::anchor() { } + ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM) - : ARMBaseTargetMachine(T, TT, CPU, FS, RM, CM), InstrInfo(Subtarget), + const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) + : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), + InstrInfo(Subtarget), DataLayout(Subtarget.isAPCS_ABI() ? 
std::string("e-p:32:32-f64:32:64-i64:32:64-" "v128:32:128-v64:32:64-n32-S32") : @@ -68,10 +77,14 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT, "support ARM mode execution!"); } +void ThumbTargetMachine::anchor() { } + ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM) - : ARMBaseTargetMachine(T, TT, CPU, FS, RM, CM), + const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) + : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), InstrInfo(Subtarget.hasThumb2() ? ((ARMBaseInstrInfo*)new Thumb2InstrInfo(Subtarget)) : ((ARMBaseInstrInfo*)new Thumb1InstrInfo(Subtarget))), @@ -94,37 +107,62 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT, : (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)) { } -bool ARMBaseTargetMachine::addPreISel(PassManagerBase &PM, - CodeGenOpt::Level OptLevel) { - if (OptLevel != CodeGenOpt::None && EnableGlobalMerge) - PM.add(createARMGlobalMergePass(getTargetLowering())); +namespace { +/// ARM Code Generator Pass Configuration Options. +class ARMPassConfig : public TargetPassConfig { +public: + ARMPassConfig(ARMBaseTargetMachine *TM, PassManagerBase &PM) + : TargetPassConfig(TM, PM) {} + + ARMBaseTargetMachine &getARMTargetMachine() const { + return getTM<ARMBaseTargetMachine>(); + } + + const ARMSubtarget &getARMSubtarget() const { + return *getARMTargetMachine().getSubtargetImpl(); + } + + virtual bool addPreISel(); + virtual bool addInstSelector(); + virtual bool addPreRegAlloc(); + virtual bool addPreSched2(); + virtual bool addPreEmitPass(); +}; +} // namespace + +TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) { + return new ARMPassConfig(this, PM); +} + +bool ARMPassConfig::addPreISel() { + if (TM->getOptLevel() != CodeGenOpt::None && EnableGlobalMerge) + PM.add(createGlobalMergePass(TM->getTargetLowering())); return false; } -bool ARMBaseTargetMachine::addInstSelector(PassManagerBase &PM, - CodeGenOpt::Level OptLevel) { - PM.add(createARMISelDag(*this, OptLevel)); +bool ARMPassConfig::addInstSelector() { + PM.add(createARMISelDag(getARMTargetMachine(), getOptLevel())); return false; } -bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM, - CodeGenOpt::Level OptLevel) { +bool ARMPassConfig::addPreRegAlloc() { // FIXME: temporarily disabling load / store optimization pass for Thumb1. - if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only()) + if (getOptLevel() != CodeGenOpt::None && !getARMSubtarget().isThumb1Only()) PM.add(createARMLoadStoreOptimizationPass(true)); - if (OptLevel != CodeGenOpt::None && Subtarget.isCortexA9()) + if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isCortexA9()) PM.add(createMLxExpansionPass()); return true; } -bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM, - CodeGenOpt::Level OptLevel) { +bool ARMPassConfig::addPreSched2() { // FIXME: temporarily disabling load / store optimization pass for Thumb1. - if (OptLevel != CodeGenOpt::None) { - if (!Subtarget.isThumb1Only()) + if (getOptLevel() != CodeGenOpt::None) { + if (!getARMSubtarget().isThumb1Only()) { PM.add(createARMLoadStoreOptimizationPass()); - if (Subtarget.hasNEON()) + printAndVerify("After ARM load / store optimizer"); + } + if (getARMSubtarget().hasNEON()) PM.add(createExecutionDependencyFixPass(&ARM::DPRRegClass)); } @@ -132,27 +170,31 @@ bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM, // proper scheduling. 
PM.add(createARMExpandPseudoPass()); - if (OptLevel != CodeGenOpt::None) { - if (!Subtarget.isThumb1Only()) - PM.add(createIfConverterPass()); + if (getOptLevel() != CodeGenOpt::None) { + if (!getARMSubtarget().isThumb1Only()) + addPass(IfConverterID); } - if (Subtarget.isThumb2()) + if (getARMSubtarget().isThumb2()) PM.add(createThumb2ITBlockPass()); return true; } -bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM, - CodeGenOpt::Level OptLevel) { - if (Subtarget.isThumb2() && !Subtarget.prefers32BitThumb()) - PM.add(createThumb2SizeReductionPass()); +bool ARMPassConfig::addPreEmitPass() { + if (getARMSubtarget().isThumb2()) { + if (!getARMSubtarget().prefers32BitThumb()) + PM.add(createThumb2SizeReductionPass()); + + // Constant island pass works on unbundled instructions. + addPass(UnpackMachineBundlesID); + } PM.add(createARMConstantIslandPass()); + return true; } bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, JITCodeEmitter &JCE) { // Machine code emitter pass for ARM. PM.add(createARMJITCodeEmitterPass(*this, JCE)); diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h index c8c601c..abcdb24 100644 --- a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h +++ b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h @@ -41,7 +41,9 @@ private: public: ARMBaseTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM); + const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); virtual ARMJITInfo *getJITInfo() { return &JITInfo; } virtual const ARMSubtarget *getSubtargetImpl() const { return &Subtarget; } @@ -50,18 +52,15 @@ public: } // Pass Pipeline Configuration - virtual bool addPreISel(PassManagerBase &PM, CodeGenOpt::Level OptLevel); - virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); - virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel); - virtual bool addPreSched2(PassManagerBase &PM, CodeGenOpt::Level OptLevel); - virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel); - virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, - JITCodeEmitter &MCE); + virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); + + virtual bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &MCE); }; /// ARMTargetMachine - ARM target machine. /// class ARMTargetMachine : public ARMBaseTargetMachine { + virtual void anchor(); ARMInstrInfo InstrInfo; const TargetData DataLayout; // Calculates type size & alignment ARMELFWriterInfo ELFWriterInfo; @@ -71,7 +70,9 @@ class ARMTargetMachine : public ARMBaseTargetMachine { public: ARMTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM); + const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); virtual const ARMRegisterInfo *getRegisterInfo() const { return &InstrInfo.getRegisterInfo(); @@ -100,6 +101,7 @@ class ARMTargetMachine : public ARMBaseTargetMachine { /// Thumb-1 and Thumb-2. /// class ThumbTargetMachine : public ARMBaseTargetMachine { + virtual void anchor(); // Either Thumb1InstrInfo or Thumb2InstrInfo.
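The migration above, from virtual add* hooks on the target machine to a TargetPassConfig subclass, follows a fixed recipe. A stripped-down sketch for a hypothetical target (all Foo names are illustrative):

    #include "llvm/CodeGen/Passes.h"

    // Targets now subclass the pass-configuration object and override its
    // hooks; the TargetMachine keeps only the createPassConfig factory.
    class FooPassConfig : public llvm::TargetPassConfig {
    public:
      FooPassConfig(llvm::LLVMTargetMachine *TM, llvm::PassManagerBase &PM)
        : TargetPassConfig(TM, PM) {}
      virtual bool addInstSelector() {
        // PM.add(createFooISelDag(...)); // target-specific ISel pass here
        return false;                     // false means "no error"
      }
    };
    // In the TargetMachine:
    //   TargetPassConfig *FooTargetMachine::createPassConfig(
    //       PassManagerBase &PM) { return new FooPassConfig(this, PM); }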
OwningPtr<ARMBaseInstrInfo> InstrInfo; const TargetData DataLayout; // Calculates type size & alignment @@ -111,7 +113,9 @@ class ThumbTargetMachine : public ARMBaseTargetMachine { public: ThumbTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM); + const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); /// returns either Thumb1RegisterInfo or Thumb2RegisterInfo virtual const ARMBaseRegisterInfo *getRegisterInfo() const { diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp index 19defa1..a5ea1c2 100644 --- a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp @@ -14,6 +14,7 @@ #include "llvm/Support/Dwarf.h" #include "llvm/Support/ELF.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/ADT/StringExtras.h" using namespace llvm; using namespace dwarf; @@ -24,8 +25,9 @@ using namespace dwarf; void ARMElfTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM) { TargetLoweringObjectFileELF::Initialize(Ctx, TM); + isAAPCS_ABI = TM.getSubtarget<ARMSubtarget>().isAAPCS_ABI(); - if (TM.getSubtarget<ARMSubtarget>().isAAPCS_ABI()) { + if (isAAPCS_ABI) { StaticCtorSection = getContext().getELFSection(".init_array", ELF::SHT_INIT_ARRAY, ELF::SHF_WRITE | @@ -45,3 +47,33 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx, 0, SectionKind::getMetadata()); } + +const MCSection * +ARMElfTargetObjectFile::getStaticCtorSection(unsigned Priority) const { + if (!isAAPCS_ABI) + return TargetLoweringObjectFileELF::getStaticCtorSection(Priority); + + if (Priority == 65535) + return StaticCtorSection; + + // Emit ctors in priority order. + std::string Name = std::string(".init_array.") + utostr(Priority); + return getContext().getELFSection(Name, ELF::SHT_INIT_ARRAY, + ELF::SHF_ALLOC | ELF::SHF_WRITE, + SectionKind::getDataRel()); +} + +const MCSection * +ARMElfTargetObjectFile::getStaticDtorSection(unsigned Priority) const { + if (!isAAPCS_ABI) + return TargetLoweringObjectFileELF::getStaticDtorSection(Priority); + + if (Priority == 65535) + return StaticDtorSection; + + // Emit dtors in priority order. 
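The prioritized ctor/dtor sections created above and just below map constructor priorities onto suffixed ELF sections that the linker sorts numerically; only the default priority (65535) falls back to the plain .init_array / .fini_array. A hand-written C++ example of what lands where:

    // Illustration only; GCC/Clang reserve priorities 0-100 for the
    // implementation.
    __attribute__((constructor(150)))
    static void early_setup() {}  // emitted into .init_array.150

    __attribute__((constructor))  // default priority 65535
    static void late_setup() {}   // emitted into the plain .init_array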
+ std::string Name = std::string(".fini_array.") + utostr(Priority); + return getContext().getELFSection(Name, ELF::SHT_FINI_ARRAY, + ELF::SHF_ALLOC | ELF::SHF_WRITE, + SectionKind::getDataRel()); +} diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h index c6a7261..ff21060 100644 --- a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h +++ b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h @@ -20,6 +20,7 @@ class TargetMachine; class ARMElfTargetObjectFile : public TargetLoweringObjectFileELF { protected: const MCSection *AttributesSection; + bool isAAPCS_ABI; public: ARMElfTargetObjectFile() : TargetLoweringObjectFileELF(), @@ -31,6 +32,9 @@ public: virtual const MCSection *getAttributesSection() const { return AttributesSection; } + + const MCSection * getStaticCtorSection(unsigned Priority) const; + const MCSection * getStaticDtorSection(unsigned Priority) const; }; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp index 14d35ba..fda8536 100644 --- a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp +++ b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp @@ -17,9 +17,6 @@ #include "llvm/Support/TargetRegistry.h" -#include "llvm/ADT/OwningPtr.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" #include <string> @@ -107,11 +104,9 @@ AsmToken ARMBaseAsmLexer::LexTokenUAL() { SetError(Lexer->getErrLoc(), Lexer->getErr()); break; case AsmToken::Identifier: { - std::string upperCase = lexedToken.getString().str(); - std::string lowerCase = LowercaseString(upperCase); - StringRef lowerRef(lowerCase); + std::string lowerCase = lexedToken.getString().lower(); - unsigned regID = MatchRegisterName(lowerRef); + unsigned regID = MatchRegisterName(lowerCase); // Check for register aliases. // r13 -> sp // r14 -> lr diff --git a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 24f15b4..e55a7da 100644 --- a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -30,7 +30,6 @@ #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" @@ -40,9 +39,15 @@ namespace { class ARMOperand; +enum VectorLaneTy { NoLanes, AllLanes, IndexedLane }; + class ARMAsmParser : public MCTargetAsmParser { MCSubtargetInfo &STI; MCAsmParser &Parser; + const MCRegisterInfo *MRI; + + // Map of register aliases registers via the .req directive. + StringMap<unsigned> RegisterReqs; struct { ARMCC::CondCodes Cond; // Condition for IT block. 
@@ -91,9 +96,14 @@ class ARMAsmParser : public MCTargetAsmParser { unsigned &ShiftAmount); bool parseDirectiveWord(unsigned Size, SMLoc L); bool parseDirectiveThumb(SMLoc L); + bool parseDirectiveARM(SMLoc L); bool parseDirectiveThumbFunc(SMLoc L); bool parseDirectiveCode(SMLoc L); bool parseDirectiveSyntax(SMLoc L); + bool parseDirectiveReq(StringRef Name, SMLoc L); + bool parseDirectiveUnreq(SMLoc L); + bool parseDirectiveArch(SMLoc L); + bool parseDirectiveEabiAttr(SMLoc L); StringRef splitMnemonic(StringRef Mnemonic, unsigned &PredicationCode, bool &CarrySetting, unsigned &ProcessorIMod, @@ -161,6 +171,8 @@ class ARMAsmParser : public MCTargetAsmParser { OperandMatchResultTy parsePostIdxReg(SmallVectorImpl<MCParsedAsmOperand*>&); OperandMatchResultTy parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*>&); OperandMatchResultTy parseFPImm(SmallVectorImpl<MCParsedAsmOperand*>&); + OperandMatchResultTy parseVectorList(SmallVectorImpl<MCParsedAsmOperand*>&); + OperandMatchResultTy parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index); // Asm Match Converter Methods bool cvtT2LdrdPre(MCInst &Inst, unsigned Opcode, @@ -197,10 +209,18 @@ class ARMAsmParser : public MCTargetAsmParser { const SmallVectorImpl<MCParsedAsmOperand*> &); bool cvtThumbMultiply(MCInst &Inst, unsigned Opcode, const SmallVectorImpl<MCParsedAsmOperand*> &); + bool cvtVLDwbFixed(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &); + bool cvtVLDwbRegister(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &); + bool cvtVSTwbFixed(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &); + bool cvtVSTwbRegister(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &); bool validateInstruction(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Ops); - void processInstruction(MCInst &Inst, + bool processInstruction(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Ops); bool shouldOmitCCOutOperand(StringRef Mnemonic, SmallVectorImpl<MCParsedAsmOperand*> &Operands); @@ -217,6 +237,9 @@ public: : MCTargetAsmParser(), STI(_STI), Parser(_Parser) { MCAsmParserExtension::Initialize(_Parser); + // Cache the MCRegisterInfo. + MRI = &getContext().getRegisterInfo(); + // Initialize the set of available features. setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); @@ -251,7 +274,6 @@ class ARMOperand : public MCParsedAsmOperand { k_CoprocReg, k_CoprocOption, k_Immediate, - k_FPImmediate, k_MemBarrierOpt, k_Memory, k_PostIndexRegister, @@ -262,6 +284,9 @@ class ARMOperand : public MCParsedAsmOperand { k_RegisterList, k_DPRRegisterList, k_SPRRegisterList, + k_VectorList, + k_VectorListAllLanes, + k_VectorListIndexed, k_ShiftedRegister, k_ShiftedImmediate, k_ShifterImmediate, @@ -311,6 +336,14 @@ class ARMOperand : public MCParsedAsmOperand { unsigned RegNum; } Reg; + // A vector register list is a sequential list of 1 to 4 registers. + struct { + unsigned RegNum; + unsigned Count; + unsigned LaneIndex; + bool isDoubleSpaced; + } VectorList; + struct { unsigned Val; } VectorIndex; @@ -319,10 +352,6 @@ class ARMOperand : public MCParsedAsmOperand { const MCExpr *Val; } Imm; - struct { - unsigned Val; // encoded 8-bit representation - } FPImm; - /// Combined record for all forms of ARM address expressions. struct { unsigned BaseRegNum; @@ -333,7 +362,7 @@ class ARMOperand : public MCParsedAsmOperand { ARM_AM::ShiftOpc ShiftType; // Shift type for OffsetReg unsigned ShiftImm; // shift for OffsetReg. 
unsigned Alignment; // 0 = no alignment specified - // n = alignment in bytes (8, 16, or 32) + // n = alignment in bytes (2, 4, 8, 16, or 32) unsigned isNegative : 1; // Negated OffsetReg? (~'U' bit) } Memory; @@ -393,6 +422,11 @@ public: case k_SPRRegisterList: Registers = o.Registers; break; + case k_VectorList: + case k_VectorListAllLanes: + case k_VectorListIndexed: + VectorList = o.VectorList; + break; case k_CoprocNum: case k_CoprocReg: Cop = o.Cop; @@ -403,9 +437,6 @@ public: case k_Immediate: Imm = o.Imm; break; - case k_FPImmediate: - FPImm = o.FPImm; - break; case k_MemBarrierOpt: MBOpt = o.MBOpt; break; @@ -474,15 +505,10 @@ public: } const MCExpr *getImm() const { - assert(Kind == k_Immediate && "Invalid access!"); + assert(isImm() && "Invalid access!"); return Imm.Val; } - unsigned getFPImm() const { - assert(Kind == k_FPImmediate && "Invalid access!"); - return FPImm.Val; - } - unsigned getVectorIndex() const { assert(Kind == k_VectorIndex && "Invalid access!"); return VectorIndex.Val; @@ -511,90 +537,219 @@ public: bool isITMask() const { return Kind == k_ITCondMask; } bool isITCondCode() const { return Kind == k_CondCode; } bool isImm() const { return Kind == k_Immediate; } - bool isFPImm() const { return Kind == k_FPImmediate; } + bool isFPImm() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int Val = ARM_AM::getFP32Imm(APInt(32, CE->getValue())); + return Val != -1; + } + bool isFBits16() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value >= 0 && Value <= 16; + } + bool isFBits32() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value >= 1 && Value <= 32; + } bool isImm8s4() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return ((Value & 3) == 0) && Value >= -1020 && Value <= 1020; } bool isImm0_1020s4() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return ((Value & 3) == 0) && Value >= 0 && Value <= 1020; } bool isImm0_508s4() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return ((Value & 3) == 0) && Value >= 0 && Value <= 508; } + bool isImm0_508s4Neg() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = -CE->getValue(); + // explicitly exclude zero. we want that to use the normal 0_508 version. 
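+ // (Presumably this predicate exists so that a negative adjustment such
+ // as "add sp, sp, #-8" can be matched and encoded as the corresponding
+ // subtract; an inference from the predicate's shape, not stated in the
+ // patch.)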
+ return ((Value & 3) == 0) && Value > 0 && Value <= 508; + } bool isImm0_255() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value >= 0 && Value < 256; } + bool isImm0_4095() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value >= 0 && Value < 4096; + } + bool isImm0_4095Neg() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = -CE->getValue(); + return Value > 0 && Value < 4096; + } + bool isImm0_1() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value >= 0 && Value < 2; + } + bool isImm0_3() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value >= 0 && Value < 4; + } bool isImm0_7() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value >= 0 && Value < 8; } bool isImm0_15() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value >= 0 && Value < 16; } bool isImm0_31() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value >= 0 && Value < 32; } + bool isImm0_63() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value >= 0 && Value < 64; + } + bool isImm8() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value == 8; + } + bool isImm16() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value == 16; + } + bool isImm32() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value == 32; + } + bool isShrImm8() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value > 0 && Value <= 8; + } + bool isShrImm16() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value > 0 && Value <= 16; + } + bool isShrImm32() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value > 0 && Value <= 32; + } + bool isShrImm64() const { + if (!isImm()) return false; + const MCConstantExpr *CE = 
dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value > 0 && Value <= 64; + } + bool isImm1_7() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value > 0 && Value < 8; + } + bool isImm1_15() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value > 0 && Value < 16; + } + bool isImm1_31() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value > 0 && Value < 32; + } bool isImm1_16() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value > 0 && Value < 17; } bool isImm1_32() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value > 0 && Value < 33; } + bool isImm0_32() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value >= 0 && Value < 33; + } bool isImm0_65535() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value >= 0 && Value < 65536; } bool isImm0_65535Expr() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); // If it's not a constant expression, it'll generate a fixup and be // handled later. 
@@ -603,56 +758,81 @@ public: return Value >= 0 && Value < 65536; } bool isImm24bit() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value >= 0 && Value <= 0xffffff; } bool isImmThumbSR() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value > 0 && Value < 33; } bool isPKHLSLImm() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value >= 0 && Value < 32; } bool isPKHASRImm() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return Value > 0 && Value <= 32; } bool isARMSOImm() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return ARM_AM::getSOImmVal(Value) != -1; } + bool isARMSOImmNot() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return ARM_AM::getSOImmVal(~Value) != -1; + } + bool isARMSOImmNeg() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + // Only use this when not representable as a plain so_imm. + return ARM_AM::getSOImmVal(Value) == -1 && + ARM_AM::getSOImmVal(-Value) != -1; + } bool isT2SOImm() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); return ARM_AM::getT2SOImmVal(Value) != -1; } + bool isT2SOImmNot() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return ARM_AM::getT2SOImmVal(~Value) != -1; + } + bool isT2SOImmNeg() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + // Only use this when not representable as a plain so_imm. 
+ return ARM_AM::getT2SOImmVal(Value) == -1 && + ARM_AM::getT2SOImmVal(-Value) != -1; + } bool isSetEndImm() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); @@ -672,7 +852,7 @@ public: bool isBitfield() const { return Kind == k_BitfieldDescriptor; } bool isPostIdxRegShifted() const { return Kind == k_PostIndexRegister; } bool isPostIdxReg() const { - return Kind == k_PostIndexRegister && PostIdxReg.ShiftTy == ARM_AM::no_shift; + return Kind == k_PostIndexRegister && PostIdxReg.ShiftTy ==ARM_AM::no_shift; } bool isMemNoOffset(bool alignOK = false) const { if (!isMemory()) @@ -681,6 +861,17 @@ public: return Memory.OffsetRegNum == 0 && Memory.OffsetImm == 0 && (alignOK || Memory.Alignment == 0); } + bool isMemPCRelImm12() const { + if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) + return false; + // Base register must be PC. + if (Memory.BaseRegNum != ARM::PC) + return false; + // Immediate offset in range [-4095, 4095]. + if (!Memory.OffsetImm) return true; + int64_t Val = Memory.OffsetImm->getValue(); + return (Val > -4096 && Val < 4096) || (Val == INT32_MIN); + } bool isAlignedMemory() const { return isMemNoOffset(true); } @@ -694,8 +885,7 @@ public: return Val > -4096 && Val < 4096; } bool isAM2OffsetImm() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; // Immediate offset in range [-4095, 4095]. const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; @@ -703,6 +893,11 @@ public: return Val > -4096 && Val < 4096; } bool isAddrMode3() const { + // If we have an immediate that's not a constant, treat it as a label + // reference needing a fixup. If it is a constant, it's something else + // and we reject it. + if (isImm() && !isa<MCConstantExpr>(getImm())) + return true; if (!isMemory() || Memory.Alignment != 0) return false; // No shifts are legal for AM3. if (Memory.ShiftType != ARM_AM::no_shift) return false; @@ -726,6 +921,11 @@ public: return (Val > -256 && Val < 256) || Val == INT32_MIN; } bool isAddrMode5() const { + // If we have an immediate that's not a constant, treat it as a label + // reference needing a fixup. If it is a constant, it's something else + // and we reject it. + if (isImm() && !isa<MCConstantExpr>(getImm())) + return true; if (!isMemory() || Memory.Alignment != 0) return false; // Check for register offset. if (Memory.OffsetRegNum) return false; @@ -733,7 +933,7 @@ public: if (!Memory.OffsetImm) return true; int64_t Val = Memory.OffsetImm->getValue(); return (Val >= -1020 && Val <= 1020 && ((Val & 3) == 0)) || - Val == INT32_MIN; + Val == INT32_MIN; } bool isMemTBB() const { if (!isMemory() || !Memory.OffsetRegNum || Memory.isNegative || @@ -810,6 +1010,11 @@ public: return Val >= 0 && Val <= 1020 && (Val % 4) == 0; } bool isMemImm8s4Offset() const { + // If we have an immediate that's not a constant, treat it as a label + // reference needing a fixup. If it is a constant, it's something else + // and we reject it. + if (isImm() && !isa<MCConstantExpr>(getImm())) + return true; if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) return false; // Immediate offset a multiple of 4 in range [-1020, 1020]. @@ -828,6 +1033,8 @@ public: bool isMemImm8Offset() const { if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) return false; + // Base reg of PC isn't allowed for these encodings. 
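+ // (PC-based forms are instead classified by the MemPCRelImm12 operand
+ // added above; an explanatory note inferred from these predicates, not
+ // part of the patch.)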
+ if (Memory.BaseRegNum == ARM::PC) return false; // Immediate offset in range [-255, 255]. if (!Memory.OffsetImm) return true; int64_t Val = Memory.OffsetImm->getValue(); @@ -844,18 +1051,14 @@ public: bool isMemNegImm8Offset() const { if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) return false; + // Base reg of PC isn't allowed for these encodings. + if (Memory.BaseRegNum == ARM::PC) return false; // Immediate offset in range [-255, -1]. - if (!Memory.OffsetImm) return true; + if (!Memory.OffsetImm) return false; int64_t Val = Memory.OffsetImm->getValue(); - return Val > -256 && Val < 0; + return (Val == INT32_MIN) || (Val > -256 && Val < 0); } bool isMemUImm12Offset() const { - // If we have an immediate that's not a constant, treat it as a label - // reference needing a fixup. If it is a constant, it's something else - // and we reject it. - if (Kind == k_Immediate && !isa<MCConstantExpr>(getImm())) - return true; - if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) return false; // Immediate offset in range [0, 4095]. @@ -867,7 +1070,7 @@ public: // If we have an immediate that's not a constant, treat it as a label // reference needing a fixup. If it is a constant, it's something else // and we reject it. - if (Kind == k_Immediate && !isa<MCConstantExpr>(getImm())) + if (isImm() && !isa<MCConstantExpr>(getImm())) return true; if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) @@ -878,16 +1081,14 @@ public: return (Val > -4096 && Val < 4096) || (Val == INT32_MIN); } bool isPostIdxImm8() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Val = CE->getValue(); return (Val > -256 && Val < 256) || (Val == INT32_MIN); } bool isPostIdxImm8s4() const { - if (Kind != k_Immediate) - return false; + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Val = CE->getValue(); @@ -898,6 +1099,188 @@ public: bool isMSRMask() const { return Kind == k_MSRMask; } bool isProcIFlags() const { return Kind == k_ProcIFlags; } + // NEON operands. 
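+ // (Illustrative examples of the syntaxes classified below, assuming
+ // standard NEON assembly: "{d0, d1}" is a single-spaced list,
+ // "{d0, d2, d4}" a double-spaced one, "{d0[]}" an all-lanes form, and
+ // "{d0[1]}" an indexed lane.)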
+ bool isSingleSpacedVectorList() const { + return Kind == k_VectorList && !VectorList.isDoubleSpaced; + } + bool isDoubleSpacedVectorList() const { + return Kind == k_VectorList && VectorList.isDoubleSpaced; + } + bool isVecListOneD() const { + if (!isSingleSpacedVectorList()) return false; + return VectorList.Count == 1; + } + + bool isVecListDPair() const { + if (!isSingleSpacedVectorList()) return false; + return (ARMMCRegisterClasses[ARM::DPairRegClassID] + .contains(VectorList.RegNum)); + } + + bool isVecListThreeD() const { + if (!isSingleSpacedVectorList()) return false; + return VectorList.Count == 3; + } + + bool isVecListFourD() const { + if (!isSingleSpacedVectorList()) return false; + return VectorList.Count == 4; + } + + bool isVecListDPairSpaced() const { + if (isSingleSpacedVectorList()) return false; + return (ARMMCRegisterClasses[ARM::DPairSpcRegClassID] + .contains(VectorList.RegNum)); + } + + bool isVecListThreeQ() const { + if (!isDoubleSpacedVectorList()) return false; + return VectorList.Count == 3; + } + + bool isVecListFourQ() const { + if (!isDoubleSpacedVectorList()) return false; + return VectorList.Count == 4; + } + + bool isSingleSpacedVectorAllLanes() const { + return Kind == k_VectorListAllLanes && !VectorList.isDoubleSpaced; + } + bool isDoubleSpacedVectorAllLanes() const { + return Kind == k_VectorListAllLanes && VectorList.isDoubleSpaced; + } + bool isVecListOneDAllLanes() const { + if (!isSingleSpacedVectorAllLanes()) return false; + return VectorList.Count == 1; + } + + bool isVecListDPairAllLanes() const { + if (!isSingleSpacedVectorAllLanes()) return false; + return (ARMMCRegisterClasses[ARM::DPairRegClassID] + .contains(VectorList.RegNum)); + } + + bool isVecListDPairSpacedAllLanes() const { + if (!isDoubleSpacedVectorAllLanes()) return false; + return VectorList.Count == 2; + } + + bool isVecListThreeDAllLanes() const { + if (!isSingleSpacedVectorAllLanes()) return false; + return VectorList.Count == 3; + } + + bool isVecListThreeQAllLanes() const { + if (!isDoubleSpacedVectorAllLanes()) return false; + return VectorList.Count == 3; + } + + bool isVecListFourDAllLanes() const { + if (!isSingleSpacedVectorAllLanes()) return false; + return VectorList.Count == 4; + } + + bool isVecListFourQAllLanes() const { + if (!isDoubleSpacedVectorAllLanes()) return false; + return VectorList.Count == 4; + } + + bool isSingleSpacedVectorIndexed() const { + return Kind == k_VectorListIndexed && !VectorList.isDoubleSpaced; + } + bool isDoubleSpacedVectorIndexed() const { + return Kind == k_VectorListIndexed && VectorList.isDoubleSpaced; + } + bool isVecListOneDByteIndexed() const { + if (!isSingleSpacedVectorIndexed()) return false; + return VectorList.Count == 1 && VectorList.LaneIndex <= 7; + } + + bool isVecListOneDHWordIndexed() const { + if (!isSingleSpacedVectorIndexed()) return false; + return VectorList.Count == 1 && VectorList.LaneIndex <= 3; + } + + bool isVecListOneDWordIndexed() const { + if (!isSingleSpacedVectorIndexed()) return false; + return VectorList.Count == 1 && VectorList.LaneIndex <= 1; + } + + bool isVecListTwoDByteIndexed() const { + if (!isSingleSpacedVectorIndexed()) return false; + return VectorList.Count == 2 && VectorList.LaneIndex <= 7; + } + + bool isVecListTwoDHWordIndexed() const { + if (!isSingleSpacedVectorIndexed()) return false; + return VectorList.Count == 2 && VectorList.LaneIndex <= 3; + } + + bool isVecListTwoQWordIndexed() const { + if (!isDoubleSpacedVectorIndexed()) return false; + return VectorList.Count == 2 && 
VectorList.LaneIndex <= 1; + } + + bool isVecListTwoQHWordIndexed() const { + if (!isDoubleSpacedVectorIndexed()) return false; + return VectorList.Count == 2 && VectorList.LaneIndex <= 3; + } + + bool isVecListTwoDWordIndexed() const { + if (!isSingleSpacedVectorIndexed()) return false; + return VectorList.Count == 2 && VectorList.LaneIndex <= 1; + } + + bool isVecListThreeDByteIndexed() const { + if (!isSingleSpacedVectorIndexed()) return false; + return VectorList.Count == 3 && VectorList.LaneIndex <= 7; + } + + bool isVecListThreeDHWordIndexed() const { + if (!isSingleSpacedVectorIndexed()) return false; + return VectorList.Count == 3 && VectorList.LaneIndex <= 3; + } + + bool isVecListThreeQWordIndexed() const { + if (!isDoubleSpacedVectorIndexed()) return false; + return VectorList.Count == 3 && VectorList.LaneIndex <= 1; + } + + bool isVecListThreeQHWordIndexed() const { + if (!isDoubleSpacedVectorIndexed()) return false; + return VectorList.Count == 3 && VectorList.LaneIndex <= 3; + } + + bool isVecListThreeDWordIndexed() const { + if (!isSingleSpacedVectorIndexed()) return false; + return VectorList.Count == 3 && VectorList.LaneIndex <= 1; + } + + bool isVecListFourDByteIndexed() const { + if (!isSingleSpacedVectorIndexed()) return false; + return VectorList.Count == 4 && VectorList.LaneIndex <= 7; + } + + bool isVecListFourDHWordIndexed() const { + if (!isSingleSpacedVectorIndexed()) return false; + return VectorList.Count == 4 && VectorList.LaneIndex <= 3; + } + + bool isVecListFourQWordIndexed() const { + if (!isDoubleSpacedVectorIndexed()) return false; + return VectorList.Count == 4 && VectorList.LaneIndex <= 1; + } + + bool isVecListFourQHWordIndexed() const { + if (!isDoubleSpacedVectorIndexed()) return false; + return VectorList.Count == 4 && VectorList.LaneIndex <= 3; + } + + bool isVecListFourDWordIndexed() const { + if (!isSingleSpacedVectorIndexed()) return false; + return VectorList.Count == 4 && VectorList.LaneIndex <= 1; + } + bool isVectorIndex8() const { if (Kind != k_VectorIndex) return false; return VectorIndex.Val < 8; @@ -911,7 +1294,82 @@ public: return VectorIndex.Val < 2; } + bool isNEONi8splat() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + // Must be a constant. + if (!CE) return false; + int64_t Value = CE->getValue(); + // i8 value splatted across 8 bytes. The immediate is just the 8-bit + // value. + return Value >= 0 && Value < 256; + } + + bool isNEONi16splat() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + // Must be a constant. + if (!CE) return false; + int64_t Value = CE->getValue(); + // i16 value in the range [0,255] or [0x0100, 0xff00] + return (Value >= 0 && Value < 256) || (Value >= 0x0100 && Value <= 0xff00); + } + + bool isNEONi32splat() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + // Must be a constant. + if (!CE) return false; + int64_t Value = CE->getValue(); + // i32 value with set bits only in one byte X000, 0X00, 00X0, or 000X. + return (Value >= 0 && Value < 256) || + (Value >= 0x0100 && Value <= 0xff00) || + (Value >= 0x010000 && Value <= 0xff0000) || + (Value >= 0x01000000 && Value <= 0xff000000); + } + bool isNEONi32vmov() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + // Must be a constant.
+ if (!CE) return false; + int64_t Value = CE->getValue(); + // i32 value with set bits only in one byte X000, 0X00, 00X0, or 000X, + // for VMOV/VMVN only, 00Xf or 0Xff are also accepted. + return (Value >= 0 && Value < 256) || + (Value >= 0x0100 && Value <= 0xff00) || + (Value >= 0x010000 && Value <= 0xff0000) || + (Value >= 0x01000000 && Value <= 0xff000000) || + (Value >= 0x01ff && Value <= 0xffff && (Value & 0xff) == 0xff) || + (Value >= 0x01ffff && Value <= 0xffffff && (Value & 0xffff) == 0xffff); + } + bool isNEONi32vmovNeg() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + // Must be a constant. + if (!CE) return false; + int64_t Value = ~CE->getValue(); + // i32 value with set bits only in one byte X000, 0X00, 00X0, or 000X, + // for VMOV/VMVN only, 00Xf or 0Xff are also accepted. + return (Value >= 0 && Value < 256) || + (Value >= 0x0100 && Value <= 0xff00) || + (Value >= 0x010000 && Value <= 0xff0000) || + (Value >= 0x01000000 && Value <= 0xff000000) || + (Value >= 0x01ff && Value <= 0xffff && (Value & 0xff) == 0xff) || + (Value >= 0x01ffff && Value <= 0xffffff && (Value & 0xffff) == 0xffff); + } + + bool isNEONi64splat() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + // Must be a constant. + if (!CE) return false; + uint64_t Value = CE->getValue(); + // i64 value with each byte being either 0 or 0xff. + for (unsigned i = 0; i < 8; ++i) + if ((Value & 0xff) != 0 && (Value & 0xff) != 0xff) return false; + return true; + } void addExpr(MCInst &Inst, const MCExpr *Expr) const { // Add as immediates when possible. Null MCExpr = 0. @@ -967,7 +1425,8 @@ public: void addRegShiftedRegOperands(MCInst &Inst, unsigned N) const { assert(N == 3 && "Invalid number of operands!"); - assert(isRegShiftedReg() && "addRegShiftedRegOperands() on non RegShiftedReg!"); + assert(isRegShiftedReg() && + "addRegShiftedRegOperands() on non RegShiftedReg!"); Inst.addOperand(MCOperand::CreateReg(RegShiftedReg.SrcReg)); Inst.addOperand(MCOperand::CreateReg(RegShiftedReg.ShiftReg)); Inst.addOperand(MCOperand::CreateImm( @@ -976,7 +1435,8 @@ public: void addRegShiftedImmOperands(MCInst &Inst, unsigned N) const { assert(N == 2 && "Invalid number of operands!"); - assert(isRegShiftedImm() && "addRegShiftedImmOperands() on non RegShiftedImm!"); + assert(isRegShiftedImm() && + "addRegShiftedImmOperands() on non RegShiftedImm!"); Inst.addOperand(MCOperand::CreateReg(RegShiftedImm.SrcReg)); Inst.addOperand(MCOperand::CreateImm( ARM_AM::getSORegOpc(RegShiftedImm.ShiftTy, RegShiftedImm.ShiftImm))); @@ -1026,9 +1486,23 @@ public: addExpr(Inst, getImm()); } + void addFBits16Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + Inst.addOperand(MCOperand::CreateImm(16 - CE->getValue())); + } + + void addFBits32Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + Inst.addOperand(MCOperand::CreateImm(32 - CE->getValue())); + } + void addFPImmOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getFPImm())); + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + int Val = ARM_AM::getFP32Imm(APInt(32, CE->getValue())); + Inst.addOperand(MCOperand::CreateImm(Val)); } void addImm8s4Operands(MCInst &Inst, unsigned N) 
const { @@ -1047,32 +1521,20 @@ public: Inst.addOperand(MCOperand::CreateImm(CE->getValue() / 4)); } - void addImm0_508s4Operands(MCInst &Inst, unsigned N) const { + void addImm0_508s4NegOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); // The immediate is scaled by four in the encoding and is stored // in the MCInst as such. Lop off the low two bits here. const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - Inst.addOperand(MCOperand::CreateImm(CE->getValue() / 4)); + Inst.addOperand(MCOperand::CreateImm(-(CE->getValue() / 4))); } - void addImm0_255Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - addExpr(Inst, getImm()); - } - - void addImm0_7Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - addExpr(Inst, getImm()); - } - - void addImm0_15Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - addExpr(Inst, getImm()); - } - - void addImm0_31Operands(MCInst &Inst, unsigned N) const { + void addImm0_508s4Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - addExpr(Inst, getImm()); + // The immediate is scaled by four in the encoding and is stored + // in the MCInst as such. Lop off the low two bits here. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + Inst.addOperand(MCOperand::CreateImm(CE->getValue() / 4)); } void addImm1_16Operands(MCInst &Inst, unsigned N) const { @@ -1091,21 +1553,6 @@ public: Inst.addOperand(MCOperand::CreateImm(CE->getValue() - 1)); } - void addImm0_65535Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - addExpr(Inst, getImm()); - } - - void addImm0_65535ExprOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - addExpr(Inst, getImm()); - } - - void addImm24bitOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - addExpr(Inst, getImm()); - } - void addImmThumbSROperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); // The constant encodes as the immediate, except for 32, which encodes as @@ -1115,11 +1562,6 @@ public: Inst.addOperand(MCOperand::CreateImm((Imm == 32 ? 0 : Imm))); } - void addPKHLSLImmOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - addExpr(Inst, getImm()); - } - void addPKHASRImmOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); // An ASR value of 32 encodes as 0, so that's how we want to add it to @@ -1129,19 +1571,44 @@ public: Inst.addOperand(MCOperand::CreateImm(Val == 32 ? 0 : Val)); } - void addARMSOImmOperands(MCInst &Inst, unsigned N) const { + void addT2SOImmNotOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - addExpr(Inst, getImm()); + // The operand is actually a t2_so_imm, but we have its bitwise + // negation in the assembly source, so twiddle it here. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + Inst.addOperand(MCOperand::CreateImm(~CE->getValue())); } - void addT2SOImmOperands(MCInst &Inst, unsigned N) const { + void addT2SOImmNegOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - addExpr(Inst, getImm()); + // The operand is actually a t2_so_imm, but we have its + // negation in the assembly source, so twiddle it here. 
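+ // (E.g. this is what lets "add.w r0, r1, #-4" be accepted and encoded
+ // as the equivalent "sub.w r0, r1, #4"; an illustrative reading of why
+ // the Neg operand forms exist, not taken verbatim from the patch.)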
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + Inst.addOperand(MCOperand::CreateImm(-CE->getValue())); } - void addSetEndImmOperands(MCInst &Inst, unsigned N) const { + void addImm0_4095NegOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - addExpr(Inst, getImm()); + // The operand is actually an imm0_4095, but we have its + // negation in the assembly source, so twiddle it here. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + Inst.addOperand(MCOperand::CreateImm(-CE->getValue())); + } + + void addARMSOImmNotOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + // The operand is actually a so_imm, but we have its bitwise + // negation in the assembly source, so twiddle it here. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + Inst.addOperand(MCOperand::CreateImm(~CE->getValue())); + } + + void addARMSOImmNegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + // The operand is actually a so_imm, but we have its + // negation in the assembly source, so twiddle it here. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + Inst.addOperand(MCOperand::CreateImm(-CE->getValue())); } void addMemBarrierOptOperands(MCInst &Inst, unsigned N) const { @@ -1154,6 +1621,14 @@ public: Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum)); } + void addMemPCRelImm12Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + int32_t Imm = Memory.OffsetImm->getValue(); + // FIXME: Handle #-0 + if (Imm == INT32_MIN) Imm = 0; + Inst.addOperand(MCOperand::CreateImm(Imm)); + } + void addAlignedMemoryOperands(MCInst &Inst, unsigned N) const { assert(N == 2 && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum)); @@ -1196,6 +1671,16 @@ public: void addAddrMode3Operands(MCInst &Inst, unsigned N) const { assert(N == 3 && "Invalid number of operands!"); + // If we have an immediate that's not a constant, treat it as a label + // reference needing a fixup. If it is a constant, it's something else + // and we reject it. + if (isImm()) { + Inst.addOperand(MCOperand::CreateExpr(getImm())); + Inst.addOperand(MCOperand::CreateReg(0)); + Inst.addOperand(MCOperand::CreateImm(0)); + return; + } + int32_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0; if (!Memory.OffsetRegNum) { ARM_AM::AddrOpc AddSub = Val < 0 ? ARM_AM::sub : ARM_AM::add; @@ -1237,6 +1722,15 @@ public: void addAddrMode5Operands(MCInst &Inst, unsigned N) const { assert(N == 2 && "Invalid number of operands!"); + // If we have an immediate that's not a constant, treat it as a label + // reference needing a fixup. If it is a constant, it's something else + // and we reject it. + if (isImm()) { + Inst.addOperand(MCOperand::CreateExpr(getImm())); + Inst.addOperand(MCOperand::CreateImm(0)); + return; + } + // The lower two bits are always zero and as such are not encoded. int32_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() / 4 : 0; ARM_AM::AddrOpc AddSub = Val < 0 ? ARM_AM::sub : ARM_AM::add; @@ -1250,6 +1744,15 @@ public: void addMemImm8s4OffsetOperands(MCInst &Inst, unsigned N) const { assert(N == 2 && "Invalid number of operands!"); + // If we have an immediate that's not a constant, treat it as a label + // reference needing a fixup. If it is a constant, it's something else + // and we reject it. 
+ if (isImm()) { + Inst.addOperand(MCOperand::CreateExpr(getImm())); + Inst.addOperand(MCOperand::CreateImm(0)); + return; + } + int64_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0; Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum)); Inst.addOperand(MCOperand::CreateImm(Val)); @@ -1281,7 +1784,7 @@ public: void addMemUImm12OffsetOperands(MCInst &Inst, unsigned N) const { assert(N == 2 && "Invalid number of operands!"); // If this is an immediate, it's a label reference. - if (Kind == k_Immediate) { + if (isImm()) { addExpr(Inst, getImm()); Inst.addOperand(MCOperand::CreateImm(0)); return; @@ -1296,7 +1799,7 @@ public: void addMemImm12OffsetOperands(MCInst &Inst, unsigned N) const { assert(N == 2 && "Invalid number of operands!"); // If this is an immediate, it's a label reference. - if (Kind == k_Immediate) { + if (isImm()) { addExpr(Inst, getImm()); Inst.addOperand(MCOperand::CreateImm(0)); return; @@ -1322,8 +1825,9 @@ public: void addMemRegOffsetOperands(MCInst &Inst, unsigned N) const { assert(N == 3 && "Invalid number of operands!"); - unsigned Val = ARM_AM::getAM2Opc(Memory.isNegative ? ARM_AM::sub : ARM_AM::add, - Memory.ShiftImm, Memory.ShiftType); + unsigned Val = + ARM_AM::getAM2Opc(Memory.isNegative ? ARM_AM::sub : ARM_AM::add, + Memory.ShiftImm, Memory.ShiftType); Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum)); Inst.addOperand(MCOperand::CreateReg(Memory.OffsetRegNum)); Inst.addOperand(MCOperand::CreateImm(Val)); @@ -1420,6 +1924,17 @@ public: Inst.addOperand(MCOperand::CreateImm(unsigned(getProcIFlags()))); } + void addVecListOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum)); + } + + void addVecListIndexedOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum)); + Inst.addOperand(MCOperand::CreateImm(VectorList.LaneIndex)); + } + void addVectorIndex8Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateImm(getVectorIndex())); @@ -1435,6 +1950,80 @@ public: Inst.addOperand(MCOperand::CreateImm(getVectorIndex())); } + void addNEONi8splatOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + // The immediate encodes the type of constant as well as the value. + // Mask in that this is an i8 splat. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + Inst.addOperand(MCOperand::CreateImm(CE->getValue() | 0xe00)); + } + + void addNEONi16splatOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + // The immediate encodes the type of constant as well as the value. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + unsigned Value = CE->getValue(); + if (Value >= 256) + Value = (Value >> 8) | 0xa00; + else + Value |= 0x800; + Inst.addOperand(MCOperand::CreateImm(Value)); + } + + void addNEONi32splatOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + // The immediate encodes the type of constant as well as the value. 
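+ // (Sketch of the packing, inferred from the shifts below: the low 8
+ // bits carry the byte pattern and the bits above it a cmode-style
+ // selector for which byte of the i32 that pattern occupies, e.g.
+ // 0x0000ab00 becomes 0xab | 0x200.)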
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + unsigned Value = CE->getValue(); + if (Value >= 256 && Value <= 0xff00) + Value = (Value >> 8) | 0x200; + else if (Value > 0xffff && Value <= 0xff0000) + Value = (Value >> 16) | 0x400; + else if (Value > 0xffffff) + Value = (Value >> 24) | 0x600; + Inst.addOperand(MCOperand::CreateImm(Value)); + } + + void addNEONi32vmovOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + // The immediate encodes the type of constant as well as the value. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + unsigned Value = CE->getValue(); + if (Value >= 256 && Value <= 0xffff) + Value = (Value >> 8) | ((Value & 0xff) ? 0xc00 : 0x200); + else if (Value > 0xffff && Value <= 0xffffff) + Value = (Value >> 16) | ((Value & 0xff) ? 0xd00 : 0x400); + else if (Value > 0xffffff) + Value = (Value >> 24) | 0x600; + Inst.addOperand(MCOperand::CreateImm(Value)); + } + + void addNEONi32vmovNegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + // The immediate encodes the type of constant as well as the value. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + unsigned Value = ~CE->getValue(); + if (Value >= 256 && Value <= 0xffff) + Value = (Value >> 8) | ((Value & 0xff) ? 0xc00 : 0x200); + else if (Value > 0xffff && Value <= 0xffffff) + Value = (Value >> 16) | ((Value & 0xff) ? 0xd00 : 0x400); + else if (Value > 0xffffff) + Value = (Value >> 24) | 0x600; + Inst.addOperand(MCOperand::CreateImm(Value)); + } + + void addNEONi64splatOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + // The immediate encodes the type of constant as well as the value. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + uint64_t Value = CE->getValue(); + unsigned Imm = 0; + for (unsigned i = 0; i < 8; ++i, Value >>= 8) { + Imm |= (Value & 1) << i; + } + Inst.addOperand(MCOperand::CreateImm(Imm | 0x1e00)); + } + virtual void print(raw_ostream &OS) const; static ARMOperand *CreateITMask(unsigned Mask, SMLoc S) { @@ -1579,6 +2168,43 @@ public: return Op; } + static ARMOperand *CreateVectorList(unsigned RegNum, unsigned Count, + bool isDoubleSpaced, SMLoc S, SMLoc E) { + ARMOperand *Op = new ARMOperand(k_VectorList); + Op->VectorList.RegNum = RegNum; + Op->VectorList.Count = Count; + Op->VectorList.isDoubleSpaced = isDoubleSpaced; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + + static ARMOperand *CreateVectorListAllLanes(unsigned RegNum, unsigned Count, + bool isDoubleSpaced, + SMLoc S, SMLoc E) { + ARMOperand *Op = new ARMOperand(k_VectorListAllLanes); + Op->VectorList.RegNum = RegNum; + Op->VectorList.Count = Count; + Op->VectorList.isDoubleSpaced = isDoubleSpaced; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + + static ARMOperand *CreateVectorListIndexed(unsigned RegNum, unsigned Count, + unsigned Index, + bool isDoubleSpaced, + SMLoc S, SMLoc E) { + ARMOperand *Op = new ARMOperand(k_VectorListIndexed); + Op->VectorList.RegNum = RegNum; + Op->VectorList.Count = Count; + Op->VectorList.LaneIndex = Index; + Op->VectorList.isDoubleSpaced = isDoubleSpaced; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + static ARMOperand *CreateVectorIndex(unsigned Idx, SMLoc S, SMLoc E, MCContext &Ctx) { ARMOperand *Op = new ARMOperand(k_VectorIndex); @@ -1596,14 +2222,6 @@ public: return Op; } - static ARMOperand *CreateFPImm(unsigned Val, SMLoc S, MCContext &Ctx) { - ARMOperand *Op = new 
ARMOperand(k_FPImmediate); - Op->FPImm.Val = Val; - Op->StartLoc = S; - Op->EndLoc = S; - return Op; - } - static ARMOperand *CreateMem(unsigned BaseRegNum, const MCConstantExpr *OffsetImm, unsigned OffsetRegNum, @@ -1668,10 +2286,6 @@ public: void ARMOperand::print(raw_ostream &OS) const { switch (Kind) { - case k_FPImmediate: - OS << "<fpimm " << getFPImm() << "(" << ARM_AM::getFPImmFloat(getFPImm()) - << ") >"; - break; case k_CondCode: OS << "<ARMCC::" << ARMCondCodeToString(getCondCode()) << ">"; break; @@ -1679,9 +2293,10 @@ void ARMOperand::print(raw_ostream &OS) const { OS << "<ccout " << getReg() << ">"; break; case k_ITCondMask: { - static char MaskStr[][6] = { "()", "(t)", "(e)", "(tt)", "(et)", "(te)", - "(ee)", "(ttt)", "(ett)", "(tet)", "(eet)", "(tte)", "(ete)", - "(tee)", "(eee)" }; + static const char *MaskStr[] = { + "()", "(t)", "(e)", "(tt)", "(et)", "(te)", "(ee)", "(ttt)", "(ett)", + "(tet)", "(eet)", "(tte)", "(ete)", "(tee)", "(eee)" + }; assert((ITMask.Mask & 0xf) == ITMask.Mask); OS << "<it-mask " << MaskStr[ITMask.Mask] << ">"; break; @@ -1735,18 +2350,15 @@ void ARMOperand::print(raw_ostream &OS) const { break; case k_ShiftedRegister: OS << "<so_reg_reg " - << RegShiftedReg.SrcReg - << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(RegShiftedReg.ShiftImm)) - << ", " << RegShiftedReg.ShiftReg << ", " - << ARM_AM::getSORegOffset(RegShiftedReg.ShiftImm) - << ">"; + << RegShiftedReg.SrcReg << " " + << ARM_AM::getShiftOpcStr(RegShiftedReg.ShiftTy) + << " " << RegShiftedReg.ShiftReg << ">"; break; case k_ShiftedImmediate: OS << "<so_reg_imm " - << RegShiftedImm.SrcReg - << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(RegShiftedImm.ShiftImm)) - << ", " << ARM_AM::getSORegOffset(RegShiftedImm.ShiftImm) - << ">"; + << RegShiftedImm.SrcReg << " " + << ARM_AM::getShiftOpcStr(RegShiftedImm.ShiftTy) + << " #" << RegShiftedImm.ShiftImm << ">"; break; case k_RotateImmediate: OS << "<ror " << " #" << (RotImm.Imm * 8) << ">"; @@ -1770,6 +2382,18 @@ void ARMOperand::print(raw_ostream &OS) const { OS << ">"; break; } + case k_VectorList: + OS << "<vector_list " << VectorList.Count << " * " + << VectorList.RegNum << ">"; + break; + case k_VectorListAllLanes: + OS << "<vector_list(all lanes) " << VectorList.Count << " * " + << VectorList.RegNum << ">"; + break; + case k_VectorListIndexed: + OS << "<vector_list(lane " << VectorList.LaneIndex << ") " + << VectorList.Count << " * " << VectorList.RegNum << ">"; + break; case k_Token: OS << "'" << getToken() << "'"; break; @@ -1788,7 +2412,9 @@ static unsigned MatchRegisterName(StringRef Name); bool ARMAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) { + StartLoc = Parser.getTok().getLoc(); RegNo = tryParseRegister(); + EndLoc = Parser.getTok().getLoc(); return (RegNo == (unsigned)-1); } @@ -1801,10 +2427,7 @@ int ARMAsmParser::tryParseRegister() { const AsmToken &Tok = Parser.getTok(); if (Tok.isNot(AsmToken::Identifier)) return -1; - // FIXME: Validate register for the current architecture; we have to do - // validation later, so maybe there is no need for this here. - std::string upperCase = Tok.getString().str(); - std::string lowerCase = LowercaseString(upperCase); + std::string lowerCase = Tok.getString().lower(); unsigned RegNum = MatchRegisterName(lowerCase); if (!RegNum) { RegNum = StringSwitch<unsigned>(lowerCase) @@ -1812,44 +2435,38 @@ int ARMAsmParser::tryParseRegister() { .Case("r14", ARM::LR) .Case("r15", ARM::PC) .Case("ip", ARM::R12) + // Additional register name aliases for 'gas' compatibility. 
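+ // (These follow the old APCS naming convention: a1-a4 are the argument
+ // registers r0-r3, v1-v8 the variable registers r4-r11, and sb, sl, fp
+ // name r9, r10, r11; a background note, not part of the patch itself.)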
+ .Case("a1", ARM::R0) + .Case("a2", ARM::R1) + .Case("a3", ARM::R2) + .Case("a4", ARM::R3) + .Case("v1", ARM::R4) + .Case("v2", ARM::R5) + .Case("v3", ARM::R6) + .Case("v4", ARM::R7) + .Case("v5", ARM::R8) + .Case("v6", ARM::R9) + .Case("v7", ARM::R10) + .Case("v8", ARM::R11) + .Case("sb", ARM::R9) + .Case("sl", ARM::R10) + .Case("fp", ARM::R11) .Default(0); } - if (!RegNum) return -1; + if (!RegNum) { + // Check for aliases registered via .req. Canonicalize to lower case. + // That's more consistent since register names are case insensitive, and + // it's how the original entry was passed in from MC/MCParser/AsmParser. + StringMap<unsigned>::const_iterator Entry = RegisterReqs.find(lowerCase); + // If no match, return failure. + if (Entry == RegisterReqs.end()) + return -1; + Parser.Lex(); // Eat identifier token. + return Entry->getValue(); + } Parser.Lex(); // Eat identifier token. -#if 0 - // Also check for an index operand. This is only legal for vector registers, - // but that'll get caught OK in operand matching, so we don't need to - // explicitly filter everything else out here. - if (Parser.getTok().is(AsmToken::LBrac)) { - SMLoc SIdx = Parser.getTok().getLoc(); - Parser.Lex(); // Eat left bracket token. - - const MCExpr *ImmVal; - SMLoc ExprLoc = Parser.getTok().getLoc(); - if (getParser().ParseExpression(ImmVal)) - return MatchOperand_ParseFail; - const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal); - if (!MCE) { - TokError("immediate value expected for vector index"); - return MatchOperand_ParseFail; - } - - SMLoc E = Parser.getTok().getLoc(); - if (Parser.getTok().isNot(AsmToken::RBrac)) { - Error(E, "']' expected"); - return MatchOperand_ParseFail; - } - - Parser.Lex(); // Eat right bracket token. - - Operands.push_back(ARMOperand::CreateVectorIndex(MCE->getValue(), - SIdx, E, - getContext())); - } -#endif - return RegNum; } @@ -1864,9 +2481,9 @@ int ARMAsmParser::tryParseShiftRegister( const AsmToken &Tok = Parser.getTok(); assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier"); - std::string upperCase = Tok.getString().str(); - std::string lowerCase = LowercaseString(upperCase); + std::string lowerCase = Tok.getString().lower(); ARM_AM::ShiftOpc ShiftTy = StringSwitch<ARM_AM::ShiftOpc>(lowerCase) + .Case("asl", ARM_AM::lsl) .Case("lsl", ARM_AM::lsl) .Case("lsr", ARM_AM::lsr) .Case("asr", ARM_AM::asr) @@ -1895,7 +2512,8 @@ int ARMAsmParser::tryParseShiftRegister( ShiftReg = SrcReg; } else { // Figure out if this is shifted by a constant or a register (for non-RRX). - if (Parser.getTok().is(AsmToken::Hash)) { + if (Parser.getTok().is(AsmToken::Hash) || + Parser.getTok().is(AsmToken::Dollar)) { Parser.Lex(); // Eat hash. SMLoc ImmLoc = Parser.getTok().getLoc(); const MCExpr *ShiftExpr = 0; @@ -1919,6 +2537,10 @@ int ARMAsmParser::tryParseShiftRegister( Error(ImmLoc, "immediate shift value out of range"); return -1; } + // shift by zero is a nop. Always send it through as lsl. + // ('as' compatibility) + if (Imm == 0) + ShiftTy = ARM_AM::lsl; } else if (Parser.getTok().is(AsmToken::Identifier)) { ShiftReg = tryParseRegister(); SMLoc L = Parser.getTok().getLoc(); @@ -1976,20 +2598,15 @@ tryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { Parser.Lex(); // Eat left bracket token. 
const MCExpr *ImmVal; - SMLoc ExprLoc = Parser.getTok().getLoc(); if (getParser().ParseExpression(ImmVal)) - return MatchOperand_ParseFail; + return true; const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal); - if (!MCE) { - TokError("immediate value expected for vector index"); - return MatchOperand_ParseFail; - } + if (!MCE) + return TokError("immediate value expected for vector index"); SMLoc E = Parser.getTok().getLoc(); - if (Parser.getTok().isNot(AsmToken::RBrac)) { - Error(E, "']' expected"); - return MatchOperand_ParseFail; - } + if (Parser.getTok().isNot(AsmToken::RBrac)) + return Error(E, "']' expected"); Parser.Lex(); // Eat right bracket token. @@ -2008,7 +2625,7 @@ static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) { // Use the same layout as the tablegen'erated register name matcher. Ugly, // but efficient. switch (Name.size()) { - default: break; + default: return -1; case 2: if (Name[0] != CoprocOp) return -1; @@ -2025,7 +2642,6 @@ static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) { case '8': return 8; case '9': return 9; } - break; case 3: if (Name[0] != CoprocOp || Name[1] != '1') return -1; @@ -2038,10 +2654,7 @@ static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) { case '4': return 14; case '5': return 15; } - break; } - - return -1; } /// parseITCondCode - Try to parse a condition code for an IT instruction. @@ -2161,7 +2774,7 @@ static unsigned getNextRegister(unsigned Reg) { if (!ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg)) return Reg + 1; switch(Reg) { - default: assert(0 && "Invalid GPR number!"); + default: llvm_unreachable("Invalid GPR number!"); case ARM::R0: return ARM::R1; case ARM::R1: return ARM::R2; case ARM::R2: return ARM::R3; case ARM::R3: return ARM::R4; case ARM::R4: return ARM::R5; case ARM::R5: return ARM::R6; @@ -2173,6 +2786,29 @@ static unsigned getNextRegister(unsigned Reg) { } } +// Return the low-subreg of a given Q register. +static unsigned getDRegFromQReg(unsigned QReg) { + switch (QReg) { + default: llvm_unreachable("expected a Q register!"); + case ARM::Q0: return ARM::D0; + case ARM::Q1: return ARM::D2; + case ARM::Q2: return ARM::D4; + case ARM::Q3: return ARM::D6; + case ARM::Q4: return ARM::D8; + case ARM::Q5: return ARM::D10; + case ARM::Q6: return ARM::D12; + case ARM::Q7: return ARM::D14; + case ARM::Q8: return ARM::D16; + case ARM::Q9: return ARM::D18; + case ARM::Q10: return ARM::D20; + case ARM::Q11: return ARM::D22; + case ARM::Q12: return ARM::D24; + case ARM::Q13: return ARM::D26; + case ARM::Q14: return ARM::D28; + case ARM::Q15: return ARM::D30; + } +} + /// Parse a register list. bool ARMAsmParser:: parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { @@ -2188,7 +2824,17 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { if (Reg == -1) return Error(RegLoc, "register expected"); - MCRegisterClass *RC; + // The reglist instructions have at most 16 registers, so reserve + // space for that many. + SmallVector<std::pair<unsigned, SMLoc>, 16> Registers; + + // Allow Q regs and just interpret them as the two D sub-registers. 
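+ // (E.g. "vpush {q0}" is accepted and treated as "vpush {d0, d1}"; an
+ // illustrative expansion of the Q-to-D mapping implemented below.)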
+ if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) { + Reg = getDRegFromQReg(Reg); + Registers.push_back(std::pair<unsigned, SMLoc>(Reg, RegLoc)); + ++Reg; + } + const MCRegisterClass *RC; if (ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg)) RC = &ARMMCRegisterClasses[ARM::GPRRegClassID]; else if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg)) @@ -2198,10 +2844,7 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { else return Error(RegLoc, "invalid register in register list"); - // The reglist instructions have at most 16 registers, so reserve - // space for that many. - SmallVector<std::pair<unsigned, SMLoc>, 16> Registers; - // Store the first register. + // Store the register. Registers.push_back(std::pair<unsigned, SMLoc>(Reg, RegLoc)); // This starts immediately after the first register token in the list, @@ -2210,11 +2853,14 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { while (Parser.getTok().is(AsmToken::Comma) || Parser.getTok().is(AsmToken::Minus)) { if (Parser.getTok().is(AsmToken::Minus)) { - Parser.Lex(); // Eat the comma. + Parser.Lex(); // Eat the minus. SMLoc EndLoc = Parser.getTok().getLoc(); int EndReg = tryParseRegister(); if (EndReg == -1) return Error(EndLoc, "register expected"); + // Allow Q regs and just interpret them as the two D sub-registers. + if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(EndReg)) + EndReg = getDRegFromQReg(EndReg) + 1; // If the register is the same as the start reg, there's nothing // more to do. if (Reg == EndReg) @@ -2236,15 +2882,31 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { Parser.Lex(); // Eat the comma. RegLoc = Parser.getTok().getLoc(); int OldReg = Reg; + const AsmToken RegTok = Parser.getTok(); Reg = tryParseRegister(); if (Reg == -1) return Error(RegLoc, "register expected"); + // Allow Q regs and just interpret them as the two D sub-registers. + bool isQReg = false; + if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) { + Reg = getDRegFromQReg(Reg); + isQReg = true; + } // The register must be in the same register class as the first. if (!RC->contains(Reg)) return Error(RegLoc, "invalid register in register list"); // List must be monotonically increasing. - if (getARMRegisterNumbering(Reg) <= getARMRegisterNumbering(OldReg)) - return Error(RegLoc, "register list not in ascending order"); + if (getARMRegisterNumbering(Reg) < getARMRegisterNumbering(OldReg)) { + if (ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg)) + Warning(RegLoc, "register list not in ascending order"); + else + return Error(RegLoc, "register list not in ascending order"); + } + if (getARMRegisterNumbering(Reg) == getARMRegisterNumbering(OldReg)) { + Warning(RegLoc, "duplicated register (" + RegTok.getString() + + ") in register list"); + continue; + } // VFP register lists must also be contiguous. // It's OK to use the enumeration values directly here rather, as the // VFP register classes have the enum sorted properly. @@ -2252,6 +2914,8 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { Reg != OldReg + 1) return Error(RegLoc, "non-contiguous register range"); Registers.push_back(std::pair<unsigned, SMLoc>(Reg, RegLoc)); + if (isQReg) + Registers.push_back(std::pair<unsigned, SMLoc>(++Reg, RegLoc)); } SMLoc E = Parser.getTok().getLoc(); @@ -2259,10 +2923,319 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { return Error(E, "'}' expected"); Parser.Lex(); // Eat '}' token. 
+ // Push the register list operand. Operands.push_back(ARMOperand::CreateRegList(Registers, S, E)); + + // The ARM system instruction variants for LDM/STM have a '^' token here. + if (Parser.getTok().is(AsmToken::Caret)) { + Operands.push_back(ARMOperand::CreateToken("^",Parser.getTok().getLoc())); + Parser.Lex(); // Eat '^' token. + } + return false; } +// Helper function to parse the lane index for vector lists. +ARMAsmParser::OperandMatchResultTy ARMAsmParser:: +parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index) { + Index = 0; // Always return a defined index value. + if (Parser.getTok().is(AsmToken::LBrac)) { + Parser.Lex(); // Eat the '['. + if (Parser.getTok().is(AsmToken::RBrac)) { + // "Dn[]" is the 'all lanes' syntax. + LaneKind = AllLanes; + Parser.Lex(); // Eat the ']'. + return MatchOperand_Success; + } + + // There's an optional '#' token here. Normally there wouldn't be, but + // inline assembly puts one in, and it's friendly to accept that. + if (Parser.getTok().is(AsmToken::Hash)) + Parser.Lex(); // Eat the '#' + + const MCExpr *LaneIndex; + SMLoc Loc = Parser.getTok().getLoc(); + if (getParser().ParseExpression(LaneIndex)) { + Error(Loc, "illegal expression"); + return MatchOperand_ParseFail; + } + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(LaneIndex); + if (!CE) { + Error(Loc, "lane index must be empty or an integer"); + return MatchOperand_ParseFail; + } + if (Parser.getTok().isNot(AsmToken::RBrac)) { + Error(Parser.getTok().getLoc(), "']' expected"); + return MatchOperand_ParseFail; + } + Parser.Lex(); // Eat the ']'. + int64_t Val = CE->getValue(); + + // FIXME: Make this range check context sensitive for .8, .16, .32. + if (Val < 0 || Val > 7) { + Error(Parser.getTok().getLoc(), "lane index out of range"); + return MatchOperand_ParseFail; + } + Index = Val; + LaneKind = IndexedLane; + return MatchOperand_Success; + } + LaneKind = NoLanes; + return MatchOperand_Success; +} + +// Parse a vector register list. +ARMAsmParser::OperandMatchResultTy ARMAsmParser:: +parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + VectorLaneTy LaneKind; + unsigned LaneIndex; + SMLoc S = Parser.getTok().getLoc(); + // As an extension (to match gas), support a plain D register or Q register + // (without enclosing curly braces) as a single or double entry list, + // respectively.
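The three LaneKind values parseVectorLane produces correspond to three surface syntaxes; roughly (a classification sketch only — the real routine works on lexer tokens, not strings):

#include <string>

enum LaneSyntax { None, All, Indexed };

// "vld1.32 {d0}, [r0]"    -> None    (no bracket suffix)
// "vld1.32 {d0[]}, [r0]"  -> All     ("[]" selects every lane)
// "vld1.32 {d0[1]}, [r0]" -> Indexed (one specific lane)
static LaneSyntax classifyLaneSuffix(const std::string &Suffix) {
  if (Suffix.empty())  return None;
  if (Suffix == "[]")  return All;
  return Indexed;      // assumed here to be "[<integer>]"
}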
+ if (Parser.getTok().is(AsmToken::Identifier)) { + int Reg = tryParseRegister(); + if (Reg == -1) + return MatchOperand_NoMatch; + SMLoc E = Parser.getTok().getLoc(); + if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg)) { + OperandMatchResultTy Res = parseVectorLane(LaneKind, LaneIndex); + if (Res != MatchOperand_Success) + return Res; + switch (LaneKind) { + case NoLanes: + E = Parser.getTok().getLoc(); + Operands.push_back(ARMOperand::CreateVectorList(Reg, 1, false, S, E)); + break; + case AllLanes: + E = Parser.getTok().getLoc(); + Operands.push_back(ARMOperand::CreateVectorListAllLanes(Reg, 1, false, + S, E)); + break; + case IndexedLane: + Operands.push_back(ARMOperand::CreateVectorListIndexed(Reg, 1, + LaneIndex, + false, S, E)); + break; + } + return MatchOperand_Success; + } + if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) { + Reg = getDRegFromQReg(Reg); + OperandMatchResultTy Res = parseVectorLane(LaneKind, LaneIndex); + if (Res != MatchOperand_Success) + return Res; + switch (LaneKind) { + case NoLanes: + E = Parser.getTok().getLoc(); + Reg = MRI->getMatchingSuperReg(Reg, ARM::dsub_0, + &ARMMCRegisterClasses[ARM::DPairRegClassID]); + Operands.push_back(ARMOperand::CreateVectorList(Reg, 2, false, S, E)); + break; + case AllLanes: + E = Parser.getTok().getLoc(); + Reg = MRI->getMatchingSuperReg(Reg, ARM::dsub_0, + &ARMMCRegisterClasses[ARM::DPairRegClassID]); + Operands.push_back(ARMOperand::CreateVectorListAllLanes(Reg, 2, false, + S, E)); + break; + case IndexedLane: + Operands.push_back(ARMOperand::CreateVectorListIndexed(Reg, 2, + LaneIndex, + false, S, E)); + break; + } + return MatchOperand_Success; + } + Error(S, "vector register expected"); + return MatchOperand_ParseFail; + } + + if (Parser.getTok().isNot(AsmToken::LCurly)) + return MatchOperand_NoMatch; + + Parser.Lex(); // Eat '{' token. + SMLoc RegLoc = Parser.getTok().getLoc(); + + int Reg = tryParseRegister(); + if (Reg == -1) { + Error(RegLoc, "register expected"); + return MatchOperand_ParseFail; + } + unsigned Count = 1; + int Spacing = 0; + unsigned FirstReg = Reg; + // The list is of D registers, but we also allow Q regs and just interpret + // them as the two D sub-registers. + if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) { + FirstReg = Reg = getDRegFromQReg(Reg); + Spacing = 1; // double-spacing requires explicit D registers, otherwise + // it's ambiguous with four-register single spaced. + ++Reg; + ++Count; + } + if (parseVectorLane(LaneKind, LaneIndex) != MatchOperand_Success) + return MatchOperand_ParseFail; + + while (Parser.getTok().is(AsmToken::Comma) || + Parser.getTok().is(AsmToken::Minus)) { + if (Parser.getTok().is(AsmToken::Minus)) { + if (!Spacing) + Spacing = 1; // Register range implies a single spaced list. + else if (Spacing == 2) { + Error(Parser.getTok().getLoc(), + "sequential registers in double spaced list"); + return MatchOperand_ParseFail; + } + Parser.Lex(); // Eat the minus. + SMLoc EndLoc = Parser.getTok().getLoc(); + int EndReg = tryParseRegister(); + if (EndReg == -1) { + Error(EndLoc, "register expected"); + return MatchOperand_ParseFail; + } + // Allow Q regs and just interpret them as the two D sub-registers. + if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(EndReg)) + EndReg = getDRegFromQReg(EndReg) + 1; + // If the register is the same as the start reg, there's nothing + // more to do. + if (Reg == EndReg) + continue; + // The register must be in the same register class as the first. 
+ if (!ARMMCRegisterClasses[ARM::DPRRegClassID].contains(EndReg)) { + Error(EndLoc, "invalid register in register list"); + return MatchOperand_ParseFail; + } + // Ranges must go from low to high. + if (Reg > EndReg) { + Error(EndLoc, "bad range in register list"); + return MatchOperand_ParseFail; + } + // Parse the lane specifier if present. + VectorLaneTy NextLaneKind; + unsigned NextLaneIndex; + if (parseVectorLane(NextLaneKind, NextLaneIndex) != MatchOperand_Success) + return MatchOperand_ParseFail; + if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) { + Error(EndLoc, "mismatched lane index in register list"); + return MatchOperand_ParseFail; + } + EndLoc = Parser.getTok().getLoc(); + + // Add all the registers in the range to the register list. + Count += EndReg - Reg; + Reg = EndReg; + continue; + } + Parser.Lex(); // Eat the comma. + RegLoc = Parser.getTok().getLoc(); + int OldReg = Reg; + Reg = tryParseRegister(); + if (Reg == -1) { + Error(RegLoc, "register expected"); + return MatchOperand_ParseFail; + } + // Vector register lists must be contiguous. + // It's OK to use the enumeration values directly here, as the + // VFP register classes have the enum sorted properly. + // + // The list is of D registers, but we also allow Q regs and just interpret + // them as the two D sub-registers. + if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) { + if (!Spacing) + Spacing = 1; // Register range implies a single spaced list. + else if (Spacing == 2) { + Error(RegLoc, + "invalid register in double-spaced list (must be 'D' register)"); + return MatchOperand_ParseFail; + } + Reg = getDRegFromQReg(Reg); + if (Reg != OldReg + 1) { + Error(RegLoc, "non-contiguous register range"); + return MatchOperand_ParseFail; + } + ++Reg; + Count += 2; + // Parse the lane specifier if present. + VectorLaneTy NextLaneKind; + unsigned NextLaneIndex; + SMLoc EndLoc = Parser.getTok().getLoc(); + if (parseVectorLane(NextLaneKind, NextLaneIndex) != MatchOperand_Success) + return MatchOperand_ParseFail; + if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) { + Error(EndLoc, "mismatched lane index in register list"); + return MatchOperand_ParseFail; + } + continue; + } + // Normal D register. + // Figure out the register spacing (single or double) of the list if + // we don't know it already. + if (!Spacing) + Spacing = 1 + (Reg == OldReg + 2); + + // Just check that it's contiguous and keep going. + if (Reg != OldReg + Spacing) { + Error(RegLoc, "non-contiguous register range"); + return MatchOperand_ParseFail; + } + ++Count; + // Parse the lane specifier if present. + VectorLaneTy NextLaneKind; + unsigned NextLaneIndex; + SMLoc EndLoc = Parser.getTok().getLoc(); + if (parseVectorLane(NextLaneKind, NextLaneIndex) != MatchOperand_Success) + return MatchOperand_ParseFail; + if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) { + Error(EndLoc, "mismatched lane index in register list"); + return MatchOperand_ParseFail; + } + } + + SMLoc E = Parser.getTok().getLoc(); + if (Parser.getTok().isNot(AsmToken::RCurly)) { + Error(E, "'}' expected"); + return MatchOperand_ParseFail; + } + Parser.Lex(); // Eat '}' token. + + switch (LaneKind) { + case NoLanes: + // Two-register operands have been converted to the + // composite register classes. + if (Count == 2) { + const MCRegisterClass *RC = (Spacing == 1) ?
+ &ARMMCRegisterClasses[ARM::DPairRegClassID] : + &ARMMCRegisterClasses[ARM::DPairSpcRegClassID]; + FirstReg = MRI->getMatchingSuperReg(FirstReg, ARM::dsub_0, RC); + } + + Operands.push_back(ARMOperand::CreateVectorList(FirstReg, Count, + (Spacing == 2), S, E)); + break; + case AllLanes: + // Two-register operands have been converted to the + // composite register classes. + if (Count == 2) { + const MCRegisterClass *RC = (Spacing == 1) ? + &ARMMCRegisterClasses[ARM::DPairRegClassID] : + &ARMMCRegisterClasses[ARM::DPairSpcRegClassID]; + FirstReg = MRI->getMatchingSuperReg(FirstReg, ARM::dsub_0, RC); + } + Operands.push_back(ARMOperand::CreateVectorListAllLanes(FirstReg, Count, + (Spacing == 2), + S, E)); + break; + case IndexedLane: + Operands.push_back(ARMOperand::CreateVectorListIndexed(FirstReg, Count, + LaneIndex, + (Spacing == 2), + S, E)); + break; + } + return MatchOperand_Success; +} + /// parseMemBarrierOptOperand - Try to parse DSB/DMB data barrier options. ARMAsmParser::OperandMatchResultTy ARMAsmParser:: parseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { @@ -2337,7 +3310,8 @@ parseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { if (isMClass()) { // See ARMv6-M 10.1.1 - unsigned FlagsVal = StringSwitch<unsigned>(Mask) + std::string Name = Mask.lower(); + unsigned FlagsVal = StringSwitch<unsigned>(Name) .Case("apsr", 0) .Case("iapsr", 1) .Case("eapsr", 2) @@ -2353,14 +3327,14 @@ parseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { .Case("faultmask", 19) .Case("control", 20) .Default(~0U); - + if (FlagsVal == ~0U) return MatchOperand_NoMatch; if (!hasV7Ops() && FlagsVal >= 17 && FlagsVal <= 19) // basepri, basepri_max and faultmask only valid for V7m. return MatchOperand_NoMatch; - + Parser.Lex(); // Eat identifier token. Operands.push_back(ARMOperand::CreateMSRMask(FlagsVal, S)); return MatchOperand_Success; @@ -2369,7 +3343,7 @@ parseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Split spec_reg from flag, example: CPSR_sxf => "CPSR" and "sxf" size_t Start = 0, Next = Mask.find('_'); StringRef Flags = ""; - std::string SpecReg = LowercaseString(Mask.slice(Start, Next)); + std::string SpecReg = Mask.slice(Start, Next).lower(); if (Next != StringRef::npos) Flags = Mask.slice(Next+1, Mask.size()); @@ -2392,7 +3366,8 @@ parseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { FlagsVal = 8; // No flag } } else if (SpecReg == "cpsr" || SpecReg == "spsr") { - if (Flags == "all") // cpsr_all is an alias for cpsr_fc + // cpsr_all is an alias for cpsr_fc, as is plain cpsr. + if (Flags == "all" || Flags == "") Flags = "fc"; for (int i = 0, e = Flags.size(); i != e; ++i) { unsigned Flag = StringSwitch<unsigned>(Flags.substr(i, 1)) @@ -2411,9 +3386,13 @@ parseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { } else // No match for special register. return MatchOperand_NoMatch; - // Special register without flags are equivalent to "fc" flags. - if (!FlagsVal) - FlagsVal = 0x9; + // Special register without flags is NOT equivalent to "fc" flags. + // NOTE: This is a divergence from gas' behavior. Uncommenting the following + // two lines would enable gas compatibility at the expense of breaking + // round-tripping. 
+ // + // if (!FlagsVal) + // FlagsVal = 0x9; // Bit 4: Special Reg (cpsr, apsr => 0; spsr => 1) if (SpecReg == "spsr") @@ -2433,8 +3412,8 @@ parsePKHImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands, StringRef Op, return MatchOperand_ParseFail; } StringRef ShiftName = Tok.getString(); - std::string LowerOp = LowercaseString(Op); - std::string UpperOp = UppercaseString(Op); + std::string LowerOp = Op.lower(); + std::string UpperOp = Op.upper(); if (ShiftName != LowerOp && ShiftName != UpperOp) { Error(Parser.getTok().getLoc(), Op + " operand expected."); return MatchOperand_ParseFail; @@ -2442,7 +3421,8 @@ parsePKHImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands, StringRef Op, Parser.Lex(); // Eat shift type token. // There must be a '#' and a shift amount. - if (Parser.getTok().isNot(AsmToken::Hash)) { + if (Parser.getTok().isNot(AsmToken::Hash) && + Parser.getTok().isNot(AsmToken::Dollar)) { Error(Parser.getTok().getLoc(), "'#' expected"); return MatchOperand_ParseFail; } @@ -2520,7 +3500,8 @@ parseShifterImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { Parser.Lex(); // Eat the operator. // A '#' and a shift amount. - if (Parser.getTok().isNot(AsmToken::Hash)) { + if (Parser.getTok().isNot(AsmToken::Hash) && + Parser.getTok().isNot(AsmToken::Dollar)) { Error(Parser.getTok().getLoc(), "'#' expected"); return MatchOperand_ParseFail; } @@ -2580,7 +3561,8 @@ parseRotImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { Parser.Lex(); // Eat the operator. // A '#' and a rotate amount. - if (Parser.getTok().isNot(AsmToken::Hash)) { + if (Parser.getTok().isNot(AsmToken::Hash) && + Parser.getTok().isNot(AsmToken::Dollar)) { Error(Parser.getTok().getLoc(), "'#' expected"); return MatchOperand_ParseFail; } @@ -2617,7 +3599,8 @@ ARMAsmParser::OperandMatchResultTy ARMAsmParser:: parseBitfield(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { SMLoc S = Parser.getTok().getLoc(); // The bitfield descriptor is really two operands, the LSB and the width. - if (Parser.getTok().isNot(AsmToken::Hash)) { + if (Parser.getTok().isNot(AsmToken::Hash) && + Parser.getTok().isNot(AsmToken::Dollar)) { Error(Parser.getTok().getLoc(), "'#' expected"); return MatchOperand_ParseFail; } @@ -2649,7 +3632,8 @@ parseBitfield(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { return MatchOperand_ParseFail; } Parser.Lex(); // Eat hash token. - if (Parser.getTok().isNot(AsmToken::Hash)) { + if (Parser.getTok().isNot(AsmToken::Hash) && + Parser.getTok().isNot(AsmToken::Dollar)) { Error(Parser.getTok().getLoc(), "'#' expected"); return MatchOperand_ParseFail; } @@ -2743,7 +3727,8 @@ parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { SMLoc S = Tok.getLoc(); // Do immediates first, as we always parse those if we have a '#'. - if (Parser.getTok().is(AsmToken::Hash)) { + if (Parser.getTok().is(AsmToken::Hash) || + Parser.getTok().is(AsmToken::Dollar)) { Parser.Lex(); // Eat the '#'. // Explicitly look for a '-', as we need to encode negative zero // differently. @@ -3082,18 +4067,80 @@ cvtThumbMultiply(MCInst &Inst, unsigned Opcode, } ((ARMOperand*)Operands[3])->addRegOperands(Inst, 1); ((ARMOperand*)Operands[1])->addCCOutOperands(Inst, 1); - ((ARMOperand*)Operands[4])->addRegOperands(Inst, 1); - // If we have a three-operand form, use that, else the second source operand - // is just the destination operand again. 
- if (Operands.size() == 6) - ((ARMOperand*)Operands[5])->addRegOperands(Inst, 1); - else - Inst.addOperand(Inst.getOperand(0)); + // If we have a three-operand form, make sure to set Rn to be the operand + // that isn't the same as Rd. + unsigned RegOp = 4; + if (Operands.size() == 6 && + ((ARMOperand*)Operands[4])->getReg() == + ((ARMOperand*)Operands[3])->getReg()) + RegOp = 5; + ((ARMOperand*)Operands[RegOp])->addRegOperands(Inst, 1); + Inst.addOperand(Inst.getOperand(0)); ((ARMOperand*)Operands[2])->addCondCodeOperands(Inst, 2); return true; } +bool ARMAsmParser:: +cvtVLDwbFixed(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + // Vd + ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1); + // Create a writeback register dummy placeholder. + Inst.addOperand(MCOperand::CreateImm(0)); + // Vn + ((ARMOperand*)Operands[4])->addAlignedMemoryOperands(Inst, 2); + // pred + ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); + return true; +} + +bool ARMAsmParser:: +cvtVLDwbRegister(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + // Vd + ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1); + // Create a writeback register dummy placeholder. + Inst.addOperand(MCOperand::CreateImm(0)); + // Vn + ((ARMOperand*)Operands[4])->addAlignedMemoryOperands(Inst, 2); + // Vm + ((ARMOperand*)Operands[5])->addRegOperands(Inst, 1); + // pred + ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); + return true; +} + +bool ARMAsmParser:: +cvtVSTwbFixed(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + // Create a writeback register dummy placeholder. + Inst.addOperand(MCOperand::CreateImm(0)); + // Vn + ((ARMOperand*)Operands[4])->addAlignedMemoryOperands(Inst, 2); + // Vt + ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1); + // pred + ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); + return true; +} + +bool ARMAsmParser:: +cvtVSTwbRegister(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + // Create a writeback register dummy placeholder. + Inst.addOperand(MCOperand::CreateImm(0)); + // Vn + ((ARMOperand*)Operands[4])->addAlignedMemoryOperands(Inst, 2); + // Vm + ((ARMOperand*)Operands[5])->addRegOperands(Inst, 1); + // Vt + ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1); + // pred + ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); + return true; +} + /// Parse an ARM memory expression, return false if successful else return true /// or an error. The first token must be a '[' when called. bool ARMAsmParser:: @@ -3153,7 +4200,10 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { unsigned Align = 0; switch (CE->getValue()) { default: - return Error(E, "alignment specifier must be 64, 128, or 256 bits"); + return Error(E, + "alignment specifier must be 16, 32, 64, 128, or 256 bits"); + case 16: Align = 2; break; + case 32: Align = 4; break; case 64: Align = 8; break; case 128: Align = 16; break; case 256: Align = 32; break; @@ -3182,9 +4232,13 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { } // If we have a '#', it's an immediate offset, else assume it's a register - // offset. - if (Parser.getTok().is(AsmToken::Hash)) { - Parser.Lex(); // Eat the '#'. + // offset. Be friendly and also accept a plain integer (without a leading + // hash) for gas compatibility. 
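The alignment cases in parseMemory above boil down to a bits-to-bytes conversion restricted to the widths the NEON addressing modes can encode; a compact equivalent (hypothetical helper, returning 0 for unencodable widths):

#include <cstdint>

// "[r0:128]" style alignment: the specifier is written in bits, while the
// operand stores a byte alignment.
static unsigned alignBitsToBytes(int64_t Bits) {
  switch (Bits) {
  case 16: case 32: case 64: case 128: case 256:
    return static_cast<unsigned>(Bits / 8); // 16 -> 2, ..., 256 -> 32
  default:
    return 0; // not encodable as a NEON alignment
  }
}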
+ if (Parser.getTok().is(AsmToken::Hash) || + Parser.getTok().is(AsmToken::Dollar) || + Parser.getTok().is(AsmToken::Integer)) { + if (Parser.getTok().isNot(AsmToken::Integer)) + Parser.Lex(); // Eat the '#'. E = Parser.getTok().getLoc(); bool isNegative = getParser().getTok().is(AsmToken::Minus); @@ -3281,7 +4335,8 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St, if (Tok.isNot(AsmToken::Identifier)) return true; StringRef ShiftName = Tok.getString(); - if (ShiftName == "lsl" || ShiftName == "LSL") + if (ShiftName == "lsl" || ShiftName == "LSL" || + ShiftName == "asl" || ShiftName == "ASL") St = ARM_AM::lsl; else if (ShiftName == "lsr" || ShiftName == "LSR") St = ARM_AM::lsr; @@ -3301,7 +4356,8 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St, Loc = Parser.getTok().getLoc(); // A '#' and a shift amount. const AsmToken &HashTok = Parser.getTok(); - if (HashTok.isNot(AsmToken::Hash)) + if (HashTok.isNot(AsmToken::Hash) && + HashTok.isNot(AsmToken::Dollar)) return Error(HashTok.getLoc(), "'#' expected"); Parser.Lex(); // Eat hash token. @@ -3328,10 +4384,36 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St, /// parseFPImm - A floating point immediate expression operand. ARMAsmParser::OperandMatchResultTy ARMAsmParser:: parseFPImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + // Anything that can accept a floating point constant as an operand + // needs to go through here, as the regular ParseExpression is + // integer only. + // + // This routine still creates a generic Immediate operand, containing + // a bitcast of the 64-bit floating point value. The various operands + // that accept floats can check whether the value is valid for them + // via the standard is*() predicates. + SMLoc S = Parser.getTok().getLoc(); - if (Parser.getTok().isNot(AsmToken::Hash)) + if (Parser.getTok().isNot(AsmToken::Hash) && + Parser.getTok().isNot(AsmToken::Dollar)) return MatchOperand_NoMatch; + + // Disambiguate the VMOV forms that can accept an FP immediate. + // vmov.f32 <sreg>, #imm + // vmov.f64 <dreg>, #imm + // vmov.f32 <dreg>, #imm @ vector f32x2 + // vmov.f32 <qreg>, #imm @ vector f32x4 + // + // There are also the NEON VMOV instructions which expect an + // integer constant. Make sure we don't try to parse an FPImm + // for these: + // vmov.i{8|16|32|64} <dreg|qreg>, #imm + ARMOperand *TyOp = static_cast<ARMOperand*>(Operands[2]); + if (!TyOp->isToken() || (TyOp->getToken() != ".f32" && + TyOp->getToken() != ".f64")) + return MatchOperand_NoMatch; + Parser.Lex(); // Eat the '#'. // Handle negation, as that still comes through as a separate token. @@ -3341,34 +4423,39 @@ parseFPImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { Parser.Lex(); } const AsmToken &Tok = Parser.getTok(); + SMLoc Loc = Tok.getLoc(); if (Tok.is(AsmToken::Real)) { - APFloat RealVal(APFloat::IEEEdouble, Tok.getString()); + APFloat RealVal(APFloat::IEEEsingle, Tok.getString()); uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue(); // If we had a '-' in front, toggle the sign bit. - IntVal ^= (uint64_t)isNegative << 63; - int Val = ARM_AM::getFP64Imm(APInt(64, IntVal)); + IntVal ^= (uint64_t)isNegative << 31; Parser.Lex(); // Eat the token. 
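Because the '-' arrives as its own token, negation is applied after the bitcast by toggling the IEEE sign bit — the IntVal ^= (uint64_t)isNegative << 31 step above. A self-contained illustration for single precision, assuming the usual IEEE-754 layout with the sign in bit 31:

#include <cstdint>
#include <cstring>

// Bitcast a float and optionally flip its sign bit. This works even for
// 0.0f, which is why the sign can't simply be folded into the parsed value.
static uint32_t encodeF32(float V, bool Negative) {
  uint32_t Bits;
  std::memcpy(&Bits, &V, sizeof(Bits)); // well-defined bitcast
  if (Negative)
    Bits ^= UINT32_C(1) << 31;
  return Bits;
}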
- if (Val == -1) { - TokError("floating point value out of range"); - return MatchOperand_ParseFail; - } - Operands.push_back(ARMOperand::CreateFPImm(Val, S, getContext())); + Operands.push_back(ARMOperand::CreateImm( + MCConstantExpr::Create(IntVal, getContext()), + S, Parser.getTok().getLoc())); return MatchOperand_Success; } + // Also handle plain integers. Instructions which allow floating point + // immediates also allow a raw encoded 8-bit value. if (Tok.is(AsmToken::Integer)) { int64_t Val = Tok.getIntVal(); Parser.Lex(); // Eat the token. if (Val > 255 || Val < 0) { - TokError("encoded floating point value out of range"); + Error(Loc, "encoded floating point value out of range"); return MatchOperand_ParseFail; } - Operands.push_back(ARMOperand::CreateFPImm(Val, S, getContext())); + double RealVal = ARM_AM::getFPImmFloat(Val); + Val = APFloat(APFloat::IEEEdouble, RealVal).bitcastToAPInt().getZExtValue(); + Operands.push_back(ARMOperand::CreateImm( + MCConstantExpr::Create(Val, getContext()), S, + Parser.getTok().getLoc())); return MatchOperand_Success; } - TokError("invalid floating point immediate"); + Error(Loc, "invalid floating point immediate"); return MatchOperand_ParseFail; } + /// Parse an ARM instruction operand. For now this parses the operand regardless /// of the mnemonic. bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, @@ -3391,7 +4478,6 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, Error(Parser.getTok().getLoc(), "unexpected token in operand"); return true; case AsmToken::Identifier: { - // If this is VMRS, check for the apsr_nzcv operand. if (!tryParseRegisterWithWriteBack(Operands)) return false; int Res = tryParseShiftRegister(Operands); @@ -3399,17 +4485,21 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, return false; else if (Res == -1) // irrecoverable error return true; - if (Mnemonic == "vmrs" && Parser.getTok().getString() == "apsr_nzcv") { + // If this is VMRS, check for the apsr_nzcv operand. + if (Mnemonic == "vmrs" && + Parser.getTok().getString().equals_lower("apsr_nzcv")) { S = Parser.getTok().getLoc(); Parser.Lex(); - Operands.push_back(ARMOperand::CreateToken("apsr_nzcv", S)); + Operands.push_back(ARMOperand::CreateToken("APSR_nzcv", S)); return false; } // Fall through for the Identifier case that is not a register or a // special name. } + case AsmToken::LParen: // parenthesized expressions like (_strcmp-4) case AsmToken::Integer: // things like 1f and 2b as branch targets + case AsmToken::String: // quoted label names. case AsmToken::Dot: { // . as a branch target // This was not a register so parse other operands that start with an // identifier (like labels) as expressions and create them as immediates. @@ -3425,6 +4515,7 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, return parseMemory(Operands); case AsmToken::LCurly: return parseRegisterList(Operands); + case AsmToken::Dollar: case AsmToken::Hash: { // #42 -> immediate.
// TODO: ":lower16:" and ":upper16:" modifiers after # before immediate @@ -3435,13 +4526,11 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, if (getParser().ParseExpression(ImmVal)) return true; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ImmVal); - if (!CE) { - Error(S, "constant expression expected"); - return MatchOperand_ParseFail; + if (CE) { + int32_t Val = CE->getValue(); + if (isNegative && Val == 0) + ImmVal = MCConstantExpr::Create(INT32_MIN, getContext()); } - int32_t Val = CE->getValue(); - if (isNegative && Val == 0) - ImmVal = MCConstantExpr::Create(INT32_MIN, getContext()); E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); Operands.push_back(ARMOperand::CreateImm(ImmVal, S, E)); return false; @@ -3524,7 +4613,8 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic, Mnemonic == "vcge" || Mnemonic == "vclt" || Mnemonic == "vacgt" || Mnemonic == "vcgt" || Mnemonic == "vcle" || Mnemonic == "smlal" || Mnemonic == "umaal" || Mnemonic == "umlal" || Mnemonic == "vabal" || - Mnemonic == "vmlal" || Mnemonic == "vpadal" || Mnemonic == "vqdmlal") + Mnemonic == "vmlal" || Mnemonic == "vpadal" || Mnemonic == "vqdmlal" || + Mnemonic == "fmuls") return Mnemonic; // First, split out any predication code. Ignore mnemonics we know aren't @@ -3565,7 +4655,11 @@ Mnemonic == "mrs" || Mnemonic == "smmls" || Mnemonic == "vabs" || Mnemonic == "vcls" || Mnemonic == "vmls" || Mnemonic == "vmrs" || Mnemonic == "vnmls" || Mnemonic == "vqabs" || Mnemonic == "vrecps" || - Mnemonic == "vrsqrts" || Mnemonic == "srs" || + Mnemonic == "vrsqrts" || Mnemonic == "srs" || Mnemonic == "flds" || + Mnemonic == "fmrs" || Mnemonic == "fsqrts" || Mnemonic == "fsubs" || + Mnemonic == "fsts" || Mnemonic == "fcpys" || Mnemonic == "fdivs" || + Mnemonic == "fmuls" || Mnemonic == "fcmps" || Mnemonic == "fcmpzs" || + Mnemonic == "vfms" || Mnemonic == "vfnms" || (Mnemonic == "movs" && isThumb()))) { Mnemonic = Mnemonic.slice(0, Mnemonic.size() - 1); CarrySetting = true; @@ -3609,6 +4703,7 @@ getMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet, Mnemonic == "orr" || Mnemonic == "mvn" || Mnemonic == "rsb" || Mnemonic == "rsc" || Mnemonic == "orn" || Mnemonic == "sbc" || Mnemonic == "eor" || Mnemonic == "neg" || + Mnemonic == "vfm" || Mnemonic == "vfnm" || (!isThumb() && (Mnemonic == "smull" || Mnemonic == "mov" || Mnemonic == "mla" || Mnemonic == "smlal" || Mnemonic == "umlal" || Mnemonic == "umull"))) { @@ -3677,7 +4772,7 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic, static_cast<ARMOperand*>(Operands[4])->isReg() && static_cast<ARMOperand*>(Operands[4])->getReg() == ARM::SP && static_cast<ARMOperand*>(Operands[1])->getReg() == 0 && - (static_cast<ARMOperand*>(Operands[5])->isReg() || + ((Mnemonic == "add" && static_cast<ARMOperand*>(Operands[5])->isReg()) || static_cast<ARMOperand*>(Operands[5])->isImm0_1020s4())) return true; // For Thumb2, add/sub immediate does not have a cc_out operand for the @@ -3694,9 +4789,11 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic, // // If either register is a high reg, it's either one of the SP // variants (handled above) or a 32-bit encoding, so we just - // check against T3. + // check against T3. If the second register is the PC, this is an + // alternate form of ADR, which uses encoding T4, so check for that too.
if ((!isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) || !isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg())) && + static_cast<ARMOperand*>(Operands[4])->getReg() != ARM::PC && static_cast<ARMOperand*>(Operands[5])->isT2SOImm()) return false; // If both registers are low, we're in an IT block, and the immediate is @@ -3726,6 +4823,7 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic, // remove the cc_out operand. (!isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) || !isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg()) || + !isARMLowRegister(static_cast<ARMOperand*>(Operands[5])->getReg()) || !inITBlock() || (static_cast<ARMOperand*>(Operands[3])->getReg() != static_cast<ARMOperand*>(Operands[5])->getReg() && static_cast<ARMOperand*>(Operands[4])->getReg()))) return true; + // Also check the 'mul' syntax variant that doesn't specify an explicit + // destination register. + if (isThumbTwo() && Mnemonic == "mul" && Operands.size() == 5 && + static_cast<ARMOperand*>(Operands[1])->getReg() == 0 && + static_cast<ARMOperand*>(Operands[3])->isReg() && + static_cast<ARMOperand*>(Operands[4])->isReg() && + // If the registers aren't low regs or the cc_out operand is zero + // outside of an IT block, we have to use the 32-bit encoding, so + // remove the cc_out operand. + (!isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) || + !isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg()) || + !inITBlock())) + return true; + // Register-register 'add/sub' for Thumb does not have a cc_out operand @@ -3744,15 +4856,52 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic, (Operands.size() == 5 || Operands.size() == 6) && static_cast<ARMOperand*>(Operands[3])->isReg() && static_cast<ARMOperand*>(Operands[3])->getReg() == ARM::SP && - static_cast<ARMOperand*>(Operands[1])->getReg() == 0) + static_cast<ARMOperand*>(Operands[1])->getReg() == 0 && + (static_cast<ARMOperand*>(Operands[4])->isImm() || + (Operands.size() == 6 && + static_cast<ARMOperand*>(Operands[5])->isImm()))) return true; return false; } +static bool isDataTypeToken(StringRef Tok) { + return Tok == ".8" || Tok == ".16" || Tok == ".32" || Tok == ".64" || + Tok == ".i8" || Tok == ".i16" || Tok == ".i32" || Tok == ".i64" || + Tok == ".u8" || Tok == ".u16" || Tok == ".u32" || Tok == ".u64" || + Tok == ".s8" || Tok == ".s16" || Tok == ".s32" || Tok == ".s64" || + Tok == ".p8" || Tok == ".p16" || Tok == ".f32" || Tok == ".f64" || + Tok == ".f" || Tok == ".d"; +} + +// FIXME: This bit should probably be handled via an explicit match class +// in the .td files that matches the suffix instead of having it be +// a literal string token the way it is now. +static bool doesIgnoreDataTypeSuffix(StringRef Mnemonic, StringRef DT) { + return Mnemonic.startswith("vldm") || Mnemonic.startswith("vstm"); +} + +static void applyMnemonicAliases(StringRef &Mnemonic, unsigned Features); /// Parse an ARM instruction mnemonic followed by its operands. bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + // Apply mnemonic aliases before doing anything else, as the destination + // mnemonic may include suffixes and we want to handle them normally. + // The generic tblgen'erated code does this later, at the start of + // MatchInstructionImpl(), but that's too late for aliases that include + // any sort of suffix.
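The suffix machinery above ultimately rests on splitting the written mnemonic at '.' boundaries, with each ".xyz" piece kept dot-included so it can later be matched or discarded as a datatype token. A rough sketch using std::string (illustrative only; the parser itself works on StringRef):

#include <string>
#include <vector>

// "vadd.i16" -> {"vadd", ".i16"}; "vld4.8" -> {"vld4", ".8"}.
static std::vector<std::string> splitAtDots(const std::string &Name) {
  std::vector<std::string> Parts;
  std::string::size_type Start = 0, Dot = Name.find('.');
  Parts.push_back(Name.substr(0, Dot)); // mnemonic proper
  while (Dot != std::string::npos) {
    Start = Dot;
    Dot = Name.find('.', Start + 1);
    Parts.push_back(Name.substr(Start, Dot == std::string::npos
                                           ? std::string::npos
                                           : Dot - Start));
  }
  return Parts;
}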
+ unsigned AvailableFeatures = getAvailableFeatures(); + applyMnemonicAliases(Name, AvailableFeatures); + + // First check for the ARM-specific .req directive. + if (Parser.getTok().is(AsmToken::Identifier) && + Parser.getTok().getIdentifier() == ".req") { + parseDirectiveReq(Name, NameLoc); + // We always return 'error' for this, as we're done with this + // statement and don't need to match the instruction. + return true; + } + // Create the leading tokens for the mnemonic, split by '.' characters. size_t Start = 0, Next = Name.find('.'); StringRef Mnemonic = Name.slice(Start, Next); @@ -3854,9 +5003,12 @@ bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc, Next = Name.find('.', Start + 1); StringRef ExtraToken = Name.slice(Start, Next); - // For now, we're only parsing Thumb1 (for the most part), so - // just ignore ".n" qualifiers. We'll use them to restrict - // matching when we do Thumb2. + // Some NEON instructions have an optional datatype suffix that is + // completely ignored. Check for that. + if (isDataTypeToken(ExtraToken) && + doesIgnoreDataTypeSuffix(Mnemonic, ExtraToken)) + continue; + if (ExtraToken != ".n") { SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + Start); Operands.push_back(ARMOperand::CreateToken(ExtraToken, Loc)); @@ -3941,12 +5093,21 @@ bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc, } } // Similarly, the Thumb1 "RSB" instruction has a literal "#0" on the - // end. Convert it to a token here. + // end. Convert it to a token here. Take care not to convert those + // that should hit the Thumb2 encoding. if (Mnemonic == "rsb" && isThumb() && Operands.size() == 6 && + static_cast<ARMOperand*>(Operands[3])->isReg() && + static_cast<ARMOperand*>(Operands[4])->isReg() && static_cast<ARMOperand*>(Operands[5])->isImm()) { ARMOperand *Op = static_cast<ARMOperand*>(Operands[5]); const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm()); - if (CE && CE->getValue() == 0) { + if (CE && CE->getValue() == 0 && + (isThumbOne() || + // The cc_out operand matches the IT block. + ((inITBlock() != CarrySetting) && + // Neither register operand is a high register. + (isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) && + isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg()))))){ Operands.erase(Operands.begin() + 5); Operands.push_back(ARMOperand::CreateToken("#0", Op->getStartLoc())); delete Op; @@ -3990,9 +5151,9 @@ static bool listContainsReg(MCInst &Inst, unsigned OpNo, unsigned Reg) { // the ARMInsts array) instead. Getting that here requires awkward // API changes, though. Better way? namespace llvm { -extern MCInstrDesc ARMInsts[]; +extern const MCInstrDesc ARMInsts[]; } -static MCInstrDesc &getInstDesc(unsigned Opcode) { +static const MCInstrDesc &getInstDesc(unsigned Opcode) { return ARMInsts[Opcode]; } @@ -4000,13 +5161,14 @@ static MCInstrDesc &getInstDesc(unsigned Opcode) { bool ARMAsmParser:: validateInstruction(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - MCInstrDesc &MCID = getInstDesc(Inst.getOpcode()); + const MCInstrDesc &MCID = getInstDesc(Inst.getOpcode()); SMLoc Loc = Operands[0]->getStartLoc(); // Check the IT block state first. - // NOTE: In Thumb mode, the BKPT instruction has the interesting property of - // being allowed in IT blocks, but not being predicable. It just always + // NOTE: The BKPT instruction has the interesting property of being + // allowed in IT blocks, but not being predicable. It just always // executes.
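For the IT-block checking that follows, it helps to picture the assembler's state as a small queue of then/else slots drained one instruction at a time. A deliberately simplified model — the real Thumb2 IT encoding derives then/else by comparing mask bits against the low bit of the condition, and the parser's actual ITState differs — shown only for the shape of the bookkeeping:

// Toy model: up to four slots; a set bit here simply means 'else'.
struct ITBlockModel {
  unsigned CondCode; // condition carried by the IT instruction itself
  unsigned Mask;     // slot bits, consumed from bit 3 downward
  unsigned Pos = 3;  // next slot to consume

  // ARM condition codes invert by flipping the low bit (EQ<->NE, ...).
  unsigned nextCond() {
    bool IsElse = (Mask >> Pos--) & 1;
    return IsElse ? (CondCode ^ 1) : CondCode;
  }
};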
- if (inITBlock() && Inst.getOpcode() != ARM::tBKPT) { + if (inITBlock() && Inst.getOpcode() != ARM::tBKPT && + Inst.getOpcode() != ARM::BKPT) { unsigned bit = 1; if (ITState.FirstCond) ITState.FirstCond = false; @@ -4115,16 +5277,21 @@ validateInstruction, "in register list"); break; } + // Like ldm/stm, push and pop have hi-reg handling versions in Thumb2, + // so only issue a diagnostic for Thumb1. The instructions will be + // switched to the t2 encodings in processInstruction() if necessary. case ARM::tPOP: { bool listContainsBase; - if (checkLowRegisterList(Inst, 3, 0, ARM::PC, listContainsBase)) + if (checkLowRegisterList(Inst, 2, 0, ARM::PC, listContainsBase) && + !isThumbTwo()) return Error(Operands[2]->getStartLoc(), "registers must be in range r0-r7 or pc"); break; } case ARM::tPUSH: { bool listContainsBase; - if (checkLowRegisterList(Inst, 3, 0, ARM::LR, listContainsBase)) + if (checkLowRegisterList(Inst, 2, 0, ARM::LR, listContainsBase) && + !isThumbTwo()) return Error(Operands[2]->getStartLoc(), "registers must be in range r0-r7 or lr"); break; @@ -4141,10 +5308,1553 @@ validateInstruction, return false; } -void ARMAsmParser:: +static unsigned getRealVSTOpcode(unsigned Opc, unsigned &Spacing) { + switch(Opc) { + default: llvm_unreachable("unexpected opcode!"); + // VST1LN + case ARM::VST1LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VST1LNd8_UPD; + case ARM::VST1LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VST1LNd16_UPD; + case ARM::VST1LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VST1LNd32_UPD; + case ARM::VST1LNdWB_register_Asm_8: Spacing = 1; return ARM::VST1LNd8_UPD; + case ARM::VST1LNdWB_register_Asm_16: Spacing = 1; return ARM::VST1LNd16_UPD; + case ARM::VST1LNdWB_register_Asm_32: Spacing = 1; return ARM::VST1LNd32_UPD; + case ARM::VST1LNdAsm_8: Spacing = 1; return ARM::VST1LNd8; + case ARM::VST1LNdAsm_16: Spacing = 1; return ARM::VST1LNd16; + case ARM::VST1LNdAsm_32: Spacing = 1; return ARM::VST1LNd32; + + // VST2LN + case ARM::VST2LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VST2LNd8_UPD; + case ARM::VST2LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VST2LNd16_UPD; + case ARM::VST2LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VST2LNd32_UPD; + case ARM::VST2LNqWB_fixed_Asm_16: Spacing = 2; return ARM::VST2LNq16_UPD; + case ARM::VST2LNqWB_fixed_Asm_32: Spacing = 2; return ARM::VST2LNq32_UPD; + + case ARM::VST2LNdWB_register_Asm_8: Spacing = 1; return ARM::VST2LNd8_UPD; + case ARM::VST2LNdWB_register_Asm_16: Spacing = 1; return ARM::VST2LNd16_UPD; + case ARM::VST2LNdWB_register_Asm_32: Spacing = 1; return ARM::VST2LNd32_UPD; + case ARM::VST2LNqWB_register_Asm_16: Spacing = 2; return ARM::VST2LNq16_UPD; + case ARM::VST2LNqWB_register_Asm_32: Spacing = 2; return ARM::VST2LNq32_UPD; + + case ARM::VST2LNdAsm_8: Spacing = 1; return ARM::VST2LNd8; + case ARM::VST2LNdAsm_16: Spacing = 1; return ARM::VST2LNd16; + case ARM::VST2LNdAsm_32: Spacing = 1; return ARM::VST2LNd32; + case ARM::VST2LNqAsm_16: Spacing = 2; return ARM::VST2LNq16; + case ARM::VST2LNqAsm_32: Spacing = 2; return ARM::VST2LNq32; + + // VST3LN + case ARM::VST3LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VST3LNd8_UPD; + case ARM::VST3LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VST3LNd16_UPD; + case ARM::VST3LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VST3LNd32_UPD; + case ARM::VST3LNqWB_fixed_Asm_16: Spacing = 1; return ARM::VST3LNq16_UPD; + case ARM::VST3LNqWB_fixed_Asm_32: Spacing = 2; return ARM::VST3LNq32_UPD; + case ARM::VST3LNdWB_register_Asm_8: Spacing = 1; return ARM::VST3LNd8_UPD;
+ case ARM::VST3LNdWB_register_Asm_16: Spacing = 1; return ARM::VST3LNd16_UPD; + case ARM::VST3LNdWB_register_Asm_32: Spacing = 1; return ARM::VST3LNd32_UPD; + case ARM::VST3LNqWB_register_Asm_16: Spacing = 2; return ARM::VST3LNq16_UPD; + case ARM::VST3LNqWB_register_Asm_32: Spacing = 2; return ARM::VST3LNq32_UPD; + case ARM::VST3LNdAsm_8: Spacing = 1; return ARM::VST3LNd8; + case ARM::VST3LNdAsm_16: Spacing = 1; return ARM::VST3LNd16; + case ARM::VST3LNdAsm_32: Spacing = 1; return ARM::VST3LNd32; + case ARM::VST3LNqAsm_16: Spacing = 2; return ARM::VST3LNq16; + case ARM::VST3LNqAsm_32: Spacing = 2; return ARM::VST3LNq32; + + // VST3 + case ARM::VST3dWB_fixed_Asm_8: Spacing = 1; return ARM::VST3d8_UPD; + case ARM::VST3dWB_fixed_Asm_16: Spacing = 1; return ARM::VST3d16_UPD; + case ARM::VST3dWB_fixed_Asm_32: Spacing = 1; return ARM::VST3d32_UPD; + case ARM::VST3qWB_fixed_Asm_8: Spacing = 2; return ARM::VST3q8_UPD; + case ARM::VST3qWB_fixed_Asm_16: Spacing = 2; return ARM::VST3q16_UPD; + case ARM::VST3qWB_fixed_Asm_32: Spacing = 2; return ARM::VST3q32_UPD; + case ARM::VST3dWB_register_Asm_8: Spacing = 1; return ARM::VST3d8_UPD; + case ARM::VST3dWB_register_Asm_16: Spacing = 1; return ARM::VST3d16_UPD; + case ARM::VST3dWB_register_Asm_32: Spacing = 1; return ARM::VST3d32_UPD; + case ARM::VST3qWB_register_Asm_8: Spacing = 2; return ARM::VST3q8_UPD; + case ARM::VST3qWB_register_Asm_16: Spacing = 2; return ARM::VST3q16_UPD; + case ARM::VST3qWB_register_Asm_32: Spacing = 2; return ARM::VST3q32_UPD; + case ARM::VST3dAsm_8: Spacing = 1; return ARM::VST3d8; + case ARM::VST3dAsm_16: Spacing = 1; return ARM::VST3d16; + case ARM::VST3dAsm_32: Spacing = 1; return ARM::VST3d32; + case ARM::VST3qAsm_8: Spacing = 2; return ARM::VST3q8; + case ARM::VST3qAsm_16: Spacing = 2; return ARM::VST3q16; + case ARM::VST3qAsm_32: Spacing = 2; return ARM::VST3q32; + + // VST4LN + case ARM::VST4LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VST4LNd8_UPD; + case ARM::VST4LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VST4LNd16_UPD; + case ARM::VST4LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VST4LNd32_UPD; + case ARM::VST4LNqWB_fixed_Asm_16: Spacing = 1; return ARM::VST4LNq16_UPD; + case ARM::VST4LNqWB_fixed_Asm_32: Spacing = 2; return ARM::VST4LNq32_UPD; + case ARM::VST4LNdWB_register_Asm_8: Spacing = 1; return ARM::VST4LNd8_UPD; + case ARM::VST4LNdWB_register_Asm_16: Spacing = 1; return ARM::VST4LNd16_UPD; + case ARM::VST4LNdWB_register_Asm_32: Spacing = 1; return ARM::VST4LNd32_UPD; + case ARM::VST4LNqWB_register_Asm_16: Spacing = 2; return ARM::VST4LNq16_UPD; + case ARM::VST4LNqWB_register_Asm_32: Spacing = 2; return ARM::VST4LNq32_UPD; + case ARM::VST4LNdAsm_8: Spacing = 1; return ARM::VST4LNd8; + case ARM::VST4LNdAsm_16: Spacing = 1; return ARM::VST4LNd16; + case ARM::VST4LNdAsm_32: Spacing = 1; return ARM::VST4LNd32; + case ARM::VST4LNqAsm_16: Spacing = 2; return ARM::VST4LNq16; + case ARM::VST4LNqAsm_32: Spacing = 2; return ARM::VST4LNq32; + + // VST4 + case ARM::VST4dWB_fixed_Asm_8: Spacing = 1; return ARM::VST4d8_UPD; + case ARM::VST4dWB_fixed_Asm_16: Spacing = 1; return ARM::VST4d16_UPD; + case ARM::VST4dWB_fixed_Asm_32: Spacing = 1; return ARM::VST4d32_UPD; + case ARM::VST4qWB_fixed_Asm_8: Spacing = 2; return ARM::VST4q8_UPD; + case ARM::VST4qWB_fixed_Asm_16: Spacing = 2; return ARM::VST4q16_UPD; + case ARM::VST4qWB_fixed_Asm_32: Spacing = 2; return ARM::VST4q32_UPD; + case ARM::VST4dWB_register_Asm_8: Spacing = 1; return ARM::VST4d8_UPD; + case ARM::VST4dWB_register_Asm_16: Spacing = 1; return ARM::VST4d16_UPD; + 
case ARM::VST4dWB_register_Asm_32: Spacing = 1; return ARM::VST4d32_UPD; + case ARM::VST4qWB_register_Asm_8: Spacing = 2; return ARM::VST4q8_UPD; + case ARM::VST4qWB_register_Asm_16: Spacing = 2; return ARM::VST4q16_UPD; + case ARM::VST4qWB_register_Asm_32: Spacing = 2; return ARM::VST4q32_UPD; + case ARM::VST4dAsm_8: Spacing = 1; return ARM::VST4d8; + case ARM::VST4dAsm_16: Spacing = 1; return ARM::VST4d16; + case ARM::VST4dAsm_32: Spacing = 1; return ARM::VST4d32; + case ARM::VST4qAsm_8: Spacing = 2; return ARM::VST4q8; + case ARM::VST4qAsm_16: Spacing = 2; return ARM::VST4q16; + case ARM::VST4qAsm_32: Spacing = 2; return ARM::VST4q32; + } +} + +static unsigned getRealVLDOpcode(unsigned Opc, unsigned &Spacing) { + switch(Opc) { + default: llvm_unreachable("unexpected opcode!"); + // VLD1LN + case ARM::VLD1LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VLD1LNd8_UPD; + case ARM::VLD1LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD1LNd16_UPD; + case ARM::VLD1LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD1LNd32_UPD; + case ARM::VLD1LNdWB_register_Asm_8: Spacing = 1; return ARM::VLD1LNd8_UPD; + case ARM::VLD1LNdWB_register_Asm_16: Spacing = 1; return ARM::VLD1LNd16_UPD; + case ARM::VLD1LNdWB_register_Asm_32: Spacing = 1; return ARM::VLD1LNd32_UPD; + case ARM::VLD1LNdAsm_8: Spacing = 1; return ARM::VLD1LNd8; + case ARM::VLD1LNdAsm_16: Spacing = 1; return ARM::VLD1LNd16; + case ARM::VLD1LNdAsm_32: Spacing = 1; return ARM::VLD1LNd32; + + // VLD2LN + case ARM::VLD2LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VLD2LNd8_UPD; + case ARM::VLD2LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD2LNd16_UPD; + case ARM::VLD2LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD2LNd32_UPD; + case ARM::VLD2LNqWB_fixed_Asm_16: Spacing = 1; return ARM::VLD2LNq16_UPD; + case ARM::VLD2LNqWB_fixed_Asm_32: Spacing = 2; return ARM::VLD2LNq32_UPD; + case ARM::VLD2LNdWB_register_Asm_8: Spacing = 1; return ARM::VLD2LNd8_UPD; + case ARM::VLD2LNdWB_register_Asm_16: Spacing = 1; return ARM::VLD2LNd16_UPD; + case ARM::VLD2LNdWB_register_Asm_32: Spacing = 1; return ARM::VLD2LNd32_UPD; + case ARM::VLD2LNqWB_register_Asm_16: Spacing = 2; return ARM::VLD2LNq16_UPD; + case ARM::VLD2LNqWB_register_Asm_32: Spacing = 2; return ARM::VLD2LNq32_UPD; + case ARM::VLD2LNdAsm_8: Spacing = 1; return ARM::VLD2LNd8; + case ARM::VLD2LNdAsm_16: Spacing = 1; return ARM::VLD2LNd16; + case ARM::VLD2LNdAsm_32: Spacing = 1; return ARM::VLD2LNd32; + case ARM::VLD2LNqAsm_16: Spacing = 2; return ARM::VLD2LNq16; + case ARM::VLD2LNqAsm_32: Spacing = 2; return ARM::VLD2LNq32; + + // VLD3DUP + case ARM::VLD3DUPdWB_fixed_Asm_8: Spacing = 1; return ARM::VLD3DUPd8_UPD; + case ARM::VLD3DUPdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD3DUPd16_UPD; + case ARM::VLD3DUPdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD3DUPd32_UPD; + case ARM::VLD3DUPqWB_fixed_Asm_8: Spacing = 1; return ARM::VLD3DUPq8_UPD; + case ARM::VLD3DUPqWB_fixed_Asm_16: Spacing = 1; return ARM::VLD3DUPq16_UPD; + case ARM::VLD3DUPqWB_fixed_Asm_32: Spacing = 2; return ARM::VLD3DUPq32_UPD; + case ARM::VLD3DUPdWB_register_Asm_8: Spacing = 1; return ARM::VLD3DUPd8_UPD; + case ARM::VLD3DUPdWB_register_Asm_16: Spacing = 1; return ARM::VLD3DUPd16_UPD; + case ARM::VLD3DUPdWB_register_Asm_32: Spacing = 1; return ARM::VLD3DUPd32_UPD; + case ARM::VLD3DUPqWB_register_Asm_8: Spacing = 2; return ARM::VLD3DUPq8_UPD; + case ARM::VLD3DUPqWB_register_Asm_16: Spacing = 2; return ARM::VLD3DUPq16_UPD; + case ARM::VLD3DUPqWB_register_Asm_32: Spacing = 2; return ARM::VLD3DUPq32_UPD; + case ARM::VLD3DUPdAsm_8: Spacing = 1; 
return ARM::VLD3DUPd8; + case ARM::VLD3DUPdAsm_16: Spacing = 1; return ARM::VLD3DUPd16; + case ARM::VLD3DUPdAsm_32: Spacing = 1; return ARM::VLD3DUPd32; + case ARM::VLD3DUPqAsm_8: Spacing = 2; return ARM::VLD3DUPq8; + case ARM::VLD3DUPqAsm_16: Spacing = 2; return ARM::VLD3DUPq16; + case ARM::VLD3DUPqAsm_32: Spacing = 2; return ARM::VLD3DUPq32; + + // VLD3LN + case ARM::VLD3LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VLD3LNd8_UPD; + case ARM::VLD3LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD3LNd16_UPD; + case ARM::VLD3LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD3LNd32_UPD; + case ARM::VLD3LNqWB_fixed_Asm_16: Spacing = 1; return ARM::VLD3LNq16_UPD; + case ARM::VLD3LNqWB_fixed_Asm_32: Spacing = 2; return ARM::VLD3LNq32_UPD; + case ARM::VLD3LNdWB_register_Asm_8: Spacing = 1; return ARM::VLD3LNd8_UPD; + case ARM::VLD3LNdWB_register_Asm_16: Spacing = 1; return ARM::VLD3LNd16_UPD; + case ARM::VLD3LNdWB_register_Asm_32: Spacing = 1; return ARM::VLD3LNd32_UPD; + case ARM::VLD3LNqWB_register_Asm_16: Spacing = 2; return ARM::VLD3LNq16_UPD; + case ARM::VLD3LNqWB_register_Asm_32: Spacing = 2; return ARM::VLD3LNq32_UPD; + case ARM::VLD3LNdAsm_8: Spacing = 1; return ARM::VLD3LNd8; + case ARM::VLD3LNdAsm_16: Spacing = 1; return ARM::VLD3LNd16; + case ARM::VLD3LNdAsm_32: Spacing = 1; return ARM::VLD3LNd32; + case ARM::VLD3LNqAsm_16: Spacing = 2; return ARM::VLD3LNq16; + case ARM::VLD3LNqAsm_32: Spacing = 2; return ARM::VLD3LNq32; + + // VLD3 + case ARM::VLD3dWB_fixed_Asm_8: Spacing = 1; return ARM::VLD3d8_UPD; + case ARM::VLD3dWB_fixed_Asm_16: Spacing = 1; return ARM::VLD3d16_UPD; + case ARM::VLD3dWB_fixed_Asm_32: Spacing = 1; return ARM::VLD3d32_UPD; + case ARM::VLD3qWB_fixed_Asm_8: Spacing = 2; return ARM::VLD3q8_UPD; + case ARM::VLD3qWB_fixed_Asm_16: Spacing = 2; return ARM::VLD3q16_UPD; + case ARM::VLD3qWB_fixed_Asm_32: Spacing = 2; return ARM::VLD3q32_UPD; + case ARM::VLD3dWB_register_Asm_8: Spacing = 1; return ARM::VLD3d8_UPD; + case ARM::VLD3dWB_register_Asm_16: Spacing = 1; return ARM::VLD3d16_UPD; + case ARM::VLD3dWB_register_Asm_32: Spacing = 1; return ARM::VLD3d32_UPD; + case ARM::VLD3qWB_register_Asm_8: Spacing = 2; return ARM::VLD3q8_UPD; + case ARM::VLD3qWB_register_Asm_16: Spacing = 2; return ARM::VLD3q16_UPD; + case ARM::VLD3qWB_register_Asm_32: Spacing = 2; return ARM::VLD3q32_UPD; + case ARM::VLD3dAsm_8: Spacing = 1; return ARM::VLD3d8; + case ARM::VLD3dAsm_16: Spacing = 1; return ARM::VLD3d16; + case ARM::VLD3dAsm_32: Spacing = 1; return ARM::VLD3d32; + case ARM::VLD3qAsm_8: Spacing = 2; return ARM::VLD3q8; + case ARM::VLD3qAsm_16: Spacing = 2; return ARM::VLD3q16; + case ARM::VLD3qAsm_32: Spacing = 2; return ARM::VLD3q32; + + // VLD4LN + case ARM::VLD4LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VLD4LNd8_UPD; + case ARM::VLD4LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD4LNd16_UPD; + case ARM::VLD4LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD4LNd32_UPD; + case ARM::VLD4LNqWB_fixed_Asm_16: Spacing = 1; return ARM::VLD4LNq16_UPD; + case ARM::VLD4LNqWB_fixed_Asm_32: Spacing = 2; return ARM::VLD4LNq32_UPD; + case ARM::VLD4LNdWB_register_Asm_8: Spacing = 1; return ARM::VLD4LNd8_UPD; + case ARM::VLD4LNdWB_register_Asm_16: Spacing = 1; return ARM::VLD4LNd16_UPD; + case ARM::VLD4LNdWB_register_Asm_32: Spacing = 1; return ARM::VLD4LNd32_UPD; + case ARM::VLD4LNqWB_register_Asm_16: Spacing = 2; return ARM::VLD4LNq16_UPD; + case ARM::VLD4LNqWB_register_Asm_32: Spacing = 2; return ARM::VLD4LNq32_UPD; + case ARM::VLD4LNdAsm_8: Spacing = 1; return ARM::VLD4LNd8; + case ARM::VLD4LNdAsm_16: 
Spacing = 1; return ARM::VLD4LNd16; + case ARM::VLD4LNdAsm_32: Spacing = 1; return ARM::VLD4LNd32; + case ARM::VLD4LNqAsm_16: Spacing = 2; return ARM::VLD4LNq16; + case ARM::VLD4LNqAsm_32: Spacing = 2; return ARM::VLD4LNq32; + + // VLD4DUP + case ARM::VLD4DUPdWB_fixed_Asm_8: Spacing = 1; return ARM::VLD4DUPd8_UPD; + case ARM::VLD4DUPdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD4DUPd16_UPD; + case ARM::VLD4DUPdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD4DUPd32_UPD; + case ARM::VLD4DUPqWB_fixed_Asm_8: Spacing = 1; return ARM::VLD4DUPq8_UPD; + case ARM::VLD4DUPqWB_fixed_Asm_16: Spacing = 1; return ARM::VLD4DUPq16_UPD; + case ARM::VLD4DUPqWB_fixed_Asm_32: Spacing = 2; return ARM::VLD4DUPq32_UPD; + case ARM::VLD4DUPdWB_register_Asm_8: Spacing = 1; return ARM::VLD4DUPd8_UPD; + case ARM::VLD4DUPdWB_register_Asm_16: Spacing = 1; return ARM::VLD4DUPd16_UPD; + case ARM::VLD4DUPdWB_register_Asm_32: Spacing = 1; return ARM::VLD4DUPd32_UPD; + case ARM::VLD4DUPqWB_register_Asm_8: Spacing = 2; return ARM::VLD4DUPq8_UPD; + case ARM::VLD4DUPqWB_register_Asm_16: Spacing = 2; return ARM::VLD4DUPq16_UPD; + case ARM::VLD4DUPqWB_register_Asm_32: Spacing = 2; return ARM::VLD4DUPq32_UPD; + case ARM::VLD4DUPdAsm_8: Spacing = 1; return ARM::VLD4DUPd8; + case ARM::VLD4DUPdAsm_16: Spacing = 1; return ARM::VLD4DUPd16; + case ARM::VLD4DUPdAsm_32: Spacing = 1; return ARM::VLD4DUPd32; + case ARM::VLD4DUPqAsm_8: Spacing = 2; return ARM::VLD4DUPq8; + case ARM::VLD4DUPqAsm_16: Spacing = 2; return ARM::VLD4DUPq16; + case ARM::VLD4DUPqAsm_32: Spacing = 2; return ARM::VLD4DUPq32; + + // VLD4 + case ARM::VLD4dWB_fixed_Asm_8: Spacing = 1; return ARM::VLD4d8_UPD; + case ARM::VLD4dWB_fixed_Asm_16: Spacing = 1; return ARM::VLD4d16_UPD; + case ARM::VLD4dWB_fixed_Asm_32: Spacing = 1; return ARM::VLD4d32_UPD; + case ARM::VLD4qWB_fixed_Asm_8: Spacing = 2; return ARM::VLD4q8_UPD; + case ARM::VLD4qWB_fixed_Asm_16: Spacing = 2; return ARM::VLD4q16_UPD; + case ARM::VLD4qWB_fixed_Asm_32: Spacing = 2; return ARM::VLD4q32_UPD; + case ARM::VLD4dWB_register_Asm_8: Spacing = 1; return ARM::VLD4d8_UPD; + case ARM::VLD4dWB_register_Asm_16: Spacing = 1; return ARM::VLD4d16_UPD; + case ARM::VLD4dWB_register_Asm_32: Spacing = 1; return ARM::VLD4d32_UPD; + case ARM::VLD4qWB_register_Asm_8: Spacing = 2; return ARM::VLD4q8_UPD; + case ARM::VLD4qWB_register_Asm_16: Spacing = 2; return ARM::VLD4q16_UPD; + case ARM::VLD4qWB_register_Asm_32: Spacing = 2; return ARM::VLD4q32_UPD; + case ARM::VLD4dAsm_8: Spacing = 1; return ARM::VLD4d8; + case ARM::VLD4dAsm_16: Spacing = 1; return ARM::VLD4d16; + case ARM::VLD4dAsm_32: Spacing = 1; return ARM::VLD4d32; + case ARM::VLD4qAsm_8: Spacing = 2; return ARM::VLD4q8; + case ARM::VLD4qAsm_16: Spacing = 2; return ARM::VLD4q16; + case ARM::VLD4qAsm_32: Spacing = 2; return ARM::VLD4q32; + } +} + +bool ARMAsmParser:: processInstruction(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { switch (Inst.getOpcode()) { + // Aliases for alternate PC+imm syntax of LDR instructions. + case ARM::t2LDRpcrel: + Inst.setOpcode(ARM::t2LDRpci); + return true; + case ARM::t2LDRBpcrel: + Inst.setOpcode(ARM::t2LDRBpci); + return true; + case ARM::t2LDRHpcrel: + Inst.setOpcode(ARM::t2LDRHpci); + return true; + case ARM::t2LDRSBpcrel: + Inst.setOpcode(ARM::t2LDRSBpci); + return true; + case ARM::t2LDRSHpcrel: + Inst.setOpcode(ARM::t2LDRSHpci); + return true; + // Handle NEON VST complex aliases. 
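A note on the Spacing value threaded through the two tables above and the expansions below: together with the first D register of a list, it determines every other member arithmetically, so the alias cases only need to carry the base register. Sketch, under the assumption these tables rely on — that consecutive D registers have consecutive encodings:

// Element Idx of a NEON register list: {d0,d1,d2} has Spacing 1,
// the double-spaced {d0,d2,d4} form has Spacing 2.
static unsigned listReg(unsigned FirstReg, unsigned Spacing, unsigned Idx) {
  return FirstReg + Idx * Spacing;
}

This is exactly the Inst.getOperand(0).getReg() + Spacing * k pattern the expansions below use to materialize the second, third, and fourth list registers.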
+ case ARM::VST1LNdWB_register_Asm_8: + case ARM::VST1LNdWB_register_Asm_16: + case ARM::VST1LNdWB_register_Asm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. + unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(4)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(5)); // CondCode + TmpInst.addOperand(Inst.getOperand(6)); + Inst = TmpInst; + return true; + } + + case ARM::VST2LNdWB_register_Asm_8: + case ARM::VST2LNdWB_register_Asm_16: + case ARM::VST2LNdWB_register_Asm_32: + case ARM::VST2LNqWB_register_Asm_16: + case ARM::VST2LNqWB_register_Asm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. + unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(4)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(5)); // CondCode + TmpInst.addOperand(Inst.getOperand(6)); + Inst = TmpInst; + return true; + } + + case ARM::VST3LNdWB_register_Asm_8: + case ARM::VST3LNdWB_register_Asm_16: + case ARM::VST3LNdWB_register_Asm_32: + case ARM::VST3LNqWB_register_Asm_16: + case ARM::VST3LNqWB_register_Asm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. + unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(4)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(5)); // CondCode + TmpInst.addOperand(Inst.getOperand(6)); + Inst = TmpInst; + return true; + } + + case ARM::VST4LNdWB_register_Asm_8: + case ARM::VST4LNdWB_register_Asm_16: + case ARM::VST4LNdWB_register_Asm_32: + case ARM::VST4LNqWB_register_Asm_16: + case ARM::VST4LNqWB_register_Asm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. 
+ unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(4)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(5)); // CondCode + TmpInst.addOperand(Inst.getOperand(6)); + Inst = TmpInst; + return true; + } + + case ARM::VST1LNdWB_fixed_Asm_8: + case ARM::VST1LNdWB_fixed_Asm_16: + case ARM::VST1LNdWB_fixed_Asm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. + unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + case ARM::VST2LNdWB_fixed_Asm_8: + case ARM::VST2LNdWB_fixed_Asm_16: + case ARM::VST2LNdWB_fixed_Asm_32: + case ARM::VST2LNqWB_fixed_Asm_16: + case ARM::VST2LNqWB_fixed_Asm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. + unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + case ARM::VST3LNdWB_fixed_Asm_8: + case ARM::VST3LNdWB_fixed_Asm_16: + case ARM::VST3LNdWB_fixed_Asm_32: + case ARM::VST3LNqWB_fixed_Asm_16: + case ARM::VST3LNqWB_fixed_Asm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. 
+ unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + case ARM::VST4LNdWB_fixed_Asm_8: + case ARM::VST4LNdWB_fixed_Asm_16: + case ARM::VST4LNdWB_fixed_Asm_32: + case ARM::VST4LNqWB_fixed_Asm_16: + case ARM::VST4LNqWB_fixed_Asm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. + unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + case ARM::VST1LNdAsm_8: + case ARM::VST1LNdAsm_16: + case ARM::VST1LNdAsm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. + unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + case ARM::VST2LNdAsm_8: + case ARM::VST2LNdAsm_16: + case ARM::VST2LNdAsm_32: + case ARM::VST2LNqAsm_16: + case ARM::VST2LNqAsm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. + unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + case ARM::VST3LNdAsm_8: + case ARM::VST3LNdAsm_16: + case ARM::VST3LNdAsm_32: + case ARM::VST3LNqAsm_16: + case ARM::VST3LNqAsm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. 
+ unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + case ARM::VST4LNdAsm_8: + case ARM::VST4LNdAsm_16: + case ARM::VST4LNdAsm_32: + case ARM::VST4LNqAsm_16: + case ARM::VST4LNqAsm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. + unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + // Handle NEON VLD complex aliases. + case ARM::VLD1LNdWB_register_Asm_8: + case ARM::VLD1LNdWB_register_Asm_16: + case ARM::VLD1LNdWB_register_Asm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(4)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(5)); // CondCode + TmpInst.addOperand(Inst.getOperand(6)); + Inst = TmpInst; + return true; + } + + case ARM::VLD2LNdWB_register_Asm_8: + case ARM::VLD2LNdWB_register_Asm_16: + case ARM::VLD2LNdWB_register_Asm_32: + case ARM::VLD2LNqWB_register_Asm_16: + case ARM::VLD2LNqWB_register_Asm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. 
+ unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(4)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(5)); // CondCode + TmpInst.addOperand(Inst.getOperand(6)); + Inst = TmpInst; + return true; + } + + case ARM::VLD3LNdWB_register_Asm_8: + case ARM::VLD3LNdWB_register_Asm_16: + case ARM::VLD3LNdWB_register_Asm_32: + case ARM::VLD3LNqWB_register_Asm_16: + case ARM::VLD3LNqWB_register_Asm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(4)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(5)); // CondCode + TmpInst.addOperand(Inst.getOperand(6)); + Inst = TmpInst; + return true; + } + + case ARM::VLD4LNdWB_register_Asm_8: + case ARM::VLD4LNdWB_register_Asm_16: + case ARM::VLD4LNdWB_register_Asm_32: + case ARM::VLD4LNqWB_register_Asm_16: + case ARM::VLD4LNqWB_register_Asm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. 
+ unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(4)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(5)); // CondCode + TmpInst.addOperand(Inst.getOperand(6)); + Inst = TmpInst; + return true; + } + + case ARM::VLD1LNdWB_fixed_Asm_8: + case ARM::VLD1LNdWB_fixed_Asm_16: + case ARM::VLD1LNdWB_fixed_Asm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + case ARM::VLD2LNdWB_fixed_Asm_8: + case ARM::VLD2LNdWB_fixed_Asm_16: + case ARM::VLD2LNdWB_fixed_Asm_32: + case ARM::VLD2LNqWB_fixed_Asm_16: + case ARM::VLD2LNqWB_fixed_Asm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + case ARM::VLD3LNdWB_fixed_Asm_8: + case ARM::VLD3LNdWB_fixed_Asm_16: + case ARM::VLD3LNdWB_fixed_Asm_32: + case ARM::VLD3LNqWB_fixed_Asm_16: + case ARM::VLD3LNqWB_fixed_Asm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. 
+ unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + case ARM::VLD4LNdWB_fixed_Asm_8: + case ARM::VLD4LNdWB_fixed_Asm_16: + case ARM::VLD4LNdWB_fixed_Asm_32: + case ARM::VLD4LNqWB_fixed_Asm_16: + case ARM::VLD4LNqWB_fixed_Asm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + case ARM::VLD1LNdAsm_8: + case ARM::VLD1LNdAsm_16: + case ARM::VLD1LNdAsm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + case ARM::VLD2LNdAsm_8: + case ARM::VLD2LNdAsm_16: + case ARM::VLD2LNdAsm_32: + case ARM::VLD2LNqAsm_16: + case ARM::VLD2LNqAsm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. 
+ unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + case ARM::VLD3LNdAsm_8: + case ARM::VLD3LNdAsm_16: + case ARM::VLD3LNdAsm_32: + case ARM::VLD3LNqAsm_16: + case ARM::VLD3LNqAsm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + case ARM::VLD4LNdAsm_8: + case ARM::VLD4LNdAsm_16: + case ARM::VLD4LNdAsm_32: + case ARM::VLD4LNqAsm_16: + case ARM::VLD4LNqAsm_32: { + MCInst TmpInst; + // Shuffle the operands around so the lane index operand is in the + // right place. + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(2)); // Rn + TmpInst.addOperand(Inst.getOperand(3)); // alignment + TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd) + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(1)); // lane + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + // VLD3DUP single 3-element structure to all lanes instructions. 
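+ // Each of these "...Asm" pseudos carries a single Vd operand plus the
+ // Rn/alignment operands; the expansions below rebuild the full register
+ // list as Vd, Vd+Spacing and Vd+Spacing*2, with getRealVLDOpcode()
+ // supplying the list spacing (1 for a consecutive list like {d0,d1,d2},
+ // or 2 for the even/odd-spaced {d0,d2,d4} form). E.g., for
+ // "vld3.8 {d0[],d1[],d2[]}, [r1]" only D0 and the spacing are needed
+ // to recover all three destination registers.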
+ case ARM::VLD3DUPdAsm_8: + case ARM::VLD3DUPdAsm_16: + case ARM::VLD3DUPdAsm_32: + case ARM::VLD3DUPqAsm_8: + case ARM::VLD3DUPqAsm_16: + case ARM::VLD3DUPqAsm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(Inst.getOperand(3)); // CondCode + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } + + case ARM::VLD3DUPdWB_fixed_Asm_8: + case ARM::VLD3DUPdWB_fixed_Asm_16: + case ARM::VLD3DUPdWB_fixed_Asm_32: + case ARM::VLD3DUPqWB_fixed_Asm_8: + case ARM::VLD3DUPqWB_fixed_Asm_16: + case ARM::VLD3DUPqWB_fixed_Asm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm + TmpInst.addOperand(Inst.getOperand(3)); // CondCode + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } + + case ARM::VLD3DUPdWB_register_Asm_8: + case ARM::VLD3DUPdWB_register_Asm_16: + case ARM::VLD3DUPdWB_register_Asm_32: + case ARM::VLD3DUPqWB_register_Asm_8: + case ARM::VLD3DUPqWB_register_Asm_16: + case ARM::VLD3DUPqWB_register_Asm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(Inst.getOperand(3)); // Rm + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + // VLD3 multiple 3-element structure instructions. 
+ case ARM::VLD3dAsm_8:
+ case ARM::VLD3dAsm_16:
+ case ARM::VLD3dAsm_32:
+ case ARM::VLD3qAsm_8:
+ case ARM::VLD3qAsm_16:
+ case ARM::VLD3qAsm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD3dWB_fixed_Asm_8:
+ case ARM::VLD3dWB_fixed_Asm_16:
+ case ARM::VLD3dWB_fixed_Asm_32:
+ case ARM::VLD3qWB_fixed_Asm_8:
+ case ARM::VLD3qWB_fixed_Asm_16:
+ case ARM::VLD3qWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD3dWB_register_Asm_8:
+ case ARM::VLD3dWB_register_Asm_16:
+ case ARM::VLD3dWB_register_Asm_32:
+ case ARM::VLD3qWB_register_Asm_8:
+ case ARM::VLD3qWB_register_Asm_16:
+ case ARM::VLD3qWB_register_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(3)); // Rm
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ // VLD4DUP single 4-element structure to all lanes instructions.
+ case ARM::VLD4DUPdAsm_8: + case ARM::VLD4DUPdAsm_16: + case ARM::VLD4DUPdAsm_32: + case ARM::VLD4DUPqAsm_8: + case ARM::VLD4DUPqAsm_16: + case ARM::VLD4DUPqAsm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(Inst.getOperand(3)); // CondCode + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } + + case ARM::VLD4DUPdWB_fixed_Asm_8: + case ARM::VLD4DUPdWB_fixed_Asm_16: + case ARM::VLD4DUPdWB_fixed_Asm_32: + case ARM::VLD4DUPqWB_fixed_Asm_8: + case ARM::VLD4DUPqWB_fixed_Asm_16: + case ARM::VLD4DUPqWB_fixed_Asm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm + TmpInst.addOperand(Inst.getOperand(3)); // CondCode + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } + + case ARM::VLD4DUPdWB_register_Asm_8: + case ARM::VLD4DUPdWB_register_Asm_16: + case ARM::VLD4DUPdWB_register_Asm_32: + case ARM::VLD4DUPqWB_register_Asm_8: + case ARM::VLD4DUPqWB_register_Asm_16: + case ARM::VLD4DUPqWB_register_Asm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(Inst.getOperand(3)); // Rm + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + // VLD4 multiple 4-element structure instructions. 
+ case ARM::VLD4dAsm_8: + case ARM::VLD4dAsm_16: + case ARM::VLD4dAsm_32: + case ARM::VLD4qAsm_8: + case ARM::VLD4qAsm_16: + case ARM::VLD4qAsm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(Inst.getOperand(3)); // CondCode + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } + + case ARM::VLD4dWB_fixed_Asm_8: + case ARM::VLD4dWB_fixed_Asm_16: + case ARM::VLD4dWB_fixed_Asm_32: + case ARM::VLD4qWB_fixed_Asm_8: + case ARM::VLD4qWB_fixed_Asm_16: + case ARM::VLD4qWB_fixed_Asm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm + TmpInst.addOperand(Inst.getOperand(3)); // CondCode + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } + + case ARM::VLD4dWB_register_Asm_8: + case ARM::VLD4dWB_register_Asm_16: + case ARM::VLD4dWB_register_Asm_32: + case ARM::VLD4qWB_register_Asm_8: + case ARM::VLD4qWB_register_Asm_16: + case ARM::VLD4qWB_register_Asm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(Inst.getOperand(3)); // Rm + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + // VST3 multiple 3-element structure instructions. 
+ case ARM::VST3dAsm_8:
+ case ARM::VST3dAsm_16:
+ case ARM::VST3dAsm_32:
+ case ARM::VST3qAsm_8:
+ case ARM::VST3qAsm_16:
+ case ARM::VST3qAsm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST3dWB_fixed_Asm_8:
+ case ARM::VST3dWB_fixed_Asm_16:
+ case ARM::VST3dWB_fixed_Asm_32:
+ case ARM::VST3qWB_fixed_Asm_8:
+ case ARM::VST3qWB_fixed_Asm_16:
+ case ARM::VST3qWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST3dWB_register_Asm_8:
+ case ARM::VST3dWB_register_Asm_16:
+ case ARM::VST3dWB_register_Asm_32:
+ case ARM::VST3qWB_register_Asm_8:
+ case ARM::VST3qWB_register_Asm_16:
+ case ARM::VST3qWB_register_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(3)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ // VST4 multiple 4-element structure instructions.
+ case ARM::VST4dAsm_8: + case ARM::VST4dAsm_16: + case ARM::VST4dAsm_32: + case ARM::VST4qAsm_8: + case ARM::VST4qAsm_16: + case ARM::VST4qAsm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(3)); // CondCode + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } + + case ARM::VST4dWB_fixed_Asm_8: + case ARM::VST4dWB_fixed_Asm_16: + case ARM::VST4dWB_fixed_Asm_32: + case ARM::VST4qWB_fixed_Asm_8: + case ARM::VST4qWB_fixed_Asm_16: + case ARM::VST4qWB_fixed_Asm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(3)); // CondCode + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } + + case ARM::VST4dWB_register_Asm_8: + case ARM::VST4dWB_register_Asm_16: + case ARM::VST4dWB_register_Asm_32: + case ARM::VST4qWB_register_Asm_8: + case ARM::VST4qWB_register_Asm_16: + case ARM::VST4qWB_register_Asm_32: { + MCInst TmpInst; + unsigned Spacing; + TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing)); + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn + TmpInst.addOperand(Inst.getOperand(2)); // alignment + TmpInst.addOperand(Inst.getOperand(3)); // Rm + TmpInst.addOperand(Inst.getOperand(0)); // Vd + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 2)); + TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() + + Spacing * 3)); + TmpInst.addOperand(Inst.getOperand(4)); // CondCode + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + + // Handle encoding choice for the shift-immediate instructions. + case ARM::t2LSLri: + case ARM::t2LSRri: + case ARM::t2ASRri: { + if (isARMLowRegister(Inst.getOperand(0).getReg()) && + Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() && + Inst.getOperand(5).getReg() == (inITBlock() ? 0 : ARM::CPSR) && + !(static_cast<ARMOperand*>(Operands[3])->isToken() && + static_cast<ARMOperand*>(Operands[3])->getToken() == ".w")) { + unsigned NewOpc; + switch (Inst.getOpcode()) { + default: llvm_unreachable("unexpected opcode"); + case ARM::t2LSLri: NewOpc = ARM::tLSLri; break; + case ARM::t2LSRri: NewOpc = ARM::tLSRri; break; + case ARM::t2ASRri: NewOpc = ARM::tASRri; break; + } + // The Thumb1 operands aren't in the same order. Awesome, eh? 
+ MCInst TmpInst;
+ TmpInst.setOpcode(NewOpc);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(5));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(Inst.getOperand(3));
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+ return false;
+ }
+
+ // Handle the Thumb2 mode MOV complex aliases.
+ case ARM::t2MOVsr:
+ case ARM::t2MOVSsr: {
+ // Which instruction to expand to depends on the CCOut operand and
+ // whether we're in an IT block if the register operands are low
+ // registers.
+ bool isNarrow = false;
+ if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
+ isARMLowRegister(Inst.getOperand(1).getReg()) &&
+ isARMLowRegister(Inst.getOperand(2).getReg()) &&
+ Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() &&
+ inITBlock() == (Inst.getOpcode() == ARM::t2MOVsr))
+ isNarrow = true;
+ MCInst TmpInst;
+ unsigned newOpc;
+ switch(ARM_AM::getSORegShOp(Inst.getOperand(3).getImm())) {
+ default: llvm_unreachable("unexpected opcode!");
+ case ARM_AM::asr: newOpc = isNarrow ? ARM::tASRrr : ARM::t2ASRrr; break;
+ case ARM_AM::lsr: newOpc = isNarrow ? ARM::tLSRrr : ARM::t2LSRrr; break;
+ case ARM_AM::lsl: newOpc = isNarrow ? ARM::tLSLrr : ARM::t2LSLrr; break;
+ case ARM_AM::ror: newOpc = isNarrow ? ARM::tROR : ARM::t2RORrr; break;
+ }
+ TmpInst.setOpcode(newOpc);
+ TmpInst.addOperand(Inst.getOperand(0)); // Rd
+ if (isNarrow)
+ TmpInst.addOperand(MCOperand::CreateReg(
+ Inst.getOpcode() == ARM::t2MOVSsr ? ARM::CPSR : 0));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // Rm
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ if (!isNarrow)
+ TmpInst.addOperand(MCOperand::CreateReg(
+ Inst.getOpcode() == ARM::t2MOVSsr ? ARM::CPSR : 0));
+ Inst = TmpInst;
+ return true;
+ }
+ case ARM::t2MOVsi:
+ case ARM::t2MOVSsi: {
+ // Which instruction to expand to depends on the CCOut operand and
+ // whether we're in an IT block if the register operands are low
+ // registers.
+ bool isNarrow = false;
+ if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
+ isARMLowRegister(Inst.getOperand(1).getReg()) &&
+ inITBlock() == (Inst.getOpcode() == ARM::t2MOVsi))
+ isNarrow = true;
+ MCInst TmpInst;
+ unsigned newOpc;
+ switch(ARM_AM::getSORegShOp(Inst.getOperand(2).getImm())) {
+ default: llvm_unreachable("unexpected opcode!");
+ case ARM_AM::asr: newOpc = isNarrow ? ARM::tASRri : ARM::t2ASRri; break;
+ case ARM_AM::lsr: newOpc = isNarrow ? ARM::tLSRri : ARM::t2LSRri; break;
+ case ARM_AM::lsl: newOpc = isNarrow ? ARM::tLSLri : ARM::t2LSLri; break;
+ case ARM_AM::ror: newOpc = ARM::t2RORri; isNarrow = false; break;
+ case ARM_AM::rrx: isNarrow = false; newOpc = ARM::t2RRX; break;
+ }
+ unsigned Amount = ARM_AM::getSORegOffset(Inst.getOperand(2).getImm());
+ if (Amount == 32) Amount = 0;
+ TmpInst.setOpcode(newOpc);
+ TmpInst.addOperand(Inst.getOperand(0)); // Rd
+ if (isNarrow)
+ TmpInst.addOperand(MCOperand::CreateReg(
+ Inst.getOpcode() == ARM::t2MOVSsi ? ARM::CPSR : 0));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ if (newOpc != ARM::t2RRX)
+ TmpInst.addOperand(MCOperand::CreateImm(Amount));
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ if (!isNarrow)
+ TmpInst.addOperand(MCOperand::CreateReg(
+ Inst.getOpcode() == ARM::t2MOVSsi ? ARM::CPSR : 0));
+ Inst = TmpInst;
+ return true;
+ }
+ // Handle the ARM mode MOV complex aliases.
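+ // Under these aliases, e.g., "asr r0, r1, r2" is really
+ // "mov r0, r1, asr r2" (MOVsr) and "lsr r0, r1, #4" is
+ // "mov r0, r1, lsr #4" (MOVsi), so the cases below rebuild the shifter
+ // operand via ARM_AM::getSORegOpc(); a shift immediate of zero
+ // degenerates to a plain MOVr instead.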
+ case ARM::ASRr: + case ARM::LSRr: + case ARM::LSLr: + case ARM::RORr: { + ARM_AM::ShiftOpc ShiftTy; + switch(Inst.getOpcode()) { + default: llvm_unreachable("unexpected opcode!"); + case ARM::ASRr: ShiftTy = ARM_AM::asr; break; + case ARM::LSRr: ShiftTy = ARM_AM::lsr; break; + case ARM::LSLr: ShiftTy = ARM_AM::lsl; break; + case ARM::RORr: ShiftTy = ARM_AM::ror; break; + } + unsigned Shifter = ARM_AM::getSORegOpc(ShiftTy, 0); + MCInst TmpInst; + TmpInst.setOpcode(ARM::MOVsr); + TmpInst.addOperand(Inst.getOperand(0)); // Rd + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(2)); // Rm + TmpInst.addOperand(MCOperand::CreateImm(Shifter)); // Shift value and ty + TmpInst.addOperand(Inst.getOperand(3)); // CondCode + TmpInst.addOperand(Inst.getOperand(4)); + TmpInst.addOperand(Inst.getOperand(5)); // cc_out + Inst = TmpInst; + return true; + } + case ARM::ASRi: + case ARM::LSRi: + case ARM::LSLi: + case ARM::RORi: { + ARM_AM::ShiftOpc ShiftTy; + switch(Inst.getOpcode()) { + default: llvm_unreachable("unexpected opcode!"); + case ARM::ASRi: ShiftTy = ARM_AM::asr; break; + case ARM::LSRi: ShiftTy = ARM_AM::lsr; break; + case ARM::LSLi: ShiftTy = ARM_AM::lsl; break; + case ARM::RORi: ShiftTy = ARM_AM::ror; break; + } + // A shift by zero is a plain MOVr, not a MOVsi. + unsigned Amt = Inst.getOperand(2).getImm(); + unsigned Opc = Amt == 0 ? ARM::MOVr : ARM::MOVsi; + unsigned Shifter = ARM_AM::getSORegOpc(ShiftTy, Amt); + MCInst TmpInst; + TmpInst.setOpcode(Opc); + TmpInst.addOperand(Inst.getOperand(0)); // Rd + TmpInst.addOperand(Inst.getOperand(1)); // Rn + if (Opc == ARM::MOVsi) + TmpInst.addOperand(MCOperand::CreateImm(Shifter)); // Shift value and ty + TmpInst.addOperand(Inst.getOperand(3)); // CondCode + TmpInst.addOperand(Inst.getOperand(4)); + TmpInst.addOperand(Inst.getOperand(5)); // cc_out + Inst = TmpInst; + return true; + } + case ARM::RRXi: { + unsigned Shifter = ARM_AM::getSORegOpc(ARM_AM::rrx, 0); + MCInst TmpInst; + TmpInst.setOpcode(ARM::MOVsi); + TmpInst.addOperand(Inst.getOperand(0)); // Rd + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(MCOperand::CreateImm(Shifter)); // Shift value and ty + TmpInst.addOperand(Inst.getOperand(2)); // CondCode + TmpInst.addOperand(Inst.getOperand(3)); + TmpInst.addOperand(Inst.getOperand(4)); // cc_out + Inst = TmpInst; + return true; + } + case ARM::t2LDMIA_UPD: { + // If this is a load of a single register, then we should use + // a post-indexed LDR instruction instead, per the ARM ARM. + if (Inst.getNumOperands() != 5) + return false; + MCInst TmpInst; + TmpInst.setOpcode(ARM::t2LDR_POST); + TmpInst.addOperand(Inst.getOperand(4)); // Rt + TmpInst.addOperand(Inst.getOperand(0)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(MCOperand::CreateImm(4)); + TmpInst.addOperand(Inst.getOperand(2)); // CondCode + TmpInst.addOperand(Inst.getOperand(3)); + Inst = TmpInst; + return true; + } + case ARM::t2STMDB_UPD: { + // If this is a store of a single register, then we should use + // a pre-indexed STR instruction instead, per the ARM ARM. 
+ if (Inst.getNumOperands() != 5)
+ return false;
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::t2STR_PRE);
+ TmpInst.addOperand(Inst.getOperand(0)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(4)); // Rt
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(MCOperand::CreateImm(-4));
+ TmpInst.addOperand(Inst.getOperand(2)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(3));
+ Inst = TmpInst;
+ return true;
+ }
case ARM::LDMIA_UPD:
// If this is a load of a single register via a 'pop', then we should use
// a post-indexed LDR instruction instead, per the ARM ARM.
@@ -4160,6 +6870,7 @@ processInstruction(MCInst &Inst,
TmpInst.addOperand(Inst.getOperand(2)); // CondCode
TmpInst.addOperand(Inst.getOperand(3));
Inst = TmpInst;
+ return true;
}
break;
case ARM::STMDB_UPD:
@@ -4178,41 +6889,117 @@ processInstruction(MCInst &Inst,
Inst = TmpInst;
}
break;
+ case ARM::t2ADDri12:
+ // If the immediate fits for encoding T3 (t2ADDri) and the generic "add"
+ // mnemonic was used (not "addw"), encoding T3 is preferred.
+ if (static_cast<ARMOperand*>(Operands[0])->getToken() != "add" ||
+ ARM_AM::getT2SOImmVal(Inst.getOperand(2).getImm()) == -1)
+ break;
+ Inst.setOpcode(ARM::t2ADDri);
+ Inst.addOperand(MCOperand::CreateReg(0)); // cc_out
+ break;
+ case ARM::t2SUBri12:
+ // If the immediate fits for encoding T3 (t2SUBri) and the generic "sub"
+ // mnemonic was used (not "subw"), encoding T3 is preferred.
+ if (static_cast<ARMOperand*>(Operands[0])->getToken() != "sub" ||
+ ARM_AM::getT2SOImmVal(Inst.getOperand(2).getImm()) == -1)
+ break;
+ Inst.setOpcode(ARM::t2SUBri);
+ Inst.addOperand(MCOperand::CreateReg(0)); // cc_out
+ break;
case ARM::tADDi8:
// If the immediate is in the range 0-7, we want tADDi3 iff Rd was
// explicitly specified. From the ARM ARM: "Encoding T1 is preferred
// to encoding T2 if <Rd> is specified and encoding T2 is preferred
// to encoding T1 if <Rd> is omitted."
- if (Inst.getOperand(3).getImm() < 8 && Operands.size() == 6)
+ if ((unsigned)Inst.getOperand(3).getImm() < 8 && Operands.size() == 6) {
Inst.setOpcode(ARM::tADDi3);
+ return true;
+ }
break;
case ARM::tSUBi8:
// If the immediate is in the range 0-7, we want tSUBi3 iff Rd was
// explicitly specified. From the ARM ARM: "Encoding T1 is preferred
// to encoding T2 if <Rd> is specified and encoding T2 is preferred
// to encoding T1 if <Rd> is omitted."
- if (Inst.getOperand(3).getImm() < 8 && Operands.size() == 6)
+ if ((unsigned)Inst.getOperand(3).getImm() < 8 && Operands.size() == 6) {
Inst.setOpcode(ARM::tSUBi3);
+ return true;
+ }
break;
+ case ARM::t2ADDri:
+ case ARM::t2SUBri: {
+ // If the destination and first source operand are the same, and
+ // the flags are compatible with the current IT status, use encoding T2
+ // instead of T3. For compatibility with the system 'as'. Make sure the
+ // wide encoding wasn't explicit.
+ if (Inst.getOperand(0).getReg() != Inst.getOperand(1).getReg() ||
+ !isARMLowRegister(Inst.getOperand(0).getReg()) ||
+ (unsigned)Inst.getOperand(2).getImm() > 255 ||
+ ((!inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR) ||
+ (inITBlock() && Inst.getOperand(5).getReg() != 0)) ||
+ (static_cast<ARMOperand*>(Operands[3])->isToken() &&
+ static_cast<ARMOperand*>(Operands[3])->getToken() == ".w"))
+ break;
+ MCInst TmpInst;
+ TmpInst.setOpcode(Inst.getOpcode() == ARM::t2ADDri ?
+ ARM::tADDi8 : ARM::tSUBi8); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(5)); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(2)); + TmpInst.addOperand(Inst.getOperand(3)); + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } + case ARM::t2ADDrr: { + // If the destination and first source operand are the same, and + // there's no setting of the flags, use encoding T2 instead of T3. + // Note that this is only for ADD, not SUB. This mirrors the system + // 'as' behaviour. Make sure the wide encoding wasn't explicit. + if (Inst.getOperand(0).getReg() != Inst.getOperand(1).getReg() || + Inst.getOperand(5).getReg() != 0 || + (static_cast<ARMOperand*>(Operands[3])->isToken() && + static_cast<ARMOperand*>(Operands[3])->getToken() == ".w")) + break; + MCInst TmpInst; + TmpInst.setOpcode(ARM::tADDhirr); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(2)); + TmpInst.addOperand(Inst.getOperand(3)); + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } case ARM::tB: // A Thumb conditional branch outside of an IT block is a tBcc. - if (Inst.getOperand(1).getImm() != ARMCC::AL && !inITBlock()) + if (Inst.getOperand(1).getImm() != ARMCC::AL && !inITBlock()) { Inst.setOpcode(ARM::tBcc); + return true; + } break; case ARM::t2B: // A Thumb2 conditional branch outside of an IT block is a t2Bcc. - if (Inst.getOperand(1).getImm() != ARMCC::AL && !inITBlock()) + if (Inst.getOperand(1).getImm() != ARMCC::AL && !inITBlock()){ Inst.setOpcode(ARM::t2Bcc); + return true; + } break; case ARM::t2Bcc: // If the conditional is AL or we're in an IT block, we really want t2B. - if (Inst.getOperand(1).getImm() == ARMCC::AL || inITBlock()) + if (Inst.getOperand(1).getImm() == ARMCC::AL || inITBlock()) { Inst.setOpcode(ARM::t2B); + return true; + } break; case ARM::tBcc: // If the conditional is AL, we really want tB. - if (Inst.getOperand(1).getImm() == ARMCC::AL) + if (Inst.getOperand(1).getImm() == ARMCC::AL) { Inst.setOpcode(ARM::tB); + return true; + } break; case ARM::tLDMIA: { // If the register list contains any high registers, or if the writeback @@ -4235,6 +7022,7 @@ processInstruction(MCInst &Inst, if (hasWritebackToken) Inst.insert(Inst.begin(), MCOperand::CreateReg(Inst.getOperand(0).getReg())); + return true; } break; } @@ -4248,14 +7036,40 @@ processInstruction(MCInst &Inst, // 16-bit encoding isn't sufficient. Switch to the 32-bit version. assert (isThumbTwo()); Inst.setOpcode(ARM::t2STMIA_UPD); + return true; } break; } + case ARM::tPOP: { + bool listContainsBase; + // If the register list contains any high registers, we need to use + // the 32-bit encoding instead if we're in Thumb2. Otherwise, this + // should have generated an error in validateInstruction(). + if (!checkLowRegisterList(Inst, 2, 0, ARM::PC, listContainsBase)) + return false; + assert (isThumbTwo()); + Inst.setOpcode(ARM::t2LDMIA_UPD); + // Add the base register and writeback operands. + Inst.insert(Inst.begin(), MCOperand::CreateReg(ARM::SP)); + Inst.insert(Inst.begin(), MCOperand::CreateReg(ARM::SP)); + return true; + } + case ARM::tPUSH: { + bool listContainsBase; + if (!checkLowRegisterList(Inst, 2, 0, ARM::LR, listContainsBase)) + return false; + assert (isThumbTwo()); + Inst.setOpcode(ARM::t2STMDB_UPD); + // Add the base register and writeback operands. 
+ Inst.insert(Inst.begin(), MCOperand::CreateReg(ARM::SP)); + Inst.insert(Inst.begin(), MCOperand::CreateReg(ARM::SP)); + return true; + } case ARM::t2MOVi: { // If we can use the 16-bit encoding and the user didn't explicitly // request the 32-bit variant, transform it here. if (isARMLowRegister(Inst.getOperand(0).getReg()) && - Inst.getOperand(1).getImm() <= 255 && + (unsigned)Inst.getOperand(1).getImm() <= 255 && ((!inITBlock() && Inst.getOperand(2).getImm() == ARMCC::AL && Inst.getOperand(4).getReg() == ARM::CPSR) || (inITBlock() && Inst.getOperand(4).getReg() == 0)) && @@ -4270,6 +7084,7 @@ processInstruction(MCInst &Inst, TmpInst.addOperand(Inst.getOperand(2)); TmpInst.addOperand(Inst.getOperand(3)); Inst = TmpInst; + return true; } break; } @@ -4290,6 +7105,7 @@ processInstruction(MCInst &Inst, TmpInst.addOperand(Inst.getOperand(2)); TmpInst.addOperand(Inst.getOperand(3)); Inst = TmpInst; + return true; } break; } @@ -4320,9 +7136,61 @@ processInstruction(MCInst &Inst, TmpInst.addOperand(Inst.getOperand(3)); TmpInst.addOperand(Inst.getOperand(4)); Inst = TmpInst; + return true; } break; } + case ARM::MOVsi: { + ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(Inst.getOperand(2).getImm()); + if (SOpc == ARM_AM::rrx) return false; + if (ARM_AM::getSORegOffset(Inst.getOperand(2).getImm()) == 0) { + // Shifting by zero is accepted as a vanilla 'MOVr' + MCInst TmpInst; + TmpInst.setOpcode(ARM::MOVr); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(Inst.getOperand(3)); + TmpInst.addOperand(Inst.getOperand(4)); + TmpInst.addOperand(Inst.getOperand(5)); + Inst = TmpInst; + return true; + } + return false; + } + case ARM::ANDrsi: + case ARM::ORRrsi: + case ARM::EORrsi: + case ARM::BICrsi: + case ARM::SUBrsi: + case ARM::ADDrsi: { + unsigned newOpc; + ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(Inst.getOperand(3).getImm()); + if (SOpc == ARM_AM::rrx) return false; + switch (Inst.getOpcode()) { + default: llvm_unreachable("unexpected opcode!"); + case ARM::ANDrsi: newOpc = ARM::ANDrr; break; + case ARM::ORRrsi: newOpc = ARM::ORRrr; break; + case ARM::EORrsi: newOpc = ARM::EORrr; break; + case ARM::BICrsi: newOpc = ARM::BICrr; break; + case ARM::SUBrsi: newOpc = ARM::SUBrr; break; + case ARM::ADDrsi: newOpc = ARM::ADDrr; break; + } + // If the shift is by zero, use the non-shifted instruction definition. + if (ARM_AM::getSORegOffset(Inst.getOperand(3).getImm()) == 0) { + MCInst TmpInst; + TmpInst.setOpcode(newOpc); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(Inst.getOperand(2)); + TmpInst.addOperand(Inst.getOperand(4)); + TmpInst.addOperand(Inst.getOperand(5)); + TmpInst.addOperand(Inst.getOperand(6)); + Inst = TmpInst; + return true; + } + return false; + } + case ARM::ITasm: case ARM::t2IT: { // The mask bits for all but the first condition are represented as // the low bit of the condition code value implies 't'. We currently @@ -4352,13 +7220,14 @@ processInstruction(MCInst &Inst, break; } } + return false; } unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) { // 16-bit thumb arithmetic instructions either require or preclude the 'S' // suffix depending on whether they're in an IT block or not. 
unsigned Opc = Inst.getOpcode();
- MCInstrDesc &MCID = getInstDesc(Opc);
+ const MCInstrDesc &MCID = getInstDesc(Opc);
if (MCID.TSFlags & ARMII::ThumbArithFlagSetting) {
assert(MCID.hasOptionalDef() &&
"optionally flag setting instruction missing optional def operand");
@@ -4417,14 +7286,23 @@ MatchAndEmitInstruction(SMLoc IDLoc,
}
// Some instructions need post-processing to, for example, tweak which
- // encoding is selected.
- processInstruction(Inst, Operands);
+ // encoding is selected. Loop on it while changes happen so the
+ // individual transformations can chain off each other. E.g.,
+ // tPOP(r8)->t2LDMIA_UPD(sp,r8)->t2LDR_POST(sp,r8)
+ while (processInstruction(Inst, Operands))
+ ;
// Only move forward at the very end so that everything in validate
// and process gets a consistent answer about whether we're in an IT
// block.
forwardITPosition();
+ // ITasm is an ARM mode pseudo-instruction that just sets the ITblock and
+ // doesn't actually encode.
+ if (Inst.getOpcode() == ARM::ITasm)
+ return false;
+
+ Inst.setLoc(IDLoc);
Out.EmitInstruction(Inst);
return false;
case Match_MissingFeature:
@@ -4458,7 +7336,6 @@ MatchAndEmitInstruction(SMLoc IDLoc,
}
llvm_unreachable("Implement any new match types added!");
- return true;
}
/// parseDirective parses the arm specific directives
@@ -4468,12 +7345,20 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
return parseDirectiveWord(4, DirectiveID.getLoc());
else if (IDVal == ".thumb")
return parseDirectiveThumb(DirectiveID.getLoc());
+ else if (IDVal == ".arm")
+ return parseDirectiveARM(DirectiveID.getLoc());
else if (IDVal == ".thumb_func")
return parseDirectiveThumbFunc(DirectiveID.getLoc());
else if (IDVal == ".code")
return parseDirectiveCode(DirectiveID.getLoc());
else if (IDVal == ".syntax")
return parseDirectiveSyntax(DirectiveID.getLoc());
+ else if (IDVal == ".unreq")
+ return parseDirectiveUnreq(DirectiveID.getLoc());
+ else if (IDVal == ".arch")
+ return parseDirectiveArch(DirectiveID.getLoc());
+ else if (IDVal == ".eabi_attribute")
+ return parseDirectiveEabiAttr(DirectiveID.getLoc());
return true;
}
@@ -4509,9 +7394,22 @@ bool ARMAsmParser::parseDirectiveThumb(SMLoc L) {
return Error(L, "unexpected token in directive");
Parser.Lex();
- // TODO: set thumb mode
- // TODO: tell the MC streamer the mode
- // getParser().getStreamer().Emit???();
+ if (!isThumb())
+ SwitchMode();
+ getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
+ return false;
+}
+
+/// parseDirectiveARM
+/// ::= .arm
+bool ARMAsmParser::parseDirectiveARM(SMLoc L) {
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return Error(L, "unexpected token in directive");
+ Parser.Lex();
+
+ if (isThumb())
+ SwitchMode();
+ getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
return false;
}
@@ -4521,24 +7419,33 @@ bool ARMAsmParser::parseDirectiveThumbFunc(SMLoc L) {
const MCAsmInfo &MAI = getParser().getStreamer().getContext().getAsmInfo();
bool isMachO = MAI.hasSubsectionsViaSymbols();
StringRef Name;
+ bool needFuncName = true;
- // Darwin asm has function name after .thumb_func direction
+ // Darwin asm has (optionally) function name after .thumb_func directive
// ELF doesn't
if (isMachO) {
const AsmToken &Tok = Parser.getTok();
- if (Tok.isNot(AsmToken::Identifier) && Tok.isNot(AsmToken::String))
- return Error(L, "unexpected token in .thumb_func directive");
- Name = Tok.getString();
- Parser.Lex(); // Consume the identifier token.
+ if (Tok.isNot(AsmToken::EndOfStatement)) {
+ if (Tok.isNot(AsmToken::Identifier) && Tok.isNot(AsmToken::String))
+ return Error(L, "unexpected token in .thumb_func directive");
+ Name = Tok.getIdentifier();
+ Parser.Lex(); // Consume the identifier token.
+ needFuncName = false;
+ }
}
if (getLexer().isNot(AsmToken::EndOfStatement))
return Error(L, "unexpected token in directive");
- Parser.Lex();
+
+ // Eat the end of statement and any blank lines that follow.
+ while (getLexer().is(AsmToken::EndOfStatement))
+ Parser.Lex();
// FIXME: assuming function name will be the line following .thumb_func
- if (!isMachO) {
- Name = Parser.getTok().getString();
+ // We really should be checking the next symbol definition even if there's
+ // stuff in between.
+ if (needFuncName) {
+ Name = Parser.getTok().getIdentifier();
}
// Mark symbol as a thumb symbol.
@@ -4601,6 +7508,57 @@ bool ARMAsmParser::parseDirectiveCode(SMLoc L) {
return false;
}
+/// parseDirectiveReq
+/// ::= name .req registername
+bool ARMAsmParser::parseDirectiveReq(StringRef Name, SMLoc L) {
+ Parser.Lex(); // Eat the '.req' token.
+ unsigned Reg;
+ SMLoc SRegLoc, ERegLoc;
+ if (ParseRegister(Reg, SRegLoc, ERegLoc)) {
+ Parser.EatToEndOfStatement();
+ return Error(SRegLoc, "register name expected");
+ }
+
+ // Shouldn't be anything else.
+ if (Parser.getTok().isNot(AsmToken::EndOfStatement)) {
+ Parser.EatToEndOfStatement();
+ return Error(Parser.getTok().getLoc(),
+ "unexpected input in .req directive.");
+ }
+
+ Parser.Lex(); // Consume the EndOfStatement
+
+ if (RegisterReqs.GetOrCreateValue(Name, Reg).getValue() != Reg)
+ return Error(SRegLoc, "redefinition of '" + Name +
+ "' does not match original.");
+
+ return false;
+}
+
+/// parseDirectiveUnreq
+/// ::= .unreq registername
+bool ARMAsmParser::parseDirectiveUnreq(SMLoc L) {
+ if (Parser.getTok().isNot(AsmToken::Identifier)) {
+ Parser.EatToEndOfStatement();
+ return Error(L, "unexpected input in .unreq directive.");
+ }
+ RegisterReqs.erase(Parser.getTok().getIdentifier());
+ Parser.Lex(); // Eat the identifier.
+ return false;
+}
+
+/// parseDirectiveArch
+/// ::= .arch token
+bool ARMAsmParser::parseDirectiveArch(SMLoc L) {
+ return true;
+}
+
+/// parseDirectiveEabiAttr
+/// ::= .eabi_attribute int, int
+bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) {
+ return true;
+}
+
extern "C" void LLVMInitializeARMAsmLexer();
/// Force static initialization.
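The retry loop added to MatchAndEmitInstruction() above deserves a note: processInstruction() now returns true whenever it rewrote Inst, so the individual transformations can cascade (a single-register pop first becomes t2LDMIA_UPD, and the single-register-LDM case then narrows that to t2LDR_POST). A minimal sketch of the same fixed-point idiom, with a toy string-based rewriter standing in for the real MCInst machinery (the rewrite rules below are illustrative only, not LLVM API):

  #include <cstdio>
  #include <string>

  // Toy stand-in for ARMAsmParser::processInstruction(): rewrite the
  // instruction once if a rule applies, and report whether anything changed.
  static bool processInstruction(std::string &Inst) {
    if (Inst == "tPOP {r8}") {
      Inst = "t2LDMIA_UPD sp!, {r8}";   // high register forces the wide LDM
      return true;
    }
    if (Inst == "t2LDMIA_UPD sp!, {r8}") {
      Inst = "t2LDR_POST r8, [sp], #4"; // single-register LDM becomes an LDR
      return true;
    }
    return false; // fixed point reached; nothing left to simplify
  }

  int main() {
    std::string Inst = "tPOP {r8}";
    // Same shape as the parser's loop: keep re-processing until the
    // instruction stops changing, so rewrites can chain off each other.
    while (processInstruction(Inst))
      ;
    std::printf("%s\n", Inst.c_str()); // prints: t2LDR_POST r8, [sp], #4
    return 0;
  }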
diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 8f2f813..2f504b7 100644 --- a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -1,4 +1,4 @@ -//===- ARMDisassembler.cpp - Disassembler for ARM/Thumb ISA -----*- C++ -*-===// +//===-- ARMDisassembler.cpp - Disassembler for ARM/Thumb ISA --------------===// // // The LLVM Compiler Infrastructure // @@ -9,17 +9,16 @@ #define DEBUG_TYPE "arm-disassembler" -#include "ARM.h" -#include "ARMRegisterInfo.h" -#include "ARMSubtarget.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMMCExpr.h" #include "MCTargetDesc/ARMBaseInfo.h" #include "llvm/MC/EDInstInfo.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MemoryObject.h" #include "llvm/Support/ErrorHandling.h" @@ -52,7 +51,7 @@ public: raw_ostream &cStream) const; /// getEDInfo - See MCDisassembler. - EDInstInfo *getEDInfo() const; + const EDInstInfo *getEDInfo() const; private: }; @@ -77,7 +76,7 @@ public: raw_ostream &cStream) const; /// getEDInfo - See MCDisassembler. - EDInstInfo *getEDInfo() const; + const EDInstInfo *getEDInfo() const; private: mutable std::vector<unsigned> ITBlock; DecodeStatus AddThumbPredicate(MCInst&) const; @@ -97,224 +96,236 @@ static bool Check(DecodeStatus &Out, DecodeStatus In) { Out = In; return false; } - return false; + llvm_unreachable("Invalid DecodeStatus!"); } // Forward declare these because the autogenerated code will reference them. // Definitions are further down. 
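The Check() helper rewritten above (its dead trailing return replaced by llvm_unreachable) folds per-operand decode results into one running DecodeStatus: Success leaves the status alone, SoftFail records an UNPREDICTABLE but still decodable encoding and continues, and Fail aborts. This is also how DecodeGPRnopcRegisterClass below now soft-fails on r15 instead of rejecting outright. A self-contained sketch of that lattice, with a local enum standing in for MCDisassembler's (the values shown match the ones LLVM uses):

    #include <cstdio>

    enum DecodeStatus { Fail = 0, SoftFail = 1, Success = 3 };

    // Fold a sub-decoder's result into the running status. Only a hard Fail
    // stops decoding; SoftFail downgrades the status but keeps going.
    static bool Check(DecodeStatus &Out, DecodeStatus In) {
      switch (In) {
      case Success:  return true;            // Out keeps its current value
      case SoftFail: Out = In; return true;  // UNPREDICTABLE, but decodable
      case Fail:     Out = In; return false; // bail out
      }
      return false; // not reached for valid inputs
    }

    int main() {
      DecodeStatus S = Success;
      Check(S, SoftFail);                 // e.g. GPRnopc decoder handed r15
      bool KeepGoing = Check(S, Success); // later operands decode fine
      std::printf("%d %d\n", (int)S, (int)KeepGoing); // "1 1": overall SoftFail
    }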
-static DecodeStatus DecodeGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeGPRnopcRegisterClass(llvm::MCInst &Inst, +static DecodeStatus DecodeGPRnopcRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); -static DecodeStatus DecodetGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodetGPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); -static DecodeStatus DecodetcGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodetcGPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); -static DecodeStatus DecoderGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecoderGPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeSPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodeSPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeDPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodeDPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeDPR_8RegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodeDPR_8RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeDPR_VFP2RegisterClass(llvm::MCInst &Inst, +static DecodeStatus DecodeDPR_VFP2RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeQPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodeQPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeDPairRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeDPairSpacedRegisterClass(MCInst &Inst, + unsigned RegNo, uint64_t Address, + const void *Decoder); -static DecodeStatus DecodePredicateOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodePredicateOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeCCOutOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeCCOutOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeSOImmOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeSOImmOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeRegListOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeRegListOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeSPRRegListOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeSPRRegListOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeDPRRegListOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeDPRRegListOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeBitfieldMaskOperand(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeBitfieldMaskOperand(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus 
DecodeCopMemInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeCopMemInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeAddrMode2IdxInstruction(llvm::MCInst &Inst, +static DecodeStatus DecodeAddrMode2IdxInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeSORegMemOperand(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeSORegMemOperand(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeAddrMode3Instruction(llvm::MCInst &Inst,unsigned Insn, +static DecodeStatus DecodeAddrMode3Instruction(MCInst &Inst,unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeSORegImmOperand(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeSORegImmOperand(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeSORegRegOperand(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeSORegRegOperand(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeMemMultipleWritebackInstruction(llvm::MCInst & Inst, +static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst & Inst, unsigned Insn, uint64_t Adddress, const void *Decoder); -static DecodeStatus DecodeT2MOVTWInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeT2MOVTWInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeArmMOVTWInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeArmMOVTWInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeSMLAInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeSMLAInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeCPSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeCPSInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeT2CPSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeT2CPSInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeAddrModeImm12Operand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeAddrModeImm12Operand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeAddrMode5Operand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeAddrMode5Operand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeAddrMode7Operand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeAddrMode7Operand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2BInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeBranchImmInstruction(llvm::MCInst &Inst,unsigned Insn, +static DecodeStatus DecodeBranchImmInstruction(MCInst &Inst,unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVCVTImmOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeAddrMode6Operand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeAddrMode6Operand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeVLDInstruction(MCInst &Inst, unsigned Val, uint64_t Address, const 
void *Decoder); -static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeVSTInstruction(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeVLD1DupInstruction(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVLD1DupInstruction(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeVLD2DupInstruction(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVLD2DupInstruction(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeVLD3DupInstruction(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVLD3DupInstruction(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeVLD4DupInstruction(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVLD4DupInstruction(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeNEONModImmInstruction(MCInst &Inst,unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeNEONModImmInstruction(llvm::MCInst &Inst,unsigned Val, +static DecodeStatus DecodeVSHLMaxInstruction(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVSHLMaxInstruction(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeShiftRight8Imm(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeShiftRight8Imm(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeShiftRight16Imm(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeShiftRight16Imm(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeShiftRight32Imm(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeShiftRight32Imm(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeShiftRight64Imm(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeShiftRight64Imm(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeTBLInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeTBLInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodePostIdxReg(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodePostIdxReg(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeCoprocessor(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeCoprocessor(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeMemBarrierOption(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeMemBarrierOption(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeMSRMask(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeDoubleRegLoad(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeDoubleRegLoad(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeDoubleRegStore(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeDoubleRegStore(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeLDRPreImm(MCInst &Inst, unsigned Insn, 
uint64_t Address, const void *Decoder); -static DecodeStatus DecodeLDRPreImm(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeLDRPreReg(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeLDRPreReg(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeSTRPreImm(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeSTRPreImm(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeSTRPreReg(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeSTRPreReg(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVLD1LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVLD1LN(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVLD2LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVLD2LN(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVLD3LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVLD3LN(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVLD4LN(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVST1LN(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVST2LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVST2LN(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVST3LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVST3LN(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVST4LN(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVMOVSRR(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVMOVSRR(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVMOVRRS(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVMOVRRS(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeSwap(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); -static DecodeStatus DecodeThumbAddSpecialReg(llvm::MCInst &Inst, uint16_t Insn, + +static DecodeStatus DecodeThumbAddSpecialReg(MCInst &Inst, uint16_t Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeThumbBROperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeThumbBROperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeT2BROperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2BROperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeThumbCmpBROperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeThumbCmpBROperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeThumbAddrModeRR(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus 
DecodeThumbAddrModeRR(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeThumbAddrModeIS(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeThumbAddrModeIS(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeThumbAddrModePC(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeThumbAddrModePC(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeThumbAddrModeSP(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeThumbAddrModeSP(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeT2AddrModeSOReg(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2AddrModeSOReg(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeT2LoadShift(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeT2Imm8S4(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeT2AddrModeImm8s4(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2AddrModeImm8s4(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeT2AddrModeImm0_1020s4(llvm::MCInst &Inst,unsigned Val, +static DecodeStatus DecodeT2AddrModeImm0_1020s4(MCInst &Inst,unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeT2Imm8(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2Imm8(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeT2AddrModeImm8(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2AddrModeImm8(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeThumbAddSPImm(llvm::MCInst &Inst, uint16_t Val, +static DecodeStatus DecodeThumbAddSPImm(MCInst &Inst, uint16_t Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeThumbAddSPReg(llvm::MCInst &Inst, uint16_t Insn, +static DecodeStatus DecodeThumbAddSPReg(MCInst &Inst, uint16_t Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeThumbCPS(llvm::MCInst &Inst, uint16_t Insn, +static DecodeStatus DecodeThumbCPS(MCInst &Inst, uint16_t Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeThumbBLXOffset(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeThumbBLXOffset(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeT2AddrModeImm12(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2AddrModeImm12(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeThumbTableBranch(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeThumbTableBranch(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeThumb2BCCInstruction(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeThumb2BCCInstruction(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeT2SOImm(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2SOImm(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeThumbBCCTargetOperand(llvm::MCInst &Inst,unsigned Val, +static DecodeStatus 
DecodeThumbBCCTargetOperand(MCInst &Inst,unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeThumbBLTargetOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeThumbBLTargetOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeIT(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeIT(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeT2LDRDPreInstruction(llvm::MCInst &Inst,unsigned Insn, +static DecodeStatus DecodeT2LDRDPreInstruction(MCInst &Inst,unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeT2STRDPreInstruction(llvm::MCInst &Inst,unsigned Insn, +static DecodeStatus DecodeT2STRDPreInstruction(MCInst &Inst,unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeT2Adr(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2Adr(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeT2LdStPre(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2LdStPre(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeT2ShifterImmOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2ShifterImmOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); - - +static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); #include "ARMGenDisassemblerTables.inc" #include "ARMGenInstrInfo.inc" #include "ARMGenEDInfo.inc" @@ -327,11 +338,11 @@ static MCDisassembler *createThumbDisassembler(const Target &T, const MCSubtarge return new ThumbDisassembler(STI); } -EDInstInfo *ARMDisassembler::getEDInfo() const { +const EDInstInfo *ARMDisassembler::getEDInfo() const { return instInfoARM; } -EDInstInfo *ThumbDisassembler::getEDInfo() const { +const EDInstInfo *ThumbDisassembler::getEDInfo() const { return instInfoARM; } @@ -415,7 +426,7 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } namespace llvm { -extern MCInstrDesc ARMInsts[]; +extern const MCInstrDesc ARMInsts[]; } /// tryAddingSymbolicOperand - trys to add a symbolic operand in place of the @@ -435,40 +446,38 @@ static bool tryAddingSymbolicOperand(uint64_t Address, int32_t Value, MCInst &MI, const void *Decoder) { const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder); LLVMOpInfoCallback getOpInfo = Dis->getLLVMOpInfoCallback(); - if (!getOpInfo) - return false; - struct LLVMOpInfo1 SymbolicOp; + memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1)); SymbolicOp.Value = Value; void *DisInfo = Dis->getDisInfoBlock(); - if (!getOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp)) { - if (isBranch) { - LLVMSymbolLookupCallback SymbolLookUp = - Dis->getLLVMSymbolLookupCallback(); - if (SymbolLookUp) { - uint64_t ReferenceType; - ReferenceType = LLVMDisassembler_ReferenceType_In_Branch; - const char *ReferenceName; - const char *Name = SymbolLookUp(DisInfo, Value, &ReferenceType, Address, - &ReferenceName); - if (Name) { - SymbolicOp.AddSymbol.Name = Name; - SymbolicOp.AddSymbol.Present = true; - SymbolicOp.Value = 0; - } - else { - SymbolicOp.Value = Value; - } - if(ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub) - (*Dis->CommentStream) << "symbol stub for: " << ReferenceName; - } - else { - return false; - } - } - else { + + if (!getOpInfo || + !getOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp)) { + 
// Clear SymbolicOp.Value from above and also all other fields. + memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1)); + LLVMSymbolLookupCallback SymbolLookUp = Dis->getLLVMSymbolLookupCallback(); + if (!SymbolLookUp) return false; + uint64_t ReferenceType; + if (isBranch) + ReferenceType = LLVMDisassembler_ReferenceType_In_Branch; + else + ReferenceType = LLVMDisassembler_ReferenceType_InOut_None; + const char *ReferenceName; + const char *Name = SymbolLookUp(DisInfo, Value, &ReferenceType, Address, + &ReferenceName); + if (Name) { + SymbolicOp.AddSymbol.Name = Name; + SymbolicOp.AddSymbol.Present = true; } + // For branches always create an MCExpr so it gets printed as hex address. + else if (isBranch) { + SymbolicOp.Value = Value; + } + if(ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub) + (*Dis->CommentStream) << "symbol stub for: " << ReferenceName; + if (!Name && !isBranch) + return false; } MCContext *Ctx = Dis->getMCContext(); @@ -527,8 +536,8 @@ static bool tryAddingSymbolicOperand(uint64_t Address, int32_t Value, MI.addOperand(MCOperand::CreateExpr(ARMMCExpr::CreateLower16(Expr, *Ctx))); else if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_None) MI.addOperand(MCOperand::CreateExpr(Expr)); - else - assert(0 && "bad SymbolicOp.VariantKind"); + else + llvm_unreachable("bad SymbolicOp.VariantKind"); return true; } @@ -543,7 +552,7 @@ static bool tryAddingSymbolicOperand(uint64_t Address, int32_t Value, /// a literal 'C' string if the referenced address of the literal pool's entry /// is an address into a section with 'C' string literals. static void tryAddingPcLoadReferenceComment(uint64_t Address, int Value, - const void *Decoder) { + const void *Decoder) { const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder); LLVMSymbolLookupCallback SymbolLookUp = Dis->getLLVMSymbolLookupCallback(); if (SymbolLookUp) { @@ -841,14 +850,14 @@ extern "C" void LLVMInitializeARMDisassembler() { createThumbDisassembler); } -static const unsigned GPRDecoderTable[] = { +static const uint16_t GPRDecoderTable[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3, ARM::R4, ARM::R5, ARM::R6, ARM::R7, ARM::R8, ARM::R9, ARM::R10, ARM::R11, ARM::R12, ARM::SP, ARM::LR, ARM::PC }; -static DecodeStatus DecodeGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { if (RegNo > 15) return MCDisassembler::Fail; @@ -859,20 +868,26 @@ static DecodeStatus DecodeGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, } static DecodeStatus -DecodeGPRnopcRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +DecodeGPRnopcRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { - if (RegNo == 15) return MCDisassembler::Fail; - return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder); + DecodeStatus S = MCDisassembler::Success; + + if (RegNo == 15) + S = MCDisassembler::SoftFail; + + Check(S, DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder)); + + return S; } -static DecodeStatus DecodetGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodetGPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { if (RegNo > 7) return MCDisassembler::Fail; return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder); } -static DecodeStatus DecodetcGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodetcGPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { 
unsigned Register = 0; switch (RegNo) { @@ -902,13 +917,13 @@ static DecodeStatus DecodetcGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, return MCDisassembler::Success; } -static DecodeStatus DecoderGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecoderGPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { if (RegNo == 13 || RegNo == 15) return MCDisassembler::Fail; return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder); } -static const unsigned SPRDecoderTable[] = { +static const uint16_t SPRDecoderTable[] = { ARM::S0, ARM::S1, ARM::S2, ARM::S3, ARM::S4, ARM::S5, ARM::S6, ARM::S7, ARM::S8, ARM::S9, ARM::S10, ARM::S11, @@ -919,7 +934,7 @@ static const unsigned SPRDecoderTable[] = { ARM::S28, ARM::S29, ARM::S30, ARM::S31 }; -static DecodeStatus DecodeSPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodeSPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { if (RegNo > 31) return MCDisassembler::Fail; @@ -929,7 +944,7 @@ static DecodeStatus DecodeSPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, return MCDisassembler::Success; } -static const unsigned DPRDecoderTable[] = { +static const uint16_t DPRDecoderTable[] = { ARM::D0, ARM::D1, ARM::D2, ARM::D3, ARM::D4, ARM::D5, ARM::D6, ARM::D7, ARM::D8, ARM::D9, ARM::D10, ARM::D11, @@ -940,7 +955,7 @@ static const unsigned DPRDecoderTable[] = { ARM::D28, ARM::D29, ARM::D30, ARM::D31 }; -static DecodeStatus DecodeDPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodeDPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { if (RegNo > 31) return MCDisassembler::Fail; @@ -950,7 +965,7 @@ static DecodeStatus DecodeDPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, return MCDisassembler::Success; } -static DecodeStatus DecodeDPR_8RegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodeDPR_8RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { if (RegNo > 7) return MCDisassembler::Fail; @@ -958,14 +973,14 @@ static DecodeStatus DecodeDPR_8RegisterClass(llvm::MCInst &Inst, unsigned RegNo, } static DecodeStatus -DecodeDPR_VFP2RegisterClass(llvm::MCInst &Inst, unsigned RegNo, +DecodeDPR_VFP2RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { if (RegNo > 15) return MCDisassembler::Fail; return DecodeDPRRegisterClass(Inst, RegNo, Address, Decoder); } -static const unsigned QPRDecoderTable[] = { +static const uint16_t QPRDecoderTable[] = { ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3, ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7, ARM::Q8, ARM::Q9, ARM::Q10, ARM::Q11, @@ -973,7 +988,7 @@ static const unsigned QPRDecoderTable[] = { }; -static DecodeStatus DecodeQPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodeQPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { if (RegNo > 31) return MCDisassembler::Fail; @@ -984,7 +999,49 @@ static DecodeStatus DecodeQPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, return MCDisassembler::Success; } -static DecodeStatus DecodePredicateOperand(llvm::MCInst &Inst, unsigned Val, +static const uint16_t DPairDecoderTable[] = { + ARM::Q0, ARM::D1_D2, ARM::Q1, ARM::D3_D4, ARM::Q2, ARM::D5_D6, + ARM::Q3, ARM::D7_D8, ARM::Q4, ARM::D9_D10, ARM::Q5, ARM::D11_D12, + ARM::Q6, ARM::D13_D14, ARM::Q7, ARM::D15_D16, ARM::Q8, ARM::D17_D18, + ARM::Q9, ARM::D19_D20, ARM::Q10, ARM::D21_D22, ARM::Q11, ARM::D23_D24, + ARM::Q12, 
ARM::D25_D26, ARM::Q13, ARM::D27_D28, ARM::Q14, ARM::D29_D30, + ARM::Q15 +}; + +static DecodeStatus DecodeDPairRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo > 30) + return MCDisassembler::Fail; + + unsigned Register = DPairDecoderTable[RegNo]; + Inst.addOperand(MCOperand::CreateReg(Register)); + return MCDisassembler::Success; +} + +static const uint16_t DPairSpacedDecoderTable[] = { + ARM::D0_D2, ARM::D1_D3, ARM::D2_D4, ARM::D3_D5, + ARM::D4_D6, ARM::D5_D7, ARM::D6_D8, ARM::D7_D9, + ARM::D8_D10, ARM::D9_D11, ARM::D10_D12, ARM::D11_D13, + ARM::D12_D14, ARM::D13_D15, ARM::D14_D16, ARM::D15_D17, + ARM::D16_D18, ARM::D17_D19, ARM::D18_D20, ARM::D19_D21, + ARM::D20_D22, ARM::D21_D23, ARM::D22_D24, ARM::D23_D25, + ARM::D24_D26, ARM::D25_D27, ARM::D26_D28, ARM::D27_D29, + ARM::D28_D30, ARM::D29_D31 +}; + +static DecodeStatus DecodeDPairSpacedRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo > 29) + return MCDisassembler::Fail; + + unsigned Register = DPairSpacedDecoderTable[RegNo]; + Inst.addOperand(MCOperand::CreateReg(Register)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodePredicateOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { if (Val == 0xF) return MCDisassembler::Fail; // AL predicate is not allowed on Thumb1 branches. @@ -998,7 +1055,7 @@ static DecodeStatus DecodePredicateOperand(llvm::MCInst &Inst, unsigned Val, return MCDisassembler::Success; } -static DecodeStatus DecodeCCOutOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeCCOutOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { if (Val) Inst.addOperand(MCOperand::CreateReg(ARM::CPSR)); @@ -1007,7 +1064,7 @@ static DecodeStatus DecodeCCOutOperand(llvm::MCInst &Inst, unsigned Val, return MCDisassembler::Success; } -static DecodeStatus DecodeSOImmOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeSOImmOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { uint32_t imm = Val & 0xFF; uint32_t rot = (Val & 0xF00) >> 7; @@ -1016,7 +1073,7 @@ static DecodeStatus DecodeSOImmOperand(llvm::MCInst &Inst, unsigned Val, return MCDisassembler::Success; } -static DecodeStatus DecodeSORegImmOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeSORegImmOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1053,7 +1110,7 @@ static DecodeStatus DecodeSORegImmOperand(llvm::MCInst &Inst, unsigned Val, return S; } -static DecodeStatus DecodeSORegRegOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeSORegRegOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1088,7 +1145,7 @@ static DecodeStatus DecodeSORegRegOperand(llvm::MCInst &Inst, unsigned Val, return S; } -static DecodeStatus DecodeRegListOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeRegListOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1123,7 +1180,7 @@ static DecodeStatus DecodeRegListOperand(llvm::MCInst &Inst, unsigned Val, return S; } -static DecodeStatus DecodeSPRRegListOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeSPRRegListOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1140,7 +1197,7 @@ 
static DecodeStatus DecodeSPRRegListOperand(llvm::MCInst &Inst, unsigned Val, return S; } -static DecodeStatus DecodeDPRRegListOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeDPRRegListOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1157,7 +1214,7 @@ static DecodeStatus DecodeDPRRegListOperand(llvm::MCInst &Inst, unsigned Val, return S; } -static DecodeStatus DecodeBitfieldMaskOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeBitfieldMaskOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { // This operand encodes a mask of contiguous zeros between a specified MSB // and LSB. To decode it, we create the mask of all bits MSB-and-lower, @@ -1178,7 +1235,7 @@ static DecodeStatus DecodeBitfieldMaskOperand(llvm::MCInst &Inst, unsigned Val, return S; } -static DecodeStatus DecodeCopMemInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeCopMemInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1234,16 +1291,6 @@ static DecodeStatus DecodeCopMemInstruction(llvm::MCInst &Inst, unsigned Insn, if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) return MCDisassembler::Fail; - unsigned P = fieldFromInstruction32(Insn, 24, 1); - unsigned W = fieldFromInstruction32(Insn, 21, 1); - - bool writeback = (P == 0) || (W == 1); - unsigned idx_mode = 0; - if (P && writeback) - idx_mode = ARMII::IndexModePre; - else if (!P && writeback) - idx_mode = ARMII::IndexModePost; - switch (Inst.getOpcode()) { case ARM::t2LDC2_OFFSET: case ARM::t2LDC2L_OFFSET: @@ -1333,7 +1380,7 @@ static DecodeStatus DecodeCopMemInstruction(llvm::MCInst &Inst, unsigned Insn, } static DecodeStatus -DecodeAddrMode2IdxInstruction(llvm::MCInst &Inst, unsigned Insn, +DecodeAddrMode2IdxInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1436,7 +1483,7 @@ DecodeAddrMode2IdxInstruction(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeSORegMemOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeSORegMemOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1477,7 +1524,7 @@ static DecodeStatus DecodeSORegMemOperand(llvm::MCInst &Inst, unsigned Val, } static DecodeStatus -DecodeAddrMode3Instruction(llvm::MCInst &Inst, unsigned Insn, +DecodeAddrMode3Instruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1490,6 +1537,7 @@ DecodeAddrMode3Instruction(llvm::MCInst &Inst, unsigned Insn, unsigned pred = fieldFromInstruction32(Insn, 28, 4); unsigned W = fieldFromInstruction32(Insn, 21, 1); unsigned P = fieldFromInstruction32(Insn, 24, 1); + unsigned Rt2 = Rt + 1; bool writeback = (W == 1) | (P == 0); @@ -1501,7 +1549,86 @@ DecodeAddrMode3Instruction(llvm::MCInst &Inst, unsigned Insn, case ARM::LDRD: case ARM::LDRD_PRE: case ARM::LDRD_POST: - if (Rt & 0x1) return MCDisassembler::Fail; + if (Rt & 0x1) S = MCDisassembler::SoftFail; + break; + default: + break; + } + switch (Inst.getOpcode()) { + case ARM::STRD: + case ARM::STRD_PRE: + case ARM::STRD_POST: + if (P == 0 && W == 1) + S = MCDisassembler::SoftFail; + + if (writeback && (Rn == 15 || Rn == Rt || Rn == Rt2)) + S = MCDisassembler::SoftFail; + if (type && Rm == 15) + S = MCDisassembler::SoftFail; + if 
(Rt2 == 15) + S = MCDisassembler::SoftFail; + if (!type && fieldFromInstruction32(Insn, 8, 4)) + S = MCDisassembler::SoftFail; + break; + case ARM::STRH: + case ARM::STRH_PRE: + case ARM::STRH_POST: + if (Rt == 15) + S = MCDisassembler::SoftFail; + if (writeback && (Rn == 15 || Rn == Rt)) + S = MCDisassembler::SoftFail; + if (!type && Rm == 15) + S = MCDisassembler::SoftFail; + break; + case ARM::LDRD: + case ARM::LDRD_PRE: + case ARM::LDRD_POST: + if (type && Rn == 15){ + if (Rt2 == 15) + S = MCDisassembler::SoftFail; + break; + } + if (P == 0 && W == 1) + S = MCDisassembler::SoftFail; + if (!type && (Rt2 == 15 || Rm == 15 || Rm == Rt || Rm == Rt2)) + S = MCDisassembler::SoftFail; + if (!type && writeback && Rn == 15) + S = MCDisassembler::SoftFail; + if (writeback && (Rn == Rt || Rn == Rt2)) + S = MCDisassembler::SoftFail; + break; + case ARM::LDRH: + case ARM::LDRH_PRE: + case ARM::LDRH_POST: + if (type && Rn == 15){ + if (Rt == 15) + S = MCDisassembler::SoftFail; + break; + } + if (Rt == 15) + S = MCDisassembler::SoftFail; + if (!type && Rm == 15) + S = MCDisassembler::SoftFail; + if (!type && writeback && (Rn == 15 || Rn == Rt)) + S = MCDisassembler::SoftFail; + break; + case ARM::LDRSH: + case ARM::LDRSH_PRE: + case ARM::LDRSH_POST: + case ARM::LDRSB: + case ARM::LDRSB_PRE: + case ARM::LDRSB_POST: + if (type && Rn == 15){ + if (Rt == 15) + S = MCDisassembler::SoftFail; + break; + } + if (type && (Rt == 15 || (writeback && Rn == Rt))) + S = MCDisassembler::SoftFail; + if (!type && (Rt == 15 || Rm == 15)) + S = MCDisassembler::SoftFail; + if (!type && writeback && (Rn == 15 || Rn == Rt)) + S = MCDisassembler::SoftFail; break; default: break; @@ -1588,7 +1715,7 @@ DecodeAddrMode3Instruction(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeRFEInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeRFEInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1617,7 +1744,7 @@ static DecodeStatus DecodeRFEInstruction(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeMemMultipleWritebackInstruction(llvm::MCInst &Inst, +static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1702,7 +1829,7 @@ static DecodeStatus DecodeMemMultipleWritebackInstruction(llvm::MCInst &Inst, return S; } -static DecodeStatus DecodeCPSInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeCPSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { unsigned imod = fieldFromInstruction32(Insn, 18, 2); unsigned M = fieldFromInstruction32(Insn, 17, 1); @@ -1742,7 +1869,7 @@ static DecodeStatus DecodeCPSInstruction(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeT2CPSInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeT2CPSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { unsigned imod = fieldFromInstruction32(Insn, 9, 2); unsigned M = fieldFromInstruction32(Insn, 8, 1); @@ -1782,7 +1909,7 @@ static DecodeStatus DecodeT2CPSInstruction(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeT2MOVTWInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeT2MOVTWInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1806,7 +1933,7 @@ static 
DecodeStatus DecodeT2MOVTWInstruction(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeArmMOVTWInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeArmMOVTWInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1832,7 +1959,7 @@ static DecodeStatus DecodeArmMOVTWInstruction(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeSMLAInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeSMLAInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1860,7 +1987,7 @@ static DecodeStatus DecodeSMLAInstruction(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeAddrModeImm12Operand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeAddrModeImm12Operand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1880,7 +2007,7 @@ static DecodeStatus DecodeAddrModeImm12Operand(llvm::MCInst &Inst, unsigned Val, return S; } -static DecodeStatus DecodeAddrMode5Operand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeAddrMode5Operand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1899,13 +2026,28 @@ static DecodeStatus DecodeAddrMode5Operand(llvm::MCInst &Inst, unsigned Val, return S; } -static DecodeStatus DecodeAddrMode7Operand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeAddrMode7Operand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { return DecodeGPRRegisterClass(Inst, Val, Address, Decoder); } static DecodeStatus -DecodeBranchImmInstruction(llvm::MCInst &Inst, unsigned Insn, +DecodeT2BInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + unsigned imm = (fieldFromInstruction32(Insn, 0, 11) << 0) | + (fieldFromInstruction32(Insn, 11, 1) << 18) | + (fieldFromInstruction32(Insn, 13, 1) << 17) | + (fieldFromInstruction32(Insn, 16, 6) << 11) | + (fieldFromInstruction32(Insn, 26, 1) << 19); + if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<20>(imm<<1) + 4, + true, 4, Inst, Decoder)) + Inst.addOperand(MCOperand::CreateImm(SignExtend32<20>(imm << 1))); + return S; +} + +static DecodeStatus +DecodeBranchImmInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1915,12 +2057,14 @@ DecodeBranchImmInstruction(llvm::MCInst &Inst, unsigned Insn, if (pred == 0xF) { Inst.setOpcode(ARM::BLXi); imm |= fieldFromInstruction32(Insn, 24, 1) << 1; + if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<26>(imm) + 8, + true, 4, Inst, Decoder)) Inst.addOperand(MCOperand::CreateImm(SignExtend32<26>(imm))); return S; } - if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<26>(imm) + 8, true, - 4, Inst, Decoder)) + if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<26>(imm) + 8, + true, 4, Inst, Decoder)) Inst.addOperand(MCOperand::CreateImm(SignExtend32<26>(imm))); if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder))) return MCDisassembler::Fail; @@ -1929,13 +2073,7 @@ DecodeBranchImmInstruction(llvm::MCInst &Inst, unsigned Insn, } -static DecodeStatus DecodeVCVTImmOperand(llvm::MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { - Inst.addOperand(MCOperand::CreateImm(64 - 
Val)); - return MCDisassembler::Success; -} - -static DecodeStatus DecodeAddrMode6Operand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeAddrMode6Operand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1952,7 +2090,7 @@ static DecodeStatus DecodeAddrMode6Operand(llvm::MCInst &Inst, unsigned Val, return S; } -static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVLDInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1964,47 +2102,38 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn, unsigned Rm = fieldFromInstruction32(Insn, 0, 4); // First output register - if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) - return MCDisassembler::Fail; + switch (Inst.getOpcode()) { + case ARM::VLD1q16: case ARM::VLD1q32: case ARM::VLD1q64: case ARM::VLD1q8: + case ARM::VLD1q16wb_fixed: case ARM::VLD1q16wb_register: + case ARM::VLD1q32wb_fixed: case ARM::VLD1q32wb_register: + case ARM::VLD1q64wb_fixed: case ARM::VLD1q64wb_register: + case ARM::VLD1q8wb_fixed: case ARM::VLD1q8wb_register: + case ARM::VLD2d16: case ARM::VLD2d32: case ARM::VLD2d8: + case ARM::VLD2d16wb_fixed: case ARM::VLD2d16wb_register: + case ARM::VLD2d32wb_fixed: case ARM::VLD2d32wb_register: + case ARM::VLD2d8wb_fixed: case ARM::VLD2d8wb_register: + if (!Check(S, DecodeDPairRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + break; + case ARM::VLD2b16: + case ARM::VLD2b32: + case ARM::VLD2b8: + case ARM::VLD2b16wb_fixed: + case ARM::VLD2b16wb_register: + case ARM::VLD2b32wb_fixed: + case ARM::VLD2b32wb_register: + case ARM::VLD2b8wb_fixed: + case ARM::VLD2b8wb_register: + if (!Check(S, DecodeDPairSpacedRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + break; + default: + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + } // Second output register switch (Inst.getOpcode()) { - case ARM::VLD1q8: - case ARM::VLD1q16: - case ARM::VLD1q32: - case ARM::VLD1q64: - case ARM::VLD1q8_UPD: - case ARM::VLD1q16_UPD: - case ARM::VLD1q32_UPD: - case ARM::VLD1q64_UPD: - case ARM::VLD1d8T: - case ARM::VLD1d16T: - case ARM::VLD1d32T: - case ARM::VLD1d64T: - case ARM::VLD1d8T_UPD: - case ARM::VLD1d16T_UPD: - case ARM::VLD1d32T_UPD: - case ARM::VLD1d64T_UPD: - case ARM::VLD1d8Q: - case ARM::VLD1d16Q: - case ARM::VLD1d32Q: - case ARM::VLD1d64Q: - case ARM::VLD1d8Q_UPD: - case ARM::VLD1d16Q_UPD: - case ARM::VLD1d32Q_UPD: - case ARM::VLD1d64Q_UPD: - case ARM::VLD2d8: - case ARM::VLD2d16: - case ARM::VLD2d32: - case ARM::VLD2d8_UPD: - case ARM::VLD2d16_UPD: - case ARM::VLD2d32_UPD: - case ARM::VLD2q8: - case ARM::VLD2q16: - case ARM::VLD2q32: - case ARM::VLD2q8_UPD: - case ARM::VLD2q16_UPD: - case ARM::VLD2q32_UPD: case ARM::VLD3d8: case ARM::VLD3d16: case ARM::VLD3d32: @@ -2020,12 +2149,6 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn, if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+1)%32, Address, Decoder))) return MCDisassembler::Fail; break; - case ARM::VLD2b8: - case ARM::VLD2b16: - case ARM::VLD2b32: - case ARM::VLD2b8_UPD: - case ARM::VLD2b16_UPD: - case ARM::VLD2b32_UPD: case ARM::VLD3q8: case ARM::VLD3q16: case ARM::VLD3q32: @@ -2046,28 +2169,6 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn, // Third output register switch(Inst.getOpcode()) { - 
case ARM::VLD1d8T: - case ARM::VLD1d16T: - case ARM::VLD1d32T: - case ARM::VLD1d64T: - case ARM::VLD1d8T_UPD: - case ARM::VLD1d16T_UPD: - case ARM::VLD1d32T_UPD: - case ARM::VLD1d64T_UPD: - case ARM::VLD1d8Q: - case ARM::VLD1d16Q: - case ARM::VLD1d32Q: - case ARM::VLD1d64Q: - case ARM::VLD1d8Q_UPD: - case ARM::VLD1d16Q_UPD: - case ARM::VLD1d32Q_UPD: - case ARM::VLD1d64Q_UPD: - case ARM::VLD2q8: - case ARM::VLD2q16: - case ARM::VLD2q32: - case ARM::VLD2q8_UPD: - case ARM::VLD2q16_UPD: - case ARM::VLD2q32_UPD: case ARM::VLD3d8: case ARM::VLD3d16: case ARM::VLD3d32: @@ -2104,20 +2205,6 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn, // Fourth output register switch (Inst.getOpcode()) { - case ARM::VLD1d8Q: - case ARM::VLD1d16Q: - case ARM::VLD1d32Q: - case ARM::VLD1d64Q: - case ARM::VLD1d8Q_UPD: - case ARM::VLD1d16Q_UPD: - case ARM::VLD1d32Q_UPD: - case ARM::VLD1d64Q_UPD: - case ARM::VLD2q8: - case ARM::VLD2q16: - case ARM::VLD2q32: - case ARM::VLD2q8_UPD: - case ARM::VLD2q16_UPD: - case ARM::VLD2q32_UPD: case ARM::VLD4d8: case ARM::VLD4d16: case ARM::VLD4d32: @@ -2142,31 +2229,58 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn, // Writeback operand switch (Inst.getOpcode()) { - case ARM::VLD1d8_UPD: - case ARM::VLD1d16_UPD: - case ARM::VLD1d32_UPD: - case ARM::VLD1d64_UPD: - case ARM::VLD1q8_UPD: - case ARM::VLD1q16_UPD: - case ARM::VLD1q32_UPD: - case ARM::VLD1q64_UPD: - case ARM::VLD1d8T_UPD: - case ARM::VLD1d16T_UPD: - case ARM::VLD1d32T_UPD: - case ARM::VLD1d64T_UPD: - case ARM::VLD1d8Q_UPD: - case ARM::VLD1d16Q_UPD: - case ARM::VLD1d32Q_UPD: - case ARM::VLD1d64Q_UPD: - case ARM::VLD2d8_UPD: - case ARM::VLD2d16_UPD: - case ARM::VLD2d32_UPD: - case ARM::VLD2q8_UPD: - case ARM::VLD2q16_UPD: - case ARM::VLD2q32_UPD: - case ARM::VLD2b8_UPD: - case ARM::VLD2b16_UPD: - case ARM::VLD2b32_UPD: + case ARM::VLD1d8wb_fixed: + case ARM::VLD1d16wb_fixed: + case ARM::VLD1d32wb_fixed: + case ARM::VLD1d64wb_fixed: + case ARM::VLD1d8wb_register: + case ARM::VLD1d16wb_register: + case ARM::VLD1d32wb_register: + case ARM::VLD1d64wb_register: + case ARM::VLD1q8wb_fixed: + case ARM::VLD1q16wb_fixed: + case ARM::VLD1q32wb_fixed: + case ARM::VLD1q64wb_fixed: + case ARM::VLD1q8wb_register: + case ARM::VLD1q16wb_register: + case ARM::VLD1q32wb_register: + case ARM::VLD1q64wb_register: + case ARM::VLD1d8Twb_fixed: + case ARM::VLD1d8Twb_register: + case ARM::VLD1d16Twb_fixed: + case ARM::VLD1d16Twb_register: + case ARM::VLD1d32Twb_fixed: + case ARM::VLD1d32Twb_register: + case ARM::VLD1d64Twb_fixed: + case ARM::VLD1d64Twb_register: + case ARM::VLD1d8Qwb_fixed: + case ARM::VLD1d8Qwb_register: + case ARM::VLD1d16Qwb_fixed: + case ARM::VLD1d16Qwb_register: + case ARM::VLD1d32Qwb_fixed: + case ARM::VLD1d32Qwb_register: + case ARM::VLD1d64Qwb_fixed: + case ARM::VLD1d64Qwb_register: + case ARM::VLD2d8wb_fixed: + case ARM::VLD2d16wb_fixed: + case ARM::VLD2d32wb_fixed: + case ARM::VLD2q8wb_fixed: + case ARM::VLD2q16wb_fixed: + case ARM::VLD2q32wb_fixed: + case ARM::VLD2d8wb_register: + case ARM::VLD2d16wb_register: + case ARM::VLD2d32wb_register: + case ARM::VLD2q8wb_register: + case ARM::VLD2q16wb_register: + case ARM::VLD2q32wb_register: + case ARM::VLD2b8wb_fixed: + case ARM::VLD2b16wb_fixed: + case ARM::VLD2b32wb_fixed: + case ARM::VLD2b8wb_register: + case ARM::VLD2b16wb_register: + case ARM::VLD2b32wb_register: + Inst.addOperand(MCOperand::CreateImm(0)); + break; case ARM::VLD3d8_UPD: case ARM::VLD3d16_UPD: case ARM::VLD3d32_UPD: @@ -2191,17 +2305,66 @@ 
static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn, return MCDisassembler::Fail; // AddrMode6 Offset (register) - if (Rm == 0xD) - Inst.addOperand(MCOperand::CreateReg(0)); - else if (Rm != 0xF) { - if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) + switch (Inst.getOpcode()) { + default: + // The below have been updated to have explicit am6offset split + // between fixed and register offset. For those instructions not + // yet updated, we need to add an additional reg0 operand for the + // fixed variant. + // + // The fixed offset encodes as Rm == 0xd, so we check for that. + if (Rm == 0xd) { + Inst.addOperand(MCOperand::CreateReg(0)); + break; + } + // Fall through to handle the register offset variant. + case ARM::VLD1d8wb_fixed: + case ARM::VLD1d16wb_fixed: + case ARM::VLD1d32wb_fixed: + case ARM::VLD1d64wb_fixed: + case ARM::VLD1d8Twb_fixed: + case ARM::VLD1d16Twb_fixed: + case ARM::VLD1d32Twb_fixed: + case ARM::VLD1d64Twb_fixed: + case ARM::VLD1d8Qwb_fixed: + case ARM::VLD1d16Qwb_fixed: + case ARM::VLD1d32Qwb_fixed: + case ARM::VLD1d64Qwb_fixed: + case ARM::VLD1d8wb_register: + case ARM::VLD1d16wb_register: + case ARM::VLD1d32wb_register: + case ARM::VLD1d64wb_register: + case ARM::VLD1q8wb_fixed: + case ARM::VLD1q16wb_fixed: + case ARM::VLD1q32wb_fixed: + case ARM::VLD1q64wb_fixed: + case ARM::VLD1q8wb_register: + case ARM::VLD1q16wb_register: + case ARM::VLD1q32wb_register: + case ARM::VLD1q64wb_register: + // The fixed offset post-increment encodes Rm == 0xd. The no-writeback + // variant encodes Rm == 0xf. Anything else is a register offset post- + // increment and we need to add the register operand to the instruction. + if (Rm != 0xD && Rm != 0xF && + !Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) return MCDisassembler::Fail; + break; + case ARM::VLD2d8wb_fixed: + case ARM::VLD2d16wb_fixed: + case ARM::VLD2d32wb_fixed: + case ARM::VLD2b8wb_fixed: + case ARM::VLD2b16wb_fixed: + case ARM::VLD2b32wb_fixed: + case ARM::VLD2q8wb_fixed: + case ARM::VLD2q16wb_fixed: + case ARM::VLD2q32wb_fixed: + break; } return S; } -static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVSTInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -2214,31 +2377,60 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn, // Writeback Operand switch (Inst.getOpcode()) { - case ARM::VST1d8_UPD: - case ARM::VST1d16_UPD: - case ARM::VST1d32_UPD: - case ARM::VST1d64_UPD: - case ARM::VST1q8_UPD: - case ARM::VST1q16_UPD: - case ARM::VST1q32_UPD: - case ARM::VST1q64_UPD: - case ARM::VST1d8T_UPD: - case ARM::VST1d16T_UPD: - case ARM::VST1d32T_UPD: - case ARM::VST1d64T_UPD: - case ARM::VST1d8Q_UPD: - case ARM::VST1d16Q_UPD: - case ARM::VST1d32Q_UPD: - case ARM::VST1d64Q_UPD: - case ARM::VST2d8_UPD: - case ARM::VST2d16_UPD: - case ARM::VST2d32_UPD: - case ARM::VST2q8_UPD: - case ARM::VST2q16_UPD: - case ARM::VST2q32_UPD: - case ARM::VST2b8_UPD: - case ARM::VST2b16_UPD: - case ARM::VST2b32_UPD: + case ARM::VST1d8wb_fixed: + case ARM::VST1d16wb_fixed: + case ARM::VST1d32wb_fixed: + case ARM::VST1d64wb_fixed: + case ARM::VST1d8wb_register: + case ARM::VST1d16wb_register: + case ARM::VST1d32wb_register: + case ARM::VST1d64wb_register: + case ARM::VST1q8wb_fixed: + case ARM::VST1q16wb_fixed: + case ARM::VST1q32wb_fixed: + case ARM::VST1q64wb_fixed: + case ARM::VST1q8wb_register: + case ARM::VST1q16wb_register: + case 
ARM::VST1q32wb_register: + case ARM::VST1q64wb_register: + case ARM::VST1d8Twb_fixed: + case ARM::VST1d16Twb_fixed: + case ARM::VST1d32Twb_fixed: + case ARM::VST1d64Twb_fixed: + case ARM::VST1d8Twb_register: + case ARM::VST1d16Twb_register: + case ARM::VST1d32Twb_register: + case ARM::VST1d64Twb_register: + case ARM::VST1d8Qwb_fixed: + case ARM::VST1d16Qwb_fixed: + case ARM::VST1d32Qwb_fixed: + case ARM::VST1d64Qwb_fixed: + case ARM::VST1d8Qwb_register: + case ARM::VST1d16Qwb_register: + case ARM::VST1d32Qwb_register: + case ARM::VST1d64Qwb_register: + case ARM::VST2d8wb_fixed: + case ARM::VST2d16wb_fixed: + case ARM::VST2d32wb_fixed: + case ARM::VST2d8wb_register: + case ARM::VST2d16wb_register: + case ARM::VST2d32wb_register: + case ARM::VST2q8wb_fixed: + case ARM::VST2q16wb_fixed: + case ARM::VST2q32wb_fixed: + case ARM::VST2q8wb_register: + case ARM::VST2q16wb_register: + case ARM::VST2q32wb_register: + case ARM::VST2b8wb_fixed: + case ARM::VST2b16wb_fixed: + case ARM::VST2b32wb_fixed: + case ARM::VST2b8wb_register: + case ARM::VST2b16wb_register: + case ARM::VST2b32wb_register: + if (Rm == 0xF) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(0)); + break; case ARM::VST3d8_UPD: case ARM::VST3d16_UPD: case ARM::VST3d32_UPD: @@ -2263,55 +2455,89 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn, return MCDisassembler::Fail; // AddrMode6 Offset (register) - if (Rm == 0xD) - Inst.addOperand(MCOperand::CreateReg(0)); - else if (Rm != 0xF) { - if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) - return MCDisassembler::Fail; + switch (Inst.getOpcode()) { + default: + if (Rm == 0xD) + Inst.addOperand(MCOperand::CreateReg(0)); + else if (Rm != 0xF) { + if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + } + break; + case ARM::VST1d8wb_fixed: + case ARM::VST1d16wb_fixed: + case ARM::VST1d32wb_fixed: + case ARM::VST1d64wb_fixed: + case ARM::VST1q8wb_fixed: + case ARM::VST1q16wb_fixed: + case ARM::VST1q32wb_fixed: + case ARM::VST1q64wb_fixed: + case ARM::VST1d8Twb_fixed: + case ARM::VST1d16Twb_fixed: + case ARM::VST1d32Twb_fixed: + case ARM::VST1d64Twb_fixed: + case ARM::VST1d8Qwb_fixed: + case ARM::VST1d16Qwb_fixed: + case ARM::VST1d32Qwb_fixed: + case ARM::VST1d64Qwb_fixed: + case ARM::VST2d8wb_fixed: + case ARM::VST2d16wb_fixed: + case ARM::VST2d32wb_fixed: + case ARM::VST2q8wb_fixed: + case ARM::VST2q16wb_fixed: + case ARM::VST2q32wb_fixed: + case ARM::VST2b8wb_fixed: + case ARM::VST2b16wb_fixed: + case ARM::VST2b32wb_fixed: + break; } + // First input register - if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) - return MCDisassembler::Fail; + switch (Inst.getOpcode()) { + case ARM::VST1q16: + case ARM::VST1q32: + case ARM::VST1q64: + case ARM::VST1q8: + case ARM::VST1q16wb_fixed: + case ARM::VST1q16wb_register: + case ARM::VST1q32wb_fixed: + case ARM::VST1q32wb_register: + case ARM::VST1q64wb_fixed: + case ARM::VST1q64wb_register: + case ARM::VST1q8wb_fixed: + case ARM::VST1q8wb_register: + case ARM::VST2d16: + case ARM::VST2d32: + case ARM::VST2d8: + case ARM::VST2d16wb_fixed: + case ARM::VST2d16wb_register: + case ARM::VST2d32wb_fixed: + case ARM::VST2d32wb_register: + case ARM::VST2d8wb_fixed: + case ARM::VST2d8wb_register: + if (!Check(S, DecodeDPairRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + break; + case ARM::VST2b16: + case ARM::VST2b32: + case ARM::VST2b8: + case ARM::VST2b16wb_fixed: + case ARM::VST2b16wb_register: + case 
ARM::VST2b32wb_fixed: + case ARM::VST2b32wb_register: + case ARM::VST2b8wb_fixed: + case ARM::VST2b8wb_register: + if (!Check(S, DecodeDPairSpacedRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + break; + default: + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + } // Second input register switch (Inst.getOpcode()) { - case ARM::VST1q8: - case ARM::VST1q16: - case ARM::VST1q32: - case ARM::VST1q64: - case ARM::VST1q8_UPD: - case ARM::VST1q16_UPD: - case ARM::VST1q32_UPD: - case ARM::VST1q64_UPD: - case ARM::VST1d8T: - case ARM::VST1d16T: - case ARM::VST1d32T: - case ARM::VST1d64T: - case ARM::VST1d8T_UPD: - case ARM::VST1d16T_UPD: - case ARM::VST1d32T_UPD: - case ARM::VST1d64T_UPD: - case ARM::VST1d8Q: - case ARM::VST1d16Q: - case ARM::VST1d32Q: - case ARM::VST1d64Q: - case ARM::VST1d8Q_UPD: - case ARM::VST1d16Q_UPD: - case ARM::VST1d32Q_UPD: - case ARM::VST1d64Q_UPD: - case ARM::VST2d8: - case ARM::VST2d16: - case ARM::VST2d32: - case ARM::VST2d8_UPD: - case ARM::VST2d16_UPD: - case ARM::VST2d32_UPD: - case ARM::VST2q8: - case ARM::VST2q16: - case ARM::VST2q32: - case ARM::VST2q8_UPD: - case ARM::VST2q16_UPD: - case ARM::VST2q32_UPD: case ARM::VST3d8: case ARM::VST3d16: case ARM::VST3d32: @@ -2327,12 +2553,6 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn, if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+1)%32, Address, Decoder))) return MCDisassembler::Fail; break; - case ARM::VST2b8: - case ARM::VST2b16: - case ARM::VST2b32: - case ARM::VST2b8_UPD: - case ARM::VST2b16_UPD: - case ARM::VST2b32_UPD: case ARM::VST3q8: case ARM::VST3q16: case ARM::VST3q32: @@ -2354,28 +2574,6 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn, // Third input register switch (Inst.getOpcode()) { - case ARM::VST1d8T: - case ARM::VST1d16T: - case ARM::VST1d32T: - case ARM::VST1d64T: - case ARM::VST1d8T_UPD: - case ARM::VST1d16T_UPD: - case ARM::VST1d32T_UPD: - case ARM::VST1d64T_UPD: - case ARM::VST1d8Q: - case ARM::VST1d16Q: - case ARM::VST1d32Q: - case ARM::VST1d64Q: - case ARM::VST1d8Q_UPD: - case ARM::VST1d16Q_UPD: - case ARM::VST1d32Q_UPD: - case ARM::VST1d64Q_UPD: - case ARM::VST2q8: - case ARM::VST2q16: - case ARM::VST2q32: - case ARM::VST2q8_UPD: - case ARM::VST2q16_UPD: - case ARM::VST2q32_UPD: case ARM::VST3d8: case ARM::VST3d16: case ARM::VST3d32: @@ -2412,20 +2610,6 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn, // Fourth input register switch (Inst.getOpcode()) { - case ARM::VST1d8Q: - case ARM::VST1d16Q: - case ARM::VST1d32Q: - case ARM::VST1d64Q: - case ARM::VST1d8Q_UPD: - case ARM::VST1d16Q_UPD: - case ARM::VST1d32Q_UPD: - case ARM::VST1d64Q_UPD: - case ARM::VST2q8: - case ARM::VST2q16: - case ARM::VST2q32: - case ARM::VST2q8_UPD: - case ARM::VST2q16_UPD: - case ARM::VST2q32_UPD: case ARM::VST4d8: case ARM::VST4d16: case ARM::VST4d32: @@ -2451,7 +2635,7 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVLD1DupInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVLD1DupInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -2461,15 +2645,21 @@ static DecodeStatus DecodeVLD1DupInstruction(llvm::MCInst &Inst, unsigned Insn, unsigned Rm = fieldFromInstruction32(Insn, 0, 4); unsigned align = fieldFromInstruction32(Insn, 4, 1); unsigned size = fieldFromInstruction32(Insn, 6, 2); 
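// An aside on the helper called for every field above: fieldFromInstruction32
// is a plain bitfield extract over the 32-bit encoding. A minimal sketch of it
// (the real definition lives earlier in ARMDisassembler.cpp; the behavior here
// is inferred from the call sites):

#include <cassert>
#include <cstdint>

static uint32_t fieldFromInstruction32(uint32_t Insn, unsigned StartBit,
                                       unsigned NumBits) {
  assert(StartBit + NumBits <= 32 && "Instruction field out of bounds!");
  uint32_t Mask = (NumBits == 32) ? ~0u : (1u << NumBits) - 1;
  return (Insn >> StartBit) & Mask;
}

// e.g. fieldFromInstruction32(Insn, 6, 2) above extracts the two-bit "size"
// field from bits [7:6] of the encoding.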
- unsigned regs = fieldFromInstruction32(Insn, 5, 1) + 1; align *= (1 << size); - if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) - return MCDisassembler::Fail; - if (regs == 2) { - if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+1)%32, Address, Decoder))) + switch (Inst.getOpcode()) { + case ARM::VLD1DUPq16: case ARM::VLD1DUPq32: case ARM::VLD1DUPq8: + case ARM::VLD1DUPq16wb_fixed: case ARM::VLD1DUPq16wb_register: + case ARM::VLD1DUPq32wb_fixed: case ARM::VLD1DUPq32wb_register: + case ARM::VLD1DUPq8wb_fixed: case ARM::VLD1DUPq8wb_register: + if (!Check(S, DecodeDPairRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + break; + default: + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) return MCDisassembler::Fail; + break; } if (Rm != 0xF) { if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) @@ -2480,17 +2670,17 @@ static DecodeStatus DecodeVLD1DupInstruction(llvm::MCInst &Inst, unsigned Insn, return MCDisassembler::Fail; Inst.addOperand(MCOperand::CreateImm(align)); - if (Rm == 0xD) - Inst.addOperand(MCOperand::CreateReg(0)); - else if (Rm != 0xF) { - if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) - return MCDisassembler::Fail; - } + // The fixed offset post-increment encodes Rm == 0xd. The no-writeback + // variant encodes Rm == 0xf. Anything else is a register offset post- + // increment and we need to add the register operand to the instruction. + if (Rm != 0xD && Rm != 0xF && + !Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; return S; } -static DecodeStatus DecodeVLD2DupInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVLD2DupInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -2500,18 +2690,33 @@ static DecodeStatus DecodeVLD2DupInstruction(llvm::MCInst &Inst, unsigned Insn, unsigned Rm = fieldFromInstruction32(Insn, 0, 4); unsigned align = fieldFromInstruction32(Insn, 4, 1); unsigned size = 1 << fieldFromInstruction32(Insn, 6, 2); - unsigned inc = fieldFromInstruction32(Insn, 5, 1) + 1; + unsigned pred = fieldFromInstruction32(Insn, 22, 4); align *= 2*size; - if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) - return MCDisassembler::Fail; - if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+inc)%32, Address, Decoder))) - return MCDisassembler::Fail; - if (Rm != 0xF) { - if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + switch (Inst.getOpcode()) { + case ARM::VLD2DUPd16: case ARM::VLD2DUPd32: case ARM::VLD2DUPd8: + case ARM::VLD2DUPd16wb_fixed: case ARM::VLD2DUPd16wb_register: + case ARM::VLD2DUPd32wb_fixed: case ARM::VLD2DUPd32wb_register: + case ARM::VLD2DUPd8wb_fixed: case ARM::VLD2DUPd8wb_register: + if (!Check(S, DecodeDPairRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + break; + case ARM::VLD2DUPd16x2: case ARM::VLD2DUPd32x2: case ARM::VLD2DUPd8x2: + case ARM::VLD2DUPd16x2wb_fixed: case ARM::VLD2DUPd16x2wb_register: + case ARM::VLD2DUPd32x2wb_fixed: case ARM::VLD2DUPd32x2wb_register: + case ARM::VLD2DUPd8x2wb_fixed: case ARM::VLD2DUPd8x2wb_register: + if (!Check(S, DecodeDPairSpacedRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + break; + default: + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) return MCDisassembler::Fail; + break; } + if (Rm != 0xF) + Inst.addOperand(MCOperand::CreateImm(0)); + if (!Check(S, 
DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) return MCDisassembler::Fail; Inst.addOperand(MCOperand::CreateImm(align)); @@ -2523,10 +2728,13 @@ static DecodeStatus DecodeVLD2DupInstruction(llvm::MCInst &Inst, unsigned Insn, return MCDisassembler::Fail; } + if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder))) + return MCDisassembler::Fail; + return S; } -static DecodeStatus DecodeVLD3DupInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVLD3DupInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -2561,7 +2769,7 @@ static DecodeStatus DecodeVLD3DupInstruction(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVLD4DupInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVLD4DupInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -2614,7 +2822,7 @@ static DecodeStatus DecodeVLD4DupInstruction(llvm::MCInst &Inst, unsigned Insn, } static DecodeStatus -DecodeNEONModImmInstruction(llvm::MCInst &Inst, unsigned Insn, +DecodeNEONModImmInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -2659,7 +2867,7 @@ DecodeNEONModImmInstruction(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVSHLMaxInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVSHLMaxInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -2678,31 +2886,31 @@ static DecodeStatus DecodeVSHLMaxInstruction(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeShiftRight8Imm(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeShiftRight8Imm(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { Inst.addOperand(MCOperand::CreateImm(8 - Val)); return MCDisassembler::Success; } -static DecodeStatus DecodeShiftRight16Imm(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeShiftRight16Imm(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { Inst.addOperand(MCOperand::CreateImm(16 - Val)); return MCDisassembler::Success; } -static DecodeStatus DecodeShiftRight32Imm(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeShiftRight32Imm(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { Inst.addOperand(MCOperand::CreateImm(32 - Val)); return MCDisassembler::Success; } -static DecodeStatus DecodeShiftRight64Imm(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeShiftRight64Imm(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { Inst.addOperand(MCOperand::CreateImm(64 - Val)); return MCDisassembler::Success; } -static DecodeStatus DecodeTBLInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeTBLInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -2713,7 +2921,6 @@ static DecodeStatus DecodeTBLInstruction(llvm::MCInst &Inst, unsigned Insn, unsigned Rm = fieldFromInstruction32(Insn, 0, 4); Rm |= fieldFromInstruction32(Insn, 5, 1) << 4; unsigned op = fieldFromInstruction32(Insn, 6, 1); - unsigned length = fieldFromInstruction32(Insn, 8, 2) + 1; if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) return MCDisassembler::Fail; @@ -2722,9 +2929,15 @@ static DecodeStatus DecodeTBLInstruction(llvm::MCInst &Inst, 
unsigned Insn, return MCDisassembler::Fail; // Writeback } - for (unsigned i = 0; i < length; ++i) { - if (!Check(S, DecodeDPRRegisterClass(Inst, (Rn+i)%32, Address, Decoder))) - return MCDisassembler::Fail; + switch (Inst.getOpcode()) { + case ARM::VTBL2: + case ARM::VTBX2: + if (!Check(S, DecodeDPairRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + break; + default: + if (!Check(S, DecodeDPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; } if (!Check(S, DecodeDPRRegisterClass(Inst, Rm, Address, Decoder))) @@ -2733,7 +2946,7 @@ static DecodeStatus DecodeTBLInstruction(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeThumbAddSpecialReg(llvm::MCInst &Inst, uint16_t Insn, +static DecodeStatus DecodeThumbAddSpecialReg(MCInst &Inst, uint16_t Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -2757,25 +2970,31 @@ static DecodeStatus DecodeThumbAddSpecialReg(llvm::MCInst &Inst, uint16_t Insn, return S; } -static DecodeStatus DecodeThumbBROperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeThumbBROperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { - Inst.addOperand(MCOperand::CreateImm(SignExtend32<12>(Val << 1))); + if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<12>(Val<<1) + 4, + true, 2, Inst, Decoder)) + Inst.addOperand(MCOperand::CreateImm(SignExtend32<12>(Val << 1))); return MCDisassembler::Success; } -static DecodeStatus DecodeT2BROperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2BROperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { - Inst.addOperand(MCOperand::CreateImm(SignExtend32<21>(Val))); + if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<22>(Val<<1) + 4, + true, 4, Inst, Decoder)) + Inst.addOperand(MCOperand::CreateImm(SignExtend32<21>(Val))); return MCDisassembler::Success; } -static DecodeStatus DecodeThumbCmpBROperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeThumbCmpBROperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { - Inst.addOperand(MCOperand::CreateImm(SignExtend32<7>(Val << 1))); + if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<7>(Val<<1) + 4, + true, 2, Inst, Decoder)) + Inst.addOperand(MCOperand::CreateImm(SignExtend32<7>(Val << 1))); return MCDisassembler::Success; } -static DecodeStatus DecodeThumbAddrModeRR(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeThumbAddrModeRR(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -2790,7 +3009,7 @@ static DecodeStatus DecodeThumbAddrModeRR(llvm::MCInst &Inst, unsigned Val, return S; } -static DecodeStatus DecodeThumbAddrModeIS(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeThumbAddrModeIS(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -2804,7 +3023,7 @@ static DecodeStatus DecodeThumbAddrModeIS(llvm::MCInst &Inst, unsigned Val, return S; } -static DecodeStatus DecodeThumbAddrModePC(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeThumbAddrModePC(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { unsigned imm = Val << 2; @@ -2814,7 +3033,7 @@ static DecodeStatus DecodeThumbAddrModePC(llvm::MCInst &Inst, unsigned Val, return MCDisassembler::Success; } -static DecodeStatus DecodeThumbAddrModeSP(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus 
DecodeThumbAddrModeSP(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { Inst.addOperand(MCOperand::CreateReg(ARM::SP)); Inst.addOperand(MCOperand::CreateImm(Val)); @@ -2822,7 +3041,7 @@ static DecodeStatus DecodeThumbAddrModeSP(llvm::MCInst &Inst, unsigned Val, return MCDisassembler::Success; } -static DecodeStatus DecodeT2AddrModeSOReg(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2AddrModeSOReg(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -2839,7 +3058,7 @@ static DecodeStatus DecodeT2AddrModeSOReg(llvm::MCInst &Inst, unsigned Val, return S; } -static DecodeStatus DecodeT2LoadShift(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -2894,7 +3113,7 @@ static DecodeStatus DecodeT2LoadShift(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeT2Imm8S4(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { int imm = Val & 0xFF; if (!(Val & 0x100)) imm *= -1; @@ -2903,7 +3122,7 @@ static DecodeStatus DecodeT2Imm8S4(llvm::MCInst &Inst, unsigned Val, return MCDisassembler::Success; } -static DecodeStatus DecodeT2AddrModeImm8s4(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2AddrModeImm8s4(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -2918,7 +3137,7 @@ static DecodeStatus DecodeT2AddrModeImm8s4(llvm::MCInst &Inst, unsigned Val, return S; } -static DecodeStatus DecodeT2AddrModeImm0_1020s4(llvm::MCInst &Inst,unsigned Val, +static DecodeStatus DecodeT2AddrModeImm0_1020s4(MCInst &Inst,unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -2933,7 +3152,7 @@ static DecodeStatus DecodeT2AddrModeImm0_1020s4(llvm::MCInst &Inst,unsigned Val, return S; } -static DecodeStatus DecodeT2Imm8(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2Imm8(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { int imm = Val & 0xFF; if (Val == 0) @@ -2946,7 +3165,7 @@ static DecodeStatus DecodeT2Imm8(llvm::MCInst &Inst, unsigned Val, } -static DecodeStatus DecodeT2AddrModeImm8(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2AddrModeImm8(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -2977,7 +3196,7 @@ static DecodeStatus DecodeT2AddrModeImm8(llvm::MCInst &Inst, unsigned Val, return S; } -static DecodeStatus DecodeT2LdStPre(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeT2LdStPre(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3007,7 +3226,7 @@ static DecodeStatus DecodeT2LdStPre(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeT2AddrModeImm12(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2AddrModeImm12(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3022,7 +3241,7 @@ static DecodeStatus DecodeT2AddrModeImm12(llvm::MCInst &Inst, unsigned Val, } -static DecodeStatus DecodeThumbAddSPImm(llvm::MCInst &Inst, uint16_t Insn, +static DecodeStatus DecodeThumbAddSPImm(MCInst &Inst, uint16_t Insn, uint64_t Address, const void *Decoder) { unsigned imm = 
fieldFromInstruction16(Insn, 0, 7); @@ -3033,7 +3252,7 @@ static DecodeStatus DecodeThumbAddSPImm(llvm::MCInst &Inst, uint16_t Insn, return MCDisassembler::Success; } -static DecodeStatus DecodeThumbAddSPReg(llvm::MCInst &Inst, uint16_t Insn, +static DecodeStatus DecodeThumbAddSPReg(MCInst &Inst, uint16_t Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3058,7 +3277,7 @@ static DecodeStatus DecodeThumbAddSPReg(llvm::MCInst &Inst, uint16_t Insn, return S; } -static DecodeStatus DecodeThumbCPS(llvm::MCInst &Inst, uint16_t Insn, +static DecodeStatus DecodeThumbCPS(MCInst &Inst, uint16_t Insn, uint64_t Address, const void *Decoder) { unsigned imod = fieldFromInstruction16(Insn, 4, 1) | 0x2; unsigned flags = fieldFromInstruction16(Insn, 0, 3); @@ -3069,29 +3288,29 @@ static DecodeStatus DecodeThumbCPS(llvm::MCInst &Inst, uint16_t Insn, return MCDisassembler::Success; } -static DecodeStatus DecodePostIdxReg(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodePostIdxReg(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rm = fieldFromInstruction32(Insn, 0, 4); unsigned add = fieldFromInstruction32(Insn, 4, 1); - if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rm, Address, Decoder))) return MCDisassembler::Fail; Inst.addOperand(MCOperand::CreateImm(add)); return S; } -static DecodeStatus DecodeThumbBLXOffset(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeThumbBLXOffset(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { - if (!tryAddingSymbolicOperand(Address, + if (!tryAddingSymbolicOperand(Address, (Address & ~2u) + SignExtend32<22>(Val << 1) + 4, true, 4, Inst, Decoder)) Inst.addOperand(MCOperand::CreateImm(SignExtend32<22>(Val << 1))); return MCDisassembler::Success; } -static DecodeStatus DecodeCoprocessor(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeCoprocessor(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { if (Val == 0xA || Val == 0xB) return MCDisassembler::Fail; @@ -3101,7 +3320,7 @@ static DecodeStatus DecodeCoprocessor(llvm::MCInst &Inst, unsigned Val, } static DecodeStatus -DecodeThumbTableBranch(llvm::MCInst &Inst, unsigned Insn, +DecodeThumbTableBranch(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3117,7 +3336,7 @@ DecodeThumbTableBranch(llvm::MCInst &Inst, unsigned Insn, } static DecodeStatus -DecodeThumb2BCCInstruction(llvm::MCInst &Inst, unsigned Insn, +DecodeThumb2BCCInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3159,7 +3378,7 @@ DecodeThumb2BCCInstruction(llvm::MCInst &Inst, unsigned Insn, // Decode a shifted immediate operand. These basically consist // of an 8-bit value, and a 4-bit directive that specifies either // a splat operation or a rotation. 
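// Before the decoder itself, here is the expansion the comment above
// describes, as a self-contained sketch (illustrative only, not part of the
// patch; the field positions mirror the fieldFromInstruction32 calls in
// DecodeT2SOImm below):

#include <cstdint>

static uint32_t expandT2SOImm(unsigned Val) {
  unsigned Ctrl = (Val >> 10) & 3;  // zero selects a splat, nonzero a rotate
  uint32_t Byte = Val & 0xFF;
  if (Ctrl == 0) {
    switch ((Val >> 8) & 3) {       // which bytes of the word get the value
    case 0:  return Byte;                                   // 000000XY
    case 1:  return (Byte << 16) | Byte;                    // 00XY00XY
    case 2:  return (Byte << 24) | (Byte << 8);             // XY00XY00
    default: return (Byte << 24) | (Byte << 16) |
                    (Byte << 8)  | Byte;                    // XYXYXYXY
    }
  }
  // Rotated form: 0b1bcdefgh rotated right by Val[11:7]; Ctrl != 0 means the
  // rotate amount is at least 8, so neither shift below is degenerate.
  unsigned Rot = (Val >> 7) & 0x1F;
  uint32_t Imm = 0x80 | (Val & 0x7F);
  return (Imm >> Rot) | (Imm << (32 - Rot));
}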
-static DecodeStatus DecodeT2SOImm(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2SOImm(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { unsigned ctrl = fieldFromInstruction32(Val, 10, 2); if (ctrl == 0) { @@ -3191,19 +3410,23 @@ static DecodeStatus DecodeT2SOImm(llvm::MCInst &Inst, unsigned Val, } static DecodeStatus -DecodeThumbBCCTargetOperand(llvm::MCInst &Inst, unsigned Val, +DecodeThumbBCCTargetOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder){ - Inst.addOperand(MCOperand::CreateImm(Val << 1)); + if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<8>(Val<<1) + 4, + true, 2, Inst, Decoder)) + Inst.addOperand(MCOperand::CreateImm(SignExtend32<8>(Val << 1))); return MCDisassembler::Success; } -static DecodeStatus DecodeThumbBLTargetOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeThumbBLTargetOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder){ - Inst.addOperand(MCOperand::CreateImm(SignExtend32<22>(Val << 1))); + if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<22>(Val<<1) + 4, + true, 4, Inst, Decoder)) + Inst.addOperand(MCOperand::CreateImm(SignExtend32<22>(Val << 1))); return MCDisassembler::Success; } -static DecodeStatus DecodeMemBarrierOption(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeMemBarrierOption(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { switch (Val) { default: @@ -3223,14 +3446,14 @@ static DecodeStatus DecodeMemBarrierOption(llvm::MCInst &Inst, unsigned Val, return MCDisassembler::Success; } -static DecodeStatus DecodeMSRMask(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { if (!Val) return MCDisassembler::Fail; Inst.addOperand(MCOperand::CreateImm(Val)); return MCDisassembler::Success; } -static DecodeStatus DecodeDoubleRegLoad(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeDoubleRegLoad(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3253,7 +3476,7 @@ static DecodeStatus DecodeDoubleRegLoad(llvm::MCInst &Inst, unsigned Insn, } -static DecodeStatus DecodeDoubleRegStore(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeDoubleRegStore(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder){ DecodeStatus S = MCDisassembler::Success; @@ -3280,7 +3503,7 @@ static DecodeStatus DecodeDoubleRegStore(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeLDRPreImm(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeLDRPreImm(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3305,7 +3528,7 @@ static DecodeStatus DecodeLDRPreImm(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeLDRPreReg(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeLDRPreReg(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3333,7 +3556,7 @@ static DecodeStatus DecodeLDRPreReg(llvm::MCInst &Inst, unsigned Insn, } -static DecodeStatus DecodeSTRPreImm(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeSTRPreImm(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3358,7 +3581,7 @@ static DecodeStatus DecodeSTRPreImm(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus 
DecodeSTRPreReg(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeSTRPreReg(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3383,7 +3606,7 @@ static DecodeStatus DecodeSTRPreReg(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVLD1LN(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVLD1LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3442,7 +3665,7 @@ static DecodeStatus DecodeVLD1LN(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVST1LN(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3500,7 +3723,7 @@ static DecodeStatus DecodeVST1LN(llvm::MCInst &Inst, unsigned Insn, } -static DecodeStatus DecodeVLD2LN(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVLD2LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3567,7 +3790,7 @@ static DecodeStatus DecodeVLD2LN(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVST2LN(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVST2LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3631,7 +3854,7 @@ static DecodeStatus DecodeVST2LN(llvm::MCInst &Inst, unsigned Insn, } -static DecodeStatus DecodeVLD3LN(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVLD3LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3701,7 +3924,7 @@ static DecodeStatus DecodeVLD3LN(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVST3LN(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVST3LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3765,7 +3988,7 @@ static DecodeStatus DecodeVST3LN(llvm::MCInst &Inst, unsigned Insn, } -static DecodeStatus DecodeVLD4LN(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3839,7 +4062,7 @@ static DecodeStatus DecodeVLD4LN(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVST4LN(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3904,7 +4127,7 @@ static DecodeStatus DecodeVST4LN(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVMOVSRR(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVMOVSRR(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rt = fieldFromInstruction32(Insn, 12, 4); @@ -3930,7 +4153,7 @@ static DecodeStatus DecodeVMOVSRR(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVMOVRRS(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVMOVRRS(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rt = fieldFromInstruction32(Insn, 12, 4); @@ -3956,7 +4179,7 @@ static DecodeStatus DecodeVMOVRRS(llvm::MCInst &Inst, unsigned Insn, return S; } 
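A pattern worth calling out before the remaining decoders: every one of these routines starts S at Success and funnels each sub-decode through Check(), which lets SoftFail (decodable but UNPREDICTABLE encodings) accumulate while only a hard Fail aborts. A minimal sketch of that helper, assuming the three-valued DecodeStatus that MCDisassembler defines (the real helper sits near the top of ARMDisassembler.cpp):

#include "llvm/MC/MCDisassembler.h"
using llvm::MCDisassembler;

static bool Check(MCDisassembler::DecodeStatus &Out,
                  MCDisassembler::DecodeStatus In) {
  switch (In) {
  case MCDisassembler::Success:
    // Leave the accumulated status alone; keep decoding.
    return true;
  case MCDisassembler::SoftFail:
    // Remember the soft failure, but keep decoding operands.
    Out = In;
    return true;
  case MCDisassembler::Fail:
    // Hard failure: record it and tell the caller to bail out.
    Out = In;
    return false;
  }
  return false; // not reached for a well-formed DecodeStatus
}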
-static DecodeStatus DecodeIT(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeIT(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned pred = fieldFromInstruction16(Insn, 4, 4); @@ -3983,7 +4206,7 @@ static DecodeStatus DecodeIT(llvm::MCInst &Inst, unsigned Insn, } static DecodeStatus -DecodeT2LDRDPreInstruction(llvm::MCInst &Inst, unsigned Insn, +DecodeT2LDRDPreInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -4020,7 +4243,7 @@ DecodeT2LDRDPreInstruction(llvm::MCInst &Inst, unsigned Insn, } static DecodeStatus -DecodeT2STRDPreInstruction(llvm::MCInst &Inst, unsigned Insn, +DecodeT2STRDPreInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -4054,7 +4277,7 @@ DecodeT2STRDPreInstruction(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeT2Adr(llvm::MCInst &Inst, uint32_t Insn, +static DecodeStatus DecodeT2Adr(MCInst &Inst, uint32_t Insn, uint64_t Address, const void *Decoder) { unsigned sign1 = fieldFromInstruction32(Insn, 21, 1); unsigned sign2 = fieldFromInstruction32(Insn, 23, 1); @@ -4069,7 +4292,7 @@ static DecodeStatus DecodeT2Adr(llvm::MCInst &Inst, uint32_t Insn, return MCDisassembler::Success; } -static DecodeStatus DecodeT2ShifterImmOperand(llvm::MCInst &Inst, uint32_t Val, +static DecodeStatus DecodeT2ShifterImmOperand(MCInst &Inst, uint32_t Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -4080,3 +4303,109 @@ static DecodeStatus DecodeT2ShifterImmOperand(llvm::MCInst &Inst, uint32_t Val, return S; } +static DecodeStatus DecodeSwap(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + unsigned Rt = fieldFromInstruction32(Insn, 12, 4); + unsigned Rt2 = fieldFromInstruction32(Insn, 0, 4); + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + unsigned pred = fieldFromInstruction32(Insn, 28, 4); + + if (pred == 0xF) + return DecodeCPSInstruction(Inst, Insn, Address, Decoder); + + DecodeStatus S = MCDisassembler::Success; + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rt, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rt2, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder))) + return MCDisassembler::Fail; + + return S; +} + +static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + unsigned Vd = (fieldFromInstruction32(Insn, 12, 4) << 0); + Vd |= (fieldFromInstruction32(Insn, 22, 1) << 4); + unsigned Vm = (fieldFromInstruction32(Insn, 0, 4) << 0); + Vm |= (fieldFromInstruction32(Insn, 5, 1) << 4); + unsigned imm = fieldFromInstruction32(Insn, 16, 6); + unsigned cmode = fieldFromInstruction32(Insn, 8, 4); + + DecodeStatus S = MCDisassembler::Success; + + // VMOVv2f32 is ambiguous with these decodings. 
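// (Why the ambiguity: VCVT between floating-point and fixed-point encodes the
// fraction width as fbits = 64 - imm6, so every legal fbits of 1..32 has bit 5
// of imm6 set -- the SoftFail check below enforces exactly that. Encodings
// whose top three imm6 bits are clear can never be a real VCVT, and the
// architecture reuses them, with cmode == 0xF, for the one-register
// modified-immediate VMOV. As a worked value: "vcvt.f32.s32 d0, d0, #16"
// carries imm6 = 48, and the operand appended at the end of this function,
//   Inst.addOperand(MCOperand::CreateImm(64 - 48));  // fbits = 16
// round-trips back to "#16" when printed.)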
+ if (!(imm & 0x38) && cmode == 0xF) { + Inst.setOpcode(ARM::VMOVv2f32); + return DecodeNEONModImmInstruction(Inst, Insn, Address, Decoder); + } + + if (!(imm & 0x20)) Check(S, MCDisassembler::SoftFail); + + if (!Check(S, DecodeDPRRegisterClass(Inst, Vd, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeDPRRegisterClass(Inst, Vm, Address, Decoder))) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(64 - imm)); + + return S; +} + +static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + unsigned Vd = (fieldFromInstruction32(Insn, 12, 4) << 0); + Vd |= (fieldFromInstruction32(Insn, 22, 1) << 4); + unsigned Vm = (fieldFromInstruction32(Insn, 0, 4) << 0); + Vm |= (fieldFromInstruction32(Insn, 5, 1) << 4); + unsigned imm = fieldFromInstruction32(Insn, 16, 6); + unsigned cmode = fieldFromInstruction32(Insn, 8, 4); + + DecodeStatus S = MCDisassembler::Success; + + // VMOVv4f32 is ambiguous with these decodings. + if (!(imm & 0x38) && cmode == 0xF) { + Inst.setOpcode(ARM::VMOVv4f32); + return DecodeNEONModImmInstruction(Inst, Insn, Address, Decoder); + } + + if (!(imm & 0x20)) Check(S, MCDisassembler::SoftFail); + + if (!Check(S, DecodeQPRRegisterClass(Inst, Vd, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeQPRRegisterClass(Inst, Vm, Address, Decoder))) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(64 - imm)); + + return S; +} + +static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rn = fieldFromInstruction32(Val, 16, 4); + unsigned Rt = fieldFromInstruction32(Val, 12, 4); + unsigned Rm = fieldFromInstruction32(Val, 0, 4); + Rm |= (fieldFromInstruction32(Val, 23, 1) << 4); + unsigned Cond = fieldFromInstruction32(Val, 28, 4); + + if (fieldFromInstruction32(Val, 8, 4) != 0 || Rn == Rt) + S = MCDisassembler::SoftFail; + + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rt, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeAddrMode7Operand(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodePostIdxReg(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodePredicateOperand(Inst, Cond, Address, Decoder))) + return MCDisassembler::Fail; + + return S; +} + diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index ccdac3e..b3eeafe 100644 --- a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -18,11 +18,11 @@ #include "llvm/MC/MCInst.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" -#include "llvm/ADT/StringExtras.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; -#define GET_INSTRUCTION_NAME #include "ARMGenAsmWriter.inc" /// translateShiftImm - Convert shift immediate from 0-31 to 1-32 for printing. @@ -36,16 +36,14 @@ static unsigned translateShiftImm(unsigned imm) { ARMInstPrinter::ARMInstPrinter(const MCAsmInfo &MAI, + const MCInstrInfo &MII, + const MCRegisterInfo &MRI, const MCSubtargetInfo &STI) : - MCInstPrinter(MAI) { + MCInstPrinter(MAI, MII, MRI) { // Initialize the set of available features. 
setAvailableFeatures(STI.getFeatureBits()); } -StringRef ARMInstPrinter::getOpcodeName(unsigned Opcode) const { - return getInstructionName(Opcode); -} - void ARMInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { OS << getRegisterName(RegNo); } @@ -101,7 +99,9 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, // A8.6.123 PUSH if ((Opcode == ARM::STMDB_UPD || Opcode == ARM::t2STMDB_UPD) && - MI->getOperand(0).getReg() == ARM::SP) { + MI->getOperand(0).getReg() == ARM::SP && + MI->getNumOperands() > 5) { + // Should only print PUSH if there are at least two registers in the list. O << '\t' << "push"; printPredicateOperand(MI, 2, O); if (Opcode == ARM::t2STMDB_UPD) @@ -122,7 +122,9 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, // A8.6.122 POP if ((Opcode == ARM::LDMIA_UPD || Opcode == ARM::t2LDMIA_UPD) && - MI->getOperand(0).getReg() == ARM::SP) { + MI->getOperand(0).getReg() == ARM::SP && + MI->getNumOperands() > 5) { + // Should only print POP if there are at least two registers in the list. O << '\t' << "pop"; printPredicateOperand(MI, 2, O); if (Opcode == ARM::t2LDMIA_UPD) @@ -250,7 +252,7 @@ void ARMInstPrinter::printSORegRegOperand(const MCInst *MI, unsigned OpNum, O << ", " << ARM_AM::getShiftOpcStr(ShOpc); if (ShOpc == ARM_AM::rrx) return; - + O << ' ' << getRegisterName(MO2.getReg()); assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0); } @@ -433,6 +435,12 @@ void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned Op, raw_ostream &O) { + const MCOperand &MO1 = MI->getOperand(Op); + if (!MO1.isReg()) { // For label symbolic references. + printOperand(MI, Op, O); + return; + } + const MCOperand &MO3 = MI->getOperand(Op+2); unsigned IdxMode = ARM_AM::getAM3IdxMode(MO3.getImm()); @@ -636,7 +644,7 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum, if (getAvailableFeatures() & ARM::FeatureMClass) { switch (Op.getImm()) { - default: assert(0 && "Unexpected mask value!"); + default: llvm_unreachable("Unexpected mask value!"); case 0: O << "apsr"; return; case 1: O << "iapsr"; return; case 2: O << "eapsr"; return; @@ -659,12 +667,11 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum, if (!SpecRegRBit && (Mask == 8 || Mask == 4 || Mask == 12)) { O << "APSR_"; switch (Mask) { - default: assert(0); + default: llvm_unreachable("Unexpected mask value!"); case 4: O << "g"; return; case 8: O << "nzcvq"; return; case 12: O << "nzcvqg"; return; } - llvm_unreachable("Unexpected mask value!"); } if (SpecRegRBit) @@ -684,7 +691,10 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum, void ARMInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm(); - if (CC != ARMCC::AL) + // Handle the undefined 15 CC value here for printing so we don't abort(). + if ((unsigned)CC == 15) + O << "<und>"; + else if (CC != ARMCC::AL) O << ARMCondCodeToString(CC); } @@ -882,6 +892,11 @@ void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI, const MCOperand &MO1 = MI->getOperand(OpNum); const MCOperand &MO2 = MI->getOperand(OpNum+1); + if (!MO1.isReg()) { // For label symbolic references. 
+ printOperand(MI, OpNum, O); + return; + } + O << "[" << getRegisterName(MO1.getReg()); int32_t OffImm = (int32_t)MO2.getImm() / 4; @@ -963,7 +978,8 @@ void ARMInstPrinter::printNEONModImmOperand(const MCInst *MI, unsigned OpNum, unsigned EncodedImm = MI->getOperand(OpNum).getImm(); unsigned EltBits; uint64_t Val = ARM_AM::decodeNEONModImm(EncodedImm, EltBits); - O << "#0x" << utohexstr(Val); + O << "#0x"; + O.write_hex(Val); } void ARMInstPrinter::printImmPlusOneOperand(const MCInst *MI, unsigned OpNum, @@ -986,7 +1002,153 @@ void ARMInstPrinter::printRotImmOperand(const MCInst *MI, unsigned OpNum, } } +void ARMInstPrinter::printFBits16(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + O << "#" << 16 - MI->getOperand(OpNum).getImm(); +} + +void ARMInstPrinter::printFBits32(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + O << "#" << 32 - MI->getOperand(OpNum).getImm(); +} + void ARMInstPrinter::printVectorIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O) { O << "[" << MI->getOperand(OpNum).getImm() << "]"; } + +void ARMInstPrinter::printVectorListOne(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "}"; +} + +void ARMInstPrinter::printVectorListTwo(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + unsigned Reg = MI->getOperand(OpNum).getReg(); + unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0); + unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_1); + O << "{" << getRegisterName(Reg0) << ", " << getRegisterName(Reg1) << "}"; +} + +void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { + unsigned Reg = MI->getOperand(OpNum).getReg(); + unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0); + unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_2); + O << "{" << getRegisterName(Reg0) << ", " << getRegisterName(Reg1) << "}"; +} + +void ARMInstPrinter::printVectorListThree(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + // Normally, it's not safe to use register enum values directly with + // addition to get the next register, but for VFP registers, the + // sort order is guaranteed because they're all of the form D<n>. + O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << ", " + << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << ", " + << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "}"; +} + +void ARMInstPrinter::printVectorListFour(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + // Normally, it's not safe to use register enum values directly with + // addition to get the next register, but for VFP registers, the + // sort order is guaranteed because they're all of the form D<n>. 
+ O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << ", " + << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << ", " + << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << ", " + << getRegisterName(MI->getOperand(OpNum).getReg() + 3) << "}"; +} + +void ARMInstPrinter::printVectorListOneAllLanes(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { + O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[]}"; +} + +void ARMInstPrinter::printVectorListTwoAllLanes(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { + unsigned Reg = MI->getOperand(OpNum).getReg(); + unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0); + unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_1); + O << "{" << getRegisterName(Reg0) << "[], " << getRegisterName(Reg1) << "[]}"; +} + +void ARMInstPrinter::printVectorListThreeAllLanes(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { + // Normally, it's not safe to use register enum values directly with + // addition to get the next register, but for VFP registers, the + // sort order is guaranteed because they're all of the form D<n>. + O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], " + << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << "[], " + << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[]}"; +} + +void ARMInstPrinter::printVectorListFourAllLanes(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { + // Normally, it's not safe to use register enum values directly with + // addition to get the next register, but for VFP registers, the + // sort order is guaranteed because they're all of the form D<n>. + O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], " + << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << "[], " + << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[], " + << getRegisterName(MI->getOperand(OpNum).getReg() + 3) << "[]}"; +} + +void ARMInstPrinter::printVectorListTwoSpacedAllLanes(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { + unsigned Reg = MI->getOperand(OpNum).getReg(); + unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0); + unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_2); + O << "{" << getRegisterName(Reg0) << "[], " << getRegisterName(Reg1) << "[]}"; +} + +void ARMInstPrinter::printVectorListThreeSpacedAllLanes(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { + // Normally, it's not safe to use register enum values directly with + // addition to get the next register, but for VFP registers, the + // sort order is guaranteed because they're all of the form D<n>. + O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], " + << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[], " + << getRegisterName(MI->getOperand(OpNum).getReg() + 4) << "[]}"; +} + +void ARMInstPrinter::printVectorListFourSpacedAllLanes(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { + // Normally, it's not safe to use register enum values directly with + // addition to get the next register, but for VFP registers, the + // sort order is guaranteed because they're all of the form D<n>. 
+ O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], " + << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[], " + << getRegisterName(MI->getOperand(OpNum).getReg() + 4) << "[], " + << getRegisterName(MI->getOperand(OpNum).getReg() + 6) << "[]}"; +} + +void ARMInstPrinter::printVectorListThreeSpaced(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { + // Normally, it's not safe to use register enum values directly with + // addition to get the next register, but for VFP registers, the + // sort order is guaranteed because they're all of the form D<n>. + O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << ", " + << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << ", " + << getRegisterName(MI->getOperand(OpNum).getReg() + 4) << "}"; +} + +void ARMInstPrinter::printVectorListFourSpaced(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { + // Normally, it's not safe to use register enum values directly with + // addition to get the next register, but for VFP registers, the + // sort order is guaranteed because they're all of the form D<n>. + O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << ", " + << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << ", " + << getRegisterName(MI->getOperand(OpNum).getReg() + 4) << ", " + << getRegisterName(MI->getOperand(OpNum).getReg() + 6) << "}"; +} diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h index 5c2173f..8acb7ee 100644 --- a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h +++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h @@ -1,4 +1,4 @@ -//===-- ARMInstPrinter.h - Convert ARM MCInst to assembly syntax ----------===// +//===- ARMInstPrinter.h - Convert ARM MCInst to assembly syntax -*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -23,14 +23,12 @@ class MCOperand; class ARMInstPrinter : public MCInstPrinter { public: - ARMInstPrinter(const MCAsmInfo &MAI, const MCSubtargetInfo &STI); + ARMInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, + const MCRegisterInfo &MRI, const MCSubtargetInfo &STI); virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); - virtual StringRef getOpcodeName(unsigned Opcode) const; virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; - static const char *getInstructionName(unsigned Opcode); - // Autogenerated by tblgen. 
void printInstruction(const MCInst *MI, raw_ostream &O); static const char *getRegisterName(unsigned RegNo); @@ -128,7 +126,33 @@ public: void printPCLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printT2LdrLabelOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printFBits16(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printFBits32(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVectorIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printVectorListOne(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printVectorListTwo(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + void printVectorListThree(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printVectorListFour(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printVectorListOneAllLanes(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + void printVectorListTwoAllLanes(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + void printVectorListThreeAllLanes(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + void printVectorListFourAllLanes(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + void printVectorListTwoSpacedAllLanes(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + void printVectorListThreeSpacedAllLanes(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + void printVectorListFourSpacedAllLanes(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + void printVectorListThreeSpaced(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + void printVectorListFourSpaced(const MCInst *MI, unsigned OpNum, + raw_ostream &O); }; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h index 9982fa6..62473b2 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h @@ -1,4 +1,4 @@ -//===- ARMAddressingModes.h - ARM Addressing Modes --------------*- C++ -*-===// +//===-- ARMAddressingModes.h - ARM Addressing Modes -------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -16,6 +16,7 @@ #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include <cassert> @@ -43,7 +44,7 @@ namespace ARM_AM { static inline const char *getShiftOpcStr(ShiftOpc Op) { switch (Op) { - default: assert(0 && "Unknown shift opc!"); + default: llvm_unreachable("Unknown shift opc!"); case ARM_AM::asr: return "asr"; case ARM_AM::lsl: return "lsl"; case ARM_AM::lsr: return "lsr"; @@ -54,7 +55,7 @@ namespace ARM_AM { static inline unsigned getShiftOpcEncoding(ShiftOpc Op) { switch (Op) { - default: assert(0 && "Unknown shift opc!"); + default: llvm_unreachable("Unknown shift opc!"); case ARM_AM::asr: return 2; case ARM_AM::lsl: return 0; case ARM_AM::lsr: return 1; @@ -72,7 +73,7 @@ namespace ARM_AM { static inline const char *getAMSubModeStr(AMSubMode Mode) { switch (Mode) { - default: assert(0 && "Unknown addressing sub-mode!"); + default: llvm_unreachable("Unknown addressing sub-mode!"); case ARM_AM::ia: return "ia"; case ARM_AM::ib: return "ib"; case ARM_AM::da: return "da"; @@ -569,7 +570,7 @@ namespace ARM_AM { } EltBits = 64; } else { - assert(false && "Unsupported NEON immediate"); + llvm_unreachable("Unsupported NEON immediate"); } return Val; } diff --git 
a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index c31c5e6..d10bfc1 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -11,17 +11,18 @@ #include "MCTargetDesc/ARMBaseInfo.h" #include "MCTargetDesc/ARMFixupKinds.h" #include "MCTargetDesc/ARMAddressingModes.h" -#include "llvm/ADT/Twine.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCMachObjectWriter.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCValue.h" #include "llvm/Object/MachOFormat.h" #include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" @@ -31,8 +32,8 @@ using namespace llvm; namespace { class ARMELFObjectWriter : public MCELFObjectTargetWriter { public: - ARMELFObjectWriter(Triple::OSType OSType) - : MCELFObjectTargetWriter(/*Is64Bit*/ false, OSType, ELF::EM_ARM, + ARMELFObjectWriter(uint8_t OSABI) + : MCELFObjectTargetWriter(/*Is64Bit*/ false, OSABI, ELF::EM_ARM, /*HasRelocationAddend*/ false) {} }; @@ -60,15 +61,16 @@ public: // ARMFixupKinds.h. // // Name Offset (bits) Size (bits) Flags -{ "fixup_arm_ldst_pcrel_12", 1, 24, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_arm_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_t2_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel | MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, -{ "fixup_arm_pcrel_10", 1, 24, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_arm_pcrel_10_unscaled", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_arm_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_t2_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel | MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, { "fixup_thumb_adr_pcrel_10",0, 8, MCFixupKindInfo::FKF_IsPCRel | MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, -{ "fixup_arm_adr_pcrel_12", 1, 24, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_arm_adr_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_t2_adr_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel | MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, { "fixup_arm_condbranch", 0, 24, MCFixupKindInfo::FKF_IsPCRel }, @@ -76,6 +78,9 @@ public: { "fixup_t2_condbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_t2_uncondbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_arm_thumb_br", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_arm_uncondbl", 0, 24, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_arm_condbl", 0, 24, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_arm_blx", 0, 24, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_arm_thumb_bl", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_arm_thumb_blx", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_arm_thumb_cb", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, @@ -100,13 +105,50 @@ public: return Infos[Kind - FirstTargetFixupKind]; } - bool MayNeedRelaxation(const MCInst &Inst) const; + /// processFixupValue - Target hook to process the literal value of a fixup + /// if necessary. + void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout, + const MCFixup &Fixup, const MCFragment *DF, + MCValue &Target, uint64_t &Value, + bool &IsResolved) { + const MCSymbolRefExpr *A = Target.getSymA(); + // Some fixups to thumb function symbols need the low bit (thumb bit) + // twiddled. 
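// (Interworking background, sketched: bit 0 of a code address selects the
// destination instruction set, so a BX/BLX to "addr | 1" executes in Thumb
// state. The OR of 1 below bakes that bit into values that resolve against
// known-Thumb functions; the pc-relative load and ADR kinds listed in the
// condition are exempted because they compute data addresses, where bit 0 is
// a genuine address bit. The core of it, with hypothetical names:
//   uint64_t Resolved = SymAddr;      // the symbol's raw address
//   if (Asm.isThumbFunc(&Sym))
//     Resolved |= 1;                  // destination executes as Thumb
// )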
+ if ((unsigned)Fixup.getKind() != ARM::fixup_arm_ldst_pcrel_12 && + (unsigned)Fixup.getKind() != ARM::fixup_t2_ldst_pcrel_12 && + (unsigned)Fixup.getKind() != ARM::fixup_arm_adr_pcrel_12 && + (unsigned)Fixup.getKind() != ARM::fixup_thumb_adr_pcrel_10 && + (unsigned)Fixup.getKind() != ARM::fixup_t2_adr_pcrel_12 && + (unsigned)Fixup.getKind() != ARM::fixup_arm_thumb_cp) { + if (A) { + const MCSymbol &Sym = A->getSymbol().AliasedSymbol(); + if (Asm.isThumbFunc(&Sym)) + Value |= 1; + } + } + // We must always generate a relocation for BL/BLX instructions if we have + // a symbol to reference, as the linker relies on knowing the destination + // symbol's thumb-ness to get interworking right. + if (A && ((unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_blx || + (unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_bl || + (unsigned)Fixup.getKind() == ARM::fixup_arm_blx || + (unsigned)Fixup.getKind() == ARM::fixup_arm_uncondbl || + (unsigned)Fixup.getKind() == ARM::fixup_arm_condbl)) + IsResolved = false; + } + + bool mayNeedRelaxation(const MCInst &Inst) const; + + bool fixupNeedsRelaxation(const MCFixup &Fixup, + uint64_t Value, + const MCInstFragment *DF, + const MCAsmLayout &Layout) const; - void RelaxInstruction(const MCInst &Inst, MCInst &Res) const; + void relaxInstruction(const MCInst &Inst, MCInst &Res) const; - bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const; + bool writeNopData(uint64_t Count, MCObjectWriter *OW) const; - void HandleAssemblerFlag(MCAssemblerFlag Flag) { + void handleAssemblerFlag(MCAssemblerFlag Flag) { switch (Flag) { default: break; case MCAF_Code16: @@ -124,21 +166,81 @@ public: }; } // end anonymous namespace -bool ARMAsmBackend::MayNeedRelaxation(const MCInst &Inst) const { - // FIXME: Thumb targets, different move constant targets.. +static unsigned getRelaxedOpcode(unsigned Op) { + switch (Op) { + default: return Op; + case ARM::tBcc: return ARM::t2Bcc; + case ARM::tLDRpciASM: return ARM::t2LDRpci; + case ARM::tADR: return ARM::t2ADR; + case ARM::tB: return ARM::t2B; + } +} + +bool ARMAsmBackend::mayNeedRelaxation(const MCInst &Inst) const { + if (getRelaxedOpcode(Inst.getOpcode()) != Inst.getOpcode()) + return true; return false; } -void ARMAsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const { - assert(0 && "ARMAsmBackend::RelaxInstruction() unimplemented"); - return; +bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, + uint64_t Value, + const MCInstFragment *DF, + const MCAsmLayout &Layout) const { + switch ((unsigned)Fixup.getKind()) { + case ARM::fixup_arm_thumb_br: { + // Relaxing tB to t2B. tB has a signed 12-bit displacement with the + // low bit being an implied zero. There's an implied +4 offset for the + // branch, so we adjust the other way here to determine what's + // encodable. + // + // Relax if the offset is outside tB's signed 12-bit displacement range. + int64_t Offset = int64_t(Value) - 4; + return Offset > 2046 || Offset < -2048; + } + case ARM::fixup_arm_thumb_bcc: { + // Relaxing tBcc to t2Bcc. tBcc has a signed 9-bit displacement with the + // low bit being an implied zero. There's an implied +4 offset for the + // branch, so we adjust the other way here to determine what's + // encodable. + // + // Relax if the offset is outside tBcc's signed 9-bit displacement range.
+ int64_t Offset = int64_t(Value) - 4; + return Offset > 254 || Offset < -256; + } + case ARM::fixup_thumb_adr_pcrel_10: + case ARM::fixup_arm_thumb_cp: { + // If the immediate is negative, greater than 1020, or not a multiple + // of four, the wide version of the instruction must be used. + int64_t Offset = int64_t(Value) - 4; + return Offset > 1020 || Offset < 0 || Offset & 3; + } + } + llvm_unreachable("Unexpected fixup kind in fixupNeedsRelaxation()!"); +} + +void ARMAsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const { + unsigned RelaxedOp = getRelaxedOpcode(Inst.getOpcode()); + + // Sanity check w/ diagnostic if we get here w/ a bogus instruction. + if (RelaxedOp == Inst.getOpcode()) { + SmallString<256> Tmp; + raw_svector_ostream OS(Tmp); + Inst.dump_pretty(OS); + OS << "\n"; + report_fatal_error("unexpected instruction to relax: " + OS.str()); + } + + // The instructions we're relaxing have (so far) the same operands. + // We just need to update to the proper opcode. + Res = Inst; + Res.setOpcode(RelaxedOp); } -bool ARMAsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const { +bool ARMAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { const uint16_t Thumb1_16bitNopEncoding = 0x46c0; // using MOV r8,r8 const uint16_t Thumb2_16bitNopEncoding = 0xbf00; // NOP const uint32_t ARMv4_NopEncoding = 0xe1a00000; // using MOV r0,r0 - const uint32_t ARMv6T2_NopEncoding = 0xe3207800; // NOP + const uint32_t ARMv6T2_NopEncoding = 0xe320f000; // NOP if (isThumb()) { const uint16_t nopEncoding = hasNOP() ? Thumb2_16bitNopEncoding : Thumb1_16bitNopEncoding; @@ -269,6 +371,9 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { case ARM::fixup_arm_condbranch: case ARM::fixup_arm_uncondbranch: + case ARM::fixup_arm_uncondbl: + case ARM::fixup_arm_condbl: + case ARM::fixup_arm_blx: // These values don't encode the low two bits since they're always zero. // Offset by 8 just as above. return 0xffffff & ((Value - 8) >> 2); @@ -359,6 +464,19 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { case ARM::fixup_arm_thumb_bcc: // Offset by 4 and don't encode the lower bit, which is always 0. return ((Value - 4) >> 1) & 0xff; + case ARM::fixup_arm_pcrel_10_unscaled: { + Value = Value - 8; // ARM fixups offset by an additional word and don't + // need to adjust for the half-word ordering. + bool isAdd = true; + if ((int64_t)Value < 0) { + Value = -Value; + isAdd = false; + } + // The value has the low 4 bits encoded in [3:0] and the high 4 in [11:8]. + assert ((Value < 256) && "Out of range pc-relative fixup value!"); + Value = (Value & 0xf) | ((Value & 0xf0) << 4); + return Value | (isAdd << 23); + } case ARM::fixup_arm_pcrel_10: Value = Value - 4; // ARM fixups offset by an additional word and don't // need to adjust for the half-word ordering. @@ -376,8 +494,8 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { assert ((Value < 256) && "Out of range pc-relative fixup value!"); Value |= isAdd << 23; - // Same addressing mode as fixup_arm_pcrel_10, - // but with 16-bit halfwords swapped. + // Same addressing mode as fixup_arm_pcrel_10, but with 16-bit halfwords + // swapped. if (Kind == ARM::fixup_t2_pcrel_10) { uint32_t swapped = (Value & 0xFFFF0000) >> 16; swapped |= (Value & 0x0000FFFF) << 16; @@ -395,22 +513,21 @@ namespace { // ELF is an ELF of course...
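Stepping back to adjustFixupValue for a moment: the fixup_arm_pcrel_10_unscaled case above packs an 8-bit displacement into the addrmode3 split-immediate form (imm4H in bits [11:8], imm4L in bits [3:0], the add/subtract U-bit in bit 23). The same packing in isolation, as a self-contained sketch of that math:

#include <cassert>
#include <cstdint>

// Mirrors the fixup_arm_pcrel_10_unscaled case: split the magnitude into two
// nibbles and flag the direction in bit 23.
static uint32_t encodeAddrMode3Offset(int64_t Disp) {
  bool IsAdd = Disp >= 0;
  uint64_t V = IsAdd ? Disp : -Disp;
  assert(V < 256 && "addrmode3 offsets are limited to 8 bits");
  return uint32_t((V & 0xf) | ((V & 0xf0) << 4)) | (uint32_t(IsAdd) << 23);
}

// e.g. encodeAddrMode3Offset(0xAB) yields 0xA0B with bit 23 set: imm4H = 0xA,
// imm4L = 0xB, and the offset is added rather than subtracted.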
class ELFARMAsmBackend : public ARMAsmBackend { public: - Triple::OSType OSType; + uint8_t OSABI; ELFARMAsmBackend(const Target &T, const StringRef TT, - Triple::OSType _OSType) - : ARMAsmBackend(T, TT), OSType(_OSType) { } + uint8_t _OSABI) + : ARMAsmBackend(T, TT), OSABI(_OSABI) { } - void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, + void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value) const; MCObjectWriter *createObjectWriter(raw_ostream &OS) const { - return createELFObjectWriter(new ARMELFObjectWriter(OSType), OS, - /*IsLittleEndian*/ true); + return createARMELFObjectWriter(OS, OSABI); } }; // FIXME: Raise this to share code between Darwin and ELF. -void ELFARMAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data, +void ELFARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value) const { unsigned NumBytes = 4; // FIXME: 2 for Thumb Value = adjustFixupValue(Fixup.getKind(), Value); @@ -439,7 +556,7 @@ public: Subtype); } - void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, + void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value) const; virtual bool doesSectionRequireSymbols(const MCSection &Section) const { @@ -464,9 +581,13 @@ static unsigned getFixupKindNumBytes(unsigned Kind) { case ARM::fixup_arm_thumb_cb: return 2; + case ARM::fixup_arm_pcrel_10_unscaled: case ARM::fixup_arm_ldst_pcrel_12: case ARM::fixup_arm_pcrel_10: case ARM::fixup_arm_adr_pcrel_12: + case ARM::fixup_arm_uncondbl: + case ARM::fixup_arm_condbl: + case ARM::fixup_arm_blx: case ARM::fixup_arm_condbranch: case ARM::fixup_arm_uncondbranch: return 3; @@ -491,7 +612,7 @@ static unsigned getFixupKindNumBytes(unsigned Kind) { } } -void DarwinARMAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data, +void DarwinARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value) const { unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind()); Value = adjustFixupValue(Fixup.getKind(), Value); @@ -527,5 +648,6 @@ MCAsmBackend *llvm::createARMAsmBackend(const Target &T, StringRef TT) { if (TheTriple.isOSWindows()) assert(0 && "Windows not supported on ARM"); - return new ELFARMAsmBackend(T, TT, Triple(TT).getOS()); + uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(Triple(TT).getOS()); + return new ELFARMAsmBackend(T, TT, OSABI); } diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h index ec4b6ff..ae11be8 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h @@ -67,7 +67,6 @@ namespace ARMCC { inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) { switch (CC) { - default: llvm_unreachable("Unknown condition code"); case ARMCC::EQ: return "eq"; case ARMCC::NE: return "ne"; case ARMCC::HS: return "hs"; @@ -84,6 +83,7 @@ inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) { case ARMCC::LE: return "le"; case ARMCC::AL: return "al"; } + llvm_unreachable("Unknown condition code"); } namespace ARM_PROC { @@ -185,6 +185,39 @@ inline static unsigned getARMRegisterNumbering(unsigned Reg) { case S29: case D29: return 29; case S30: case D30: return 30; case S31: case D31: return 31; + + // Composite registers use the regnum of the first register in the list. 
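+  // For example, the D1_D2 pair below returns 1, the number of D1.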
+ /* Q0 */ case D0_D2: return 0; + case D1_D2: case D1_D3: return 1; + /* Q1 */ case D2_D4: return 2; + case D3_D4: case D3_D5: return 3; + /* Q2 */ case D4_D6: return 4; + case D5_D6: case D5_D7: return 5; + /* Q3 */ case D6_D8: return 6; + case D7_D8: case D7_D9: return 7; + /* Q4 */ case D8_D10: return 8; + case D9_D10: case D9_D11: return 9; + /* Q5 */ case D10_D12: return 10; + case D11_D12: case D11_D13: return 11; + /* Q6 */ case D12_D14: return 12; + case D13_D14: case D13_D15: return 13; + /* Q7 */ case D14_D16: return 14; + case D15_D16: case D15_D17: return 15; + /* Q8 */ case D16_D18: return 16; + case D17_D18: case D17_D19: return 17; + /* Q9 */ case D18_D20: return 18; + case D19_D20: case D19_D21: return 19; + /* Q10 */ case D20_D22: return 20; + case D21_D22: case D21_D23: return 21; + /* Q11 */ case D22_D24: return 22; + case D23_D24: case D23_D25: return 23; + /* Q12 */ case D24_D26: return 24; + case D25_D26: case D25_D27: return 25; + /* Q13 */ case D26_D28: return 26; + case D27_D28: case D27_D29: return 27; + /* Q14 */ case D28_D30: return 28; + case D29_D30: case D29_D31: return 29; + /* Q15 */ } } @@ -237,7 +270,6 @@ namespace ARMII { inline static const char *AddrModeToString(AddrMode addrmode) { switch (addrmode) { - default: llvm_unreachable("Unknown memory operation"); case AddrModeNone: return "AddrModeNone"; case AddrMode1: return "AddrMode1"; case AddrMode2: return "AddrMode2"; diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp new file mode 100644 index 0000000..aa649ba --- /dev/null +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp @@ -0,0 +1,283 @@ +//===-- ARMELFObjectWriter.cpp - ARM ELF Writer ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/ARMFixupKinds.h" +#include "MCTargetDesc/ARMMCTargetDesc.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCValue.h" + +using namespace llvm; + +namespace { + class ARMELFObjectWriter : public MCELFObjectTargetWriter { + enum { DefaultEABIVersion = 0x05000000U }; + unsigned GetRelocTypeInner(const MCValue &Target, + const MCFixup &Fixup, + bool IsPCRel) const; + + + public: + ARMELFObjectWriter(uint8_t OSABI); + + virtual ~ARMELFObjectWriter(); + + virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, + bool IsPCRel, bool IsRelocWithSymbol, + int64_t Addend) const; + virtual unsigned getEFlags() const; + virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm, + const MCValue &Target, + const MCFragment &F, + const MCFixup &Fixup, + bool IsPCRel) const; + }; +} + +ARMELFObjectWriter::ARMELFObjectWriter(uint8_t OSABI) + : MCELFObjectTargetWriter(/*Is64Bit*/ false, OSABI, + ELF::EM_ARM, + /*HasRelocationAddend*/ false) {} + +ARMELFObjectWriter::~ARMELFObjectWriter() {} + +// FIXME: get the real EABI Version from the Triple. 
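+// EF_ARM_EABIMASK selects the top byte of e_flags, so the default value
+// 0x05000000 marks the object as ARM EABI version 5.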
+unsigned ARMELFObjectWriter::getEFlags() const { + return ELF::EF_ARM_EABIMASK & DefaultEABIVersion; +} + +// In ARM, _MergedGlobals and other most symbols get emitted directly. +// I.e. not as an offset to a section symbol. +// This code is an approximation of what ARM/gcc does. + +STATISTIC(PCRelCount, "Total number of PIC Relocations"); +STATISTIC(NonPCRelCount, "Total number of non-PIC relocations"); + +const MCSymbol *ARMELFObjectWriter::ExplicitRelSym(const MCAssembler &Asm, + const MCValue &Target, + const MCFragment &F, + const MCFixup &Fixup, + bool IsPCRel) const { + const MCSymbol &Symbol = Target.getSymA()->getSymbol().AliasedSymbol(); + bool EmitThisSym = false; + + const MCSectionELF &Section = + static_cast<const MCSectionELF&>(Symbol.getSection()); + bool InNormalSection = true; + unsigned RelocType = 0; + RelocType = GetRelocTypeInner(Target, Fixup, IsPCRel); + + DEBUG( + const MCSymbolRefExpr::VariantKind Kind = Target.getSymA()->getKind(); + MCSymbolRefExpr::VariantKind Kind2; + Kind2 = Target.getSymB() ? Target.getSymB()->getKind() : + MCSymbolRefExpr::VK_None; + dbgs() << "considering symbol " + << Section.getSectionName() << "/" + << Symbol.getName() << "/" + << " Rel:" << (unsigned)RelocType + << " Kind: " << (int)Kind << "/" << (int)Kind2 + << " Tmp:" + << Symbol.isAbsolute() << "/" << Symbol.isDefined() << "/" + << Symbol.isVariable() << "/" << Symbol.isTemporary() + << " Counts:" << PCRelCount << "/" << NonPCRelCount << "\n"); + + if (IsPCRel) { ++PCRelCount; + switch (RelocType) { + default: + // Most relocation types are emitted as explicit symbols + InNormalSection = + StringSwitch<bool>(Section.getSectionName()) + .Case(".data.rel.ro.local", false) + .Case(".data.rel", false) + .Case(".bss", false) + .Default(true); + EmitThisSym = true; + break; + case ELF::R_ARM_ABS32: + // But things get strange with R_ARM_ABS32 + // In this case, most things that go in .rodata show up + // as section relative relocations + InNormalSection = + StringSwitch<bool>(Section.getSectionName()) + .Case(".data.rel.ro.local", false) + .Case(".data.rel", false) + .Case(".rodata", false) + .Case(".bss", false) + .Default(true); + EmitThisSym = false; + break; + } + } else { + NonPCRelCount++; + InNormalSection = + StringSwitch<bool>(Section.getSectionName()) + .Case(".data.rel.ro.local", false) + .Case(".rodata", false) + .Case(".data.rel", false) + .Case(".bss", false) + .Default(true); + + switch (RelocType) { + default: EmitThisSym = true; break; + case ELF::R_ARM_ABS32: EmitThisSym = false; break; + } + } + + if (EmitThisSym) + return &Symbol; + if (! Symbol.isTemporary() && InNormalSection) { + return &Symbol; + } + return NULL; +} + +// Need to examine the Fixup when determining whether to +// emit the relocation as an explicit symbol or as a section relative +// offset +unsigned ARMELFObjectWriter::GetRelocType(const MCValue &Target, + const MCFixup &Fixup, + bool IsPCRel, + bool IsRelocWithSymbol, + int64_t Addend) const { + return GetRelocTypeInner(Target, Fixup, IsPCRel); +} + +unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, + const MCFixup &Fixup, + bool IsPCRel) const { + MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ? 
+ MCSymbolRefExpr::VK_None : Target.getSymA()->getKind(); + + unsigned Type = 0; + if (IsPCRel) { + switch ((unsigned)Fixup.getKind()) { + default: llvm_unreachable("Unimplemented"); + case FK_Data_4: + switch (Modifier) { + default: llvm_unreachable("Unsupported Modifier"); + case MCSymbolRefExpr::VK_None: + Type = ELF::R_ARM_REL32; + break; + case MCSymbolRefExpr::VK_ARM_TLSGD: + llvm_unreachable("unimplemented"); + case MCSymbolRefExpr::VK_ARM_GOTTPOFF: + Type = ELF::R_ARM_TLS_IE32; + break; + } + break; + case ARM::fixup_arm_uncondbl: + case ARM::fixup_arm_blx: + case ARM::fixup_arm_uncondbranch: + switch (Modifier) { + case MCSymbolRefExpr::VK_ARM_PLT: + Type = ELF::R_ARM_PLT32; + break; + default: + Type = ELF::R_ARM_CALL; + break; + } + break; + case ARM::fixup_arm_condbl: + case ARM::fixup_arm_condbranch: + Type = ELF::R_ARM_JUMP24; + break; + case ARM::fixup_arm_movt_hi16: + case ARM::fixup_arm_movt_hi16_pcrel: + Type = ELF::R_ARM_MOVT_PREL; + break; + case ARM::fixup_arm_movw_lo16: + case ARM::fixup_arm_movw_lo16_pcrel: + Type = ELF::R_ARM_MOVW_PREL_NC; + break; + case ARM::fixup_t2_movt_hi16: + case ARM::fixup_t2_movt_hi16_pcrel: + Type = ELF::R_ARM_THM_MOVT_PREL; + break; + case ARM::fixup_t2_movw_lo16: + case ARM::fixup_t2_movw_lo16_pcrel: + Type = ELF::R_ARM_THM_MOVW_PREL_NC; + break; + case ARM::fixup_arm_thumb_bl: + case ARM::fixup_arm_thumb_blx: + Type = ELF::R_ARM_THM_CALL; + break; + } + } else { + switch ((unsigned)Fixup.getKind()) { + default: llvm_unreachable("invalid fixup kind!"); + case FK_Data_4: + switch (Modifier) { + default: llvm_unreachable("Unsupported Modifier"); + case MCSymbolRefExpr::VK_ARM_GOT: + Type = ELF::R_ARM_GOT_BREL; + break; + case MCSymbolRefExpr::VK_ARM_TLSGD: + Type = ELF::R_ARM_TLS_GD32; + break; + case MCSymbolRefExpr::VK_ARM_TPOFF: + Type = ELF::R_ARM_TLS_LE32; + break; + case MCSymbolRefExpr::VK_ARM_GOTTPOFF: + Type = ELF::R_ARM_TLS_IE32; + break; + case MCSymbolRefExpr::VK_None: + Type = ELF::R_ARM_ABS32; + break; + case MCSymbolRefExpr::VK_ARM_GOTOFF: + Type = ELF::R_ARM_GOTOFF32; + break; + case MCSymbolRefExpr::VK_ARM_TARGET1: + Type = ELF::R_ARM_TARGET1; + break; + } + break; + case ARM::fixup_arm_ldst_pcrel_12: + case ARM::fixup_arm_pcrel_10: + case ARM::fixup_arm_adr_pcrel_12: + case ARM::fixup_arm_thumb_bl: + case ARM::fixup_arm_thumb_cb: + case ARM::fixup_arm_thumb_cp: + case ARM::fixup_arm_thumb_br: + llvm_unreachable("Unimplemented"); + case ARM::fixup_arm_uncondbranch: + Type = ELF::R_ARM_CALL; + break; + case ARM::fixup_arm_condbranch: + Type = ELF::R_ARM_JUMP24; + break; + case ARM::fixup_arm_movt_hi16: + Type = ELF::R_ARM_MOVT_ABS; + break; + case ARM::fixup_arm_movw_lo16: + Type = ELF::R_ARM_MOVW_ABS_NC; + break; + case ARM::fixup_t2_movt_hi16: + Type = ELF::R_ARM_THM_MOVT_ABS; + break; + case ARM::fixup_t2_movw_lo16: + Type = ELF::R_ARM_THM_MOVW_ABS_NC; + break; + } + } + + return Type; +} + +MCObjectWriter *llvm::createARMELFObjectWriter(raw_ostream &OS, + uint8_t OSABI) { + MCELFObjectTargetWriter *MOTW = new ARMELFObjectWriter(OSABI); + return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/true); +} diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h index 350c92d..0085feb 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h @@ -1,4 +1,4 @@ -//===-- ARM/ARMFixupKinds.h - ARM Specific Fixup Entries --------*- C++ -*-===// +//===-- ARMFixupKinds.h - ARM Specific 
Fixup Entries ------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -23,6 +23,9 @@ enum Fixups { // the 16-bit halfwords reordered. fixup_t2_ldst_pcrel_12, + // fixup_arm_pcrel_10_unscaled - 10-bit PC relative relocation for symbol + // addresses used in LDRD/LDRH/LDRB/etc. instructions. All bits are encoded. + fixup_arm_pcrel_10_unscaled, // fixup_arm_pcrel_10 - 10-bit PC relative relocation for symbol addresses // used in VFP instructions where the lower 2 bits are not encoded // (so it's encoded as an 8-bit immediate). @@ -56,6 +59,25 @@ enum Fixups { // fixup_arm_thumb_br - 12-bit fixup for Thumb B instructions. fixup_arm_thumb_br, + // The following fixups handle the ARM BL instructions. These can be + // conditionalised; however, the ARM ELF ABI requires a different relocation + // in that case: R_ARM_JUMP24 instead of R_ARM_CALL. The difference is that + // R_ARM_CALL is allowed to change the instruction to a BLX inline, which has + // no conditional version; R_ARM_JUMP24 would have to insert a veneer. + // + // MachO does not draw a distinction between the two cases, so it will treat + // fixup_arm_uncondbl and fixup_arm_condbl as identical fixups. + + // fixup_arm_uncondbl - Fixup for unconditional ARM BL instructions. + fixup_arm_uncondbl, + + // fixup_arm_condbl - Fixup for ARM BL instructions with nontrivial + // conditionalisation. + fixup_arm_condbl, + + // fixup_arm_blx - Fixup for ARM BLX instructions. + fixup_arm_blx, + // fixup_arm_thumb_bl - Fixup for Thumb BL instructions. fixup_arm_thumb_bl, diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp index 1c109e0..03e8d5f 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp @@ -1,4 +1,4 @@ -//===-- ARMMCAsmInfo.cpp - ARM asm properties -------------------*- C++ -*-===// +//===-- ARMMCAsmInfo.cpp - ARM asm properties -----------------------------===// // // The LLVM Compiler Infrastructure // @@ -48,6 +48,8 @@ static const char *const arm_asm_table[] = { 0,0 }; +void ARMMCAsmInfoDarwin::anchor() { } + ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin() { AsmTransCBE = arm_asm_table; Data64bitsDirective = 0; @@ -61,6 +63,8 @@ ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin() { ExceptionsType = ExceptionHandling::SjLj; } +void ARMELFMCAsmInfo::anchor() { } + ARMELFMCAsmInfo::ARMELFMCAsmInfo() { // ".comm align is in bytes but .align is pow-2." 
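   // With this cleared, a request for 8-byte alignment is emitted as
   // ".align 3" (the exponent) rather than ".align 8".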
AlignmentIsInBytes = false; diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h index 90f7822..f0b289c 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h @@ -1,4 +1,4 @@ -//=====-- ARMMCAsmInfo.h - ARM asm properties -------------*- C++ -*--====// +//===-- ARMMCAsmInfo.h - ARM asm properties --------------------*- C++ -*--===// // // The LLVM Compiler Infrastructure // @@ -18,11 +18,15 @@ namespace llvm { - struct ARMMCAsmInfoDarwin : public MCAsmInfoDarwin { + class ARMMCAsmInfoDarwin : public MCAsmInfoDarwin { + virtual void anchor(); + public: explicit ARMMCAsmInfoDarwin(); }; - struct ARMELFMCAsmInfo : public MCAsmInfo { + class ARMELFMCAsmInfo : public MCAsmInfo { + virtual void anchor(); + public: explicit ARMELFMCAsmInfo(); }; diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index 865c3e2..10d1c48 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -64,7 +64,7 @@ public: // getBinaryCodeForInstr - TableGen'erated function for getting the // binary encoding for an instruction. - unsigned getBinaryCodeForInstr(const MCInst &MI, + uint64_t getBinaryCodeForInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups) const; /// getMachineOpValue - Return binary encoding of operand. If the machine @@ -118,8 +118,10 @@ public: /// branch target. uint32_t getARMBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups) const; + uint32_t getARMBLTargetOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const; uint32_t getARMBLXTargetOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl<MCFixup> &Fixups) const; + SmallVectorImpl<MCFixup> &Fixups) const; /// getAdrLabelOpValue - Return encoding info for 12-bit immediate /// ADR label target. @@ -166,7 +168,7 @@ public: SmallVectorImpl<MCFixup> &Fixups) const { ARM_AM::AMSubMode Mode = (ARM_AM::AMSubMode)MI.getOperand(OpIdx).getImm(); switch (Mode) { - default: assert(0 && "Unknown addressing sub-mode!"); + default: llvm_unreachable("Unknown addressing sub-mode!"); case ARM_AM::da: return 0; case ARM_AM::ia: return 1; case ARM_AM::db: return 2; @@ -177,7 +179,6 @@ public: /// unsigned getShiftOp(ARM_AM::ShiftOpc ShOpc) const { switch (ShOpc) { - default: llvm_unreachable("Unknown shift opc!"); case ARM_AM::no_shift: case ARM_AM::lsl: return 0; case ARM_AM::lsr: return 1; @@ -185,7 +186,7 @@ public: case ARM_AM::ror: case ARM_AM::rrx: return 3; } - return 0; + llvm_unreachable("Invalid ShiftOpc!"); } /// getAddrMode2OpValue - Return encoding for addrmode2 operands. @@ -423,7 +424,6 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO, } llvm_unreachable("Unable to encode MCOperand!"); - return 0; } /// getAddrModeImmOpValue - Return encoding info for 'reg +/- imm' operand. @@ -466,7 +466,7 @@ static uint32_t getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, assert(MO.isExpr() && "Unexpected branch target type!"); const MCExpr *Expr = MO.getExpr(); MCFixupKind Kind = MCFixupKind(FixupKind); - Fixups.push_back(MCFixup::Create(0, Expr, Kind)); + Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc())); // All of the information is in the fixup. 
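   // The encoded bits stay zero here; the backend's applyFixup patches in
   // the real value once layout has resolved the target.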
return 0; @@ -594,17 +594,26 @@ getARMBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, } uint32_t ARMMCCodeEmitter:: -getARMBLXTargetOpValue(const MCInst &MI, unsigned OpIdx, +getARMBLTargetOpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups) const { const MCOperand MO = MI.getOperand(OpIdx); if (MO.isExpr()) { if (HasConditionalBranch(MI)) - return ::getBranchTargetOpValue(MI, OpIdx, - ARM::fixup_arm_condbranch, Fixups); - return ::getBranchTargetOpValue(MI, OpIdx, - ARM::fixup_arm_uncondbranch, Fixups); + return ::getBranchTargetOpValue(MI, OpIdx, + ARM::fixup_arm_condbl, Fixups); + return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_uncondbl, Fixups); } + return MO.getImm() >> 2; +} + +uint32_t ARMMCCodeEmitter:: +getARMBLXTargetOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups) const { + const MCOperand MO = MI.getOperand(OpIdx); + if (MO.isExpr()) + return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_blx, Fixups); + return MO.getImm() >> 1; } @@ -718,12 +727,13 @@ getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx, Kind = MCFixupKind(ARM::fixup_t2_ldst_pcrel_12); else Kind = MCFixupKind(ARM::fixup_arm_ldst_pcrel_12); - Fixups.push_back(MCFixup::Create(0, Expr, Kind)); + Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc())); ++MCNumCPRelocations; } else { Reg = ARM::PC; int32_t Offset = MO.getImm(); + // FIXME: Handle #-0. if (Offset < 0) { Offset *= -1; isAdd = false; @@ -791,8 +801,8 @@ getT2AddrModeImm8s4OpValue(const MCInst &MI, unsigned OpIdx, assert(MO.isExpr() && "Unexpected machine operand type!"); const MCExpr *Expr = MO.getExpr(); - MCFixupKind Kind = MCFixupKind(ARM::fixup_arm_pcrel_10); - Fixups.push_back(MCFixup::Create(0, Expr, Kind)); + MCFixupKind Kind = MCFixupKind(ARM::fixup_t2_pcrel_10); + Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc())); ++MCNumCPRelocations; } else @@ -833,7 +843,7 @@ getT2AddrModeImm0_1020s4OpValue(const MCInst &MI, unsigned OpIdx, // but this is good enough for now. static bool EvaluateAsPCRel(const MCExpr *Expr) { switch (Expr->getKind()) { - default: assert(0 && "Unexpected expression type"); + default: llvm_unreachable("Unexpected expression type"); case MCExpr::SymbolRef: return false; case MCExpr::Binary: return true; } @@ -857,7 +867,7 @@ ARMMCCodeEmitter::getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx, MCFixupKind Kind; switch (ARM16Expr->getKind()) { - default: assert(0 && "Unsupported ARMFixup"); + default: llvm_unreachable("Unsupported ARMFixup"); case ARMMCExpr::VK_ARM_HI16: if (!isTargetDarwin() && EvaluateAsPCRel(E)) Kind = MCFixupKind(isThumb2() @@ -879,12 +889,11 @@ ARMMCCodeEmitter::getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx, : ARM::fixup_arm_movw_lo16); break; } - Fixups.push_back(MCFixup::Create(0, E, Kind)); + Fixups.push_back(MCFixup::Create(0, E, Kind, MI.getLoc())); return 0; }; llvm_unreachable("Unsupported MCExpr type in MCOperand!"); - return 0; } uint32_t ARMMCCodeEmitter:: @@ -993,6 +1002,19 @@ getAddrMode3OpValue(const MCInst &MI, unsigned OpIdx, const MCOperand &MO = MI.getOperand(OpIdx); const MCOperand &MO1 = MI.getOperand(OpIdx+1); const MCOperand &MO2 = MI.getOperand(OpIdx+2); + + // If The first operand isn't a register, we have a label reference. + if (!MO.isReg()) { + unsigned Rn = getARMRegisterNumbering(ARM::PC); // Rn is PC. 
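+    // With no base register, encode against PC and let the
+    // pcrel_10_unscaled fixup added below supply the split immediate.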
+ + assert(MO.isExpr() && "Unexpected machine operand type!"); + const MCExpr *Expr = MO.getExpr(); + MCFixupKind Kind = MCFixupKind(ARM::fixup_arm_pcrel_10_unscaled); + Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc())); + + ++MCNumCPRelocations; + return (Rn << 9) | (1 << 13); + } unsigned Rn = getARMRegisterNumbering(MO.getReg()); unsigned Imm = MO2.getImm(); bool isAdd = ARM_AM::getAM3Op(Imm) == ARM_AM::add; @@ -1066,7 +1088,7 @@ getAddrMode5OpValue(const MCInst &MI, unsigned OpIdx, Kind = MCFixupKind(ARM::fixup_t2_pcrel_10); else Kind = MCFixupKind(ARM::fixup_arm_pcrel_10); - Fixups.push_back(MCFixup::Create(0, Expr, Kind)); + Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc())); ++MCNumCPRelocations; } else { @@ -1312,8 +1334,8 @@ getRegisterListOpValue(const MCInst &MI, unsigned Op, // LDM/STM: // {15-0} = Bitfield of GPRs. unsigned Reg = MI.getOperand(Op).getReg(); - bool SPRRegs = llvm::ARMMCRegisterClasses[ARM::SPRRegClassID].contains(Reg); - bool DPRRegs = llvm::ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg); + bool SPRRegs = ARMMCRegisterClasses[ARM::SPRRegClassID].contains(Reg); + bool DPRRegs = ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg); unsigned Binary = 0; @@ -1372,11 +1394,11 @@ getAddrMode6OneLane32AddressOpValue(const MCInst &MI, unsigned Op, switch (Imm.getImm()) { default: break; - case 2: - case 4: case 8: - case 16: Align = 0x00; break; - case 32: Align = 0x03; break; + case 16: + case 32: // Default '0' value for invalid alignments of 8, 16, 32 bytes. + case 2: Align = 0x00; break; + case 4: Align = 0x03; break; } return RegNo | (Align << 4); @@ -1412,7 +1434,7 @@ getAddrMode6OffsetOpValue(const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const { const MCOperand &MO = MI.getOperand(Op); if (MO.getReg() == 0) return 0x0D; - return MO.getReg(); + return getARMRegisterNumbering(MO.getReg()); } unsigned ARMMCCodeEmitter:: diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp index 2727ba8..22e14a2 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp @@ -21,7 +21,7 @@ ARMMCExpr::Create(VariantKind Kind, const MCExpr *Expr, void ARMMCExpr::PrintImpl(raw_ostream &OS) const { switch (Kind) { - default: assert(0 && "Invalid kind!"); + default: llvm_unreachable("Invalid kind!"); case VK_ARM_HI16: OS << ":upper16:"; break; case VK_ARM_LO16: OS << ":lower16:"; break; } @@ -45,8 +45,7 @@ ARMMCExpr::EvaluateAsRelocatableImpl(MCValue &Res, static void AddValueSymbols_(const MCExpr *Value, MCAssembler *Asm) { switch (Value->getKind()) { case MCExpr::Target: - assert(0 && "Can't handle nested target expr!"); - break; + llvm_unreachable("Can't handle nested target expr!"); case MCExpr::Constant: break; diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h index 0a2e883..a727e08 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h @@ -1,4 +1,4 @@ -//===-- ARMMCExpr.h - ARM specific MC expression classes ------------------===// +//===-- ARMMCExpr.h - ARM specific MC expression classes --------*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index a55c410..e3512cd 100644 --- 
a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -1,4 +1,4 @@ -//===-- ARMMCTargetDesc.cpp - ARM Target Descriptions -----------*- C++ -*-===// +//===-- ARMMCTargetDesc.cpp - ARM Target Descriptions ---------------------===// // // The LLVM Compiler Infrastructure // @@ -89,14 +89,6 @@ std::string ARM_MC::ParseARMTriple(StringRef TT) { ARMArchFeature += ",+thumb-mode"; } - Triple TheTriple(TT); - if (TheTriple.getOS() == Triple::NativeClient) { - if (ARMArchFeature.empty()) - ARMArchFeature = "+nacl-mode"; - else - ARMArchFeature += ",+nacl-mode"; - } - return ARMArchFeature; } @@ -137,14 +129,15 @@ static MCAsmInfo *createARMMCAsmInfo(const Target &T, StringRef TT) { } static MCCodeGenInfo *createARMMCCodeGenInfo(StringRef TT, Reloc::Model RM, - CodeModel::Model CM) { + CodeModel::Model CM, + CodeGenOpt::Level OL) { MCCodeGenInfo *X = new MCCodeGenInfo(); if (RM == Reloc::Default) { Triple TheTriple(TT); // Default relocation model on Darwin is PIC, not DynamicNoPIC. RM = TheTriple.isOSDarwin() ? Reloc::PIC_ : Reloc::DynamicNoPIC; } - X->InitMCCodeGenInfo(RM, CM); + X->InitMCCodeGenInfo(RM, CM, OL); return X; } @@ -158,22 +151,23 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT, Triple TheTriple(TT); if (TheTriple.isOSDarwin()) - return createMachOStreamer(Ctx, MAB, OS, Emitter, RelaxAll); + return createMachOStreamer(Ctx, MAB, OS, Emitter, false); if (TheTriple.isOSWindows()) { llvm_unreachable("ARM does not support Windows COFF format"); - return NULL; } - return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll, NoExecStack); + return createELFStreamer(Ctx, MAB, OS, Emitter, false, NoExecStack); } static MCInstPrinter *createARMMCInstPrinter(const Target &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, + const MCInstrInfo &MII, + const MCRegisterInfo &MRI, const MCSubtargetInfo &STI) { if (SyntaxVariant == 0) - return new ARMInstPrinter(MAI, STI); + return new ARMInstPrinter(MAI, MII, MRI, STI); return 0; } diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h index 9b3d3bd..88472d7 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h @@ -46,6 +46,10 @@ MCCodeEmitter *createARMMCCodeEmitter(const MCInstrInfo &MCII, MCAsmBackend *createARMAsmBackend(const Target &T, StringRef TT); +/// createARMELFObjectWriter - Construct an ELF Mach-O object writer. +MCObjectWriter *createARMELFObjectWriter(raw_ostream &OS, + uint8_t OSABI); + /// createARMMachObjectWriter - Construct an ARM Mach-O object writer. 
MCObjectWriter *createARMMachObjectWriter(raw_ostream &OS, bool Is64Bit, diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp index 352c73e..8057cb6 100644 --- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp +++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp @@ -13,9 +13,11 @@ #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCMachObjectWriter.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCFixupKindInfo.h" +#include "llvm/MC/MCMachOSymbolFlags.h" #include "llvm/MC/MCValue.h" #include "llvm/Object/MachOFormat.h" #include "llvm/Support/ErrorHandling.h" @@ -32,12 +34,12 @@ class ARMMachObjectWriter : public MCMachObjectTargetWriter { MCValue Target, unsigned Log2Size, uint64_t &FixedValue); - void RecordARMMovwMovtRelocation(MachObjectWriter *Writer, - const MCAssembler &Asm, - const MCAsmLayout &Layout, - const MCFragment *Fragment, - const MCFixup &Fixup, MCValue Target, - uint64_t &FixedValue); + void RecordARMScatteredHalfRelocation(MachObjectWriter *Writer, + const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, MCValue Target, + uint64_t &FixedValue); public: ARMMachObjectWriter(bool Is64Bit, uint32_t CPUType, @@ -80,6 +82,9 @@ static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType, case ARM::fixup_arm_adr_pcrel_12: case ARM::fixup_arm_condbranch: case ARM::fixup_arm_uncondbranch: + case ARM::fixup_arm_uncondbl: + case ARM::fixup_arm_condbl: + case ARM::fixup_arm_blx: RelocType = unsigned(macho::RIT_ARM_Branch24Bit); // Report as 'long', even though that is not quite accurate. Log2Size = llvm::Log2_32(4); @@ -98,34 +103,47 @@ static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType, Log2Size = llvm::Log2_32(4); return true; + // For movw/movt r_type relocations they always have a pair following them and + // the r_length bits are used differently. The encoding of the r_length is as + // follows: + // low bit of r_length: + // 0 - :lower16: for movw instructions + // 1 - :upper16: for movt instructions + // high bit of r_length: + // 0 - arm instructions + // 1 - thumb instructions case ARM::fixup_arm_movt_hi16: case ARM::fixup_arm_movt_hi16_pcrel: + RelocType = unsigned(macho::RIT_ARM_Half); + Log2Size = 1; + return true; case ARM::fixup_t2_movt_hi16: case ARM::fixup_t2_movt_hi16_pcrel: - RelocType = unsigned(macho::RIT_ARM_HalfDifference); - // Report as 'long', even though that is not quite accurate. - Log2Size = llvm::Log2_32(4); + RelocType = unsigned(macho::RIT_ARM_Half); + Log2Size = 3; return true; case ARM::fixup_arm_movw_lo16: case ARM::fixup_arm_movw_lo16_pcrel: + RelocType = unsigned(macho::RIT_ARM_Half); + Log2Size = 0; + return true; case ARM::fixup_t2_movw_lo16: case ARM::fixup_t2_movw_lo16_pcrel: RelocType = unsigned(macho::RIT_ARM_Half); - // Report as 'long', even though that is not quite accurate. 
- Log2Size = llvm::Log2_32(4); + Log2Size = 2; return true; } } void ARMMachObjectWriter:: -RecordARMMovwMovtRelocation(MachObjectWriter *Writer, - const MCAssembler &Asm, - const MCAsmLayout &Layout, - const MCFragment *Fragment, - const MCFixup &Fixup, - MCValue Target, - uint64_t &FixedValue) { +RecordARMScatteredHalfRelocation(MachObjectWriter *Writer, + const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, + MCValue Target, + uint64_t &FixedValue) { uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind()); unsigned Type = macho::RIT_ARM_Half; @@ -135,7 +153,8 @@ RecordARMMovwMovtRelocation(MachObjectWriter *Writer, MCSymbolData *A_SD = &Asm.getSymbolData(*A); if (!A_SD->getFragment()) - report_fatal_error("symbol '" + A->getName() + + Asm.getContext().FatalError(Fixup.getLoc(), + "symbol '" + A->getName() + "' can not be undefined in a subtraction expression"); uint32_t Value = Writer->getSymbolAddress(A_SD, Layout); @@ -148,7 +167,8 @@ RecordARMMovwMovtRelocation(MachObjectWriter *Writer, MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol()); if (!B_SD->getFragment()) - report_fatal_error("symbol '" + B->getSymbol().getName() + + Asm.getContext().FatalError(Fixup.getLoc(), + "symbol '" + B->getSymbol().getName() + "' can not be undefined in a subtraction expression"); // Select the appropriate difference relocation type. @@ -178,9 +198,16 @@ RecordARMMovwMovtRelocation(MachObjectWriter *Writer, case ARM::fixup_arm_movt_hi16: case ARM::fixup_arm_movt_hi16_pcrel: MovtBit = 1; + // The thumb bit shouldn't be set in the 'other-half' bit of the + // relocation, but it will be set in FixedValue if the base symbol + // is a thumb function. Clear it out here. + if (A_SD->getFlags() & SF_ThumbFunc) + FixedValue &= 0xfffffffe; break; case ARM::fixup_t2_movt_hi16: case ARM::fixup_t2_movt_hi16_pcrel: + if (A_SD->getFlags() & SF_ThumbFunc) + FixedValue &= 0xfffffffe; MovtBit = 1; // Fallthrough case ARM::fixup_t2_movw_lo16: @@ -189,7 +216,6 @@ RecordARMMovwMovtRelocation(MachObjectWriter *Writer, break; } - if (Type == macho::RIT_ARM_HalfDifference) { uint32_t OtherHalf = MovtBit ? (FixedValue & 0xffff) : ((FixedValue & 0xffff0000) >> 16); @@ -233,7 +259,8 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer, MCSymbolData *A_SD = &Asm.getSymbolData(*A); if (!A_SD->getFragment()) - report_fatal_error("symbol '" + A->getName() + + Asm.getContext().FatalError(Fixup.getLoc(), + "symbol '" + A->getName() + "' can not be undefined in a subtraction expression"); uint32_t Value = Writer->getSymbolAddress(A_SD, Layout); @@ -245,7 +272,8 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer, MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol()); if (!B_SD->getFragment()) - report_fatal_error("symbol '" + B->getSymbol().getName() + + Asm.getContext().FatalError(Fixup.getLoc(), + "symbol '" + B->getSymbol().getName() + "' can not be undefined in a subtraction expression"); // Select the appropriate difference relocation type. 
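The other-half bookkeeping above is easy to invert by accident. As a standalone restatement (a sketch; otherHalf is an illustrative name, not a function in this patch), each relocation of a movw/movt pair records the 16 bits that its partner instruction encodes:

    #include <cstdint>

    // Mirrors the OtherHalf computation in RecordARMScatteredHalfRelocation:
    // a movt fixup stores the low half of the final value, and a movw fixup
    // stores the high half.
    static uint32_t otherHalf(bool MovtBit, uint32_t FixedValue) {
      return MovtBit ? (FixedValue & 0xffff)
                     : ((FixedValue & 0xffff0000) >> 16);
    }

So for a final value of 0x12345678, the movt relocation carries 0x5678 while the movw relocation carries 0x1234.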
@@ -287,19 +315,21 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
   unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
   unsigned Log2Size;
   unsigned RelocType = macho::RIT_Vanilla;
-  if (!getARMFixupKindMachOInfo(Fixup.getKind(), RelocType, Log2Size)) {
-    report_fatal_error("unknown ARM fixup kind!");
-    return;
-  }
+  if (!getARMFixupKindMachOInfo(Fixup.getKind(), RelocType, Log2Size))
+    // If we failed to get fixup kind info, it's because there's no legal
+    // relocation type for the fixup kind. This happens when it's a fixup that's
+    // expected to always be resolvable at assembly time and not have any
+    // relocations needed.
+    Asm.getContext().FatalError(Fixup.getLoc(),
+                                "unsupported relocation on symbol");
 
   // If this is a difference or a defined symbol plus an offset, then we need a
   // scattered relocation entry. Differences always require scattered
   // relocations.
   if (Target.getSymB()) {
-    if (RelocType == macho::RIT_ARM_Half ||
-        RelocType == macho::RIT_ARM_HalfDifference)
-      return RecordARMMovwMovtRelocation(Writer, Asm, Layout, Fragment, Fixup,
-                                         Target, FixedValue);
+    if (RelocType == macho::RIT_ARM_Half)
+      return RecordARMScatteredHalfRelocation(Writer, Asm, Layout, Fragment,
+                                              Fixup, Target, FixedValue);
     return RecordARMScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
                                         Target, Log2Size, FixedValue);
   }
@@ -374,6 +404,30 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
                    (Log2Size << 25) |
                    (IsExtern << 27) |
                    (Type << 28));
+
+  // Even when it's not a scattered relocation, movw/movt always uses
+  // a PAIR relocation.
+  if (Type == macho::RIT_ARM_Half) {
+    // The other-half value only gets populated for the movt relocation.
+    uint32_t Value = 0;
+    switch ((unsigned)Fixup.getKind()) {
+    default: break;
+    case ARM::fixup_arm_movt_hi16:
+    case ARM::fixup_arm_movt_hi16_pcrel:
+    case ARM::fixup_t2_movt_hi16:
+    case ARM::fixup_t2_movt_hi16_pcrel:
+      Value = FixedValue;
+      break;
+    }
+    macho::RelocationEntry MREPair;
+    MREPair.Word0 = Value;
+    MREPair.Word1 = ((0xffffff) |
+                     (Log2Size << 25) |
+                     (macho::RIT_Pair << 28));
+
+    Writer->addRelocation(Fragment->getParent(), MREPair);
+  }
+
   Writer->addRelocation(Fragment->getParent(), MRE);
 }
 
diff --git a/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp b/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp
index 2df0053..2899836 100644
--- a/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp
+++ b/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp
@@ -1,4 +1,4 @@
-//===-- MLxExpansionPass.cpp - Expand MLx instrs to avoid hazards ----------=//
+//===-- MLxExpansionPass.cpp - Expand MLx instrs to avoid hazards ---------===//
 //
 // The LLVM Compiler Infrastructure
 //
@@ -139,7 +139,7 @@ bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const {
   // FIXME: Detect integer instructions properly.
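   // A VMLx that reads an accumulator just written by a neighbouring
   // VFP/NEON instruction can stall the pipeline; on such a RAW hazard
   // the pass splits the MLx into a separate multiply and add/subtract.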
const MCInstrDesc &MCID = MI->getDesc(); unsigned Domain = MCID.TSFlags & ARMII::DomainMask; - if (MCID.mayStore()) + if (MI->mayStore()) return false; unsigned Opcode = MCID.getOpcode(); if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD) @@ -222,14 +222,14 @@ MLxExpansion::ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI, const MCInstrDesc &MCID2 = TII->get(AddSubOpc); unsigned TmpReg = MRI->createVirtualRegister(TII->getRegClass(MCID1, 0, TRI)); - MachineInstrBuilder MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), MCID1, TmpReg) + MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(), MCID1, TmpReg) .addReg(Src1Reg, getKillRegState(Src1Kill)) .addReg(Src2Reg, getKillRegState(Src2Kill)); if (HasLane) MIB.addImm(LaneImm); MIB.addImm(Pred).addReg(PredReg); - MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), MCID2) + MIB = BuildMI(MBB, MI, MI->getDebugLoc(), MCID2) .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstDead)); if (NegAcc) { @@ -274,7 +274,7 @@ bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) { } const MCInstrDesc &MCID = MI->getDesc(); - if (MCID.isBarrier()) { + if (MI->isBarrier()) { clearStack(); Skip = 0; ++MII; diff --git a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp index d848177..edd73c2 100644 --- a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -1,4 +1,4 @@ -//======- Thumb1FrameLowering.cpp - Thumb1 Frame Information ---*- C++ -*-====// +//===-- Thumb1FrameLowering.cpp - Thumb1 Frame Information ----------------===// // // The LLVM Compiler Infrastructure // @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "Thumb1FrameLowering.h" -#include "ARMBaseInstrInfo.h" #include "ARMMachineFunctionInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -101,7 +100,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { case ARM::R11: if (Reg == FramePtr) FramePtrSpillFI = FI; - if (STI.isTargetDarwin()) { + if (STI.isTargetIOS()) { AFI->addGPRCalleeSavedArea2Frame(FI); GPRCS2Size += 4; } else { @@ -175,14 +174,14 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { AFI->setShouldRestoreSPFromFP(true); } -static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) { +static bool isCalleeSavedRegister(unsigned Reg, const uint16_t *CSRegs) { for (unsigned i = 0; CSRegs[i]; ++i) if (Reg == CSRegs[i]) return true; return false; } -static bool isCSRestore(MachineInstr *MI, const unsigned *CSRegs) { +static bool isCSRestore(MachineInstr *MI, const uint16_t *CSRegs) { if (MI->getOpcode() == ARM::tLDRspi && MI->getOperand(1).isFI() && isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs)) @@ -214,7 +213,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); int NumBytes = (int)MFI->getStackSize(); - const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(); + const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(); unsigned FramePtr = RegInfo->getFrameRegister(MF); if (!AFI->hasStackFrame()) { @@ -278,8 +277,11 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, VARegSaveSize); - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg)) - .addReg(ARM::R3, RegState::Kill)); + MachineInstrBuilder MIB = + BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg)) + 
.addReg(ARM::R3, RegState::Kill); + AddDefaultPred(MIB); + MIB->copyImplicitOps(&*MBBI); // erase the old tBX_RET instruction MBB.erase(MBBI); } @@ -350,6 +352,7 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB, continue; Reg = ARM::PC; (*MIB).setDesc(TII.get(ARM::tPOP_RET)); + MIB->copyImplicitOps(&*MI); MI = MBB.erase(MI); } MIB.addReg(Reg, getDefRegState(true)); diff --git a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp index 218311d..e03e758 100644 --- a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp @@ -1,4 +1,4 @@ -//===- Thumb1InstrInfo.cpp - Thumb-1 Instruction Information ----*- C++ -*-===// +//===-- Thumb1InstrInfo.cpp - Thumb-1 Instruction Information -------------===// // // The LLVM Compiler Infrastructure // @@ -13,14 +13,11 @@ #include "Thumb1InstrInfo.h" #include "ARM.h" -#include "ARMMachineFunctionInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/PseudoSourceValue.h" -#include "llvm/ADT/SmallVector.h" -#include "Thumb1InstrInfo.h" +#include "llvm/MC/MCInst.h" using namespace llvm; @@ -28,6 +25,15 @@ Thumb1InstrInfo::Thumb1InstrInfo(const ARMSubtarget &STI) : ARMBaseInstrInfo(STI), RI(*this, STI) { } +/// getNoopForMachoTarget - Return the noop instruction to use for a noop. +void Thumb1InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { + NopInst.setOpcode(ARM::tMOVr); + NopInst.addOperand(MCOperand::CreateReg(ARM::R8)); + NopInst.addOperand(MCOperand::CreateReg(ARM::R8)); + NopInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + NopInst.addOperand(MCOperand::CreateReg(0)); +} + unsigned Thumb1InstrInfo::getUnindexedOpcode(unsigned Opc) const { return 0; } @@ -60,8 +66,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = *MF.getFrameInfo(); MachineMemOperand *MMO = - MF.getMachineMemOperand( - MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)), + MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), MachineMemOperand::MOStore, MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); @@ -89,8 +94,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = *MF.getFrameInfo(); MachineMemOperand *MMO = - MF.getMachineMemOperand( - MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)), + MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), MachineMemOperand::MOLoad, MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); diff --git a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h index 17ef2f7..36af204 100644 --- a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h +++ b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.h @@ -1,4 +1,4 @@ -//===- Thumb1InstrInfo.h - Thumb-1 Instruction Information ------*- C++ -*-===// +//===-- Thumb1InstrInfo.h - Thumb-1 Instruction Information -----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -14,9 +14,8 @@ #ifndef THUMB1INSTRUCTIONINFO_H #define THUMB1INSTRUCTIONINFO_H -#include "llvm/Target/TargetInstrInfo.h" #include "ARM.h" -#include "ARMInstrInfo.h" +#include "ARMBaseInstrInfo.h" #include "Thumb1RegisterInfo.h" namespace llvm { @@ -27,6 +26,9 @@ class Thumb1InstrInfo : public ARMBaseInstrInfo { public: explicit 
Thumb1InstrInfo(const ARMSubtarget &STI); + /// getNoopForMachoTarget - Return the noop instruction to use for a noop. + void getNoopForMachoTarget(MCInst &NopInst) const; + // Return the non-pre/post incrementing version of 'Opc'. Return 0 // if there is not such an opcode. unsigned getUnindexedOpcode(unsigned Opc) const; diff --git a/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp index e8ed482..ef77bbd 100644 --- a/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp @@ -1,4 +1,4 @@ -//===- Thumb1RegisterInfo.cpp - Thumb-1 Register Information ----*- C++ -*-===// +//===-- Thumb1RegisterInfo.cpp - Thumb-1 Register Information -------------===// // // The LLVM Compiler Infrastructure // @@ -12,12 +12,11 @@ // //===----------------------------------------------------------------------===// +#include "Thumb1RegisterInfo.h" #include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMMachineFunctionInfo.h" #include "ARMSubtarget.h" -#include "Thumb1InstrInfo.h" -#include "Thumb1RegisterInfo.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" @@ -28,6 +27,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/CommandLine.h" @@ -570,6 +570,11 @@ Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB, // If this instruction affects R12, adjust our restore point. for (unsigned i = 0, e = II->getNumOperands(); i != e; ++i) { const MachineOperand &MO = II->getOperand(i); + if (MO.isRegMask() && MO.clobbersPhysReg(ARM::R12)) { + UseMI = II; + done = true; + break; + } if (!MO.isReg() || MO.isUndef() || !MO.getReg() || TargetRegisterInfo::isVirtualRegister(MO.getReg())) continue; @@ -624,6 +629,21 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, FrameReg = BasePtr; } + // PEI::scavengeFrameVirtualRegs() cannot accurately track SPAdj because the + // call frame setup/destroy instructions have already been eliminated. That + // means the stack pointer cannot be used to access the emergency spill slot + // when !hasReservedCallFrame(). +#ifndef NDEBUG + if (RS && FrameReg == ARM::SP && FrameIndex == RS->getScavengingFrameIndex()){ + assert(MF.getTarget().getFrameLowering()->hasReservedCallFrame(MF) && + "Cannot use SP to access the emergency spill slot in " + "functions without a reserved call frame"); + assert(!MF.getFrameInfo()->hasVarSizedObjects() && + "Cannot use SP to access the emergency spill slot in " + "functions with variable sized frame objects"); + } +#endif // NDEBUG + // Special handling of dbg_value instructions. if (MI.isDebugValue()) { MI.getOperand(i). ChangeToRegister(FrameReg, false /*isDef*/); @@ -643,14 +663,13 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, assert(Offset && "This code isn't needed if offset already handled!"); unsigned Opcode = MI.getOpcode(); - const MCInstrDesc &Desc = MI.getDesc(); // Remove predicate first. int PIdx = MI.findFirstPredOperandIdx(); if (PIdx != -1) removeOperands(MI, PIdx); - if (Desc.mayLoad()) { + if (MI.mayLoad()) { // Use the destination register to materialize sp + offset. 
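     // i.e. when the offset no longer fits the load's immediate field, the
     // destination register first receives sp plus the offset and the load
     // is then rewritten to go through that register.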
unsigned TmpReg = MI.getOperand(0).getReg(); bool UseRR = false; @@ -673,7 +692,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // Use [reg, reg] addrmode. Replace the immediate operand w/ the frame // register. The offset is already handled in the vreg value. MI.getOperand(i+1).ChangeToRegister(FrameReg, false, false, false); - } else if (Desc.mayStore()) { + } else if (MI.mayStore()) { VReg = MF.getRegInfo().createVirtualRegister(ARM::tGPRRegisterClass); bool UseRR = false; @@ -695,11 +714,11 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // register. The offset is already handled in the vreg value. MI.getOperand(i+1).ChangeToRegister(FrameReg, false, false, false); } else { - assert(false && "Unexpected opcode!"); + llvm_unreachable("Unexpected opcode!"); } // Add predicate back if it's needed. - if (MI.getDesc().isPredicable()) { + if (MI.isPredicable()) { MachineInstrBuilder MIB(&MI); AddDefaultPred(MIB); } diff --git a/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.h b/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.h index 9060e59..6971842 100644 --- a/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.h +++ b/contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.h @@ -16,13 +16,12 @@ #define THUMB1REGISTERINFO_H #include "ARM.h" -#include "ARMRegisterInfo.h" +#include "ARMBaseRegisterInfo.h" #include "llvm/Target/TargetRegisterInfo.h" namespace llvm { class ARMSubtarget; class ARMBaseInstrInfo; - class Type; struct Thumb1RegisterInfo : public ARMBaseRegisterInfo { public: diff --git a/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp b/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp index b627400..ecb4c2f 100644 --- a/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp +++ b/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp @@ -1,4 +1,4 @@ -//===-- Thumb2ITBlockPass.cpp - Insert Thumb IT blocks ----------*- C++ -*-===// +//===-- Thumb2ITBlockPass.cpp - Insert Thumb-2 IT blocks ------------------===// // // The LLVM Compiler Infrastructure // @@ -13,6 +13,7 @@ #include "Thumb2InstrInfo.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" @@ -75,7 +76,7 @@ static void TrackDefUses(MachineInstr *MI, for (unsigned i = 0, e = LocalUses.size(); i != e; ++i) { unsigned Reg = LocalUses[i]; Uses.insert(Reg); - for (const unsigned *Subreg = TRI->getSubRegisters(Reg); + for (const uint16_t *Subreg = TRI->getSubRegisters(Reg); *Subreg; ++Subreg) Uses.insert(*Subreg); } @@ -83,7 +84,7 @@ static void TrackDefUses(MachineInstr *MI, for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) { unsigned Reg = LocalDefs[i]; Defs.insert(Reg); - for (const unsigned *Subreg = TRI->getSubRegisters(Reg); + for (const uint16_t *Subreg = TRI->getSubRegisters(Reg); *Subreg; ++Subreg) Defs.insert(*Subreg); if (Reg == ARM::CPSR) @@ -141,7 +142,7 @@ Thumb2ITBlockPass::MoveCopyOutOfITBlock(MachineInstr *MI, // rsb r2, 0 // const MCInstrDesc &MCID = MI->getDesc(); - if (MCID.hasOptionalDef() && + if (MI->hasOptionalDef() && MI->getOperand(MCID.getNumOperands() - 1).getReg() == ARM::CPSR) return false; @@ -153,7 +154,7 @@ Thumb2ITBlockPass::MoveCopyOutOfITBlock(MachineInstr *MI, ++I; if (I != E) { unsigned NPredReg = 0; - ARMCC::CondCodes NCC = llvm::getITInstrPredicate(I, NPredReg); + ARMCC::CondCodes NCC = getITInstrPredicate(I, NPredReg); if (NCC == CC || NCC == OCC) return true; } 
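The CC/OCC comparison above is the whole legality rule for keeping a neighbouring instruction inside an IT block. A minimal sketch, assuming ARMCC::getOppositeCondition behaves as it does elsewhere in this backend (the helper is illustrative, not part of the patch):

    // An instruction can share the block when predicated on the block's
    // condition or on its exact inverse; the IT mask then encodes a
    // then/else slot for each trailing instruction.
    static bool fitsInITBlock(ARMCC::CondCodes CC, ARMCC::CondCodes NCC) {
      ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC);
      return NCC == CC || NCC == OCC;
    }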
@@ -170,7 +171,7 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) { MachineInstr *MI = &*MBBI; DebugLoc dl = MI->getDebugLoc(); unsigned PredReg = 0; - ARMCC::CondCodes CC = llvm::getITInstrPredicate(MI, PredReg); + ARMCC::CondCodes CC = getITInstrPredicate(MI, PredReg); if (CC == ARMCC::AL) { ++MBBI; continue; @@ -198,7 +199,7 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) { // Branches, including tricky ones like LDM_RET, need to end an IT // block so check the instruction we just put in the block. for (; MBBI != E && Pos && - (!MI->getDesc().isBranch() && !MI->getDesc().isReturn()) ; ++MBBI) { + (!MI->isBranch() && !MI->isReturn()) ; ++MBBI) { if (MBBI->isDebugValue()) continue; @@ -206,7 +207,7 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) { MI = NMI; unsigned NPredReg = 0; - ARMCC::CondCodes NCC = llvm::getITInstrPredicate(NMI, NPredReg); + ARMCC::CondCodes NCC = getITInstrPredicate(NMI, NPredReg); if (NCC == CC || NCC == OCC) { Mask |= (NCC & 1) << Pos; // Add implicit use of ITSTATE. @@ -237,6 +238,10 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) { // Last instruction in IT block kills ITSTATE. LastITMI->findRegisterUseOperand(ARM::ITSTATE)->setIsKill(); + // Finalize the bundle. + MachineBasicBlock::instr_iterator LI = LastITMI; + finalizeBundle(MBB, InsertPos.getInstrIterator(), llvm::next(LI)); + Modified = true; ++NumITs; } diff --git a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp index cf040c82..8ab486b 100644 --- a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -1,4 +1,4 @@ -//===- Thumb2InstrInfo.cpp - Thumb-2 Instruction Information ----*- C++ -*-===// +//===-- Thumb2InstrInfo.cpp - Thumb-2 Instruction Information -------------===// // // The LLVM Compiler Infrastructure // @@ -15,13 +15,11 @@ #include "ARM.h" #include "ARMConstantPoolValue.h" #include "ARMMachineFunctionInfo.h" -#include "Thumb2InstrInfo.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/PseudoSourceValue.h" -#include "llvm/ADT/SmallVector.h" +#include "llvm/MC/MCInst.h" #include "llvm/Support/CommandLine.h" using namespace llvm; @@ -35,6 +33,13 @@ Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI) : ARMBaseInstrInfo(STI), RI(*this, STI) { } +/// getNoopForMachoTarget - Return the noop instruction to use for a noop. +void Thumb2InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { + NopInst.setOpcode(ARM::tNOP); + NopInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + NopInst.addOperand(MCOperand::CreateReg(0)); +} + unsigned Thumb2InstrInfo::getUnindexedOpcode(unsigned Opc) const { // FIXME return 0; @@ -53,7 +58,7 @@ Thumb2InstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail, // If the first instruction of Tail is predicated, we may have to update // the IT instruction. unsigned PredReg = 0; - ARMCC::CondCodes CC = llvm::getInstrPredicate(Tail, PredReg); + ARMCC::CondCodes CC = getInstrPredicate(Tail, PredReg); MachineBasicBlock::iterator MBBI = Tail; if (CC != ARMCC::AL) // Expecting at least the t2IT instruction before it. 
@@ -101,7 +106,7 @@ Thumb2InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB, } unsigned PredReg = 0; - return llvm::getITInstrPredicate(MBBI, PredReg) == ARMCC::AL; + return getITInstrPredicate(MBBI, PredReg) == ARMCC::AL; } void Thumb2InstrInfo::copyPhysReg(MachineBasicBlock &MBB, @@ -130,8 +135,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = *MF.getFrameInfo(); MachineMemOperand *MMO = - MF.getMachineMemOperand( - MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)), + MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), MachineMemOperand::MOStore, MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); @@ -158,8 +162,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = *MF.getFrameInfo(); MachineMemOperand *MMO = - MF.getMachineMemOperand( - MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)), + MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), MachineMemOperand::MOLoad, MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); @@ -570,7 +573,7 @@ Thumb2InstrInfo::scheduleTwoAddrSource(MachineInstr *SrcMI, return; unsigned PredReg = 0; - ARMCC::CondCodes CC = llvm::getInstrPredicate(UseMI, PredReg); + ARMCC::CondCodes CC = getInstrPredicate(UseMI, PredReg); if (CC == ARMCC::AL || PredReg != ARM::CPSR) return; @@ -586,10 +589,10 @@ Thumb2InstrInfo::scheduleTwoAddrSource(MachineInstr *SrcMI, continue; MachineInstr *NMI = &*MBBI; - ARMCC::CondCodes NCC = llvm::getInstrPredicate(NMI, PredReg); + ARMCC::CondCodes NCC = getInstrPredicate(NMI, PredReg); if (!(NCC == CC || NCC == OCC) || NMI->modifiesRegister(SrcReg, &TRI) || - NMI->definesRegister(ARM::CPSR)) + NMI->modifiesRegister(ARM::CPSR, &TRI)) break; if (++NumInsts == 4) // Too many in a row! @@ -607,5 +610,5 @@ llvm::getITInstrPredicate(const MachineInstr *MI, unsigned &PredReg) { unsigned Opc = MI->getOpcode(); if (Opc == ARM::tBcc || Opc == ARM::t2Bcc) return ARMCC::AL; - return llvm::getInstrPredicate(MI, PredReg); + return getInstrPredicate(MI, PredReg); } diff --git a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h index f2637d7..0911f8a 100644 --- a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h +++ b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.h @@ -1,4 +1,4 @@ -//===- Thumb2InstrInfo.h - Thumb-2 Instruction Information ------*- C++ -*-===// +//===-- Thumb2InstrInfo.h - Thumb-2 Instruction Information -----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -14,9 +14,8 @@ #ifndef THUMB2INSTRUCTIONINFO_H #define THUMB2INSTRUCTIONINFO_H -#include "llvm/Target/TargetInstrInfo.h" #include "ARM.h" -#include "ARMInstrInfo.h" +#include "ARMBaseInstrInfo.h" #include "Thumb2RegisterInfo.h" namespace llvm { @@ -28,6 +27,9 @@ class Thumb2InstrInfo : public ARMBaseInstrInfo { public: explicit Thumb2InstrInfo(const ARMSubtarget &STI); + /// getNoopForMachoTarget - Return the noop instruction to use for a noop. + void getNoopForMachoTarget(MCInst &NopInst) const; + // Return the non-pre/post incrementing version of 'Opc'. Return 0 // if there is not such an opcode. 
unsigned getUnindexedOpcode(unsigned Opc) const; diff --git a/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.cpp index 355c3bf..29a87d0 100644 --- a/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.cpp @@ -1,4 +1,4 @@ -//===- Thumb2RegisterInfo.cpp - Thumb-2 Register Information ----*- C++ -*-===// +//===-- Thumb2RegisterInfo.cpp - Thumb-2 Register Information -------------===// // // The LLVM Compiler Infrastructure // @@ -12,10 +12,10 @@ // //===----------------------------------------------------------------------===// +#include "Thumb2RegisterInfo.h" #include "ARM.h" +#include "ARMBaseInstrInfo.h" #include "ARMSubtarget.h" -#include "Thumb2InstrInfo.h" -#include "Thumb2RegisterInfo.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" diff --git a/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.h b/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.h index 824378a..6b397e8 100644 --- a/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.h +++ b/contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.h @@ -16,13 +16,12 @@ #define THUMB2REGISTERINFO_H #include "ARM.h" -#include "ARMRegisterInfo.h" +#include "ARMBaseRegisterInfo.h" #include "llvm/Target/TargetRegisterInfo.h" namespace llvm { class ARMSubtarget; class ARMBaseInstrInfo; - class Type; struct Thumb2RegisterInfo : public ARMBaseRegisterInfo { public: diff --git a/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp index 89a155c..b5a397e 100644 --- a/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -39,9 +39,9 @@ namespace { /// ReduceTable - A static table with information on mapping from wide /// opcodes to narrow struct ReduceEntry { - unsigned WideOpc; // Wide opcode - unsigned NarrowOpc1; // Narrow opcode to transform to - unsigned NarrowOpc2; // Narrow opcode when it's two-address + uint16_t WideOpc; // Wide opcode + uint16_t NarrowOpc1; // Narrow opcode to transform to + uint16_t NarrowOpc2; // Narrow opcode when it's two-address uint8_t Imm1Limit; // Limit of immediate field (bits) uint8_t Imm2Limit; // Limit of immediate field when it's two-address unsigned LowRegs1 : 1; // Only possible if low-registers are used @@ -146,7 +146,8 @@ namespace { /// ReduceOpcodeMap - Maps wide opcode to index of entry in ReduceTable. DenseMap<unsigned, unsigned> ReduceOpcodeMap; - bool canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use); + bool canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use, + bool IsSelfLoop); bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry, bool is2Addr, ARMCC::CondCodes Pred, @@ -157,19 +158,21 @@ namespace { bool ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, const ReduceEntry &Entry, bool LiveCPSR, - MachineInstr *CPSRDef); + MachineInstr *CPSRDef, bool IsSelfLoop); /// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address /// instruction. bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, const ReduceEntry &Entry, - bool LiveCPSR, MachineInstr *CPSRDef); + bool LiveCPSR, MachineInstr *CPSRDef, + bool IsSelfLoop); /// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit /// non-two-address instruction. 
bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, const ReduceEntry &Entry, - bool LiveCPSR, MachineInstr *CPSRDef); + bool LiveCPSR, MachineInstr *CPSRDef, + bool IsSelfLoop); /// ReduceMBB - Reduce width of instructions in the specified basic block. bool ReduceMBB(MachineBasicBlock &MBB); @@ -186,7 +189,7 @@ Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(ID) { } static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) { - for (const unsigned *Regs = MCID.ImplicitDefs; *Regs; ++Regs) + for (const uint16_t *Regs = MCID.getImplicitDefs(); *Regs; ++Regs) if (*Regs == ARM::CPSR) return true; return false; @@ -210,10 +213,17 @@ static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) { /// In this case it would have been ok to narrow the mul.w to muls since there /// are indirect RAW dependency between the muls and the mul.w bool -Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use) { - if (!Def || !STI->avoidCPSRPartialUpdate()) +Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use, + bool FirstInSelfLoop) { + // FIXME: Disable check for -Oz (aka OptimizeForSizeHarder). + if (!STI->avoidCPSRPartialUpdate()) return false; + if (!Def) + // If this BB loops back to itself, conservatively avoid narrowing the + // first instruction that does partial flag update. + return FirstInSelfLoop; + SmallSet<unsigned, 2> Defs; for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) { const MachineOperand &MO = Def->getOperand(i); @@ -442,7 +452,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, // Add the 16-bit load / store instruction. DebugLoc dl = MI->getDebugLoc(); - MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, TII->get(Opc)); + MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, TII->get(Opc)); if (!isLdStMul) { MIB.addOperand(MI->getOperand(0)); MIB.addOperand(MI->getOperand(1)); @@ -468,7 +478,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB); - MBB.erase(MI); + MBB.erase_instr(MI); ++NumLdSts; return true; } @@ -476,15 +486,16 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, bool Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, const ReduceEntry &Entry, - bool LiveCPSR, MachineInstr *CPSRDef) { + bool LiveCPSR, MachineInstr *CPSRDef, + bool IsSelfLoop) { unsigned Opc = MI->getOpcode(); if (Opc == ARM::t2ADDri) { // If the source register is SP, try to reduce to tADDrSPi, otherwise // it's a normal reduce. if (MI->getOperand(1).getReg() != ARM::SP) { - if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef)) + if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) return true; - return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef); + return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop); } // Try to reduce to tADDrSPi. 
unsigned Imm = MI->getOperand(2).getImm(); @@ -502,7 +513,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, MI->getOperand(MCID.getNumOperands()-1).getReg() == ARM::CPSR) return false; - MachineInstrBuilder MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), + MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(), TII->get(ARM::tADDrSPi)) .addOperand(MI->getOperand(0)) .addOperand(MI->getOperand(1)) @@ -514,7 +525,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " <<*MIB); - MBB.erase(MI); + MBB.erase_instr(MI); ++NumNarrows; return true; } @@ -522,8 +533,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, if (Entry.LowRegs1 && !VerifyLowRegs(MI)) return false; - const MCInstrDesc &MCID = MI->getDesc(); - if (MCID.mayLoad() || MCID.mayStore()) + if (MI->mayLoad() || MI->mayStore()) return ReduceLoadStore(MBB, MI, Entry); switch (Opc) { @@ -535,12 +545,12 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, switch (Opc) { default: break; case ARM::t2ADDSri: { - if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef)) + if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) return true; // fallthrough } case ARM::t2ADDSrr: - return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef); + return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop); } } break; @@ -552,13 +562,13 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, case ARM::t2UXTB: case ARM::t2UXTH: if (MI->getOperand(2).getImm() == 0) - return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef); + return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop); break; case ARM::t2MOVi16: // Can convert only 'pure' immediate operands, not immediates obtained as // globals' addresses. if (MI->getOperand(1).isImm()) - return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef); + return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop); break; case ARM::t2CMPrr: { // Try to reduce to the lo-reg only version first. Why there are two // versions of the instruction is a mystery. // It would be nice to just have two entries in the master table that // are prioritized, but the table assumes a unique entry for each // source insn opcode. So for now, we hack a local entry record to use. static const ReduceEntry NarrowEntry = { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 0,1 }; - if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, CPSRDef)) + if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, CPSRDef, IsSelfLoop)) return true; - return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef); + return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop); } } return false; @@ -579,14 +589,32 @@ bool Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, const ReduceEntry &Entry, - bool LiveCPSR, MachineInstr *CPSRDef) { + bool LiveCPSR, MachineInstr *CPSRDef, + bool IsSelfLoop) { if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr)) return false; unsigned Reg0 = MI->getOperand(0).getReg(); unsigned Reg1 = MI->getOperand(1).getReg(); - if (Reg0 != Reg1) { + // t2MUL is "special". The tied source operand is second, not first. + if (MI->getOpcode() == ARM::t2MUL) { + unsigned Reg2 = MI->getOperand(2).getReg(); + // Early exit if the regs aren't all low regs.
+ if (!isARMLowRegister(Reg0) || !isARMLowRegister(Reg1) + || !isARMLowRegister(Reg2)) + return false; + if (Reg0 != Reg2) { + // If the other operand also isn't the same as the destination, we + // can't reduce. + if (Reg1 != Reg0) + return false; + // Try to commute the operands to make it a 2-address instruction. + MachineInstr *CommutedMI = TII->commuteInstruction(MI); + if (!CommutedMI) + return false; + } + } else if (Reg0 != Reg1) { // Try to commute the operands to make it a 2-address instruction. unsigned CommOpIdx1, CommOpIdx2; if (!TII->findCommutedOpIndices(MI, CommOpIdx1, CommOpIdx2) || @@ -637,12 +665,12 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, // Avoid adding a false dependency on partial flag update by some 16-bit // instructions which has the 's' bit set. if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC && - canAddPseudoFlagDep(CPSRDef, MI)) + canAddPseudoFlagDep(CPSRDef, MI, IsSelfLoop)) return false; // Add the 16-bit instruction. DebugLoc dl = MI->getDebugLoc(); - MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewMCID); + MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID); MIB.addOperand(MI->getOperand(0)); if (NewMCID.hasOptionalDef()) { if (HasCC) @@ -666,7 +694,7 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB); - MBB.erase(MI); + MBB.erase_instr(MI); ++Num2Addrs; return true; } @@ -674,7 +702,8 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, bool Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, const ReduceEntry &Entry, - bool LiveCPSR, MachineInstr *CPSRDef) { + bool LiveCPSR, MachineInstr *CPSRDef, + bool IsSelfLoop) { if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit)) return false; @@ -727,12 +756,12 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, // Avoid adding a false dependency on partial flag update by some 16-bit // instructions which has the 's' bit set. if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC && - canAddPseudoFlagDep(CPSRDef, MI)) + canAddPseudoFlagDep(CPSRDef, MI, IsSelfLoop)) return false; // Add the 16-bit instruction. DebugLoc dl = MI->getDebugLoc(); - MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewMCID); + MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID); MIB.addOperand(MI->getOperand(0)); if (NewMCID.hasOptionalDef()) { if (HasCC) @@ -772,7 +801,7 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB); - MBB.erase(MI); + MBB.erase_instr(MI); ++NumNarrows; return true; } @@ -817,13 +846,22 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { // Yes, CPSR could be livein. bool LiveCPSR = MBB.isLiveIn(ARM::CPSR); MachineInstr *CPSRDef = 0; + MachineInstr *BundleMI = 0; - MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end(); - MachineBasicBlock::iterator NextMII; + // If this BB loops back to itself, conservatively avoid narrowing the + // first instruction that does partial flag update. 
+ bool IsSelfLoop = MBB.isSuccessor(&MBB); + MachineBasicBlock::instr_iterator MII = MBB.instr_begin(),E = MBB.instr_end(); + MachineBasicBlock::instr_iterator NextMII; for (; MII != E; MII = NextMII) { NextMII = llvm::next(MII); MachineInstr *MI = &*MII; + if (MI->isBundle()) { + BundleMI = MI; + continue; + } + LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR); unsigned Opcode = MI->getOpcode(); @@ -832,9 +870,9 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { const ReduceEntry &Entry = ReduceTable[OPI->second]; // Ignore "special" cases for now. if (Entry.Special) { - if (ReduceSpecial(MBB, MI, Entry, LiveCPSR, CPSRDef)) { + if (ReduceSpecial(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) { Modified = true; - MachineBasicBlock::iterator I = prior(NextMII); + MachineBasicBlock::instr_iterator I = prior(NextMII); MI = &*I; } goto ProcessNext; @@ -842,31 +880,46 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { // Try to transform to a 16-bit two-address instruction. if (Entry.NarrowOpc2 && - ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef)) { + ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) { Modified = true; - MachineBasicBlock::iterator I = prior(NextMII); + MachineBasicBlock::instr_iterator I = prior(NextMII); MI = &*I; goto ProcessNext; } // Try to transform to a 16-bit non-two-address instruction. if (Entry.NarrowOpc1 && - ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef)) { + ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) { Modified = true; - MachineBasicBlock::iterator I = prior(NextMII); + MachineBasicBlock::instr_iterator I = prior(NextMII); MI = &*I; } } ProcessNext: + if (NextMII != E && MI->isInsideBundle() && !NextMII->isInsideBundle()) { + // FIXME: Since post-ra scheduler operates on bundles, the CPSR kill + // marker is only on the BUNDLE instruction. Process the BUNDLE + // instruction as we finish with the bundled instruction to work around + // the inconsistency. + if (BundleMI->killsRegister(ARM::CPSR)) + LiveCPSR = false; + MachineOperand *MO = BundleMI->findRegisterDefOperand(ARM::CPSR); + if (MO && !MO->isDead()) + LiveCPSR = true; + } + bool DefCPSR = false; LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR); - if (MI->getDesc().isCall()) + if (MI->isCall()) { // Calls don't really set CPSR. CPSRDef = 0; - else if (DefCPSR) + IsSelfLoop = false; + } else if (DefCPSR) { // This is the last CPSR defining instruction. CPSRDef = MI; + IsSelfLoop = false; + } } return Modified;
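A note on the ReduceTable initializers used by Thumb2SizeReduction: the local t2CMPrr NarrowEntry hacked into ReduceSpecial packs eleven fields, but the struct hunk above is cut off after LowRegs1. The annotated reading below uses the remaining field names from the upstream struct definition (LowRegs2, PredCC1, PredCC2, PartFlag, Special), so treat it as a best-effort gloss rather than quoted source:

static const ReduceEntry NarrowEntry =
  { ARM::t2CMPrr, // WideOpc:     the 32-bit compare being narrowed
    ARM::tCMPr,   // NarrowOpc1:  lo-register-only 16-bit compare
    0,            // NarrowOpc2:  no two-address narrow form
    0, 0,         // Imm1Limit, Imm2Limit: register form, no immediates
    1, 1,         // LowRegs1, LowRegs2: operands must be low registers
    2, 0,         // PredCC1 = 2 (narrow form always sets CPSR), PredCC2
    0,            // PartFlag:    no partial-flag-update hazard to model
    1 };          // Special:     dispatched through ReduceSpecial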
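The IsSelfLoop flag threaded through ReduceSpecial, ReduceTo2Addr, and ReduceToNarrow feeds the new conservative case in canAddPseudoFlagDep: when no CPSR-defining instruction has been seen yet in the block (Def == 0) and the block is its own successor, the first flag-setting candidate is left wide, because its CPSR write could become a loop-carried false dependency. A standalone paraphrase of just that decision, under hypothetical names (BlockState, wouldAddPseudoFlagDep):

struct BlockState {
  bool LoopsToSelf;      // MBB.isSuccessor(&MBB) in the pass
  bool SeenCPSRDef;      // CPSRDef != 0 in the pass
};

// Returns true when narrowing the next candidate to an 's'-suffixed
// 16-bit form risks adding a false CPSR dependency.
static bool wouldAddPseudoFlagDep(const BlockState &BS,
                                  bool AvoidPartialCPSRUpdate) {
  if (!AvoidPartialCPSRUpdate)   // only cores like Cortex-A9 care
    return false;
  if (!BS.SeenCPSRDef)           // def may be far away or loop-carried:
    return BS.LoopsToSelf;       // be conservative for the first candidate
  return false;                  // otherwise the RAW-dependency scan decides
}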
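The t2MUL case added to ReduceTo2Addr exists because the narrow tMUL ties its second source operand, not its first, to the destination: Reg0 must already equal Reg2, and when it equals Reg1 instead the operands are commuted before narrowing. A compact restatement under hypothetical names (classifyMul; the register checks are reduced to a single AllLowRegs flag):

enum MulNarrowing { MulDirect, MulCommute, MulCannotReduce };

// Classify how a mul Rd, Rm1, Rm2 can reach the tied form required by
// tMUL, which needs Rd == Rm2 and low registers throughout.
static MulNarrowing classifyMul(unsigned Rd, unsigned Rm1, unsigned Rm2,
                                bool AllLowRegs) {
  if (!AllLowRegs)
    return MulCannotReduce; // tMUL only accepts low registers
  if (Rd == Rm2)
    return MulDirect;       // already in the tied form
  if (Rd == Rm1)
    return MulCommute;      // swap the sources, then narrow
  return MulCannotReduce;   // destination matches neither source
}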
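Finally, the ReduceMBB hunks swap MachineBasicBlock::iterator for instr_iterator (and MBB.erase for MBB.erase_instr) because finalized IT blocks are now bundles: the top-level iterator treats a bundle as one opaque unit, while the size reducer has to visit and narrow the instructions inside it. A usage sketch of the two traversal modes against the MachineBasicBlock API of this LLVM vintage (walkBlock is illustrative, not from the patch):

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"

void walkBlock(llvm::MachineBasicBlock &MBB) {
  // Top-level view: a finalized IT block appears as a single BUNDLE unit.
  for (llvm::MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
       I != E; ++I) {
    // *I is either a standalone instruction or a BUNDLE header carrying
    // aggregate operands (e.g. the CPSR kill the FIXME above works around).
  }
  // Instruction-level view: also visits the instructions inside bundles,
  // which is what the size reducer needs to narrow members of IT blocks.
  for (llvm::MachineBasicBlock::instr_iterator I = MBB.instr_begin(),
         E = MBB.instr_end(); I != E; ++I) {
    if (I->isBundle()) {
      // Bundle header.
    } else if (I->isInsideBundle()) {
      // Bundled (e.g. IT-predicated) instruction.
    }
  }
}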