Diffstat (limited to 'lib/Target/ARM')
36 files changed, 2185 insertions, 656 deletions
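The through-line of this patch is the new REG_SEQUENCE modeling (the ModelWithRegSequence() hook in ARM.h, the QQPR/QQQQPR register classes, and the dsub_*/qsub_* sub-register indices): instead of stitching multi-register NEON values together with INSERT_SUBREG chains, the selector emits one REG_SEQUENCE whose operands alternate value and sub-register index, and each consumer extracts its piece back out, so the register allocator sees a single live range constrained to adjacent registers. A minimal sketch of the idiom, condensed from the SelectVLD changes below (N is the vld2 node being selected and VLd the machine load already built; everything else is the patch's own API):

// Glue the two D-register results of a vld2 into one pair with a
// REG_SEQUENCE, then hand each original user a sub-register of it.
SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
const SDValue Ops[] = { SDValue(VLd, 0), SubReg0, SDValue(VLd, 1), SubReg1 };
SDValue Pair = SDValue(CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
                                              dl, MVT::v2i64, Ops, 4), 0);
for (unsigned Vec = 0; Vec < 2; ++Vec)  // peel the two vectors back off
  ReplaceUses(SDValue(N, Vec),
              CurDAG->getTargetExtractSubreg(ARM::dsub_0 + Vec, dl, VT, Pair));

The extracts coalesce away, leaving one live range that must land in consecutive D registers, which is exactly the constraint VLD2/VST2 impose.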
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h index b08f942..ae7ae59 100644 --- a/lib/Target/ARM/ARM.h +++ b/lib/Target/ARM/ARM.h @@ -48,7 +48,7 @@ namespace ARMCC { AL }; - inline static CondCodes getOppositeCondition(CondCodes CC){ + inline static CondCodes getOppositeCondition(CondCodes CC) { switch (CC) { default: llvm_unreachable("Unknown condition code"); case EQ: return NE; @@ -67,7 +67,7 @@ namespace ARMCC { case LE: return GT; } } -} +} // namespace ARMCC inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) { switch (CC) { @@ -90,6 +90,10 @@ inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) { } } +/// ModelWithRegSequence - Return true if isel should use REG_SEQUENCE to model +/// operations involving sub-registers. +bool ModelWithRegSequence(); + FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM, CodeGenOpt::Level OptLevel); diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index b4dec0c..f1e6a9f 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -32,6 +32,8 @@ def ArchV6T2 : SubtargetFeature<"v6t2", "ARMArchVersion", "V6T2", "ARM v6t2">; def ArchV7A : SubtargetFeature<"v7a", "ARMArchVersion", "V7A", "ARM v7A">; +def ArchV7M : SubtargetFeature<"v7m", "ARMArchVersion", "V7M", + "ARM v7M">; def FeatureVFP2 : SubtargetFeature<"vfp2", "ARMFPUType", "VFPv2", "Enable VFP2 instructions">; def FeatureVFP3 : SubtargetFeature<"vfp3", "ARMFPUType", "VFPv3", @@ -42,6 +44,10 @@ def FeatureThumb2 : SubtargetFeature<"thumb2", "ThumbMode", "Thumb2", "Enable Thumb2 instructions">; def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true", "Enable half-precision floating point">; +def FeatureHWDiv : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true", + "Enable divide instructions">; +def FeatureT2ExtractPack: SubtargetFeature<"t2xtpk", "HasT2ExtractPack", "true", + "Enable Thumb2 extract and pack instructions">; // Some processors have multiply-accumulate instructions that don't // play nicely with other VFP instructions, and it's generally better @@ -123,9 +129,11 @@ def : Processor<"arm1156t2f-s", ARMV6Itineraries, // V7 Processors. 
def : Processor<"cortex-a8", CortexA8Itineraries, [ArchV7A, FeatureThumb2, FeatureNEON, FeatureHasSlowVMLx, - FeatureNEONForFP]>; + FeatureNEONForFP, FeatureT2ExtractPack]>; def : Processor<"cortex-a9", CortexA9Itineraries, - [ArchV7A, FeatureThumb2, FeatureNEON]>; + [ArchV7A, FeatureThumb2, FeatureNEON, FeatureT2ExtractPack]>; +def : ProcNoItin<"cortex-m3", [ArchV7M, FeatureThumb2, FeatureHWDiv]>; +def : ProcNoItin<"cortex-m4", [ArchV7M, FeatureThumb2, FeatureHWDiv]>; //===----------------------------------------------------------------------===// // Register File Description diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index a193858..2528854 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -28,6 +28,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/CommandLine.h" @@ -196,6 +197,42 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, return NewMIs[0]; } +bool +ARMBaseInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { + if (CSI.empty()) + return false; + + DebugLoc DL; + if (MI != MBB.end()) DL = MI->getDebugLoc(); + + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + unsigned Reg = CSI[i].getReg(); + bool isKill = true; + + // Add the callee-saved register as live-in unless it's LR and + // @llvm.returnaddress is called. If LR is returned for @llvm.returnaddress + // then it's already added to the function and entry block live-in sets. + if (Reg == ARM::LR) { + MachineFunction &MF = *MBB.getParent(); + if (MF.getFrameInfo()->isReturnAddressTaken() && + MF.getRegInfo().isLiveIn(Reg)) + isKill = false; + } + + if (isKill) + MBB.addLiveIn(Reg); + + // Insert the spill to the stack frame. The register is killed at the spill + // + storeRegToStackSlot(MBB, MI, Reg, isKill, + CSI[i].getFrameIdx(), CSI[i].getRegClass(), TRI); + } + return true; +} + // Branch analysis. bool ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, @@ -481,6 +518,10 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { // If this machine instr is a constant pool entry, its size is recorded as // operand #2. return MI->getOperand(2).getImm(); + case ARM::Int_eh_sjlj_longjmp: + return 16; + case ARM::tInt_eh_sjlj_longjmp: + return 10; case ARM::Int_eh_sjlj_setjmp: case ARM::Int_eh_sjlj_setjmp_nofp: return 24; @@ -540,16 +581,17 @@ bool ARMBaseInstrInfo::isMoveInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DstReg, unsigned& SrcSubIdx, unsigned& DstSubIdx) const { - SrcSubIdx = DstSubIdx = 0; // No sub-registers. 
- switch (MI.getOpcode()) { default: break; case ARM::VMOVS: case ARM::VMOVD: case ARM::VMOVDneon: - case ARM::VMOVQ: { + case ARM::VMOVQ: + case ARM::VMOVQQ : { SrcReg = MI.getOperand(1).getReg(); DstReg = MI.getOperand(0).getReg(); + SrcSubIdx = MI.getOperand(1).getSubReg(); + DstSubIdx = MI.getOperand(0).getSubReg(); return true; } case ARM::MOVr: @@ -564,6 +606,8 @@ ARMBaseInstrInfo::isMoveInstr(const MachineInstr &MI, "Invalid ARM MOV instruction"); SrcReg = MI.getOperand(1).getReg(); DstReg = MI.getOperand(0).getReg(); + SrcSubIdx = MI.getOperand(1).getSubReg(); + DstSubIdx = MI.getOperand(0).getSubReg(); return true; } } @@ -654,10 +698,8 @@ ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const { - DebugLoc DL; - if (I != MBB.end()) DL = I->getDebugLoc(); - + const TargetRegisterClass *SrcRC, + DebugLoc DL) const { // tGPR is used sometimes in ARM instructions that need to avoid using // certain registers. Just treat it as GPR here. if (DestRC == ARM::tGPRRegisterClass) @@ -679,6 +721,12 @@ ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB, SrcRC == ARM::QPR_8RegisterClass) SrcRC = ARM::QPRRegisterClass; + // Allow QQPR / QQPR_VFP2 cross-class copies. + if (DestRC == ARM::QQPR_VFP2RegisterClass) + DestRC = ARM::QQPRRegisterClass; + if (SrcRC == ARM::QQPR_VFP2RegisterClass) + SrcRC = ARM::QQPRRegisterClass; + // Disallow copies of unequal sizes. if (DestRC != SrcRC && DestRC->getSize() != SrcRC->getSize()) return false; @@ -703,20 +751,36 @@ ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB, Opc = ARM::VMOVDneon; else if (DestRC == ARM::QPRRegisterClass) Opc = ARM::VMOVQ; + else if (DestRC == ARM::QQPRRegisterClass) + Opc = ARM::VMOVQQ; + else if (DestRC == ARM::QQQQPRRegisterClass) + Opc = ARM::VMOVQQQQ; else return false; - AddDefaultPred(BuildMI(MBB, I, DL, get(Opc), DestReg) - .addReg(SrcReg)); + AddDefaultPred(BuildMI(MBB, I, DL, get(Opc), DestReg).addReg(SrcReg)); } return true; } +static const +MachineInstrBuilder &AddDReg(MachineInstrBuilder &MIB, + unsigned Reg, unsigned SubIdx, unsigned State, + const TargetRegisterInfo *TRI) { + if (!SubIdx) + return MIB.addReg(Reg, State); + + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State); + return MIB.addReg(Reg, State, SubIdx); +} + void ARMBaseInstrInfo:: storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned SrcReg, bool isKill, int FI, - const TargetRegisterClass *RC) const { + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); MachineFunction &MF = *MBB.getParent(); @@ -738,45 +802,82 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STR)) .addReg(SrcReg, getKillRegState(isKill)) .addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)); + } else if (RC == ARM::SPRRegisterClass) { + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS)) + .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); } else if (RC == ARM::DPRRegisterClass || RC == ARM::DPR_VFP2RegisterClass || RC == ARM::DPR_8RegisterClass) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD)) .addReg(SrcReg, getKillRegState(isKill)) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); - } else if (RC == ARM::SPRRegisterClass) { - AddDefaultPred(BuildMI(MBB, 
I, DL, get(ARM::VSTRS)) - .addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); - } else { - assert((RC == ARM::QPRRegisterClass || - RC == ARM::QPR_VFP2RegisterClass) && "Unknown regclass!"); + } else if (RC == ARM::QPRRegisterClass || + RC == ARM::QPR_VFP2RegisterClass || + RC == ARM::QPR_8RegisterClass) { // FIXME: Neon instructions should support predicates - if (Align >= 16 && (getRegisterInfo().canRealignStack(MF))) { + if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q)) .addFrameIndex(FI).addImm(128) - .addMemOperand(MMO) - .addReg(SrcReg, getKillRegState(isKill))); + .addReg(SrcReg, getKillRegState(isKill)) + .addMemOperand(MMO)); } else { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQ)). - addReg(SrcReg, getKillRegState(isKill)) + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQ)) + .addReg(SrcReg, getKillRegState(isKill)) .addFrameIndex(FI) .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)) .addMemOperand(MMO)); } + } else if (RC == ARM::QQPRRegisterClass || RC == ARM::QQPR_VFP2RegisterClass){ + if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { + // FIXME: It's possible to only store part of the QQ register if the + // spilled def has a sub-register index. + MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VST2q32)) + .addFrameIndex(FI).addImm(128); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI); + AddDefaultPred(MIB.addMemOperand(MMO)); + } else { + MachineInstrBuilder MIB = + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMD)) + .addFrameIndex(FI) + .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))) + .addMemOperand(MMO); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI); + AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI); + } + } else { + assert(RC == ARM::QQQQPRRegisterClass && "Unknown regclass!"); + MachineInstrBuilder MIB = + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMD)) + .addFrameIndex(FI) + .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))) + .addMemOperand(MMO); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI); + AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI); } } void ARMBaseInstrInfo:: loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, int FI, - const TargetRegisterClass *RC) const { + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = *MF.getFrameInfo(); unsigned Align = MFI.getObjectAlignment(FI); - MachineMemOperand *MMO = MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI), MachineMemOperand::MOLoad, 0, @@ -791,20 +892,18 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, if (RC == ARM::GPRRegisterClass) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDR), DestReg) .addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)); + } else if (RC 
== ARM::SPRRegisterClass) { + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg) + .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); } else if (RC == ARM::DPRRegisterClass || RC == ARM::DPR_VFP2RegisterClass || RC == ARM::DPR_8RegisterClass) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); - } else if (RC == ARM::SPRRegisterClass) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg) - .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); - } else { - assert((RC == ARM::QPRRegisterClass || - RC == ARM::QPR_VFP2RegisterClass || - RC == ARM::QPR_8RegisterClass) && "Unknown regclass!"); - if (Align >= 16 - && (getRegisterInfo().canRealignStack(MF))) { + } else if (RC == ARM::QPRRegisterClass || + RC == ARM::QPR_VFP2RegisterClass || + RC == ARM::QPR_8RegisterClass) { + if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q), DestReg) .addFrameIndex(FI).addImm(128) .addMemOperand(MMO)); @@ -814,6 +913,40 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)) .addMemOperand(MMO)); } + } else if (RC == ARM::QQPRRegisterClass || RC == ARM::QQPR_VFP2RegisterClass){ + if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { + MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLD2q32)); + MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI); + AddDefaultPred(MIB.addFrameIndex(FI).addImm(128).addMemOperand(MMO)); + } else { + MachineInstrBuilder MIB = + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMD)) + .addFrameIndex(FI) + .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))) + .addMemOperand(MMO); + MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI); + AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI); + } + } else { + assert(RC == ARM::QQQQPRRegisterClass && "Unknown regclass!"); + MachineInstrBuilder MIB = + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMD)) + .addFrameIndex(FI) + .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))) + .addMemOperand(MMO); + MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::Define, TRI); + AddDReg(MIB, DestReg, ARM::dsub_7, RegState::Define, TRI); } } @@ -930,8 +1063,7 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, DstSubReg) .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg); } - } - else if (Opc == ARM::VMOVD) { + } else if (Opc == ARM::VMOVD || Opc == ARM::VMOVDneon) { unsigned Pred = MI->getOperand(2).getImm(); unsigned PredReg = MI->getOperand(3).getReg(); if (OpNum == 0) { // move -> store @@ -957,6 +1089,56 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, DstSubReg) .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg); } + } else if (Opc == ARM::VMOVQ) { + 
MachineFrameInfo &MFI = *MF.getFrameInfo(); + unsigned Pred = MI->getOperand(2).getImm(); + unsigned PredReg = MI->getOperand(3).getReg(); + if (OpNum == 0) { // move -> store + unsigned SrcReg = MI->getOperand(1).getReg(); + unsigned SrcSubReg = MI->getOperand(1).getSubReg(); + bool isKill = MI->getOperand(1).isKill(); + bool isUndef = MI->getOperand(1).isUndef(); + if (MFI.getObjectAlignment(FI) >= 16 && + getRegisterInfo().canRealignStack(MF)) { + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VST1q)) + .addFrameIndex(FI).addImm(128) + .addReg(SrcReg, + getKillRegState(isKill) | getUndefRegState(isUndef), + SrcSubReg) + .addImm(Pred).addReg(PredReg); + } else { + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VSTMQ)) + .addReg(SrcReg, + getKillRegState(isKill) | getUndefRegState(isUndef), + SrcSubReg) + .addFrameIndex(FI).addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)) + .addImm(Pred).addReg(PredReg); + } + } else { // move -> load + unsigned DstReg = MI->getOperand(0).getReg(); + unsigned DstSubReg = MI->getOperand(0).getSubReg(); + bool isDead = MI->getOperand(0).isDead(); + bool isUndef = MI->getOperand(0).isUndef(); + if (MFI.getObjectAlignment(FI) >= 16 && + getRegisterInfo().canRealignStack(MF)) { + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VLD1q)) + .addReg(DstReg, + RegState::Define | + getDeadRegState(isDead) | + getUndefRegState(isUndef), + DstSubReg) + .addFrameIndex(FI).addImm(128).addImm(Pred).addReg(PredReg); + } else { + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VLDMQ)) + .addReg(DstReg, + RegState::Define | + getDeadRegState(isDead) | + getUndefRegState(isUndef), + DstSubReg) + .addFrameIndex(FI).addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)) + .addImm(Pred).addReg(PredReg); + } + } } return NewMI; @@ -985,12 +1167,13 @@ ARMBaseInstrInfo::canFoldMemoryOperand(const MachineInstr *MI, Opc == ARM::tMOVtgpr2gpr || Opc == ARM::tMOVgpr2tgpr) { return true; - } else if (Opc == ARM::VMOVS || Opc == ARM::VMOVD) { + } else if (Opc == ARM::VMOVS || Opc == ARM::VMOVD || + Opc == ARM::VMOVDneon || Opc == ARM::VMOVQ) { return true; - } else if (Opc == ARM::VMOVDneon || Opc == ARM::VMOVQ) { - return false; // FIXME } + // FIXME: VMOVQQ and VMOVQQQQ? + return false; } diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 7a5630e..b566271 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -200,6 +200,11 @@ public: virtual const ARMBaseRegisterInfo &getRegisterInfo() const =0; const ARMSubtarget &getSubtarget() const { return Subtarget; } + bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const; + // Branch analysis. 
virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, @@ -257,17 +262,20 @@ public: MachineBasicBlock::iterator I, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const; + const TargetRegisterClass *SrcRC, + DebugLoc DL) const; virtual void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC) const; + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg, int FrameIndex, - const TargetRegisterClass *RC) const; + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx, diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index bc12187..82458d2 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -259,10 +259,10 @@ ARMBaseRegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, unsigned SubIdx) const { switch (SubIdx) { default: return 0; - case 1: - case 2: - case 3: - case 4: + case ARM::ssub_0: + case ARM::ssub_1: + case ARM::ssub_2: + case ARM::ssub_3: { // S sub-registers. if (A->getSize() == 8) { if (B == &ARM::SPR_8RegClass) @@ -273,22 +273,201 @@ ARMBaseRegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, return &ARM::DPR_VFP2RegClass; } - assert(A->getSize() == 16 && "Expecting a Q register class!"); - if (B == &ARM::SPR_8RegClass) - return &ARM::QPR_8RegClass; - return &ARM::QPR_VFP2RegClass; - case 5: - case 6: - // D sub-registers. - if (B == &ARM::DPR_VFP2RegClass) + if (A->getSize() == 16) { + if (B == &ARM::SPR_8RegClass) + return &ARM::QPR_8RegClass; return &ARM::QPR_VFP2RegClass; - if (B == &ARM::DPR_8RegClass) - return &ARM::QPR_8RegClass; + } + + if (A->getSize() == 32) { + if (B == &ARM::SPR_8RegClass) + return 0; // Do not allow coalescing! + return &ARM::QQPR_VFP2RegClass; + } + + assert(A->getSize() == 64 && "Expecting a QQQQ register class!"); + return 0; // Do not allow coalescing! + } + case ARM::dsub_0: + case ARM::dsub_1: + case ARM::dsub_2: + case ARM::dsub_3: { + // D sub-registers. + if (A->getSize() == 16) { + if (B == &ARM::DPR_VFP2RegClass) + return &ARM::QPR_VFP2RegClass; + if (B == &ARM::DPR_8RegClass) + return 0; // Do not allow coalescing! + return A; + } + + if (A->getSize() == 32) { + if (B == &ARM::DPR_VFP2RegClass) + return &ARM::QQPR_VFP2RegClass; + if (B == &ARM::DPR_8RegClass) + return 0; // Do not allow coalescing! + return A; + } + + assert(A->getSize() == 64 && "Expecting a QQQQ register class!"); + if (B != &ARM::DPRRegClass) + return 0; // Do not allow coalescing! return A; } + case ARM::dsub_4: + case ARM::dsub_5: + case ARM::dsub_6: + case ARM::dsub_7: { + // D sub-registers of QQQQ registers. + if (A->getSize() == 64 && B == &ARM::DPRRegClass) + return A; + return 0; // Do not allow coalescing! + } + + case ARM::qsub_0: + case ARM::qsub_1: { + // Q sub-registers. + if (A->getSize() == 32) { + if (B == &ARM::QPR_VFP2RegClass) + return &ARM::QQPR_VFP2RegClass; + if (B == &ARM::QPR_8RegClass) + return 0; // Do not allow coalescing! + return A; + } + + assert(A->getSize() == 64 && "Expecting a QQQQ register class!"); + if (B == &ARM::QPRRegClass) + return A; + return 0; // Do not allow coalescing! 
+ } + case ARM::qsub_2: + case ARM::qsub_3: { + // Q sub-registers of QQQQ registers. + if (A->getSize() == 64 && B == &ARM::QPRRegClass) + return A; + return 0; // Do not allow coalescing! + } + } return 0; } +bool +ARMBaseRegisterInfo::canCombinedSubRegIndex(const TargetRegisterClass *RC, + SmallVectorImpl<unsigned> &SubIndices, + unsigned &NewSubIdx) const { + + unsigned Size = RC->getSize() * 8; + if (Size < 6) + return 0; + + NewSubIdx = 0; // Whole register. + unsigned NumRegs = SubIndices.size(); + if (NumRegs == 8) { + // 8 D registers -> 1 QQQQ register. + return (Size == 512 && + SubIndices[0] == ARM::dsub_0 && + SubIndices[1] == ARM::dsub_1 && + SubIndices[2] == ARM::dsub_2 && + SubIndices[3] == ARM::dsub_3 && + SubIndices[4] == ARM::dsub_4 && + SubIndices[5] == ARM::dsub_5 && + SubIndices[6] == ARM::dsub_6 && + SubIndices[7] == ARM::dsub_7); + } else if (NumRegs == 4) { + if (SubIndices[0] == ARM::qsub_0) { + // 4 Q registers -> 1 QQQQ register. + return (Size == 512 && + SubIndices[1] == ARM::qsub_1 && + SubIndices[2] == ARM::qsub_2 && + SubIndices[3] == ARM::qsub_3); + } else if (SubIndices[0] == ARM::dsub_0) { + // 4 D registers -> 1 QQ register. + if (Size >= 256 && + SubIndices[1] == ARM::dsub_1 && + SubIndices[2] == ARM::dsub_2 && + SubIndices[3] == ARM::dsub_3) { + if (Size == 512) + NewSubIdx = ARM::qqsub_0; + return true; + } + } else if (SubIndices[0] == ARM::dsub_4) { + // 4 D registers -> 1 QQ register (2nd). + if (Size == 512 && + SubIndices[1] == ARM::dsub_5 && + SubIndices[2] == ARM::dsub_6 && + SubIndices[3] == ARM::dsub_7) { + NewSubIdx = ARM::qqsub_1; + return true; + } + } else if (SubIndices[0] == ARM::ssub_0) { + // 4 S registers -> 1 Q register. + if (Size >= 128 && + SubIndices[1] == ARM::ssub_1 && + SubIndices[2] == ARM::ssub_2 && + SubIndices[3] == ARM::ssub_3) { + if (Size >= 256) + NewSubIdx = ARM::qsub_0; + return true; + } + } + } else if (NumRegs == 2) { + if (SubIndices[0] == ARM::qsub_0) { + // 2 Q registers -> 1 QQ register. + if (Size >= 256 && SubIndices[1] == ARM::qsub_1) { + if (Size == 512) + NewSubIdx = ARM::qqsub_0; + return true; + } + } else if (SubIndices[0] == ARM::qsub_2) { + // 2 Q registers -> 1 QQ register (2nd). + if (Size == 512 && SubIndices[1] == ARM::qsub_3) { + NewSubIdx = ARM::qqsub_1; + return true; + } + } else if (SubIndices[0] == ARM::dsub_0) { + // 2 D registers -> 1 Q register. + if (Size >= 128 && SubIndices[1] == ARM::dsub_1) { + if (Size >= 256) + NewSubIdx = ARM::qsub_0; + return true; + } + } else if (SubIndices[0] == ARM::dsub_2) { + // 2 D registers -> 1 Q register (2nd). + if (Size >= 256 && SubIndices[1] == ARM::dsub_3) { + NewSubIdx = ARM::qsub_1; + return true; + } + } else if (SubIndices[0] == ARM::dsub_4) { + // 2 D registers -> 1 Q register (3rd). + if (Size == 512 && SubIndices[1] == ARM::dsub_5) { + NewSubIdx = ARM::qsub_2; + return true; + } + } else if (SubIndices[0] == ARM::dsub_6) { + // 2 D registers -> 1 Q register (4th). + if (Size == 512 && SubIndices[1] == ARM::dsub_7) { + NewSubIdx = ARM::qsub_3; + return true; + } + } else if (SubIndices[0] == ARM::ssub_0) { + // 2 S registers -> 1 D register. + if (SubIndices[1] == ARM::ssub_1) { + if (Size >= 128) + NewSubIdx = ARM::dsub_0; + return true; + } + } else if (SubIndices[0] == ARM::ssub_2) { + // 2 S registers -> 1 D register (2nd).
+ if (Size >= 128 && SubIndices[1] == ARM::ssub_3) { + NewSubIdx = ARM::dsub_1; + return true; + } + } + } + return false; +} + + const TargetRegisterClass * ARMBaseRegisterInfo::getPointerRegClass(unsigned Kind) const { return ARM::GPRRegisterClass; @@ -481,7 +660,7 @@ ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg, /// bool ARMBaseRegisterInfo::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - return ((DisableFramePointerElim(MF) && MFI->hasCalls())|| + return ((DisableFramePointerElim(MF) && MFI->adjustsStack())|| needsStackRealignment(MF) || MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken()); @@ -509,7 +688,7 @@ needsStackRealignment(const MachineFunction &MF) const { bool ARMBaseRegisterInfo:: cannotEliminateFrame(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - if (DisableFramePointerElim(MF) && MFI->hasCalls()) + if (DisableFramePointerElim(MF) && MFI->adjustsStack()) return true; return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken() || needsStackRealignment(MF); @@ -545,24 +724,25 @@ ARMBaseRegisterInfo::estimateRSStackSizeLimit(MachineFunction &MF) const { I != E; ++I) { for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { if (!I->getOperand(i).isFI()) continue; - - const TargetInstrDesc &Desc = TII.get(I->getOpcode()); - unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); - if (AddrMode == ARMII::AddrMode3 || - AddrMode == ARMII::AddrModeT2_i8) - return (1 << 8) - 1; - - if (AddrMode == ARMII::AddrMode5 || - AddrMode == ARMII::AddrModeT2_i8s4) + switch (I->getDesc().TSFlags & ARMII::AddrModeMask) { + case ARMII::AddrMode3: + case ARMII::AddrModeT2_i8: + Limit = std::min(Limit, (1U << 8) - 1); + break; + case ARMII::AddrMode5: + case ARMII::AddrModeT2_i8s4: Limit = std::min(Limit, ((1U << 8) - 1) * 4); - - if (AddrMode == ARMII::AddrModeT2_i12 && hasFP(MF)) - // When the stack offset is negative, we will end up using - // the i8 instructions instead. - return (1 << 8) - 1; - - if (AddrMode == ARMII::AddrMode6) + break; + case ARMII::AddrModeT2_i12: + if (hasFP(MF)) Limit = std::min(Limit, (1U << 8) - 1); + break; + case ARMII::AddrMode6: + // Addressing mode 6 (load/store) instructions can't encode an + // immediate offset for stack references. return 0; + default: + break; + } break; // At most one FI per instruction } } @@ -750,7 +930,9 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, while (NumExtras && !UnspilledCS1GPRs.empty()) { unsigned Reg = UnspilledCS1GPRs.back(); UnspilledCS1GPRs.pop_back(); - if (!isReservedReg(MF, Reg)) { + if (!isReservedReg(MF, Reg) && + (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg) || + Reg == ARM::LR)) { Extras.push_back(Reg); NumExtras--; } diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h index 456c392..2c9c82d 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -81,6 +81,15 @@ public: getMatchingSuperRegClass(const TargetRegisterClass *A, const TargetRegisterClass *B, unsigned Idx) const; + /// canCombinedSubRegIndex - Given a register class and a list of sub-register + /// indices, return true if it's possible to combine the sub-register indices + /// into one that corresponds to a larger sub-register. Return the new sub- + /// register index by reference. Note the new index may be zero if the given + /// sub-registers combine to form the whole register.
+ virtual bool canCombinedSubRegIndex(const TargetRegisterClass *RC, + SmallVectorImpl<unsigned> &SubIndices, + unsigned &NewSubIdx) const; + const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const; std::pair<TargetRegisterClass::iterator,TargetRegisterClass::iterator> diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index f84f85a..f2730fc 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -88,6 +88,7 @@ namespace { void emitWordLE(unsigned Binary); void emitDWordLE(uint64_t Binary); void emitConstPoolInstruction(const MachineInstr &MI); + void emitMOVi32immInstruction(const MachineInstr &MI); void emitMOVi2piecesInstruction(const MachineInstr &MI); void emitLEApcrelJTInstruction(const MachineInstr &MI); void emitPseudoMoveInstruction(const MachineInstr &MI); @@ -145,6 +146,15 @@ namespace { return getMachineOpValue(MI, MI.getOperand(OpIdx)); } + /// getMovi32Value - Return binary encoding of operand for movw/movt. If the + /// machine operand requires relocation, record the relocation and return zero. + unsigned getMovi32Value(const MachineInstr &MI,const MachineOperand &MO, + unsigned Reloc); + unsigned getMovi32Value(const MachineInstr &MI, unsigned OpIdx, + unsigned Reloc) { + return getMovi32Value(MI, MI.getOperand(OpIdx), Reloc); + } + /// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value. /// unsigned getShiftOp(unsigned Imm) const ; @@ -217,6 +227,31 @@ unsigned ARMCodeEmitter::getShiftOp(unsigned Imm) const { return 0; } +/// getMovi32Value - Return binary encoding of operand for movw/movt. If the +/// machine operand requires relocation, record the relocation and return zero. +unsigned ARMCodeEmitter::getMovi32Value(const MachineInstr &MI, + const MachineOperand &MO, + unsigned Reloc) { + assert(((Reloc == ARM::reloc_arm_movt) || (Reloc == ARM::reloc_arm_movw)) + && "Relocation to this function should be for movt or movw"); + + if (MO.isImm()) + return static_cast<unsigned>(MO.getImm()); + else if (MO.isGlobal()) + emitGlobalAddress(MO.getGlobal(), Reloc, true, false); + else if (MO.isSymbol()) + emitExternalSymbolAddress(MO.getSymbolName(), Reloc); + else if (MO.isMBB()) + emitMachineBasicBlock(MO.getMBB(), Reloc); + else { +#ifndef NDEBUG + errs() << MO; +#endif + llvm_unreachable("Unsupported operand type for movw/movt"); + } + return 0; +} + /// getMachineOpValue - Return binary encoding of operand. If the machine /// operand requires relocation, record the relocation and return zero. unsigned ARMCodeEmitter::getMachineOpValue(const MachineInstr &MI, @@ -438,6 +473,42 @@ void ARMCodeEmitter::emitConstPoolInstruction(const MachineInstr &MI) { } } +void ARMCodeEmitter::emitMOVi32immInstruction(const MachineInstr &MI) { + const MachineOperand &MO0 = MI.getOperand(0); + const MachineOperand &MO1 = MI.getOperand(1); + + // Emit the 'movw' instruction. + unsigned Binary = 0x30 << 20; // mov: Insts{27-20} = 0b00110000 + + unsigned Lo16 = getMovi32Value(MI, MO1, ARM::reloc_arm_movw) & 0xFFFF; + + // Set the conditional execution predicate. + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + // Encode Rd. + Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift; + + // Encode imm16 as imm4:imm12 + Binary |= Lo16 & 0xFFF; // Insts{11-0} = imm12 + Binary |= ((Lo16 >> 12) & 0xF) << 16; // Insts{19-16} = imm4 + emitWordLE(Binary); + + unsigned Hi16 = getMovi32Value(MI, MO1, ARM::reloc_arm_movt) >> 16; + // Emit the 'movt' instruction. 
+ Binary = 0x34 << 20; // movt: Insts{27-20} = 0b00110100 + + // Set the conditional execution predicate. + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + // Encode Rd. + Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift; + + // Encode imm16 as imm4:imm12, same as movw above. + Binary |= Hi16 & 0xFFF; + Binary |= ((Hi16 >> 12) & 0xF) << 16; + emitWordLE(Binary); +} + void ARMCodeEmitter::emitMOVi2piecesInstruction(const MachineInstr &MI) { const MachineOperand &MO0 = MI.getOperand(0); const MachineOperand &MO1 = MI.getOperand(1); @@ -557,7 +628,6 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) { switch (Opcode) { default: llvm_unreachable("ARMCodeEmitter::emitPseudoInstruction"); - // FIXME: Add support for MOVimm32. case TargetOpcode::INLINEASM: { // We allow inline assembler nodes with empty bodies - they can // implicitly define registers, which is ok for JIT. @@ -604,6 +674,11 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) { emitMiscLoadStoreInstruction(MI, ARM::PC); break; } + + case ARM::MOVi32imm: + emitMOVi32immInstruction(MI); + break; + case ARM::MOVi2pieces: // Two instructions to materialize a constant. emitMOVi2piecesInstruction(MI); @@ -706,10 +781,6 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI, unsigned ImplicitRn) { const TargetInstrDesc &TID = MI.getDesc(); - if (TID.Opcode == ARM::BFC) { - report_fatal_error("ARMv6t2 JIT is not yet supported."); - } - // Part of binary is determined by TableGn. unsigned Binary = getBinaryCodeForInstr(MI); @@ -729,6 +800,45 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI, Binary |= (ARMRegisterInfo::getRegisterNumbering(ImplicitRd) << ARMII::RegRdShift); + if (TID.Opcode == ARM::MOVi16) { + // Get immediate from MI. + unsigned Lo16 = getMovi32Value(MI, MI.getOperand(OpIdx), + ARM::reloc_arm_movw); + // Encode imm which is the same as in emitMOVi32immInstruction(). + Binary |= Lo16 & 0xFFF; + Binary |= ((Lo16 >> 12) & 0xF) << 16; + emitWordLE(Binary); + return; + } else if(TID.Opcode == ARM::MOVTi16) { + unsigned Hi16 = (getMovi32Value(MI, MI.getOperand(OpIdx), + ARM::reloc_arm_movt) >> 16); + Binary |= Hi16 & 0xFFF; + Binary |= ((Hi16 >> 12) & 0xF) << 16; + emitWordLE(Binary); + return; + } else if ((TID.Opcode == ARM::BFC) || (TID.Opcode == ARM::BFI)) { + uint32_t v = ~MI.getOperand(2).getImm(); + int32_t lsb = CountTrailingZeros_32(v); + int32_t msb = (32 - CountLeadingZeros_32(v)) - 1; + // Instr{20-16} = msb, Instr{11-7} = lsb + Binary |= (msb & 0x1F) << 16; + Binary |= (lsb & 0x1F) << 7; + emitWordLE(Binary); + return; + } else if ((TID.Opcode == ARM::UBFX) || (TID.Opcode == ARM::SBFX)) { + // Encode Rn in Instr{0-3} + Binary |= getMachineOpValue(MI, OpIdx++); + + uint32_t lsb = MI.getOperand(OpIdx++).getImm(); + uint32_t widthm1 = MI.getOperand(OpIdx++).getImm() - 1; + + // Instr{20-16} = widthm1, Instr{11-7} = lsb + Binary |= (widthm1 & 0x1F) << 16; + Binary |= (lsb & 0x1F) << 7; + emitWordLE(Binary); + return; + } + // If this is a two-address operand, skip it. e.g. MOVCCr operand 1. if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1) ++OpIdx; @@ -1366,18 +1476,66 @@ ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI) { break; ++NumRegs; } - Binary |= NumRegs * 2; + // Bit 8 will be set if <list> consists of consecutive 64-bit (D) registers, + // e.g. D0; it will be 0 if <list> consists of 32-bit (S) registers.
+ if(Binary & 0x100) + Binary |= NumRegs * 2; + else + Binary |= NumRegs; emitWordLE(Binary); } void ARMCodeEmitter::emitMiscInstruction(const MachineInstr &MI) { + unsigned Opcode = MI.getDesc().Opcode; // Part of binary is determined by TableGn. unsigned Binary = getBinaryCodeForInstr(MI); // Set the conditional execution predicate Binary |= II->getPredicate(&MI) << ARMII::CondShift; + switch(Opcode) { + default: + llvm_unreachable("ARMCodeEmitter::emitMiscInstruction"); + + case ARM::FMSTAT: + // No further encoding needed. + break; + + case ARM::VMRS: + case ARM::VMSR: { + const MachineOperand &MO0 = MI.getOperand(0); + // Encode Rt. + Binary |= ARMRegisterInfo::getRegisterNumbering(MO0.getReg()) + << ARMII::RegRdShift; + break; + } + + case ARM::FCONSTD: + case ARM::FCONSTS: { + // Encode Dd / Sd. + Binary |= encodeVFPRd(MI, 0); + + // Encode imm., Table A7-18 VFP modified immediate constants + const MachineOperand &MO1 = MI.getOperand(1); + unsigned Imm = static_cast<unsigned>(MO1.getFPImm()->getValueAPF() + .bitcastToAPInt().getHiBits(32).getLimitedValue()); + unsigned ModifiedImm; + + if(Opcode == ARM::FCONSTS) + ModifiedImm = (Imm & 0x80000000) >> 24 | // a + (Imm & 0x03F80000) >> 19; // bcdefgh + else // Opcode == ARM::FCONSTD + ModifiedImm = (Imm & 0x80000000) >> 24 | // a + (Imm & 0x007F0000) >> 16; // bcdefgh + + // Insts{19-16} = abcd, Insts{3-0} = efgh + Binary |= ((ModifiedImm & 0xF0) >> 4) << 16; + Binary |= (ModifiedImm & 0xF); + break; + } + } + emitWordLE(Binary); } diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 845d088..c87f5d7 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -29,6 +29,7 @@ namespace { ARMExpandPseudo() : MachineFunctionPass(&ID) {} const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; virtual bool runOnMachineFunction(MachineFunction &Fn); @@ -37,11 +38,31 @@ namespace { } private: + void TransferImpOps(MachineInstr &OldMI, + MachineInstrBuilder &UseMI, MachineInstrBuilder &DefMI); bool ExpandMBB(MachineBasicBlock &MBB); }; char ARMExpandPseudo::ID = 0; } +/// TransferImpOps - Transfer implicit operands on the pseudo instruction to +/// the instructions created from the expansion. +void ARMExpandPseudo::TransferImpOps(MachineInstr &OldMI, + MachineInstrBuilder &UseMI, + MachineInstrBuilder &DefMI) { + const TargetInstrDesc &Desc = OldMI.getDesc(); + for (unsigned i = Desc.getNumOperands(), e = OldMI.getNumOperands(); + i != e; ++i) { + const MachineOperand &MO = OldMI.getOperand(i); + assert(MO.isReg() && MO.getReg()); + if (MO.isUse()) + UseMI.addReg(MO.getReg(), getKillRegState(MO.isKill())); + else + DefMI.addReg(MO.getReg(), + getDefRegState(true) | getDeadRegState(MO.isDead())); + } +} + bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { bool Modified = false; @@ -58,52 +79,82 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { unsigned NewLdOpc = (Opcode == ARM::tLDRpci_pic) ? 
ARM::tLDRpci : ARM::t2LDRpci; unsigned DstReg = MI.getOperand(0).getReg(); - if (!MI.getOperand(0).isDead()) { - MachineInstr *NewMI = - AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), - TII->get(NewLdOpc), DstReg) - .addOperand(MI.getOperand(1))); - NewMI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tPICADD)) - .addReg(DstReg, getDefRegState(true)) - .addReg(DstReg) - .addOperand(MI.getOperand(2)); - } + bool DstIsDead = MI.getOperand(0).isDead(); + MachineInstrBuilder MIB1 = + AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(NewLdOpc), DstReg) + .addOperand(MI.getOperand(1))); + (*MIB1).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + MachineInstrBuilder MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(ARM::tPICADD)) + .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead)) + .addReg(DstReg) + .addOperand(MI.getOperand(2)); + TransferImpOps(MI, MIB1, MIB2); MI.eraseFromParent(); Modified = true; break; } + case ARM::t2MOVi32imm: { + unsigned PredReg = 0; + ARMCC::CondCodes Pred = llvm::getInstrPredicate(&MI, PredReg); unsigned DstReg = MI.getOperand(0).getReg(); - if (!MI.getOperand(0).isDead()) { - const MachineOperand &MO = MI.getOperand(1); - MachineInstrBuilder LO16, HI16; - - LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::t2MOVi16), - DstReg); - HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::t2MOVTi16)) - .addReg(DstReg, getDefRegState(true)).addReg(DstReg); - - if (MO.isImm()) { - unsigned Imm = MO.getImm(); - unsigned Lo16 = Imm & 0xffff; - unsigned Hi16 = (Imm >> 16) & 0xffff; - LO16 = LO16.addImm(Lo16); - HI16 = HI16.addImm(Hi16); - } else { - const GlobalValue *GV = MO.getGlobal(); - unsigned TF = MO.getTargetFlags(); - LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16); - HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16); - // FIXME: What's about memoperands? 
- } - AddDefaultPred(LO16); - AddDefaultPred(HI16); + bool DstIsDead = MI.getOperand(0).isDead(); + const MachineOperand &MO = MI.getOperand(1); + MachineInstrBuilder LO16, HI16; + + LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::t2MOVi16), + DstReg); + HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::t2MOVTi16)) + .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead)) + .addReg(DstReg); + + if (MO.isImm()) { + unsigned Imm = MO.getImm(); + unsigned Lo16 = Imm & 0xffff; + unsigned Hi16 = (Imm >> 16) & 0xffff; + LO16 = LO16.addImm(Lo16); + HI16 = HI16.addImm(Hi16); + } else { + const GlobalValue *GV = MO.getGlobal(); + unsigned TF = MO.getTargetFlags(); + LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16); + HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16); } + (*LO16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + (*HI16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + LO16.addImm(Pred).addReg(PredReg); + HI16.addImm(Pred).addReg(PredReg); + TransferImpOps(MI, LO16, HI16); + MI.eraseFromParent(); + Modified = true; + break; + } + + case ARM::VMOVQQ: { + unsigned DstReg = MI.getOperand(0).getReg(); + bool DstIsDead = MI.getOperand(0).isDead(); + unsigned EvenDst = TRI->getSubReg(DstReg, ARM::qsub_0); + unsigned OddDst = TRI->getSubReg(DstReg, ARM::qsub_1); + unsigned SrcReg = MI.getOperand(1).getReg(); + bool SrcIsKill = MI.getOperand(1).isKill(); + unsigned EvenSrc = TRI->getSubReg(SrcReg, ARM::qsub_0); + unsigned OddSrc = TRI->getSubReg(SrcReg, ARM::qsub_1); + MachineInstrBuilder Even = + AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(ARM::VMOVQ)) + .addReg(EvenDst, getDefRegState(true) | getDeadRegState(DstIsDead)) + .addReg(EvenSrc, getKillRegState(SrcIsKill))); + MachineInstrBuilder Odd = + AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(ARM::VMOVQ)) + .addReg(OddDst, getDefRegState(true) | getDeadRegState(DstIsDead)) + .addReg(OddSrc, getKillRegState(SrcIsKill))); + TransferImpOps(MI, Even, Odd); MI.eraseFromParent(); Modified = true; } - // FIXME: expand t2MOVi32imm } MBBI = NMBBI; } @@ -113,6 +164,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) { TII = MF.getTarget().getInstrInfo(); + TRI = MF.getTarget().getRegisterInfo(); bool Modified = false; for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E; diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 616942c..9baef6b 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -37,7 +37,8 @@ using namespace llvm; static cl::opt<bool> UseRegSeq("neon-reg-sequence", cl::Hidden, - cl::desc("Use reg_sequence to model ld / st of multiple neon regs")); + cl::desc("Use reg_sequence to model ld / st of multiple neon regs"), + cl::init(true)); //===--------------------------------------------------------------------===// /// ARMDAGToDAGISel - ARM specific code to select ARM machine @@ -164,15 +165,34 @@ private: ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag); + SDNode *SelectConcatVector(SDNode *N); + /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps); - /// PairDRegs - Insert a pair of double registers into an implicit def to - /// form a quad register. 
+ /// PairDRegs - Form a quad register from a pair of D registers. + /// SDNode *PairDRegs(EVT VT, SDValue V0, SDValue V1); + + /// PairQRegs - Form a quad register pair from a pair of Q registers. + /// + SDNode *PairQRegs(EVT VT, SDValue V0, SDValue V1); + + /// QuadDRegs - Form a quad register pair from a quad of D registers. + /// + SDNode *QuadDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); + + /// QuadQRegs - Form 4 consecutive Q registers. + /// + SDNode *QuadQRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); + + /// OctoDRegs - Form 8 consecutive D registers. + /// + SDNode *OctoDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3, + SDValue V4, SDValue V5, SDValue V6, SDValue V7); }; } @@ -940,13 +960,13 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) { return NULL; } -/// PairDRegs - Insert a pair of double registers into an implicit def to -/// form a quad register. +/// PairDRegs - Form a quad register from a pair of D registers. +/// SDNode *ARMDAGToDAGISel::PairDRegs(EVT VT, SDValue V0, SDValue V1) { DebugLoc dl = V0.getNode()->getDebugLoc(); - SDValue SubReg0 = CurDAG->getTargetConstant(ARM::DSUBREG_0, MVT::i32); - SDValue SubReg1 = CurDAG->getTargetConstant(ARM::DSUBREG_1, MVT::i32); - if (UseRegSeq) { + SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32); + SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32); + if (llvm::ModelWithRegSequence()) { const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 }; return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4); } @@ -958,6 +978,62 @@ SDNode *ARMDAGToDAGISel::PairDRegs(EVT VT, SDValue V0, SDValue V1) { VT, SDValue(Pair, 0), V1, SubReg1); } +/// PairQRegs - Form 4 consecutive D registers from a pair of Q registers. +/// +SDNode *ARMDAGToDAGISel::PairQRegs(EVT VT, SDValue V0, SDValue V1) { + DebugLoc dl = V0.getNode()->getDebugLoc(); + SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32); + SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32); + const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 }; + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4); +} + +/// QuadDRegs - Form 4 consecutive D registers. +/// +SDNode *ARMDAGToDAGISel::QuadDRegs(EVT VT, SDValue V0, SDValue V1, + SDValue V2, SDValue V3) { + DebugLoc dl = V0.getNode()->getDebugLoc(); + SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32); + SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32); + SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, MVT::i32); + SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, MVT::i32); + const SDValue Ops[] = { V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3 }; + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 8); +} + +/// QuadQRegs - Form 4 consecutive Q registers. +/// +SDNode *ARMDAGToDAGISel::QuadQRegs(EVT VT, SDValue V0, SDValue V1, + SDValue V2, SDValue V3) { + DebugLoc dl = V0.getNode()->getDebugLoc(); + SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32); + SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32); + SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, MVT::i32); + SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, MVT::i32); + const SDValue Ops[] = { V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3 }; + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 8); +} + +/// OctoDRegs - Form 8 consecutive D registers.
+/// +SDNode *ARMDAGToDAGISel::OctoDRegs(EVT VT, SDValue V0, SDValue V1, + SDValue V2, SDValue V3, + SDValue V4, SDValue V5, + SDValue V6, SDValue V7) { + DebugLoc dl = V0.getNode()->getDebugLoc(); + SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32); + SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32); + SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, MVT::i32); + SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, MVT::i32); + SDValue SubReg4 = CurDAG->getTargetConstant(ARM::dsub_4, MVT::i32); + SDValue SubReg5 = CurDAG->getTargetConstant(ARM::dsub_5, MVT::i32); + SDValue SubReg6 = CurDAG->getTargetConstant(ARM::dsub_6, MVT::i32); + SDValue SubReg7 = CurDAG->getTargetConstant(ARM::dsub_7, MVT::i32); + const SDValue Ops[] ={ V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3, + V4, SubReg4, V5, SubReg5, V6, SubReg6, V7, SubReg7 }; + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 16); +} + /// GetNEONSubregVT - Given a type for a 128-bit NEON vector, return the type /// for a 64-bit subregister of the vector. static EVT GetNEONSubregVT(EVT VT) { @@ -1011,7 +1087,34 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain }; std::vector<EVT> ResTys(NumVecs, VT); ResTys.push_back(MVT::Other); - return CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5); + SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5); + if (!llvm::ModelWithRegSequence() || NumVecs < 2) + return VLd; + + SDValue RegSeq; + SDValue V0 = SDValue(VLd, 0); + SDValue V1 = SDValue(VLd, 1); + + // Form a REG_SEQUENCE to force register allocation. + if (NumVecs == 2) + RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0); + else { + SDValue V2 = SDValue(VLd, 2); + // If it's a vld3, form a quad D-register but discard the last part. + SDValue V3 = (NumVecs == 3) + ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) + : SDValue(VLd, 3); + RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0); + } + + assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { + SDValue D = CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec, + dl, VT, RegSeq); + ReplaceUses(SDValue(N, Vec), D); + } + ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, NumVecs)); + return NULL; } EVT RegVT = GetNEONSubregVT(VT); @@ -1026,9 +1129,24 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, Chain = SDValue(VLd, 2 * NumVecs); // Combine the even and odd subregs to produce the result. 
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { - SDNode *Q = PairDRegs(VT, SDValue(VLd, 2*Vec), SDValue(VLd, 2*Vec+1)); - ReplaceUses(SDValue(N, Vec), SDValue(Q, 0)); + if (llvm::ModelWithRegSequence()) { + if (NumVecs == 1) { + SDNode *Q = PairDRegs(VT, SDValue(VLd, 0), SDValue(VLd, 1)); + ReplaceUses(SDValue(N, 0), SDValue(Q, 0)); + } else { + SDValue QQ = SDValue(QuadDRegs(MVT::v4i64, + SDValue(VLd, 0), SDValue(VLd, 1), + SDValue(VLd, 2), SDValue(VLd, 3)), 0); + SDValue Q0 = CurDAG->getTargetExtractSubreg(ARM::qsub_0, dl, VT, QQ); + SDValue Q1 = CurDAG->getTargetExtractSubreg(ARM::qsub_1, dl, VT, QQ); + ReplaceUses(SDValue(N, 0), Q0); + ReplaceUses(SDValue(N, 1), Q1); + } + } else { + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { + SDNode *Q = PairDRegs(VT, SDValue(VLd, 2*Vec), SDValue(VLd, 2*Vec+1)); + ReplaceUses(SDValue(N, Vec), SDValue(Q, 0)); + } } } else { // Otherwise, quad registers are loaded with two separate instructions, @@ -1051,10 +1169,37 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 6); Chain = SDValue(VLdB, NumVecs+1); - // Combine the even and odd subregs to produce the result. - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { - SDNode *Q = PairDRegs(VT, SDValue(VLdA, Vec), SDValue(VLdB, Vec)); - ReplaceUses(SDValue(N, Vec), SDValue(Q, 0)); + if (llvm::ModelWithRegSequence()) { + SDValue V0 = SDValue(VLdA, 0); + SDValue V1 = SDValue(VLdB, 0); + SDValue V2 = SDValue(VLdA, 1); + SDValue V3 = SDValue(VLdB, 1); + SDValue V4 = SDValue(VLdA, 2); + SDValue V5 = SDValue(VLdB, 2); + SDValue V6 = (NumVecs == 3) + ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT), + 0) + : SDValue(VLdA, 3); + SDValue V7 = (NumVecs == 3) + ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT), + 0) + : SDValue(VLdB, 3); + SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V0, V1, V2, V3, + V4, V5, V6, V7), 0); + + // Extract out the 3 / 4 Q registers. + assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering"); + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { + SDValue Q = CurDAG->getTargetExtractSubreg(ARM::qsub_0+Vec, + dl, VT, RegSeq); + ReplaceUses(SDValue(N, Vec), Q); + } + } else { + // Combine the even and odd subregs to produce the result. + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { + SDNode *Q = PairDRegs(VT, SDValue(VLdA, Vec), SDValue(VLdB, Vec)); + ReplaceUses(SDValue(N, Vec), SDValue(Q, 0)); + } } } ReplaceUses(SDValue(N, NumVecs), Chain); @@ -1102,12 +1247,43 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, Ops.push_back(Align); if (is64BitVector) { - unsigned Opc = DOpcodes[OpcodeIndex]; - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - Ops.push_back(N->getOperand(Vec+3)); + if (llvm::ModelWithRegSequence() && NumVecs >= 2) { + SDValue RegSeq; + SDValue V0 = N->getOperand(0+3); + SDValue V1 = N->getOperand(1+3); + + // Form a REG_SEQUENCE to force register allocation. + if (NumVecs == 2) + RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0); + else { + SDValue V2 = N->getOperand(2+3); + // If it's a vst3, form a quad D-register and leave the last part as + // an undef. + SDValue V3 = (NumVecs == 3) + ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) + : N->getOperand(3+3); + RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0); + } + + // Now extract the D registers back out.
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT, + RegSeq)); + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, VT, + RegSeq)); + if (NumVecs > 2) + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, VT, + RegSeq)); + if (NumVecs > 3) + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT, + RegSeq)); + } else { + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + Ops.push_back(N->getOperand(Vec+3)); + } Ops.push_back(Pred); Ops.push_back(Reg0); // predicate register Ops.push_back(Chain); + unsigned Opc = DOpcodes[OpcodeIndex]; return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+5); } @@ -1116,48 +1292,114 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, // Quad registers are directly supported for VST1 and VST2, // storing pairs of D regs. unsigned Opc = QOpcodes0[OpcodeIndex]; - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT, - N->getOperand(Vec+3))); - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT, - N->getOperand(Vec+3))); + if (llvm::ModelWithRegSequence() && NumVecs == 2) { + // First extract the pair of Q registers. + SDValue Q0 = N->getOperand(3); + SDValue Q1 = N->getOperand(4); + + // Form a QQ register. + SDValue QQ = SDValue(PairQRegs(MVT::v4i64, Q0, Q1), 0); + + // Now extract the D registers back out. + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT, + QQ)); + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT, + QQ)); + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, RegVT, + QQ)); + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, RegVT, + QQ)); + Ops.push_back(Pred); + Ops.push_back(Reg0); // predicate register + Ops.push_back(Chain); + return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 5 + 4); + } else { + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT, + N->getOperand(Vec+3))); + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT, + N->getOperand(Vec+3))); + } + Ops.push_back(Pred); + Ops.push_back(Reg0); // predicate register + Ops.push_back(Chain); + return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), + 5 + 2 * NumVecs); } - Ops.push_back(Pred); - Ops.push_back(Reg0); // predicate register - Ops.push_back(Chain); - return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), - 5 + 2 * NumVecs); } // Otherwise, quad registers are stored with two separate instructions, // where one stores the even registers and the other stores the odd registers. + if (llvm::ModelWithRegSequence()) { + // Form the QQQQ REG_SEQUENCE. + SDValue V[8]; + for (unsigned Vec = 0, i = 0; Vec < NumVecs; ++Vec, i+=2) { + V[i] = CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT, + N->getOperand(Vec+3)); + V[i+1] = CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT, + N->getOperand(Vec+3)); + } + if (NumVecs == 3) + V[6] = V[7] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, + dl, RegVT), 0); - Ops.push_back(Reg0); // post-access address offset + SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V[0], V[1], V[2], V[3], + V[4], V[5], V[6], V[7]), 0); - // Store the even subregs. 
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT, - N->getOperand(Vec+3))); - Ops.push_back(Pred); - Ops.push_back(Reg0); // predicate register - Ops.push_back(Chain); - unsigned Opc = QOpcodes0[OpcodeIndex]; - SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), - MVT::Other, Ops.data(), NumVecs+6); - Chain = SDValue(VStA, 1); - - // Store the odd subregs. - Ops[0] = SDValue(VStA, 0); // MemAddr - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT, - N->getOperand(Vec+3)); - Ops[NumVecs+5] = Chain; - Opc = QOpcodes1[OpcodeIndex]; - SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), - MVT::Other, Ops.data(), NumVecs+6); - Chain = SDValue(VStB, 1); - ReplaceUses(SDValue(N, 0), Chain); - return NULL; + // Store the even D registers. + assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); + Ops.push_back(Reg0); // post-access address offset + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec*2, dl, + RegVT, RegSeq)); + Ops.push_back(Pred); + Ops.push_back(Reg0); // predicate register + Ops.push_back(Chain); + unsigned Opc = QOpcodes0[OpcodeIndex]; + SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), + MVT::Other, Ops.data(), NumVecs+6); + Chain = SDValue(VStA, 1); + + // Store the odd D registers. + Ops[0] = SDValue(VStA, 0); // MemAddr + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::dsub_1+Vec*2, dl, + RegVT, RegSeq); + Ops[NumVecs+5] = Chain; + Opc = QOpcodes1[OpcodeIndex]; + SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), + MVT::Other, Ops.data(), NumVecs+6); + Chain = SDValue(VStB, 1); + ReplaceUses(SDValue(N, 0), Chain); + return NULL; + } else { + Ops.push_back(Reg0); // post-access address offset + + // Store the even subregs. + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT, + N->getOperand(Vec+3))); + Ops.push_back(Pred); + Ops.push_back(Reg0); // predicate register + Ops.push_back(Chain); + unsigned Opc = QOpcodes0[OpcodeIndex]; + SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), + MVT::Other, Ops.data(), NumVecs+6); + Chain = SDValue(VStA, 1); + + // Store the odd subregs. + Ops[0] = SDValue(VStA, 0); // MemAddr + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT, + N->getOperand(Vec+3)); + Ops[NumVecs+5] = Chain; + Opc = QOpcodes1[OpcodeIndex]; + SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), + MVT::Other, Ops.data(), NumVecs+6); + Chain = SDValue(VStB, 1); + ReplaceUses(SDValue(N, 0), Chain); + return NULL; + } } SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, @@ -1180,11 +1422,13 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, // Quad registers are handled by load/store of subregs. Find the subreg info. unsigned NumElts = 0; int SubregIdx = 0; + bool Even = false; EVT RegVT = VT; if (!is64BitVector) { RegVT = GetNEONSubregVT(VT); NumElts = RegVT.getVectorNumElements(); - SubregIdx = (Lane < NumElts) ? ARM::DSUBREG_0 : ARM::DSUBREG_1; + SubregIdx = (Lane < NumElts) ? 
ARM::dsub_0 : ARM::dsub_1; + Even = Lane < NumElts; } unsigned OpcodeIndex; @@ -1211,8 +1455,35 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, unsigned Opc = 0; if (is64BitVector) { Opc = DOpcodes[OpcodeIndex]; - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - Ops.push_back(N->getOperand(Vec+3)); + if (llvm::ModelWithRegSequence()) { + SDValue RegSeq; + SDValue V0 = N->getOperand(0+3); + SDValue V1 = N->getOperand(1+3); + if (NumVecs == 2) { + RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0); + } else { + SDValue V2 = N->getOperand(2+3); + SDValue V3 = (NumVecs == 3) + ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) + : N->getOperand(3+3); + RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0); + } + + // Now extract the D registers back out. + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT, + RegSeq)); + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, VT, + RegSeq)); + if (NumVecs > 2) + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, VT, + RegSeq)); + if (NumVecs > 3) + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT, + RegSeq)); + } else { + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + Ops.push_back(N->getOperand(Vec+3)); + } } else { // Check if this is loading the even or odd subreg of a Q register. if (Lane < NumElts) { @@ -1221,10 +1492,32 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, Lane -= NumElts; Opc = QOpcodes1[OpcodeIndex]; } - // Extract the subregs of the input vector. - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - Ops.push_back(CurDAG->getTargetExtractSubreg(SubregIdx, dl, RegVT, - N->getOperand(Vec+3))); + + if (llvm::ModelWithRegSequence()) { + SDValue RegSeq; + SDValue V0 = N->getOperand(0+3); + SDValue V1 = N->getOperand(1+3); + if (NumVecs == 2) { + RegSeq = SDValue(PairQRegs(MVT::v4i64, V0, V1), 0); + } else { + SDValue V2 = N->getOperand(2+3); + SDValue V3 = (NumVecs == 3) + ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) + : N->getOperand(3+3); + RegSeq = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0); + } + + // Extract the subregs of the input vector. + unsigned SubIdx = Even ? ARM::dsub_0 : ARM::dsub_1; + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + Ops.push_back(CurDAG->getTargetExtractSubreg(SubIdx+Vec*2, dl, RegVT, + RegSeq)); + } else { + // Extract the subregs of the input vector. + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + Ops.push_back(CurDAG->getTargetExtractSubreg(SubregIdx, dl, RegVT, + N->getOperand(Vec+3))); + } } Ops.push_back(getI32Imm(Lane)); Ops.push_back(Pred); @@ -1236,8 +1529,60 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, std::vector<EVT> ResTys(NumVecs, RegVT); ResTys.push_back(MVT::Other); - SDNode *VLdLn = - CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), NumVecs+6); + SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(),NumVecs+6); + + if (llvm::ModelWithRegSequence()) { + // Form a REG_SEQUENCE to force register allocation. + SDValue RegSeq; + if (is64BitVector) { + SDValue V0 = SDValue(VLdLn, 0); + SDValue V1 = SDValue(VLdLn, 1); + if (NumVecs == 2) { + RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0); + } else { + SDValue V2 = SDValue(VLdLn, 2); + // If it's a vld3, form a quad D-register but discard the last part. + SDValue V3 = (NumVecs == 3) + ? 
SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) + : SDValue(VLdLn, 3); + RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0); + } + } else { + // For 128-bit vectors, take the 64-bit results of the load and insert them + // as subregs into the result. + SDValue V[8]; + for (unsigned Vec = 0, i = 0; Vec < NumVecs; ++Vec, i+=2) { + if (Even) { + V[i] = SDValue(VLdLn, Vec); + V[i+1] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, + dl, RegVT), 0); + } else { + V[i] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, + dl, RegVT), 0); + V[i+1] = SDValue(VLdLn, Vec); + } + } + if (NumVecs == 3) + V[6] = V[7] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, + dl, RegVT), 0); + + if (NumVecs == 2) + RegSeq = SDValue(QuadDRegs(MVT::v4i64, V[0], V[1], V[2], V[3]), 0); + else + RegSeq = SDValue(OctoDRegs(MVT::v8i64, V[0], V[1], V[2], V[3], + V[4], V[5], V[6], V[7]), 0); + } + + assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); + assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering"); + unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0; + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + ReplaceUses(SDValue(N, Vec), + CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, RegSeq)); + ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, NumVecs)); + return NULL; + } + // For a 64-bit vector load to D registers, nothing more needs to be done. if (is64BitVector) return VLdLn; @@ -1481,6 +1826,21 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) { return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 5); } +SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) { + // The only time a CONCAT_VECTORS operation can have legal types is when + // two 64-bit vectors are concatenated to a 128-bit vector. + EVT VT = N->getValueType(0); + if (!VT.is128BitVector() || N->getNumOperands() != 2) + llvm_unreachable("unexpected CONCAT_VECTORS"); + DebugLoc dl = N->getDebugLoc(); + SDValue V0 = N->getOperand(0); + SDValue V1 = N->getOperand(1); + SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32); + SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32); + const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 }; + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4); +} + SDNode *ARMDAGToDAGISel::Select(SDNode *N) { DebugLoc dl = N->getDebugLoc(); @@ -1695,8 +2055,12 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue Pred = getAL(CurDAG); SDValue PredReg = CurDAG->getRegister(0, MVT::i32); SDValue Ops[] = { N->getOperand(1), AM5Opc, Pred, PredReg, Chain }; - return CurDAG->getMachineNode(ARM::VLDMQ, dl, MVT::v2f64, MVT::Other, - Ops, 5); + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = cast<MemSDNode>(N)->getMemOperand(); + SDNode *Ret = CurDAG->getMachineNode(ARM::VLDMQ, dl, + MVT::v2f64, MVT::Other, Ops, 5); + cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1); + return Ret; } // Other cases are autogenerated. 
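The REG_SEQUENCE changes above (SelectVLDSTLane, SelectConcatVector) all depend on the subregister indices being numbered consecutively, which the asserts verify. Below is a minimal standalone sketch of that index arithmetic; the enum values are made-up stand-ins for the TableGen-generated constants, not LLVM's real values.

    #include <cassert>
    #include <cstdio>

    // Hypothetical stand-ins for the generated subregister indices; the isel
    // code above only assumes they are consecutive.
    enum SubRegIdx { dsub_0 = 1, dsub_1, dsub_2, dsub_3,
                     dsub_4, dsub_5, dsub_6, dsub_7 };

    int main() {
      // A QQQQ register sequence covers 8 D registers. For the quad-register
      // vld/vst forms, the even halves live at dsub_0 + 2*Vec and the odd
      // halves at dsub_1 + 2*Vec.
      for (unsigned Vec = 0; Vec < 4; ++Vec) {
        unsigned Even = dsub_0 + Vec * 2;
        unsigned Odd  = dsub_1 + Vec * 2;
        assert(Odd == Even + 1 && "even/odd D halves must be adjacent");
        std::printf("Q part %u -> dsub_%u (even), dsub_%u (odd)\n",
                    Vec, Even - dsub_0, Odd - dsub_0);
      }
      return 0;
    }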
break; @@ -1712,7 +2076,11 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue PredReg = CurDAG->getRegister(0, MVT::i32); SDValue Ops[] = { N->getOperand(1), N->getOperand(2), AM5Opc, Pred, PredReg, Chain }; - return CurDAG->getMachineNode(ARM::VSTMQ, dl, MVT::Other, Ops, 6); + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = cast<MemSDNode>(N)->getMemOperand(); + SDNode *Ret = CurDAG->getMachineNode(ARM::VSTMQ, dl, MVT::Other, Ops, 6); + cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1); + return Ret; } // Other cases are autogenerated. break; @@ -1971,7 +2339,11 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { return SelectVLDSTLane(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); } } + break; } + + case ISD::CONCAT_VECTORS: + return SelectConcatVector(N); } return SelectCode(N); @@ -1995,3 +2367,9 @@ FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM, CodeGenOpt::Level OptLevel) { return new ARMDAGToDAGISel(TM, OptLevel); } + +/// ModelWithRegSequence - Return true if isel should use REG_SEQUENCE to model +/// operations involving sub-registers. +bool llvm::ModelWithRegSequence() { + return UseRegSeq; +} diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index d3842a6..b8126a3 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -94,7 +94,10 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT, } setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom); setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom); - setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Custom); + if (llvm::ModelWithRegSequence()) + setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal); + else + setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Custom); setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Expand); setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand); setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand); @@ -360,8 +363,11 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::BSWAP, MVT::i32, Expand); // These are expanded into libcalls. - setOperationAction(ISD::SDIV, MVT::i32, Expand); - setOperationAction(ISD::UDIV, MVT::i32, Expand); + if (!Subtarget->hasDivide()) { + // v7M has a hardware divider + setOperationAction(ISD::SDIV, MVT::i32, Expand); + setOperationAction(ISD::UDIV, MVT::i32, Expand); + } setOperationAction(ISD::SREM, MVT::i32, Expand); setOperationAction(ISD::UREM, MVT::i32, Expand); setOperationAction(ISD::SDIVREM, MVT::i32, Expand); @@ -373,6 +379,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); setOperationAction(ISD::BlockAddress, MVT::i32, Custom); + setOperationAction(ISD::TRAP, MVT::Other, Legal); + // Use the default implementation. setOperationAction(ISD::VASTART, MVT::Other, Custom); setOperationAction(ISD::VAARG, MVT::Other, Expand); @@ -387,7 +395,11 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom); - if (!Subtarget->hasV6Ops() && !Subtarget->isThumb2()) { + // If the subtarget does not have extract instructions, sign_extend_inreg + // needs to be expanded. Extract is available in ARM mode on v6 and up, + // and on most Thumb2 implementations. 
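When the subtarget has neither the ARM-mode sxtb/sxth forms nor the Thumb2 extract/pack instructions, the Expand action above turns SIGN_EXTEND_INREG into a shift pair. A small sketch of what that expansion computes, assuming the usual two's-complement arithmetic right shift (true of every ARM ABI):

    #include <cassert>
    #include <cstdint>

    // sign_extend_inreg i16: shift the low half up to the sign position,
    // then arithmetic-shift it back down.
    int32_t signExtendInReg16(int32_t X) {
      return (int32_t)((uint32_t)X << 16) >> 16;
    }

    int main() {
      assert(signExtendInReg16(0x00007FFF) == 0x7FFF);
      assert(signExtendInReg16(0x00008000) == -32768);
      assert(signExtendInReg16((int32_t)0xABCD8001) == (int32_t)0xFFFF8001);
      return 0;
    }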
+ if ((!Subtarget->isThumb() && !Subtarget->hasV6Ops()) + || (Subtarget->isThumb2() && !Subtarget->hasT2ExtractPack())) { setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); } @@ -400,6 +412,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // We want to custom lower some of our intrinsics. setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); + setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); + setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); setOperationAction(ISD::SETCC, MVT::i32, Expand); setOperationAction(ISD::SETCC, MVT::f32, Expand); @@ -451,9 +465,14 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // ARMISD::VMOVRRD - No need to call setTargetDAGCombine setTargetDAGCombine(ISD::ADD); setTargetDAGCombine(ISD::SUB); + setTargetDAGCombine(ISD::MUL); setStackPointerRegisterToSaveRestore(ARM::SP); - setSchedulingPreference(SchedulingForRegPressure); + + if (UseSoftFloat || Subtarget->isThumb1Only() || !Subtarget->hasVFP2()) + setSchedulingPreference(Sched::RegPressure); + else + setSchedulingPreference(Sched::Hybrid); // FIXME: If-converter should use instruction latency to determine // profitability rather than relying on fixed limits. @@ -567,11 +586,35 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { } } +/// getRegClassFor - Return the register class that should be used for the +/// specified value type. +TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const { + // Map v4i64 to QQ registers but do not make the type legal. Similarly map + // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to + // load / store 4 to 8 consecutive D registers. + if (Subtarget->hasNEON()) { + if (VT == MVT::v4i64) + return ARM::QQPRRegisterClass; + else if (VT == MVT::v8i64) + return ARM::QQQQPRRegisterClass; + } + return TargetLowering::getRegClassFor(VT); +} + /// getFunctionAlignment - Return the Log2 alignment of this function. unsigned ARMTargetLowering::getFunctionAlignment(const Function *F) const { return getTargetMachine().getSubtarget<ARMSubtarget>().isThumb() ? 0 : 1; } +Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const { + for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { + EVT VT = N->getValueType(i); + if (VT.isFloatingPoint() || VT.isVector()) + return Sched::Latency; + } + return Sched::RegPressure; +} + //===----------------------------------------------------------------------===// // Lowering Code //===----------------------------------------------------------------------===// @@ -1507,6 +1550,23 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, } SDValue +ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const { + DebugLoc dl = Op.getDebugLoc(); + SDValue Val = Subtarget->isThumb() ? 
+ DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::SP, MVT::i32) : + DAG.getConstant(0, MVT::i32); + return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, Op.getOperand(0), + Op.getOperand(1), Val); +} + +SDValue +ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const { + DebugLoc dl = Op.getDebugLoc(); + return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0), + Op.getOperand(1), DAG.getConstant(0, MVT::i32)); +} + +SDValue ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) const { @@ -1545,12 +1605,6 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, } return Result; } - case Intrinsic::eh_sjlj_setjmp: - SDValue Val = Subtarget->isThumb() ? - DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::SP, MVT::i32) : - DAG.getConstant(0, MVT::i32); - return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, Op.getOperand(1), - Val); } } @@ -1652,7 +1706,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, RC = ARM::GPRRegisterClass; // Transform the arguments stored in physical registers into virtual ones. - unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); + unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); SDValue ArgValue2; @@ -2092,9 +2146,31 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMCC, CCR, Cmp); } +SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{ + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MFI->setReturnAddressIsTaken(true); + + EVT VT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + if (Depth) { + SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); + SDValue Offset = DAG.getConstant(4, MVT::i32); + return DAG.getLoad(VT, dl, DAG.getEntryNode(), + DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset), + NULL, 0, false, false, 0); + } + + // Return LR, which contains the return address. Mark it an implicit live-in. + unsigned Reg = MF.addLiveIn(ARM::LR, ARM::GPRRegisterClass); + return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT); +} + SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); MFI->setFrameAddressIsTaken(true); + EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); @@ -2107,116 +2183,6 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { return FrameAddr; } -SDValue -ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, - SDValue Chain, - SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, - bool isVolatile, bool AlwaysInline, - const Value *DstSV, - uint64_t DstSVOff, - const Value *SrcSV, - uint64_t SrcSVOff) const { - // Do repeated 4-byte loads and stores. To be improved. - // This requires 4-byte alignment. - if ((Align & 3) != 0) - return SDValue(); - // This requires the copy size to be a constant, preferrably - // within a subtarget-specific limit. 
- ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); - if (!ConstantSize) - return SDValue(); - uint64_t SizeVal = ConstantSize->getZExtValue(); - if (!AlwaysInline && SizeVal > getSubtarget()->getMaxInlineSizeThreshold()) - return SDValue(); - - unsigned BytesLeft = SizeVal & 3; - unsigned NumMemOps = SizeVal >> 2; - unsigned EmittedNumMemOps = 0; - EVT VT = MVT::i32; - unsigned VTSize = 4; - unsigned i = 0; - const unsigned MAX_LOADS_IN_LDM = 6; - SDValue TFOps[MAX_LOADS_IN_LDM]; - SDValue Loads[MAX_LOADS_IN_LDM]; - uint64_t SrcOff = 0, DstOff = 0; - - // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the - // same number of stores. The loads and stores will get combined into - // ldm/stm later on. - while (EmittedNumMemOps < NumMemOps) { - for (i = 0; - i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { - Loads[i] = DAG.getLoad(VT, dl, Chain, - DAG.getNode(ISD::ADD, dl, MVT::i32, Src, - DAG.getConstant(SrcOff, MVT::i32)), - SrcSV, SrcSVOff + SrcOff, isVolatile, false, 0); - TFOps[i] = Loads[i].getValue(1); - SrcOff += VTSize; - } - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); - - for (i = 0; - i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { - TFOps[i] = DAG.getStore(Chain, dl, Loads[i], - DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, - DAG.getConstant(DstOff, MVT::i32)), - DstSV, DstSVOff + DstOff, isVolatile, false, 0); - DstOff += VTSize; - } - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); - - EmittedNumMemOps += i; - } - - if (BytesLeft == 0) - return Chain; - - // Issue loads / stores for the trailing (1 - 3) bytes. - unsigned BytesLeftSave = BytesLeft; - i = 0; - while (BytesLeft) { - if (BytesLeft >= 2) { - VT = MVT::i16; - VTSize = 2; - } else { - VT = MVT::i8; - VTSize = 1; - } - - Loads[i] = DAG.getLoad(VT, dl, Chain, - DAG.getNode(ISD::ADD, dl, MVT::i32, Src, - DAG.getConstant(SrcOff, MVT::i32)), - SrcSV, SrcSVOff + SrcOff, false, false, 0); - TFOps[i] = Loads[i].getValue(1); - ++i; - SrcOff += VTSize; - BytesLeft -= VTSize; - } - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); - - i = 0; - BytesLeft = BytesLeftSave; - while (BytesLeft) { - if (BytesLeft >= 2) { - VT = MVT::i16; - VTSize = 2; - } else { - VT = MVT::i8; - VTSize = 1; - } - - TFOps[i] = DAG.getStore(Chain, dl, Loads[i], - DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, - DAG.getConstant(DstOff, MVT::i32)), - DstSV, DstSVOff + DstOff, false, false, 0); - ++i; - DstOff += VTSize; - BytesLeft -= VTSize; - } - return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); -} - /// ExpandBIT_CONVERT - If the target supports VFP, this function is called to /// expand a bit convert where either the source or destination type is i64 to /// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64 @@ -2434,9 +2400,9 @@ static SDValue LowerShift(SDNode *N, SelectionDAG &DAG, // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr. SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), - DAG.getConstant(0, MVT::i32)); + DAG.getConstant(0, MVT::i32)); SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), - DAG.getConstant(1, MVT::i32)); + DAG.getConstant(1, MVT::i32)); // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and // captures the result into a carry flag. 
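The Lo/Hi EXTRACT_ELEMENT split above feeds this RRX lowering: a 64-bit shift right by one becomes a 32-bit shift of the high word whose carry-out rotates into the low word. A sketch of that dataflow with the carry made explicit:

    #include <cassert>
    #include <cstdint>

    // SRL of a 64-bit value by 1, on 32-bit halves: SRL_FLAG shifts Hi and
    // captures the bit shifted out; RRX rotates that carry into the top of Lo.
    uint64_t srl64By1(uint32_t Lo, uint32_t Hi) {
      uint32_t Carry = Hi & 1;                    // carry-out of the high word
      uint32_t NewHi = Hi >> 1;                   // SRL_FLAG
      uint32_t NewLo = (Lo >> 1) | (Carry << 31); // RRX
      return ((uint64_t)NewHi << 32) | NewLo;
    }

    int main() {
      uint64_t V = 0x8000000100000003ULL;
      assert(srl64By1((uint32_t)V, (uint32_t)(V >> 32)) == (V >> 1));
      return 0;
    }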
@@ -2879,21 +2845,60 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { } } - // If there are only 2 elements in a 128-bit vector, insert them into an - // undef vector. This handles the common case for 128-bit vector argument - // passing, where the insertions should be translated to subreg accesses - // with no real instructions. - if (VT.is128BitVector() && Op.getNumOperands() == 2) { - SDValue Val = DAG.getUNDEF(VT); - SDValue Op0 = Op.getOperand(0); - SDValue Op1 = Op.getOperand(1); - if (Op0.getOpcode() != ISD::UNDEF) - Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, Op0, - DAG.getIntPtrConstant(0)); - if (Op1.getOpcode() != ISD::UNDEF) - Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, Op1, - DAG.getIntPtrConstant(1)); - return Val; + // Scan through the operands to see if only one value is used. + unsigned NumElts = VT.getVectorNumElements(); + bool isOnlyLowElement = true; + bool usesOnlyOneValue = true; + bool isConstant = true; + SDValue Value; + for (unsigned i = 0; i < NumElts; ++i) { + SDValue V = Op.getOperand(i); + if (V.getOpcode() == ISD::UNDEF) + continue; + if (i > 0) + isOnlyLowElement = false; + if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V)) + isConstant = false; + + if (!Value.getNode()) + Value = V; + else if (V != Value) + usesOnlyOneValue = false; + } + + if (!Value.getNode()) + return DAG.getUNDEF(VT); + + if (isOnlyLowElement) + return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value); + + // If all elements are constants, fall back to the default expansion, which + // will generate a load from the constant pool. + if (isConstant) + return SDValue(); + + // Use VDUP for non-constant splats. + unsigned EltSize = VT.getVectorElementType().getSizeInBits(); + if (usesOnlyOneValue && EltSize <= 32) + return DAG.getNode(ARMISD::VDUP, dl, VT, Value); + + // Vectors with 32- or 64-bit elements can be built by directly assigning + // the subregisters. + if (EltSize >= 32) { + // Do the expansion with floating-point types, since that is what the VFP + // registers are defined to use, and since i64 is not legal. + EVT EltVT = EVT::getFloatingPointVT(EltSize); + EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts); + SDValue Val = DAG.getUNDEF(VecVT); + for (unsigned i = 0; i < NumElts; ++i) { + SDValue Elt = Op.getOperand(i); + if (Elt.getOpcode() == ISD::UNDEF) + continue; + Elt = DAG.getNode(ISD::BIT_CONVERT, dl, EltVT, Elt); + Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Val, Elt, + DAG.getConstant(i, MVT::i32)); + } + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val); } return SDValue(); @@ -3083,8 +3088,8 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { // If the shuffle is not directly supported and it has 4 elements, use // the PerfectShuffle-generated table to synthesize it from other shuffles. - if (VT.getVectorNumElements() == 4 && - (VT.is128BitVector() || VT.is64BitVector())) { + unsigned NumElts = VT.getVectorNumElements(); + if (NumElts == 4) { unsigned PFIndexes[4]; for (unsigned i = 0; i != 4; ++i) { if (ShuffleMask[i] < 0) @@ -3096,7 +3101,6 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { // Compute the index in the perfect shuffle table. 
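The rewritten LowerBUILD_VECTOR above first classifies the operands (all undef, only lane 0 defined, all constant, or one repeated value) and only then picks SCALAR_TO_VECTOR, the constant-pool expansion, VDUP, or per-lane inserts. A sketch of the same scan over plain integers; the undef sentinel is an assumption of the sketch, where the real code tests for the ISD::UNDEF opcode:

    #include <climits>
    #include <cstdio>
    #include <vector>

    constexpr int kUndef = INT_MIN; // stand-in for an ISD::UNDEF operand

    void classify(const std::vector<int> &Lanes) {
      bool OnlyLowElement = true, OnlyOneValue = true, HaveValue = false;
      int Value = 0;
      for (std::size_t i = 0; i < Lanes.size(); ++i) {
        if (Lanes[i] == kUndef) continue;
        if (i > 0) OnlyLowElement = false;
        if (!HaveValue) { Value = Lanes[i]; HaveValue = true; }
        else if (Lanes[i] != Value) OnlyOneValue = false;
      }
      if (!HaveValue)          std::puts("UNDEF vector");
      else if (OnlyLowElement) std::puts("SCALAR_TO_VECTOR");
      else if (OnlyOneValue)   std::puts("VDUP splat");
      else                     std::puts("constant pool or per-lane inserts");
    }

    int main() {
      classify({7, kUndef, 7, 7});           // VDUP splat
      classify({5, kUndef, kUndef, kUndef}); // SCALAR_TO_VECTOR
      classify({1, 2, 3, 4});                // constant pool / per-lane
      return 0;
    }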
unsigned PFTableIndex = PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; - unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; unsigned Cost = (PFEntry >> 30); @@ -3104,6 +3108,29 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); } + // Implement shuffles with 32- or 64-bit elements as subreg copies. + unsigned EltSize = VT.getVectorElementType().getSizeInBits(); + if (EltSize >= 32) { + // Do the expansion with floating-point types, since that is what the VFP + // registers are defined to use, and since i64 is not legal. + EVT EltVT = EVT::getFloatingPointVT(EltSize); + EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts); + V1 = DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, V1); + V2 = DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, V2); + SDValue Val = DAG.getUNDEF(VecVT); + for (unsigned i = 0; i < NumElts; ++i) { + if (ShuffleMask[i] < 0) + continue; + SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, + ShuffleMask[i] < (int)NumElts ? V1 : V2, + DAG.getConstant(ShuffleMask[i] & (NumElts-1), + MVT::i32)); + Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Val, + Elt, DAG.getConstant(i, MVT::i32)); + } + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val); + } + return SDValue(); } @@ -3158,9 +3185,11 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG); case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); - case ISD::RETURNADDR: break; + case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG); + case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG); + case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG, Subtarget); case ISD::BIT_CONVERT: return ExpandBIT_CONVERT(Op.getNode(), DAG); @@ -3667,6 +3696,62 @@ static SDValue PerformSUBCombine(SDNode *N, return SDValue(); } +static SDValue PerformMULCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { + SelectionDAG &DAG = DCI.DAG; + + if (Subtarget->isThumb1Only()) + return SDValue(); + + if (DAG.getMachineFunction(). + getFunction()->hasFnAttr(Attribute::OptimizeForSize)) + return SDValue(); + + if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) + return SDValue(); + + EVT VT = N->getValueType(0); + if (VT != MVT::i32) + return SDValue(); + + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); + if (!C) + return SDValue(); + + uint64_t MulAmt = C->getZExtValue(); + unsigned ShiftAmt = CountTrailingZeros_64(MulAmt); + ShiftAmt = ShiftAmt & (32 - 1); + SDValue V = N->getOperand(0); + DebugLoc DL = N->getDebugLoc(); + + SDValue Res; + MulAmt >>= ShiftAmt; + if (isPowerOf2_32(MulAmt - 1)) { + // (mul x, 2^N + 1) => (add (shl x, N), x) + Res = DAG.getNode(ISD::ADD, DL, VT, + V, DAG.getNode(ISD::SHL, DL, VT, + V, DAG.getConstant(Log2_32(MulAmt-1), + MVT::i32))); + } else if (isPowerOf2_32(MulAmt + 1)) { + // (mul x, 2^N - 1) => (sub (shl x, N), x) + Res = DAG.getNode(ISD::SUB, DL, VT, + DAG.getNode(ISD::SHL, DL, VT, + V, DAG.getConstant(Log2_32(MulAmt+1), + MVT::i32)), + V); + } else + return SDValue(); + + if (ShiftAmt != 0) + Res = DAG.getNode(ISD::SHL, DL, VT, Res, + DAG.getConstant(ShiftAmt, MVT::i32)); + + // Do not add new nodes to DAG combiner worklist. 
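PerformMULCombine above rewrites a multiply by a constant of the form (2^N +/- 1) << S into a shift-add or shift-sub chain, after peeling the trailing zero bits into the final shift. A sketch checking the algebra in plain unsigned arithmetic:

    #include <cassert>
    #include <cstdint>

    // x * ((2^N + 1) << S)  ==  ((x << N) + x) << S   (mod 2^32)
    // x * ((2^N - 1) << S)  ==  ((x << N) - x) << S   (mod 2^32)
    uint32_t mulPow2Plus1(uint32_t X, unsigned N, unsigned S) {
      return ((X << N) + X) << S;
    }
    uint32_t mulPow2Minus1(uint32_t X, unsigned N, unsigned S) {
      return ((X << N) - X) << S;
    }

    int main() {
      for (uint32_t X : {0u, 1u, 7u, 0xDEADBEEFu}) {
        assert(mulPow2Plus1(X, 3, 2)  == X * ((8u + 1u) << 2));  // x * 36
        assert(mulPow2Minus1(X, 4, 1) == X * ((16u - 1u) << 1)); // x * 30
      }
      return 0;
    }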
+ DCI.CombineTo(N, Res, false); + return SDValue(); +} + /// PerformVMOVRRDCombine - Target-specific dag combine xforms for /// ARMISD::VMOVRRD. static SDValue PerformVMOVRRDCombine(SDNode *N, @@ -4053,6 +4138,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, default: break; case ISD::ADD: return PerformADDCombine(N, DCI); case ISD::SUB: return PerformSUBCombine(N, DCI); + case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget); case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI); case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG); case ISD::SHL: @@ -4432,9 +4518,11 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, bool isSEXTLoad = false; if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { VT = LD->getMemoryVT(); + Ptr = LD->getBasePtr(); isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { VT = ST->getMemoryVT(); + Ptr = ST->getBasePtr(); } else return false; @@ -4442,13 +4530,25 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, bool isLegal = false; if (Subtarget->isThumb2()) isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, - isInc, DAG); + isInc, DAG); else isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, isInc, DAG); if (!isLegal) return false; + if (Ptr != Base) { + // Swap base ptr and offset to catch more post-index load / store when + // it's legal. In Thumb2 mode, offset must be an immediate. + if (Ptr == Offset && Op->getOpcode() == ISD::ADD && + !Subtarget->isThumb2()) + std::swap(Base, Offset); + + // Post-indexed load / store update the base pointer. + if (Ptr != Base) + return false; + } + AM = isInc ? ISD::POST_INC : ISD::POST_DEC; return true; } diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index d8a230f..9c7517c 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -236,13 +236,19 @@ namespace llvm { std::vector<SDValue> &Ops, SelectionDAG &DAG) const; - virtual const ARMSubtarget* getSubtarget() const { + const ARMSubtarget* getSubtarget() const { return Subtarget; } + /// getRegClassFor - Return the register class that should be used for the + /// specified value type. + virtual TargetRegisterClass *getRegClassFor(EVT VT) const; + /// getFunctionAlignment - Return the Log2 alignment of this function. 
virtual unsigned getFunctionAlignment(const Function *F) const; + Sched::Preference getSchedulingPreference(SDNode *N) const; + bool isShuffleMaskLegal(const SmallVectorImpl<int> &M, EVT VT) const; bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; @@ -281,7 +287,8 @@ namespace llvm { DebugLoc dl, SelectionDAG &DAG, const CCValAssign &VA, ISD::ArgFlagsTy Flags) const; - SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) const; SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; @@ -296,20 +303,12 @@ namespace llvm { SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const; SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; - SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, - SDValue Chain, - SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, - bool isVolatile, bool AlwaysInline, - const Value *DstSV, - uint64_t DstSVOff, - const Value *SrcSV, - uint64_t SrcSVOff) const; SDValue LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index b466d0d..d487df1 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -228,7 +228,7 @@ class PseudoInst<dag oops, dag iops, InstrItinClass itin, "", itin> { let OutOperandList = oops; let InOperandList = iops; - let AsmString = asm; + let AsmString = asm; let Pattern = pattern; } @@ -240,7 +240,7 @@ class I<dag oops, dag iops, AddrMode am, SizeFlagVal sz, : InstARM<am, sz, im, f, GenericDomain, cstr, itin> { let OutOperandList = oops; let InOperandList = !con(iops, (ins pred:$p)); - let AsmString = !strconcat(opc, !strconcat("${p}", asm)); + let AsmString = !strconcat(opc, !strconcat("${p}", asm)); let Pattern = pattern; list<Predicate> Predicates = [IsARM]; } @@ -252,7 +252,7 @@ class InoP<dag oops, dag iops, AddrMode am, SizeFlagVal sz, : InstARM<am, sz, im, f, GenericDomain, cstr, itin> { let OutOperandList = oops; let InOperandList = iops; - let AsmString = !strconcat(opc, asm); + let AsmString = !strconcat(opc, asm); let Pattern = pattern; let isPredicable = 0; list<Predicate> Predicates = [IsARM]; @@ -268,7 +268,7 @@ class sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, : InstARM<am, sz, im, f, GenericDomain, cstr, itin> { let OutOperandList = oops; let InOperandList = !con(iops, (ins pred:$p, cc_out:$s)); - let AsmString = !strconcat(opc, !strconcat("${p}${s}", asm)); + let AsmString = !strconcat(opc, !strconcat("${p}${s}", asm)); let Pattern = pattern; list<Predicate> Predicates = [IsARM]; } @@ -280,7 +280,7 @@ class XI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, : InstARM<am, sz, im, f, GenericDomain, cstr, itin> { let OutOperandList = oops; let InOperandList = iops; - let AsmString = asm; + let AsmString = asm; let Pattern = pattern; list<Predicate> Predicates = 
[IsARM]; } @@ -959,7 +959,7 @@ class ThumbI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, : InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> { let OutOperandList = oops; let InOperandList = iops; - let AsmString = asm; + let AsmString = asm; let Pattern = pattern; list<Predicate> Predicates = [IsThumb]; } @@ -995,7 +995,7 @@ class Thumb1I<dag oops, dag iops, AddrMode am, SizeFlagVal sz, : InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> { let OutOperandList = oops; let InOperandList = iops; - let AsmString = asm; + let AsmString = asm; let Pattern = pattern; list<Predicate> Predicates = [IsThumb1Only]; } @@ -1140,7 +1140,7 @@ class Thumb2sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> { let OutOperandList = oops; let InOperandList = !con(iops, (ins pred:$p, cc_out:$s)); - let AsmString = !strconcat(opc, !strconcat("${s}${p}", asm)); + let AsmString = !strconcat(opc, !strconcat("${s}${p}", asm)); let Pattern = pattern; list<Predicate> Predicates = [IsThumb2]; } @@ -1152,7 +1152,7 @@ class Thumb2XI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> { let OutOperandList = oops; let InOperandList = iops; - let AsmString = asm; + let AsmString = asm; let Pattern = pattern; list<Predicate> Predicates = [IsThumb2]; } @@ -1163,7 +1163,7 @@ class ThumbXI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> { let OutOperandList = oops; let InOperandList = iops; - let AsmString = asm; + let AsmString = asm; let Pattern = pattern; list<Predicate> Predicates = [IsThumb1Only]; } @@ -1280,7 +1280,7 @@ class VFPI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, : InstARM<am, sz, im, f, VFPDomain, cstr, itin> { let OutOperandList = oops; let InOperandList = !con(iops, (ins pred:$p)); - let AsmString = !strconcat(opc, !strconcat("${p}", asm)); + let AsmString = !strconcat(opc, !strconcat("${p}", asm)); let Pattern = pattern; list<Predicate> Predicates = [HasVFP2]; } @@ -1292,7 +1292,7 @@ class VFPXI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, : InstARM<am, sz, im, f, VFPDomain, cstr, itin> { let OutOperandList = oops; let InOperandList = iops; - let AsmString = asm; + let AsmString = asm; let Pattern = pattern; list<Predicate> Predicates = [HasVFP2]; } diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index ce5f2f8..f3156d9 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -46,6 +46,7 @@ def SDT_ARMPICAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, def SDT_ARMThreadPointer : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>; def SDT_ARMEH_SJLJ_Setjmp : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisInt<2>]>; +def SDT_ARMEH_SJLJ_Longjmp: SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisInt<1>]>; def SDT_ARMMEMBARRIERV7 : SDTypeProfile<0, 0, []>; def SDT_ARMSYNCBARRIERV7 : SDTypeProfile<0, 0, []>; @@ -100,7 +101,10 @@ def ARMsra_flag : SDNode<"ARMISD::SRA_FLAG", SDTIntUnaryOp, [SDNPOutFlag]>; def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOp, [SDNPInFlag ]>; def ARMthread_pointer: SDNode<"ARMISD::THREAD_POINTER", SDT_ARMThreadPointer>; -def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP", SDT_ARMEH_SJLJ_Setjmp>; +def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP", + SDT_ARMEH_SJLJ_Setjmp, [SDNPHasChain]>; +def ARMeh_sjlj_longjmp: SDNode<"ARMISD::EH_SJLJ_LONGJMP", + SDT_ARMEH_SJLJ_Longjmp, 
[SDNPHasChain]>; def ARMMemBarrierV7 : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIERV7, [SDNPHasChain]>; @@ -128,6 +132,8 @@ def NoVFP : Predicate<"!Subtarget->hasVFP2()">; def HasVFP2 : Predicate<"Subtarget->hasVFP2()">; def HasVFP3 : Predicate<"Subtarget->hasVFP3()">; def HasNEON : Predicate<"Subtarget->hasNEON()">; +def HasDivide : Predicate<"Subtarget->hasDivide()">; +def HasT2ExtractPack : Predicate<"Subtarget->hasT2ExtractPack()">; def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">; def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">; def IsThumb : Predicate<"Subtarget->isThumb()">; @@ -654,12 +660,12 @@ PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx, let Defs = [SP], Uses = [SP], hasSideEffects = 1 in { def ADJCALLSTACKUP : PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2, pred:$p), NoItinerary, - "@ ADJCALLSTACKUP $amt1", + "${:comment} ADJCALLSTACKUP $amt1", [(ARMcallseq_end timm:$amt1, timm:$amt2)]>; def ADJCALLSTACKDOWN : PseudoInst<(outs), (ins i32imm:$amt, pred:$p), NoItinerary, - "@ ADJCALLSTACKDOWN $amt", + "${:comment} ADJCALLSTACKDOWN $amt", [(ARMcallseq_start timm:$amt)]>; } @@ -789,8 +795,11 @@ def DBG : AI<(outs), (ins i32imm:$opt), MiscFrm, NoItinerary, "dbg", "\t$opt", } // A5.4 Permanently UNDEFINED instructions. -def TRAP : AI<(outs), (ins), MiscFrm, NoItinerary, "trap", "", - [/* For disassembly only; pattern left blank */]>, +// FIXME: Temporary emitted as raw bytes until this pseudo-op will be added to +// binutils +let isBarrier = 1, isTerminator = 1 in +def TRAP : AXI<(outs), (ins), MiscFrm, NoItinerary, + ".long 0xe7ffdefe ${:comment} trap", [(trap)]>, Requires<[IsARM]> { let Inst{27-25} = 0b011; let Inst{24-20} = 0b11111; @@ -843,25 +852,19 @@ def PICSTRB : AXI2stb<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p), // LEApcrel - Load a pc-relative address into a register without offending the // assembler. +let neverHasSideEffects = 1 in { +let isReMaterializable = 1 in def LEApcrel : AXI1<0x0, (outs GPR:$dst), (ins i32imm:$label, pred:$p), Pseudo, IIC_iALUi, - !strconcat(!strconcat(".set ${:private}PCRELV${:uid}, ($label-(", - "${:private}PCRELL${:uid}+8))\n"), - !strconcat("${:private}PCRELL${:uid}:\n\t", - "add$p\t$dst, pc, #${:private}PCRELV${:uid}")), - []>; + "adr$p\t$dst, #$label", []>; def LEApcrelJT : AXI1<0x0, (outs GPR:$dst), (ins i32imm:$label, nohash_imm:$id, pred:$p), - Pseudo, IIC_iALUi, - !strconcat(!strconcat(".set ${:private}PCRELV${:uid}, " - "(${label}_${id}-(", - "${:private}PCRELL${:uid}+8))\n"), - !strconcat("${:private}PCRELL${:uid}:\n\t", - "add$p\t$dst, pc, #${:private}PCRELV${:uid}")), - []> { + Pseudo, IIC_iALUi, + "adr$p\t$dst, #${label}_${id}", []> { let Inst{25} = 1; } +} // neverHasSideEffects //===----------------------------------------------------------------------===// // Control Flow Instructions. @@ -1134,7 +1137,8 @@ def LDR : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, IIC_iLoadr, [(set GPR:$dst, (load addrmode2:$addr))]>; // Special LDR for loads from non-pc-relative constpools. 
-let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1 in +let canFoldAsLoad = 1, mayLoad = 1, neverHasSideEffects = 1, + isReMaterializable = 1 in def LDRcp : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, IIC_iLoadr, "ldr", "\t$dst, $addr", []>; @@ -1156,7 +1160,7 @@ def LDRSB : AI3ldsb<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm, IIC_iLoadr, "ldrsb", "\t$dst, $addr", [(set GPR:$dst, (sextloadi8 addrmode3:$addr))]>; -let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { +let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { // Load doubleword def LDRD : AI3ldd<(outs GPR:$dst1, GPR:$dst2), (ins addrmode3:$addr), LdMiscFrm, IIC_iLoadr, "ldrd", "\t$dst1, $addr", @@ -1215,7 +1219,7 @@ def LDRD_POST : AI3lddpo<(outs GPR:$dst1, GPR:$dst2, GPR:$base_wb), "ldrd", "\t$dst1, $dst2, [$base], $offset", "$base = $base_wb", []>, Requires<[IsARM, HasV5TE]>; -} +} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 // LDRT, LDRBT, LDRSBT, LDRHT, LDRSHT are for disassembly only. @@ -1264,7 +1268,7 @@ def STRB : AI2stb<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, IIC_iStorer, [(truncstorei8 GPR:$src, addrmode2:$addr)]>; // Store doubleword -let mayStore = 1, hasExtraSrcRegAllocReq = 1 in +let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in def STRD : AI3std<(outs), (ins GPR:$src1, GPR:$src2, addrmode3:$addr), StMiscFrm, IIC_iStorer, "strd", "\t$src1, $addr", []>, Requires<[IsARM, HasV5TE]>; @@ -1356,7 +1360,7 @@ def STRHT: AI3sthpo<(outs GPR:$base_wb), // Load / store multiple Instructions. // -let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { +let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { def LDM : AXI4ld<(outs), (ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops), IndexModeNone, LdStMulFrm, IIC_iLoadm, @@ -1367,9 +1371,9 @@ def LDM_UPD : AXI4ld<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p, IndexModeUpd, LdStMulFrm, IIC_iLoadm, "ldm${addr:submode}${p}\t$addr!, $dsts", "$addr.addr = $wb", []>; -} // mayLoad, hasExtraDefRegAllocReq +} // mayLoad, neverHasSideEffects, hasExtraDefRegAllocReq -let mayStore = 1, hasExtraSrcRegAllocReq = 1 in { +let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in { def STM : AXI4st<(outs), (ins addrmode4:$addr, pred:$p, reglist:$srcs, variable_ops), IndexModeNone, LdStMulFrm, IIC_iStorem, @@ -1380,7 +1384,7 @@ def STM_UPD : AXI4st<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p, IndexModeUpd, LdStMulFrm, IIC_iStorem, "stm${addr:submode}${p}\t$addr!, $srcs", "$addr.addr = $wb", []>; -} // mayStore, hasExtraSrcRegAllocReq +} // mayStore, neverHasSideEffects, hasExtraSrcRegAllocReq //===----------------------------------------------------------------------===// // Move Instructions. @@ -2198,6 +2202,7 @@ def : ARMPat<(ARMcmpZ GPR:$src, so_imm_neg:$imm), // Conditional moves // FIXME: should be able to write a pattern for ARMcmov, but can't use // a two-value operand where a dag node expects two operands. 
:( +let neverHasSideEffects = 1 in { def MOVCCr : AI1<0b1101, (outs GPR:$dst), (ins GPR:$false, GPR:$true), DPFrm, IIC_iCMOVr, "mov", "\t$dst, $true", [/*(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc, CCR:$ccr))*/]>, @@ -2221,6 +2226,7 @@ def MOVCCi : AI1<0b1101, (outs GPR:$dst), RegConstraint<"$false = $dst">, UnaryDP { let Inst{25} = 1; } +} // neverHasSideEffects //===----------------------------------------------------------------------===// // Atomic operations intrinsics @@ -2528,12 +2534,12 @@ let Defs = def Int_eh_sjlj_setjmp : XI<(outs), (ins GPR:$src, GPR:$val), AddrModeNone, SizeSpecial, IndexModeNone, Pseudo, NoItinerary, - "str\tsp, [$src, #+8] @ eh_setjmp begin\n\t" + "str\tsp, [$src, #+8] ${:comment} eh_setjmp begin\n\t" "add\t$val, pc, #8\n\t" "str\t$val, [$src, #+4]\n\t" "mov\tr0, #0\n\t" "add\tpc, pc, #0\n\t" - "mov\tr0, #1 @ eh_setjmp end", "", + "mov\tr0, #1 ${:comment} eh_setjmp end", "", [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>, Requires<[IsARM, HasVFP2]>; } @@ -2543,16 +2549,30 @@ let Defs = def Int_eh_sjlj_setjmp_nofp : XI<(outs), (ins GPR:$src, GPR:$val), AddrModeNone, SizeSpecial, IndexModeNone, Pseudo, NoItinerary, - "str\tsp, [$src, #+8] @ eh_setjmp begin\n\t" + "str\tsp, [$src, #+8] ${:comment} eh_setjmp begin\n\t" "add\t$val, pc, #8\n\t" "str\t$val, [$src, #+4]\n\t" "mov\tr0, #0\n\t" "add\tpc, pc, #0\n\t" - "mov\tr0, #1 @ eh_setjmp end", "", + "mov\tr0, #1 ${:comment} eh_setjmp end", "", [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>, Requires<[IsARM, NoVFP]>; } +// FIXME: Non-Darwin version(s) +let isBarrier = 1, hasSideEffects = 1, isTerminator = 1, + Defs = [ R7, LR, SP ] in { +def Int_eh_sjlj_longjmp : XI<(outs), (ins GPR:$src, GPR:$scratch), + AddrModeNone, SizeSpecial, IndexModeNone, + Pseudo, NoItinerary, + "ldr\tsp, [$src, #8]\n\t" + "ldr\t$scratch, [$src, #4]\n\t" + "ldr\tr7, [$src]\n\t" + "bx\t$scratch", "", + [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>, + Requires<[IsARM, IsDarwin]>; +} + //===----------------------------------------------------------------------===// // Non-Instruction Patterns // diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index d5ce2b8..197ec16 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -115,7 +115,7 @@ def h64imm : Operand<i64> { // NEON load / store instructions //===----------------------------------------------------------------------===// -let mayLoad = 1 in { +let mayLoad = 1, neverHasSideEffects = 1 in { // Use vldmia to load a Q register as a D register pair. // This is equivalent to VLDMD except that it has a Q register operand // instead of a pair of D registers. @@ -123,11 +123,6 @@ def VLDMQ : AXDI5<(outs QPR:$dst), (ins addrmode5:$addr, pred:$p), IndexModeNone, IIC_fpLoadm, "vldm${addr:submode}${p}\t${addr:base}, ${dst:dregpair}", "", []>; -def VLDMQ_UPD - : AXDI5<(outs QPR:$dst, GPR:$wb), (ins addrmode5:$addr, pred:$p), - IndexModeUpd, IIC_fpLoadm, - "vldm${addr:submode}${p}\t${addr:base}!, ${dst:dregpair}", - "$addr.base = $wb", []>; // Use vld1 to load a Q register as a D register pair. // This alternative to VLDMQ allows an alignment to be specified. 
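The Int_eh_sjlj_setjmp and Int_eh_sjlj_longjmp expansions above agree on a three-word buffer layout: frame pointer at offset 0, resume address at offset 4, saved stack pointer at offset 8. A sketch pinning those offsets down; the field names are ours, not LLVM's:

    #include <cstddef>
    #include <cstdint>

    // Layout implied by the pseudo expansions:
    //   setjmp:  str sp, [src, #+8]; str resume, [src, #+4]
    //   longjmp: ldr sp, [src, #8];  bx <- [src, #4];  ldr r7, [src]
    struct SjLjJumpBuf {
      uint32_t FramePtr;   // [src, #0]: r7 (Darwin frame pointer)
      uint32_t ResumeAddr; // [src, #4]: where longjmp branches
      uint32_t StackPtr;   // [src, #8]: sp to restore
    };

    static_assert(offsetof(SjLjJumpBuf, FramePtr)   == 0, "layout");
    static_assert(offsetof(SjLjJumpBuf, ResumeAddr) == 4, "layout");
    static_assert(offsetof(SjLjJumpBuf, StackPtr)   == 8, "layout");

    int main() { return 0; }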
@@ -135,13 +130,9 @@ def VLDMQ_UPD def VLD1q : NLdSt<0,0b10,0b1010,0b1100, (outs QPR:$dst), (ins addrmode6:$addr), IIC_VLD1, "vld1", "64", "${dst:dregpair}, $addr", "", []>; -def VLD1q_UPD - : NLdSt<0,0b10,0b1010,0b1100, (outs QPR:$dst, GPR:$wb), - (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, "vld1", "64", - "${dst:dregpair}, $addr$offset", "$addr.addr = $wb", []>; -} // mayLoad = 1 +} // mayLoad = 1, neverHasSideEffects = 1 -let mayStore = 1 in { +let mayStore = 1, neverHasSideEffects = 1 in { // Use vstmia to store a Q register as a D register pair. // This is equivalent to VSTMD except that it has a Q register operand // instead of a pair of D registers. @@ -149,11 +140,6 @@ def VSTMQ : AXDI5<(outs), (ins QPR:$src, addrmode5:$addr, pred:$p), IndexModeNone, IIC_fpStorem, "vstm${addr:submode}${p}\t${addr:base}, ${src:dregpair}", "", []>; -def VSTMQ_UPD - : AXDI5<(outs GPR:$wb), (ins QPR:$src, addrmode5:$addr, pred:$p), - IndexModeUpd, IIC_fpStorem, - "vstm${addr:submode}${p}\t${addr:base}!, ${src:dregpair}", - "$addr.base = $wb", []>; // Use vst1 to store a Q register as a D register pair. // This alternative to VSTMQ allows an alignment to be specified. @@ -161,14 +147,9 @@ def VSTMQ_UPD def VST1q : NLdSt<0,0b00,0b1010,0b1100, (outs), (ins addrmode6:$addr, QPR:$src), IIC_VST, "vst1", "64", "${src:dregpair}, $addr", "", []>; -def VST1q_UPD - : NLdSt<0,0b00,0b1010,0b1100, (outs GPR:$wb), - (ins addrmode6:$addr, am6offset:$offset, QPR:$src), - IIC_VST, "vst1", "64", "{$src:dregpair}, $addr$offset", - "$addr.addr = $wb", []>; -} // mayStore = 1 +} // mayStore = 1, neverHasSideEffects = 1 -let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { +let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { // VLD1 : Vector Load (multiple single elements) class VLD1D<bits<4> op7_4, string Dt> @@ -492,9 +473,9 @@ def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32">; // VLD3DUP : Vector Load (single 3-element structure to all lanes) // VLD4DUP : Vector Load (single 4-element structure to all lanes) // FIXME: Not yet implemented. -} // mayLoad = 1, hasExtraDefRegAllocReq = 1 +} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 -let mayStore = 1, hasExtraSrcRegAllocReq = 1 in { +let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in { // VST1 : Vector Store (multiple single elements) class VST1D<bits<4> op7_4, string Dt> @@ -807,7 +788,7 @@ def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32">; def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16">; def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32">; -} // mayStore = 1, hasExtraSrcRegAllocReq = 1 +} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 //===----------------------------------------------------------------------===// @@ -815,27 +796,32 @@ def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32">; //===----------------------------------------------------------------------===// // Extract D sub-registers of Q registers. 
-// (arm_dsubreg_0 is 5; arm_dsubreg_1 is 6) def DSubReg_i8_reg : SDNodeXForm<imm, [{ - return CurDAG->getTargetConstant(5 + N->getZExtValue() / 8, MVT::i32); + assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); + return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/8, MVT::i32); }]>; def DSubReg_i16_reg : SDNodeXForm<imm, [{ - return CurDAG->getTargetConstant(5 + N->getZExtValue() / 4, MVT::i32); + assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); + return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/4, MVT::i32); }]>; def DSubReg_i32_reg : SDNodeXForm<imm, [{ - return CurDAG->getTargetConstant(5 + N->getZExtValue() / 2, MVT::i32); + assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); + return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/2, MVT::i32); }]>; def DSubReg_f64_reg : SDNodeXForm<imm, [{ - return CurDAG->getTargetConstant(5 + N->getZExtValue(), MVT::i32); + assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); + return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue(), MVT::i32); }]>; def DSubReg_f64_other_reg : SDNodeXForm<imm, [{ - return CurDAG->getTargetConstant(5 + (1 - N->getZExtValue()), MVT::i32); + assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); + return CurDAG->getTargetConstant(ARM::dsub_0 + (1 - N->getZExtValue()), + MVT::i32); }]>; // Extract S sub-registers of Q/D registers. -// (arm_ssubreg_0 is 1; arm_ssubreg_1 is 2; etc.) def SSubReg_f32_reg : SDNodeXForm<imm, [{ - return CurDAG->getTargetConstant(1 + N->getZExtValue(), MVT::i32); + assert(ARM::ssub_3 == ARM::ssub_0+3 && "Unexpected subreg numbering"); + return CurDAG->getTargetConstant(ARM::ssub_0 + N->getZExtValue(), MVT::i32); }]>; // Translate lane numbers from Q registers to D subregs. @@ -2829,11 +2815,21 @@ def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0, // VMOV : Vector Move (Register) +let neverHasSideEffects = 1 in { def VMOVDneon: N3VX<0, 0, 0b10, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src), N3RegFrm, IIC_VMOVD, "vmov", "$dst, $src", "", []>; def VMOVQ : N3VX<0, 0, 0b10, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src), N3RegFrm, IIC_VMOVD, "vmov", "$dst, $src", "", []>; +// Pseudo vector move instructions for QQ and QQQQ registers. This should +// be expanded after register allocation is completed. +def VMOVQQ : PseudoInst<(outs QQPR:$dst), (ins QQPR:$src), + NoItinerary, "${:comment} vmov\t$dst, $src", []>; + +def VMOVQQQQ : PseudoInst<(outs QQQQPR:$dst), (ins QQQQPR:$src), + NoItinerary, "${:comment} vmov\t$dst, $src", []>; +} // neverHasSideEffects + // VMOV : Vector Move (Immediate) // VMOV_get_imm8 xform function: convert build_vector to VMOV.i8 imm. @@ -2871,6 +2867,7 @@ def vmovImm64 : PatLeaf<(build_vector), [{ // Note: Some of the cmode bits in the following VMOV instructions need to // be encoded based on the immed values. 
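Each DSubReg xform above maps a Q-register lane number to the D subregister holding it by dividing by the number of lanes per 64-bit D register, now expressed relative to dsub_0 instead of the old hard-coded 5. A sketch of the mapping, with a made-up stand-in value for dsub_0:

    #include <cassert>

    enum { dsub_0 = 1 }; // hypothetical stand-in; only consecutiveness matters

    // Which D subregister of a Q register holds a lane of the given width:
    // a 64-bit D register holds 64/ElemBits lanes.
    unsigned dSubRegForLane(unsigned Lane, unsigned ElemBits) {
      return dsub_0 + Lane / (64 / ElemBits);
    }

    int main() {
      assert(dSubRegForLane(11, 8) == dsub_0 + 1); // v16i8 lane 11 -> dsub_1
      assert(dSubRegForLane(3, 16) == dsub_0 + 0); // v8i16 lane 3  -> dsub_0
      assert(dSubRegForLane(2, 32) == dsub_0 + 1); // v4i32 lane 2  -> dsub_1
      assert(dSubRegForLane(1, 64) == dsub_0 + 1); // v2f64 lane 1  -> dsub_1
      return 0;
    }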
+let isReMaterializable = 1 in { def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$dst), (ins h8imm:$SIMM), IIC_VMOVImm, "vmov", "i8", "$dst, $SIMM", "", @@ -2906,6 +2903,7 @@ def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$dst), (ins h64imm:$SIMM), IIC_VMOVImm, "vmov", "i64", "$dst, $SIMM", "", [(set QPR:$dst, (v2i64 vmovImm64:$SIMM))]>; +} // isReMaterializable // VMOV : Vector Get Lane (move scalar to ARM core register) @@ -3018,11 +3016,11 @@ def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)), (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>; def : Pat<(v2f32 (scalar_to_vector SPR:$src)), - (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, arm_ssubreg_0)>; + (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>; def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))), - (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, arm_dsubreg_0)>; + (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>; def : Pat<(v4f32 (scalar_to_vector SPR:$src)), - (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, arm_ssubreg_0)>; + (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>; def : Pat<(v8i8 (scalar_to_vector GPR:$src)), (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0))>; @@ -3034,15 +3032,15 @@ def : Pat<(v2i32 (scalar_to_vector GPR:$src)), def : Pat<(v16i8 (scalar_to_vector GPR:$src)), (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)), - arm_dsubreg_0)>; + dsub_0)>; def : Pat<(v8i16 (scalar_to_vector GPR:$src)), (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)), - arm_dsubreg_0)>; + dsub_0)>; def : Pat<(v4i32 (scalar_to_vector GPR:$src)), (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)), - arm_dsubreg_0)>; + dsub_0)>; // VDUP : Vector Duplicate (from ARM core register to all elements) @@ -3376,27 +3374,27 @@ def VTBX4 class N2VSPat<SDNode OpNode, ValueType ResTy, ValueType OpTy, NeonI Inst> : NEONFPPat<(ResTy (OpNode SPR:$a)), (EXTRACT_SUBREG (OpTy (Inst (INSERT_SUBREG (OpTy (IMPLICIT_DEF)), - SPR:$a, arm_ssubreg_0))), - arm_ssubreg_0)>; + SPR:$a, ssub_0))), + ssub_0)>; class N3VSPat<SDNode OpNode, NeonI Inst> : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)), (EXTRACT_SUBREG (v2f32 (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), - SPR:$a, arm_ssubreg_0), + SPR:$a, ssub_0), (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), - SPR:$b, arm_ssubreg_0))), - arm_ssubreg_0)>; + SPR:$b, ssub_0))), + ssub_0)>; class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst> : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))), (EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), - SPR:$acc, arm_ssubreg_0), + SPR:$acc, ssub_0), (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), - SPR:$a, arm_ssubreg_0), + SPR:$a, ssub_0), (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), - SPR:$b, arm_ssubreg_0)), - arm_ssubreg_0)>; + SPR:$b, ssub_0)), + ssub_0)>; // These need separate instructions because they must use DPR_VFP2 register // class which have SPR sub-registers. 
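The N3VSPat class just above runs a scalar f32 operation on the NEON unit by inserting each operand into lane 0 of an undef two-lane vector (ssub_0), performing the vector op, and extracting lane 0 of the result. A sketch of that dataflow; lane 1 is zeroed here purely to keep the model well defined, where the hardware would leave it undefined:

    #include <cassert>

    struct V2F32 { float Lane[2]; };

    V2F32 insertLane0(float S) {           // INSERT_SUBREG ..., $s, ssub_0
      V2F32 V; V.Lane[0] = S; V.Lane[1] = 0.0f; return V;
    }

    V2F32 vadd(V2F32 A, V2F32 B) {         // the two-lane NEON op
      V2F32 R;
      for (int i = 0; i < 2; ++i) R.Lane[i] = A.Lane[i] + B.Lane[i];
      return R;
    }

    float scalarAddViaNEON(float A, float B) {
      return vadd(insertLane0(A), insertLane0(B)).Lane[0]; // EXTRACT ssub_0
    }

    int main() {
      assert(scalarAddViaNEON(1.5f, 2.25f) == 3.75f);
      return 0;
    }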
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index e3ca536..40f924b 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -127,12 +127,12 @@ def t_addrmode_sp : Operand<i32>, let Defs = [SP], Uses = [SP], hasSideEffects = 1 in { def tADJCALLSTACKUP : PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2), NoItinerary, - "@ tADJCALLSTACKUP $amt1", + "${:comment} tADJCALLSTACKUP $amt1", [(ARMcallseq_end imm:$amt1, imm:$amt2)]>, Requires<[IsThumb1Only]>; def tADJCALLSTACKDOWN : PseudoInst<(outs), (ins i32imm:$amt), NoItinerary, - "@ tADJCALLSTACKDOWN $amt", + "${:comment} tADJCALLSTACKDOWN $amt", [(ARMcallseq_start imm:$amt)]>, Requires<[IsThumb1Only]>; } @@ -254,14 +254,14 @@ def tADDspr : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, // Pseudo instruction that will expand into a tSUBspi + a copy. let usesCustomInserter = 1 in { // Expanded after instruction selection. def tSUBspi_ : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, t_imm_s4:$rhs), - NoItinerary, "@ sub\t$dst, $rhs", []>; + NoItinerary, "${:comment} sub\t$dst, $rhs", []>; def tADDspr_ : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), - NoItinerary, "@ add\t$dst, $rhs", []>; + NoItinerary, "${:comment} add\t$dst, $rhs", []>; let Defs = [CPSR] in def tANDsp : PseudoInst<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), - NoItinerary, "@ and\t$dst, $rhs", []>; + NoItinerary, "${:comment} and\t$dst, $rhs", []>; } // usesCustomInserter //===----------------------------------------------------------------------===// @@ -374,7 +374,7 @@ let isBranch = 1, isTerminator = 1 in { // Far jump let Defs = [LR] in def tBfar : TIx2<0b11110, 0b11, 1, (outs), (ins brtarget:$target), IIC_Br, - "bl\t$target\t@ far jump",[]>; + "bl\t$target\t${:comment} far jump",[]>; def tBR_JTr : T1JTI<(outs), (ins tGPR:$target, jtblock_operand:$jt, i32imm:$id), @@ -417,9 +417,13 @@ def tSVC : T1pI<(outs), (ins i32imm:$svc), IIC_Br, "svc", "\t$svc", []>, } } -// A8.6.16 B: Encoding T1 -- for disassembly only +// A8.6.16 B: Encoding T1 // If Inst{11-8} == 0b1110 then UNDEFINED -def tTRAP : T1I<(outs), (ins), IIC_Br, "trap", []>, Encoding16 { +// FIXME: Temporarily emitted as raw bytes until this pseudo-op is added to +// binutils +let isBarrier = 1, isTerminator = 1 in +def tTRAP : TI<(outs), (ins), IIC_Br, + ".short 0xdefe ${:comment} trap", [(trap)]>, Encoding16 { let Inst{15-12} = 0b1101; let Inst{11-8} = 0b1110; } @@ -476,7 +480,7 @@ def tLDRspi : T1pIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), IIC_iLoadi, // Special instruction for restore. It cannot clobber condition register // when it's expanded by eliminateCallFramePseudoInstr(). -let canFoldAsLoad = 1, mayLoad = 1 in +let canFoldAsLoad = 1, mayLoad = 1, neverHasSideEffects = 1 in def tRestore : T1pIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), IIC_iLoadi, "ldr", "\t$dst, $addr", []>, T1LdStSP<{1,?,?}>; @@ -490,7 +494,8 @@ def tLDRpci : T1pIs<(outs tGPR:$dst), (ins i32imm:$addr), IIC_iLoadi, T1Encoding<{0,1,0,0,1,?}>; // A6.2 & A8.6.59 // Special LDR for loads from non-pc-relative constpools.
-let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1 in +let canFoldAsLoad = 1, mayLoad = 1, neverHasSideEffects = 1, + isReMaterializable = 1 in def tLDRcp : T1pIs<(outs tGPR:$dst), (ins i32imm:$addr), IIC_iLoadi, "ldr", "\t$dst, $addr", []>, T1LdStSP<{1,?,?}>; @@ -527,7 +532,7 @@ def tSTRspi : T1pIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), IIC_iStorei, [(store tGPR:$src, t_addrmode_sp:$addr)]>, T1LdStSP<{0,?,?}>; -let mayStore = 1 in { +let mayStore = 1, neverHasSideEffects = 1 in { // Special instruction for spill. It cannot clobber condition register // when it's expanded by eliminateCallFramePseudoInstr(). def tSpill : T1pIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), IIC_iStorei, @@ -540,7 +545,7 @@ def tSpill : T1pIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), IIC_iStorei, // // These requires base address to be written back or one of the loaded regs. -let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { +let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { def tLDM : T1I<(outs), (ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops), IIC_iLoadm, @@ -553,9 +558,9 @@ def tLDM_UPD : T1It<(outs tGPR:$wb), "ldm${addr:submode}${p}\t$addr!, $dsts", "$addr.addr = $wb", []>, T1Encoding<{1,1,0,0,1,?}>; // A6.2 & A8.6.53 -} // mayLoad, hasExtraDefRegAllocReq +} // mayLoad, neverHasSideEffects = 1, hasExtraDefRegAllocReq -let mayStore = 1, hasExtraSrcRegAllocReq = 1 in +let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in def tSTM_UPD : T1It<(outs tGPR:$wb), (ins addrmode4:$addr, pred:$p, reglist:$srcs, variable_ops), IIC_iStorem, @@ -866,11 +871,12 @@ def tUXTH : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr, let usesCustomInserter = 1 in // Expanded after instruction selection. def tMOVCCr_pseudo : PseudoInst<(outs tGPR:$dst), (ins tGPR:$false, tGPR:$true, pred:$cc), - NoItinerary, "@ tMOVCCr $cc", + NoItinerary, "${:comment} tMOVCCr $cc", [/*(set tGPR:$dst, (ARMcmov tGPR:$false, tGPR:$true, imm:$cc))*/]>; // 16-bit movcc in IT blocks for Thumb2. +let neverHasSideEffects = 1 in { def tMOVCCr : T1pIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iCMOVr, "mov", "\t$dst, $rhs", []>, T1Special<{1,0,?,?}>; @@ -878,9 +884,12 @@ def tMOVCCr : T1pIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iCMOVr, def tMOVCCi : T1pIt<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iCMOVi, "mov", "\t$dst, $rhs", []>, T1General<{1,0,0,?,?}>; +} // neverHasSideEffects // tLEApcrel - Load a pc-relative address into a register without offending the // assembler. 
+let neverHasSideEffects = 1 in { +let isReMaterializable = 1 in def tLEApcrel : T1I<(outs tGPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi, "adr$p\t$dst, #$label", []>, T1Encoding<{1,0,1,0,0,?}>; // A6.2 & A8.6.10 @@ -889,6 +898,7 @@ def tLEApcrelJT : T1I<(outs tGPR:$dst), (ins i32imm:$label, nohash_imm:$id, pred:$p), IIC_iALUi, "adr$p\t$dst, #${label}_${id}", []>, T1Encoding<{1,0,1,0,0,?}>; // A6.2 & A8.6.10 +} // neverHasSideEffects //===----------------------------------------------------------------------===// // TLS Instructions @@ -918,16 +928,32 @@ let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R12 ] in { def tInt_eh_sjlj_setjmp : ThumbXI<(outs),(ins tGPR:$src, tGPR:$val), AddrModeNone, SizeSpecial, NoItinerary, - "str\t$val, [$src, #8]\t@ begin eh.setjmp\n" + "str\t$val, [$src, #8]\t${:comment} begin eh.setjmp\n" "\tmov\t$val, pc\n" - "\tadds\t$val, #9\n" + "\tadds\t$val, #7\n" "\tstr\t$val, [$src, #4]\n" "\tmovs\tr0, #0\n" "\tb\t1f\n" - "\tmovs\tr0, #1\t@ end eh.setjmp\n" + "\tmovs\tr0, #1\t${:comment} end eh.setjmp\n" "1:", "", [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>; } + +// FIXME: Non-Darwin version(s) +let isBarrier = 1, hasSideEffects = 1, isTerminator = 1, + Defs = [ R7, LR, SP ] in { +def tInt_eh_sjlj_longjmp : XI<(outs), (ins GPR:$src, GPR:$scratch), + AddrModeNone, SizeSpecial, IndexModeNone, + Pseudo, NoItinerary, + "ldr\t$scratch, [$src, #8]\n\t" + "mov\tsp, $scratch\n\t" + "ldr\t$scratch, [$src, #4]\n\t" + "ldr\tr7, [$src]\n\t" + "bx\t$scratch", "", + [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>, + Requires<[IsThumb, IsDarwin]>; +} + //===----------------------------------------------------------------------===// // Non-Instruction Patterns // @@ -1011,7 +1037,7 @@ def : T1Pat<(i32 imm0_255_comp:$src), // scheduling. let isReMaterializable = 1 in def tLDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp), - NoItinerary, "@ ldr.n\t$dst, $addr\n$cp:\n\tadd\t$dst, pc", + NoItinerary, "${:comment} ldr.n\t$dst, $addr\n$cp:\n\tadd\t$dst, pc", [(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)), imm:$cp))]>, Requires<[IsThumb1Only]>; diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 742bd40..b91c089 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -185,8 +185,8 @@ multiclass T2I_un_irs<bits<4> opcod, string opc, PatFrag opnode, let Inst{15} = 0; } // register - def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVr, - opc, ".w\t$dst, $src", + def r : T2sI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVr, + opc, ".w\t$dst, $src", [(set GPR:$dst, (opnode GPR:$src))]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; @@ -198,9 +198,9 @@ multiclass T2I_un_irs<bits<4> opcod, string opc, PatFrag opnode, let Inst{5-4} = 0b00; // type } // shifted register - def s : T2I<(outs GPR:$dst), (ins t2_so_reg:$src), IIC_iMOVsi, - opc, ".w\t$dst, $src", - [(set GPR:$dst, (opnode t2_so_reg:$src))]> { + def s : T2sI<(outs GPR:$dst), (ins t2_so_reg:$src), IIC_iMOVsi, + opc, ".w\t$dst, $src", + [(set GPR:$dst, (opnode t2_so_reg:$src))]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; @@ -210,7 +210,7 @@ multiclass T2I_un_irs<bits<4> opcod, string opc, PatFrag opnode, } /// T2I_bin_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a -// binary operation that produces a value. These are predicable and can be +/// binary operation that produces a value. These are predicable and can be /// changed to modify CPSR. 
multiclass T2I_bin_irs<bits<4> opcod, string opc, PatFrag opnode, bit Commutable = 0, string wide =""> { @@ -259,23 +259,23 @@ multiclass T2I_bin_w_irs<bits<4> opcod, string opc, PatFrag opnode, /// T2I_bin_irs counterpart. multiclass T2I_rbin_is<bits<4> opcod, string opc, PatFrag opnode> { // shifted imm - def ri : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs), IIC_iALUi, - opc, ".w\t$dst, $rhs, $lhs", - [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]> { + def ri : T2sI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs), IIC_iALUi, + opc, ".w\t$dst, $rhs, $lhs", + [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; let Inst{24-21} = opcod; - let Inst{20} = 0; // The S bit. + let Inst{20} = ?; // The S bit. let Inst{15} = 0; } // shifted register - def rs : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs), IIC_iALUsi, - opc, "\t$dst, $rhs, $lhs", - [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]> { + def rs : T2sI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs), IIC_iALUsi, + opc, "\t$dst, $rhs, $lhs", + [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; - let Inst{20} = 0; // The S bit. + let Inst{20} = ?; // The S bit. } } @@ -461,10 +461,9 @@ multiclass T2I_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode, let Defs = [CPSR] in { multiclass T2I_rbin_s_is<bits<4> opcod, string opc, PatFrag opnode> { // shifted imm - def ri : T2XI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs, cc_out:$s), - IIC_iALUi, - !strconcat(opc, "${s}.w\t$dst, $rhs, $lhs"), - [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]> { + def ri : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs), IIC_iALUi, + !strconcat(opc, "s"), ".w\t$dst, $rhs, $lhs", + [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; let Inst{24-21} = opcod; @@ -472,10 +471,9 @@ multiclass T2I_rbin_s_is<bits<4> opcod, string opc, PatFrag opnode> { let Inst{15} = 0; } // shifted register - def rs : T2XI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs, cc_out:$s), - IIC_iALUsi, - !strconcat(opc, "${s}\t$dst, $rhs, $lhs"), - [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]> { + def rs : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs), IIC_iALUsi, + !strconcat(opc, "s"), "\t$dst, $rhs, $lhs", + [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; @@ -639,7 +637,8 @@ multiclass T2I_st<bits<2> opcod, string opc, PatFrag opnode> { multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> { def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, opc, ".w\t$dst, $src", - [(set GPR:$dst, (opnode GPR:$src))]> { + [(set GPR:$dst, (opnode GPR:$src))]>, + Requires<[HasT2ExtractPack]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; let Inst{22-20} = opcod; @@ -650,7 +649,8 @@ multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> { } def r_rot : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$rot), IIC_iUNAsi, opc, ".w\t$dst, $src, ror $rot", - [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]> { + [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]>, + Requires<[HasT2ExtractPack]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; let Inst{22-20} = opcod; @@ -665,7 +665,8 @@ multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> { multiclass T2I_unary_rrot_nw<bits<3> opcod, string opc, PatFrag opnode> { def r : T2I<(outs 
GPR:$dst), (ins GPR:$src), IIC_iUNAr, opc, "\t$dst, $src", - [(set GPR:$dst, (opnode GPR:$src))]> { + [(set GPR:$dst, (opnode GPR:$src))]>, + Requires<[HasT2ExtractPack]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; let Inst{22-20} = opcod; @@ -676,7 +677,8 @@ multiclass T2I_unary_rrot_nw<bits<3> opcod, string opc, PatFrag opnode> { } def r_rot : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$rot), IIC_iUNAsi, opc, "\t$dst, $src, ror $rot", - [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]> { + [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]>, + Requires<[HasT2ExtractPack]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; let Inst{22-20} = opcod; @@ -717,7 +719,8 @@ multiclass T2I_unary_rrot_DO<bits<3> opcod, string opc> { multiclass T2I_bin_rrot<bits<3> opcod, string opc, PatFrag opnode> { def rr : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS), IIC_iALUr, opc, "\t$dst, $LHS, $RHS", - [(set GPR:$dst, (opnode GPR:$LHS, GPR:$RHS))]> { + [(set GPR:$dst, (opnode GPR:$LHS, GPR:$RHS))]>, + Requires<[HasT2ExtractPack]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; let Inst{22-20} = opcod; @@ -728,7 +731,8 @@ multiclass T2I_bin_rrot<bits<3> opcod, string opc, PatFrag opnode> { def rr_rot : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS, i32imm:$rot), IIC_iALUsr, opc, "\t$dst, $LHS, $RHS, ror $rot", [(set GPR:$dst, (opnode GPR:$LHS, - (rotr GPR:$RHS, rot_imm:$rot)))]> { + (rotr GPR:$RHS, rot_imm:$rot)))]>, + Requires<[HasT2ExtractPack]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; let Inst{22-20} = opcod; @@ -771,6 +775,8 @@ multiclass T2I_bin_rrot_DO<bits<3> opcod, string opc> { // LEApcrel - Load a pc-relative address into a register without offending the // assembler. +let neverHasSideEffects = 1 in { +let isReMaterializable = 1 in def t2LEApcrel : T2XI<(outs GPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi, "adr$p.w\t$dst, #$label", []> { let Inst{31-27} = 0b11110; @@ -792,6 +798,7 @@ def t2LEApcrelJT : T2XI<(outs GPR:$dst), let Inst{19-16} = 0b1111; // Rn let Inst{15} = 0; } +} // neverHasSideEffects // ADD r, sp, {so_imm|i12} def t2ADDrSPi : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm), @@ -856,9 +863,11 @@ def t2SUBrSPs : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs), let Inst{15} = 0; } -// Signed and unsigned division, for disassembly only +// Signed and unsigned division on v7-M def t2SDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi, - "sdiv", "\t$dst, $a, $b", []> { + "sdiv", "\t$dst, $a, $b", + [(set GPR:$dst, (sdiv GPR:$a, GPR:$b))]>, + Requires<[HasDivide]> { let Inst{31-27} = 0b11111; let Inst{26-21} = 0b011100; let Inst{20} = 0b1; @@ -867,7 +876,9 @@ def t2SDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi, } def t2UDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi, - "udiv", "\t$dst, $a, $b", []> { + "udiv", "\t$dst, $a, $b", + [(set GPR:$dst, (udiv GPR:$a, GPR:$b))]>, + Requires<[HasDivide]> { let Inst{31-27} = 0b11111; let Inst{26-21} = 0b011101; let Inst{20} = 0b1; @@ -878,11 +889,11 @@ def t2UDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi, // Pseudo instruction that will expand into a t2SUBrSPi + a copy. let usesCustomInserter = 1 in { // Expanded after instruction selection. 
def t2SUBrSPi_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm), - NoItinerary, "@ sub.w\t$dst, $sp, $imm", []>; + NoItinerary, "${:comment} sub.w\t$dst, $sp, $imm", []>; def t2SUBrSPi12_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, imm0_4095:$imm), - NoItinerary, "@ subw\t$dst, $sp, $imm", []>; + NoItinerary, "${:comment} subw\t$dst, $sp, $imm", []>; def t2SUBrSPs_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs), - NoItinerary, "@ sub\t$dst, $sp, $rhs", []>; + NoItinerary, "${:comment} sub\t$dst, $sp, $rhs", []>; } // usesCustomInserter @@ -902,7 +913,7 @@ defm t2LDRB : T2I_ld<0, 0b00, "ldrb", UnOpFrag<(zextloadi8 node:$Src)>>; defm t2LDRSH : T2I_ld<1, 0b01, "ldrsh", UnOpFrag<(sextloadi16 node:$Src)>>; defm t2LDRSB : T2I_ld<1, 0b00, "ldrsb", UnOpFrag<(sextloadi8 node:$Src)>>; -let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { +let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { // Load doubleword def t2LDRDi8 : T2Ii8s4<1, 0, 1, (outs GPR:$dst1, GPR:$dst2), (ins t2addrmode_imm8s4:$addr), @@ -912,7 +923,7 @@ def t2LDRDpci : T2Ii8s4<1, 0, 1, (outs GPR:$dst1, GPR:$dst2), "ldrd", "\t$dst1, $addr", []> { let Inst{19-16} = 0b1111; // Rn } -} +} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 // zextload i1 -> zextload i8 def : T2Pat<(zextloadi1 t2addrmode_imm12:$addr), @@ -955,7 +966,7 @@ def : T2Pat<(extloadi16 (ARMWrapper tconstpool:$addr)), (t2LDRHpci tconstpool:$addr)>; // Indexed loads -let mayLoad = 1 in { +let mayLoad = 1, neverHasSideEffects = 1 in { def t2LDR_PRE : T2Iidxldst<0, 0b10, 1, 1, (outs GPR:$dst, GPR:$base_wb), (ins t2addrmode_imm8:$addr), AddrModeT2_i8, IndexModePre, IIC_iLoadiu, @@ -1011,7 +1022,7 @@ def t2LDRSH_POST : T2Iidxldst<1, 0b01, 1, 0, (outs GPR:$dst, GPR:$base_wb), AddrModeT2_i8, IndexModePost, IIC_iLoadiu, "ldrsh", "\t$dst, [$base], $offset", "$base = $base_wb", []>; -} +} // mayLoad = 1, neverHasSideEffects = 1 // LDRT, LDRBT, LDRHT, LDRSBT, LDRSHT all have offset mode (PUW=0b110) and are // for disassembly only. @@ -1041,7 +1052,7 @@ defm t2STRB:T2I_st<0b00,"strb",BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>; defm t2STRH:T2I_st<0b01,"strh",BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>; // Store doubleword -let mayLoad = 1, hasExtraSrcRegAllocReq = 1 in +let mayLoad = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs), (ins GPR:$src1, GPR:$src2, t2addrmode_imm8s4:$addr), IIC_iStorer, "strd", "\t$src1, $addr", []>; @@ -1204,7 +1215,7 @@ defm t2PLI : T2Ipl<1, 0, "pli">; // Load / store multiple Instructions. // -let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { +let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { def t2LDM : T2XI<(outs), (ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops), IIC_iLoadm, "ldm${addr:submode}${p}${addr:wide}\t$addr, $dsts", []> { @@ -1227,9 +1238,9 @@ def t2LDM_UPD : T2XIt<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p, let Inst{21} = 1; // The W bit. let Inst{20} = 1; // Load } -} // mayLoad, hasExtraDefRegAllocReq +} // mayLoad, neverHasSideEffects, hasExtraDefRegAllocReq -let mayStore = 1, hasExtraSrcRegAllocReq = 1 in { +let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in { def t2STM : T2XI<(outs), (ins addrmode4:$addr, pred:$p, reglist:$srcs, variable_ops), IIC_iStorem, "stm${addr:submode}${p}${addr:wide}\t$addr, $srcs", []> { @@ -1253,7 +1264,7 @@ def t2STM_UPD : T2XIt<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p, let Inst{21} = 1; // The W bit. 
let Inst{20} = 0; // Store } -} // mayStore, hasExtraSrcRegAllocReq +} // mayStore, neverHasSideEffects, hasExtraSrcRegAllocReq //===----------------------------------------------------------------------===// // Move Instructions. @@ -1564,9 +1575,9 @@ def t2MOVrx : T2sI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, } let Defs = [CPSR] in { -def t2MOVsrl_flag : T2XI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, - "lsrs.w\t$dst, $src, #1", - [(set GPR:$dst, (ARMsrl_flag GPR:$src))]> { +def t2MOVsrl_flag : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, + "lsrs", ".w\t$dst, $src, #1", + [(set GPR:$dst, (ARMsrl_flag GPR:$src))]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = 0b0010; @@ -1577,9 +1588,9 @@ def t2MOVsrl_flag : T2XI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, let Inst{14-12} = 0b000; let Inst{7-6} = 0b01; } -def t2MOVsra_flag : T2XI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, - "asrs.w\t$dst, $src, #1", - [(set GPR:$dst, (ARMsra_flag GPR:$src))]> { +def t2MOVsra_flag : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, + "asrs", ".w\t$dst, $src, #1", + [(set GPR:$dst, (ARMsra_flag GPR:$src))]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = 0b0010; @@ -2058,7 +2069,8 @@ def t2PKHBT : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt), IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2, lsl $shamt", [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF), (and (shl GPR:$src2, (i32 imm:$shamt)), - 0xFFFF0000)))]> { + 0xFFFF0000)))]>, + Requires<[HasT2ExtractPack]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-20} = 0b01100; @@ -2068,15 +2080,18 @@ def t2PKHBT : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt), // Alternate cases for PKHBT where identities eliminate some nodes. def : T2Pat<(or (and GPR:$src1, 0xFFFF), (and GPR:$src2, 0xFFFF0000)), - (t2PKHBT GPR:$src1, GPR:$src2, 0)>; + (t2PKHBT GPR:$src1, GPR:$src2, 0)>, + Requires<[HasT2ExtractPack]>; def : T2Pat<(or (and GPR:$src1, 0xFFFF), (shl GPR:$src2, imm16_31:$shamt)), - (t2PKHBT GPR:$src1, GPR:$src2, imm16_31:$shamt)>; + (t2PKHBT GPR:$src1, GPR:$src2, imm16_31:$shamt)>, + Requires<[HasT2ExtractPack]>; def t2PKHTB : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt), IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2, asr $shamt", [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF0000), (and (sra GPR:$src2, imm16_31:$shamt), - 0xFFFF)))]> { + 0xFFFF)))]>, + Requires<[HasT2ExtractPack]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-20} = 0b01100; @@ -2087,10 +2102,12 @@ def t2PKHTB : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt), // Alternate cases for PKHTB where identities eliminate some nodes. Note that // a shift amount of 0 is *not legal* here, it is PKHBT instead. def : T2Pat<(or (and GPR:$src1, 0xFFFF0000), (srl GPR:$src2, (i32 16))), - (t2PKHTB GPR:$src1, GPR:$src2, 16)>; + (t2PKHTB GPR:$src1, GPR:$src2, 16)>, + Requires<[HasT2ExtractPack]>; def : T2Pat<(or (and GPR:$src1, 0xFFFF0000), (and (srl GPR:$src2, imm1_15:$shamt), 0xFFFF)), - (t2PKHTB GPR:$src1, GPR:$src2, imm1_15:$shamt)>; + (t2PKHTB GPR:$src1, GPR:$src2, imm1_15:$shamt)>, + Requires<[HasT2ExtractPack]>; //===----------------------------------------------------------------------===// // Comparison Instructions... @@ -2127,6 +2144,7 @@ defm t2TEQ : T2I_cmp_irs<0b0100, "teq", // Conditional moves // FIXME: should be able to write a pattern for ARMcmov, but can't use // a two-value operand where a dag node expects two operands. 
:( +let neverHasSideEffects = 1 in { def t2MOVCCr : T2I<(outs GPR:$dst), (ins GPR:$false, GPR:$true), IIC_iCMOVr, "mov", ".w\t$dst, $true", [/*(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc, CCR:$ccr))*/]>, @@ -2178,6 +2196,7 @@ def t2MOVCCror : T2I_movcc_sh<0b11, (outs GPR:$dst), (ins GPR:$false, GPR:$true, i32imm:$rhs), IIC_iCMOVsi, "ror", ".w\t$dst, $true, $rhs", []>, RegConstraint<"$false = $dst">; +} // neverHasSideEffects //===----------------------------------------------------------------------===// // Atomic operations intrinsics @@ -2378,13 +2397,13 @@ let Defs = D31 ] in { def t2Int_eh_sjlj_setjmp : Thumb2XI<(outs), (ins GPR:$src, tGPR:$val), AddrModeNone, SizeSpecial, NoItinerary, - "str\t$val, [$src, #8]\t@ begin eh.setjmp\n" + "str\t$val, [$src, #8]\t${:comment} begin eh.setjmp\n" "\tmov\t$val, pc\n" - "\tadds\t$val, #9\n" + "\tadds\t$val, #7\n" "\tstr\t$val, [$src, #4]\n" "\tmovs\tr0, #0\n" "\tb\t1f\n" - "\tmovs\tr0, #1\t@ end eh.setjmp\n" + "\tmovs\tr0, #1\t${:comment} end eh.setjmp\n" "1:", "", [(set R0, (ARMeh_sjlj_setjmp GPR:$src, tGPR:$val))]>, Requires<[IsThumb2, HasVFP2]>; @@ -2394,13 +2413,13 @@ let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR ] in { def t2Int_eh_sjlj_setjmp_nofp : Thumb2XI<(outs), (ins GPR:$src, tGPR:$val), AddrModeNone, SizeSpecial, NoItinerary, - "str\t$val, [$src, #8]\t@ begin eh.setjmp\n" + "str\t$val, [$src, #8]\t${:comment} begin eh.setjmp\n" "\tmov\t$val, pc\n" - "\tadds\t$val, #9\n" + "\tadds\t$val, #7\n" "\tstr\t$val, [$src, #4]\n" "\tmovs\tr0, #0\n" "\tb\t1f\n" - "\tmovs\tr0, #1\t@ end eh.setjmp\n" + "\tmovs\tr0, #1\t${:comment} end eh.setjmp\n" "1:", "", [(set R0, (ARMeh_sjlj_setjmp GPR:$src, tGPR:$val))]>, Requires<[IsThumb2, NoVFP]>; @@ -2672,7 +2691,7 @@ def : T2Pat<(ARMWrapperJT tjumptable:$dst, imm:$id), // scheduling. let canFoldAsLoad = 1, isReMaterializable = 1 in def t2LDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp), - NoItinerary, "@ ldr.w\t$dst, $addr\n$cp:\n\tadd\t$dst, pc", + NoItinerary, "${:comment} ldr.w\t$dst, $addr\n$cp:\n\tadd\t$dst, pc", [(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)), imm:$cp))]>, Requires<[IsThumb2]>; diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 36fcaa1..54474cf 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -76,7 +76,7 @@ def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$src, addrmode5:$addr), // Load / store multiple Instructions. 
// -let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { +let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { def VLDMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$dsts, variable_ops), IndexModeNone, IIC_fpLoadm, "vldm${addr:submode}${p}\t${addr:base}, $dsts", "", []> { @@ -104,9 +104,9 @@ def VLDMS_UPD : AXSI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p, "$addr.base = $wb", []> { let Inst{20} = 1; } -} // mayLoad, hasExtraDefRegAllocReq +} // mayLoad, neverHasSideEffects, hasExtraDefRegAllocReq -let mayStore = 1, hasExtraSrcRegAllocReq = 1 in { +let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in { def VSTMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$srcs, variable_ops), IndexModeNone, IIC_fpStorem, "vstm${addr:submode}${p}\t${addr:base}, $srcs", "", []> { @@ -134,7 +134,7 @@ def VSTMS_UPD : AXSI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p, "$addr.base = $wb", []> { let Inst{20} = 0; } -} // mayStore, hasExtraSrcRegAllocReq +} // mayStore, neverHasSideEffects, hasExtraSrcRegAllocReq // FLDMX, FSTMX - mixing S/D registers for pre-armv6 cores @@ -313,6 +313,7 @@ def VMOVSR : AVConv4I<0b11100000, 0b1010, (outs SPR:$dst), (ins GPR:$src), IIC_fpMOVIS, "vmov", "\t$dst, $src", [(set SPR:$dst, (bitconvert GPR:$src))]>; +let neverHasSideEffects = 1 in { def VMOVRRD : AVConv3I<0b11000101, 0b1011, (outs GPR:$wb, GPR:$dst2), (ins DPR:$src), IIC_fpMOVDI, "vmov", "\t$wb, $dst2, $src", @@ -326,6 +327,7 @@ def VMOVRRS : AVConv3I<0b11000101, 0b1010, [/* For disassembly only; pattern left blank */]> { let Inst{7-6} = 0b00; } +} // neverHasSideEffects // FMDHR: GPR -> SPR // FMDLR: GPR -> SPR @@ -337,6 +339,7 @@ def VMOVDRR : AVConv5I<0b11000100, 0b1011, let Inst{7-6} = 0b00; } +let neverHasSideEffects = 1 in def VMOVSRR : AVConv5I<0b11000100, 0b1010, (outs SPR:$dst1, SPR:$dst2), (ins GPR:$src1, GPR:$src2), IIC_fpMOVID, "vmov", "\t$dst1, $dst2, $src1, $src2", @@ -606,6 +609,7 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0, // FP Conditional moves. // +let neverHasSideEffects = 1 in { def VMOVDcc : ADuI<0b11101, 0b11, 0b0000, 0b01, 0, (outs DPR:$dst), (ins DPR:$false, DPR:$true), IIC_fpUNA64, "vmov", ".f64\t$dst, $true", @@ -629,7 +633,7 @@ def VNEGScc : ASuI<0b11101, 0b11, 0b0001, 0b01, 0, IIC_fpUNA32, "vneg", ".f32\t$dst, $true", [/*(set SPR:$dst, (ARMcneg SPR:$false, SPR:$true, imm:$cc))*/]>, RegConstraint<"$false = $dst">; - +} // neverHasSideEffects //===----------------------------------------------------------------------===// // Misc. @@ -651,6 +655,7 @@ def FMSTAT : VFPAI<(outs), (ins), VFPMiscFrm, IIC_fpSTAT, "vmrs", // FPSCR <-> GPR (for disassembly only) +let neverHasSideEffects = 1 in { let Uses = [FPSCR] in { def VMRS : VFPAI<(outs GPR:$dst), (ins), VFPMiscFrm, IIC_fpSTAT, "vmrs", "\t$dst, fpscr", @@ -674,6 +679,7 @@ def VMSR : VFPAI<(outs), (ins GPR:$src), VFPMiscFrm, IIC_fpSTAT, "vmsr", let Inst{4} = 1; } } +} // neverHasSideEffects // Materialize FP immediates. VFP3 only. 
let isReMaterializable = 1 in { diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp index b31a4fa..5f6d7ee 100644 --- a/lib/Target/ARM/ARMJITInfo.cpp +++ b/lib/Target/ARM/ARMJITInfo.cpp @@ -318,6 +318,18 @@ void ARMJITInfo::relocate(void *Function, MachineRelocation *MR, *((intptr_t*)RelocPos) |= ResultPtr; break; } + case ARM::reloc_arm_movw: { + // MOVW holds the low half of the address; its imm16 is split across + // the instruction's imm12 (bits 11-0) and imm4 (bits 19-16) fields. + ResultPtr = ResultPtr & 0xFFFF; + *((intptr_t*)RelocPos) |= ResultPtr & 0xFFF; + *((intptr_t*)RelocPos) |= ((ResultPtr >> 12) & 0xF) << 16; + break; + } + case ARM::reloc_arm_movt: { + // MOVT holds the high half, with the same imm4:imm12 field split. + ResultPtr = (ResultPtr >> 16) & 0xFFFF; + *((intptr_t*)RelocPos) |= ResultPtr & 0xFFF; + *((intptr_t*)RelocPos) |= ((ResultPtr >> 12) & 0xF) << 16; + break; + } } } } diff --git a/lib/Target/ARM/ARMRegisterInfo.h b/lib/Target/ARM/ARMRegisterInfo.h index 041afd0..8edfb9a 100644 --- a/lib/Target/ARM/ARMRegisterInfo.h +++ b/lib/Target/ARM/ARMRegisterInfo.h @@ -23,16 +23,6 @@ namespace llvm { class ARMBaseInstrInfo; class Type; -namespace ARM { - /// SubregIndex - The index of various subregister classes. Note that - /// these indices must be kept in sync with the class indices in the - /// ARMRegisterInfo.td file. - enum SubregIndex { - SSUBREG_0 = 1, SSUBREG_1 = 2, SSUBREG_2 = 3, SSUBREG_3 = 4, - DSUBREG_0 = 5, DSUBREG_1 = 6 - }; -} - struct ARMRegisterInfo : public ARMBaseRegisterInfo { public: ARMRegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &STI); diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index 0d4200c..6beca8b 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -23,6 +23,44 @@ class ARMFReg<bits<6> num, string n> : Register<n> { let Namespace = "ARM"; } +// Subregister indices. +let Namespace = "ARM" in { +// Note: Code depends on these having consecutive numbers. +def ssub_0 : SubRegIndex; +def ssub_1 : SubRegIndex; +def ssub_2 : SubRegIndex; // In a Q reg. +def ssub_3 : SubRegIndex; +def ssub_4 : SubRegIndex; // In a QQ reg. +def ssub_5 : SubRegIndex; +def ssub_6 : SubRegIndex; +def ssub_7 : SubRegIndex; +def ssub_8 : SubRegIndex; // In a QQQQ reg.
+def ssub_9 : SubRegIndex; +def ssub_10 : SubRegIndex; +def ssub_11 : SubRegIndex; +def ssub_12 : SubRegIndex; +def ssub_13 : SubRegIndex; +def ssub_14 : SubRegIndex; +def ssub_15 : SubRegIndex; + +def dsub_0 : SubRegIndex; +def dsub_1 : SubRegIndex; +def dsub_2 : SubRegIndex; +def dsub_3 : SubRegIndex; +def dsub_4 : SubRegIndex; +def dsub_5 : SubRegIndex; +def dsub_6 : SubRegIndex; +def dsub_7 : SubRegIndex; + +def qsub_0 : SubRegIndex; +def qsub_1 : SubRegIndex; +def qsub_2 : SubRegIndex; +def qsub_3 : SubRegIndex; + +def qqsub_0 : SubRegIndex; +def qqsub_1 : SubRegIndex; +} + // Integer registers def R0 : ARMReg< 0, "r0">, DwarfRegNum<[0]>; def R1 : ARMReg< 1, "r1">, DwarfRegNum<[1]>; @@ -58,9 +96,9 @@ def S24 : ARMFReg<24, "s24">; def S25 : ARMFReg<25, "s25">; def S26 : ARMFReg<26, "s26">; def S27 : ARMFReg<27, "s27">; def S28 : ARMFReg<28, "s28">; def S29 : ARMFReg<29, "s29">; def S30 : ARMFReg<30, "s30">; def S31 : ARMFReg<31, "s31">; -def SDummy : ARMFReg<63, "sINVALID">; // Aliases of the F* registers used to hold 64-bit fp values (doubles) +let SubRegIndices = [ssub_0, ssub_1] in { def D0 : ARMReg< 0, "d0", [S0, S1]>; def D1 : ARMReg< 1, "d1", [S2, S3]>; def D2 : ARMReg< 2, "d2", [S4, S5]>; @@ -77,6 +115,7 @@ def D12 : ARMReg<12, "d12", [S24, S25]>; def D13 : ARMReg<13, "d13", [S26, S27]>; def D14 : ARMReg<14, "d14", [S28, S29]>; def D15 : ARMReg<15, "d15", [S30, S31]>; +} // VFP3 defines 16 additional double registers def D16 : ARMFReg<16, "d16">; def D17 : ARMFReg<17, "d17">; @@ -89,6 +128,9 @@ def D28 : ARMFReg<28, "d28">; def D29 : ARMFReg<29, "d29">; def D30 : ARMFReg<30, "d30">; def D31 : ARMFReg<31, "d31">; // Advanced SIMD (NEON) defines 16 quad-word aliases +let SubRegIndices = [dsub_0, dsub_1], + CompositeIndices = [(ssub_2 dsub_1, ssub_0), + (ssub_3 dsub_1, ssub_1)] in { def Q0 : ARMReg< 0, "q0", [D0, D1]>; def Q1 : ARMReg< 1, "q1", [D2, D3]>; def Q2 : ARMReg< 2, "q2", [D4, D5]>; @@ -97,6 +139,8 @@ def Q4 : ARMReg< 4, "q4", [D8, D9]>; def Q5 : ARMReg< 5, "q5", [D10, D11]>; def Q6 : ARMReg< 6, "q6", [D12, D13]>; def Q7 : ARMReg< 7, "q7", [D14, D15]>; +} +let SubRegIndices = [dsub_0, dsub_1] in { def Q8 : ARMReg< 8, "q8", [D16, D17]>; def Q9 : ARMReg< 9, "q9", [D18, D19]>; def Q10 : ARMReg<10, "q10", [D20, D21]>; @@ -105,6 +149,51 @@ def Q12 : ARMReg<12, "q12", [D24, D25]>; def Q13 : ARMReg<13, "q13", [D26, D27]>; def Q14 : ARMReg<14, "q14", [D28, D29]>; def Q15 : ARMReg<15, "q15", [D30, D31]>; +} + +// Pseudo 256-bit registers to represent pairs of Q registers. These should +// never be present in the emitted code. +// These are used for NEON load / store instructions, e.g. vld4, vst3. +// NOTE: It's possible to define more QQ registers since technically the +// starting D register number doesn't have to be a multiple of 4, e.g. +// D1, D2, D3, D4 would be a legal quad. But that would make the sub-register +// stuff very messy.
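Concretely, the pairing that the SubRegIndices and CompositeIndices on the defs just below encode is QQn = {Q(2n), Q(2n+1)} = {D(4n) .. D(4n+3)}, and each QQQQn spans twice that. A small sketch of the mapping (plain arithmetic for illustration, not an LLVM API):

    #include <cstdio>

    // Print how each pseudo register decomposes into its Q and D parts.
    int main() {
      for (unsigned n = 0; n != 8; ++n)   // QQ0..QQ7
        std::printf("QQ%u = {Q%u, Q%u} = {D%u..D%u}\n",
                    n, 2 * n, 2 * n + 1, 4 * n, 4 * n + 3);
      for (unsigned n = 0; n != 4; ++n)   // QQQQ0..QQQQ3
        std::printf("QQQQ%u = {QQ%u, QQ%u} = {D%u..D%u}\n",
                    n, 2 * n, 2 * n + 1, 8 * n, 8 * n + 7);
    }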
+let SubRegIndices = [qsub_0, qsub_1] in { +let CompositeIndices = [(dsub_2 qsub_1, dsub_0), (dsub_3 qsub_1, dsub_1), + (ssub_4 qsub_1, ssub_0), (ssub_5 qsub_1, ssub_1), + (ssub_6 qsub_1, ssub_2), (ssub_7 qsub_1, ssub_3)] in { +def QQ0 : ARMReg<0, "qq0", [Q0, Q1]>; +def QQ1 : ARMReg<1, "qq1", [Q2, Q3]>; +def QQ2 : ARMReg<2, "qq2", [Q4, Q5]>; +def QQ3 : ARMReg<3, "qq3", [Q6, Q7]>; +} +let CompositeIndices = [(dsub_2 qsub_1, dsub_0), (dsub_3 qsub_1, dsub_1)] in { +def QQ4 : ARMReg<4, "qq4", [Q8, Q9]>; +def QQ5 : ARMReg<5, "qq5", [Q10, Q11]>; +def QQ6 : ARMReg<6, "qq6", [Q12, Q13]>; +def QQ7 : ARMReg<7, "qq7", [Q14, Q15]>; +} +} + +// Pseudo 512-bit registers to represent four consecutive Q registers. +let SubRegIndices = [qqsub_0, qqsub_1] in { +let CompositeIndices = [(qsub_2 qqsub_1, qsub_0), (qsub_3 qqsub_1, qsub_1), + (dsub_4 qqsub_1, dsub_0), (dsub_5 qqsub_1, dsub_1), + (dsub_6 qqsub_1, dsub_2), (dsub_7 qqsub_1, dsub_3), + (ssub_8 qqsub_1, ssub_0), (ssub_9 qqsub_1, ssub_1), + (ssub_10 qqsub_1, ssub_2), (ssub_11 qqsub_1, ssub_3), + (ssub_12 qqsub_1, ssub_4), (ssub_13 qqsub_1, ssub_5), + (ssub_14 qqsub_1, ssub_6), (ssub_15 qqsub_1, ssub_7)] in { +def QQQQ0 : ARMReg<0, "qqqq0", [QQ0, QQ1]>; +def QQQQ1 : ARMReg<1, "qqqq1", [QQ2, QQ3]>; +} +let CompositeIndices = [(qsub_2 qqsub_1, qsub_0), (qsub_3 qqsub_1, qsub_1), + (dsub_4 qqsub_1, dsub_0), (dsub_5 qqsub_1, dsub_1), + (dsub_6 qqsub_1, dsub_2), (dsub_7 qqsub_1, dsub_3)] in { +def QQQQ2 : ARMReg<2, "qqqq2", [QQ4, QQ5]>; +def QQQQ3 : ARMReg<3, "qqqq3", [QQ6, QQ7]>; +} +} // Current Program Status Register. def CPSR : ARMReg<0, "cpsr">; @@ -270,11 +359,6 @@ def SPR_8 : RegisterClass<"ARM", [f32], 32, [S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15]>; -// Dummy f32 regclass to represent impossible subreg indices. -def SPR_INVALID : RegisterClass<"ARM", [f32], 32, [SDummy]> { - let CopyCost = -1; -} - // Scalar double precision floating point / generic 64-bit vector register // class. // ARM requires only word alignment for double. It's more performant if it @@ -284,7 +368,6 @@ def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64, D8, D9, D10, D11, D12, D13, D14, D15, D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30, D31]> { - let SubRegClassList = [SPR_INVALID, SPR_INVALID]; let MethodProtos = [{ iterator allocation_order_begin(const MachineFunction &MF) const; iterator allocation_order_end(const MachineFunction &MF) const; @@ -332,79 +415,68 @@ def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64, def DPR_VFP2 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64, [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15]> { - let SubRegClassList = [SPR, SPR]; + let SubRegClasses = [(SPR ssub_0, ssub_1)]; } // Subset of DPR which can be used as a source of NEON scalars for 16-bit // operations def DPR_8 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64, [D0, D1, D2, D3, D4, D5, D6, D7]> { - let SubRegClassList = [SPR_8, SPR_8]; + let SubRegClasses = [(SPR_8 ssub_0, ssub_1)]; } // Generic 128-bit vector register class. def QPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128, [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15]> { - let SubRegClassList = [SPR_INVALID, SPR_INVALID, SPR_INVALID, SPR_INVALID, - DPR, DPR]; + let SubRegClasses = [(DPR dsub_0, dsub_1)]; } // Subset of QPR that have 32-bit SPR subregs. 
def QPR_VFP2 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128, [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]> { - let SubRegClassList = [SPR, SPR, SPR, SPR, DPR_VFP2, DPR_VFP2]; + let SubRegClasses = [(SPR ssub_0, ssub_1, ssub_2, ssub_3), + (DPR_VFP2 dsub_0, dsub_1)]; } // Subset of QPR that have DPR_8 and SPR_8 subregs. def QPR_8 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128, [Q0, Q1, Q2, Q3]> { - let SubRegClassList = [SPR_8, SPR_8, SPR_8, SPR_8, DPR_8, DPR_8]; + let SubRegClasses = [(SPR_8 ssub_0, ssub_1, ssub_2, ssub_3), + (DPR_8 dsub_0, dsub_1)]; +} + +// Pseudo 256-bit vector register class to model pairs of Q registers +// (4 consecutive D registers). +def QQPR : RegisterClass<"ARM", [v4i64], + 256, + [QQ0, QQ1, QQ2, QQ3, QQ4, QQ5, QQ6, QQ7]> { + let SubRegClasses = [(DPR dsub_0, dsub_1, dsub_2, dsub_3), + (QPR qsub_0, qsub_1)]; +} + +// Subset of QQPR that have 32-bit SPR subregs. +def QQPR_VFP2 : RegisterClass<"ARM", [v4i64], + 256, + [QQ0, QQ1, QQ2, QQ3]> { + let SubRegClasses = [(SPR ssub_0, ssub_1, ssub_2, ssub_3), + (DPR_VFP2 dsub_0, dsub_1, dsub_2, dsub_3), + (QPR_VFP2 qsub_0, qsub_1)]; + +} + +// Pseudo 512-bit vector register class to model 4 consecutive Q registers +// (8 consecutive D registers). +def QQQQPR : RegisterClass<"ARM", [v8i64], + 256, + [QQQQ0, QQQQ1, QQQQ2, QQQQ3]> { + let SubRegClasses = [(DPR dsub_0, dsub_1, dsub_2, dsub_3, + dsub_4, dsub_5, dsub_6, dsub_7), + (QPR qsub_0, qsub_1, qsub_2, qsub_3)]; } // Condition code registers. def CCR : RegisterClass<"ARM", [i32], 32, [CPSR]>; -//===----------------------------------------------------------------------===// -// Subregister Set Definitions... now that we have all of the pieces, define the -// sub registers for each register. -// - -def arm_ssubreg_0 : PatLeaf<(i32 1)>; -def arm_ssubreg_1 : PatLeaf<(i32 2)>; -def arm_ssubreg_2 : PatLeaf<(i32 3)>; -def arm_ssubreg_3 : PatLeaf<(i32 4)>; -def arm_dsubreg_0 : PatLeaf<(i32 5)>; -def arm_dsubreg_1 : PatLeaf<(i32 6)>; - -// S sub-registers of D registers. -def : SubRegSet<1, [D0, D1, D2, D3, D4, D5, D6, D7, - D8, D9, D10, D11, D12, D13, D14, D15], - [S0, S2, S4, S6, S8, S10, S12, S14, - S16, S18, S20, S22, S24, S26, S28, S30]>; -def : SubRegSet<2, [D0, D1, D2, D3, D4, D5, D6, D7, - D8, D9, D10, D11, D12, D13, D14, D15], - [S1, S3, S5, S7, S9, S11, S13, S15, - S17, S19, S21, S23, S25, S27, S29, S31]>; - -// S sub-registers of Q registers. -def : SubRegSet<1, [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7], - [S0, S4, S8, S12, S16, S20, S24, S28]>; -def : SubRegSet<2, [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7], - [S1, S5, S9, S13, S17, S21, S25, S29]>; -def : SubRegSet<3, [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7], - [S2, S6, S10, S14, S18, S22, S26, S30]>; -def : SubRegSet<4, [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7], - [S3, S7, S11, S15, S19, S23, S27, S31]>; - -// D sub-registers of Q registers. -def : SubRegSet<5, [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, - Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15], - [D0, D2, D4, D6, D8, D10, D12, D14, - D16, D18, D20, D22, D24, D26, D28, D30]>; -def : SubRegSet<6, [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, - Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15], - [D1, D3, D5, D7, D9, D11, D13, D15, - D17, D19, D21, D23, D25, D27, D29, D31]>; diff --git a/lib/Target/ARM/ARMRelocations.h b/lib/Target/ARM/ARMRelocations.h index 2cc2950..86e7206 100644 --- a/lib/Target/ARM/ARMRelocations.h +++ b/lib/Target/ARM/ARMRelocations.h @@ -47,7 +47,13 @@ namespace llvm { reloc_arm_pic_jt, // reloc_arm_branch - Branch address relocation. 
- reloc_arm_branch + reloc_arm_branch, + + // reloc_arm_movt - MOVT immediate relocation. + reloc_arm_movt, + + // reloc_arm_movw - MOVW immediate relocation. + reloc_arm_movw }; } } diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp index c04ee38..a289407 100644 --- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp +++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp @@ -12,11 +12,123 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "arm-selectiondag-info" -#include "ARMSelectionDAGInfo.h" +#include "ARMTargetMachine.h" using namespace llvm; -ARMSelectionDAGInfo::ARMSelectionDAGInfo() { +ARMSelectionDAGInfo::ARMSelectionDAGInfo(const TargetMachine &TM) + : TargetSelectionDAGInfo(TM), + Subtarget(&TM.getSubtarget<ARMSubtarget>()) { } ARMSelectionDAGInfo::~ARMSelectionDAGInfo() { } + +SDValue +ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, + SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, + bool isVolatile, bool AlwaysInline, + const Value *DstSV, + uint64_t DstSVOff, + const Value *SrcSV, + uint64_t SrcSVOff) const { + // Do repeated 4-byte loads and stores. To be improved. + // This requires 4-byte alignment. + if ((Align & 3) != 0) + return SDValue(); + // This requires the copy size to be a constant, preferably + // within a subtarget-specific limit. + ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); + if (!ConstantSize) + return SDValue(); + uint64_t SizeVal = ConstantSize->getZExtValue(); + if (!AlwaysInline && SizeVal > Subtarget->getMaxInlineSizeThreshold()) + return SDValue(); + + unsigned BytesLeft = SizeVal & 3; + unsigned NumMemOps = SizeVal >> 2; + unsigned EmittedNumMemOps = 0; + EVT VT = MVT::i32; + unsigned VTSize = 4; + unsigned i = 0; + const unsigned MAX_LOADS_IN_LDM = 6; + SDValue TFOps[MAX_LOADS_IN_LDM]; + SDValue Loads[MAX_LOADS_IN_LDM]; + uint64_t SrcOff = 0, DstOff = 0; + + // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the + // same number of stores. The loads and stores will get combined into + // ldm/stm later on. + while (EmittedNumMemOps < NumMemOps) { + for (i = 0; + i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { + Loads[i] = DAG.getLoad(VT, dl, Chain, + DAG.getNode(ISD::ADD, dl, MVT::i32, Src, + DAG.getConstant(SrcOff, MVT::i32)), + SrcSV, SrcSVOff + SrcOff, isVolatile, false, 0); + TFOps[i] = Loads[i].getValue(1); + SrcOff += VTSize; + } + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); + + for (i = 0; + i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { + TFOps[i] = DAG.getStore(Chain, dl, Loads[i], + DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, + DAG.getConstant(DstOff, MVT::i32)), + DstSV, DstSVOff + DstOff, isVolatile, false, 0); + DstOff += VTSize; + } + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); + + EmittedNumMemOps += i; + } + + if (BytesLeft == 0) + return Chain; + + // Issue loads / stores for the trailing (1 - 3) bytes.
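+ // BytesLeft is SizeVal & 3 at this point, i.e. 1-3 bytes: they are moved + // with at most one i16 and one i8 operation, emitted once below for the + // loads and then again for the matching stores.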
+ unsigned BytesLeftSave = BytesLeft; + i = 0; + while (BytesLeft) { + if (BytesLeft >= 2) { + VT = MVT::i16; + VTSize = 2; + } else { + VT = MVT::i8; + VTSize = 1; + } + + Loads[i] = DAG.getLoad(VT, dl, Chain, + DAG.getNode(ISD::ADD, dl, MVT::i32, Src, + DAG.getConstant(SrcOff, MVT::i32)), + SrcSV, SrcSVOff + SrcOff, false, false, 0); + TFOps[i] = Loads[i].getValue(1); + ++i; + SrcOff += VTSize; + BytesLeft -= VTSize; + } + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); + + i = 0; + BytesLeft = BytesLeftSave; + while (BytesLeft) { + if (BytesLeft >= 2) { + VT = MVT::i16; + VTSize = 2; + } else { + VT = MVT::i8; + VTSize = 1; + } + + TFOps[i] = DAG.getStore(Chain, dl, Loads[i], + DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, + DAG.getConstant(DstOff, MVT::i32)), + DstSV, DstSVOff + DstOff, false, false, 0); + ++i; + DstOff += VTSize; + BytesLeft -= VTSize; + } + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); +} diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.h b/lib/Target/ARM/ARMSelectionDAGInfo.h index afe9a47..d7d00c2 100644 --- a/lib/Target/ARM/ARMSelectionDAGInfo.h +++ b/lib/Target/ARM/ARMSelectionDAGInfo.h @@ -19,9 +19,24 @@ namespace llvm { class ARMSelectionDAGInfo : public TargetSelectionDAGInfo { + /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can + /// make the right decision when generating code for different targets. + const ARMSubtarget *Subtarget; + public: - ARMSelectionDAGInfo(); + explicit ARMSelectionDAGInfo(const TargetMachine &TM); ~ARMSelectionDAGInfo(); + + virtual + SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, + SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, + bool isVolatile, bool AlwaysInline, + const Value *DstSV, + uint64_t DstSVOff, + const Value *SrcSV, + uint64_t SrcSVOff) const; }; } diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index b11580a..10fd257 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -39,6 +39,8 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, , IsR9Reserved(ReserveR9) , UseMovt(UseMOVT) , HasFP16(false) + , HasHardwareDivide(false) + , HasT2ExtractPack(false) , stackAlignment(4) , CPUString("generic") , TargetType(isELF) // Default to ELF unless otherwise specified. @@ -73,6 +75,8 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, unsigned SubVer = TT[Idx]; if (SubVer >= '7' && SubVer <= '9') { ARMArchVersion = V7A; + if (Len >= Idx+2 && TT[Idx+1] == 'm') + ARMArchVersion = V7M; } else if (SubVer == '6') { ARMArchVersion = V6; if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == '2') diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 288a19a..8332bba 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -26,7 +26,7 @@ class GlobalValue; class ARMSubtarget : public TargetSubtarget { protected: enum ARMArchEnum { - V4, V4T, V5T, V5TE, V6, V6T2, V7A + V4, V4T, V5T, V5TE, V6, V6T2, V7A, V7M }; enum ARMFPEnum { @@ -39,7 +39,7 @@ protected: }; /// ARMArchVersion - ARM architecture version: V4, V4T (base), V5T, V5TE, - /// V6, V6T2, V7A. + /// V6, V6T2, V7A, V7M. ARMArchEnum ARMArchVersion; /// ARMFPUType - Floating Point Unit type. 
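Stepping back to the EmitTargetCodeForMemcpy hook added above: the copy schedule is easier to see in scalar form. Below is a hedged host-side sketch in ordinary C++ (the function name is invented for illustration; the real code builds SelectionDAG nodes and batches up to six word loads before the matching stores so they can later combine into ldm/stm):

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // 4-byte words first, then the 1-3 byte tail as one i16 and/or one i8.
    static void armStyleMemcpy(uint8_t *Dst, const uint8_t *Src, size_t Size) {
      size_t NumWords = Size >> 2;
      for (size_t i = 0; i != NumWords; ++i) {
        uint32_t W;
        std::memcpy(&W, Src + 4 * i, 4);  // word load
        std::memcpy(Dst + 4 * i, &W, 4);  // word store
      }
      size_t Off = NumWords * 4;
      size_t BytesLeft = Size & 3;
      if (BytesLeft >= 2) {               // one halfword covers 2-3 leftovers
        uint16_t H;
        std::memcpy(&H, Src + Off, 2);
        std::memcpy(Dst + Off, &H, 2);
        Off += 2;
        BytesLeft -= 2;
      }
      if (BytesLeft)                      // at most a single byte remains
        Dst[Off] = Src[Off];
    }

    int main() {
      const char Msg[] = "inline memcpy: words, then the tail";
      char Buf[sizeof Msg] = {};
      armStyleMemcpy(reinterpret_cast<uint8_t *>(Buf),
                     reinterpret_cast<const uint8_t *>(Msg), sizeof Msg);
      std::puts(Buf);
    }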
@@ -74,6 +74,13 @@ protected: /// only so far) bool HasFP16; + /// HasHardwareDivide - True if subtarget supports [su]div + bool HasHardwareDivide; + + /// HasT2ExtractPack - True if subtarget supports thumb2 extract/pack + /// instructions. + bool HasT2ExtractPack; + /// stackAlignment - The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. unsigned stackAlignment; @@ -123,6 +130,8 @@ protected: bool hasNEON() const { return ARMFPUType >= NEON; } bool useNEONForSinglePrecisionFP() const { return hasNEON() && UseNEONForSinglePrecisionFP; } + bool hasDivide() const { return HasHardwareDivide; } + bool hasT2ExtractPack() const { return HasT2ExtractPack; } bool useVMLx() const {return hasVFP2() && !SlowVMLx; } bool hasFP16() const { return HasFP16; } diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 662e61e..b4a9252 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -62,7 +62,8 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, const std::string &TT, DataLayout(Subtarget.isAPCS_ABI() ? std::string("e-p:32:32-f64:32:32-i64:32:32-n32") : std::string("e-p:32:32-f64:64:64-i64:64:64-n32")), - TLInfo(*this) { + TLInfo(*this), + TSInfo(*this) { } ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT, @@ -76,7 +77,8 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT, "i16:16:32-i8:8:32-i1:8:32-a:0:32-n32") : std::string("e-p:32:32-f64:64:64-i64:64:64-" "i16:16:32-i8:8:32-i1:8:32-a:0:32-n32")), - TLInfo(*this) { + TLInfo(*this), + TSInfo(*this) { } diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index 4e205df..a222e57 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -21,6 +21,7 @@ #include "ARMJITInfo.h" #include "ARMSubtarget.h" #include "ARMISelLowering.h" +#include "ARMSelectionDAGInfo.h" #include "Thumb1InstrInfo.h" #include "Thumb2InstrInfo.h" #include "llvm/ADT/OwningPtr.h" @@ -63,6 +64,7 @@ class ARMTargetMachine : public ARMBaseTargetMachine { ARMInstrInfo InstrInfo; const TargetData DataLayout; // Calculates type size & alignment ARMTargetLowering TLInfo; + ARMSelectionDAGInfo TSInfo; public: ARMTargetMachine(const Target &T, const std::string &TT, const std::string &FS); @@ -75,6 +77,10 @@ public: return &TLInfo; } + virtual const ARMSelectionDAGInfo* getSelectionDAGInfo() const { + return &TSInfo; + } + virtual const ARMInstrInfo *getInstrInfo() const { return &InstrInfo; } virtual const TargetData *getTargetData() const { return &DataLayout; } }; @@ -88,6 +94,7 @@ class ThumbTargetMachine : public ARMBaseTargetMachine { OwningPtr<ARMBaseInstrInfo> InstrInfo; const TargetData DataLayout; // Calculates type size & alignment ARMTargetLowering TLInfo; + ARMSelectionDAGInfo TSInfo; public: ThumbTargetMachine(const Target &T, const std::string &TT, const std::string &FS); @@ -101,6 +108,10 @@ public: return &TLInfo; } + virtual const ARMSelectionDAGInfo *getSelectionDAGInfo() const { + return &TSInfo; + } + /// returns either Thumb1InstrInfo or Thumb2InstrInfo virtual const ARMBaseInstrInfo *getInstrInfo() const { return InstrInfo.get(); diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp index 80a9d2d..d95efdb 100644 --- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp @@ -319,16 +319,16 @@ void 
ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, unsigned Reg = MO.getReg(); assert(TargetRegisterInfo::isPhysicalRegister(Reg)); if (Modifier && strcmp(Modifier, "dregpair") == 0) { - unsigned DRegLo = TM.getRegisterInfo()->getSubReg(Reg, 5);// arm_dsubreg_0 - unsigned DRegHi = TM.getRegisterInfo()->getSubReg(Reg, 6);// arm_dsubreg_1 + unsigned DRegLo = TM.getRegisterInfo()->getSubReg(Reg, ARM::dsub_0); + unsigned DRegHi = TM.getRegisterInfo()->getSubReg(Reg, ARM::dsub_1); O << '{' << getRegisterName(DRegLo) << ',' << getRegisterName(DRegHi) << '}'; } else if (Modifier && strcmp(Modifier, "lane") == 0) { unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(Reg); unsigned DReg = - TM.getRegisterInfo()->getMatchingSuperReg(Reg, RegNum & 1 ? 2 : 1, - &ARM::DPR_VFP2RegClass); + TM.getRegisterInfo()->getMatchingSuperReg(Reg, + RegNum & 1 ? ARM::ssub_1 : ARM::ssub_0, &ARM::DPR_VFP2RegClass); O << getRegisterName(DReg) << '[' << (RegNum & 1) << ']'; } else { assert(!MO.getSubReg() && "Subregs should be eliminated!"); @@ -1375,13 +1375,32 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { case ARM::MOVi32imm: { // FIXME: Remove asmstring from td file. // This is a hack that lowers as a two instruction sequence. unsigned DstReg = MI->getOperand(0).getReg(); - unsigned ImmVal = (unsigned)MI->getOperand(1).getImm(); - + const MachineOperand &MO = MI->getOperand(1); + MCOperand V1, V2; + if (MO.isImm()) { + unsigned ImmVal = (unsigned)MI->getOperand(1).getImm(); + V1 = MCOperand::CreateImm(ImmVal & 65535); + V2 = MCOperand::CreateImm(ImmVal >> 16); + } else if (MO.isGlobal()) { + MCSymbol *Symbol = MCInstLowering.GetGlobalAddressSymbol(MO); + const MCSymbolRefExpr *SymRef1 = + MCSymbolRefExpr::Create(Symbol, + MCSymbolRefExpr::VK_ARM_LO16, OutContext); + const MCSymbolRefExpr *SymRef2 = + MCSymbolRefExpr::Create(Symbol, + MCSymbolRefExpr::VK_ARM_HI16, OutContext); + V1 = MCOperand::CreateExpr(SymRef1); + V2 = MCOperand::CreateExpr(SymRef2); + } else { + MI->dump(); + llvm_unreachable("cannot handle this operand"); + } + { MCInst TmpInst; TmpInst.setOpcode(ARM::MOVi16); TmpInst.addOperand(MCOperand::CreateReg(DstReg)); // dstreg - TmpInst.addOperand(MCOperand::CreateImm(ImmVal & 65535)); // lower16(imm) + TmpInst.addOperand(V1); // lower16(imm) // Predicate. TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm())); @@ -1395,7 +1414,7 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { TmpInst.setOpcode(ARM::MOVTi16); TmpInst.addOperand(MCOperand::CreateReg(DstReg)); // dstreg TmpInst.addOperand(MCOperand::CreateReg(DstReg)); // srcreg - TmpInst.addOperand(MCOperand::CreateImm(ImmVal >> 16)); // upper16(imm) + TmpInst.addOperand(V2); // upper16(imm) // Predicate. TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm())); diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp index ac6331f..2b94b76 100644 --- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp @@ -195,8 +195,8 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, // FIXME: Breaks e.g. ARM/vmul.ll. 
assert(0); /* - unsigned DRegLo = TRI->getSubReg(Reg, 5); // arm_dsubreg_0 - unsigned DRegHi = TRI->getSubReg(Reg, 6); // arm_dsubreg_1 + unsigned DRegLo = TRI->getSubReg(Reg, ARM::dsub_0); + unsigned DRegHi = TRI->getSubReg(Reg, ARM::dsub_1); O << '{' << getRegisterName(DRegLo) << ',' << getRegisterName(DRegHi) << '}';*/ @@ -217,7 +217,8 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, ((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported")); O << '#' << Op.getImm(); } else { - assert((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported"); + if (Modifier && Modifier[0] != 0 && strcmp(Modifier, "call") != 0) + llvm_unreachable("Unsupported modifier"); assert(Op.isExpr() && "unknown operand kind in printOperand"); O << *Op.getExpr(); } diff --git a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.h b/lib/Target/ARM/AsmPrinter/ARMMCInstLower.h index 383d30d..b81a306 100644 --- a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.h +++ b/lib/Target/ARM/AsmPrinter/ARMMCInstLower.h @@ -26,7 +26,7 @@ namespace llvm { //class ARMSubtarget; /// ARMMCInstLower - This class is used to lower an MachineInstr into an MCInst. -class VISIBILITY_HIDDEN ARMMCInstLower { +class LLVM_LIBRARY_VISIBILITY ARMMCInstLower { MCContext &Ctx; Mangler &Mang; AsmPrinter &Printer; diff --git a/lib/Target/ARM/NEONMoveFix.cpp b/lib/Target/ARM/NEONMoveFix.cpp index 3c0414d..0a4400c 100644 --- a/lib/Target/ARM/NEONMoveFix.cpp +++ b/lib/Target/ARM/NEONMoveFix.cpp @@ -118,7 +118,7 @@ bool NEONMoveFixPass::runOnMachineFunction(MachineFunction &Fn) { ARMFunctionInfo *AFI = Fn.getInfo<ARMFunctionInfo>(); const TargetMachine &TM = Fn.getTarget(); - if (AFI->isThumbFunction()) + if (AFI->isThumb1OnlyFunction()) return false; TRI = TM.getRegisterInfo(); diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp index ef6bf3a..a725898 100644 --- a/lib/Target/ARM/NEONPreAllocPass.cpp +++ b/lib/Target/ARM/NEONPreAllocPass.cpp @@ -33,7 +33,8 @@ namespace { private: bool FormsRegSequence(MachineInstr *MI, - unsigned FirstOpnd, unsigned NumRegs); + unsigned FirstOpnd, unsigned NumRegs, + unsigned Offset, unsigned Stride) const; bool PreAllocNEONRegisters(MachineBasicBlock &MBB); }; @@ -338,24 +339,122 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, return false; } -bool NEONPreAllocPass::FormsRegSequence(MachineInstr *MI, - unsigned FirstOpnd, unsigned NumRegs) { - MachineInstr *RegSeq = 0; +bool +NEONPreAllocPass::FormsRegSequence(MachineInstr *MI, + unsigned FirstOpnd, unsigned NumRegs, + unsigned Offset, unsigned Stride) const { + MachineOperand &FMO = MI->getOperand(FirstOpnd); + assert(FMO.isReg() && FMO.getSubReg() == 0 && "unexpected operand"); + unsigned VirtReg = FMO.getReg(); + (void)VirtReg; + assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && + "expected a virtual register"); + + unsigned LastSubIdx = 0; + if (FMO.isDef()) { + MachineInstr *RegSeq = 0; + for (unsigned R = 0; R < NumRegs; ++R) { + const MachineOperand &MO = MI->getOperand(FirstOpnd + R); + assert(MO.isReg() && MO.getSubReg() == 0 && "unexpected operand"); + unsigned VirtReg = MO.getReg(); + assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && + "expected a virtual register"); + // Feeding into a REG_SEQUENCE. 
+      if (!MRI->hasOneNonDBGUse(VirtReg))
+        return false;
+      MachineInstr *UseMI = &*MRI->use_nodbg_begin(VirtReg);
+      if (!UseMI->isRegSequence())
+        return false;
+      if (RegSeq && RegSeq != UseMI)
+        return false;
+      unsigned OpIdx = 1 + (Offset + R * Stride) * 2;
+      if (UseMI->getOperand(OpIdx).getReg() != VirtReg)
+        llvm_unreachable("Malformed REG_SEQUENCE instruction!");
+      unsigned SubIdx = UseMI->getOperand(OpIdx + 1).getImm();
+      if (LastSubIdx) {
+        if (LastSubIdx != SubIdx-Stride)
+          return false;
+      } else {
+        // Must start from dsub_0 or qsub_0.
+        if (SubIdx != (ARM::dsub_0+Offset) &&
+            SubIdx != (ARM::qsub_0+Offset))
+          return false;
+      }
+      RegSeq = UseMI;
+      LastSubIdx = SubIdx;
+    }
+
+    // In the case of vld3, etc., make sure the trailing operand of
+    // REG_SEQUENCE is an undef.
+    if (NumRegs == 3) {
+      unsigned OpIdx = 1 + (Offset + 3 * Stride) * 2;
+      const MachineOperand &MO = RegSeq->getOperand(OpIdx);
+      unsigned VirtReg = MO.getReg();
+      MachineInstr *DefMI = MRI->getVRegDef(VirtReg);
+      if (!DefMI || !DefMI->isImplicitDef())
+        return false;
+    }
+    return true;
+  }
+
+  unsigned LastSrcReg = 0;
+  SmallVector<unsigned, 4> SubIds;
   for (unsigned R = 0; R < NumRegs; ++R) {
-    MachineOperand &MO = MI->getOperand(FirstOpnd + R);
+    const MachineOperand &MO = MI->getOperand(FirstOpnd + R);
     assert(MO.isReg() && MO.getSubReg() == 0 && "unexpected operand");
     unsigned VirtReg = MO.getReg();
     assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
            "expected a virtual register");
-    if (!MRI->hasOneNonDBGUse(VirtReg))
+    // Extracting from a Q or QQ register.
+    MachineInstr *DefMI = MRI->getVRegDef(VirtReg);
+    if (!DefMI || !DefMI->isExtractSubreg())
       return false;
-    MachineInstr *UseMI = &*MRI->use_nodbg_begin(VirtReg);
-    if (UseMI->getOpcode() != TargetOpcode::REG_SEQUENCE)
+    VirtReg = DefMI->getOperand(1).getReg();
+    if (LastSrcReg && LastSrcReg != VirtReg)
       return false;
-    if (RegSeq && RegSeq != UseMI)
+    LastSrcReg = VirtReg;
+    const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
+    if (RC != ARM::QPRRegisterClass &&
+        RC != ARM::QQPRRegisterClass &&
+        RC != ARM::QQQQPRRegisterClass)
       return false;
-    RegSeq = UseMI;
+    unsigned SubIdx = DefMI->getOperand(2).getImm();
+    if (LastSubIdx) {
+      if (LastSubIdx != SubIdx-Stride)
+        return false;
+    } else {
+      // Must start from dsub_0 or qsub_0.
+      if (SubIdx != (ARM::dsub_0+Offset) &&
+          SubIdx != (ARM::qsub_0+Offset))
+        return false;
+    }
+    SubIds.push_back(SubIdx);
+    LastSubIdx = SubIdx;
   }
+
+  // FIXME: Updating the uses of EXTRACT_SUBREG from REG_SEQUENCE is
+  // currently required for correctness. e.g.
+  //  %reg1041<def> = REG_SEQUENCE %reg1040<kill>, 5, %reg1035<kill>, 6
+  //  %reg1042<def> = EXTRACT_SUBREG %reg1041, 6
+  //  %reg1043<def> = EXTRACT_SUBREG %reg1041, 5
+  //  VST1q16 %reg1025<kill>, 0, %reg1043<kill>, %reg1042<kill>,
+  // reg1042 and reg1043 should be replaced with reg1041:6 and reg1041:5
+  // respectively.
+  // We need to change how we model uses of REG_SEQUENCE.
+  for (unsigned R = 0; R < NumRegs; ++R) {
+    MachineOperand &MO = MI->getOperand(FirstOpnd + R);
+    unsigned OldReg = MO.getReg();
+    MachineInstr *DefMI = MRI->getVRegDef(OldReg);
+    assert(DefMI->isExtractSubreg());
+    MO.setReg(LastSrcReg);
+    MO.setSubReg(SubIds[R]);
+    if (R != 0)
+      MO.setIsKill(false);
+    // Delete the EXTRACT_SUBREG if its result is now dead.
+    if (MRI->use_empty(OldReg))
+      DefMI->eraseFromParent();
+  }
+
   return true;
 }
 
@@ -368,7 +467,8 @@ bool NEONPreAllocPass::PreAllocNEONRegisters(MachineBasicBlock &MBB) {
     unsigned FirstOpnd, NumRegs, Offset, Stride;
     if (!isNEONMultiRegOp(MI->getOpcode(), FirstOpnd, NumRegs, Offset, Stride))
       continue;
-    if (FormsRegSequence(MI, FirstOpnd, NumRegs))
+    if (llvm::ModelWithRegSequence() &&
+        FormsRegSequence(MI, FirstOpnd, NumRegs, Offset, Stride))
       continue;
 
     MachineBasicBlock::iterator NextI = llvm::next(MBBI);
@@ -390,7 +490,8 @@ bool NEONPreAllocPass::PreAllocNEONRegisters(MachineBasicBlock &MBB) {
       if (MO.isUse()) {
         // Insert a copy from VirtReg.
         TII->copyRegToReg(MBB, MBBI, MO.getReg(), VirtReg,
-                          ARM::DPRRegisterClass, ARM::DPRRegisterClass);
+                          ARM::DPRRegisterClass, ARM::DPRRegisterClass,
+                          DebugLoc());
         if (MO.isKill()) {
           MachineInstr *CopyMI = prior(MBBI);
           CopyMI->findRegisterUseOperand(VirtReg)->setIsKill();
@@ -399,7 +500,8 @@ bool NEONPreAllocPass::PreAllocNEONRegisters(MachineBasicBlock &MBB) {
       } else if (MO.isDef() && !MO.isDead()) {
         // Add a copy to VirtReg.
         TII->copyRegToReg(MBB, NextI, VirtReg, MO.getReg(),
-                          ARM::DPRRegisterClass, ARM::DPRRegisterClass);
+                          ARM::DPRRegisterClass, ARM::DPRRegisterClass,
+                          DebugLoc());
       }
     }
   }
diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp
index b10c3f7..fae84d4 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -17,6 +17,7 @@
 #include "ARMMachineFunctionInfo.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/ADT/SmallVector.h"
@@ -36,10 +37,8 @@ bool Thumb1InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    unsigned DestReg, unsigned SrcReg,
                                    const TargetRegisterClass *DestRC,
-                                   const TargetRegisterClass *SrcRC) const {
-  DebugLoc DL;
-  if (I != MBB.end()) DL = I->getDebugLoc();
-
+                                   const TargetRegisterClass *SrcRC,
+                                   DebugLoc DL) const {
   if (DestRC == ARM::GPRRegisterClass) {
     if (SrcRC == ARM::GPRRegisterClass) {
       BuildMI(MBB, I, DL, get(ARM::tMOVgpr2gpr), DestReg).addReg(SrcReg);
@@ -97,10 +96,8 @@ canFoldMemoryOperand(const MachineInstr *MI,
 void Thumb1InstrInfo::
 storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                     unsigned SrcReg, bool isKill, int FI,
-                    const TargetRegisterClass *RC) const {
-  DebugLoc DL;
-  if (I != MBB.end()) DL = I->getDebugLoc();
-
+                    const TargetRegisterClass *RC,
+                    const TargetRegisterInfo *TRI) const {
   assert((RC == ARM::tGPRRegisterClass ||
           (TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
            isARMLowRegister(SrcReg))) && "Unknown regclass!");
@@ -108,6 +105,9 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
   if (RC == ARM::tGPRRegisterClass ||
       (TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
        isARMLowRegister(SrcReg))) {
+    DebugLoc DL;
+    if (I != MBB.end()) DL = I->getDebugLoc();
+
     MachineFunction &MF = *MBB.getParent();
     MachineFrameInfo &MFI = *MF.getFrameInfo();
     MachineMemOperand *MMO =
@@ -124,10 +124,8 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
 void Thumb1InstrInfo::
 loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                      unsigned DestReg, int FI,
-                     const TargetRegisterClass *RC) const {
-  DebugLoc DL;
-  if (I != MBB.end()) DL = I->getDebugLoc();
-
+                     const TargetRegisterClass *RC,
+                     const TargetRegisterInfo *TRI) const {
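   // Editor's note (not part of the patch): Thumb1 loads and stores can only
   // address the low registers r0-r7, which is why both stack-slot helpers
   // here assert a tGPR-class (or physical low) register before emitting the
   // access.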
   assert((RC == ARM::tGPRRegisterClass ||
           (TargetRegisterInfo::isPhysicalRegister(DestReg) &&
            isARMLowRegister(DestReg))) && "Unknown regclass!");
@@ -135,6 +133,9 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
   if (RC == ARM::tGPRRegisterClass ||
       (TargetRegisterInfo::isPhysicalRegister(DestReg) &&
        isARMLowRegister(DestReg))) {
+    DebugLoc DL;
+    if (I != MBB.end()) DL = I->getDebugLoc();
+
     MachineFunction &MF = *MBB.getParent();
     MachineFrameInfo &MFI = *MF.getFrameInfo();
     MachineMemOperand *MMO =
@@ -150,7 +151,8 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
 bool Thumb1InstrInfo::
 spillCalleeSavedRegisters(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MI,
-                          const std::vector<CalleeSavedInfo> &CSI) const {
+                          const std::vector<CalleeSavedInfo> &CSI,
+                          const TargetRegisterInfo *TRI) const {
   if (CSI.empty())
     return false;
 
@@ -161,9 +163,22 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
   AddDefaultPred(MIB);
   for (unsigned i = CSI.size(); i != 0; --i) {
     unsigned Reg = CSI[i-1].getReg();
-    // Add the callee-saved register as live-in. It's killed at the spill.
-    MBB.addLiveIn(Reg);
-    MIB.addReg(Reg, RegState::Kill);
+    bool isKill = true;
+
+    // Add the callee-saved register as live-in unless it's LR and
+    // @llvm.returnaddress is called. If LR is returned for @llvm.returnaddress
+    // then it's already added to the function and entry block live-in sets.
+    if (Reg == ARM::LR) {
+      MachineFunction &MF = *MBB.getParent();
+      if (MF.getFrameInfo()->isReturnAddressTaken() &&
+          MF.getRegInfo().isLiveIn(Reg))
+        isKill = false;
+    }
+
+    if (isKill) {
+      MBB.addLiveIn(Reg);
+      MIB.addReg(Reg, RegState::Kill);
+    }
   }
   return true;
 }
@@ -171,7 +186,8 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
 bool Thumb1InstrInfo::
 restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator MI,
-                            const std::vector<CalleeSavedInfo> &CSI) const {
+                            const std::vector<CalleeSavedInfo> &CSI,
+                            const TargetRegisterInfo *TRI) const {
   MachineFunction &MF = *MBB.getParent();
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   if (CSI.empty())
diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h
index 516ddf1..c937296 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.h
+++ b/lib/Target/ARM/Thumb1InstrInfo.h
@@ -39,25 +39,30 @@ public:
 
   bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MI,
-                                 const std::vector<CalleeSavedInfo> &CSI) const;
+                                 const std::vector<CalleeSavedInfo> &CSI,
+                                 const TargetRegisterInfo *TRI) const;
   bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MI,
-                                   const std::vector<CalleeSavedInfo> &CSI) const;
+                                   const std::vector<CalleeSavedInfo> &CSI,
+                                   const TargetRegisterInfo *TRI) const;
 
   bool copyRegToReg(MachineBasicBlock &MBB,
                     MachineBasicBlock::iterator I,
                     unsigned DestReg, unsigned SrcReg,
                     const TargetRegisterClass *DestRC,
-                    const TargetRegisterClass *SrcRC) const;
+                    const TargetRegisterClass *SrcRC,
+                    DebugLoc DL) const;
   void storeRegToStackSlot(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI,
                            unsigned SrcReg, bool isKill, int FrameIndex,
-                           const TargetRegisterClass *RC) const;
+                           const TargetRegisterClass *RC,
+                           const TargetRegisterInfo *TRI) const;
 
   void loadRegFromStackSlot(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator MBBI,
                             unsigned DestReg, int FrameIndex,
-                            const TargetRegisterClass *RC) const;
+                            const TargetRegisterClass *RC,
+                            const TargetRegisterInfo *TRI) const;
 
   bool canFoldMemoryOperand(const MachineInstr *MI,
                             const SmallVectorImpl<unsigned> &Ops) const;
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index b143bd9..531d5e9 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -40,10 +40,8 @@ Thumb2InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator I,
                               unsigned DestReg, unsigned SrcReg,
                               const TargetRegisterClass *DestRC,
-                              const TargetRegisterClass *SrcRC) const {
-  DebugLoc DL;
-  if (I != MBB.end()) DL = I->getDebugLoc();
-
+                              const TargetRegisterClass *SrcRC,
+                              DebugLoc DL) const {
   if (DestRC == ARM::GPRRegisterClass) {
     if (SrcRC == ARM::GPRRegisterClass) {
       BuildMI(MBB, I, DL, get(ARM::tMOVgpr2gpr), DestReg).addReg(SrcReg);
@@ -63,17 +61,18 @@ Thumb2InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
   }
 
   // Handle SPR, DPR, and QPR copies.
-  return ARMBaseInstrInfo::copyRegToReg(MBB, I, DestReg, SrcReg, DestRC, SrcRC);
+  return ARMBaseInstrInfo::copyRegToReg(MBB, I, DestReg, SrcReg, DestRC, SrcRC,
+                                        DL);
 }
 
 void Thumb2InstrInfo::
 storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                     unsigned SrcReg, bool isKill, int FI,
-                    const TargetRegisterClass *RC) const {
-  DebugLoc DL;
-  if (I != MBB.end()) DL = I->getDebugLoc();
-
+                    const TargetRegisterClass *RC,
+                    const TargetRegisterInfo *TRI) const {
   if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass) {
+    DebugLoc DL;
+    if (I != MBB.end()) DL = I->getDebugLoc();
+
     MachineFunction &MF = *MBB.getParent();
     MachineFrameInfo &MFI = *MF.getFrameInfo();
     MachineMemOperand *MMO =
@@ -87,17 +86,18 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
     return;
   }
 
-  ARMBaseInstrInfo::storeRegToStackSlot(MBB, I, SrcReg, isKill, FI, RC);
+  ARMBaseInstrInfo::storeRegToStackSlot(MBB, I, SrcReg, isKill, FI, RC, TRI);
 }
 
 void Thumb2InstrInfo::
 loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                      unsigned DestReg, int FI,
-                     const TargetRegisterClass *RC) const {
-  DebugLoc DL;
-  if (I != MBB.end()) DL = I->getDebugLoc();
-
+                     const TargetRegisterClass *RC,
+                     const TargetRegisterInfo *TRI) const {
   if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass) {
+    DebugLoc DL;
+    if (I != MBB.end()) DL = I->getDebugLoc();
+
     MachineFunction &MF = *MBB.getParent();
     MachineFrameInfo &MFI = *MF.getFrameInfo();
     MachineMemOperand *MMO =
@@ -110,7 +110,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
     return;
   }
 
-  ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC);
+  ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC, TRI);
 }
 
 void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h
index a0f89a6..2948770 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/lib/Target/ARM/Thumb2InstrInfo.h
@@ -35,17 +35,20 @@ public:
                     MachineBasicBlock::iterator I,
                     unsigned DestReg, unsigned SrcReg,
                     const TargetRegisterClass *DestRC,
-                    const TargetRegisterClass *SrcRC) const;
+                    const TargetRegisterClass *SrcRC,
+                    DebugLoc DL) const;
 
   void storeRegToStackSlot(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI,
                            unsigned SrcReg, bool isKill, int FrameIndex,
-                           const TargetRegisterClass *RC) const;
+                           const TargetRegisterClass *RC,
+                           const TargetRegisterInfo *TRI) const;
 
   void loadRegFromStackSlot(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator MBBI,
                             unsigned DestReg, int FrameIndex,
-                            const TargetRegisterClass *RC) const;
+                            const TargetRegisterClass *RC,
+                            const TargetRegisterInfo *TRI) const;
 
   /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
   /// such, whenever a client has an instance of instruction info, it should
   /// always be able to get register info as well (through this method).
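Editor's sketch (not part of the diff): with the updated hooks above, callers
now supply the DebugLoc and a TargetRegisterInfo themselves instead of each
hook recomputing them. A minimal, hypothetical call site inside a
machine-function pass might look like:

  // Hypothetical call site; MBB, MI, TII, TRI, DestReg, SrcReg, and FrameIdx
  // stand for the usual objects already in scope in such a pass.
  DebugLoc DL;
  if (MI != MBB.end()) DL = MI->getDebugLoc();  // the caller picks the location
  TII->copyRegToReg(MBB, MI, DestReg, SrcReg,
                    ARM::DPRRegisterClass, ARM::DPRRegisterClass, DL);
  TII->storeRegToStackSlot(MBB, MI, SrcReg, /*isKill=*/true, FrameIdx,
                           ARM::DPRRegisterClass, TRI);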