author     rdivacky <rdivacky@FreeBSD.org>    2010-05-27 15:15:58 +0000
committer  rdivacky <rdivacky@FreeBSD.org>    2010-05-27 15:15:58 +0000
commit     1e3dec662ea18131c495db50caccc57f77b7a5fe (patch)
tree       9fad9a5d5dd8c4ff54af48edad9c8cc26dd5fda1 /lib/Target
parent     377552607e51dc1d3e6ff33833f9620bcfe815ac (diff)
Update LLVM to r104832.
Diffstat (limited to 'lib/Target')
180 files changed, 4305 insertions, 1971 deletions
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index b08f942..ae7ae59 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -48,7 +48,7 @@ namespace ARMCC {
     AL
   };

-  inline static CondCodes getOppositeCondition(CondCodes CC){
+  inline static CondCodes getOppositeCondition(CondCodes CC) {
    switch (CC) {
    default: llvm_unreachable("Unknown condition code");
    case EQ: return NE;
@@ -67,7 +67,7 @@ namespace ARMCC {
    case LE: return GT;
    }
  }
-}
+} // namespace ARMCC

 inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) {
   switch (CC) {
@@ -90,6 +90,10 @@ inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) {
   }
 }

+/// ModelWithRegSequence - Return true if isel should use REG_SEQUENCE to model
+/// operations involving sub-registers.
+bool ModelWithRegSequence();
+
 FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM,
                                CodeGenOpt::Level OptLevel);
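The ModelWithRegSequence() hook declared above is new in this import; its definition is not shown in this excerpt. Judging from the "neon-reg-sequence" option this same commit flips to default-on in ARMISelDAGToDAG.cpp (further down), it presumably just returns that flag. A minimal sketch under that assumption:

    // Hypothetical definition, not part of the excerpt shown here. Assumes the
    // UseRegSeq cl::opt declared in ARMISelDAGToDAG.cpp backs the hook.
    bool llvm::ModelWithRegSequence() {
      return UseRegSeq;  // "neon-reg-sequence", cl::init(true) after this change
    }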
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index b4dec0c..f1e6a9f 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -32,6 +32,8 @@ def ArchV6T2 : SubtargetFeature<"v6t2", "ARMArchVersion", "V6T2",
                                 "ARM v6t2">;
 def ArchV7A  : SubtargetFeature<"v7a", "ARMArchVersion", "V7A",
                                 "ARM v7A">;
+def ArchV7M  : SubtargetFeature<"v7m", "ARMArchVersion", "V7M",
+                                "ARM v7M">;
 def FeatureVFP2 : SubtargetFeature<"vfp2", "ARMFPUType", "VFPv2",
                                    "Enable VFP2 instructions">;
 def FeatureVFP3 : SubtargetFeature<"vfp3", "ARMFPUType", "VFPv3",
@@ -42,6 +44,10 @@ def FeatureThumb2 : SubtargetFeature<"thumb2", "ThumbMode", "Thumb2",
                                      "Enable Thumb2 instructions">;
 def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true",
                                    "Enable half-precision floating point">;
+def FeatureHWDiv : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true",
+                                    "Enable divide instructions">;
+def FeatureT2ExtractPack: SubtargetFeature<"t2xtpk", "HasT2ExtractPack", "true",
+                                "Enable Thumb2 extract and pack instructions">;

 // Some processors have multiply-accumulate instructions that don't
 // play nicely with other VFP instructions, and it's generally better
@@ -123,9 +129,11 @@ def : Processor<"arm1156t2f-s", ARMV6Itineraries,

 // V7 Processors.
 def : Processor<"cortex-a8", CortexA8Itineraries,
                 [ArchV7A, FeatureThumb2, FeatureNEON, FeatureHasSlowVMLx,
-                 FeatureNEONForFP]>;
+                 FeatureNEONForFP, FeatureT2ExtractPack]>;
 def : Processor<"cortex-a9", CortexA9Itineraries,
-                [ArchV7A, FeatureThumb2, FeatureNEON]>;
+                [ArchV7A, FeatureThumb2, FeatureNEON, FeatureT2ExtractPack]>;
+def : ProcNoItin<"cortex-m3", [ArchV7M, FeatureThumb2, FeatureHWDiv]>;
+def : ProcNoItin<"cortex-m4", [ArchV7M, FeatureThumb2, FeatureHWDiv]>;

 //===----------------------------------------------------------------------===//
 // Register File Description
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index a193858..2528854 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -28,6 +28,7 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/Support/CommandLine.h"
@@ -196,6 +197,42 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
   return NewMIs[0];
 }

+bool
+ARMBaseInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                        MachineBasicBlock::iterator MI,
+                                        const std::vector<CalleeSavedInfo> &CSI,
+                                        const TargetRegisterInfo *TRI) const {
+  if (CSI.empty())
+    return false;
+
+  DebugLoc DL;
+  if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+    unsigned Reg = CSI[i].getReg();
+    bool isKill = true;
+
+    // Add the callee-saved register as live-in unless it's LR and
+    // @llvm.returnaddress is called. If LR is returned for @llvm.returnaddress
+    // then it's already added to the function and entry block live-in sets.
+    if (Reg == ARM::LR) {
+      MachineFunction &MF = *MBB.getParent();
+      if (MF.getFrameInfo()->isReturnAddressTaken() &&
+          MF.getRegInfo().isLiveIn(Reg))
+        isKill = false;
+    }
+
+    if (isKill)
+      MBB.addLiveIn(Reg);
+
+    // Insert the spill to the stack frame. The register is killed at the spill
+    //
+    storeRegToStackSlot(MBB, MI, Reg, isKill,
+                        CSI[i].getFrameIdx(), CSI[i].getRegClass(), TRI);
+  }
+  return true;
+}
+
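The LR special case in spillCalleeSavedRegisters() above exists because a function that calls @llvm.returnaddress keeps LR live into its body, so the prologue spill must not mark LR killed. With GCC or Clang, source like the following sketch lowers to @llvm.returnaddress and takes that path:

    // LR still holds the value this function returns after the prologue, so
    // the callee-saved spill of LR above must not carry a kill flag.
    void *return_address_demo() {
      return __builtin_return_address(0);  // becomes @llvm.returnaddress(i32 0)
    }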
 // Branch analysis.
 bool
 ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
@@ -481,6 +518,10 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
     // If this machine instr is a constant pool entry, its size is recorded as
     // operand #2.
     return MI->getOperand(2).getImm();
+  case ARM::Int_eh_sjlj_longjmp:
+    return 16;
+  case ARM::tInt_eh_sjlj_longjmp:
+    return 10;
   case ARM::Int_eh_sjlj_setjmp:
   case ARM::Int_eh_sjlj_setjmp_nofp:
     return 24;
@@ -540,16 +581,17 @@ bool
 ARMBaseInstrInfo::isMoveInstr(const MachineInstr &MI,
                               unsigned &SrcReg, unsigned &DstReg,
                               unsigned& SrcSubIdx, unsigned& DstSubIdx) const {
-  SrcSubIdx = DstSubIdx = 0; // No sub-registers.
-
   switch (MI.getOpcode()) {
   default: break;
   case ARM::VMOVS:
   case ARM::VMOVD:
   case ARM::VMOVDneon:
-  case ARM::VMOVQ: {
+  case ARM::VMOVQ:
+  case ARM::VMOVQQ : {
     SrcReg = MI.getOperand(1).getReg();
     DstReg = MI.getOperand(0).getReg();
+    SrcSubIdx = MI.getOperand(1).getSubReg();
+    DstSubIdx = MI.getOperand(0).getSubReg();
     return true;
   }
   case ARM::MOVr:
@@ -564,6 +606,8 @@ ARMBaseInstrInfo::isMoveInstr(const MachineInstr &MI,
            "Invalid ARM MOV instruction");
     SrcReg = MI.getOperand(1).getReg();
     DstReg = MI.getOperand(0).getReg();
+    SrcSubIdx = MI.getOperand(1).getSubReg();
+    DstSubIdx = MI.getOperand(0).getSubReg();
     return true;
   }
 }
@@ -654,10 +698,8 @@ ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator I,
                                unsigned DestReg, unsigned SrcReg,
                                const TargetRegisterClass *DestRC,
-                               const TargetRegisterClass *SrcRC) const {
-  DebugLoc DL;
-  if (I != MBB.end()) DL = I->getDebugLoc();
-
+                               const TargetRegisterClass *SrcRC,
+                               DebugLoc DL) const {
   // tGPR is used sometimes in ARM instructions that need to avoid using
   // certain registers. Just treat it as GPR here.
   if (DestRC == ARM::tGPRRegisterClass)
@@ -679,6 +721,12 @@ ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
       SrcRC == ARM::QPR_8RegisterClass)
     SrcRC = ARM::QPRRegisterClass;

+  // Allow QQPR / QQPR_VFP2 cross-class copies.
+  if (DestRC == ARM::QQPR_VFP2RegisterClass)
+    DestRC = ARM::QQPRRegisterClass;
+  if (SrcRC == ARM::QQPR_VFP2RegisterClass)
+    SrcRC = ARM::QQPRRegisterClass;
+
   // Disallow copies of unequal sizes.
   if (DestRC != SrcRC && DestRC->getSize() != SrcRC->getSize())
     return false;
@@ -703,20 +751,36 @@ ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
       Opc = ARM::VMOVDneon;
     else if (DestRC == ARM::QPRRegisterClass)
       Opc = ARM::VMOVQ;
+    else if (DestRC == ARM::QQPRRegisterClass)
+      Opc = ARM::VMOVQQ;
+    else if (DestRC == ARM::QQQQPRRegisterClass)
+      Opc = ARM::VMOVQQQQ;
     else
       return false;

-    AddDefaultPred(BuildMI(MBB, I, DL, get(Opc), DestReg)
-                   .addReg(SrcReg));
+    AddDefaultPred(BuildMI(MBB, I, DL, get(Opc), DestReg).addReg(SrcReg));
   }

   return true;
 }

+static const
+MachineInstrBuilder &AddDReg(MachineInstrBuilder &MIB,
+                             unsigned Reg, unsigned SubIdx, unsigned State,
+                             const TargetRegisterInfo *TRI) {
+  if (!SubIdx)
+    return MIB.addReg(Reg, State);
+
+  if (TargetRegisterInfo::isPhysicalRegister(Reg))
+    return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
+  return MIB.addReg(Reg, State, SubIdx);
+}
+
 void ARMBaseInstrInfo::
 storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                     unsigned SrcReg, bool isKill, int FI,
-                    const TargetRegisterClass *RC) const {
+                    const TargetRegisterClass *RC,
+                    const TargetRegisterInfo *TRI) const {
   DebugLoc DL;
   if (I != MBB.end()) DL = I->getDebugLoc();
   MachineFunction &MF = *MBB.getParent();
@@ -738,45 +802,82 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
     AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STR))
                    .addReg(SrcReg, getKillRegState(isKill))
                    .addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO));
+  } else if (RC == ARM::SPRRegisterClass) {
+    AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS))
+                   .addReg(SrcReg, getKillRegState(isKill))
+                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
   } else if (RC == ARM::DPRRegisterClass ||
              RC == ARM::DPR_VFP2RegisterClass ||
             RC == ARM::DPR_8RegisterClass) {
     AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD))
                    .addReg(SrcReg, getKillRegState(isKill))
                    .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
-  } else if (RC == ARM::SPRRegisterClass) {
-    AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS))
-                   .addReg(SrcReg, getKillRegState(isKill))
-                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
-  } else {
-    assert((RC == ARM::QPRRegisterClass ||
-            RC == ARM::QPR_VFP2RegisterClass) && "Unknown regclass!");
+  } else if (RC == ARM::QPRRegisterClass ||
+             RC == ARM::QPR_VFP2RegisterClass ||
+             RC == ARM::QPR_8RegisterClass) {
     // FIXME: Neon instructions should support predicates
-    if (Align >= 16 && (getRegisterInfo().canRealignStack(MF))) {
+    if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
       AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q))
                      .addFrameIndex(FI).addImm(128)
-                     .addMemOperand(MMO)
-                     .addReg(SrcReg, getKillRegState(isKill)));
+                     .addReg(SrcReg, getKillRegState(isKill))
+                     .addMemOperand(MMO));
     } else {
-      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQ)).
-                     addReg(SrcReg, getKillRegState(isKill))
+      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQ))
+                     .addReg(SrcReg, getKillRegState(isKill))
                      .addFrameIndex(FI)
                      .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))
                      .addMemOperand(MMO));
     }
+  } else if (RC == ARM::QQPRRegisterClass || RC == ARM::QQPR_VFP2RegisterClass){
+    if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+      // FIXME: It's possible to only store part of the QQ register if the
+      // spilled def has a sub-register index.
+      MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VST2q32))
+        .addFrameIndex(FI).addImm(128);
+      MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
+      MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
+      MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
+      MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
+      AddDefaultPred(MIB.addMemOperand(MMO));
+    } else {
+      MachineInstrBuilder MIB =
+        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMD))
+                       .addFrameIndex(FI)
+                       .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)))
+        .addMemOperand(MMO);
+      MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
+      MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
+      MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
+            AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
+    }
+  } else {
+    assert(RC == ARM::QQQQPRRegisterClass && "Unknown regclass!");
+    MachineInstrBuilder MIB =
+      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMD))
+                     .addFrameIndex(FI)
+                     .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)))
+      .addMemOperand(MMO);
+    MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
+    MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
+    MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
+    MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
+    MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI);
+    MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
+    MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
+          AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
   }
 }

 void ARMBaseInstrInfo::
 loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                      unsigned DestReg, int FI,
-                     const TargetRegisterClass *RC) const {
+                     const TargetRegisterClass *RC,
+                     const TargetRegisterInfo *TRI) const {
   DebugLoc DL;
   if (I != MBB.end()) DL = I->getDebugLoc();
   MachineFunction &MF = *MBB.getParent();
   MachineFrameInfo &MFI = *MF.getFrameInfo();
   unsigned Align = MFI.getObjectAlignment(FI);
-
   MachineMemOperand *MMO =
     MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI),
                             MachineMemOperand::MOLoad, 0,
@@ -791,20 +892,18 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
   if (RC == ARM::GPRRegisterClass) {
     AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDR), DestReg)
                    .addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO));
+  } else if (RC == ARM::SPRRegisterClass) {
+    AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
+                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
   } else if (RC == ARM::DPRRegisterClass ||
              RC == ARM::DPR_VFP2RegisterClass ||
              RC == ARM::DPR_8RegisterClass) {
     AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
                    .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
-  } else if (RC == ARM::SPRRegisterClass) {
-    AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
-                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
-  } else {
-    assert((RC == ARM::QPRRegisterClass ||
-            RC == ARM::QPR_VFP2RegisterClass ||
-            RC == ARM::QPR_8RegisterClass) && "Unknown regclass!");
-    if (Align >= 16
-        && (getRegisterInfo().canRealignStack(MF))) {
+  } else if (RC == ARM::QPRRegisterClass ||
+             RC == ARM::QPR_VFP2RegisterClass ||
+             RC == ARM::QPR_8RegisterClass) {
+    if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
       AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q), DestReg)
                      .addFrameIndex(FI).addImm(128)
                      .addMemOperand(MMO));
@@ -814,6 +913,40 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                      .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))
                      .addMemOperand(MMO));
     }
+  } else if (RC == ARM::QQPRRegisterClass || RC == ARM::QQPR_VFP2RegisterClass){
+    if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+      MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLD2q32));
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
+      AddDefaultPred(MIB.addFrameIndex(FI).addImm(128).addMemOperand(MMO));
+    } else {
+      MachineInstrBuilder MIB =
+        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMD))
+                       .addFrameIndex(FI)
+                       .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)))
+        .addMemOperand(MMO);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
+      MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
+            AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
+    }
+  } else {
+    assert(RC == ARM::QQQQPRRegisterClass && "Unknown regclass!");
+    MachineInstrBuilder MIB =
+      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMD))
+                     .addFrameIndex(FI)
+                     .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)))
+      .addMemOperand(MMO);
+    MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
+    MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
+    MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
+    MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
+    MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::Define, TRI);
+    MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::Define, TRI);
+    MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::Define, TRI);
+          AddDReg(MIB, DestReg, ARM::dsub_7, RegState::Define, TRI);
   }
 }

@@ -930,8 +1063,7 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
               DstSubReg)
         .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg);
     }
-  }
-  else if (Opc == ARM::VMOVD) {
+  } else if (Opc == ARM::VMOVD || Opc == ARM::VMOVDneon) {
     unsigned Pred = MI->getOperand(2).getImm();
     unsigned PredReg = MI->getOperand(3).getReg();
     if (OpNum == 0) { // move -> store
@@ -957,6 +1089,56 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
               DstSubReg)
         .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg);
     }
+  } else if (Opc == ARM::VMOVQ) {
+    MachineFrameInfo &MFI = *MF.getFrameInfo();
+    unsigned Pred = MI->getOperand(2).getImm();
+    unsigned PredReg = MI->getOperand(3).getReg();
+    if (OpNum == 0) { // move -> store
+      unsigned SrcReg = MI->getOperand(1).getReg();
+      unsigned SrcSubReg = MI->getOperand(1).getSubReg();
+      bool isKill = MI->getOperand(1).isKill();
+      bool isUndef = MI->getOperand(1).isUndef();
+      if (MFI.getObjectAlignment(FI) >= 16 &&
+          getRegisterInfo().canRealignStack(MF)) {
+        NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VST1q))
+          .addFrameIndex(FI).addImm(128)
+          .addReg(SrcReg,
+                  getKillRegState(isKill) | getUndefRegState(isUndef),
+                  SrcSubReg)
+          .addImm(Pred).addReg(PredReg);
+      } else {
+        NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VSTMQ))
+          .addReg(SrcReg,
+                  getKillRegState(isKill) | getUndefRegState(isUndef),
+                  SrcSubReg)
+          .addFrameIndex(FI).addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))
+          .addImm(Pred).addReg(PredReg);
+      }
+    } else {          // move -> load
+      unsigned DstReg = MI->getOperand(0).getReg();
+      unsigned DstSubReg = MI->getOperand(0).getSubReg();
+      bool isDead = MI->getOperand(0).isDead();
+      bool isUndef = MI->getOperand(0).isUndef();
+      if (MFI.getObjectAlignment(FI) >= 16 &&
+          getRegisterInfo().canRealignStack(MF)) {
+        NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VLD1q))
+          .addReg(DstReg,
+                  RegState::Define |
+                  getDeadRegState(isDead) |
+                  getUndefRegState(isUndef),
+                  DstSubReg)
+          .addFrameIndex(FI).addImm(128).addImm(Pred).addReg(PredReg);
+      } else {
+        NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VLDMQ))
+          .addReg(DstReg,
+                  RegState::Define |
+                  getDeadRegState(isDead) |
+                  getUndefRegState(isUndef),
+                  DstSubReg)
+          .addFrameIndex(FI).addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))
+          .addImm(Pred).addReg(PredReg);
+      }
+    }
+  }

   return NewMI;
 }
@@ -985,12 +1167,13 @@ ARMBaseInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
       Opc == ARM::tMOVtgpr2gpr ||
       Opc == ARM::tMOVgpr2tgpr) {
     return true;
-  } else if (Opc == ARM::VMOVS || Opc == ARM::VMOVD) {
+  } else if (Opc == ARM::VMOVS || Opc == ARM::VMOVD ||
+             Opc == ARM::VMOVDneon || Opc == ARM::VMOVQ) {
     return true;
-  } else if (Opc == ARM::VMOVDneon || Opc == ARM::VMOVQ) {
-    return false; // FIXME
   }

+  // FIXME: VMOVQQ and VMOVQQQQ?
+
   return false;
 }

diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 7a5630e..b566271 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -200,6 +200,11 @@ public:
   virtual const ARMBaseRegisterInfo &getRegisterInfo() const =0;
   const ARMSubtarget &getSubtarget() const { return Subtarget; }

+  bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                 MachineBasicBlock::iterator MI,
+                                 const std::vector<CalleeSavedInfo> &CSI,
+                                 const TargetRegisterInfo *TRI) const;
+
   // Branch analysis.
   virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                              MachineBasicBlock *&FBB,
@@ -257,17 +262,20 @@ public:
                             MachineBasicBlock::iterator I,
                             unsigned DestReg, unsigned SrcReg,
                             const TargetRegisterClass *DestRC,
-                            const TargetRegisterClass *SrcRC) const;
+                            const TargetRegisterClass *SrcRC,
+                            DebugLoc DL) const;

   virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MBBI,
                                    unsigned SrcReg, bool isKill, int FrameIndex,
-                                   const TargetRegisterClass *RC) const;
+                                   const TargetRegisterClass *RC,
+                                   const TargetRegisterInfo *TRI) const;

   virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI,
                                     unsigned DestReg, int FrameIndex,
-                                    const TargetRegisterClass *RC) const;
+                                    const TargetRegisterClass *RC,
+                                    const TargetRegisterInfo *TRI) const;

   virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF,
                                                  int FrameIx,
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index bc12187..82458d2 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -259,10 +259,10 @@ ARMBaseRegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
                                               unsigned SubIdx) const {
   switch (SubIdx) {
   default: return 0;
-  case 1:
-  case 2:
-  case 3:
-  case 4:
+  case ARM::ssub_0:
+  case ARM::ssub_1:
+  case ARM::ssub_2:
+  case ARM::ssub_3: {
     // S sub-registers.
     if (A->getSize() == 8) {
       if (B == &ARM::SPR_8RegClass)
@@ -273,22 +273,201 @@ ARMBaseRegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
       return &ARM::DPR_VFP2RegClass;
     }

-    assert(A->getSize() == 16 && "Expecting a Q register class!");
-    if (B == &ARM::SPR_8RegClass)
-      return &ARM::QPR_8RegClass;
-    return &ARM::QPR_VFP2RegClass;
-  case 5:
-  case 6:
-    // D sub-registers.
-    if (B == &ARM::DPR_VFP2RegClass)
+    if (A->getSize() == 16) {
+      if (B == &ARM::SPR_8RegClass)
+        return &ARM::QPR_8RegClass;
       return &ARM::QPR_VFP2RegClass;
-    if (B == &ARM::DPR_8RegClass)
-      return &ARM::QPR_8RegClass;
+    }
+
+    if (A->getSize() == 32) {
+      if (B == &ARM::SPR_8RegClass)
+        return 0; // Do not allow coalescing!
+      return &ARM::QQPR_VFP2RegClass;
+    }
+
+    assert(A->getSize() == 64 && "Expecting a QQQQ register class!");
+    return 0; // Do not allow coalescing!
+  }
+  case ARM::dsub_0:
+  case ARM::dsub_1:
+  case ARM::dsub_2:
+  case ARM::dsub_3: {
+    // D sub-registers.
+    if (A->getSize() == 16) {
+      if (B == &ARM::DPR_VFP2RegClass)
+        return &ARM::QPR_VFP2RegClass;
+      if (B == &ARM::DPR_8RegClass)
+        return 0; // Do not allow coalescing!
+      return A;
+    }
+
+    if (A->getSize() == 32) {
+      if (B == &ARM::DPR_VFP2RegClass)
+        return &ARM::QQPR_VFP2RegClass;
+      if (B == &ARM::DPR_8RegClass)
+        return 0; // Do not allow coalescing!
+      return A;
+    }
+
+    assert(A->getSize() == 64 && "Expecting a QQQQ register class!");
+    if (B != &ARM::DPRRegClass)
+      return 0; // Do not allow coalescing!
     return A;
   }
+  case ARM::dsub_4:
+  case ARM::dsub_5:
+  case ARM::dsub_6:
+  case ARM::dsub_7: {
+    // D sub-registers of QQQQ registers.
+    if (A->getSize() == 64 && B == &ARM::DPRRegClass)
+      return A;
+    return 0; // Do not allow coalescing!
+  }
+
+  case ARM::qsub_0:
+  case ARM::qsub_1: {
+    // Q sub-registers.
+    if (A->getSize() == 32) {
+      if (B == &ARM::QPR_VFP2RegClass)
+        return &ARM::QQPR_VFP2RegClass;
+      if (B == &ARM::QPR_8RegClass)
+        return 0; // Do not allow coalescing!
+      return A;
+    }
+
+    assert(A->getSize() == 64 && "Expecting a QQQQ register class!");
+    if (B == &ARM::QPRRegClass)
+      return A;
+    return 0; // Do not allow coalescing!
+  }
+  case ARM::qsub_2:
+  case ARM::qsub_3: {
+    // Q sub-registers of QQQQ registers.
+    if (A->getSize() == 64 && B == &ARM::QPRRegClass)
+      return A;
+    return 0; // Do not allow coalescing!
+  }
+  }

   return 0;
 }
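To make the expanded table above concrete, here is a sketch of one query it answers during coalescing (ARI stands for the ARM register info object; how it is obtained depends on the caller):

    // Joining through an S sub-register index into a 16-byte (Q) class, with
    // the other side in SPR, constrains to QPR_VFP2: only those Q registers
    // have addressable S sub-registers.
    const TargetRegisterClass *RC =
        ARI.getMatchingSuperRegClass(ARM::QPRRegisterClass,  // A, 16 bytes
                                     ARM::SPRRegisterClass,  // B
                                     ARM::ssub_1);           // SubIdx
    // RC == &ARM::QPR_VFP2RegClass, per the ssub_* case above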

+bool
+ARMBaseRegisterInfo::canCombinedSubRegIndex(const TargetRegisterClass *RC,
+                                         SmallVectorImpl<unsigned> &SubIndices,
+                                         unsigned &NewSubIdx) const {
+
+  unsigned Size = RC->getSize() * 8;
+  if (Size < 6)
+    return 0;
+
+  NewSubIdx = 0;  // Whole register.
+  unsigned NumRegs = SubIndices.size();
+  if (NumRegs == 8) {
+    // 8 D registers -> 1 QQQQ register.
+    return (Size == 512 &&
+            SubIndices[0] == ARM::dsub_0 &&
+            SubIndices[1] == ARM::dsub_1 &&
+            SubIndices[2] == ARM::dsub_2 &&
+            SubIndices[3] == ARM::dsub_3 &&
+            SubIndices[4] == ARM::dsub_4 &&
+            SubIndices[5] == ARM::dsub_5 &&
+            SubIndices[6] == ARM::dsub_6 &&
+            SubIndices[7] == ARM::dsub_7);
+  } else if (NumRegs == 4) {
+    if (SubIndices[0] == ARM::qsub_0) {
+      // 4 Q registers -> 1 QQQQ register.
+      return (Size == 512 &&
+              SubIndices[1] == ARM::qsub_1 &&
+              SubIndices[2] == ARM::qsub_2 &&
+              SubIndices[3] == ARM::qsub_3);
+    } else if (SubIndices[0] == ARM::dsub_0) {
+      // 4 D registers -> 1 QQ register.
+      if (Size >= 256 &&
+          SubIndices[1] == ARM::dsub_1 &&
+          SubIndices[2] == ARM::dsub_2 &&
+          SubIndices[3] == ARM::dsub_3) {
+        if (Size == 512)
+          NewSubIdx = ARM::qqsub_0;
+        return true;
+      }
+    } else if (SubIndices[0] == ARM::dsub_4) {
+      // 4 D registers -> 1 QQ register (2nd).
+      if (Size == 512 &&
+          SubIndices[1] == ARM::dsub_5 &&
+          SubIndices[2] == ARM::dsub_6 &&
+          SubIndices[3] == ARM::dsub_7) {
+        NewSubIdx = ARM::qqsub_1;
+        return true;
+      }
+    } else if (SubIndices[0] == ARM::ssub_0) {
+      // 4 S registers -> 1 Q register.
+      if (Size >= 128 &&
+          SubIndices[1] == ARM::ssub_1 &&
+          SubIndices[2] == ARM::ssub_2 &&
+          SubIndices[3] == ARM::ssub_3) {
+        if (Size >= 256)
+          NewSubIdx = ARM::qsub_0;
+        return true;
+      }
+    }
+  } else if (NumRegs == 2) {
+    if (SubIndices[0] == ARM::qsub_0) {
+      // 2 Q registers -> 1 QQ register.
+      if (Size >= 256 && SubIndices[1] == ARM::qsub_1) {
+        if (Size == 512)
+          NewSubIdx = ARM::qqsub_0;
+        return true;
+      }
+    } else if (SubIndices[0] == ARM::qsub_2) {
+      // 2 Q registers -> 1 QQ register (2nd).
+      if (Size == 512 && SubIndices[1] == ARM::qsub_3) {
+        NewSubIdx = ARM::qqsub_1;
+        return true;
+      }
+    } else if (SubIndices[0] == ARM::dsub_0) {
+      // 2 D registers -> 1 Q register.
+      if (Size >= 128 && SubIndices[1] == ARM::dsub_1) {
+        if (Size >= 256)
+          NewSubIdx = ARM::qsub_0;
+        return true;
+      }
+    } else if (SubIndices[0] == ARM::dsub_2) {
+      // 2 D registers -> 1 Q register (2nd).
+      if (Size >= 256 && SubIndices[1] == ARM::dsub_3) {
+        NewSubIdx = ARM::qsub_1;
+        return true;
+      }
+    } else if (SubIndices[0] == ARM::dsub_4) {
+      // 2 D registers -> 1 Q register (3rd).
+      if (Size == 512 && SubIndices[1] == ARM::dsub_5) {
+        NewSubIdx = ARM::qsub_2;
+        return true;
+      }
+    } else if (SubIndices[0] == ARM::dsub_6) {
+      // 2 D registers -> 1 Q register (4th).
+      if (Size == 512 && SubIndices[1] == ARM::dsub_7) {
+        NewSubIdx = ARM::qsub_3;
+        return true;
+      }
+    } else if (SubIndices[0] == ARM::ssub_0) {
+      // 2 S registers -> 1 D register.
+      if (SubIndices[1] == ARM::ssub_1) {
+        if (Size >= 128)
+          NewSubIdx = ARM::dsub_0;
+        return true;
+      }
+    } else if (SubIndices[0] == ARM::ssub_2) {
+      // 2 S registers -> 1 D register (2nd).
+      if (Size >= 128 && SubIndices[1] == ARM::ssub_3) {
+        NewSubIdx = ARM::dsub_1;
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+
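A quick illustration of the contract, exercising the NumRegs == 2 path above (a sketch; ARI is again the ARM register info object):

    SmallVector<unsigned, 8> Indices;
    Indices.push_back(ARM::dsub_0);
    Indices.push_back(ARM::dsub_1);
    unsigned NewSubIdx;
    bool Ok = ARI.canCombinedSubRegIndex(ARM::QPRRegisterClass, Indices,
                                         NewSubIdx);
    // Ok == true and NewSubIdx == 0: inside a 128-bit (Q) class the D pair
    // covers the whole register. In a 256-bit (QQ) class the same pair would
    // instead yield NewSubIdx == ARM::qsub_0.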
 const TargetRegisterClass *
 ARMBaseRegisterInfo::getPointerRegClass(unsigned Kind) const {
   return ARM::GPRRegisterClass;
@@ -481,7 +660,7 @@ ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
 ///
 bool ARMBaseRegisterInfo::hasFP(const MachineFunction &MF) const {
   const MachineFrameInfo *MFI = MF.getFrameInfo();
-  return ((DisableFramePointerElim(MF) && MFI->hasCalls())||
+  return ((DisableFramePointerElim(MF) && MFI->adjustsStack())||
           needsStackRealignment(MF) ||
           MFI->hasVarSizedObjects() ||
           MFI->isFrameAddressTaken());
@@ -509,7 +688,7 @@ needsStackRealignment(const MachineFunction &MF) const {
 bool ARMBaseRegisterInfo::
 cannotEliminateFrame(const MachineFunction &MF) const {
   const MachineFrameInfo *MFI = MF.getFrameInfo();
-  if (DisableFramePointerElim(MF) && MFI->hasCalls())
+  if (DisableFramePointerElim(MF) && MFI->adjustsStack())
     return true;
   return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken()
     || needsStackRealignment(MF);
@@ -545,24 +724,25 @@ ARMBaseRegisterInfo::estimateRSStackSizeLimit(MachineFunction &MF) const {
        I != E; ++I) {
     for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
       if (!I->getOperand(i).isFI()) continue;
-
-      const TargetInstrDesc &Desc = TII.get(I->getOpcode());
-      unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
-      if (AddrMode == ARMII::AddrMode3 ||
-          AddrMode == ARMII::AddrModeT2_i8)
-        return (1 << 8) - 1;
-
-      if (AddrMode == ARMII::AddrMode5 ||
-          AddrMode == ARMII::AddrModeT2_i8s4)
+      switch (I->getDesc().TSFlags & ARMII::AddrModeMask) {
+      case ARMII::AddrMode3:
+      case ARMII::AddrModeT2_i8:
+        Limit = std::min(Limit, (1U << 8) - 1);
+        break;
+      case ARMII::AddrMode5:
+      case ARMII::AddrModeT2_i8s4:
         Limit = std::min(Limit, ((1U << 8) - 1) * 4);
-
-      if (AddrMode == ARMII::AddrModeT2_i12 && hasFP(MF))
-        // When the stack offset is negative, we will end up using
-        // the i8 instructions instead.
-        return (1 << 8) - 1;
-
-      if (AddrMode == ARMII::AddrMode6)
+        break;
+      case ARMII::AddrModeT2_i12:
+        if (hasFP(MF)) Limit = std::min(Limit, (1U << 8) - 1);
+        break;
+      case ARMII::AddrMode6:
+        // Addressing mode 6 (load/store) instructions can't encode an
+        // immediate offset for stack references.
         return 0;
+      default:
+        break;
+      }
+      break; // At most one FI per instruction
     }
   }
@@ -750,7 +930,9 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
       while (NumExtras && !UnspilledCS1GPRs.empty()) {
         unsigned Reg = UnspilledCS1GPRs.back();
         UnspilledCS1GPRs.pop_back();
-        if (!isReservedReg(MF, Reg)) {
+        if (!isReservedReg(MF, Reg) &&
+            (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg) ||
+             Reg == ARM::LR)) {
          Extras.push_back(Reg);
          NumExtras--;
        }
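As a worked example of the limits the rewritten loop above computes (a sketch; Limit and the instruction scan come from the surrounding function):

    // Each addressing mode caps the frame offset the register scavenger can
    // assume is directly encodable:
    //   AddrMode3 / AddrModeT2_i8  : (1U << 8) - 1       = 255 bytes
    //   AddrMode5 / AddrModeT2_i8s4: ((1U << 8) - 1) * 4 = 1020 bytes (word-scaled)
    //   AddrMode6                  : no immediate offset, so the limit is 0
    unsigned Limit = ~0U;                          // unconstrained to start
    Limit = std::min(Limit, ((1U << 8) - 1) * 4);  // e.g. after an AddrMode5 use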
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index 456c392..2c9c82d 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -81,6 +81,15 @@ public:
   getMatchingSuperRegClass(const TargetRegisterClass *A,
                            const TargetRegisterClass *B, unsigned Idx) const;

+  /// canCombinedSubRegIndex - Given a register class and a list of
+  /// sub-register indices, return true if it's possible to combine the
+  /// sub-register indices into one that corresponds to a larger sub-register.
+  /// Return the new sub-register index by reference. Note the new index may
+  /// be zero if the given sub-registers combined to form the whole register.
+  virtual bool canCombinedSubRegIndex(const TargetRegisterClass *RC,
+                                      SmallVectorImpl<unsigned> &SubIndices,
+                                      unsigned &NewSubIdx) const;
+
   const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const;

   std::pair<TargetRegisterClass::iterator,TargetRegisterClass::iterator>
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index f84f85a..f2730fc 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -88,6 +88,7 @@ namespace {
     void emitWordLE(unsigned Binary);
     void emitDWordLE(uint64_t Binary);
     void emitConstPoolInstruction(const MachineInstr &MI);
+    void emitMOVi32immInstruction(const MachineInstr &MI);
     void emitMOVi2piecesInstruction(const MachineInstr &MI);
     void emitLEApcrelJTInstruction(const MachineInstr &MI);
     void emitPseudoMoveInstruction(const MachineInstr &MI);
@@ -145,6 +146,15 @@ namespace {
       return getMachineOpValue(MI, MI.getOperand(OpIdx));
     }

+    /// getMovi32Value - Return binary encoding of operand for movw/movt. If
+    /// the machine operand requires relocation, record the relocation and
+    /// return zero.
+    unsigned getMovi32Value(const MachineInstr &MI,const MachineOperand &MO,
+                            unsigned Reloc);
+    unsigned getMovi32Value(const MachineInstr &MI, unsigned OpIdx,
+                            unsigned Reloc) {
+      return getMovi32Value(MI, MI.getOperand(OpIdx), Reloc);
+    }
+
     /// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value.
     ///
     unsigned getShiftOp(unsigned Imm) const ;
@@ -217,6 +227,31 @@ unsigned ARMCodeEmitter::getShiftOp(unsigned Imm) const {
   return 0;
 }

+/// getMovi32Value - Return binary encoding of operand for movw/movt. If the
+/// machine operand requires relocation, record the relocation and return zero.
+unsigned ARMCodeEmitter::getMovi32Value(const MachineInstr &MI,
+                                        const MachineOperand &MO,
+                                        unsigned Reloc) {
+  assert(((Reloc == ARM::reloc_arm_movt) || (Reloc == ARM::reloc_arm_movw))
+         && "Relocation to this function should be for movt or movw");
+
+  if (MO.isImm())
+    return static_cast<unsigned>(MO.getImm());
+  else if (MO.isGlobal())
+    emitGlobalAddress(MO.getGlobal(), Reloc, true, false);
+  else if (MO.isSymbol())
+    emitExternalSymbolAddress(MO.getSymbolName(), Reloc);
+  else if (MO.isMBB())
+    emitMachineBasicBlock(MO.getMBB(), Reloc);
+  else {
+#ifndef NDEBUG
+    errs() << MO;
+#endif
+    llvm_unreachable("Unsupported operand type for movw/movt");
+  }
+  return 0;
+}
+
 /// getMachineOpValue - Return binary encoding of operand. If the machine
 /// operand requires relocation, record the relocation and return zero.
 unsigned ARMCodeEmitter::getMachineOpValue(const MachineInstr &MI,
@@ -438,6 +473,42 @@ void ARMCodeEmitter::emitConstPoolInstruction(const MachineInstr &MI) {
   }
 }

+void ARMCodeEmitter::emitMOVi32immInstruction(const MachineInstr &MI) {
+  const MachineOperand &MO0 = MI.getOperand(0);
+  const MachineOperand &MO1 = MI.getOperand(1);
+
+  // Emit the 'movw' instruction.
+  unsigned Binary = 0x30 << 20;  // mov: Insts{27-20} = 0b00110000
+
+  unsigned Lo16 = getMovi32Value(MI, MO1, ARM::reloc_arm_movw) & 0xFFFF;
+
+  // Set the conditional execution predicate.
+  Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+  // Encode Rd.
+  Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift;
+
+  // Encode imm16 as imm4:imm12
+  Binary |= Lo16 & 0xFFF;                // Insts{11-0} = imm12
+  Binary |= ((Lo16 >> 12) & 0xF) << 16;  // Insts{19-16} = imm4
+  emitWordLE(Binary);
+
+  unsigned Hi16 = getMovi32Value(MI, MO1, ARM::reloc_arm_movt) >> 16;
+  // Emit the 'movt' instruction.
+  Binary = 0x34 << 20;  // movt: Insts{27-20} = 0b00110100
+
+  // Set the conditional execution predicate.
+  Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+  // Encode Rd.
+  Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift;
+
+  // Encode imm16 as imm4:imm12, same as movw above.
+  Binary |= Hi16 & 0xFFF;
+  Binary |= ((Hi16 >> 12) & 0xF) << 16;
+  emitWordLE(Binary);
+}
+
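A standalone sketch of the arithmetic the emitter above performs, for checking the field layout by hand (the constant is an arbitrary example; compiles with any C++ compiler):

    // MOVW/MOVT each take a 16-bit immediate split as imm4:imm12
    // (Insts{19-16} = imm4, Insts{11-0} = imm12).
    #include <cstdint>
    #include <cstdio>

    static uint32_t encodeImm16Fields(uint32_t Imm16) {
      return (Imm16 & 0xFFF) | (((Imm16 >> 12) & 0xF) << 16);
    }

    int main() {
      uint32_t Value = 0x12345678;                             // example constant
      uint32_t MovwFields = encodeImm16Fields(Value & 0xFFFF); // low half, MOVW
      uint32_t MovtFields = encodeImm16Fields(Value >> 16);    // high half, MOVT
      std::printf("movw imm fields: %#010x\nmovt imm fields: %#010x\n",
                  MovwFields, MovtFields);
      return 0;
    }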
 void ARMCodeEmitter::emitMOVi2piecesInstruction(const MachineInstr &MI) {
   const MachineOperand &MO0 = MI.getOperand(0);
   const MachineOperand &MO1 = MI.getOperand(1);
@@ -557,7 +628,6 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) {
   switch (Opcode) {
   default:
     llvm_unreachable("ARMCodeEmitter::emitPseudoInstruction");
-  // FIXME: Add support for MOVimm32.
   case TargetOpcode::INLINEASM: {
     // We allow inline assembler nodes with empty bodies - they can
     // implicitly define registers, which is ok for JIT.
@@ -604,6 +674,11 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) {
     emitMiscLoadStoreInstruction(MI, ARM::PC);
     break;
   }
+
+  case ARM::MOVi32imm:
+    emitMOVi32immInstruction(MI);
+    break;
+
   case ARM::MOVi2pieces:
     // Two instructions to materialize a constant.
     emitMOVi2piecesInstruction(MI);
@@ -706,10 +781,6 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI,
                                                    unsigned ImplicitRn) {
   const TargetInstrDesc &TID = MI.getDesc();

-  if (TID.Opcode == ARM::BFC) {
-    report_fatal_error("ARMv6t2 JIT is not yet supported.");
-  }
-
   // Part of binary is determined by TableGn.
   unsigned Binary = getBinaryCodeForInstr(MI);

@@ -729,6 +800,45 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI,
     Binary |= (ARMRegisterInfo::getRegisterNumbering(ImplicitRd)
                << ARMII::RegRdShift);

+  if (TID.Opcode == ARM::MOVi16) {
+    // Get immediate from MI.
+    unsigned Lo16 = getMovi32Value(MI, MI.getOperand(OpIdx),
+                                   ARM::reloc_arm_movw);
+    // Encode imm which is the same as in emitMOVi32immInstruction().
+    Binary |= Lo16 & 0xFFF;
+    Binary |= ((Lo16 >> 12) & 0xF) << 16;
+    emitWordLE(Binary);
+    return;
+  } else if(TID.Opcode == ARM::MOVTi16) {
+    unsigned Hi16 = (getMovi32Value(MI, MI.getOperand(OpIdx),
+                                    ARM::reloc_arm_movt) >> 16);
+    Binary |= Hi16 & 0xFFF;
+    Binary |= ((Hi16 >> 12) & 0xF) << 16;
+    emitWordLE(Binary);
+    return;
+  } else if ((TID.Opcode == ARM::BFC) || (TID.Opcode == ARM::BFI)) {
+    uint32_t v = ~MI.getOperand(2).getImm();
+    int32_t lsb = CountTrailingZeros_32(v);
+    int32_t msb = (32 - CountLeadingZeros_32(v)) - 1;
+    // Instr{20-16} = msb, Instr{11-7} = lsb
+    Binary |= (msb & 0x1F) << 16;
+    Binary |= (lsb & 0x1F) << 7;
+    emitWordLE(Binary);
+    return;
+  } else if ((TID.Opcode == ARM::UBFX) || (TID.Opcode == ARM::SBFX)) {
+    // Encode Rn in Instr{0-3}
+    Binary |= getMachineOpValue(MI, OpIdx++);
+
+    uint32_t lsb = MI.getOperand(OpIdx++).getImm();
+    uint32_t widthm1 = MI.getOperand(OpIdx++).getImm() - 1;
+
+    // Instr{20-16} = widthm1, Instr{11-7} = lsb
+    Binary |= (widthm1 & 0x1F) << 16;
+    Binary |= (lsb & 0x1F) << 7;
+    emitWordLE(Binary);
+    return;
+  }
+
   // If this is a two-address operand, skip it. e.g. MOVCCr operand 1.
   if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)
     ++OpIdx;
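The BFC/BFI case above derives the msb/lsb fields from the inverted immediate. The same derivation in isolation, as a sketch using the GCC/Clang builtins in place of LLVM's CountTrailingZeros_32/CountLeadingZeros_32:

    #include <cstdint>

    // For BFC/BFI the operand is a mask with one contiguous run of zero bits
    // to clear (so ~Imm is a contiguous run of ones); lsb/msb bound that run
    // and land in Instr{11-7} and Instr{20-16} respectively.
    static void bitfieldBounds(uint32_t Imm, unsigned &lsb, unsigned &msb) {
      uint32_t v = ~Imm;                  // the bits being cleared / inserted
      lsb = __builtin_ctz(v);             // CountTrailingZeros_32(v)
      msb = (32 - __builtin_clz(v)) - 1;  // highest set bit of v
    }
    // e.g. Imm = 0xFFFF00FF (clear bits 8..15): lsb == 8, msb == 15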
@@ -1366,18 +1476,66 @@ ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI) {
       break;
     ++NumRegs;
   }
-  Binary |= NumRegs * 2;
+  // Bit 8 will be set if <list> is consecutive 64-bit registers (e.g., D0)
+  // Otherwise, it will be 0, in the case of 32-bit registers.
+  if(Binary & 0x100)
+    Binary |= NumRegs * 2;
+  else
+    Binary |= NumRegs;

   emitWordLE(Binary);
 }

 void ARMCodeEmitter::emitMiscInstruction(const MachineInstr &MI) {
+  unsigned Opcode = MI.getDesc().Opcode;
   // Part of binary is determined by TableGn.
   unsigned Binary = getBinaryCodeForInstr(MI);

   // Set the conditional execution predicate
   Binary |= II->getPredicate(&MI) << ARMII::CondShift;

+  switch(Opcode) {
+  default:
+    llvm_unreachable("ARMCodeEmitter::emitMiscInstruction");
+
+  case ARM::FMSTAT:
+    // No further encoding needed.
+    break;
+
+  case ARM::VMRS:
+  case ARM::VMSR: {
+    const MachineOperand &MO0 = MI.getOperand(0);
+    // Encode Rt.
+    Binary |= ARMRegisterInfo::getRegisterNumbering(MO0.getReg())
+                << ARMII::RegRdShift;
+    break;
+  }
+
+  case ARM::FCONSTD:
+  case ARM::FCONSTS: {
+    // Encode Dd / Sd.
+    Binary |= encodeVFPRd(MI, 0);
+
+    // Encode imm., Table A7-18 VFP modified immediate constants
+    const MachineOperand &MO1 = MI.getOperand(1);
+    unsigned Imm = static_cast<unsigned>(MO1.getFPImm()->getValueAPF()
+                      .bitcastToAPInt().getHiBits(32).getLimitedValue());
+    unsigned ModifiedImm;
+
+    if(Opcode == ARM::FCONSTS)
+      ModifiedImm = (Imm & 0x80000000) >> 24 |  // a
+                    (Imm & 0x03F80000) >> 19;   // bcdefgh
+    else // Opcode == ARM::FCONSTD
+      ModifiedImm = (Imm & 0x80000000) >> 24 |  // a
+                    (Imm & 0x007F0000) >> 16;   // bcdefgh
+
+    // Insts{19-16} = abcd, Insts{3-0} = efgh
+    Binary |= ((ModifiedImm & 0xF0) >> 4) << 16;
+    Binary |= (ModifiedImm & 0xF);
+    break;
+  }
+  }
+
   emitWordLE(Binary);
 }
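To see the FCONSTS packing above in action, here is a sketch that computes the 8-bit 'abcdefgh' modified immediate for a sample float (the bit positions are the ones used in the hunk above):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // Pack the sign bit plus the top exponent/fraction bits of a
    // single-precision value into the 8-bit VFP modified-immediate form,
    // as the FCONSTS case does.
    static uint32_t vfpModifiedImmS(float F) {
      uint32_t Bits;
      std::memcpy(&Bits, &F, sizeof Bits);  // raw IEEE-754 bits
      return ((Bits & 0x80000000u) >> 24)   // a (sign)
           | ((Bits & 0x03F80000u) >> 19);  // bcdefgh
    }

    int main() {
      // 1.0f = 0x3F800000 -> a=0, bcdefgh=1110000 -> 0x70
      std::printf("modified imm for 1.0f: %#04x\n", vfpModifiedImmS(1.0f));
      return 0;
    }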
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 845d088..c87f5d7 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -29,6 +29,7 @@ namespace {
     ARMExpandPseudo() : MachineFunctionPass(&ID) {}

     const TargetInstrInfo *TII;
+    const TargetRegisterInfo *TRI;

     virtual bool runOnMachineFunction(MachineFunction &Fn);

@@ -37,11 +38,31 @@ namespace {
     }

   private:
+    void TransferImpOps(MachineInstr &OldMI,
+                        MachineInstrBuilder &UseMI, MachineInstrBuilder &DefMI);
     bool ExpandMBB(MachineBasicBlock &MBB);
   };
   char ARMExpandPseudo::ID = 0;
 }

+/// TransferImpOps - Transfer implicit operands on the pseudo instruction to
+/// the instructions created from the expansion.
+void ARMExpandPseudo::TransferImpOps(MachineInstr &OldMI,
+                                     MachineInstrBuilder &UseMI,
+                                     MachineInstrBuilder &DefMI) {
+  const TargetInstrDesc &Desc = OldMI.getDesc();
+  for (unsigned i = Desc.getNumOperands(), e = OldMI.getNumOperands();
+       i != e; ++i) {
+    const MachineOperand &MO = OldMI.getOperand(i);
+    assert(MO.isReg() && MO.getReg());
+    if (MO.isUse())
+      UseMI.addReg(MO.getReg(), getKillRegState(MO.isKill()));
+    else
+      DefMI.addReg(MO.getReg(),
+                   getDefRegState(true) | getDeadRegState(MO.isDead()));
+  }
+}
+
 bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
   bool Modified = false;

@@ -58,52 +79,82 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
       unsigned NewLdOpc = (Opcode == ARM::tLDRpci_pic)
        ? ARM::tLDRpci : ARM::t2LDRpci;
       unsigned DstReg = MI.getOperand(0).getReg();
-      if (!MI.getOperand(0).isDead()) {
-        MachineInstr *NewMI =
-          AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
-                                 TII->get(NewLdOpc), DstReg)
-                         .addOperand(MI.getOperand(1)));
-        NewMI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
-        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tPICADD))
-          .addReg(DstReg, getDefRegState(true))
-          .addReg(DstReg)
-          .addOperand(MI.getOperand(2));
-      }
+      bool DstIsDead = MI.getOperand(0).isDead();
+      MachineInstrBuilder MIB1 =
+        AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
+                               TII->get(NewLdOpc), DstReg)
+                       .addOperand(MI.getOperand(1)));
+      (*MIB1).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+      MachineInstrBuilder MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+                                         TII->get(ARM::tPICADD))
+        .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead))
+        .addReg(DstReg)
+        .addOperand(MI.getOperand(2));
+      TransferImpOps(MI, MIB1, MIB2);
       MI.eraseFromParent();
       Modified = true;
       break;
     }

     case ARM::t2MOVi32imm: {
+      unsigned PredReg = 0;
+      ARMCC::CondCodes Pred = llvm::getInstrPredicate(&MI, PredReg);
       unsigned DstReg = MI.getOperand(0).getReg();
-      if (!MI.getOperand(0).isDead()) {
-        const MachineOperand &MO = MI.getOperand(1);
-        MachineInstrBuilder LO16, HI16;
-
-        LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::t2MOVi16),
-                       DstReg);
-        HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::t2MOVTi16))
-          .addReg(DstReg, getDefRegState(true)).addReg(DstReg);
-
-        if (MO.isImm()) {
-          unsigned Imm = MO.getImm();
-          unsigned Lo16 = Imm & 0xffff;
-          unsigned Hi16 = (Imm >> 16) & 0xffff;
-          LO16 = LO16.addImm(Lo16);
-          HI16 = HI16.addImm(Hi16);
-        } else {
-          const GlobalValue *GV = MO.getGlobal();
-          unsigned TF = MO.getTargetFlags();
-          LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16);
-          HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16);
-          // FIXME: What's about memoperands?
-        }
-        AddDefaultPred(LO16);
-        AddDefaultPred(HI16);
+      bool DstIsDead = MI.getOperand(0).isDead();
+      const MachineOperand &MO = MI.getOperand(1);
+      MachineInstrBuilder LO16, HI16;
+
+      LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::t2MOVi16),
+                     DstReg);
+      HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::t2MOVTi16))
+        .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead))
+        .addReg(DstReg);
+
+      if (MO.isImm()) {
+        unsigned Imm = MO.getImm();
+        unsigned Lo16 = Imm & 0xffff;
+        unsigned Hi16 = (Imm >> 16) & 0xffff;
+        LO16 = LO16.addImm(Lo16);
+        HI16 = HI16.addImm(Hi16);
+      } else {
+        const GlobalValue *GV = MO.getGlobal();
+        unsigned TF = MO.getTargetFlags();
+        LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16);
+        HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16);
       }
+      (*LO16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+      (*HI16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+      LO16.addImm(Pred).addReg(PredReg);
+      HI16.addImm(Pred).addReg(PredReg);
+      TransferImpOps(MI, LO16, HI16);
+      MI.eraseFromParent();
+      Modified = true;
+      break;
+    }
+
+    case ARM::VMOVQQ: {
+      unsigned DstReg = MI.getOperand(0).getReg();
+      bool DstIsDead = MI.getOperand(0).isDead();
+      unsigned EvenDst = TRI->getSubReg(DstReg, ARM::qsub_0);
+      unsigned OddDst = TRI->getSubReg(DstReg, ARM::qsub_1);
+      unsigned SrcReg = MI.getOperand(1).getReg();
+      bool SrcIsKill = MI.getOperand(1).isKill();
+      unsigned EvenSrc = TRI->getSubReg(SrcReg, ARM::qsub_0);
+      unsigned OddSrc = TRI->getSubReg(SrcReg, ARM::qsub_1);
+      MachineInstrBuilder Even =
+        AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
+                               TII->get(ARM::VMOVQ))
+                       .addReg(EvenDst,
+                               getDefRegState(true) | getDeadRegState(DstIsDead))
+                       .addReg(EvenSrc, getKillRegState(SrcIsKill)));
+      MachineInstrBuilder Odd =
+        AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
+                               TII->get(ARM::VMOVQ))
+                       .addReg(OddDst,
+                               getDefRegState(true) | getDeadRegState(DstIsDead))
+                       .addReg(OddSrc, getKillRegState(SrcIsKill)));
+      TransferImpOps(MI, Even, Odd);
       MI.eraseFromParent();
       Modified = true;
     }
-    // FIXME: expand t2MOVi32imm
     }
     MBBI = NMBBI;
   }
@@ -113,6 +164,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {

 bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
   TII = MF.getTarget().getInstrInfo();
+  TRI = MF.getTarget().getRegisterInfo();
   bool Modified = false;
   for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E;
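A toy model of the VMOVQQ expansion above: one 256-bit pseudo copy becomes two independent 128-bit copies of the qsub_0/qsub_1 halves. The types here are purely illustrative; the real code rewrites MachineInstrs:

    #include <array>
    using Q = std::array<float, 4>;  // stand-in for a 128-bit Q register
    struct QQ { Q q0, q1; };         // stand-in for a 256-bit QQ register

    static void copyQQ(QQ &Dst, const QQ &Src) {
      Dst.q0 = Src.q0;  // first VMOVQ (qsub_0 half)
      Dst.q1 = Src.q1;  // second VMOVQ (qsub_1 half)
    }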
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 616942c..9baef6b 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -37,7 +37,8 @@ using namespace llvm;

 static cl::opt<bool>
 UseRegSeq("neon-reg-sequence", cl::Hidden,
-          cl::desc("Use reg_sequence to model ld / st of multiple neon regs"));
+          cl::desc("Use reg_sequence to model ld / st of multiple neon regs"),
+          cl::init(true));

 //===--------------------------------------------------------------------===//
 /// ARMDAGToDAGISel - ARM specific code to select ARM machine
@@ -164,15 +165,34 @@ private:
                                ARMCC::CondCodes CCVal, SDValue CCR,
                                SDValue InFlag);

+  SDNode *SelectConcatVector(SDNode *N);
+
   /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
   /// inline asm expressions.
   virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                             char ConstraintCode,
                                             std::vector<SDValue> &OutOps);

-  /// PairDRegs - Insert a pair of double registers into an implicit def to
-  /// form a quad register.
+  /// PairDRegs - Form a quad register from a pair of D registers.
+  ///
   SDNode *PairDRegs(EVT VT, SDValue V0, SDValue V1);
+
+  /// PairQRegs - Form a quad register pair from a pair of Q registers.
+  ///
+  SDNode *PairQRegs(EVT VT, SDValue V0, SDValue V1);
+
+  /// QuadDRegs - Form a quad register pair from a quad of D registers.
+  ///
+  SDNode *QuadDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
+
+  /// QuadQRegs - Form 4 consecutive Q registers.
+  ///
+  SDNode *QuadQRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
+
+  /// OctoDRegs - Form 8 consecutive D registers.
+  ///
+  SDNode *OctoDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3,
+                    SDValue V4, SDValue V5, SDValue V6, SDValue V7);
 };
 }

@@ -940,13 +960,13 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) {
   return NULL;
 }

-/// PairDRegs - Insert a pair of double registers into an implicit def to
-/// form a quad register.
+/// PairDRegs - Form a quad register from a pair of D registers.
+///
 SDNode *ARMDAGToDAGISel::PairDRegs(EVT VT, SDValue V0, SDValue V1) {
   DebugLoc dl = V0.getNode()->getDebugLoc();
-  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::DSUBREG_0, MVT::i32);
-  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::DSUBREG_1, MVT::i32);
-  if (UseRegSeq) {
+  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
+  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
+  if (llvm::ModelWithRegSequence()) {
     const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 };
     return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4);
   }
@@ -958,6 +978,62 @@ SDNode *ARMDAGToDAGISel::PairDRegs(EVT VT, SDValue V0, SDValue V1) {
                                 VT, SDValue(Pair, 0), V1, SubReg1);
 }

+/// PairQRegs - Form 4 consecutive D registers from a pair of Q registers.
+///
+SDNode *ARMDAGToDAGISel::PairQRegs(EVT VT, SDValue V0, SDValue V1) {
+  DebugLoc dl = V0.getNode()->getDebugLoc();
+  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32);
+  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32);
+  const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 };
+  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4);
+}
+
+/// QuadDRegs - Form 4 consecutive D registers.
+///
+SDNode *ARMDAGToDAGISel::QuadDRegs(EVT VT, SDValue V0, SDValue V1,
+                                   SDValue V2, SDValue V3) {
+  DebugLoc dl = V0.getNode()->getDebugLoc();
+  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
+  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
+  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, MVT::i32);
+  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, MVT::i32);
+  const SDValue Ops[] = { V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3 };
+  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 8);
+}
+
+/// QuadQRegs - Form 4 consecutive Q registers.
+///
+SDNode *ARMDAGToDAGISel::QuadQRegs(EVT VT, SDValue V0, SDValue V1,
+                                   SDValue V2, SDValue V3) {
+  DebugLoc dl = V0.getNode()->getDebugLoc();
+  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32);
+  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32);
+  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, MVT::i32);
+  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, MVT::i32);
+  const SDValue Ops[] = { V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3 };
+  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 8);
+}
+
+/// OctoDRegs - Form 8 consecutive D registers.
+///
+SDNode *ARMDAGToDAGISel::OctoDRegs(EVT VT, SDValue V0, SDValue V1,
+                                   SDValue V2, SDValue V3,
+                                   SDValue V4, SDValue V5,
+                                   SDValue V6, SDValue V7) {
+  DebugLoc dl = V0.getNode()->getDebugLoc();
+  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
+  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
+  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, MVT::i32);
+  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, MVT::i32);
+  SDValue SubReg4 = CurDAG->getTargetConstant(ARM::dsub_4, MVT::i32);
+  SDValue SubReg5 = CurDAG->getTargetConstant(ARM::dsub_5, MVT::i32);
+  SDValue SubReg6 = CurDAG->getTargetConstant(ARM::dsub_6, MVT::i32);
+  SDValue SubReg7 = CurDAG->getTargetConstant(ARM::dsub_7, MVT::i32);
+  const SDValue Ops[] ={ V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3,
+                         V4, SubReg4, V5, SubReg5, V6, SubReg6, V7, SubReg7 };
+  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 16);
+}
+
 /// GetNEONSubregVT - Given a type for a 128-bit NEON vector, return the type
 /// for a 64-bit subregister of the vector.
 static EVT GetNEONSubregVT(EVT VT) {
@@ -1011,7 +1087,34 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
     const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain };
     std::vector<EVT> ResTys(NumVecs, VT);
     ResTys.push_back(MVT::Other);
-    return CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5);
+    SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5);
+    if (!llvm::ModelWithRegSequence() || NumVecs < 2)
+      return VLd;
+
+    SDValue RegSeq;
+    SDValue V0 = SDValue(VLd, 0);
+    SDValue V1 = SDValue(VLd, 1);
+
+    // Form a REG_SEQUENCE to force register allocation.
+    if (NumVecs == 2)
+      RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
+    else {
+      SDValue V2 = SDValue(VLd, 2);
+      // If it's a vld3, form a quad D-register but discard the last part.
+      SDValue V3 = (NumVecs == 3)
+        ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
+        : SDValue(VLd, 3);
+      RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
+    }
+
+    assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+    for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
+      SDValue D = CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec,
+                                                 dl, VT, RegSeq);
+      ReplaceUses(SDValue(N, Vec), D);
+    }
+    ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, NumVecs));
+    return NULL;
   }

   EVT RegVT = GetNEONSubregVT(VT);
@@ -1026,9 +1129,24 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
     Chain = SDValue(VLd, 2 * NumVecs);

     // Combine the even and odd subregs to produce the result.
-    for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
-      SDNode *Q = PairDRegs(VT, SDValue(VLd, 2*Vec), SDValue(VLd, 2*Vec+1));
-      ReplaceUses(SDValue(N, Vec), SDValue(Q, 0));
+    if (llvm::ModelWithRegSequence()) {
+      if (NumVecs == 1) {
+        SDNode *Q = PairDRegs(VT, SDValue(VLd, 0), SDValue(VLd, 1));
+        ReplaceUses(SDValue(N, 0), SDValue(Q, 0));
+      } else {
+        SDValue QQ = SDValue(QuadDRegs(MVT::v4i64,
+                                       SDValue(VLd, 0), SDValue(VLd, 1),
+                                       SDValue(VLd, 2), SDValue(VLd, 3)), 0);
+        SDValue Q0 = CurDAG->getTargetExtractSubreg(ARM::qsub_0, dl, VT, QQ);
+        SDValue Q1 = CurDAG->getTargetExtractSubreg(ARM::qsub_1, dl, VT, QQ);
+        ReplaceUses(SDValue(N, 0), Q0);
+        ReplaceUses(SDValue(N, 1), Q1);
+      }
+    } else {
+      for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
+        SDNode *Q = PairDRegs(VT, SDValue(VLd, 2*Vec), SDValue(VLd, 2*Vec+1));
+        ReplaceUses(SDValue(N, Vec), SDValue(Q, 0));
+      }
     }
   } else {
     // Otherwise, quad registers are loaded with two separate instructions,
@@ -1051,10 +1169,37 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
     SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 6);
     Chain = SDValue(VLdB, NumVecs+1);

-    // Combine the even and odd subregs to produce the result.
-    for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
-      SDNode *Q = PairDRegs(VT, SDValue(VLdA, Vec), SDValue(VLdB, Vec));
-      ReplaceUses(SDValue(N, Vec), SDValue(Q, 0));
+    if (llvm::ModelWithRegSequence()) {
+      SDValue V0 = SDValue(VLdA, 0);
+      SDValue V1 = SDValue(VLdB, 0);
+      SDValue V2 = SDValue(VLdA, 1);
+      SDValue V3 = SDValue(VLdB, 1);
+      SDValue V4 = SDValue(VLdA, 2);
+      SDValue V5 = SDValue(VLdB, 2);
+      SDValue V6 = (NumVecs == 3)
+        ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT),
+                  0)
+        : SDValue(VLdA, 3);
+      SDValue V7 = (NumVecs == 3)
+        ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT),
+                  0)
+        : SDValue(VLdB, 3);
+      SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V0, V1, V2, V3,
+                                         V4, V5, V6, V7), 0);
+
+      // Extract out the 3 / 4 Q registers.
+      assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
+      for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
+        SDValue Q = CurDAG->getTargetExtractSubreg(ARM::qsub_0+Vec,
+                                                   dl, VT, RegSeq);
+        ReplaceUses(SDValue(N, Vec), Q);
+      }
+    } else {
+      // Combine the even and odd subregs to produce the result.
+      for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
+        SDNode *Q = PairDRegs(VT, SDValue(VLdA, Vec), SDValue(VLdB, Vec));
+        ReplaceUses(SDValue(N, Vec), SDValue(Q, 0));
+      }
     }
   }
   ReplaceUses(SDValue(N, NumVecs), Chain);
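The pattern repeated throughout these isel changes is: glue values that must land in consecutive D registers into one REG_SEQUENCE, then extract each piece again for its users. A condensed restatement using only calls that appear in this diff (a fragment, not standalone; V0/V1 stand for two D-sized results):

    SDValue Pair = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);  // REG_SEQUENCE
    assert(ARM::dsub_1 == ARM::dsub_0 + 1 && "contiguous numbering assumed");
    SDValue D0 = CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT, Pair);
    SDValue D1 = CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, VT, Pair);
    // The extracts feed the original users, while the REG_SEQUENCE value makes
    // the register allocator place V0/V1 in an adjacent D-register pair.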
@@ -1102,12 +1247,43 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
   Ops.push_back(Align);

   if (is64BitVector) {
-    unsigned Opc = DOpcodes[OpcodeIndex];
-    for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
-      Ops.push_back(N->getOperand(Vec+3));
+    if (llvm::ModelWithRegSequence() && NumVecs >= 2) {
+      SDValue RegSeq;
+      SDValue V0 = N->getOperand(0+3);
+      SDValue V1 = N->getOperand(1+3);
+
+      // Form a REG_SEQUENCE to force register allocation.
+      if (NumVecs == 2)
+        RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
+      else {
+        SDValue V2 = N->getOperand(2+3);
+        // If it's a vld3, form a quad D-register and leave the last part as
+        // an undef.
+        SDValue V3 = (NumVecs == 3)
+          ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
+          : N->getOperand(3+3);
+        RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
+      }
+
+      // Now extract the D registers back out.
+      Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT,
+                                                   RegSeq));
+      Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, VT,
+                                                   RegSeq));
+      if (NumVecs > 2)
+        Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, VT,
+                                                     RegSeq));
+      if (NumVecs > 3)
+        Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT,
+                                                     RegSeq));
+    } else {
+      for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+        Ops.push_back(N->getOperand(Vec+3));
+    }
     Ops.push_back(Pred);
     Ops.push_back(Reg0); // predicate register
     Ops.push_back(Chain);
+    unsigned Opc = DOpcodes[OpcodeIndex];
     return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+5);
   }

@@ -1116,48 +1292,114 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
   // Quad registers are directly supported for VST1 and VST2,
   // storing pairs of D regs.
   unsigned Opc = QOpcodes0[OpcodeIndex];
-  for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
-    Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT,
-                                                 N->getOperand(Vec+3)));
-    Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
-                                                 N->getOperand(Vec+3)));
+  if (llvm::ModelWithRegSequence() && NumVecs == 2) {
+    // First extract the pair of Q registers.
+    SDValue Q0 = N->getOperand(3);
+    SDValue Q1 = N->getOperand(4);
+
+    // Form a QQ register.
+    SDValue QQ = SDValue(PairQRegs(MVT::v4i64, Q0, Q1), 0);
+
+    // Now extract the D registers back out.
+    Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT,
+                                                 QQ));
+    Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT,
+                                                 QQ));
+    Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, RegVT,
+                                                 QQ));
+    Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, RegVT,
+                                                 QQ));
+    Ops.push_back(Pred);
+    Ops.push_back(Reg0); // predicate register
+    Ops.push_back(Chain);
+    return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 5 + 4);
+  } else {
+    for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
+      Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT,
+                                                   N->getOperand(Vec+3)));
+      Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT,
+                                                   N->getOperand(Vec+3)));
+    }
+    Ops.push_back(Pred);
+    Ops.push_back(Reg0); // predicate register
+    Ops.push_back(Chain);
+    return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(),
+                                  5 + 2 * NumVecs);
   }
-  Ops.push_back(Pred);
-  Ops.push_back(Reg0); // predicate register
-  Ops.push_back(Chain);
-  return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(),
-                                5 + 2 * NumVecs);
 }

 // Otherwise, quad registers are stored with two separate instructions,
 // where one stores the even registers and the other stores the odd registers.
+  if (llvm::ModelWithRegSequence()) {
+    // Form the QQQQ REG_SEQUENCE.
+    SDValue V[8];
+    for (unsigned Vec = 0, i = 0; Vec < NumVecs; ++Vec, i+=2) {
+      V[i]   = CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT,
+                                              N->getOperand(Vec+3));
+      V[i+1] = CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT,
+                                              N->getOperand(Vec+3));
+    }
+    if (NumVecs == 3)
+      V[6] = V[7] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
+                                                   dl, RegVT), 0);

-  Ops.push_back(Reg0); // post-access address offset
+    SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V[0], V[1], V[2], V[3],
+                                       V[4], V[5], V[6], V[7]), 0);

-  // Store the even subregs.
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT, - N->getOperand(Vec+3))); - Ops.push_back(Pred); - Ops.push_back(Reg0); // predicate register - Ops.push_back(Chain); - unsigned Opc = QOpcodes0[OpcodeIndex]; - SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), - MVT::Other, Ops.data(), NumVecs+6); - Chain = SDValue(VStA, 1); - - // Store the odd subregs. - Ops[0] = SDValue(VStA, 0); // MemAddr - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT, - N->getOperand(Vec+3)); - Ops[NumVecs+5] = Chain; - Opc = QOpcodes1[OpcodeIndex]; - SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), - MVT::Other, Ops.data(), NumVecs+6); - Chain = SDValue(VStB, 1); - ReplaceUses(SDValue(N, 0), Chain); - return NULL; + // Store the even D registers. + assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); + Ops.push_back(Reg0); // post-access address offset + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec*2, dl, + RegVT, RegSeq)); + Ops.push_back(Pred); + Ops.push_back(Reg0); // predicate register + Ops.push_back(Chain); + unsigned Opc = QOpcodes0[OpcodeIndex]; + SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), + MVT::Other, Ops.data(), NumVecs+6); + Chain = SDValue(VStA, 1); + + // Store the odd D registers. + Ops[0] = SDValue(VStA, 0); // MemAddr + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::dsub_1+Vec*2, dl, + RegVT, RegSeq); + Ops[NumVecs+5] = Chain; + Opc = QOpcodes1[OpcodeIndex]; + SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), + MVT::Other, Ops.data(), NumVecs+6); + Chain = SDValue(VStB, 1); + ReplaceUses(SDValue(N, 0), Chain); + return NULL; + } else { + Ops.push_back(Reg0); // post-access address offset + + // Store the even subregs. + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT, + N->getOperand(Vec+3))); + Ops.push_back(Pred); + Ops.push_back(Reg0); // predicate register + Ops.push_back(Chain); + unsigned Opc = QOpcodes0[OpcodeIndex]; + SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), + MVT::Other, Ops.data(), NumVecs+6); + Chain = SDValue(VStA, 1); + + // Store the odd subregs. + Ops[0] = SDValue(VStA, 0); // MemAddr + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT, + N->getOperand(Vec+3)); + Ops[NumVecs+5] = Chain; + Opc = QOpcodes1[OpcodeIndex]; + SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), + MVT::Other, Ops.data(), NumVecs+6); + Chain = SDValue(VStB, 1); + ReplaceUses(SDValue(N, 0), Chain); + return NULL; + } } SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, @@ -1180,11 +1422,13 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, // Quad registers are handled by load/store of subregs. Find the subreg info. unsigned NumElts = 0; int SubregIdx = 0; + bool Even = false; EVT RegVT = VT; if (!is64BitVector) { RegVT = GetNEONSubregVT(VT); NumElts = RegVT.getVectorNumElements(); - SubregIdx = (Lane < NumElts) ? ARM::DSUBREG_0 : ARM::DSUBREG_1; + SubregIdx = (Lane < NumElts) ? 
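The two-instruction quad-register store above relies on the same trick: all eight D halves go into one QQQQ REG_SEQUENCE, then the first store instruction takes the even halves (dsub_0 + Vec*2) and the second the odd halves (dsub_1 + Vec*2). A small sketch of that split, with illustrative register names:

#include <cstdio>

int main() {
  const char *Seq[8] = {"D0", "D1", "D2", "D3", "D4", "D5", "D6", "D7"};
  std::printf("VStA (even):");               // dsub_0 + Vec*2
  for (unsigned Vec = 0; Vec < 4; ++Vec)
    std::printf(" %s", Seq[Vec * 2]);
  std::printf("\nVStB (odd): ");             // dsub_1 + Vec*2
  for (unsigned Vec = 0; Vec < 4; ++Vec)
    std::printf(" %s", Seq[Vec * 2 + 1]);
  std::printf("\n");
}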
ARM::dsub_0 : ARM::dsub_1; + Even = Lane < NumElts; } unsigned OpcodeIndex; @@ -1211,8 +1455,35 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, unsigned Opc = 0; if (is64BitVector) { Opc = DOpcodes[OpcodeIndex]; - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - Ops.push_back(N->getOperand(Vec+3)); + if (llvm::ModelWithRegSequence()) { + SDValue RegSeq; + SDValue V0 = N->getOperand(0+3); + SDValue V1 = N->getOperand(1+3); + if (NumVecs == 2) { + RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0); + } else { + SDValue V2 = N->getOperand(2+3); + SDValue V3 = (NumVecs == 3) + ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) + : N->getOperand(3+3); + RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0); + } + + // Now extract the D registers back out. + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT, + RegSeq)); + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, VT, + RegSeq)); + if (NumVecs > 2) + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, VT, + RegSeq)); + if (NumVecs > 3) + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT, + RegSeq)); + } else { + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + Ops.push_back(N->getOperand(Vec+3)); + } } else { // Check if this is loading the even or odd subreg of a Q register. if (Lane < NumElts) { @@ -1221,10 +1492,32 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, Lane -= NumElts; Opc = QOpcodes1[OpcodeIndex]; } - // Extract the subregs of the input vector. - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - Ops.push_back(CurDAG->getTargetExtractSubreg(SubregIdx, dl, RegVT, - N->getOperand(Vec+3))); + + if (llvm::ModelWithRegSequence()) { + SDValue RegSeq; + SDValue V0 = N->getOperand(0+3); + SDValue V1 = N->getOperand(1+3); + if (NumVecs == 2) { + RegSeq = SDValue(PairQRegs(MVT::v4i64, V0, V1), 0); + } else { + SDValue V2 = N->getOperand(2+3); + SDValue V3 = (NumVecs == 3) + ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) + : N->getOperand(3+3); + RegSeq = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0); + } + + // Extract the subregs of the input vector. + unsigned SubIdx = Even ? ARM::dsub_0 : ARM::dsub_1; + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + Ops.push_back(CurDAG->getTargetExtractSubreg(SubIdx+Vec*2, dl, RegVT, + RegSeq)); + } else { + // Extract the subregs of the input vector. + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + Ops.push_back(CurDAG->getTargetExtractSubreg(SubregIdx, dl, RegVT, + N->getOperand(Vec+3))); + } } Ops.push_back(getI32Imm(Lane)); Ops.push_back(Pred); @@ -1236,8 +1529,60 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, std::vector<EVT> ResTys(NumVecs, RegVT); ResTys.push_back(MVT::Other); - SDNode *VLdLn = - CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), NumVecs+6); + SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(),NumVecs+6); + + if (llvm::ModelWithRegSequence()) { + // Form a REG_SEQUENCE to force register allocation. + SDValue RegSeq; + if (is64BitVector) { + SDValue V0 = SDValue(VLdLn, 0); + SDValue V1 = SDValue(VLdLn, 1); + if (NumVecs == 2) { + RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0); + } else { + SDValue V2 = SDValue(VLdLn, 2); + // If it's a vld3, form a quad D-register but discard the last part. + SDValue V3 = (NumVecs == 3) + ? 
SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) + : SDValue(VLdLn, 3); + RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0); + } + } else { + // For 128-bit vectors, take the 64-bit results of the load and insert them + // as subregs into the result. + SDValue V[8]; + for (unsigned Vec = 0, i = 0; Vec < NumVecs; ++Vec, i+=2) { + if (Even) { + V[i] = SDValue(VLdLn, Vec); + V[i+1] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, + dl, RegVT), 0); + } else { + V[i] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, + dl, RegVT), 0); + V[i+1] = SDValue(VLdLn, Vec); + } + } + if (NumVecs == 3) + V[6] = V[7] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, + dl, RegVT), 0); + + if (NumVecs == 2) + RegSeq = SDValue(QuadDRegs(MVT::v4i64, V[0], V[1], V[2], V[3]), 0); + else + RegSeq = SDValue(OctoDRegs(MVT::v8i64, V[0], V[1], V[2], V[3], + V[4], V[5], V[6], V[7]), 0); + } + + assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); + assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering"); + unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0; + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + ReplaceUses(SDValue(N, Vec), + CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, RegSeq)); + ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, NumVecs)); + return NULL; + } + // For a 64-bit vector load to D registers, nothing more needs to be done. if (is64BitVector) return VLdLn; @@ -1481,6 +1826,21 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) { return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 5); } +SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) { + // The only time a CONCAT_VECTORS operation can have legal types is when + // two 64-bit vectors are concatenated to a 128-bit vector. + EVT VT = N->getValueType(0); + if (!VT.is128BitVector() || N->getNumOperands() != 2) + llvm_unreachable("unexpected CONCAT_VECTORS"); + DebugLoc dl = N->getDebugLoc(); + SDValue V0 = N->getOperand(0); + SDValue V1 = N->getOperand(1); + SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32); + SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32); + const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 }; + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4); +} + SDNode *ARMDAGToDAGISel::Select(SDNode *N) { DebugLoc dl = N->getDebugLoc(); @@ -1695,8 +2055,12 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue Pred = getAL(CurDAG); SDValue PredReg = CurDAG->getRegister(0, MVT::i32); SDValue Ops[] = { N->getOperand(1), AM5Opc, Pred, PredReg, Chain }; - return CurDAG->getMachineNode(ARM::VLDMQ, dl, MVT::v2f64, MVT::Other, - Ops, 5); + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = cast<MemSDNode>(N)->getMemOperand(); + SDNode *Ret = CurDAG->getMachineNode(ARM::VLDMQ, dl, + MVT::v2f64, MVT::Other, Ops, 5); + cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1); + return Ret; } // Other cases are autogenerated. 
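SelectConcatVector above shows the payoff of making CONCAT_VECTORS legal: concatenating two 64-bit vectors into a 128-bit one becomes a single REG_SEQUENCE machine node with operand pairs {V0, dsub_0} and {V1, dsub_1}, so no actual data movement is implied. A standalone model (plain C++, assumed f32 lanes):

#include <array>
#include <cstdio>

using Df = std::array<float, 2>;   // a 64-bit D register, two f32 lanes
using Qf = std::array<float, 4>;   // a 128-bit Q register, four f32 lanes

// Mirrors the operand list { V0, SubReg0, V1, SubReg1 }: V0 lands in the
// low D half of the result, V1 in the high D half.
static Qf regSequence(const Df &V0, const Df &V1) {
  return {V0[0], V0[1], V1[0], V1[1]};
}

int main() {
  Qf Q = regSequence({1, 2}, {3, 4});
  std::printf("%g %g %g %g\n", Q[0], Q[1], Q[2], Q[3]);
}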
break; @@ -1712,7 +2076,11 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue PredReg = CurDAG->getRegister(0, MVT::i32); SDValue Ops[] = { N->getOperand(1), N->getOperand(2), AM5Opc, Pred, PredReg, Chain }; - return CurDAG->getMachineNode(ARM::VSTMQ, dl, MVT::Other, Ops, 6); + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = cast<MemSDNode>(N)->getMemOperand(); + SDNode *Ret = CurDAG->getMachineNode(ARM::VSTMQ, dl, MVT::Other, Ops, 6); + cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1); + return Ret; } // Other cases are autogenerated. break; @@ -1971,7 +2339,11 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { return SelectVLDSTLane(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); } } + break; } + + case ISD::CONCAT_VECTORS: + return SelectConcatVector(N); } return SelectCode(N); @@ -1995,3 +2367,9 @@ FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM, CodeGenOpt::Level OptLevel) { return new ARMDAGToDAGISel(TM, OptLevel); } + +/// ModelWithRegSequence - Return true if isel should use REG_SEQUENCE to model +/// operations involving sub-registers. +bool llvm::ModelWithRegSequence() { + return UseRegSeq; +} diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index d3842a6..b8126a3 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -94,7 +94,10 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT, } setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom); setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom); - setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Custom); + if (llvm::ModelWithRegSequence()) + setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal); + else + setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Custom); setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Expand); setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand); setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand); @@ -360,8 +363,11 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::BSWAP, MVT::i32, Expand); // These are expanded into libcalls. - setOperationAction(ISD::SDIV, MVT::i32, Expand); - setOperationAction(ISD::UDIV, MVT::i32, Expand); + if (!Subtarget->hasDivide()) { + // v7M has a hardware divider + setOperationAction(ISD::SDIV, MVT::i32, Expand); + setOperationAction(ISD::UDIV, MVT::i32, Expand); + } setOperationAction(ISD::SREM, MVT::i32, Expand); setOperationAction(ISD::UREM, MVT::i32, Expand); setOperationAction(ISD::SDIVREM, MVT::i32, Expand); @@ -373,6 +379,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); setOperationAction(ISD::BlockAddress, MVT::i32, Custom); + setOperationAction(ISD::TRAP, MVT::Other, Legal); + // Use the default implementation. setOperationAction(ISD::VASTART, MVT::Other, Custom); setOperationAction(ISD::VAARG, MVT::Other, Expand); @@ -387,7 +395,11 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom); - if (!Subtarget->hasV6Ops() && !Subtarget->isThumb2()) { + // If the subtarget does not have extract instructions, sign_extend_inreg + // needs to be expanded. Extract is available in ARM mode on v6 and up, + // and on most Thumb2 implementations. 
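The SDIV/UDIV change above keeps integer division legal when the subtarget reports a hardware divider (the new v7M path) instead of unconditionally expanding it to a libcall. In source terms, a sketch of what this affects; the __aeabi_idiv runtime name is the usual AEABI convention, not something this patch shows:

// On a divide-capable (v7M-class) core this should select a single sdiv;
// without the feature it still becomes a runtime call such as __aeabi_idiv.
extern "C" int quot(int a, int b) { return a / b; }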
+ if ((!Subtarget->isThumb() && !Subtarget->hasV6Ops()) + || (Subtarget->isThumb2() && !Subtarget->hasT2ExtractPack())) { setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); } @@ -400,6 +412,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // We want to custom lower some of our intrinsics. setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); + setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); + setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); setOperationAction(ISD::SETCC, MVT::i32, Expand); setOperationAction(ISD::SETCC, MVT::f32, Expand); @@ -451,9 +465,14 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // ARMISD::VMOVRRD - No need to call setTargetDAGCombine setTargetDAGCombine(ISD::ADD); setTargetDAGCombine(ISD::SUB); + setTargetDAGCombine(ISD::MUL); setStackPointerRegisterToSaveRestore(ARM::SP); - setSchedulingPreference(SchedulingForRegPressure); + + if (UseSoftFloat || Subtarget->isThumb1Only() || !Subtarget->hasVFP2()) + setSchedulingPreference(Sched::RegPressure); + else + setSchedulingPreference(Sched::Hybrid); // FIXME: If-converter should use instruction latency to determine // profitability rather than relying on fixed limits. @@ -567,11 +586,35 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { } } +/// getRegClassFor - Return the register class that should be used for the +/// specified value type. +TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const { + // Map v4i64 to QQ registers but do not make the type legal. Similarly map + // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to + // load / store 4 to 8 consecutive D registers. + if (Subtarget->hasNEON()) { + if (VT == MVT::v4i64) + return ARM::QQPRRegisterClass; + else if (VT == MVT::v8i64) + return ARM::QQQQPRRegisterClass; + } + return TargetLowering::getRegClassFor(VT); +} + /// getFunctionAlignment - Return the Log2 alignment of this function. unsigned ARMTargetLowering::getFunctionAlignment(const Function *F) const { return getTargetMachine().getSubtarget<ARMSubtarget>().isThumb() ? 0 : 1; } +Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const { + for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { + EVT VT = N->getValueType(i); + if (VT.isFloatingPoint() || VT.isVector()) + return Sched::Latency; + } + return Sched::RegPressure; +} + //===----------------------------------------------------------------------===// // Lowering Code //===----------------------------------------------------------------------===// @@ -1507,6 +1550,23 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, } SDValue +ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const { + DebugLoc dl = Op.getDebugLoc(); + SDValue Val = Subtarget->isThumb() ? 
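The reworked SIGN_EXTEND_INREG condition above keeps the node legal on ARM-mode v6+ and, through the new t2xtpk feature, on Thumb2 cores that implement the extract/pack instructions; elsewhere it is expanded. A hedged illustration (function name hypothetical):

// Where the target check above keeps the node legal, this selects a single
// sxth; where it is expanded, the same code becomes a left/right shift pair.
extern "C" int widen(int x) { return static_cast<short>(x); }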
+ DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::SP, MVT::i32) : + DAG.getConstant(0, MVT::i32); + return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, Op.getOperand(0), + Op.getOperand(1), Val); +} + +SDValue +ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const { + DebugLoc dl = Op.getDebugLoc(); + return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0), + Op.getOperand(1), DAG.getConstant(0, MVT::i32)); +} + +SDValue ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) const { @@ -1545,12 +1605,6 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, } return Result; } - case Intrinsic::eh_sjlj_setjmp: - SDValue Val = Subtarget->isThumb() ? - DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::SP, MVT::i32) : - DAG.getConstant(0, MVT::i32); - return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, Op.getOperand(1), - Val); } } @@ -1652,7 +1706,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, RC = ARM::GPRRegisterClass; // Transform the arguments stored in physical registers into virtual ones. - unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); + unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); SDValue ArgValue2; @@ -2092,9 +2146,31 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMCC, CCR, Cmp); } +SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{ + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MFI->setReturnAddressIsTaken(true); + + EVT VT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + if (Depth) { + SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); + SDValue Offset = DAG.getConstant(4, MVT::i32); + return DAG.getLoad(VT, dl, DAG.getEntryNode(), + DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset), + NULL, 0, false, false, 0); + } + + // Return LR, which contains the return address. Mark it an implicit live-in. + unsigned Reg = MF.addLiveIn(ARM::LR, ARM::GPRRegisterClass); + return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT); +} + SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); MFI->setFrameAddressIsTaken(true); + EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); @@ -2107,116 +2183,6 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { return FrameAddr; } -SDValue -ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, - SDValue Chain, - SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, - bool isVolatile, bool AlwaysInline, - const Value *DstSV, - uint64_t DstSVOff, - const Value *SrcSV, - uint64_t SrcSVOff) const { - // Do repeated 4-byte loads and stores. To be improved. - // This requires 4-byte alignment. - if ((Align & 3) != 0) - return SDValue(); - // This requires the copy size to be a constant, preferrably - // within a subtarget-specific limit. 
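LowerEH_SJLJ_SETJMP and LowerRETURNADDR above route two former special cases through normal custom lowering. For the return address, depth 0 becomes a plain copy out of LR (now marked live-in), and a non-zero depth loads the saved return address four bytes past the chased frame pointer. A minimal use:

// Depth 0: lowered to a copy of LR rather than the old unsupported path.
extern "C" void *callerPC() { return __builtin_return_address(0); }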
- ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); - if (!ConstantSize) - return SDValue(); - uint64_t SizeVal = ConstantSize->getZExtValue(); - if (!AlwaysInline && SizeVal > getSubtarget()->getMaxInlineSizeThreshold()) - return SDValue(); - - unsigned BytesLeft = SizeVal & 3; - unsigned NumMemOps = SizeVal >> 2; - unsigned EmittedNumMemOps = 0; - EVT VT = MVT::i32; - unsigned VTSize = 4; - unsigned i = 0; - const unsigned MAX_LOADS_IN_LDM = 6; - SDValue TFOps[MAX_LOADS_IN_LDM]; - SDValue Loads[MAX_LOADS_IN_LDM]; - uint64_t SrcOff = 0, DstOff = 0; - - // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the - // same number of stores. The loads and stores will get combined into - // ldm/stm later on. - while (EmittedNumMemOps < NumMemOps) { - for (i = 0; - i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { - Loads[i] = DAG.getLoad(VT, dl, Chain, - DAG.getNode(ISD::ADD, dl, MVT::i32, Src, - DAG.getConstant(SrcOff, MVT::i32)), - SrcSV, SrcSVOff + SrcOff, isVolatile, false, 0); - TFOps[i] = Loads[i].getValue(1); - SrcOff += VTSize; - } - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); - - for (i = 0; - i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { - TFOps[i] = DAG.getStore(Chain, dl, Loads[i], - DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, - DAG.getConstant(DstOff, MVT::i32)), - DstSV, DstSVOff + DstOff, isVolatile, false, 0); - DstOff += VTSize; - } - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); - - EmittedNumMemOps += i; - } - - if (BytesLeft == 0) - return Chain; - - // Issue loads / stores for the trailing (1 - 3) bytes. - unsigned BytesLeftSave = BytesLeft; - i = 0; - while (BytesLeft) { - if (BytesLeft >= 2) { - VT = MVT::i16; - VTSize = 2; - } else { - VT = MVT::i8; - VTSize = 1; - } - - Loads[i] = DAG.getLoad(VT, dl, Chain, - DAG.getNode(ISD::ADD, dl, MVT::i32, Src, - DAG.getConstant(SrcOff, MVT::i32)), - SrcSV, SrcSVOff + SrcOff, false, false, 0); - TFOps[i] = Loads[i].getValue(1); - ++i; - SrcOff += VTSize; - BytesLeft -= VTSize; - } - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); - - i = 0; - BytesLeft = BytesLeftSave; - while (BytesLeft) { - if (BytesLeft >= 2) { - VT = MVT::i16; - VTSize = 2; - } else { - VT = MVT::i8; - VTSize = 1; - } - - TFOps[i] = DAG.getStore(Chain, dl, Loads[i], - DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, - DAG.getConstant(DstOff, MVT::i32)), - DstSV, DstSVOff + DstOff, false, false, 0); - ++i; - DstOff += VTSize; - BytesLeft -= VTSize; - } - return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); -} - /// ExpandBIT_CONVERT - If the target supports VFP, this function is called to /// expand a bit convert where either the source or destination type is i64 to /// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64 @@ -2434,9 +2400,9 @@ static SDValue LowerShift(SDNode *N, SelectionDAG &DAG, // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr. SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), - DAG.getConstant(0, MVT::i32)); + DAG.getConstant(0, MVT::i32)); SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), - DAG.getConstant(1, MVT::i32)); + DAG.getConstant(1, MVT::i32)); // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and // captures the result into a carry flag. 
@@ -2879,21 +2845,60 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { } } - // If there are only 2 elements in a 128-bit vector, insert them into an - // undef vector. This handles the common case for 128-bit vector argument - // passing, where the insertions should be translated to subreg accesses - // with no real instructions. - if (VT.is128BitVector() && Op.getNumOperands() == 2) { - SDValue Val = DAG.getUNDEF(VT); - SDValue Op0 = Op.getOperand(0); - SDValue Op1 = Op.getOperand(1); - if (Op0.getOpcode() != ISD::UNDEF) - Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, Op0, - DAG.getIntPtrConstant(0)); - if (Op1.getOpcode() != ISD::UNDEF) - Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, Op1, - DAG.getIntPtrConstant(1)); - return Val; + // Scan through the operands to see if only one value is used. + unsigned NumElts = VT.getVectorNumElements(); + bool isOnlyLowElement = true; + bool usesOnlyOneValue = true; + bool isConstant = true; + SDValue Value; + for (unsigned i = 0; i < NumElts; ++i) { + SDValue V = Op.getOperand(i); + if (V.getOpcode() == ISD::UNDEF) + continue; + if (i > 0) + isOnlyLowElement = false; + if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V)) + isConstant = false; + + if (!Value.getNode()) + Value = V; + else if (V != Value) + usesOnlyOneValue = false; + } + + if (!Value.getNode()) + return DAG.getUNDEF(VT); + + if (isOnlyLowElement) + return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value); + + // If all elements are constants, fall back to the default expansion, which + // will generate a load from the constant pool. + if (isConstant) + return SDValue(); + + // Use VDUP for non-constant splats. + unsigned EltSize = VT.getVectorElementType().getSizeInBits(); + if (usesOnlyOneValue && EltSize <= 32) + return DAG.getNode(ARMISD::VDUP, dl, VT, Value); + + // Vectors with 32- or 64-bit elements can be built by directly assigning + // the subregisters. + if (EltSize >= 32) { + // Do the expansion with floating-point types, since that is what the VFP + // registers are defined to use, and since i64 is not legal. + EVT EltVT = EVT::getFloatingPointVT(EltSize); + EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts); + SDValue Val = DAG.getUNDEF(VecVT); + for (unsigned i = 0; i < NumElts; ++i) { + SDValue Elt = Op.getOperand(i); + if (Elt.getOpcode() == ISD::UNDEF) + continue; + Elt = DAG.getNode(ISD::BIT_CONVERT, dl, EltVT, Elt); + Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Val, Elt, + DAG.getConstant(i, MVT::i32)); + } + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val); } return SDValue(); @@ -3083,8 +3088,8 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { // If the shuffle is not directly supported and it has 4 elements, use // the PerfectShuffle-generated table to synthesize it from other shuffles. - if (VT.getVectorNumElements() == 4 && - (VT.is128BitVector() || VT.is64BitVector())) { + unsigned NumElts = VT.getVectorNumElements(); + if (NumElts == 4) { unsigned PFIndexes[4]; for (unsigned i = 0; i != 4; ++i) { if (ShuffleMask[i] < 0) @@ -3096,7 +3101,6 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { // Compute the index in the perfect shuffle table. 
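The rewritten BUILD_VECTOR scan above replaces the old two-element special case with a general classification pass. A standalone model of its decision order (ints stand in for SDValues; the real code additionally gates VDUP on element sizes of 32 bits or less):

#include <cstdio>
#include <optional>

enum class Choice { Undef, ScalarToVec, ConstPool, VDup, Inserts };

static Choice classify(const std::optional<int> *Lanes, unsigned N,
                       bool AllConstant) {
  std::optional<int> Value;
  bool OnlyLow = true, OneValue = true;
  for (unsigned I = 0; I < N; ++I) {
    if (!Lanes[I]) continue;                 // undef lanes are ignored
    if (I > 0) OnlyLow = false;
    if (!Value) Value = Lanes[I];
    else if (*Lanes[I] != *Value) OneValue = false;
  }
  if (!Value) return Choice::Undef;          // all lanes undef
  if (OnlyLow) return Choice::ScalarToVec;   // one defined low lane
  if (AllConstant) return Choice::ConstPool; // default constant-pool load
  if (OneValue) return Choice::VDup;         // splat of one runtime value
  return Choice::Inserts;                    // element-wise insertion
}

int main() {
  std::optional<int> Splat[4] = {7, 7, std::nullopt, 7};
  std::printf("splat -> VDUP? %d\n",
              classify(Splat, 4, /*AllConstant=*/false) == Choice::VDup);
}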
unsigned PFTableIndex = PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; - unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; unsigned Cost = (PFEntry >> 30); @@ -3104,6 +3108,29 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); } + // Implement shuffles with 32- or 64-bit elements as subreg copies. + unsigned EltSize = VT.getVectorElementType().getSizeInBits(); + if (EltSize >= 32) { + // Do the expansion with floating-point types, since that is what the VFP + // registers are defined to use, and since i64 is not legal. + EVT EltVT = EVT::getFloatingPointVT(EltSize); + EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts); + V1 = DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, V1); + V2 = DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, V2); + SDValue Val = DAG.getUNDEF(VecVT); + for (unsigned i = 0; i < NumElts; ++i) { + if (ShuffleMask[i] < 0) + continue; + SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, + ShuffleMask[i] < (int)NumElts ? V1 : V2, + DAG.getConstant(ShuffleMask[i] & (NumElts-1), + MVT::i32)); + Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Val, + Elt, DAG.getConstant(i, MVT::i32)); + } + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val); + } + return SDValue(); } @@ -3158,9 +3185,11 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG); case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); - case ISD::RETURNADDR: break; + case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG); + case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG); + case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG, Subtarget); case ISD::BIT_CONVERT: return ExpandBIT_CONVERT(Op.getNode(), DAG); @@ -3667,6 +3696,62 @@ static SDValue PerformSUBCombine(SDNode *N, return SDValue(); } +static SDValue PerformMULCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { + SelectionDAG &DAG = DCI.DAG; + + if (Subtarget->isThumb1Only()) + return SDValue(); + + if (DAG.getMachineFunction(). + getFunction()->hasFnAttr(Attribute::OptimizeForSize)) + return SDValue(); + + if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) + return SDValue(); + + EVT VT = N->getValueType(0); + if (VT != MVT::i32) + return SDValue(); + + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); + if (!C) + return SDValue(); + + uint64_t MulAmt = C->getZExtValue(); + unsigned ShiftAmt = CountTrailingZeros_64(MulAmt); + ShiftAmt = ShiftAmt & (32 - 1); + SDValue V = N->getOperand(0); + DebugLoc DL = N->getDebugLoc(); + + SDValue Res; + MulAmt >>= ShiftAmt; + if (isPowerOf2_32(MulAmt - 1)) { + // (mul x, 2^N + 1) => (add (shl x, N), x) + Res = DAG.getNode(ISD::ADD, DL, VT, + V, DAG.getNode(ISD::SHL, DL, VT, + V, DAG.getConstant(Log2_32(MulAmt-1), + MVT::i32))); + } else if (isPowerOf2_32(MulAmt + 1)) { + // (mul x, 2^N - 1) => (sub (shl x, N), x) + Res = DAG.getNode(ISD::SUB, DL, VT, + DAG.getNode(ISD::SHL, DL, VT, + V, DAG.getConstant(Log2_32(MulAmt+1), + MVT::i32)), + V); + } else + return SDValue(); + + if (ShiftAmt != 0) + Res = DAG.getNode(ISD::SHL, DL, VT, Res, + DAG.getConstant(ShiftAmt, MVT::i32)); + + // Do not add new nodes to DAG combiner worklist. 
+ DCI.CombineTo(N, Res, false); + return SDValue(); +} + /// PerformVMOVRRDCombine - Target-specific dag combine xforms for /// ARMISD::VMOVRRD. static SDValue PerformVMOVRRDCombine(SDNode *N, @@ -4053,6 +4138,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, default: break; case ISD::ADD: return PerformADDCombine(N, DCI); case ISD::SUB: return PerformSUBCombine(N, DCI); + case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget); case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI); case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG); case ISD::SHL: @@ -4432,9 +4518,11 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, bool isSEXTLoad = false; if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { VT = LD->getMemoryVT(); + Ptr = LD->getBasePtr(); isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { VT = ST->getMemoryVT(); + Ptr = ST->getBasePtr(); } else return false; @@ -4442,13 +4530,25 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, bool isLegal = false; if (Subtarget->isThumb2()) isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, - isInc, DAG); + isInc, DAG); else isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, isInc, DAG); if (!isLegal) return false; + if (Ptr != Base) { + // Swap base ptr and offset to catch more post-index load / store when + // it's legal. In Thumb2 mode, offset must be an immediate. + if (Ptr == Offset && Op->getOpcode() == ISD::ADD && + !Subtarget->isThumb2()) + std::swap(Base, Offset); + + // Post-indexed load / store update the base pointer. + if (Ptr != Base) + return false; + } + AM = isInc ? ISD::POST_INC : ISD::POST_DEC; return true; } diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index d8a230f..9c7517c 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -236,13 +236,19 @@ namespace llvm { std::vector<SDValue> &Ops, SelectionDAG &DAG) const; - virtual const ARMSubtarget* getSubtarget() const { + const ARMSubtarget* getSubtarget() const { return Subtarget; } + /// getRegClassFor - Return the register class that should be used for the + /// specified value type. + virtual TargetRegisterClass *getRegClassFor(EVT VT) const; + /// getFunctionAlignment - Return the Log2 alignment of this function. 
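PerformMULCombine above strips trailing zero bits from the multiplier and rewrites what remains when it is one away from a power of two, re-applying the stripped shift at the end. A quick standalone check of the two rewrites for x*10 (x*5, then shift by 1) and x*14 (x*7, then shift by 1):

#include <cassert>
#include <cstdint>

static std::uint32_t mul10(std::uint32_t X) { return ((X << 2) + X) << 1; }
static std::uint32_t mul14(std::uint32_t X) { return ((X << 3) - X) << 1; }

int main() {
  for (std::uint32_t X = 0; X <= 10000; ++X) {
    assert(mul10(X) == X * 10);  // (mul x, 2^N + 1) -> (add (shl x, N), x)
    assert(mul14(X) == X * 14);  // (mul x, 2^N - 1) -> (sub (shl x, N), x)
  }
}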
virtual unsigned getFunctionAlignment(const Function *F) const; + Sched::Preference getSchedulingPreference(SDNode *N) const; + bool isShuffleMaskLegal(const SmallVectorImpl<int> &M, EVT VT) const; bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; @@ -281,7 +287,8 @@ namespace llvm { DebugLoc dl, SelectionDAG &DAG, const CCValAssign &VA, ISD::ArgFlagsTy Flags) const; - SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) const; SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; @@ -296,20 +303,12 @@ namespace llvm { SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const; SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; - SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, - SDValue Chain, - SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, - bool isVolatile, bool AlwaysInline, - const Value *DstSV, - uint64_t DstSVOff, - const Value *SrcSV, - uint64_t SrcSVOff) const; SDValue LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index b466d0d..d487df1 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -228,7 +228,7 @@ class PseudoInst<dag oops, dag iops, InstrItinClass itin, "", itin> { let OutOperandList = oops; let InOperandList = iops; - let AsmString = asm; + let AsmString = asm; let Pattern = pattern; } @@ -240,7 +240,7 @@ class I<dag oops, dag iops, AddrMode am, SizeFlagVal sz, : InstARM<am, sz, im, f, GenericDomain, cstr, itin> { let OutOperandList = oops; let InOperandList = !con(iops, (ins pred:$p)); - let AsmString = !strconcat(opc, !strconcat("${p}", asm)); + let AsmString = !strconcat(opc, !strconcat("${p}", asm)); let Pattern = pattern; list<Predicate> Predicates = [IsARM]; } @@ -252,7 +252,7 @@ class InoP<dag oops, dag iops, AddrMode am, SizeFlagVal sz, : InstARM<am, sz, im, f, GenericDomain, cstr, itin> { let OutOperandList = oops; let InOperandList = iops; - let AsmString = !strconcat(opc, asm); + let AsmString = !strconcat(opc, asm); let Pattern = pattern; let isPredicable = 0; list<Predicate> Predicates = [IsARM]; @@ -268,7 +268,7 @@ class sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, : InstARM<am, sz, im, f, GenericDomain, cstr, itin> { let OutOperandList = oops; let InOperandList = !con(iops, (ins pred:$p, cc_out:$s)); - let AsmString = !strconcat(opc, !strconcat("${p}${s}", asm)); + let AsmString = !strconcat(opc, !strconcat("${p}${s}", asm)); let Pattern = pattern; list<Predicate> Predicates = [IsARM]; } @@ -280,7 +280,7 @@ class XI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, : InstARM<am, sz, im, f, GenericDomain, cstr, itin> { let OutOperandList = oops; let InOperandList = iops; - let AsmString = asm; + let AsmString = asm; let Pattern = pattern; list<Predicate> Predicates = 
[IsARM]; } @@ -959,7 +959,7 @@ class ThumbI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, : InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> { let OutOperandList = oops; let InOperandList = iops; - let AsmString = asm; + let AsmString = asm; let Pattern = pattern; list<Predicate> Predicates = [IsThumb]; } @@ -995,7 +995,7 @@ class Thumb1I<dag oops, dag iops, AddrMode am, SizeFlagVal sz, : InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> { let OutOperandList = oops; let InOperandList = iops; - let AsmString = asm; + let AsmString = asm; let Pattern = pattern; list<Predicate> Predicates = [IsThumb1Only]; } @@ -1140,7 +1140,7 @@ class Thumb2sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> { let OutOperandList = oops; let InOperandList = !con(iops, (ins pred:$p, cc_out:$s)); - let AsmString = !strconcat(opc, !strconcat("${s}${p}", asm)); + let AsmString = !strconcat(opc, !strconcat("${s}${p}", asm)); let Pattern = pattern; list<Predicate> Predicates = [IsThumb2]; } @@ -1152,7 +1152,7 @@ class Thumb2XI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> { let OutOperandList = oops; let InOperandList = iops; - let AsmString = asm; + let AsmString = asm; let Pattern = pattern; list<Predicate> Predicates = [IsThumb2]; } @@ -1163,7 +1163,7 @@ class ThumbXI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> { let OutOperandList = oops; let InOperandList = iops; - let AsmString = asm; + let AsmString = asm; let Pattern = pattern; list<Predicate> Predicates = [IsThumb1Only]; } @@ -1280,7 +1280,7 @@ class VFPI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, : InstARM<am, sz, im, f, VFPDomain, cstr, itin> { let OutOperandList = oops; let InOperandList = !con(iops, (ins pred:$p)); - let AsmString = !strconcat(opc, !strconcat("${p}", asm)); + let AsmString = !strconcat(opc, !strconcat("${p}", asm)); let Pattern = pattern; list<Predicate> Predicates = [HasVFP2]; } @@ -1292,7 +1292,7 @@ class VFPXI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, : InstARM<am, sz, im, f, VFPDomain, cstr, itin> { let OutOperandList = oops; let InOperandList = iops; - let AsmString = asm; + let AsmString = asm; let Pattern = pattern; list<Predicate> Predicates = [HasVFP2]; } diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index ce5f2f8..f3156d9 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -46,6 +46,7 @@ def SDT_ARMPICAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, def SDT_ARMThreadPointer : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>; def SDT_ARMEH_SJLJ_Setjmp : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisInt<2>]>; +def SDT_ARMEH_SJLJ_Longjmp: SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisInt<1>]>; def SDT_ARMMEMBARRIERV7 : SDTypeProfile<0, 0, []>; def SDT_ARMSYNCBARRIERV7 : SDTypeProfile<0, 0, []>; @@ -100,7 +101,10 @@ def ARMsra_flag : SDNode<"ARMISD::SRA_FLAG", SDTIntUnaryOp, [SDNPOutFlag]>; def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOp, [SDNPInFlag ]>; def ARMthread_pointer: SDNode<"ARMISD::THREAD_POINTER", SDT_ARMThreadPointer>; -def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP", SDT_ARMEH_SJLJ_Setjmp>; +def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP", + SDT_ARMEH_SJLJ_Setjmp, [SDNPHasChain]>; +def ARMeh_sjlj_longjmp: SDNode<"ARMISD::EH_SJLJ_LONGJMP", + SDT_ARMEH_SJLJ_Longjmp, 
[SDNPHasChain]>; def ARMMemBarrierV7 : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIERV7, [SDNPHasChain]>; @@ -128,6 +132,8 @@ def NoVFP : Predicate<"!Subtarget->hasVFP2()">; def HasVFP2 : Predicate<"Subtarget->hasVFP2()">; def HasVFP3 : Predicate<"Subtarget->hasVFP3()">; def HasNEON : Predicate<"Subtarget->hasNEON()">; +def HasDivide : Predicate<"Subtarget->hasDivide()">; +def HasT2ExtractPack : Predicate<"Subtarget->hasT2ExtractPack()">; def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">; def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">; def IsThumb : Predicate<"Subtarget->isThumb()">; @@ -654,12 +660,12 @@ PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx, let Defs = [SP], Uses = [SP], hasSideEffects = 1 in { def ADJCALLSTACKUP : PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2, pred:$p), NoItinerary, - "@ ADJCALLSTACKUP $amt1", + "${:comment} ADJCALLSTACKUP $amt1", [(ARMcallseq_end timm:$amt1, timm:$amt2)]>; def ADJCALLSTACKDOWN : PseudoInst<(outs), (ins i32imm:$amt, pred:$p), NoItinerary, - "@ ADJCALLSTACKDOWN $amt", + "${:comment} ADJCALLSTACKDOWN $amt", [(ARMcallseq_start timm:$amt)]>; } @@ -789,8 +795,11 @@ def DBG : AI<(outs), (ins i32imm:$opt), MiscFrm, NoItinerary, "dbg", "\t$opt", } // A5.4 Permanently UNDEFINED instructions. -def TRAP : AI<(outs), (ins), MiscFrm, NoItinerary, "trap", "", - [/* For disassembly only; pattern left blank */]>, +// FIXME: Temporary emitted as raw bytes until this pseudo-op will be added to +// binutils +let isBarrier = 1, isTerminator = 1 in +def TRAP : AXI<(outs), (ins), MiscFrm, NoItinerary, + ".long 0xe7ffdefe ${:comment} trap", [(trap)]>, Requires<[IsARM]> { let Inst{27-25} = 0b011; let Inst{24-20} = 0b11111; @@ -843,25 +852,19 @@ def PICSTRB : AXI2stb<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p), // LEApcrel - Load a pc-relative address into a register without offending the // assembler. +let neverHasSideEffects = 1 in { +let isReMaterializable = 1 in def LEApcrel : AXI1<0x0, (outs GPR:$dst), (ins i32imm:$label, pred:$p), Pseudo, IIC_iALUi, - !strconcat(!strconcat(".set ${:private}PCRELV${:uid}, ($label-(", - "${:private}PCRELL${:uid}+8))\n"), - !strconcat("${:private}PCRELL${:uid}:\n\t", - "add$p\t$dst, pc, #${:private}PCRELV${:uid}")), - []>; + "adr$p\t$dst, #$label", []>; def LEApcrelJT : AXI1<0x0, (outs GPR:$dst), (ins i32imm:$label, nohash_imm:$id, pred:$p), - Pseudo, IIC_iALUi, - !strconcat(!strconcat(".set ${:private}PCRELV${:uid}, " - "(${label}_${id}-(", - "${:private}PCRELL${:uid}+8))\n"), - !strconcat("${:private}PCRELL${:uid}:\n\t", - "add$p\t$dst, pc, #${:private}PCRELV${:uid}")), - []> { + Pseudo, IIC_iALUi, + "adr$p\t$dst, #${label}_${id}", []> { let Inst{25} = 1; } +} // neverHasSideEffects //===----------------------------------------------------------------------===// // Control Flow Instructions. @@ -1134,7 +1137,8 @@ def LDR : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, IIC_iLoadr, [(set GPR:$dst, (load addrmode2:$addr))]>; // Special LDR for loads from non-pc-relative constpools. 
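Two notable items in the hunk above: TRAP gains a real selection pattern but is emitted as raw bytes until binutils accepts the mnemonic, and the LEApcrel pseudos shrink to a plain adr. On the trap side, in source terms:

// Selects the permanently-undefined encoding via the new [(trap)] pattern,
// emitted as ".long 0xe7ffdefe" in ARM mode (".short 0xdefe" in Thumb).
extern "C" void die() { __builtin_trap(); }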
-let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1 in +let canFoldAsLoad = 1, mayLoad = 1, neverHasSideEffects = 1, + isReMaterializable = 1 in def LDRcp : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, IIC_iLoadr, "ldr", "\t$dst, $addr", []>; @@ -1156,7 +1160,7 @@ def LDRSB : AI3ldsb<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm, IIC_iLoadr, "ldrsb", "\t$dst, $addr", [(set GPR:$dst, (sextloadi8 addrmode3:$addr))]>; -let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { +let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { // Load doubleword def LDRD : AI3ldd<(outs GPR:$dst1, GPR:$dst2), (ins addrmode3:$addr), LdMiscFrm, IIC_iLoadr, "ldrd", "\t$dst1, $addr", @@ -1215,7 +1219,7 @@ def LDRD_POST : AI3lddpo<(outs GPR:$dst1, GPR:$dst2, GPR:$base_wb), "ldrd", "\t$dst1, $dst2, [$base], $offset", "$base = $base_wb", []>, Requires<[IsARM, HasV5TE]>; -} +} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 // LDRT, LDRBT, LDRSBT, LDRHT, LDRSHT are for disassembly only. @@ -1264,7 +1268,7 @@ def STRB : AI2stb<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, IIC_iStorer, [(truncstorei8 GPR:$src, addrmode2:$addr)]>; // Store doubleword -let mayStore = 1, hasExtraSrcRegAllocReq = 1 in +let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in def STRD : AI3std<(outs), (ins GPR:$src1, GPR:$src2, addrmode3:$addr), StMiscFrm, IIC_iStorer, "strd", "\t$src1, $addr", []>, Requires<[IsARM, HasV5TE]>; @@ -1356,7 +1360,7 @@ def STRHT: AI3sthpo<(outs GPR:$base_wb), // Load / store multiple Instructions. // -let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { +let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { def LDM : AXI4ld<(outs), (ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops), IndexModeNone, LdStMulFrm, IIC_iLoadm, @@ -1367,9 +1371,9 @@ def LDM_UPD : AXI4ld<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p, IndexModeUpd, LdStMulFrm, IIC_iLoadm, "ldm${addr:submode}${p}\t$addr!, $dsts", "$addr.addr = $wb", []>; -} // mayLoad, hasExtraDefRegAllocReq +} // mayLoad, neverHasSideEffects, hasExtraDefRegAllocReq -let mayStore = 1, hasExtraSrcRegAllocReq = 1 in { +let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in { def STM : AXI4st<(outs), (ins addrmode4:$addr, pred:$p, reglist:$srcs, variable_ops), IndexModeNone, LdStMulFrm, IIC_iStorem, @@ -1380,7 +1384,7 @@ def STM_UPD : AXI4st<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p, IndexModeUpd, LdStMulFrm, IIC_iStorem, "stm${addr:submode}${p}\t$addr!, $srcs", "$addr.addr = $wb", []>; -} // mayStore, hasExtraSrcRegAllocReq +} // mayStore, neverHasSideEffects, hasExtraSrcRegAllocReq //===----------------------------------------------------------------------===// // Move Instructions. @@ -2198,6 +2202,7 @@ def : ARMPat<(ARMcmpZ GPR:$src, so_imm_neg:$imm), // Conditional moves // FIXME: should be able to write a pattern for ARMcmov, but can't use // a two-value operand where a dag node expects two operands. 
:( +let neverHasSideEffects = 1 in { def MOVCCr : AI1<0b1101, (outs GPR:$dst), (ins GPR:$false, GPR:$true), DPFrm, IIC_iCMOVr, "mov", "\t$dst, $true", [/*(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc, CCR:$ccr))*/]>, @@ -2221,6 +2226,7 @@ def MOVCCi : AI1<0b1101, (outs GPR:$dst), RegConstraint<"$false = $dst">, UnaryDP { let Inst{25} = 1; } +} // neverHasSideEffects //===----------------------------------------------------------------------===// // Atomic operations intrinsics @@ -2528,12 +2534,12 @@ let Defs = def Int_eh_sjlj_setjmp : XI<(outs), (ins GPR:$src, GPR:$val), AddrModeNone, SizeSpecial, IndexModeNone, Pseudo, NoItinerary, - "str\tsp, [$src, #+8] @ eh_setjmp begin\n\t" + "str\tsp, [$src, #+8] ${:comment} eh_setjmp begin\n\t" "add\t$val, pc, #8\n\t" "str\t$val, [$src, #+4]\n\t" "mov\tr0, #0\n\t" "add\tpc, pc, #0\n\t" - "mov\tr0, #1 @ eh_setjmp end", "", + "mov\tr0, #1 ${:comment} eh_setjmp end", "", [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>, Requires<[IsARM, HasVFP2]>; } @@ -2543,16 +2549,30 @@ let Defs = def Int_eh_sjlj_setjmp_nofp : XI<(outs), (ins GPR:$src, GPR:$val), AddrModeNone, SizeSpecial, IndexModeNone, Pseudo, NoItinerary, - "str\tsp, [$src, #+8] @ eh_setjmp begin\n\t" + "str\tsp, [$src, #+8] ${:comment} eh_setjmp begin\n\t" "add\t$val, pc, #8\n\t" "str\t$val, [$src, #+4]\n\t" "mov\tr0, #0\n\t" "add\tpc, pc, #0\n\t" - "mov\tr0, #1 @ eh_setjmp end", "", + "mov\tr0, #1 ${:comment} eh_setjmp end", "", [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>, Requires<[IsARM, NoVFP]>; } +// FIXME: Non-Darwin version(s) +let isBarrier = 1, hasSideEffects = 1, isTerminator = 1, + Defs = [ R7, LR, SP ] in { +def Int_eh_sjlj_longjmp : XI<(outs), (ins GPR:$src, GPR:$scratch), + AddrModeNone, SizeSpecial, IndexModeNone, + Pseudo, NoItinerary, + "ldr\tsp, [$src, #8]\n\t" + "ldr\t$scratch, [$src, #4]\n\t" + "ldr\tr7, [$src]\n\t" + "bx\t$scratch", "", + [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>, + Requires<[IsARM, IsDarwin]>; +} + //===----------------------------------------------------------------------===// // Non-Instruction Patterns // diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index d5ce2b8..197ec16 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -115,7 +115,7 @@ def h64imm : Operand<i64> { // NEON load / store instructions //===----------------------------------------------------------------------===// -let mayLoad = 1 in { +let mayLoad = 1, neverHasSideEffects = 1 in { // Use vldmia to load a Q register as a D register pair. // This is equivalent to VLDMD except that it has a Q register operand // instead of a pair of D registers. @@ -123,11 +123,6 @@ def VLDMQ : AXDI5<(outs QPR:$dst), (ins addrmode5:$addr, pred:$p), IndexModeNone, IIC_fpLoadm, "vldm${addr:submode}${p}\t${addr:base}, ${dst:dregpair}", "", []>; -def VLDMQ_UPD - : AXDI5<(outs QPR:$dst, GPR:$wb), (ins addrmode5:$addr, pred:$p), - IndexModeUpd, IIC_fpLoadm, - "vldm${addr:submode}${p}\t${addr:base}!, ${dst:dregpair}", - "$addr.base = $wb", []>; // Use vld1 to load a Q register as a D register pair. // This alternative to VLDMQ allows an alignment to be specified. 
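The Darwin-only longjmp pseudo above reloads sp, the resume address, and r7 from the jump buffer, matching the slots its setjmp counterpart writes (sp at offset 8, resume address at offset 4) plus a frame-pointer slot at offset 0. The implied buffer layout, with illustrative field names:

// Derived from the asm strings above; names are illustrative only.
struct SjLjBuf {
  void *SavedFP;  // ldr r7, [$src]
  void *Resume;   // ldr $scratch, [$src, #4]; bx $scratch
  void *SavedSP;  // ldr sp, [$src, #8]
};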
@@ -135,13 +130,9 @@ def VLDMQ_UPD def VLD1q : NLdSt<0,0b10,0b1010,0b1100, (outs QPR:$dst), (ins addrmode6:$addr), IIC_VLD1, "vld1", "64", "${dst:dregpair}, $addr", "", []>; -def VLD1q_UPD - : NLdSt<0,0b10,0b1010,0b1100, (outs QPR:$dst, GPR:$wb), - (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, "vld1", "64", - "${dst:dregpair}, $addr$offset", "$addr.addr = $wb", []>; -} // mayLoad = 1 +} // mayLoad = 1, neverHasSideEffects = 1 -let mayStore = 1 in { +let mayStore = 1, neverHasSideEffects = 1 in { // Use vstmia to store a Q register as a D register pair. // This is equivalent to VSTMD except that it has a Q register operand // instead of a pair of D registers. @@ -149,11 +140,6 @@ def VSTMQ : AXDI5<(outs), (ins QPR:$src, addrmode5:$addr, pred:$p), IndexModeNone, IIC_fpStorem, "vstm${addr:submode}${p}\t${addr:base}, ${src:dregpair}", "", []>; -def VSTMQ_UPD - : AXDI5<(outs GPR:$wb), (ins QPR:$src, addrmode5:$addr, pred:$p), - IndexModeUpd, IIC_fpStorem, - "vstm${addr:submode}${p}\t${addr:base}!, ${src:dregpair}", - "$addr.base = $wb", []>; // Use vst1 to store a Q register as a D register pair. // This alternative to VSTMQ allows an alignment to be specified. @@ -161,14 +147,9 @@ def VSTMQ_UPD def VST1q : NLdSt<0,0b00,0b1010,0b1100, (outs), (ins addrmode6:$addr, QPR:$src), IIC_VST, "vst1", "64", "${src:dregpair}, $addr", "", []>; -def VST1q_UPD - : NLdSt<0,0b00,0b1010,0b1100, (outs GPR:$wb), - (ins addrmode6:$addr, am6offset:$offset, QPR:$src), - IIC_VST, "vst1", "64", "{$src:dregpair}, $addr$offset", - "$addr.addr = $wb", []>; -} // mayStore = 1 +} // mayStore = 1, neverHasSideEffects = 1 -let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { +let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { // VLD1 : Vector Load (multiple single elements) class VLD1D<bits<4> op7_4, string Dt> @@ -492,9 +473,9 @@ def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32">; // VLD3DUP : Vector Load (single 3-element structure to all lanes) // VLD4DUP : Vector Load (single 4-element structure to all lanes) // FIXME: Not yet implemented. -} // mayLoad = 1, hasExtraDefRegAllocReq = 1 +} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 -let mayStore = 1, hasExtraSrcRegAllocReq = 1 in { +let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in { // VST1 : Vector Store (multiple single elements) class VST1D<bits<4> op7_4, string Dt> @@ -807,7 +788,7 @@ def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32">; def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16">; def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32">; -} // mayStore = 1, hasExtraSrcRegAllocReq = 1 +} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 //===----------------------------------------------------------------------===// @@ -815,27 +796,32 @@ def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32">; //===----------------------------------------------------------------------===// // Extract D sub-registers of Q registers. 
-// (arm_dsubreg_0 is 5; arm_dsubreg_1 is 6) def DSubReg_i8_reg : SDNodeXForm<imm, [{ - return CurDAG->getTargetConstant(5 + N->getZExtValue() / 8, MVT::i32); + assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); + return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/8, MVT::i32); }]>; def DSubReg_i16_reg : SDNodeXForm<imm, [{ - return CurDAG->getTargetConstant(5 + N->getZExtValue() / 4, MVT::i32); + assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); + return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/4, MVT::i32); }]>; def DSubReg_i32_reg : SDNodeXForm<imm, [{ - return CurDAG->getTargetConstant(5 + N->getZExtValue() / 2, MVT::i32); + assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); + return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/2, MVT::i32); }]>; def DSubReg_f64_reg : SDNodeXForm<imm, [{ - return CurDAG->getTargetConstant(5 + N->getZExtValue(), MVT::i32); + assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); + return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue(), MVT::i32); }]>; def DSubReg_f64_other_reg : SDNodeXForm<imm, [{ - return CurDAG->getTargetConstant(5 + (1 - N->getZExtValue()), MVT::i32); + assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); + return CurDAG->getTargetConstant(ARM::dsub_0 + (1 - N->getZExtValue()), + MVT::i32); }]>; // Extract S sub-registers of Q/D registers. -// (arm_ssubreg_0 is 1; arm_ssubreg_1 is 2; etc.) def SSubReg_f32_reg : SDNodeXForm<imm, [{ - return CurDAG->getTargetConstant(1 + N->getZExtValue(), MVT::i32); + assert(ARM::ssub_3 == ARM::ssub_0+3 && "Unexpected subreg numbering"); + return CurDAG->getTargetConstant(ARM::ssub_0 + N->getZExtValue(), MVT::i32); }]>; // Translate lane numbers from Q registers to D subregs. @@ -2829,11 +2815,21 @@ def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0, // VMOV : Vector Move (Register) +let neverHasSideEffects = 1 in { def VMOVDneon: N3VX<0, 0, 0b10, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src), N3RegFrm, IIC_VMOVD, "vmov", "$dst, $src", "", []>; def VMOVQ : N3VX<0, 0, 0b10, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src), N3RegFrm, IIC_VMOVD, "vmov", "$dst, $src", "", []>; +// Pseudo vector move instructions for QQ and QQQQ registers. This should +// be expanded after register allocation is completed. +def VMOVQQ : PseudoInst<(outs QQPR:$dst), (ins QQPR:$src), + NoItinerary, "${:comment} vmov\t$dst, $src", []>; + +def VMOVQQQQ : PseudoInst<(outs QQQQPR:$dst), (ins QQQQPR:$src), + NoItinerary, "${:comment} vmov\t$dst, $src", []>; +} // neverHasSideEffects + // VMOV : Vector Move (Immediate) // VMOV_get_imm8 xform function: convert build_vector to VMOV.i8 imm. @@ -2871,6 +2867,7 @@ def vmovImm64 : PatLeaf<(build_vector), [{ // Note: Some of the cmode bits in the following VMOV instructions need to // be encoded based on the immed values. 
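The DSubReg_* transforms above now anchor on ARM::dsub_0 (asserting the enum is contiguous) instead of the old hard-coded base of 5; the lane arithmetic itself is unchanged. A standalone check of that arithmetic:

#include <cassert>

// Lane L of a Q register lives in D subreg L / (lanes per D register),
// counting from dsub_0.
static unsigned dsubFor(unsigned Lane, unsigned EltBits) {
  return Lane / (64 / EltBits);
}

int main() {
  assert(dsubFor(15, 8) == 1);   // v16i8 lane 15 -> dsub_1
  assert(dsubFor(3, 16) == 0);   // v8i16 lane 3  -> dsub_0
  assert(dsubFor(2, 32) == 1);   // v4i32 lane 2  -> dsub_1
  assert(dsubFor(1, 64) == 1);   // v2f64 lane 1  -> dsub_1
}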
+let isReMaterializable = 1 in { def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$dst), (ins h8imm:$SIMM), IIC_VMOVImm, "vmov", "i8", "$dst, $SIMM", "", @@ -2906,6 +2903,7 @@ def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$dst), (ins h64imm:$SIMM), IIC_VMOVImm, "vmov", "i64", "$dst, $SIMM", "", [(set QPR:$dst, (v2i64 vmovImm64:$SIMM))]>; +} // isReMaterializable // VMOV : Vector Get Lane (move scalar to ARM core register) @@ -3018,11 +3016,11 @@ def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)), (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>; def : Pat<(v2f32 (scalar_to_vector SPR:$src)), - (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, arm_ssubreg_0)>; + (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>; def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))), - (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, arm_dsubreg_0)>; + (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>; def : Pat<(v4f32 (scalar_to_vector SPR:$src)), - (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, arm_ssubreg_0)>; + (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>; def : Pat<(v8i8 (scalar_to_vector GPR:$src)), (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0))>; @@ -3034,15 +3032,15 @@ def : Pat<(v2i32 (scalar_to_vector GPR:$src)), def : Pat<(v16i8 (scalar_to_vector GPR:$src)), (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)), - arm_dsubreg_0)>; + dsub_0)>; def : Pat<(v8i16 (scalar_to_vector GPR:$src)), (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)), - arm_dsubreg_0)>; + dsub_0)>; def : Pat<(v4i32 (scalar_to_vector GPR:$src)), (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)), - arm_dsubreg_0)>; + dsub_0)>; // VDUP : Vector Duplicate (from ARM core register to all elements) @@ -3376,27 +3374,27 @@ def VTBX4 class N2VSPat<SDNode OpNode, ValueType ResTy, ValueType OpTy, NeonI Inst> : NEONFPPat<(ResTy (OpNode SPR:$a)), (EXTRACT_SUBREG (OpTy (Inst (INSERT_SUBREG (OpTy (IMPLICIT_DEF)), - SPR:$a, arm_ssubreg_0))), - arm_ssubreg_0)>; + SPR:$a, ssub_0))), + ssub_0)>; class N3VSPat<SDNode OpNode, NeonI Inst> : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)), (EXTRACT_SUBREG (v2f32 (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), - SPR:$a, arm_ssubreg_0), + SPR:$a, ssub_0), (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), - SPR:$b, arm_ssubreg_0))), - arm_ssubreg_0)>; + SPR:$b, ssub_0))), + ssub_0)>; class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst> : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))), (EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), - SPR:$acc, arm_ssubreg_0), + SPR:$acc, ssub_0), (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), - SPR:$a, arm_ssubreg_0), + SPR:$a, ssub_0), (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), - SPR:$b, arm_ssubreg_0)), - arm_ssubreg_0)>; + SPR:$b, ssub_0)), + ssub_0)>; // These need separate instructions because they must use DPR_VFP2 register // class which have SPR sub-registers. 
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index e3ca536..40f924b 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -127,12 +127,12 @@ def t_addrmode_sp : Operand<i32>, let Defs = [SP], Uses = [SP], hasSideEffects = 1 in { def tADJCALLSTACKUP : PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2), NoItinerary, - "@ tADJCALLSTACKUP $amt1", + "${:comment} tADJCALLSTACKUP $amt1", [(ARMcallseq_end imm:$amt1, imm:$amt2)]>, Requires<[IsThumb1Only]>; def tADJCALLSTACKDOWN : PseudoInst<(outs), (ins i32imm:$amt), NoItinerary, - "@ tADJCALLSTACKDOWN $amt", + "${:comment} tADJCALLSTACKDOWN $amt", [(ARMcallseq_start imm:$amt)]>, Requires<[IsThumb1Only]>; } @@ -254,14 +254,14 @@ def tADDspr : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, // Pseudo instruction that will expand into a tSUBspi + a copy. let usesCustomInserter = 1 in { // Expanded after instruction selection. def tSUBspi_ : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, t_imm_s4:$rhs), - NoItinerary, "@ sub\t$dst, $rhs", []>; + NoItinerary, "${:comment} sub\t$dst, $rhs", []>; def tADDspr_ : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), - NoItinerary, "@ add\t$dst, $rhs", []>; + NoItinerary, "${:comment} add\t$dst, $rhs", []>; let Defs = [CPSR] in def tANDsp : PseudoInst<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), - NoItinerary, "@ and\t$dst, $rhs", []>; + NoItinerary, "${:comment} and\t$dst, $rhs", []>; } // usesCustomInserter //===----------------------------------------------------------------------===// @@ -374,7 +374,7 @@ let isBranch = 1, isTerminator = 1 in { // Far jump let Defs = [LR] in def tBfar : TIx2<0b11110, 0b11, 1, (outs), (ins brtarget:$target), IIC_Br, - "bl\t$target\t@ far jump",[]>; + "bl\t$target\t${:comment} far jump",[]>; def tBR_JTr : T1JTI<(outs), (ins tGPR:$target, jtblock_operand:$jt, i32imm:$id), @@ -417,9 +417,13 @@ def tSVC : T1pI<(outs), (ins i32imm:$svc), IIC_Br, "svc", "\t$svc", []>, } } -// A8.6.16 B: Encoding T1 -- for disassembly only +// A8.6.16 B: Encoding T1 // If Inst{11-8} == 0b1110 then UNDEFINED -def tTRAP : T1I<(outs), (ins), IIC_Br, "trap", []>, Encoding16 { +// FIXME: Temporarily emitted as raw bytes until this pseudo-op is added to +// binutils +let isBarrier = 1, isTerminator = 1 in +def tTRAP : TI<(outs), (ins), IIC_Br, + ".short 0xdefe ${:comment} trap", [(trap)]>, Encoding16 { let Inst{15-12} = 0b1101; let Inst{11-8} = 0b1110; } @@ -476,7 +480,7 @@ def tLDRspi : T1pIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), IIC_iLoadi, // Special instruction for restore. It cannot clobber condition register // when it's expanded by eliminateCallFramePseudoInstr(). -let canFoldAsLoad = 1, mayLoad = 1 in +let canFoldAsLoad = 1, mayLoad = 1, neverHasSideEffects = 1 in def tRestore : T1pIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), IIC_iLoadi, "ldr", "\t$dst, $addr", []>, T1LdStSP<{1,?,?}>; @@ -490,7 +494,8 @@ def tLDRpci : T1pIs<(outs tGPR:$dst), (ins i32imm:$addr), IIC_iLoadi, T1Encoding<{0,1,0,0,1,?}>; // A6.2 & A8.6.59 // Special LDR for loads from non-pc-relative constpools.
-let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1 in +let canFoldAsLoad = 1, mayLoad = 1, neverHasSideEffects = 1, + isReMaterializable = 1 in def tLDRcp : T1pIs<(outs tGPR:$dst), (ins i32imm:$addr), IIC_iLoadi, "ldr", "\t$dst, $addr", []>, T1LdStSP<{1,?,?}>; @@ -527,7 +532,7 @@ def tSTRspi : T1pIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), IIC_iStorei, [(store tGPR:$src, t_addrmode_sp:$addr)]>, T1LdStSP<{0,?,?}>; -let mayStore = 1 in { +let mayStore = 1, neverHasSideEffects = 1 in { // Special instruction for spill. It cannot clobber condition register // when it's expanded by eliminateCallFramePseudoInstr(). def tSpill : T1pIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), IIC_iStorei, @@ -540,7 +545,7 @@ def tSpill : T1pIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), IIC_iStorei, // // These require the base address to be written back or one of the loaded regs. -let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { +let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { def tLDM : T1I<(outs), (ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops), IIC_iLoadm, @@ -553,9 +558,9 @@ def tLDM_UPD : T1It<(outs tGPR:$wb), "ldm${addr:submode}${p}\t$addr!, $dsts", "$addr.addr = $wb", []>, T1Encoding<{1,1,0,0,1,?}>; // A6.2 & A8.6.53 -} // mayLoad, hasExtraDefRegAllocReq +} // mayLoad, neverHasSideEffects = 1, hasExtraDefRegAllocReq -let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in def tSTM_UPD : T1It<(outs tGPR:$wb), (ins addrmode4:$addr, pred:$p, reglist:$srcs, variable_ops), IIC_iStorem, @@ -866,11 +871,12 @@ def tUXTH : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr, let usesCustomInserter = 1 in // Expanded after instruction selection. def tMOVCCr_pseudo : PseudoInst<(outs tGPR:$dst), (ins tGPR:$false, tGPR:$true, pred:$cc), - NoItinerary, "@ tMOVCCr $cc", + NoItinerary, "${:comment} tMOVCCr $cc", [/*(set tGPR:$dst, (ARMcmov tGPR:$false, tGPR:$true, imm:$cc))*/]>; // 16-bit movcc in IT blocks for Thumb2. +let neverHasSideEffects = 1 in { def tMOVCCr : T1pIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iCMOVr, "mov", "\t$dst, $rhs", []>, T1Special<{1,0,?,?}>; @@ -878,9 +884,12 @@ def tMOVCCr : T1pIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iCMOVr, def tMOVCCi : T1pIt<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iCMOVi, "mov", "\t$dst, $rhs", []>, T1General<{1,0,0,?,?}>; +} // neverHasSideEffects // tLEApcrel - Load a pc-relative address into a register without offending the // assembler.
+let neverHasSideEffects = 1 in { +let isReMaterializable = 1 in def tLEApcrel : T1I<(outs tGPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi, "adr$p\t$dst, #$label", []>, T1Encoding<{1,0,1,0,0,?}>; // A6.2 & A8.6.10 @@ -889,6 +898,7 @@ def tLEApcrelJT : T1I<(outs tGPR:$dst), (ins i32imm:$label, nohash_imm:$id, pred:$p), IIC_iALUi, "adr$p\t$dst, #${label}_${id}", []>, T1Encoding<{1,0,1,0,0,?}>; // A6.2 & A8.6.10 +} // neverHasSideEffects //===----------------------------------------------------------------------===// // TLS Instructions @@ -918,16 +928,32 @@ let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R12 ] in { def tInt_eh_sjlj_setjmp : ThumbXI<(outs),(ins tGPR:$src, tGPR:$val), AddrModeNone, SizeSpecial, NoItinerary, - "str\t$val, [$src, #8]\t@ begin eh.setjmp\n" + "str\t$val, [$src, #8]\t${:comment} begin eh.setjmp\n" "\tmov\t$val, pc\n" - "\tadds\t$val, #9\n" + "\tadds\t$val, #7\n" "\tstr\t$val, [$src, #4]\n" "\tmovs\tr0, #0\n" "\tb\t1f\n" - "\tmovs\tr0, #1\t@ end eh.setjmp\n" + "\tmovs\tr0, #1\t${:comment} end eh.setjmp\n" "1:", "", [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>; } + +// FIXME: Non-Darwin version(s) +let isBarrier = 1, hasSideEffects = 1, isTerminator = 1, + Defs = [ R7, LR, SP ] in { +def tInt_eh_sjlj_longjmp : XI<(outs), (ins GPR:$src, GPR:$scratch), + AddrModeNone, SizeSpecial, IndexModeNone, + Pseudo, NoItinerary, + "ldr\t$scratch, [$src, #8]\n\t" + "mov\tsp, $scratch\n\t" + "ldr\t$scratch, [$src, #4]\n\t" + "ldr\tr7, [$src]\n\t" + "bx\t$scratch", "", + [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>, + Requires<[IsThumb, IsDarwin]>; +} + //===----------------------------------------------------------------------===// // Non-Instruction Patterns // @@ -1011,7 +1037,7 @@ def : T1Pat<(i32 imm0_255_comp:$src), // scheduling. let isReMaterializable = 1 in def tLDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp), - NoItinerary, "@ ldr.n\t$dst, $addr\n$cp:\n\tadd\t$dst, pc", + NoItinerary, "${:comment} ldr.n\t$dst, $addr\n$cp:\n\tadd\t$dst, pc", [(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)), imm:$cp))]>, Requires<[IsThumb1Only]>; diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 742bd40..b91c089 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -185,8 +185,8 @@ multiclass T2I_un_irs<bits<4> opcod, string opc, PatFrag opnode, let Inst{15} = 0; } // register - def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVr, - opc, ".w\t$dst, $src", + def r : T2sI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVr, + opc, ".w\t$dst, $src", [(set GPR:$dst, (opnode GPR:$src))]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; @@ -198,9 +198,9 @@ multiclass T2I_un_irs<bits<4> opcod, string opc, PatFrag opnode, let Inst{5-4} = 0b00; // type } // shifted register - def s : T2I<(outs GPR:$dst), (ins t2_so_reg:$src), IIC_iMOVsi, - opc, ".w\t$dst, $src", - [(set GPR:$dst, (opnode t2_so_reg:$src))]> { + def s : T2sI<(outs GPR:$dst), (ins t2_so_reg:$src), IIC_iMOVsi, + opc, ".w\t$dst, $src", + [(set GPR:$dst, (opnode t2_so_reg:$src))]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; @@ -210,7 +210,7 @@ multiclass T2I_un_irs<bits<4> opcod, string opc, PatFrag opnode, } /// T2I_bin_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a -// binary operation that produces a value. These are predicable and can be +/// binary operation that produces a value. These are predicable and can be /// changed to modify CPSR. 
multiclass T2I_bin_irs<bits<4> opcod, string opc, PatFrag opnode, bit Commutable = 0, string wide =""> { @@ -259,23 +259,23 @@ multiclass T2I_bin_w_irs<bits<4> opcod, string opc, PatFrag opnode, /// T2I_bin_irs counterpart. multiclass T2I_rbin_is<bits<4> opcod, string opc, PatFrag opnode> { // shifted imm - def ri : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs), IIC_iALUi, - opc, ".w\t$dst, $rhs, $lhs", - [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]> { + def ri : T2sI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs), IIC_iALUi, + opc, ".w\t$dst, $rhs, $lhs", + [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; let Inst{24-21} = opcod; - let Inst{20} = 0; // The S bit. + let Inst{20} = ?; // The S bit. let Inst{15} = 0; } // shifted register - def rs : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs), IIC_iALUsi, - opc, "\t$dst, $rhs, $lhs", - [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]> { + def rs : T2sI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs), IIC_iALUsi, + opc, "\t$dst, $rhs, $lhs", + [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; - let Inst{20} = 0; // The S bit. + let Inst{20} = ?; // The S bit. } } @@ -461,10 +461,9 @@ multiclass T2I_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode, let Defs = [CPSR] in { multiclass T2I_rbin_s_is<bits<4> opcod, string opc, PatFrag opnode> { // shifted imm - def ri : T2XI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs, cc_out:$s), - IIC_iALUi, - !strconcat(opc, "${s}.w\t$dst, $rhs, $lhs"), - [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]> { + def ri : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs), IIC_iALUi, + !strconcat(opc, "s"), ".w\t$dst, $rhs, $lhs", + [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; let Inst{24-21} = opcod; @@ -472,10 +471,9 @@ multiclass T2I_rbin_s_is<bits<4> opcod, string opc, PatFrag opnode> { let Inst{15} = 0; } // shifted register - def rs : T2XI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs, cc_out:$s), - IIC_iALUsi, - !strconcat(opc, "${s}\t$dst, $rhs, $lhs"), - [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]> { + def rs : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs), IIC_iALUsi, + !strconcat(opc, "s"), "\t$dst, $rhs, $lhs", + [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; @@ -639,7 +637,8 @@ multiclass T2I_st<bits<2> opcod, string opc, PatFrag opnode> { multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> { def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, opc, ".w\t$dst, $src", - [(set GPR:$dst, (opnode GPR:$src))]> { + [(set GPR:$dst, (opnode GPR:$src))]>, + Requires<[HasT2ExtractPack]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; let Inst{22-20} = opcod; @@ -650,7 +649,8 @@ multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> { } def r_rot : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$rot), IIC_iUNAsi, opc, ".w\t$dst, $src, ror $rot", - [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]> { + [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]>, + Requires<[HasT2ExtractPack]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; let Inst{22-20} = opcod; @@ -665,7 +665,8 @@ multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> { multiclass T2I_unary_rrot_nw<bits<3> opcod, string opc, PatFrag opnode> { def r : T2I<(outs 
GPR:$dst), (ins GPR:$src), IIC_iUNAr, opc, "\t$dst, $src", - [(set GPR:$dst, (opnode GPR:$src))]> { + [(set GPR:$dst, (opnode GPR:$src))]>, + Requires<[HasT2ExtractPack]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; let Inst{22-20} = opcod; @@ -676,7 +677,8 @@ multiclass T2I_unary_rrot_nw<bits<3> opcod, string opc, PatFrag opnode> { } def r_rot : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$rot), IIC_iUNAsi, opc, "\t$dst, $src, ror $rot", - [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]> { + [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]>, + Requires<[HasT2ExtractPack]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; let Inst{22-20} = opcod; @@ -717,7 +719,8 @@ multiclass T2I_unary_rrot_DO<bits<3> opcod, string opc> { multiclass T2I_bin_rrot<bits<3> opcod, string opc, PatFrag opnode> { def rr : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS), IIC_iALUr, opc, "\t$dst, $LHS, $RHS", - [(set GPR:$dst, (opnode GPR:$LHS, GPR:$RHS))]> { + [(set GPR:$dst, (opnode GPR:$LHS, GPR:$RHS))]>, + Requires<[HasT2ExtractPack]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; let Inst{22-20} = opcod; @@ -728,7 +731,8 @@ multiclass T2I_bin_rrot<bits<3> opcod, string opc, PatFrag opnode> { def rr_rot : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS, i32imm:$rot), IIC_iALUsr, opc, "\t$dst, $LHS, $RHS, ror $rot", [(set GPR:$dst, (opnode GPR:$LHS, - (rotr GPR:$RHS, rot_imm:$rot)))]> { + (rotr GPR:$RHS, rot_imm:$rot)))]>, + Requires<[HasT2ExtractPack]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; let Inst{22-20} = opcod; @@ -771,6 +775,8 @@ multiclass T2I_bin_rrot_DO<bits<3> opcod, string opc> { // LEApcrel - Load a pc-relative address into a register without offending the // assembler. +let neverHasSideEffects = 1 in { +let isReMaterializable = 1 in def t2LEApcrel : T2XI<(outs GPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi, "adr$p.w\t$dst, #$label", []> { let Inst{31-27} = 0b11110; @@ -792,6 +798,7 @@ def t2LEApcrelJT : T2XI<(outs GPR:$dst), let Inst{19-16} = 0b1111; // Rn let Inst{15} = 0; } +} // neverHasSideEffects // ADD r, sp, {so_imm|i12} def t2ADDrSPi : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm), @@ -856,9 +863,11 @@ def t2SUBrSPs : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs), let Inst{15} = 0; } -// Signed and unsigned division, for disassembly only +// Signed and unsigned division on v7-M def t2SDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi, - "sdiv", "\t$dst, $a, $b", []> { + "sdiv", "\t$dst, $a, $b", + [(set GPR:$dst, (sdiv GPR:$a, GPR:$b))]>, + Requires<[HasDivide]> { let Inst{31-27} = 0b11111; let Inst{26-21} = 0b011100; let Inst{20} = 0b1; @@ -867,7 +876,9 @@ def t2SDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi, } def t2UDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi, - "udiv", "\t$dst, $a, $b", []> { + "udiv", "\t$dst, $a, $b", + [(set GPR:$dst, (udiv GPR:$a, GPR:$b))]>, + Requires<[HasDivide]> { let Inst{31-27} = 0b11111; let Inst{26-21} = 0b011101; let Inst{20} = 0b1; @@ -878,11 +889,11 @@ def t2UDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi, // Pseudo instruction that will expand into a t2SUBrSPi + a copy. let usesCustomInserter = 1 in { // Expanded after instruction selection. 
def t2SUBrSPi_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm), - NoItinerary, "@ sub.w\t$dst, $sp, $imm", []>; + NoItinerary, "${:comment} sub.w\t$dst, $sp, $imm", []>; def t2SUBrSPi12_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, imm0_4095:$imm), - NoItinerary, "@ subw\t$dst, $sp, $imm", []>; + NoItinerary, "${:comment} subw\t$dst, $sp, $imm", []>; def t2SUBrSPs_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs), - NoItinerary, "@ sub\t$dst, $sp, $rhs", []>; + NoItinerary, "${:comment} sub\t$dst, $sp, $rhs", []>; } // usesCustomInserter @@ -902,7 +913,7 @@ defm t2LDRB : T2I_ld<0, 0b00, "ldrb", UnOpFrag<(zextloadi8 node:$Src)>>; defm t2LDRSH : T2I_ld<1, 0b01, "ldrsh", UnOpFrag<(sextloadi16 node:$Src)>>; defm t2LDRSB : T2I_ld<1, 0b00, "ldrsb", UnOpFrag<(sextloadi8 node:$Src)>>; -let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { +let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { // Load doubleword def t2LDRDi8 : T2Ii8s4<1, 0, 1, (outs GPR:$dst1, GPR:$dst2), (ins t2addrmode_imm8s4:$addr), @@ -912,7 +923,7 @@ def t2LDRDpci : T2Ii8s4<1, 0, 1, (outs GPR:$dst1, GPR:$dst2), "ldrd", "\t$dst1, $addr", []> { let Inst{19-16} = 0b1111; // Rn } -} +} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 // zextload i1 -> zextload i8 def : T2Pat<(zextloadi1 t2addrmode_imm12:$addr), @@ -955,7 +966,7 @@ def : T2Pat<(extloadi16 (ARMWrapper tconstpool:$addr)), (t2LDRHpci tconstpool:$addr)>; // Indexed loads -let mayLoad = 1 in { +let mayLoad = 1, neverHasSideEffects = 1 in { def t2LDR_PRE : T2Iidxldst<0, 0b10, 1, 1, (outs GPR:$dst, GPR:$base_wb), (ins t2addrmode_imm8:$addr), AddrModeT2_i8, IndexModePre, IIC_iLoadiu, @@ -1011,7 +1022,7 @@ def t2LDRSH_POST : T2Iidxldst<1, 0b01, 1, 0, (outs GPR:$dst, GPR:$base_wb), AddrModeT2_i8, IndexModePost, IIC_iLoadiu, "ldrsh", "\t$dst, [$base], $offset", "$base = $base_wb", []>; -} +} // mayLoad = 1, neverHasSideEffects = 1 // LDRT, LDRBT, LDRHT, LDRSBT, LDRSHT all have offset mode (PUW=0b110) and are // for disassembly only. @@ -1041,7 +1052,7 @@ defm t2STRB:T2I_st<0b00,"strb",BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>; defm t2STRH:T2I_st<0b01,"strh",BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>; // Store doubleword -let mayLoad = 1, hasExtraSrcRegAllocReq = 1 in +let mayLoad = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs), (ins GPR:$src1, GPR:$src2, t2addrmode_imm8s4:$addr), IIC_iStorer, "strd", "\t$src1, $addr", []>; @@ -1204,7 +1215,7 @@ defm t2PLI : T2Ipl<1, 0, "pli">; // Load / store multiple Instructions. // -let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { +let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { def t2LDM : T2XI<(outs), (ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops), IIC_iLoadm, "ldm${addr:submode}${p}${addr:wide}\t$addr, $dsts", []> { @@ -1227,9 +1238,9 @@ def t2LDM_UPD : T2XIt<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p, let Inst{21} = 1; // The W bit. let Inst{20} = 1; // Load } -} // mayLoad, hasExtraDefRegAllocReq +} // mayLoad, neverHasSideEffects, hasExtraDefRegAllocReq -let mayStore = 1, hasExtraSrcRegAllocReq = 1 in { +let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in { def t2STM : T2XI<(outs), (ins addrmode4:$addr, pred:$p, reglist:$srcs, variable_ops), IIC_iStorem, "stm${addr:submode}${p}${addr:wide}\t$addr, $srcs", []> { @@ -1253,7 +1264,7 @@ def t2STM_UPD : T2XIt<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p, let Inst{21} = 1; // The W bit. 
let Inst{20} = 0; // Store } -} // mayStore, hasExtraSrcRegAllocReq +} // mayStore, neverHasSideEffects, hasExtraSrcRegAllocReq //===----------------------------------------------------------------------===// // Move Instructions. @@ -1564,9 +1575,9 @@ def t2MOVrx : T2sI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, } let Defs = [CPSR] in { -def t2MOVsrl_flag : T2XI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, - "lsrs.w\t$dst, $src, #1", - [(set GPR:$dst, (ARMsrl_flag GPR:$src))]> { +def t2MOVsrl_flag : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, + "lsrs", ".w\t$dst, $src, #1", + [(set GPR:$dst, (ARMsrl_flag GPR:$src))]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = 0b0010; @@ -1577,9 +1588,9 @@ def t2MOVsrl_flag : T2XI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, let Inst{14-12} = 0b000; let Inst{7-6} = 0b01; } -def t2MOVsra_flag : T2XI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, - "asrs.w\t$dst, $src, #1", - [(set GPR:$dst, (ARMsra_flag GPR:$src))]> { +def t2MOVsra_flag : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, + "asrs", ".w\t$dst, $src, #1", + [(set GPR:$dst, (ARMsra_flag GPR:$src))]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = 0b0010; @@ -2058,7 +2069,8 @@ def t2PKHBT : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt), IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2, lsl $shamt", [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF), (and (shl GPR:$src2, (i32 imm:$shamt)), - 0xFFFF0000)))]> { + 0xFFFF0000)))]>, + Requires<[HasT2ExtractPack]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-20} = 0b01100; @@ -2068,15 +2080,18 @@ def t2PKHBT : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt), // Alternate cases for PKHBT where identities eliminate some nodes. def : T2Pat<(or (and GPR:$src1, 0xFFFF), (and GPR:$src2, 0xFFFF0000)), - (t2PKHBT GPR:$src1, GPR:$src2, 0)>; + (t2PKHBT GPR:$src1, GPR:$src2, 0)>, + Requires<[HasT2ExtractPack]>; def : T2Pat<(or (and GPR:$src1, 0xFFFF), (shl GPR:$src2, imm16_31:$shamt)), - (t2PKHBT GPR:$src1, GPR:$src2, imm16_31:$shamt)>; + (t2PKHBT GPR:$src1, GPR:$src2, imm16_31:$shamt)>, + Requires<[HasT2ExtractPack]>; def t2PKHTB : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt), IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2, asr $shamt", [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF0000), (and (sra GPR:$src2, imm16_31:$shamt), - 0xFFFF)))]> { + 0xFFFF)))]>, + Requires<[HasT2ExtractPack]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-20} = 0b01100; @@ -2087,10 +2102,12 @@ def t2PKHTB : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt), // Alternate cases for PKHTB where identities eliminate some nodes. Note that // a shift amount of 0 is *not legal* here, it is PKHBT instead. def : T2Pat<(or (and GPR:$src1, 0xFFFF0000), (srl GPR:$src2, (i32 16))), - (t2PKHTB GPR:$src1, GPR:$src2, 16)>; + (t2PKHTB GPR:$src1, GPR:$src2, 16)>, + Requires<[HasT2ExtractPack]>; def : T2Pat<(or (and GPR:$src1, 0xFFFF0000), (and (srl GPR:$src2, imm1_15:$shamt), 0xFFFF)), - (t2PKHTB GPR:$src1, GPR:$src2, imm1_15:$shamt)>; + (t2PKHTB GPR:$src1, GPR:$src2, imm1_15:$shamt)>, + Requires<[HasT2ExtractPack]>; //===----------------------------------------------------------------------===// // Comparison Instructions... @@ -2127,6 +2144,7 @@ defm t2TEQ : T2I_cmp_irs<0b0100, "teq", // Conditional moves // FIXME: should be able to write a pattern for ARMcmov, but can't use // a two-value operand where a dag node expects two operands. 
:( +let neverHasSideEffects = 1 in { def t2MOVCCr : T2I<(outs GPR:$dst), (ins GPR:$false, GPR:$true), IIC_iCMOVr, "mov", ".w\t$dst, $true", [/*(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc, CCR:$ccr))*/]>, @@ -2178,6 +2196,7 @@ def t2MOVCCror : T2I_movcc_sh<0b11, (outs GPR:$dst), (ins GPR:$false, GPR:$true, i32imm:$rhs), IIC_iCMOVsi, "ror", ".w\t$dst, $true, $rhs", []>, RegConstraint<"$false = $dst">; +} // neverHasSideEffects //===----------------------------------------------------------------------===// // Atomic operations intrinsics @@ -2378,13 +2397,13 @@ let Defs = D31 ] in { def t2Int_eh_sjlj_setjmp : Thumb2XI<(outs), (ins GPR:$src, tGPR:$val), AddrModeNone, SizeSpecial, NoItinerary, - "str\t$val, [$src, #8]\t@ begin eh.setjmp\n" + "str\t$val, [$src, #8]\t${:comment} begin eh.setjmp\n" "\tmov\t$val, pc\n" - "\tadds\t$val, #9\n" + "\tadds\t$val, #7\n" "\tstr\t$val, [$src, #4]\n" "\tmovs\tr0, #0\n" "\tb\t1f\n" - "\tmovs\tr0, #1\t@ end eh.setjmp\n" + "\tmovs\tr0, #1\t${:comment} end eh.setjmp\n" "1:", "", [(set R0, (ARMeh_sjlj_setjmp GPR:$src, tGPR:$val))]>, Requires<[IsThumb2, HasVFP2]>; @@ -2394,13 +2413,13 @@ let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR ] in { def t2Int_eh_sjlj_setjmp_nofp : Thumb2XI<(outs), (ins GPR:$src, tGPR:$val), AddrModeNone, SizeSpecial, NoItinerary, - "str\t$val, [$src, #8]\t@ begin eh.setjmp\n" + "str\t$val, [$src, #8]\t${:comment} begin eh.setjmp\n" "\tmov\t$val, pc\n" - "\tadds\t$val, #9\n" + "\tadds\t$val, #7\n" "\tstr\t$val, [$src, #4]\n" "\tmovs\tr0, #0\n" "\tb\t1f\n" - "\tmovs\tr0, #1\t@ end eh.setjmp\n" + "\tmovs\tr0, #1\t${:comment} end eh.setjmp\n" "1:", "", [(set R0, (ARMeh_sjlj_setjmp GPR:$src, tGPR:$val))]>, Requires<[IsThumb2, NoVFP]>; @@ -2672,7 +2691,7 @@ def : T2Pat<(ARMWrapperJT tjumptable:$dst, imm:$id), // scheduling. let canFoldAsLoad = 1, isReMaterializable = 1 in def t2LDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp), - NoItinerary, "@ ldr.w\t$dst, $addr\n$cp:\n\tadd\t$dst, pc", + NoItinerary, "${:comment} ldr.w\t$dst, $addr\n$cp:\n\tadd\t$dst, pc", [(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)), imm:$cp))]>, Requires<[IsThumb2]>; diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 36fcaa1..54474cf 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -76,7 +76,7 @@ def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$src, addrmode5:$addr), // Load / store multiple Instructions. 
// -let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { +let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { def VLDMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$dsts, variable_ops), IndexModeNone, IIC_fpLoadm, "vldm${addr:submode}${p}\t${addr:base}, $dsts", "", []> { @@ -104,9 +104,9 @@ def VLDMS_UPD : AXSI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p, "$addr.base = $wb", []> { let Inst{20} = 1; } -} // mayLoad, hasExtraDefRegAllocReq +} // mayLoad, neverHasSideEffects, hasExtraDefRegAllocReq -let mayStore = 1, hasExtraSrcRegAllocReq = 1 in { +let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in { def VSTMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$srcs, variable_ops), IndexModeNone, IIC_fpStorem, "vstm${addr:submode}${p}\t${addr:base}, $srcs", "", []> { @@ -134,7 +134,7 @@ def VSTMS_UPD : AXSI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p, "$addr.base = $wb", []> { let Inst{20} = 0; } -} // mayStore, hasExtraSrcRegAllocReq +} // mayStore, neverHasSideEffects, hasExtraSrcRegAllocReq // FLDMX, FSTMX - mixing S/D registers for pre-armv6 cores @@ -313,6 +313,7 @@ def VMOVSR : AVConv4I<0b11100000, 0b1010, (outs SPR:$dst), (ins GPR:$src), IIC_fpMOVIS, "vmov", "\t$dst, $src", [(set SPR:$dst, (bitconvert GPR:$src))]>; +let neverHasSideEffects = 1 in { def VMOVRRD : AVConv3I<0b11000101, 0b1011, (outs GPR:$wb, GPR:$dst2), (ins DPR:$src), IIC_fpMOVDI, "vmov", "\t$wb, $dst2, $src", @@ -326,6 +327,7 @@ def VMOVRRS : AVConv3I<0b11000101, 0b1010, [/* For disassembly only; pattern left blank */]> { let Inst{7-6} = 0b00; } +} // neverHasSideEffects // FMDHR: GPR -> SPR // FMDLR: GPR -> SPR @@ -337,6 +339,7 @@ def VMOVDRR : AVConv5I<0b11000100, 0b1011, let Inst{7-6} = 0b00; } +let neverHasSideEffects = 1 in def VMOVSRR : AVConv5I<0b11000100, 0b1010, (outs SPR:$dst1, SPR:$dst2), (ins GPR:$src1, GPR:$src2), IIC_fpMOVID, "vmov", "\t$dst1, $dst2, $src1, $src2", @@ -606,6 +609,7 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0, // FP Conditional moves. // +let neverHasSideEffects = 1 in { def VMOVDcc : ADuI<0b11101, 0b11, 0b0000, 0b01, 0, (outs DPR:$dst), (ins DPR:$false, DPR:$true), IIC_fpUNA64, "vmov", ".f64\t$dst, $true", @@ -629,7 +633,7 @@ def VNEGScc : ASuI<0b11101, 0b11, 0b0001, 0b01, 0, IIC_fpUNA32, "vneg", ".f32\t$dst, $true", [/*(set SPR:$dst, (ARMcneg SPR:$false, SPR:$true, imm:$cc))*/]>, RegConstraint<"$false = $dst">; - +} // neverHasSideEffects //===----------------------------------------------------------------------===// // Misc. @@ -651,6 +655,7 @@ def FMSTAT : VFPAI<(outs), (ins), VFPMiscFrm, IIC_fpSTAT, "vmrs", // FPSCR <-> GPR (for disassembly only) +let neverHasSideEffects = 1 in { let Uses = [FPSCR] in { def VMRS : VFPAI<(outs GPR:$dst), (ins), VFPMiscFrm, IIC_fpSTAT, "vmrs", "\t$dst, fpscr", @@ -674,6 +679,7 @@ def VMSR : VFPAI<(outs), (ins GPR:$src), VFPMiscFrm, IIC_fpSTAT, "vmsr", let Inst{4} = 1; } } +} // neverHasSideEffects // Materialize FP immediates. VFP3 only. 
let isReMaterializable = 1 in { diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp index b31a4fa..5f6d7ee 100644 --- a/lib/Target/ARM/ARMJITInfo.cpp +++ b/lib/Target/ARM/ARMJITInfo.cpp @@ -318,6 +318,18 @@ void ARMJITInfo::relocate(void *Function, MachineRelocation *MR, *((intptr_t*)RelocPos) |= ResultPtr; break; } + case ARM::reloc_arm_movw: { + ResultPtr = ResultPtr & 0xFFFF; + *((intptr_t*)RelocPos) |= ResultPtr & 0xFFF; + *((intptr_t*)RelocPos) |= ((ResultPtr >> 12) & 0xF) << 16; + break; + } + case ARM::reloc_arm_movt: { + ResultPtr = (ResultPtr >> 16) & 0xFFFF; + *((intptr_t*)RelocPos) |= ResultPtr & 0xFFF; + *((intptr_t*)RelocPos) |= ((ResultPtr >> 12) & 0xF) << 16; + break; + } } } } diff --git a/lib/Target/ARM/ARMRegisterInfo.h b/lib/Target/ARM/ARMRegisterInfo.h index 041afd0..8edfb9a 100644 --- a/lib/Target/ARM/ARMRegisterInfo.h +++ b/lib/Target/ARM/ARMRegisterInfo.h @@ -23,16 +23,6 @@ namespace llvm { class ARMBaseInstrInfo; class Type; -namespace ARM { - /// SubregIndex - The index of various subregister classes. Note that - /// these indices must be kept in sync with the class indices in the - /// ARMRegisterInfo.td file. - enum SubregIndex { - SSUBREG_0 = 1, SSUBREG_1 = 2, SSUBREG_2 = 3, SSUBREG_3 = 4, - DSUBREG_0 = 5, DSUBREG_1 = 6 - }; -} - struct ARMRegisterInfo : public ARMBaseRegisterInfo { public: ARMRegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &STI); diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index 0d4200c..6beca8b 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -23,6 +23,44 @@ class ARMFReg<bits<6> num, string n> : Register<n> { let Namespace = "ARM"; } +// Subregister indices. +let Namespace = "ARM" in { +// Note: Code depends on these having consecutive numbers. +def ssub_0 : SubRegIndex; +def ssub_1 : SubRegIndex; +def ssub_2 : SubRegIndex; // In a Q reg. +def ssub_3 : SubRegIndex; +def ssub_4 : SubRegIndex; // In a QQ reg. +def ssub_5 : SubRegIndex; +def ssub_6 : SubRegIndex; +def ssub_7 : SubRegIndex; +def ssub_8 : SubRegIndex; // In a QQQQ reg. 
+def ssub_9 : SubRegIndex; +def ssub_10 : SubRegIndex; +def ssub_11 : SubRegIndex; +def ssub_12 : SubRegIndex; +def ssub_13 : SubRegIndex; +def ssub_14 : SubRegIndex; +def ssub_15 : SubRegIndex; + +def dsub_0 : SubRegIndex; +def dsub_1 : SubRegIndex; +def dsub_2 : SubRegIndex; +def dsub_3 : SubRegIndex; +def dsub_4 : SubRegIndex; +def dsub_5 : SubRegIndex; +def dsub_6 : SubRegIndex; +def dsub_7 : SubRegIndex; + +def qsub_0 : SubRegIndex; +def qsub_1 : SubRegIndex; +def qsub_2 : SubRegIndex; +def qsub_3 : SubRegIndex; + +def qqsub_0 : SubRegIndex; +def qqsub_1 : SubRegIndex; +} + // Integer registers def R0 : ARMReg< 0, "r0">, DwarfRegNum<[0]>; def R1 : ARMReg< 1, "r1">, DwarfRegNum<[1]>; @@ -58,9 +96,9 @@ def S24 : ARMFReg<24, "s24">; def S25 : ARMFReg<25, "s25">; def S26 : ARMFReg<26, "s26">; def S27 : ARMFReg<27, "s27">; def S28 : ARMFReg<28, "s28">; def S29 : ARMFReg<29, "s29">; def S30 : ARMFReg<30, "s30">; def S31 : ARMFReg<31, "s31">; -def SDummy : ARMFReg<63, "sINVALID">; // Aliases of the F* registers used to hold 64-bit fp values (doubles) +let SubRegIndices = [ssub_0, ssub_1] in { def D0 : ARMReg< 0, "d0", [S0, S1]>; def D1 : ARMReg< 1, "d1", [S2, S3]>; def D2 : ARMReg< 2, "d2", [S4, S5]>; @@ -77,6 +115,7 @@ def D12 : ARMReg<12, "d12", [S24, S25]>; def D13 : ARMReg<13, "d13", [S26, S27]>; def D14 : ARMReg<14, "d14", [S28, S29]>; def D15 : ARMReg<15, "d15", [S30, S31]>; +} // VFP3 defines 16 additional double registers def D16 : ARMFReg<16, "d16">; def D17 : ARMFReg<17, "d17">; @@ -89,6 +128,9 @@ def D28 : ARMFReg<28, "d28">; def D29 : ARMFReg<29, "d29">; def D30 : ARMFReg<30, "d30">; def D31 : ARMFReg<31, "d31">; // Advanced SIMD (NEON) defines 16 quad-word aliases +let SubRegIndices = [dsub_0, dsub_1], + CompositeIndices = [(ssub_2 dsub_1, ssub_0), + (ssub_3 dsub_1, ssub_1)] in { def Q0 : ARMReg< 0, "q0", [D0, D1]>; def Q1 : ARMReg< 1, "q1", [D2, D3]>; def Q2 : ARMReg< 2, "q2", [D4, D5]>; @@ -97,6 +139,8 @@ def Q4 : ARMReg< 4, "q4", [D8, D9]>; def Q5 : ARMReg< 5, "q5", [D10, D11]>; def Q6 : ARMReg< 6, "q6", [D12, D13]>; def Q7 : ARMReg< 7, "q7", [D14, D15]>; +} +let SubRegIndices = [dsub_0, dsub_1] in { def Q8 : ARMReg< 8, "q8", [D16, D17]>; def Q9 : ARMReg< 9, "q9", [D18, D19]>; def Q10 : ARMReg<10, "q10", [D20, D21]>; @@ -105,6 +149,51 @@ def Q12 : ARMReg<12, "q12", [D24, D25]>; def Q13 : ARMReg<13, "q13", [D26, D27]>; def Q14 : ARMReg<14, "q14", [D28, D29]>; def Q15 : ARMReg<15, "q15", [D30, D31]>; +} + +// Pseudo 256-bit registers to represent pairs of Q registers. These should +// never be present in the emitted code. +// These are used for NEON load / store instructions, e.g. vld4, vst3. +// NOTE: It's possible to define more QQ registers since technically the +// starting D register number doesn't have to be a multiple of 4. e.g. +// D1, D2, D3, D4 would be a legal quad. But that would make the sub-register +// stuff very messy.
+let SubRegIndices = [qsub_0, qsub_1] in { +let CompositeIndices = [(dsub_2 qsub_1, dsub_0), (dsub_3 qsub_1, dsub_1), + (ssub_4 qsub_1, ssub_0), (ssub_5 qsub_1, ssub_1), + (ssub_6 qsub_1, ssub_2), (ssub_7 qsub_1, ssub_3)] in { +def QQ0 : ARMReg<0, "qq0", [Q0, Q1]>; +def QQ1 : ARMReg<1, "qq1", [Q2, Q3]>; +def QQ2 : ARMReg<2, "qq2", [Q4, Q5]>; +def QQ3 : ARMReg<3, "qq3", [Q6, Q7]>; +} +let CompositeIndices = [(dsub_2 qsub_1, dsub_0), (dsub_3 qsub_1, dsub_1)] in { +def QQ4 : ARMReg<4, "qq4", [Q8, Q9]>; +def QQ5 : ARMReg<5, "qq5", [Q10, Q11]>; +def QQ6 : ARMReg<6, "qq6", [Q12, Q13]>; +def QQ7 : ARMReg<7, "qq7", [Q14, Q15]>; +} +} + +// Pseudo 512-bit registers to represent four consecutive Q registers. +let SubRegIndices = [qqsub_0, qqsub_1] in { +let CompositeIndices = [(qsub_2 qqsub_1, qsub_0), (qsub_3 qqsub_1, qsub_1), + (dsub_4 qqsub_1, dsub_0), (dsub_5 qqsub_1, dsub_1), + (dsub_6 qqsub_1, dsub_2), (dsub_7 qqsub_1, dsub_3), + (ssub_8 qqsub_1, ssub_0), (ssub_9 qqsub_1, ssub_1), + (ssub_10 qqsub_1, ssub_2), (ssub_11 qqsub_1, ssub_3), + (ssub_12 qqsub_1, ssub_4), (ssub_13 qqsub_1, ssub_5), + (ssub_14 qqsub_1, ssub_6), (ssub_15 qqsub_1, ssub_7)] in { +def QQQQ0 : ARMReg<0, "qqqq0", [QQ0, QQ1]>; +def QQQQ1 : ARMReg<1, "qqqq1", [QQ2, QQ3]>; +} +let CompositeIndices = [(qsub_2 qqsub_1, qsub_0), (qsub_3 qqsub_1, qsub_1), + (dsub_4 qqsub_1, dsub_0), (dsub_5 qqsub_1, dsub_1), + (dsub_6 qqsub_1, dsub_2), (dsub_7 qqsub_1, dsub_3)] in { +def QQQQ2 : ARMReg<2, "qqqq2", [QQ4, QQ5]>; +def QQQQ3 : ARMReg<3, "qqqq3", [QQ6, QQ7]>; +} +} // Current Program Status Register. def CPSR : ARMReg<0, "cpsr">; @@ -270,11 +359,6 @@ def SPR_8 : RegisterClass<"ARM", [f32], 32, [S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15]>; -// Dummy f32 regclass to represent impossible subreg indices. -def SPR_INVALID : RegisterClass<"ARM", [f32], 32, [SDummy]> { - let CopyCost = -1; -} - // Scalar double precision floating point / generic 64-bit vector register // class. // ARM requires only word alignment for double. It's more performant if it @@ -284,7 +368,6 @@ def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64, D8, D9, D10, D11, D12, D13, D14, D15, D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30, D31]> { - let SubRegClassList = [SPR_INVALID, SPR_INVALID]; let MethodProtos = [{ iterator allocation_order_begin(const MachineFunction &MF) const; iterator allocation_order_end(const MachineFunction &MF) const; @@ -332,79 +415,68 @@ def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64, def DPR_VFP2 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64, [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15]> { - let SubRegClassList = [SPR, SPR]; + let SubRegClasses = [(SPR ssub_0, ssub_1)]; } // Subset of DPR which can be used as a source of NEON scalars for 16-bit // operations def DPR_8 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64, [D0, D1, D2, D3, D4, D5, D6, D7]> { - let SubRegClassList = [SPR_8, SPR_8]; + let SubRegClasses = [(SPR_8 ssub_0, ssub_1)]; } // Generic 128-bit vector register class. def QPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128, [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15]> { - let SubRegClassList = [SPR_INVALID, SPR_INVALID, SPR_INVALID, SPR_INVALID, - DPR, DPR]; + let SubRegClasses = [(DPR dsub_0, dsub_1)]; } // Subset of QPR that have 32-bit SPR subregs. 
def QPR_VFP2 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128, [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]> { - let SubRegClassList = [SPR, SPR, SPR, SPR, DPR_VFP2, DPR_VFP2]; + let SubRegClasses = [(SPR ssub_0, ssub_1, ssub_2, ssub_3), + (DPR_VFP2 dsub_0, dsub_1)]; } // Subset of QPR that have DPR_8 and SPR_8 subregs. def QPR_8 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128, [Q0, Q1, Q2, Q3]> { - let SubRegClassList = [SPR_8, SPR_8, SPR_8, SPR_8, DPR_8, DPR_8]; + let SubRegClasses = [(SPR_8 ssub_0, ssub_1, ssub_2, ssub_3), + (DPR_8 dsub_0, dsub_1)]; +} + +// Pseudo 256-bit vector register class to model pairs of Q registers +// (4 consecutive D registers). +def QQPR : RegisterClass<"ARM", [v4i64], + 256, + [QQ0, QQ1, QQ2, QQ3, QQ4, QQ5, QQ6, QQ7]> { + let SubRegClasses = [(DPR dsub_0, dsub_1, dsub_2, dsub_3), + (QPR qsub_0, qsub_1)]; +} + +// Subset of QQPR that have 32-bit SPR subregs. +def QQPR_VFP2 : RegisterClass<"ARM", [v4i64], + 256, + [QQ0, QQ1, QQ2, QQ3]> { + let SubRegClasses = [(SPR ssub_0, ssub_1, ssub_2, ssub_3), + (DPR_VFP2 dsub_0, dsub_1, dsub_2, dsub_3), + (QPR_VFP2 qsub_0, qsub_1)]; + +} + +// Pseudo 512-bit vector register class to model 4 consecutive Q registers +// (8 consecutive D registers). +def QQQQPR : RegisterClass<"ARM", [v8i64], + 256, + [QQQQ0, QQQQ1, QQQQ2, QQQQ3]> { + let SubRegClasses = [(DPR dsub_0, dsub_1, dsub_2, dsub_3, + dsub_4, dsub_5, dsub_6, dsub_7), + (QPR qsub_0, qsub_1, qsub_2, qsub_3)]; } // Condition code registers. def CCR : RegisterClass<"ARM", [i32], 32, [CPSR]>; -//===----------------------------------------------------------------------===// -// Subregister Set Definitions... now that we have all of the pieces, define the -// sub registers for each register. -// - -def arm_ssubreg_0 : PatLeaf<(i32 1)>; -def arm_ssubreg_1 : PatLeaf<(i32 2)>; -def arm_ssubreg_2 : PatLeaf<(i32 3)>; -def arm_ssubreg_3 : PatLeaf<(i32 4)>; -def arm_dsubreg_0 : PatLeaf<(i32 5)>; -def arm_dsubreg_1 : PatLeaf<(i32 6)>; - -// S sub-registers of D registers. -def : SubRegSet<1, [D0, D1, D2, D3, D4, D5, D6, D7, - D8, D9, D10, D11, D12, D13, D14, D15], - [S0, S2, S4, S6, S8, S10, S12, S14, - S16, S18, S20, S22, S24, S26, S28, S30]>; -def : SubRegSet<2, [D0, D1, D2, D3, D4, D5, D6, D7, - D8, D9, D10, D11, D12, D13, D14, D15], - [S1, S3, S5, S7, S9, S11, S13, S15, - S17, S19, S21, S23, S25, S27, S29, S31]>; - -// S sub-registers of Q registers. -def : SubRegSet<1, [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7], - [S0, S4, S8, S12, S16, S20, S24, S28]>; -def : SubRegSet<2, [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7], - [S1, S5, S9, S13, S17, S21, S25, S29]>; -def : SubRegSet<3, [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7], - [S2, S6, S10, S14, S18, S22, S26, S30]>; -def : SubRegSet<4, [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7], - [S3, S7, S11, S15, S19, S23, S27, S31]>; - -// D sub-registers of Q registers. -def : SubRegSet<5, [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, - Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15], - [D0, D2, D4, D6, D8, D10, D12, D14, - D16, D18, D20, D22, D24, D26, D28, D30]>; -def : SubRegSet<6, [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, - Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15], - [D1, D3, D5, D7, D9, D11, D13, D15, - D17, D19, D21, D23, D25, D27, D29, D31]>; diff --git a/lib/Target/ARM/ARMRelocations.h b/lib/Target/ARM/ARMRelocations.h index 2cc2950..86e7206 100644 --- a/lib/Target/ARM/ARMRelocations.h +++ b/lib/Target/ARM/ARMRelocations.h @@ -47,7 +47,13 @@ namespace llvm { reloc_arm_pic_jt, // reloc_arm_branch - Branch address relocation. 
- reloc_arm_branch + reloc_arm_branch, + + // reloc_arm_movt - MOVT immediate relocation. + reloc_arm_movt, + + // reloc_arm_movw - MOVW immediate relocation. + reloc_arm_movw }; } } diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp index c04ee38..a289407 100644 --- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp +++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp @@ -12,11 +12,123 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "arm-selectiondag-info" -#include "ARMSelectionDAGInfo.h" +#include "ARMTargetMachine.h" using namespace llvm; -ARMSelectionDAGInfo::ARMSelectionDAGInfo() { +ARMSelectionDAGInfo::ARMSelectionDAGInfo(const TargetMachine &TM) + : TargetSelectionDAGInfo(TM), + Subtarget(&TM.getSubtarget<ARMSubtarget>()) { } ARMSelectionDAGInfo::~ARMSelectionDAGInfo() { } + +SDValue +ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, + SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, + bool isVolatile, bool AlwaysInline, + const Value *DstSV, + uint64_t DstSVOff, + const Value *SrcSV, + uint64_t SrcSVOff) const { + // Do repeated 4-byte loads and stores. To be improved. + // This requires 4-byte alignment. + if ((Align & 3) != 0) + return SDValue(); + // This requires the copy size to be a constant, preferably + // within a subtarget-specific limit. + ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); + if (!ConstantSize) + return SDValue(); + uint64_t SizeVal = ConstantSize->getZExtValue(); + if (!AlwaysInline && SizeVal > Subtarget->getMaxInlineSizeThreshold()) + return SDValue(); + + unsigned BytesLeft = SizeVal & 3; + unsigned NumMemOps = SizeVal >> 2; + unsigned EmittedNumMemOps = 0; + EVT VT = MVT::i32; + unsigned VTSize = 4; + unsigned i = 0; + const unsigned MAX_LOADS_IN_LDM = 6; + SDValue TFOps[MAX_LOADS_IN_LDM]; + SDValue Loads[MAX_LOADS_IN_LDM]; + uint64_t SrcOff = 0, DstOff = 0; + + // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the + // same number of stores. The loads and stores will get combined into + // ldm/stm later on. + while (EmittedNumMemOps < NumMemOps) { + for (i = 0; + i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { + Loads[i] = DAG.getLoad(VT, dl, Chain, + DAG.getNode(ISD::ADD, dl, MVT::i32, Src, + DAG.getConstant(SrcOff, MVT::i32)), + SrcSV, SrcSVOff + SrcOff, isVolatile, false, 0); + TFOps[i] = Loads[i].getValue(1); + SrcOff += VTSize; + } + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); + + for (i = 0; + i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { + TFOps[i] = DAG.getStore(Chain, dl, Loads[i], + DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, + DAG.getConstant(DstOff, MVT::i32)), + DstSV, DstSVOff + DstOff, isVolatile, false, 0); + DstOff += VTSize; + } + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); + + EmittedNumMemOps += i; + } + + if (BytesLeft == 0) + return Chain; + + // Issue loads / stores for the trailing (1 - 3) bytes.
+ unsigned BytesLeftSave = BytesLeft; + i = 0; + while (BytesLeft) { + if (BytesLeft >= 2) { + VT = MVT::i16; + VTSize = 2; + } else { + VT = MVT::i8; + VTSize = 1; + } + + Loads[i] = DAG.getLoad(VT, dl, Chain, + DAG.getNode(ISD::ADD, dl, MVT::i32, Src, + DAG.getConstant(SrcOff, MVT::i32)), + SrcSV, SrcSVOff + SrcOff, false, false, 0); + TFOps[i] = Loads[i].getValue(1); + ++i; + SrcOff += VTSize; + BytesLeft -= VTSize; + } + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); + + i = 0; + BytesLeft = BytesLeftSave; + while (BytesLeft) { + if (BytesLeft >= 2) { + VT = MVT::i16; + VTSize = 2; + } else { + VT = MVT::i8; + VTSize = 1; + } + + TFOps[i] = DAG.getStore(Chain, dl, Loads[i], + DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, + DAG.getConstant(DstOff, MVT::i32)), + DstSV, DstSVOff + DstOff, false, false, 0); + ++i; + DstOff += VTSize; + BytesLeft -= VTSize; + } + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); +} diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.h b/lib/Target/ARM/ARMSelectionDAGInfo.h index afe9a47..d7d00c2 100644 --- a/lib/Target/ARM/ARMSelectionDAGInfo.h +++ b/lib/Target/ARM/ARMSelectionDAGInfo.h @@ -19,9 +19,24 @@ namespace llvm { class ARMSelectionDAGInfo : public TargetSelectionDAGInfo { + /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can + /// make the right decision when generating code for different targets. + const ARMSubtarget *Subtarget; + public: - ARMSelectionDAGInfo(); + explicit ARMSelectionDAGInfo(const TargetMachine &TM); ~ARMSelectionDAGInfo(); + + virtual + SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, + SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, + bool isVolatile, bool AlwaysInline, + const Value *DstSV, + uint64_t DstSVOff, + const Value *SrcSV, + uint64_t SrcSVOff) const; }; } diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index b11580a..10fd257 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -39,6 +39,8 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, , IsR9Reserved(ReserveR9) , UseMovt(UseMOVT) , HasFP16(false) + , HasHardwareDivide(false) + , HasT2ExtractPack(false) , stackAlignment(4) , CPUString("generic") , TargetType(isELF) // Default to ELF unless otherwise specified. @@ -73,6 +75,8 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, unsigned SubVer = TT[Idx]; if (SubVer >= '7' && SubVer <= '9') { ARMArchVersion = V7A; + if (Len >= Idx+2 && TT[Idx+1] == 'm') + ARMArchVersion = V7M; } else if (SubVer == '6') { ARMArchVersion = V6; if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == '2') diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 288a19a..8332bba 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -26,7 +26,7 @@ class GlobalValue; class ARMSubtarget : public TargetSubtarget { protected: enum ARMArchEnum { - V4, V4T, V5T, V5TE, V6, V6T2, V7A + V4, V4T, V5T, V5TE, V6, V6T2, V7A, V7M }; enum ARMFPEnum { @@ -39,7 +39,7 @@ protected: }; /// ARMArchVersion - ARM architecture version: V4, V4T (base), V5T, V5TE, - /// V6, V6T2, V7A. + /// V6, V6T2, V7A, V7M. ARMArchEnum ARMArchVersion; /// ARMFPUType - Floating Point Unit type. 
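To make the shape of the EmitTargetCodeForMemcpy lowering above easier to follow, here is a small self-contained sketch of its chunking arithmetic; planInlineMemcpy is an invented name, and the printf calls merely stand in for the SDNode emission the real code performs.

#include <cstdio>

static void planInlineMemcpy(unsigned SizeVal) {
  const unsigned MaxLoadsInLDM = 6;  // mirrors MAX_LOADS_IN_LDM above
  unsigned NumMemOps = SizeVal >> 2; // number of 4-byte load/store pairs
  unsigned BytesLeft = SizeVal & 3;  // 0..3 trailing bytes

  // Words go out in bursts of up to six loads followed by the matching
  // stores, so later passes can merge each burst into ldm/stm.
  for (unsigned Emitted = 0; Emitted < NumMemOps; ) {
    unsigned Burst = NumMemOps - Emitted;
    if (Burst > MaxLoadsInLDM)
      Burst = MaxLoadsInLDM;
    printf("burst: %u word loads, then %u word stores\n", Burst, Burst);
    Emitted += Burst;
  }

  // The tail uses the widest type that still fits: halfwords, then a byte.
  while (BytesLeft) {
    unsigned VTSize = (BytesLeft >= 2) ? 2 : 1;
    printf("tail: %u-byte load/store\n", VTSize);
    BytesLeft -= VTSize;
  }
}

A 27-byte copy, for instance, plans one six-word burst (24 bytes) followed by a halfword and a byte, subject to the 4-byte alignment and constant-size preconditions checked at the top of the real function.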
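Likewise, the ARMSubtarget constructor hunk above keys the new V7M variant off a literal 'm' following the version digit in the target triple, so "armv7m"-style triples select V7M while a plain v7 stays V7A. A condensed, self-contained sketch of that parse follows, assuming (as in the surrounding code) that Idx already points at the digit after 'v'; parseArchVersion is an invented name for illustration.

#include <string>

enum ARMArchEnum { V4, V4T, V5T, V5TE, V6, V6T2, V7A, V7M };

static ARMArchEnum parseArchVersion(const std::string &TT, size_t Idx) {
  ARMArchEnum Arch = V4;
  size_t Len = TT.length();
  char SubVer = TT[Idx];
  if (SubVer >= '7' && SubVer <= '9') {
    Arch = V7A;                       // v7 defaults to the A profile
    if (Len >= Idx + 2 && TT[Idx + 1] == 'm')
      Arch = V7M;                     // e.g. "armv7m" / "thumbv7m"
  } else if (SubVer == '6') {
    Arch = V6;
    if (Len >= Idx + 3 && TT[Idx + 1] == 't' && TT[Idx + 2] == '2')
      Arch = V6T2;
  }
  return Arch;
}

Selecting V7M is what allows HasHardwareDivide to be set, which hasDivide() exposes and the HasDivide predicate on the new t2SDIV/t2UDIV patterns checks.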
@@ -74,6 +74,13 @@ protected: /// only so far) bool HasFP16; + /// HasHardwareDivide - True if subtarget supports [su]div + bool HasHardwareDivide; + + /// HasT2ExtractPack - True if subtarget supports thumb2 extract/pack + /// instructions. + bool HasT2ExtractPack; + /// stackAlignment - The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. unsigned stackAlignment; @@ -123,6 +130,8 @@ protected: bool hasNEON() const { return ARMFPUType >= NEON; } bool useNEONForSinglePrecisionFP() const { return hasNEON() && UseNEONForSinglePrecisionFP; } + bool hasDivide() const { return HasHardwareDivide; } + bool hasT2ExtractPack() const { return HasT2ExtractPack; } bool useVMLx() const {return hasVFP2() && !SlowVMLx; } bool hasFP16() const { return HasFP16; } diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 662e61e..b4a9252 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -62,7 +62,8 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, const std::string &TT, DataLayout(Subtarget.isAPCS_ABI() ? std::string("e-p:32:32-f64:32:32-i64:32:32-n32") : std::string("e-p:32:32-f64:64:64-i64:64:64-n32")), - TLInfo(*this) { + TLInfo(*this), + TSInfo(*this) { } ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT, @@ -76,7 +77,8 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT, "i16:16:32-i8:8:32-i1:8:32-a:0:32-n32") : std::string("e-p:32:32-f64:64:64-i64:64:64-" "i16:16:32-i8:8:32-i1:8:32-a:0:32-n32")), - TLInfo(*this) { + TLInfo(*this), + TSInfo(*this) { } diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index 4e205df..a222e57 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -21,6 +21,7 @@ #include "ARMJITInfo.h" #include "ARMSubtarget.h" #include "ARMISelLowering.h" +#include "ARMSelectionDAGInfo.h" #include "Thumb1InstrInfo.h" #include "Thumb2InstrInfo.h" #include "llvm/ADT/OwningPtr.h" @@ -63,6 +64,7 @@ class ARMTargetMachine : public ARMBaseTargetMachine { ARMInstrInfo InstrInfo; const TargetData DataLayout; // Calculates type size & alignment ARMTargetLowering TLInfo; + ARMSelectionDAGInfo TSInfo; public: ARMTargetMachine(const Target &T, const std::string &TT, const std::string &FS); @@ -75,6 +77,10 @@ public: return &TLInfo; } + virtual const ARMSelectionDAGInfo* getSelectionDAGInfo() const { + return &TSInfo; + } + virtual const ARMInstrInfo *getInstrInfo() const { return &InstrInfo; } virtual const TargetData *getTargetData() const { return &DataLayout; } }; @@ -88,6 +94,7 @@ class ThumbTargetMachine : public ARMBaseTargetMachine { OwningPtr<ARMBaseInstrInfo> InstrInfo; const TargetData DataLayout; // Calculates type size & alignment ARMTargetLowering TLInfo; + ARMSelectionDAGInfo TSInfo; public: ThumbTargetMachine(const Target &T, const std::string &TT, const std::string &FS); @@ -101,6 +108,10 @@ public: return &TLInfo; } + virtual const ARMSelectionDAGInfo *getSelectionDAGInfo() const { + return &TSInfo; + } + /// returns either Thumb1InstrInfo or Thumb2InstrInfo virtual const ARMBaseInstrInfo *getInstrInfo() const { return InstrInfo.get(); diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp index 80a9d2d..d95efdb 100644 --- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp @@ -319,16 +319,16 @@ void 
ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, unsigned Reg = MO.getReg(); assert(TargetRegisterInfo::isPhysicalRegister(Reg)); if (Modifier && strcmp(Modifier, "dregpair") == 0) { - unsigned DRegLo = TM.getRegisterInfo()->getSubReg(Reg, 5);// arm_dsubreg_0 - unsigned DRegHi = TM.getRegisterInfo()->getSubReg(Reg, 6);// arm_dsubreg_1 + unsigned DRegLo = TM.getRegisterInfo()->getSubReg(Reg, ARM::dsub_0); + unsigned DRegHi = TM.getRegisterInfo()->getSubReg(Reg, ARM::dsub_1); O << '{' << getRegisterName(DRegLo) << ',' << getRegisterName(DRegHi) << '}'; } else if (Modifier && strcmp(Modifier, "lane") == 0) { unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(Reg); unsigned DReg = - TM.getRegisterInfo()->getMatchingSuperReg(Reg, RegNum & 1 ? 2 : 1, - &ARM::DPR_VFP2RegClass); + TM.getRegisterInfo()->getMatchingSuperReg(Reg, + RegNum & 1 ? ARM::ssub_1 : ARM::ssub_0, &ARM::DPR_VFP2RegClass); O << getRegisterName(DReg) << '[' << (RegNum & 1) << ']'; } else { assert(!MO.getSubReg() && "Subregs should be eliminated!"); @@ -1375,13 +1375,32 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { case ARM::MOVi32imm: { // FIXME: Remove asmstring from td file. // This is a hack that lowers as a two instruction sequence. unsigned DstReg = MI->getOperand(0).getReg(); - unsigned ImmVal = (unsigned)MI->getOperand(1).getImm(); - + const MachineOperand &MO = MI->getOperand(1); + MCOperand V1, V2; + if (MO.isImm()) { + unsigned ImmVal = (unsigned)MI->getOperand(1).getImm(); + V1 = MCOperand::CreateImm(ImmVal & 65535); + V2 = MCOperand::CreateImm(ImmVal >> 16); + } else if (MO.isGlobal()) { + MCSymbol *Symbol = MCInstLowering.GetGlobalAddressSymbol(MO); + const MCSymbolRefExpr *SymRef1 = + MCSymbolRefExpr::Create(Symbol, + MCSymbolRefExpr::VK_ARM_LO16, OutContext); + const MCSymbolRefExpr *SymRef2 = + MCSymbolRefExpr::Create(Symbol, + MCSymbolRefExpr::VK_ARM_HI16, OutContext); + V1 = MCOperand::CreateExpr(SymRef1); + V2 = MCOperand::CreateExpr(SymRef2); + } else { + MI->dump(); + llvm_unreachable("cannot handle this operand"); + } + { MCInst TmpInst; TmpInst.setOpcode(ARM::MOVi16); TmpInst.addOperand(MCOperand::CreateReg(DstReg)); // dstreg - TmpInst.addOperand(MCOperand::CreateImm(ImmVal & 65535)); // lower16(imm) + TmpInst.addOperand(V1); // lower16(imm) // Predicate. TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm())); @@ -1395,7 +1414,7 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { TmpInst.setOpcode(ARM::MOVTi16); TmpInst.addOperand(MCOperand::CreateReg(DstReg)); // dstreg TmpInst.addOperand(MCOperand::CreateReg(DstReg)); // srcreg - TmpInst.addOperand(MCOperand::CreateImm(ImmVal >> 16)); // upper16(imm) + TmpInst.addOperand(V2); // upper16(imm) // Predicate. TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm())); diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp index ac6331f..2b94b76 100644 --- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp @@ -195,8 +195,8 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, // FIXME: Breaks e.g. ARM/vmul.ll. 
assert(0); /* - unsigned DRegLo = TRI->getSubReg(Reg, 5); // arm_dsubreg_0 - unsigned DRegHi = TRI->getSubReg(Reg, 6); // arm_dsubreg_1 + unsigned DRegLo = TRI->getSubReg(Reg, ARM::dsub_0); + unsigned DRegHi = TRI->getSubReg(Reg, ARM::dsub_1); O << '{' << getRegisterName(DRegLo) << ',' << getRegisterName(DRegHi) << '}';*/ @@ -217,7 +217,8 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, ((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported")); O << '#' << Op.getImm(); } else { - assert((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported"); + if (Modifier && Modifier[0] != 0 && strcmp(Modifier, "call") != 0) + llvm_unreachable("Unsupported modifier"); assert(Op.isExpr() && "unknown operand kind in printOperand"); O << *Op.getExpr(); } diff --git a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.h b/lib/Target/ARM/AsmPrinter/ARMMCInstLower.h index 383d30d..b81a306 100644 --- a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.h +++ b/lib/Target/ARM/AsmPrinter/ARMMCInstLower.h @@ -26,7 +26,7 @@ namespace llvm { //class ARMSubtarget; /// ARMMCInstLower - This class is used to lower an MachineInstr into an MCInst. -class VISIBILITY_HIDDEN ARMMCInstLower { +class LLVM_LIBRARY_VISIBILITY ARMMCInstLower { MCContext &Ctx; Mangler &Mang; AsmPrinter &Printer; diff --git a/lib/Target/ARM/NEONMoveFix.cpp b/lib/Target/ARM/NEONMoveFix.cpp index 3c0414d..0a4400c 100644 --- a/lib/Target/ARM/NEONMoveFix.cpp +++ b/lib/Target/ARM/NEONMoveFix.cpp @@ -118,7 +118,7 @@ bool NEONMoveFixPass::runOnMachineFunction(MachineFunction &Fn) { ARMFunctionInfo *AFI = Fn.getInfo<ARMFunctionInfo>(); const TargetMachine &TM = Fn.getTarget(); - if (AFI->isThumbFunction()) + if (AFI->isThumb1OnlyFunction()) return false; TRI = TM.getRegisterInfo(); diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp index ef6bf3a..a725898 100644 --- a/lib/Target/ARM/NEONPreAllocPass.cpp +++ b/lib/Target/ARM/NEONPreAllocPass.cpp @@ -33,7 +33,8 @@ namespace { private: bool FormsRegSequence(MachineInstr *MI, - unsigned FirstOpnd, unsigned NumRegs); + unsigned FirstOpnd, unsigned NumRegs, + unsigned Offset, unsigned Stride) const; bool PreAllocNEONRegisters(MachineBasicBlock &MBB); }; @@ -338,24 +339,122 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, return false; } -bool NEONPreAllocPass::FormsRegSequence(MachineInstr *MI, - unsigned FirstOpnd, unsigned NumRegs) { - MachineInstr *RegSeq = 0; +bool +NEONPreAllocPass::FormsRegSequence(MachineInstr *MI, + unsigned FirstOpnd, unsigned NumRegs, + unsigned Offset, unsigned Stride) const { + MachineOperand &FMO = MI->getOperand(FirstOpnd); + assert(FMO.isReg() && FMO.getSubReg() == 0 && "unexpected operand"); + unsigned VirtReg = FMO.getReg(); + (void)VirtReg; + assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && + "expected a virtual register"); + + unsigned LastSubIdx = 0; + if (FMO.isDef()) { + MachineInstr *RegSeq = 0; + for (unsigned R = 0; R < NumRegs; ++R) { + const MachineOperand &MO = MI->getOperand(FirstOpnd + R); + assert(MO.isReg() && MO.getSubReg() == 0 && "unexpected operand"); + unsigned VirtReg = MO.getReg(); + assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && + "expected a virtual register"); + // Feeding into a REG_SEQUENCE. 
+ if (!MRI->hasOneNonDBGUse(VirtReg)) + return false; + MachineInstr *UseMI = &*MRI->use_nodbg_begin(VirtReg); + if (!UseMI->isRegSequence()) + return false; + if (RegSeq && RegSeq != UseMI) + return false; + unsigned OpIdx = 1 + (Offset + R * Stride) * 2; + if (UseMI->getOperand(OpIdx).getReg() != VirtReg) + llvm_unreachable("Malformed REG_SEQUENCE instruction!"); + unsigned SubIdx = UseMI->getOperand(OpIdx + 1).getImm(); + if (LastSubIdx) { + if (LastSubIdx != SubIdx-Stride) + return false; + } else { + // Must start from dsub_0 or qsub_0. + if (SubIdx != (ARM::dsub_0+Offset) && + SubIdx != (ARM::qsub_0+Offset)) + return false; + } + RegSeq = UseMI; + LastSubIdx = SubIdx; + } + + // In the case of vld3, etc., make sure the trailing operand of + // REG_SEQUENCE is an undef. + if (NumRegs == 3) { + unsigned OpIdx = 1 + (Offset + 3 * Stride) * 2; + const MachineOperand &MO = RegSeq->getOperand(OpIdx); + unsigned VirtReg = MO.getReg(); + MachineInstr *DefMI = MRI->getVRegDef(VirtReg); + if (!DefMI || !DefMI->isImplicitDef()) + return false; + } + return true; + } + + unsigned LastSrcReg = 0; + SmallVector<unsigned, 4> SubIds; for (unsigned R = 0; R < NumRegs; ++R) { - MachineOperand &MO = MI->getOperand(FirstOpnd + R); + const MachineOperand &MO = MI->getOperand(FirstOpnd + R); assert(MO.isReg() && MO.getSubReg() == 0 && "unexpected operand"); unsigned VirtReg = MO.getReg(); assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "expected a virtual register"); - if (!MRI->hasOneNonDBGUse(VirtReg)) + // Extracting from a Q or QQ register. + MachineInstr *DefMI = MRI->getVRegDef(VirtReg); + if (!DefMI || !DefMI->isExtractSubreg()) return false; - MachineInstr *UseMI = &*MRI->use_nodbg_begin(VirtReg); - if (UseMI->getOpcode() != TargetOpcode::REG_SEQUENCE) + VirtReg = DefMI->getOperand(1).getReg(); + if (LastSrcReg && LastSrcReg != VirtReg) return false; - if (RegSeq && RegSeq != UseMI) + LastSrcReg = VirtReg; + const TargetRegisterClass *RC = MRI->getRegClass(VirtReg); + if (RC != ARM::QPRRegisterClass && + RC != ARM::QQPRRegisterClass && + RC != ARM::QQQQPRRegisterClass) return false; - RegSeq = UseMI; + unsigned SubIdx = DefMI->getOperand(2).getImm(); + if (LastSubIdx) { + if (LastSubIdx != SubIdx-Stride) + return false; + } else { + // Must start from dsub_0 or qsub_0. + if (SubIdx != (ARM::dsub_0+Offset) && + SubIdx != (ARM::qsub_0+Offset)) + return false; + } + SubIds.push_back(SubIdx); + LastSubIdx = SubIdx; } + + // FIXME: Update the uses of EXTRACT_SUBREG from REG_SEQUENCE is + // currently required for correctness. e.g. + // %reg1041;<def> = REG_SEQUENCE %reg1040<kill>, 5, %reg1035<kill>, 6 + // %reg1042<def> = EXTRACT_SUBREG %reg1041, 6 + // %reg1043<def> = EXTRACT_SUBREG %reg1041, 5 + // VST1q16 %reg1025<kill>, 0, %reg1043<kill>, %reg1042<kill>, + // reg1025 and reg1043 should be replaced with reg1041:6 and reg1041:5 + // respectively. + // We need to change how we model uses of REG_SEQUENCE. + for (unsigned R = 0; R < NumRegs; ++R) { + MachineOperand &MO = MI->getOperand(FirstOpnd + R); + unsigned OldReg = MO.getReg(); + MachineInstr *DefMI = MRI->getVRegDef(OldReg); + assert(DefMI->isExtractSubreg()); + MO.setReg(LastSrcReg); + MO.setSubReg(SubIds[R]); + if (R != 0) + MO.setIsKill(false); + // Delete the EXTRACT_SUBREG if its result is now dead. 
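The def-side and use-side walks above enforce the same shape: the sub-register indices must begin at dsub_0 or qsub_0 plus Offset and advance by exactly Stride. The LastSubIdx bookkeeping reduces to a small predicate; in this sketch the concrete index values are stand-ins, since the real enumerators are TableGen-assigned:

#include <cassert>
#include <vector>

static bool isContiguousRun(const std::vector<unsigned> &SubIds,
                            unsigned Base, unsigned Offset, unsigned Stride) {
  unsigned Last = 0; // 0 doubles as "unset"; real sub-indices start at 1
  for (unsigned SubIdx : SubIds) {
    if (Last) {
      if (Last != SubIdx - Stride) // each index must step by Stride
        return false;
    } else if (SubIdx != Base + Offset) { // run must start at Base + Offset
      return false;
    }
    Last = SubIdx;
  }
  return true;
}

int main() {
  const unsigned Dsub0 = 1; // hypothetical value of ARM::dsub_0
  assert(isContiguousRun({Dsub0, Dsub0 + 1, Dsub0 + 2}, Dsub0, 0, 1));
  assert(!isContiguousRun({Dsub0, Dsub0 + 2}, Dsub0, 0, 1));     // gap breaks the stride
  assert(!isContiguousRun({Dsub0 + 1, Dsub0 + 2}, Dsub0, 0, 1)); // wrong starting index
}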
+ if (MRI->use_empty(OldReg)) + DefMI->eraseFromParent(); + } + return true; } @@ -368,7 +467,8 @@ bool NEONPreAllocPass::PreAllocNEONRegisters(MachineBasicBlock &MBB) { unsigned FirstOpnd, NumRegs, Offset, Stride; if (!isNEONMultiRegOp(MI->getOpcode(), FirstOpnd, NumRegs, Offset, Stride)) continue; - if (FormsRegSequence(MI, FirstOpnd, NumRegs)) + if (llvm::ModelWithRegSequence() && + FormsRegSequence(MI, FirstOpnd, NumRegs, Offset, Stride)) continue; MachineBasicBlock::iterator NextI = llvm::next(MBBI); @@ -390,7 +490,8 @@ bool NEONPreAllocPass::PreAllocNEONRegisters(MachineBasicBlock &MBB) { if (MO.isUse()) { // Insert a copy from VirtReg. TII->copyRegToReg(MBB, MBBI, MO.getReg(), VirtReg, - ARM::DPRRegisterClass, ARM::DPRRegisterClass); + ARM::DPRRegisterClass, ARM::DPRRegisterClass, + DebugLoc()); if (MO.isKill()) { MachineInstr *CopyMI = prior(MBBI); CopyMI->findRegisterUseOperand(VirtReg)->setIsKill(); @@ -399,7 +500,8 @@ bool NEONPreAllocPass::PreAllocNEONRegisters(MachineBasicBlock &MBB) { } else if (MO.isDef() && !MO.isDead()) { // Add a copy to VirtReg. TII->copyRegToReg(MBB, NextI, VirtReg, MO.getReg(), - ARM::DPRRegisterClass, ARM::DPRRegisterClass); + ARM::DPRRegisterClass, ARM::DPRRegisterClass, + DebugLoc()); } } } diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp index b10c3f7..fae84d4 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.cpp +++ b/lib/Target/ARM/Thumb1InstrInfo.cpp @@ -17,6 +17,7 @@ #include "ARMMachineFunctionInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/ADT/SmallVector.h" @@ -36,10 +37,8 @@ bool Thumb1InstrInfo::copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const { - DebugLoc DL; - if (I != MBB.end()) DL = I->getDebugLoc(); - + const TargetRegisterClass *SrcRC, + DebugLoc DL) const { if (DestRC == ARM::GPRRegisterClass) { if (SrcRC == ARM::GPRRegisterClass) { BuildMI(MBB, I, DL, get(ARM::tMOVgpr2gpr), DestReg).addReg(SrcReg); @@ -97,10 +96,8 @@ canFoldMemoryOperand(const MachineInstr *MI, void Thumb1InstrInfo:: storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned SrcReg, bool isKill, int FI, - const TargetRegisterClass *RC) const { - DebugLoc DL; - if (I != MBB.end()) DL = I->getDebugLoc(); - + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { assert((RC == ARM::tGPRRegisterClass || (TargetRegisterInfo::isPhysicalRegister(SrcReg) && isARMLowRegister(SrcReg))) && "Unknown regclass!"); @@ -108,6 +105,9 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, if (RC == ARM::tGPRRegisterClass || (TargetRegisterInfo::isPhysicalRegister(SrcReg) && isARMLowRegister(SrcReg))) { + DebugLoc DL; + if (I != MBB.end()) DL = I->getDebugLoc(); + MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = *MF.getFrameInfo(); MachineMemOperand *MMO = @@ -124,10 +124,8 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, void Thumb1InstrInfo:: loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, int FI, - const TargetRegisterClass *RC) const { - DebugLoc DL; - if (I != MBB.end()) DL = I->getDebugLoc(); - + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { 
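With the DebugLoc now supplied by copyRegToReg's callers, the spill and reload helpers compute their own location only on the path that actually emits code. The guard matters because dereferencing MBB.end() is invalid. A toy stand-in for the idiom, with Instr and Block standing in for MachineInstr and MachineBasicBlock:

#include <list>

struct Instr { int Loc; };
using Block = std::list<Instr>;

// Only take a debug location from the insertion point when it is not
// the end() sentinel; otherwise fall back to an unknown location (0).
static int debugLocAt(const Block &B, Block::const_iterator I) {
  int DL = 0;
  if (I != B.end())
    DL = I->Loc;
  return DL;
}

int main() {
  Block B{{7}};
  return (debugLocAt(B, B.begin()) == 7 && debugLocAt(B, B.end()) == 0) ? 0 : 1;
}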
assert((RC == ARM::tGPRRegisterClass || (TargetRegisterInfo::isPhysicalRegister(DestReg) && isARMLowRegister(DestReg))) && "Unknown regclass!"); @@ -135,6 +133,9 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, if (RC == ARM::tGPRRegisterClass || (TargetRegisterInfo::isPhysicalRegister(DestReg) && isARMLowRegister(DestReg))) { + DebugLoc DL; + if (I != MBB.end()) DL = I->getDebugLoc(); + MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = *MF.getFrameInfo(); MachineMemOperand *MMO = @@ -150,7 +151,8 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, bool Thumb1InstrInfo:: spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI) const { + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { if (CSI.empty()) return false; @@ -161,9 +163,22 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB, AddDefaultPred(MIB); for (unsigned i = CSI.size(); i != 0; --i) { unsigned Reg = CSI[i-1].getReg(); - // Add the callee-saved register as live-in. It's killed at the spill. - MBB.addLiveIn(Reg); - MIB.addReg(Reg, RegState::Kill); + bool isKill = true; + + // Add the callee-saved register as live-in unless it's LR and + // @llvm.returnaddress is called. If LR is returned for @llvm.returnaddress + // then it's already added to the function and entry block live-in sets. + if (Reg == ARM::LR) { + MachineFunction &MF = *MBB.getParent(); + if (MF.getFrameInfo()->isReturnAddressTaken() && + MF.getRegInfo().isLiveIn(Reg)) + isKill = false; + } + + if (isKill) { + MBB.addLiveIn(Reg); + MIB.addReg(Reg, RegState::Kill); + } } return true; } @@ -171,7 +186,8 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB, bool Thumb1InstrInfo:: restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI) const { + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { MachineFunction &MF = *MBB.getParent(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); if (CSI.empty()) diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h index 516ddf1..c937296 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.h +++ b/lib/Target/ARM/Thumb1InstrInfo.h @@ -39,25 +39,30 @@ public: bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI) const; + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const; bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI) const; + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const; bool copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const; + const TargetRegisterClass *SrcRC, + DebugLoc DL) const; void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC) const; + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg, int FrameIndex, - const TargetRegisterClass *RC) const; + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; bool canFoldMemoryOperand(const MachineInstr 
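The Thumb1 spill loop gets the same LR special case as the ARMBaseInstrInfo version earlier in the patch: once @llvm.returnaddress has been lowered, LR is already in the live-in sets and must not be marked killed by the spill, or its value would be considered dead. The decision, reduced to a predicate:

#include <cassert>

// Mirrors the new spillCalleeSavedRegisters logic: LR survives the spill
// (not killed, not re-added as a live-in) exactly when the function takes
// its return address and LR is already live-in.
static bool shouldKillCSR(bool IsLR, bool RetAddrTaken, bool LRLiveIn) {
  if (IsLR && RetAddrTaken && LRLiveIn)
    return false;
  return true;
}

int main() {
  assert(shouldKillCSR(false, true, true));  // non-LR CSRs are always killed
  assert(shouldKillCSR(true, false, false)); // ordinary LR spill kills LR
  assert(!shouldKillCSR(true, true, true));  // @llvm.returnaddress case
}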
*MI, const SmallVectorImpl<unsigned> &Ops) const; diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index b143bd9..531d5e9 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -40,10 +40,8 @@ Thumb2InstrInfo::copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const { - DebugLoc DL; - if (I != MBB.end()) DL = I->getDebugLoc(); - + const TargetRegisterClass *SrcRC, + DebugLoc DL) const { if (DestRC == ARM::GPRRegisterClass) { if (SrcRC == ARM::GPRRegisterClass) { BuildMI(MBB, I, DL, get(ARM::tMOVgpr2gpr), DestReg).addReg(SrcReg); @@ -63,17 +61,18 @@ Thumb2InstrInfo::copyRegToReg(MachineBasicBlock &MBB, } // Handle SPR, DPR, and QPR copies. - return ARMBaseInstrInfo::copyRegToReg(MBB, I, DestReg, SrcReg, DestRC, SrcRC); + return ARMBaseInstrInfo::copyRegToReg(MBB, I, DestReg, SrcReg, DestRC, SrcRC, DL); } void Thumb2InstrInfo:: storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned SrcReg, bool isKill, int FI, - const TargetRegisterClass *RC) const { - DebugLoc DL; - if (I != MBB.end()) DL = I->getDebugLoc(); - + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass) { + DebugLoc DL; + if (I != MBB.end()) DL = I->getDebugLoc(); + MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = *MF.getFrameInfo(); MachineMemOperand *MMO = @@ -87,17 +86,18 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, return; } - ARMBaseInstrInfo::storeRegToStackSlot(MBB, I, SrcReg, isKill, FI, RC); + ARMBaseInstrInfo::storeRegToStackSlot(MBB, I, SrcReg, isKill, FI, RC, TRI); } void Thumb2InstrInfo:: loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, int FI, - const TargetRegisterClass *RC) const { - DebugLoc DL; - if (I != MBB.end()) DL = I->getDebugLoc(); - + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass) { + DebugLoc DL; + if (I != MBB.end()) DL = I->getDebugLoc(); + MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = *MF.getFrameInfo(); MachineMemOperand *MMO = @@ -110,7 +110,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, return; } - ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC); + ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC, TRI); } void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB, diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h index a0f89a6..2948770 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.h +++ b/lib/Target/ARM/Thumb2InstrInfo.h @@ -35,17 +35,20 @@ public: MachineBasicBlock::iterator I, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const; + const TargetRegisterClass *SrcRC, + DebugLoc DL) const; void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC) const; + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg, int FrameIndex, - const TargetRegisterClass *RC) const; + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) 
const; /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As /// such, whenever a client has an instance of instruction info, it should diff --git a/lib/Target/Alpha/AlphaInstrInfo.cpp b/lib/Target/Alpha/AlphaInstrInfo.cpp index ba403e2..3aba363 100644 --- a/lib/Target/Alpha/AlphaInstrInfo.cpp +++ b/lib/Target/Alpha/AlphaInstrInfo.cpp @@ -146,16 +146,14 @@ bool AlphaInstrInfo::copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const { + const TargetRegisterClass *SrcRC, + DebugLoc DL) const { //cerr << "copyRegToReg " << DestReg << " <- " << SrcReg << "\n"; if (DestRC != SrcRC) { // Not yet supported! return false; } - DebugLoc DL; - if (MI != MBB.end()) DL = MI->getDebugLoc(); - if (DestRC == Alpha::GPRCRegisterClass) { BuildMI(MBB, MI, DL, get(Alpha::BISr), DestReg) .addReg(SrcReg) @@ -180,7 +178,8 @@ void AlphaInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned SrcReg, bool isKill, int FrameIdx, - const TargetRegisterClass *RC) const { + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { //cerr << "Trying to store " << getPrettyName(SrcReg) << " to " // << FrameIdx << "\n"; //BuildMI(MBB, MI, Alpha::WTF, 0).addReg(SrcReg); @@ -208,7 +207,8 @@ void AlphaInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, int FrameIdx, - const TargetRegisterClass *RC) const { + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { //cerr << "Trying to load " << getPrettyName(DestReg) << " to " // << FrameIdx << "\n"; DebugLoc DL; @@ -399,7 +399,6 @@ unsigned AlphaInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { void AlphaInstrInfo::insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const { DebugLoc DL; - if (MI != MBB.end()) DL = MI->getDebugLoc(); BuildMI(MBB, MI, DL, get(Alpha::BISr), Alpha::R31) .addReg(Alpha::R31) .addReg(Alpha::R31); @@ -430,7 +429,8 @@ unsigned AlphaInstrInfo::getGlobalBaseReg(MachineFunction *MF) const { GlobalBaseReg = RegInfo.createVirtualRegister(&Alpha::GPRCRegClass); bool Ok = TII->copyRegToReg(FirstMBB, MBBI, GlobalBaseReg, Alpha::R29, - &Alpha::GPRCRegClass, &Alpha::GPRCRegClass); + &Alpha::GPRCRegClass, &Alpha::GPRCRegClass, + DebugLoc()); assert(Ok && "Couldn't assign to global base register!"); Ok = Ok; // Silence warning when assertions are turned off. RegInfo.addLiveIn(Alpha::R29); @@ -457,7 +457,8 @@ unsigned AlphaInstrInfo::getGlobalRetAddr(MachineFunction *MF) const { GlobalRetAddr = RegInfo.createVirtualRegister(&Alpha::GPRCRegClass); bool Ok = TII->copyRegToReg(FirstMBB, MBBI, GlobalRetAddr, Alpha::R26, - &Alpha::GPRCRegClass, &Alpha::GPRCRegClass); + &Alpha::GPRCRegClass, &Alpha::GPRCRegClass, + DebugLoc()); assert(Ok && "Couldn't assign to global return address register!"); Ok = Ok; // Silence warning when assertions are turned off. 
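Alpha has no dedicated register-move instruction, so copyRegToReg above spells a GPR copy as BISr (bitwise OR) with the source register in both operand slots. Why that is a copy, as plain arithmetic:

#include <cassert>
#include <cstdint>

// BIS rA, rB, rC computes rC = rA | rB; with rA == rB it degenerates
// to a register copy, which is how Alpha spells "mov".
static uint64_t bis(uint64_t A, uint64_t B) { return A | B; }

int main() {
  uint64_t Src = 0x123456789ABCDEF0ULL;
  assert(bis(Src, Src) == Src); // OR of a value with itself is the identity
}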
RegInfo.addLiveIn(Alpha::R26); diff --git a/lib/Target/Alpha/AlphaInstrInfo.h b/lib/Target/Alpha/AlphaInstrInfo.h index c3b6044..7d7365b 100644 --- a/lib/Target/Alpha/AlphaInstrInfo.h +++ b/lib/Target/Alpha/AlphaInstrInfo.h @@ -48,16 +48,19 @@ public: MachineBasicBlock::iterator MI, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const; + const TargetRegisterClass *SrcRC, + DebugLoc DL) const; virtual void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC) const; + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg, int FrameIndex, - const TargetRegisterClass *RC) const; + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, MachineInstr* MI, diff --git a/lib/Target/Alpha/AlphaInstrInfo.td b/lib/Target/Alpha/AlphaInstrInfo.td index d5d5e02..a47a29b 100644 --- a/lib/Target/Alpha/AlphaInstrInfo.td +++ b/lib/Target/Alpha/AlphaInstrInfo.td @@ -836,7 +836,7 @@ class br_fcc<bits<6> opc, string asmstr> !strconcat(asmstr, " $R,$dst"), s_fbr>; let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in { -let Ra = 31 in +let Ra = 31, isBarrier = 1 in def BR : BFormD<0x30, "br $$31,$DISP", [(br bb:$DISP)], s_ubr>; def COND_BRANCH_I : BFormN<0, (ins u64imm:$opc, GPRC:$R, target:$dst), diff --git a/lib/Target/Alpha/AlphaSelectionDAGInfo.cpp b/lib/Target/Alpha/AlphaSelectionDAGInfo.cpp index 0eb7b8f..f1958fe 100644 --- a/lib/Target/Alpha/AlphaSelectionDAGInfo.cpp +++ b/lib/Target/Alpha/AlphaSelectionDAGInfo.cpp @@ -12,10 +12,11 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "alpha-selectiondag-info" -#include "AlphaSelectionDAGInfo.h" +#include "AlphaTargetMachine.h" using namespace llvm; -AlphaSelectionDAGInfo::AlphaSelectionDAGInfo() { +AlphaSelectionDAGInfo::AlphaSelectionDAGInfo(const AlphaTargetMachine &TM) + : TargetSelectionDAGInfo(TM) { } AlphaSelectionDAGInfo::~AlphaSelectionDAGInfo() { diff --git a/lib/Target/Alpha/AlphaSelectionDAGInfo.h b/lib/Target/Alpha/AlphaSelectionDAGInfo.h index 70889ae..3405cc0 100644 --- a/lib/Target/Alpha/AlphaSelectionDAGInfo.h +++ b/lib/Target/Alpha/AlphaSelectionDAGInfo.h @@ -18,9 +18,11 @@ namespace llvm { +class AlphaTargetMachine; + class AlphaSelectionDAGInfo : public TargetSelectionDAGInfo { public: - AlphaSelectionDAGInfo(); + explicit AlphaSelectionDAGInfo(const AlphaTargetMachine &TM); ~AlphaSelectionDAGInfo(); }; diff --git a/lib/Target/Alpha/AlphaTargetMachine.cpp b/lib/Target/Alpha/AlphaTargetMachine.cpp index 5169a01..fc9be03 100644 --- a/lib/Target/Alpha/AlphaTargetMachine.cpp +++ b/lib/Target/Alpha/AlphaTargetMachine.cpp @@ -32,7 +32,8 @@ AlphaTargetMachine::AlphaTargetMachine(const Target &T, const std::string &TT, FrameInfo(TargetFrameInfo::StackGrowsDown, 16, 0), JITInfo(*this), Subtarget(TT, FS), - TLInfo(*this) { + TLInfo(*this), + TSInfo(*this) { setRelocationModel(Reloc::PIC_); } diff --git a/lib/Target/Alpha/AlphaTargetMachine.h b/lib/Target/Alpha/AlphaTargetMachine.h index 0990f6d..153944e 100644 --- a/lib/Target/Alpha/AlphaTargetMachine.h +++ b/lib/Target/Alpha/AlphaTargetMachine.h @@ -20,6 +20,7 @@ #include "AlphaInstrInfo.h" #include "AlphaJITInfo.h" #include "AlphaISelLowering.h" +#include "AlphaSelectionDAGInfo.h" 
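The AlphaSelectionDAGInfo hunks show a pattern this commit repeats for every target: each per-target SelectionDAGInfo now receives its TargetMachine and forwards it to the TargetSelectionDAGInfo base instead of default-constructing it. A shape-only sketch with stub types, not the real LLVM classes:

struct TargetMachineStub {};

struct TargetSelectionDAGInfoStub {
  explicit TargetSelectionDAGInfoStub(const TargetMachineStub &) {}
};

// Per-target subclass: forward the machine up, store nothing locally.
struct AlphaSelectionDAGInfoStub : TargetSelectionDAGInfoStub {
  explicit AlphaSelectionDAGInfoStub(const TargetMachineStub &TM)
      : TargetSelectionDAGInfoStub(TM) {}
};

int main() {
  TargetMachineStub TM;
  AlphaSelectionDAGInfoStub Info(TM);
}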
#include "AlphaSubtarget.h" namespace llvm { @@ -33,6 +34,7 @@ class AlphaTargetMachine : public LLVMTargetMachine { AlphaJITInfo JITInfo; AlphaSubtarget Subtarget; AlphaTargetLowering TLInfo; + AlphaSelectionDAGInfo TSInfo; public: AlphaTargetMachine(const Target &T, const std::string &TT, @@ -47,6 +49,9 @@ public: virtual const AlphaTargetLowering* getTargetLowering() const { return &TLInfo; } + virtual const AlphaSelectionDAGInfo* getSelectionDAGInfo() const { + return &TSInfo; + } virtual const TargetData *getTargetData() const { return &DataLayout; } virtual AlphaJITInfo* getJITInfo() { return &JITInfo; diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.cpp b/lib/Target/Blackfin/BlackfinInstrInfo.cpp index cf1901b..73924b7 100644 --- a/lib/Target/Blackfin/BlackfinInstrInfo.cpp +++ b/lib/Target/Blackfin/BlackfinInstrInfo.cpp @@ -138,9 +138,8 @@ bool BlackfinInstrInfo::copyRegToReg(MachineBasicBlock &MBB, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const { - DebugLoc DL; - + const TargetRegisterClass *SrcRC, + DebugLoc DL) const { if (inClass(BF::ALLRegClass, DestReg, DestRC) && inClass(BF::ALLRegClass, SrcReg, SrcRC)) { BuildMI(MBB, I, DL, get(BF::MOVE), DestReg).addReg(SrcReg); @@ -196,7 +195,8 @@ BlackfinInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, unsigned SrcReg, bool isKill, int FI, - const TargetRegisterClass *RC) const { + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); if (inClass(BF::DPRegClass, SrcReg, RC)) { @@ -242,7 +242,8 @@ BlackfinInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, int FI, - const TargetRegisterClass *RC) const { + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { DebugLoc DL = I != MBB.end() ? 
I->getDebugLoc() : DebugLoc(); if (inClass(BF::DPRegClass, DestReg, RC)) { BuildMI(MBB, I, DL, get(BF::LOAD32fi), DestReg) diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.h b/lib/Target/Blackfin/BlackfinInstrInfo.h index ea3429c..c1dcd58 100644 --- a/lib/Target/Blackfin/BlackfinInstrInfo.h +++ b/lib/Target/Blackfin/BlackfinInstrInfo.h @@ -50,13 +50,15 @@ namespace llvm { MachineBasicBlock::iterator I, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const; + const TargetRegisterClass *SrcRC, + DebugLoc DL) const; virtual void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC) const; + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill, @@ -67,7 +69,8 @@ namespace llvm { virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg, int FrameIndex, - const TargetRegisterClass *RC) const; + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg, SmallVectorImpl<MachineOperand> &Addr, diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.td b/lib/Target/Blackfin/BlackfinInstrInfo.td index 2471688..5cf350a 100644 --- a/lib/Target/Blackfin/BlackfinInstrInfo.td +++ b/lib/Target/Blackfin/BlackfinInstrInfo.td @@ -301,9 +301,9 @@ def LOAD32p_8z: F1<(outs D:$dst), (ins P:$ptr), def : Pat<(i32 (extloadi8 P:$ptr)), (LOAD32p_8z P:$ptr)>; def : Pat<(i16 (extloadi8 P:$ptr)), - (EXTRACT_SUBREG (LOAD32p_8z P:$ptr), bfin_subreg_lo16)>; + (EXTRACT_SUBREG (LOAD32p_8z P:$ptr), lo16)>; def : Pat<(i16 (zextloadi8 P:$ptr)), - (EXTRACT_SUBREG (LOAD32p_8z P:$ptr), bfin_subreg_lo16)>; + (EXTRACT_SUBREG (LOAD32p_8z P:$ptr), lo16)>; def LOAD32p_imm16_8z: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off), "$dst = b[$ptr + $off] (z);", @@ -313,17 +313,17 @@ def : Pat<(i32 (extloadi8 (add P:$ptr, imm16:$off))), (LOAD32p_imm16_8z P:$ptr, imm:$off)>; def : Pat<(i16 (extloadi8 (add P:$ptr, imm16:$off))), (EXTRACT_SUBREG (LOAD32p_imm16_8z P:$ptr, imm:$off), - bfin_subreg_lo16)>; + lo16)>; def : Pat<(i16 (zextloadi8 (add P:$ptr, imm16:$off))), (EXTRACT_SUBREG (LOAD32p_imm16_8z P:$ptr, imm:$off), - bfin_subreg_lo16)>; + lo16)>; def LOAD32p_8s: F1<(outs D:$dst), (ins P:$ptr), "$dst = b[$ptr] (x);", [(set D:$dst, (sextloadi8 P:$ptr))]>; def : Pat<(i16 (sextloadi8 P:$ptr)), - (EXTRACT_SUBREG (LOAD32p_8s P:$ptr), bfin_subreg_lo16)>; + (EXTRACT_SUBREG (LOAD32p_8s P:$ptr), lo16)>; def LOAD32p_imm16_8s: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off), "$dst = b[$ptr + $off] (x);", @@ -331,7 +331,7 @@ def LOAD32p_imm16_8s: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off), def : Pat<(i16 (sextloadi8 (add P:$ptr, imm16:$off))), (EXTRACT_SUBREG (LOAD32p_imm16_8s P:$ptr, imm:$off), - bfin_subreg_lo16)>; + lo16)>; // Memory loads without patterns let mayLoad = 1 in { @@ -468,16 +468,16 @@ def STORE32i_post: F1<(outs I:$ptr_wb), (ins D:$val, I:$ptr, M:$off), def : Pat<(truncstorei16 D:$val, PI:$ptr), (STORE16pi (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS D:$val, D)), - bfin_subreg_lo16), PI:$ptr)>; + lo16), PI:$ptr)>; def : Pat<(truncstorei16 (srl D:$val, (i16 16)), PI:$ptr), (STORE16pi (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS D:$val, D)), - bfin_subreg_hi16), PI:$ptr)>; + hi16), PI:$ptr)>; def : Pat<(truncstorei8 D16L:$val, P:$ptr), (STORE8p (INSERT_SUBREG (i32 (IMPLICIT_DEF)), 
(i16 (COPY_TO_REGCLASS D16L:$val, D16L)), - bfin_subreg_lo16), + lo16), P:$ptr)>; //===----------------------------------------------------------------------===// @@ -516,19 +516,19 @@ def : Pat<(sext_inreg D16L:$src, i8), (EXTRACT_SUBREG (MOVEsext8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), D16L:$src, - bfin_subreg_lo16)), - bfin_subreg_lo16)>; + lo16)), + lo16)>; def : Pat<(sext_inreg D:$src, i16), - (MOVEsext (EXTRACT_SUBREG D:$src, bfin_subreg_lo16))>; + (MOVEsext (EXTRACT_SUBREG D:$src, lo16))>; def : Pat<(and D:$src, 0xffff), - (MOVEzext (EXTRACT_SUBREG D:$src, bfin_subreg_lo16))>; + (MOVEzext (EXTRACT_SUBREG D:$src, lo16))>; def : Pat<(i32 (anyext D16L:$src)), (INSERT_SUBREG (i32 (IMPLICIT_DEF)), (i16 (COPY_TO_REGCLASS D16L:$src, D16L)), - bfin_subreg_lo16)>; + lo16)>; // TODO Dreg = Dreg_byte (X/Z) @@ -859,4 +859,4 @@ def : Pat<(BfinCall (i32 tglobaladdr:$dst)), def : Pat<(BfinCall (i32 texternalsym:$dst)), (CALLa texternalsym:$dst)>; def : Pat<(i16 (trunc D:$src)), - (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS D:$src, D)), bfin_subreg_lo16)>; + (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS D:$src, D)), lo16)>; diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp index 2512c9b..5153ace 100644 --- a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp +++ b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp @@ -111,7 +111,7 @@ BlackfinRegisterInfo::getPhysicalRegisterRegClass(unsigned reg, EVT VT) const { bool BlackfinRegisterInfo::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); return DisableFramePointerElim(MF) || - MFI->hasCalls() || MFI->hasVarSizedObjects(); + MFI->adjustsStack() || MFI->hasVarSizedObjects(); } bool BlackfinRegisterInfo:: @@ -177,11 +177,11 @@ void BlackfinRegisterInfo::loadConstant(MachineBasicBlock &MBB, // We must split into halves BuildMI(MBB, I, DL, - TII.get(BF::LOAD16i), getSubReg(Reg, bfin_subreg_hi16)) + TII.get(BF::LOAD16i), getSubReg(Reg, BF::hi16)) .addImm((value >> 16) & 0xffff) .addReg(Reg, RegState::ImplicitDefine); BuildMI(MBB, I, DL, - TII.get(BF::LOAD16i), getSubReg(Reg, bfin_subreg_lo16)) + TII.get(BF::LOAD16i), getSubReg(Reg, BF::lo16)) .addImm(value & 0xffff) .addReg(Reg, RegState::ImplicitKill) .addReg(Reg, RegState::ImplicitDefine); @@ -394,7 +394,7 @@ void BlackfinRegisterInfo::emitPrologue(MachineFunction &MF) const { } if (!hasFP(MF)) { - assert(!MFI->hasCalls() && + assert(!MFI->adjustsStack() && "FP elimination on a non-leaf function is not supported"); adjustRegister(MBB, MBBI, dl, BF::SP, BF::P1, -FrameSize); return; @@ -435,7 +435,7 @@ void BlackfinRegisterInfo::emitEpilogue(MachineFunction &MF, assert(FrameSize%4 == 0 && "Misaligned frame size"); if (!hasFP(MF)) { - assert(!MFI->hasCalls() && + assert(!MFI->adjustsStack() && "FP elimination on a non-leaf function is not supported"); adjustRegister(MBB, MBBI, dl, BF::SP, BF::P1, FrameSize); return; diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.h b/lib/Target/Blackfin/BlackfinRegisterInfo.h index 7cfb120..03c5450 100644 --- a/lib/Target/Blackfin/BlackfinRegisterInfo.h +++ b/lib/Target/Blackfin/BlackfinRegisterInfo.h @@ -24,13 +24,6 @@ namespace llvm { class TargetInstrInfo; class Type; - // Subregister indices, keep in sync with BlackfinRegisterInfo.td - enum BfinSubregIdx { - bfin_subreg_lo16 = 1, - bfin_subreg_hi16 = 2, - bfin_subreg_lo32 = 3 - }; - struct BlackfinRegisterInfo : public BlackfinGenRegisterInfo { BlackfinSubtarget &Subtarget; const TargetInstrInfo &TII; diff --git 
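loadConstant above now names the register halves through the new BF::hi16/BF::lo16 indices, but the arithmetic is unchanged: a constant too wide for a single immediate is materialized as two 16-bit loads into the high and low sub-registers. A standalone check of the split:

#include <cassert>
#include <cstdint>

// First LOAD16i writes Reg:hi16 with (value >> 16) & 0xffff, the second
// writes Reg:lo16 with value & 0xffff; together they rebuild the value.
static uint32_t loadViaHalves(int32_t value) {
  uint16_t hi = (value >> 16) & 0xffff;
  uint16_t lo = value & 0xffff;
  return (uint32_t(hi) << 16) | lo;
}

int main() {
  assert(loadViaHalves(0x12345678) == 0x12345678u);
  assert(loadViaHalves(-2) == 0xFFFFFFFEu); // sign bits land in hi16
}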
a/lib/Target/Blackfin/BlackfinRegisterInfo.td b/lib/Target/Blackfin/BlackfinRegisterInfo.td index d396cc8..e1cfae9 100644 --- a/lib/Target/Blackfin/BlackfinRegisterInfo.td +++ b/lib/Target/Blackfin/BlackfinRegisterInfo.td @@ -11,8 +11,18 @@ // Declarations that describe the Blackfin register file //===----------------------------------------------------------------------===// -// Registers are identified with 3-bit group and 3-bit ID numbers. +// Subregs are: +// 1: .L +// 2: .H +// 3: .W (32 low bits of 40-bit accu) +let Namespace = "BF" in { +def lo16 : SubRegIndex; +def hi16 : SubRegIndex; +def lo32 : SubRegIndex; +def hi32 : SubRegIndex; +} +// Registers are identified with 3-bit group and 3-bit ID numbers. class BlackfinReg<string n> : Register<n> { field bits<3> Group; field bits<3> Num; @@ -40,6 +50,7 @@ class Ri<bits<3> group, bits<3> num, string n> : BlackfinReg<n> { // Ra 40-bit accumulator registers class Ra<bits<3> num, string n, list<Register> subs> : BlackfinReg<n> { let SubRegs = subs; + let SubRegIndices = [hi32, lo32]; let Group = 4; let Num = num; } @@ -54,6 +65,7 @@ multiclass Rss<bits<3> group, bits<3> num, string n> { class Rii<bits<3> group, bits<3> num, string n, list<Register> subs> : BlackfinReg<n> { let SubRegs = subs; + let SubRegIndices = [hi16, lo16]; let Group = group; let Num = num; } @@ -164,7 +176,7 @@ def RETN : Ri<7, 5, "retn">, DwarfRegNum<[38]>; def RETE : Ri<7, 6, "rete">, DwarfRegNum<[39]>; def ASTAT : Ri<4, 6, "astat">, DwarfRegNum<[40]> { - let SubRegs = [AZ, AN, CC, NCC, AQ, AC0, AC1, AV0, AV0S, AV1, AV1S, V, VS]; + let Aliases = [AZ, AN, CC, NCC, AQ, AC0, AC1, AV0, AV0S, AV1, AV1S, V, VS]; } def SEQSTAT : Ri<7, 1, "seqstat">, DwarfRegNum<[41]>; @@ -182,38 +194,6 @@ def LC1 : Ri<6, 3, "lc1">, DwarfRegNum<[47]>; def LB0 : Ri<6, 2, "lb0">, DwarfRegNum<[48]>; def LB1 : Ri<6, 5, "lb1">, DwarfRegNum<[49]>; -// Subregs are: -// 1: .L -// 2: .H -// 3: .W (32 low bits of 40-bit accu) -// Keep in sync with enum in BlackfinRegisterInfo.h -def bfin_subreg_lo16 : PatLeaf<(i32 1)>; -def bfin_subreg_hi16 : PatLeaf<(i32 2)>; -def bfin_subreg_32bit : PatLeaf<(i32 3)>; - -def : SubRegSet<1, - [R0, R1, R2, R3, R4, R5, R6, R7, - P0, P1, P2, P3, P4, P5, SP, FP, - I0, I1, I2, I3, M0, M1, M2, M3, - B0, B1, B2, B3, L0, L1, L2, L3], - [R0L, R1L, R2L, R3L, R4L, R5L, R6L, R7L, - P0L, P1L, P2L, P3L, P4L, P5L, SPL, FPL, - I0L, I1L, I2L, I3L, M0L, M1L, M2L, M3L, - B0L, B1L, B2L, B3L, L0L, L1L, L2L, L3L]>; - -def : SubRegSet<2, - [R0, R1, R2, R3, R4, R5, R6, R7, - P0, P1, P2, P3, P4, P5, SP, FP, - I0, I1, I2, I3, M0, M1, M2, M3, - B0, B1, B2, B3, L0, L1, L2, L3], - [R0H, R1H, R2H, R3H, R4H, R5H, R6H, R7H, - P0H, P1H, P2H, P3H, P4H, P5H, SPH, FPH, - I0H, I1H, I2H, I3H, M0H, M1H, M2H, M3H, - B0H, B1H, B2H, B3H, L0H, L1H, L2H, L3H]>; - -def : SubRegSet<1, [A0, A0W, A1, A1W], [A0L, A0L, A1L, A1L]>; -def : SubRegSet<2, [A0, A0W, A1, A1W], [A0H, A0H, A1H, A1H]>; - // Register classes. 
def D16 : RegisterClass<"BF", [i16], 16, [R0H, R0L, R1H, R1L, R2H, R2L, R3H, R3L, @@ -260,11 +240,11 @@ def GR16 : RegisterClass<"BF", [i16], 16, L0H, L0L, L1H, L1L, L2H, L2L, L3H, L3L]>; def D : RegisterClass<"BF", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, R7]> { - let SubRegClassList = [D16L, D16H]; + let SubRegClasses = [(D16L lo16), (D16H hi16)]; } def P : RegisterClass<"BF", [i32], 32, [P0, P1, P2, P3, P4, P5, FP, SP]> { - let SubRegClassList = [P16L, P16H]; + let SubRegClasses = [(P16L lo16), (P16H hi16)]; let MethodProtos = [{ iterator allocation_order_end(const MachineFunction &MF) const; }]; @@ -287,7 +267,7 @@ def L : RegisterClass<"BF", [i32], 32, [L0, L1, L2, L3]>; def DP : RegisterClass<"BF", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, R7, P0, P1, P2, P3, P4, P5, FP, SP]> { - let SubRegClassList = [DP16L, DP16H]; + let SubRegClasses = [(DP16L lo16), (DP16H hi16)]; let MethodProtos = [{ iterator allocation_order_end(const MachineFunction &MF) const; }]; diff --git a/lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp b/lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp index f4bb25f..a21f696 100644 --- a/lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp +++ b/lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp @@ -12,10 +12,12 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "blackfin-selectiondag-info" -#include "BlackfinSelectionDAGInfo.h" +#include "BlackfinTargetMachine.h" using namespace llvm; -BlackfinSelectionDAGInfo::BlackfinSelectionDAGInfo() { +BlackfinSelectionDAGInfo::BlackfinSelectionDAGInfo( + const BlackfinTargetMachine &TM) + : TargetSelectionDAGInfo(TM) { } BlackfinSelectionDAGInfo::~BlackfinSelectionDAGInfo() { diff --git a/lib/Target/Blackfin/BlackfinSelectionDAGInfo.h b/lib/Target/Blackfin/BlackfinSelectionDAGInfo.h index a620330..f1ce348 100644 --- a/lib/Target/Blackfin/BlackfinSelectionDAGInfo.h +++ b/lib/Target/Blackfin/BlackfinSelectionDAGInfo.h @@ -18,9 +18,11 @@ namespace llvm { +class BlackfinTargetMachine; + class BlackfinSelectionDAGInfo : public TargetSelectionDAGInfo { public: - BlackfinSelectionDAGInfo(); + explicit BlackfinSelectionDAGInfo(const BlackfinTargetMachine &TM); ~BlackfinSelectionDAGInfo(); }; diff --git a/lib/Target/Blackfin/BlackfinTargetMachine.cpp b/lib/Target/Blackfin/BlackfinTargetMachine.cpp index 45d7c35..66a2f68 100644 --- a/lib/Target/Blackfin/BlackfinTargetMachine.cpp +++ b/lib/Target/Blackfin/BlackfinTargetMachine.cpp @@ -31,6 +31,7 @@ BlackfinTargetMachine::BlackfinTargetMachine(const Target &T, DataLayout("e-p:32:32-i64:32-f64:32-n32"), Subtarget(TT, FS), TLInfo(*this), + TSInfo(*this), InstrInfo(Subtarget), FrameInfo(TargetFrameInfo::StackGrowsDown, 4, 0) { } diff --git a/lib/Target/Blackfin/BlackfinTargetMachine.h b/lib/Target/Blackfin/BlackfinTargetMachine.h index 07e7394..a63aa54 100644 --- a/lib/Target/Blackfin/BlackfinTargetMachine.h +++ b/lib/Target/Blackfin/BlackfinTargetMachine.h @@ -20,6 +20,7 @@ #include "BlackfinInstrInfo.h" #include "BlackfinSubtarget.h" #include "BlackfinISelLowering.h" +#include "BlackfinSelectionDAGInfo.h" #include "BlackfinIntrinsicInfo.h" namespace llvm { @@ -28,6 +29,7 @@ namespace llvm { const TargetData DataLayout; BlackfinSubtarget Subtarget; BlackfinTargetLowering TLInfo; + BlackfinSelectionDAGInfo TSInfo; BlackfinInstrInfo InstrInfo; TargetFrameInfo FrameInfo; BlackfinIntrinsicInfo IntrinsicInfo; @@ -46,6 +48,9 @@ namespace llvm { virtual const BlackfinTargetLowering* getTargetLowering() const { return &TLInfo; } + virtual const 
BlackfinSelectionDAGInfo* getSelectionDAGInfo() const { + return &TSInfo; + } virtual const TargetData *getTargetData() const { return &DataLayout; } virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp index 67f513b..55b8aaa 100644 --- a/lib/Target/CBackend/CBackend.cpp +++ b/lib/Target/CBackend/CBackend.cpp @@ -2165,6 +2165,9 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) { case CallingConv::X86_FastCall: Out << "__attribute__((fastcall)) "; break; + case CallingConv::X86_ThisCall: + Out << "__attribute__((thiscall)) "; + break; default: break; } @@ -3554,11 +3557,11 @@ void CWriter::visitExtractValueInst(ExtractValueInst &EVI) { // External Interface declaration //===----------------------------------------------------------------------===// -bool CTargetMachine::addPassesToEmitWholeFile(PassManager &PM, - formatted_raw_ostream &o, - CodeGenFileType FileType, - CodeGenOpt::Level OptLevel, - bool DisableVerify) { +bool CTargetMachine::addPassesToEmitFile(PassManagerBase &PM, + formatted_raw_ostream &o, + CodeGenFileType FileType, + CodeGenOpt::Level OptLevel, + bool DisableVerify) { if (FileType != TargetMachine::CGFT_AssemblyFile) return true; PM.add(createGCLoweringPass()); diff --git a/lib/Target/CBackend/CTargetMachine.h b/lib/Target/CBackend/CTargetMachine.h index d178e7f..6fed195 100644 --- a/lib/Target/CBackend/CTargetMachine.h +++ b/lib/Target/CBackend/CTargetMachine.h @@ -23,12 +23,11 @@ struct CTargetMachine : public TargetMachine { CTargetMachine(const Target &T, const std::string &TT, const std::string &FS) : TargetMachine(T) {} - virtual bool WantsWholeFile() const { return true; } - virtual bool addPassesToEmitWholeFile(PassManager &PM, - formatted_raw_ostream &Out, - CodeGenFileType FileType, - CodeGenOpt::Level OptLevel, - bool DisableVerify); + virtual bool addPassesToEmitFile(PassManagerBase &PM, + formatted_raw_ostream &Out, + CodeGenFileType FileType, + CodeGenOpt::Level OptLevel, + bool DisableVerify); virtual const TargetData *getTargetData() const { return 0; } }; diff --git a/lib/Target/CellSPU/README.txt b/lib/Target/CellSPU/README.txt index 4783dd5..0e7ad35 100644 --- a/lib/Target/CellSPU/README.txt +++ b/lib/Target/CellSPU/README.txt @@ -10,6 +10,8 @@ Department in The Aerospace Corporation: - Chandler Carruth (LLVM expertise) - Nehal Desai (debugging, i32 operations, RoadRunner SPU expertise) +Some minor fixes added by Kalle Raiskila. 
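printFunctionSignature gains the X86_ThisCall case alongside the existing stdcall/fastcall attributes. A sketch of the mapping; the enum and helper here are stand-ins for the CWriter logic, not its actual interface:

#include <string>

enum class CallConv { C, X86_StdCall, X86_FastCall, X86_ThisCall };

// Each x86-specific convention gets a GCC attribute in the emitted C;
// the default convention needs no annotation.
static std::string convAttr(CallConv CC) {
  switch (CC) {
  case CallConv::X86_StdCall:  return "__attribute__((stdcall)) ";
  case CallConv::X86_FastCall: return "__attribute__((fastcall)) ";
  case CallConv::X86_ThisCall: return "__attribute__((thiscall)) ";
  default:                     return "";
  }
}

int main() { return convAttr(CallConv::X86_ThisCall).empty(); }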
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index 5e04454..081e8d0 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -485,7 +485,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) // Set pre-RA register scheduler default to BURR, which produces slightly // better code than the default (could also be TDRR, but TargetLowering.h // needs a mod to support that model): - setSchedulingPreference(SchedulingForRegPressure); + setSchedulingPreference(Sched::RegPressure); } const char * diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp index 423da3b..4c53c98 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.cpp +++ b/lib/Target/CellSPU/SPUInstrInfo.cpp @@ -255,16 +255,14 @@ bool SPUInstrInfo::copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const + const TargetRegisterClass *SrcRC, + DebugLoc DL) const { // We support cross register class moves for our aliases, such as R3 in any // reg class to any other reg class containing R3. This is required because // we instruction select bitconvert i64 -> f64 as a noop for example, so our // types have no specific meaning. - DebugLoc DL; - if (MI != MBB.end()) DL = MI->getDebugLoc(); - if (DestRC == SPU::R8CRegisterClass) { BuildMI(MBB, MI, DL, get(SPU::LRr8), DestReg).addReg(SrcReg); } else if (DestRC == SPU::R16CRegisterClass) { @@ -291,9 +289,10 @@ bool SPUInstrInfo::copyRegToReg(MachineBasicBlock &MBB, void SPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned SrcReg, bool isKill, int FrameIdx, - const TargetRegisterClass *RC) const + MachineBasicBlock::iterator MI, + unsigned SrcReg, bool isKill, int FrameIdx, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { unsigned opc; bool isValidFrameIdx = (FrameIdx < SPUFrameInfo::maxFrameOffset()); @@ -325,9 +324,10 @@ SPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, void SPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned DestReg, int FrameIdx, - const TargetRegisterClass *RC) const + MachineBasicBlock::iterator MI, + unsigned DestReg, int FrameIdx, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { unsigned opc; bool isValidFrameIdx = (FrameIdx < SPUFrameInfo::maxFrameOffset()); @@ -467,6 +467,9 @@ SPUInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, // If there is only one terminator instruction, process it. 
if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { if (isUncondBranch(LastInst)) { + // Check for jump tables + if (!LastInst->getOperand(0).isMBB()) + return true; TBB = LastInst->getOperand(0).getMBB(); return false; } else if (isCondBranch(LastInst)) { diff --git a/lib/Target/CellSPU/SPUInstrInfo.h b/lib/Target/CellSPU/SPUInstrInfo.h index 42677fc..6dabd7c 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.h +++ b/lib/Target/CellSPU/SPUInstrInfo.h @@ -60,19 +60,22 @@ namespace llvm { MachineBasicBlock::iterator MI, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const; + const TargetRegisterClass *SrcRC, + DebugLoc DL) const; //! Store a register to a stack slot, based on its register class. virtual void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC) const; + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; //! Load a register from a stack slot, based on its register class. virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg, int FrameIndex, - const TargetRegisterClass *RC) const; + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; //! Return true if the specified load or store can be folded virtual diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td index 6d1f87d..a7fb14c 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.td +++ b/lib/Target/CellSPU/SPUInstrInfo.td @@ -655,7 +655,7 @@ def SFHvec: def SFHr16: RRForm<0b00010010000, (outs R16C:$rT), (ins R16C:$rA, R16C:$rB), "sfh\t$rT, $rA, $rB", IntegerOp, - [(set R16C:$rT, (sub R16C:$rA, R16C:$rB))]>; + [(set R16C:$rT, (sub R16C:$rB, R16C:$rA))]>; def SFHIvec: RI10Form<0b10110000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), @@ -670,11 +670,11 @@ def SFHIr16 : RI10Form<0b10110000, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val), def SFvec : RRForm<0b00000010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), "sf\t$rT, $rA, $rB", IntegerOp, - [(set (v4i32 VECREG:$rT), (sub (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; + [(set (v4i32 VECREG:$rT), (sub (v4i32 VECREG:$rB), (v4i32 VECREG:$rA)))]>; def SFr32 : RRForm<0b00000010000, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB), "sf\t$rT, $rA, $rB", IntegerOp, - [(set R32C:$rT, (sub R32C:$rA, R32C:$rB))]>; + [(set R32C:$rT, (sub R32C:$rB, R32C:$rA))]>; def SFIvec: RI10Form<0b00110000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp index fdbe10f..d8937ec 100644 --- a/lib/Target/CellSPU/SPURegisterInfo.cpp +++ b/lib/Target/CellSPU/SPURegisterInfo.cpp @@ -469,7 +469,7 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const && "SPURegisterInfo::emitPrologue: FrameSize not aligned"); // the "empty" frame size is 16 - just the register scavenger spill slot - if (FrameSize > 16 || MFI->hasCalls()) { + if (FrameSize > 16 || MFI->adjustsStack()) { FrameSize = -(FrameSize + SPUFrameInfo::minStackSize()); if (hasDebugInfo) { // Mark effective beginning of when frame pointer becomes valid. 
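The SFHr16/SFvec/SFr32 pattern changes above are a correctness fix, not a cleanup: SPU's sf is "subtract from", computing rT = rB - rA, so the selection patterns must subtract the first operand from the second. Reduced to arithmetic:

#include <cassert>

// sf rT, rA, rB on SPU: rT = rB - rA ("subtract rA from rB").
static int sf(int rA, int rB) { return rB - rA; }

int main() {
  assert(sf(3, 10) == 7);  // second operand minus first
  assert(sf(10, 3) == -7); // the old (sub $rA, $rB) pattern had the sign flipped
}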
@@ -569,7 +569,7 @@ SPURegisterInfo::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const && "SPURegisterInfo::emitEpilogue: FrameSize not aligned"); // the "empty" frame size is 16 - just the register scavenger spill slot - if (FrameSize > 16 || MFI->hasCalls()) { + if (FrameSize > 16 || MFI->adjustsStack()) { FrameSize = FrameSize + SPUFrameInfo::minStackSize(); if (isInt<10>(FrameSize + LinkSlotOffset)) { // Reload $lr, adjust $sp by required amount diff --git a/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp b/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp index ca2a4bf..5732fd4 100644 --- a/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp +++ b/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp @@ -12,10 +12,11 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "cellspu-selectiondag-info" -#include "SPUSelectionDAGInfo.h" +#include "SPUTargetMachine.h" using namespace llvm; -SPUSelectionDAGInfo::SPUSelectionDAGInfo() { +SPUSelectionDAGInfo::SPUSelectionDAGInfo(const SPUTargetMachine &TM) + : TargetSelectionDAGInfo(TM) { } SPUSelectionDAGInfo::~SPUSelectionDAGInfo() { diff --git a/lib/Target/CellSPU/SPUSelectionDAGInfo.h b/lib/Target/CellSPU/SPUSelectionDAGInfo.h index 0a6b4c1..39257d9 100644 --- a/lib/Target/CellSPU/SPUSelectionDAGInfo.h +++ b/lib/Target/CellSPU/SPUSelectionDAGInfo.h @@ -18,9 +18,11 @@ namespace llvm { +class SPUTargetMachine; + class SPUSelectionDAGInfo : public TargetSelectionDAGInfo { public: - SPUSelectionDAGInfo(); + explicit SPUSelectionDAGInfo(const SPUTargetMachine &TM); ~SPUSelectionDAGInfo(); }; diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp index 6500067..480ec3f 100644 --- a/lib/Target/CellSPU/SPUTargetMachine.cpp +++ b/lib/Target/CellSPU/SPUTargetMachine.cpp @@ -42,6 +42,7 @@ SPUTargetMachine::SPUTargetMachine(const Target &T, const std::string &TT, InstrInfo(*this), FrameInfo(*this), TLInfo(*this), + TSInfo(*this), InstrItins(Subtarget.getInstrItineraryData()) { // For the time being, use static relocations, since there's really no // support for PIC yet. 
diff --git a/lib/Target/CellSPU/SPUTargetMachine.h b/lib/Target/CellSPU/SPUTargetMachine.h index 37e7cd2..7e02701 100644 --- a/lib/Target/CellSPU/SPUTargetMachine.h +++ b/lib/Target/CellSPU/SPUTargetMachine.h @@ -17,6 +17,7 @@ #include "SPUSubtarget.h" #include "SPUInstrInfo.h" #include "SPUISelLowering.h" +#include "SPUSelectionDAGInfo.h" #include "SPUFrameInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetData.h" @@ -34,6 +35,7 @@ class SPUTargetMachine : public LLVMTargetMachine { SPUInstrInfo InstrInfo; SPUFrameInfo FrameInfo; SPUTargetLowering TLInfo; + SPUSelectionDAGInfo TSInfo; InstrItineraryData InstrItins; public: SPUTargetMachine(const Target &T, const std::string &TT, @@ -61,6 +63,10 @@ public: return &TLInfo; } + virtual const SPUSelectionDAGInfo* getSelectionDAGInfo() const { + return &TSInfo; + } + virtual const SPURegisterInfo *getRegisterInfo() const { return &InstrInfo.getRegisterInfo(); } diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index e739b26..45a0c84 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -1038,6 +1038,11 @@ namespace { Out << ");"; nl(Out); } + if (GV->isThreadLocal()) { + printCppName(GV); + Out << "->setThreadLocal(true);"; + nl(Out); + } if (is_inline) { out(); Out << "}"; nl(Out); } @@ -2007,11 +2012,11 @@ char CppWriter::ID = 0; // External Interface declaration //===----------------------------------------------------------------------===// -bool CPPTargetMachine::addPassesToEmitWholeFile(PassManager &PM, - formatted_raw_ostream &o, - CodeGenFileType FileType, - CodeGenOpt::Level OptLevel, - bool DisableVerify) { +bool CPPTargetMachine::addPassesToEmitFile(PassManagerBase &PM, + formatted_raw_ostream &o, + CodeGenFileType FileType, + CodeGenOpt::Level OptLevel, + bool DisableVerify) { if (FileType != TargetMachine::CGFT_AssemblyFile) return true; PM.add(new CppWriter(o)); return false; diff --git a/lib/Target/CppBackend/CPPTargetMachine.h b/lib/Target/CppBackend/CPPTargetMachine.h index b7aae91..e42166e 100644 --- a/lib/Target/CppBackend/CPPTargetMachine.h +++ b/lib/Target/CppBackend/CPPTargetMachine.h @@ -26,12 +26,11 @@ struct CPPTargetMachine : public TargetMachine { const std::string &FS) : TargetMachine(T) {} - virtual bool WantsWholeFile() const { return true; } - virtual bool addPassesToEmitWholeFile(PassManager &PM, - formatted_raw_ostream &Out, - CodeGenFileType FileType, - CodeGenOpt::Level OptLevel, - bool DisableVerify); + virtual bool addPassesToEmitFile(PassManagerBase &PM, + formatted_raw_ostream &Out, + CodeGenFileType FileType, + CodeGenOpt::Level OptLevel, + bool DisableVerify); virtual const TargetData *getTargetData() const { return 0; } }; diff --git a/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp b/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp index 04dfb0a..e42e9b3 100644 --- a/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp +++ b/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp @@ -155,7 +155,7 @@ void MBlazeAsmPrinter::printSavedRegsBitmask(raw_ostream &O) { CPUBitmask |= (1 << MBlazeRegisterInfo:: getRegisterNumbering(RI.getFrameRegister(*MF))); - if (MFI->hasCalls()) + if (MFI->adjustsStack()) CPUBitmask |= (1 << MBlazeRegisterInfo:: getRegisterNumbering(RI.getRARegister())); diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.cpp b/lib/Target/MBlaze/MBlazeInstrInfo.cpp index 01f3174..4c4d86b 100644 --- a/lib/Target/MBlaze/MBlazeInstrInfo.cpp +++ b/lib/Target/MBlaze/MBlazeInstrInfo.cpp @@ -107,7 +107,6 
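The CPPBackend hunk stops silently dropping the thread-local flag: for a TLS global it now also emits a setThreadLocal(true) call into the generated construction code. A stub showing the flag round-trip; GlobalVariableStub is not the real llvm::GlobalVariable:

#include <cassert>

struct GlobalVariableStub {
  bool ThreadLocal = false;
  void setThreadLocal(bool V) { ThreadLocal = V; }
  bool isThreadLocal() const { return ThreadLocal; }
};

int main() {
  // The generated code gains a line of roughly this shape for TLS globals:
  //   gvar->setThreadLocal(true);
  GlobalVariableStub GV;
  GV.setThreadLocal(true);
  assert(GV.isThreadLocal());
}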
@@ isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const { void MBlazeInstrInfo:: insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const { DebugLoc DL; - if (MI != MBB.end()) DL = MI->getDebugLoc(); BuildMI(MBB, MI, DL, get(MBlaze::NOP)); } @@ -115,8 +114,8 @@ bool MBlazeInstrInfo:: copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const { - DebugLoc DL; + const TargetRegisterClass *SrcRC, + DebugLoc DL) const { llvm::BuildMI(MBB, I, DL, get(MBlaze::ADD), DestReg) .addReg(SrcReg).addReg(MBlaze::R0); return true; @@ -125,7 +124,8 @@ copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, void MBlazeInstrInfo:: storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned SrcReg, bool isKill, int FI, - const TargetRegisterClass *RC) const { + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { DebugLoc DL; BuildMI(MBB, I, DL, get(MBlaze::SWI)).addReg(SrcReg,getKillRegState(isKill)) .addImm(0).addFrameIndex(FI); @@ -134,7 +134,8 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, void MBlazeInstrInfo:: loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, int FI, - const TargetRegisterClass *RC) const { + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { DebugLoc DL; BuildMI(MBB, I, DL, get(MBlaze::LWI), DestReg) .addImm(0).addFrameIndex(FI); @@ -210,7 +211,8 @@ unsigned MBlazeInstrInfo::getGlobalBaseReg(MachineFunction *MF) const { GlobalBaseReg = RegInfo.createVirtualRegister(MBlaze::CPURegsRegisterClass); bool Ok = TII->copyRegToReg(FirstMBB, MBBI, GlobalBaseReg, MBlaze::R20, MBlaze::CPURegsRegisterClass, - MBlaze::CPURegsRegisterClass); + MBlaze::CPURegsRegisterClass, + DebugLoc()); assert(Ok && "Couldn't assign to global base register!"); Ok = Ok; // Silence warning when assertions are turned off. 
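MBlaze's copyRegToReg builds "add DestReg, SrcReg, r0". MicroBlaze's r0 always reads as zero, so the add is a register copy, the same trick as Alpha's BIS above:

#include <cassert>
#include <cstdint>

// r0 is hard-wired to zero on MicroBlaze; adding it changes nothing.
static uint32_t addR0(uint32_t rS) {
  const uint32_t r0 = 0;
  return rS + r0;
}

int main() { assert(addR0(0xCAFEBABEu) == 0xCAFEBABEu); }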
RegInfo.addLiveIn(MBlaze::R20); diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.h b/lib/Target/MBlaze/MBlazeInstrInfo.h index 4f79f1c..c9fdc88 100644 --- a/lib/Target/MBlaze/MBlazeInstrInfo.h +++ b/lib/Target/MBlaze/MBlazeInstrInfo.h @@ -203,16 +203,19 @@ public: MachineBasicBlock::iterator I, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const; + const TargetRegisterClass *SrcRC, + DebugLoc DL) const; virtual void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC) const; + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg, int FrameIndex, - const TargetRegisterClass *RC) const; + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, MachineInstr* MI, diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp index e15176e..f15eea9 100644 --- a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp @@ -220,7 +220,7 @@ void MBlazeRegisterInfo::adjustMBlazeStackFrame(MachineFunction &MF) const { StackOffset += RegSize; } - if (MFI->hasCalls()) { + if (MFI->adjustsStack()) { MBlazeFI->setRAStackOffset(0); MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true), StackOffset); @@ -311,8 +311,8 @@ emitPrologue(MachineFunction &MF) const { unsigned StackSize = MFI->getStackSize(); // No need to allocate space on the stack. - if (StackSize == 0 && !MFI->hasCalls()) return; - if (StackSize < 28 && MFI->hasCalls()) StackSize = 28; + if (StackSize == 0 && !MFI->adjustsStack()) return; + if (StackSize < 28 && MFI->adjustsStack()) StackSize = 28; int FPOffset = MBlazeFI->getFPStackOffset(); int RAOffset = MBlazeFI->getRAStackOffset(); @@ -323,7 +323,7 @@ emitPrologue(MachineFunction &MF) const { // Save the return address only if the function isnt a leaf one. // swi R15, R1, stack_loc - if (MFI->hasCalls()) { + if (MFI->adjustsStack()) { BuildMI(MBB, MBBI, DL, TII.get(MBlaze::SWI)) .addReg(MBlaze::R15).addImm(RAOffset).addReg(MBlaze::R1); } @@ -366,14 +366,14 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { // Restore the return address only if the function isnt a leaf one. // lwi R15, R1, stack_loc - if (MFI->hasCalls()) { + if (MFI->adjustsStack()) { BuildMI(MBB, MBBI, dl, TII.get(MBlaze::LWI), MBlaze::R15) .addImm(RAOffset).addReg(MBlaze::R1); } // Get the number of bytes from FrameInfo int StackSize = (int) MFI->getStackSize(); - if (StackSize < 28 && MFI->hasCalls()) StackSize = 28; + if (StackSize < 28 && MFI->adjustsStack()) StackSize = 28; // adjust stack. 
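The hasCalls() to adjustsStack() migration, here and in the Blackfin and SPU hunks, tightens the leaf-function test to functions that actually adjust the stack. The MBlaze prologue additionally clamps any adjusted frame to at least 28 bytes. Both rules as one helper:

#include <cassert>

// Frame allocation policy from the MBlaze prologue/epilogue above.
static int frameBytes(int StackSize, bool AdjustsStack) {
  if (StackSize == 0 && !AdjustsStack)
    return 0;               // true leaf: no stack adjustment at all
  if (StackSize < 28 && AdjustsStack)
    StackSize = 28;         // reserve the 28-byte minimum
  return StackSize;
}

int main() {
  assert(frameBytes(0, false) == 0);
  assert(frameBytes(4, true) == 28);
  assert(frameBytes(64, true) == 64);
}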
// addi R1, R1, imm diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.td b/lib/Target/MBlaze/MBlazeRegisterInfo.td index 96a5c98..d0a1e75 100644 --- a/lib/Target/MBlaze/MBlazeRegisterInfo.td +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.td @@ -17,21 +17,15 @@ class MBlazeReg<string n> : Register<n> { let Namespace = "MBlaze"; } -class MBlazeRegWithSubRegs<string n, list<Register> subregs> - : RegisterWithSubRegs<n, subregs> { - field bits<5> Num; - let Namespace = "MBlaze"; -} - // MBlaze CPU Registers class MBlazeGPRReg<bits<5> num, string n> : MBlazeReg<n> { let Num = num; } // MBlaze 32-bit (aliased) FPU Registers -class FPR<bits<5> num, string n, list<Register> subregs> - : MBlazeRegWithSubRegs<n, subregs> { +class FPR<bits<5> num, string n, list<Register> aliases> : MBlazeReg<n> { let Num = num; + let Aliases = aliases; } //===----------------------------------------------------------------------===// diff --git a/lib/Target/MBlaze/MBlazeSelectionDAGInfo.cpp b/lib/Target/MBlaze/MBlazeSelectionDAGInfo.cpp index 105e42a..6a115b2 100644 --- a/lib/Target/MBlaze/MBlazeSelectionDAGInfo.cpp +++ b/lib/Target/MBlaze/MBlazeSelectionDAGInfo.cpp @@ -12,10 +12,11 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "mblaze-selectiondag-info" -#include "MBlazeSelectionDAGInfo.h" +#include "MBlazeTargetMachine.h" using namespace llvm; -MBlazeSelectionDAGInfo::MBlazeSelectionDAGInfo() { +MBlazeSelectionDAGInfo::MBlazeSelectionDAGInfo(const MBlazeTargetMachine &TM) + : TargetSelectionDAGInfo(TM) { } MBlazeSelectionDAGInfo::~MBlazeSelectionDAGInfo() { diff --git a/lib/Target/MBlaze/MBlazeSelectionDAGInfo.h b/lib/Target/MBlaze/MBlazeSelectionDAGInfo.h index 11e6879..9f8e2aa 100644 --- a/lib/Target/MBlaze/MBlazeSelectionDAGInfo.h +++ b/lib/Target/MBlaze/MBlazeSelectionDAGInfo.h @@ -18,9 +18,11 @@ namespace llvm { +class MBlazeTargetMachine; + class MBlazeSelectionDAGInfo : public TargetSelectionDAGInfo { public: - MBlazeSelectionDAGInfo(); + explicit MBlazeSelectionDAGInfo(const MBlazeTargetMachine &TM); ~MBlazeSelectionDAGInfo(); }; diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/lib/Target/MBlaze/MBlazeTargetMachine.cpp index 9eba2b3..4252953 100644 --- a/lib/Target/MBlaze/MBlazeTargetMachine.cpp +++ b/lib/Target/MBlaze/MBlazeTargetMachine.cpp @@ -39,7 +39,7 @@ MBlazeTargetMachine(const Target &T, const std::string &TT, "f64:32:32-v64:32:32-v128:32:32-n32"), InstrInfo(*this), FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0), - TLInfo(*this) { + TLInfo(*this), TSInfo(*this) { if (getRelocationModel() == Reloc::Default) { setRelocationModel(Reloc::Static); } diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.h b/lib/Target/MBlaze/MBlazeTargetMachine.h index 9bf9898..6a57e58 100644 --- a/lib/Target/MBlaze/MBlazeTargetMachine.h +++ b/lib/Target/MBlaze/MBlazeTargetMachine.h @@ -17,6 +17,7 @@ #include "MBlazeSubtarget.h" #include "MBlazeInstrInfo.h" #include "MBlazeISelLowering.h" +#include "MBlazeSelectionDAGInfo.h" #include "MBlazeIntrinsicInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetData.h" @@ -31,6 +32,7 @@ namespace llvm { MBlazeInstrInfo InstrInfo; TargetFrameInfo FrameInfo; MBlazeTargetLowering TLInfo; + MBlazeSelectionDAGInfo TSInfo; MBlazeIntrinsicInfo IntrinsicInfo; public: MBlazeTargetMachine(const Target &T, const std::string &TT, @@ -54,6 +56,9 @@ namespace llvm { virtual const MBlazeTargetLowering *getTargetLowering() const { return &TLInfo; } + virtual const MBlazeSelectionDAGInfo* getSelectionDAGInfo() const + 
{ return &TSInfo; } + const TargetIntrinsicInfo *getIntrinsicInfo() const { return &IntrinsicInfo; } diff --git a/lib/Target/MSIL/MSILWriter.cpp b/lib/Target/MSIL/MSILWriter.cpp index 15d16ec..3de173c 100644 --- a/lib/Target/MSIL/MSILWriter.cpp +++ b/lib/Target/MSIL/MSILWriter.cpp @@ -34,12 +34,11 @@ namespace llvm { MSILTarget(const Target &T, const std::string &TT, const std::string &FS) : TargetMachine(T) {} - virtual bool WantsWholeFile() const { return true; } - virtual bool addPassesToEmitWholeFile(PassManager &PM, - formatted_raw_ostream &Out, - CodeGenFileType FileType, - CodeGenOpt::Level OptLevel, - bool DisableVerify); + virtual bool addPassesToEmitFile(PassManagerBase &PM, + formatted_raw_ostream &Out, + CodeGenFileType FileType, + CodeGenOpt::Level OptLevel, + bool DisableVerify); virtual const TargetData *getTargetData() const { return 0; } }; @@ -279,6 +278,8 @@ std::string MSILWriter::getConvModopt(CallingConv::ID CallingConvID) { return "modopt([mscorlib]System.Runtime.CompilerServices.CallConvFastcall) "; case CallingConv::X86_StdCall: return "modopt([mscorlib]System.Runtime.CompilerServices.CallConvStdcall) "; + case CallingConv::X86_ThisCall: + return "modopt([mscorlib]System.Runtime.CompilerServices.CallConvThiscall) "; default: errs() << "CallingConvID = " << CallingConvID << '\n'; llvm_unreachable("Unsupported calling convention"); @@ -1686,11 +1687,11 @@ void MSILWriter::printExternals() { // External Interface declaration //===----------------------------------------------------------------------===// -bool MSILTarget::addPassesToEmitWholeFile(PassManager &PM, - formatted_raw_ostream &o, - CodeGenFileType FileType, - CodeGenOpt::Level OptLevel, - bool DisableVerify) +bool MSILTarget::addPassesToEmitFile(PassManagerBase &PM, + formatted_raw_ostream &o, + CodeGenFileType FileType, + CodeGenOpt::Level OptLevel, + bool DisableVerify) { if (FileType != TargetMachine::CGFT_AssemblyFile) return true; MSILWriter* Writer = new MSILWriter(o); diff --git a/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.h b/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.h index f9620e8..e937696 100644 --- a/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.h +++ b/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.h @@ -26,7 +26,7 @@ namespace llvm { /// MSP430MCInstLower - This class is used to lower an MachineInstr /// into an MCInst. -class VISIBILITY_HIDDEN MSP430MCInstLower { +class LLVM_LIBRARY_VISIBILITY MSP430MCInstLower { MCContext &Ctx; Mangler &Mang; diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp index c3e2bdf7..403400e 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -83,7 +83,7 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) : setStackPointerRegisterToSaveRestore(MSP430::SPW); setBooleanContents(ZeroOrOneBooleanContent); - setSchedulingPreference(SchedulingForLatency); + setSchedulingPreference(Sched::Latency); // We have post-incremented loads / stores. 
setIndexedLoadAction(ISD::POST_INC, MVT::i8, Legal); @@ -897,6 +897,9 @@ MSP430TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const { SDValue MSP430TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const { + MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + MFI->setReturnAddressIsTaken(true); + unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); DebugLoc dl = Op.getDebugLoc(); @@ -920,6 +923,7 @@ SDValue MSP430TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); MFI->setFrameAddressIsTaken(true); + EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp index 2b09b3d..18226ab 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.cpp +++ b/lib/Target/MSP430/MSP430InstrInfo.cpp @@ -32,7 +32,8 @@ MSP430InstrInfo::MSP430InstrInfo(MSP430TargetMachine &tm) void MSP430InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned SrcReg, bool isKill, int FrameIdx, - const TargetRegisterClass *RC) const { + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { DebugLoc DL; if (MI != MBB.end()) DL = MI->getDebugLoc(); MachineFunction &MF = *MBB.getParent(); @@ -59,7 +60,8 @@ void MSP430InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, void MSP430InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, int FrameIdx, - const TargetRegisterClass *RC) const{ + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const{ DebugLoc DL; if (MI != MBB.end()) DL = MI->getDebugLoc(); MachineFunction &MF = *MBB.getParent(); @@ -85,10 +87,8 @@ bool MSP430InstrInfo::copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const { - DebugLoc DL; - if (I != MBB.end()) DL = I->getDebugLoc(); - + const TargetRegisterClass *SrcRC, + DebugLoc DL) const { if (DestRC == SrcRC) { unsigned Opc; if (DestRC == &MSP430::GR16RegClass) { @@ -130,7 +130,8 @@ MSP430InstrInfo::isMoveInstr(const MachineInstr& MI, bool MSP430InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI) const { + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { if (CSI.empty()) return false; @@ -154,7 +155,8 @@ MSP430InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, bool MSP430InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI) const { + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { if (CSI.empty()) return false; diff --git a/lib/Target/MSP430/MSP430InstrInfo.h b/lib/Target/MSP430/MSP430InstrInfo.h index 6ef4b0a..842b4cb 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.h +++ b/lib/Target/MSP430/MSP430InstrInfo.h @@ -52,7 +52,8 @@ public: bool copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const; + const TargetRegisterClass *SrcRC, + DebugLoc DL) const; bool isMoveInstr(const MachineInstr& MI, unsigned &SrcReg, unsigned &DstReg, 
@@ -62,18 +63,22 @@ public: MachineBasicBlock::iterator MI, unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC) const; + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, int FrameIdx, - const TargetRegisterClass *RC) const; + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI) const; + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const; virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI) const; + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const; unsigned GetInstSizeInBytes(const MachineInstr *MI) const; diff --git a/lib/Target/MSP430/MSP430RegisterInfo.td b/lib/Target/MSP430/MSP430RegisterInfo.td index 4078626..f8aec66 100644 --- a/lib/Target/MSP430/MSP430RegisterInfo.td +++ b/lib/Target/MSP430/MSP430RegisterInfo.td @@ -43,6 +43,9 @@ def R13B : MSP430Reg<13, "r13">; def R14B : MSP430Reg<14, "r14">; def R15B : MSP430Reg<15, "r15">; +def subreg_8bit : SubRegIndex { let Namespace = "MSP430"; } + +let SubRegIndices = [subreg_8bit] in { def PCW : MSP430RegWithSubregs<0, "r0", [PCB]>; def SPW : MSP430RegWithSubregs<1, "r1", [SPB]>; def SRW : MSP430RegWithSubregs<2, "r2", [SRB]>; @@ -59,13 +62,7 @@ def R12W : MSP430RegWithSubregs<12, "r12", [R12B]>; def R13W : MSP430RegWithSubregs<13, "r13", [R13B]>; def R14W : MSP430RegWithSubregs<14, "r14", [R14B]>; def R15W : MSP430RegWithSubregs<15, "r15", [R15B]>; - -def : SubRegSet<1, [PCW, SPW, SRW, CGW, FPW, - R5W, R6W, R7W, R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W], - [PCB, SPB, SRB, CGB, FPB, - R5B, R6B, R7B, R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>; - -def subreg_8bit : PatLeaf<(i32 1)>; +} def GR8 : RegisterClass<"MSP430", [i8], 8, // Volatile registers @@ -101,7 +98,7 @@ def GR16 : RegisterClass<"MSP430", [i16], 16, // Volatile, but not allocable PCW, SPW, SRW, CGW]> { - let SubRegClassList = [GR8]; + let SubRegClasses = [(GR8 subreg_8bit)]; let MethodProtos = [{ iterator allocation_order_end(const MachineFunction &MF) const; }]; diff --git a/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp b/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp index a54c929..24f45fa 100644 --- a/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp +++ b/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp @@ -12,10 +12,11 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "msp430-selectiondag-info" -#include "MSP430SelectionDAGInfo.h" +#include "MSP430TargetMachine.h" using namespace llvm; -MSP430SelectionDAGInfo::MSP430SelectionDAGInfo() { +MSP430SelectionDAGInfo::MSP430SelectionDAGInfo(const MSP430TargetMachine &TM) + : TargetSelectionDAGInfo(TM) { } MSP430SelectionDAGInfo::~MSP430SelectionDAGInfo() { diff --git a/lib/Target/MSP430/MSP430SelectionDAGInfo.h b/lib/Target/MSP430/MSP430SelectionDAGInfo.h index c952ab7..fa81948 100644 --- a/lib/Target/MSP430/MSP430SelectionDAGInfo.h +++ b/lib/Target/MSP430/MSP430SelectionDAGInfo.h @@ -18,9 +18,11 @@ namespace llvm { +class MSP430TargetMachine; + class MSP430SelectionDAGInfo : public TargetSelectionDAGInfo { public: - MSP430SelectionDAGInfo(); + explicit MSP430SelectionDAGInfo(const MSP430TargetMachine &TM); 
~MSP430SelectionDAGInfo(); }; diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp index a0dbac2..99877c8 100644 --- a/lib/Target/MSP430/MSP430TargetMachine.cpp +++ b/lib/Target/MSP430/MSP430TargetMachine.cpp @@ -33,7 +33,7 @@ MSP430TargetMachine::MSP430TargetMachine(const Target &T, Subtarget(TT, FS), // FIXME: Check TargetData string. DataLayout("e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"), - InstrInfo(*this), TLInfo(*this), + InstrInfo(*this), TLInfo(*this), TSInfo(*this), FrameInfo(TargetFrameInfo::StackGrowsDown, 2, -2) { } diff --git a/lib/Target/MSP430/MSP430TargetMachine.h b/lib/Target/MSP430/MSP430TargetMachine.h index 68bde9a..b93edfd 100644 --- a/lib/Target/MSP430/MSP430TargetMachine.h +++ b/lib/Target/MSP430/MSP430TargetMachine.h @@ -17,6 +17,7 @@ #include "MSP430InstrInfo.h" #include "MSP430ISelLowering.h" +#include "MSP430SelectionDAGInfo.h" #include "MSP430RegisterInfo.h" #include "MSP430Subtarget.h" #include "llvm/Target/TargetData.h" @@ -32,6 +33,7 @@ class MSP430TargetMachine : public LLVMTargetMachine { const TargetData DataLayout; // Calculates type size & alignment MSP430InstrInfo InstrInfo; MSP430TargetLowering TLInfo; + MSP430SelectionDAGInfo TSInfo; // MSP430 does not have any call stack frame, therefore not having // any MSP430 specific FrameInfo class. @@ -54,6 +56,10 @@ public: return &TLInfo; } + virtual const MSP430SelectionDAGInfo* getSelectionDAGInfo() const { + return &TSInfo; + } + virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel); }; // MSP430TargetMachine. diff --git a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp index d269153..4d7fe4c 100644 --- a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp @@ -145,7 +145,7 @@ void MipsAsmPrinter::printSavedRegsBitmask(raw_ostream &O) { CPUBitmask |= (1 << MipsRegisterInfo:: getRegisterNumbering(RI.getFrameRegister(*MF))); - if (MFI->hasCalls()) + if (MFI->adjustsStack()) CPUBitmask |= (1 << MipsRegisterInfo:: getRegisterNumbering(RI.getRARegister())); diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index ee85a3f3..3888bbf 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -225,12 +225,12 @@ SDNode *MipsDAGToDAGISel::SelectLoadFp64(SDNode *N) { MVT::Other, Offset0, Base, Chain); SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, NVT), 0); - SDValue I0 = CurDAG->getTargetInsertSubreg(Mips::SUBREG_FPEVEN, dl, + SDValue I0 = CurDAG->getTargetInsertSubreg(Mips::sub_fpeven, dl, MVT::f64, Undef, SDValue(LD0, 0)); SDNode *LD1 = CurDAG->getMachineNode(Mips::LWC1, dl, MVT::f32, MVT::Other, Offset1, Base, SDValue(LD0, 1)); - SDValue I1 = CurDAG->getTargetInsertSubreg(Mips::SUBREG_FPODD, dl, + SDValue I1 = CurDAG->getTargetInsertSubreg(Mips::sub_fpodd, dl, MVT::f64, I0, SDValue(LD1, 0)); ReplaceUses(SDValue(N, 0), I1); @@ -266,9 +266,9 @@ SDNode *MipsDAGToDAGISel::SelectStoreFp64(SDNode *N) { DebugLoc dl = N->getDebugLoc(); // Get the even and odd part from the f64 register - SDValue FPOdd = CurDAG->getTargetExtractSubreg(Mips::SUBREG_FPODD, + SDValue FPOdd = CurDAG->getTargetExtractSubreg(Mips::sub_fpodd, dl, MVT::f32, N1); - SDValue FPEven = CurDAG->getTargetExtractSubreg(Mips::SUBREG_FPEVEN, + SDValue FPEven = CurDAG->getTargetExtractSubreg(Mips::sub_fpeven, dl, 
MVT::f32, N1); // The second store should start 4 bytes after the first. @@ -438,9 +438,9 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { SDValue Undef = SDValue( CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::f64), 0); SDNode *MTC = CurDAG->getMachineNode(Mips::MTC1, dl, MVT::f32, Zero); - SDValue I0 = CurDAG->getTargetInsertSubreg(Mips::SUBREG_FPEVEN, dl, + SDValue I0 = CurDAG->getTargetInsertSubreg(Mips::sub_fpeven, dl, MVT::f64, Undef, SDValue(MTC, 0)); - SDValue I1 = CurDAG->getTargetInsertSubreg(Mips::SUBREG_FPODD, dl, + SDValue I1 = CurDAG->getTargetInsertSubreg(Mips::sub_fpodd, dl, MVT::f64, I0, SDValue(MTC, 0)); ReplaceUses(SDValue(Node, 0), I1); return I1.getNode(); diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp index dbd3c24..4005e35 100644 --- a/lib/Target/Mips/MipsInstrInfo.cpp +++ b/lib/Target/Mips/MipsInstrInfo.cpp @@ -124,7 +124,6 @@ void MipsInstrInfo:: insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const { DebugLoc DL; - if (MI != MBB.end()) DL = MI->getDebugLoc(); BuildMI(MBB, MI, DL, get(Mips::NOP)); } @@ -132,10 +131,8 @@ bool MipsInstrInfo:: copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const { - DebugLoc DL; - - if (I != MBB.end()) DL = I->getDebugLoc(); + const TargetRegisterClass *SrcRC, + DebugLoc DL) const { if (DestRC != SrcRC) { @@ -190,7 +187,8 @@ copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, void MipsInstrInfo:: storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned SrcReg, bool isKill, int FI, - const TargetRegisterClass *RC) const { + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); @@ -223,7 +221,8 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, void MipsInstrInfo:: loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, int FI, - const TargetRegisterClass *RC) const + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); @@ -624,7 +623,8 @@ unsigned MipsInstrInfo::getGlobalBaseReg(MachineFunction *MF) const { GlobalBaseReg = RegInfo.createVirtualRegister(Mips::CPURegsRegisterClass); bool Ok = TII->copyRegToReg(FirstMBB, MBBI, GlobalBaseReg, Mips::GP, Mips::CPURegsRegisterClass, - Mips::CPURegsRegisterClass, + Mips::CPURegsRegisterClass, + DebugLoc()); assert(Ok && "Couldn't assign to global base register!"); Ok = Ok; // Silence warning when assertions are turned off.
RegInfo.addLiveIn(Mips::GP); diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h index ab8dc59..7919d9a 100644 --- a/lib/Target/Mips/MipsInstrInfo.h +++ b/lib/Target/Mips/MipsInstrInfo.h @@ -209,16 +209,19 @@ public: MachineBasicBlock::iterator I, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const; + const TargetRegisterClass *SrcRC, + DebugLoc DL) const; virtual void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC) const; + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg, int FrameIndex, - const TargetRegisterClass *RC) const; + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, MachineInstr* MI, diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index 478da84..5e719af 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -288,7 +288,7 @@ void MipsRegisterInfo::adjustMipsStackFrame(MachineFunction &MF) const // Stack locations for FP and RA. If only one of them is used, // the space must be allocated for both, otherwise no space at all. - if (hasFP(MF) || MFI->hasCalls()) { + if (hasFP(MF) || MFI->adjustsStack()) { // FP stack location MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true), StackOffset); @@ -302,7 +302,7 @@ void MipsRegisterInfo::adjustMipsStackFrame(MachineFunction &MF) const MipsFI->setRAStackOffset(StackOffset); StackOffset += RegSize; - if (MFI->hasCalls()) + if (MFI->adjustsStack()) TopCPUSavedRegOff += RegSize; } @@ -407,7 +407,7 @@ emitPrologue(MachineFunction &MF) const unsigned StackSize = MFI->getStackSize(); // No need to allocate space on the stack. - if (StackSize == 0 && !MFI->hasCalls()) return; + if (StackSize == 0 && !MFI->adjustsStack()) return; int FPOffset = MipsFI->getFPStackOffset(); int RAOffset = MipsFI->getRAStackOffset(); @@ -425,7 +425,7 @@ emitPrologue(MachineFunction &MF) const // Save the return address only if the function isn't a leaf one. // sw $ra, stack_loc($sp) - if (MFI->hasCalls()) { + if (MFI->adjustsStack()) { BuildMI(MBB, MBBI, dl, TII.get(Mips::SW)) .addReg(Mips::RA).addImm(RAOffset).addReg(Mips::SP); } @@ -477,7 +477,7 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const // Restore the return address only if the function isn't a leaf one. // lw $ra, stack_loc($sp) - if (MFI->hasCalls()) { + if (MFI->adjustsStack()) { BuildMI(MBB, MBBI, dl, TII.get(Mips::LW), Mips::RA) .addImm(RAOffset).addReg(Mips::SP); } diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h index 9fd044c..bc857b8 100644 --- a/lib/Target/Mips/MipsRegisterInfo.h +++ b/lib/Target/Mips/MipsRegisterInfo.h @@ -23,15 +23,6 @@ class MipsSubtarget; class TargetInstrInfo; class Type; -namespace Mips { - /// SubregIndex - The index of various sized subregister classes. Note that - /// these indices must be kept in sync with the class indices in the - /// MipsRegisterInfo.td file.
- enum SubregIndex { - SUBREG_FPEVEN = 1, SUBREG_FPODD = 2 - }; -} - struct MipsRegisterInfo : public MipsGenRegisterInfo { const MipsSubtarget &Subtarget; const TargetInstrInfo &TII; diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td index 00e7723..be78a22 100644 --- a/lib/Target/Mips/MipsRegisterInfo.td +++ b/lib/Target/Mips/MipsRegisterInfo.td @@ -34,9 +34,14 @@ class FPR<bits<5> num, string n> : MipsReg<n> { } // Mips 64-bit (aliased) FPU Registers -class AFPR<bits<5> num, string n, list<Register> subregs> +let Namespace = "Mips" in { +def sub_fpeven : SubRegIndex; +def sub_fpodd : SubRegIndex; +} +class AFPR<bits<5> num, string n, list<Register> subregs> : MipsRegWithSubRegs<n, subregs> { let Num = num; + let SubRegIndices = [sub_fpeven, sub_fpodd]; } //===----------------------------------------------------------------------===// @@ -141,23 +146,6 @@ let Namespace = "Mips" in { } //===----------------------------------------------------------------------===// -// Subregister Set Definitions -//===----------------------------------------------------------------------===// - -def mips_subreg_fpeven : PatLeaf<(i32 1)>; -def mips_subreg_fpodd : PatLeaf<(i32 2)>; - -def : SubRegSet<1, [D0, D1, D2, D3, D4, D5, D6, D7, - D8, D9, D10, D11, D12, D13, D14, D15], - [F0, F2, F4, F6, F8, F10, F12, F14, - F16, F18, F20, F22, F24, F26, F28, F30]>; - -def : SubRegSet<2, [D0, D1, D2, D3, D4, D5, D6, D7, - D8, D9, D10, D11, D12, D13, D14, D15], - [F1, F3, F5, F7, F9, F11, F13, F15, - F17, F19, F21, F23, F25, F27, F29, F31]>; - -//===----------------------------------------------------------------------===// // Register Classes //===----------------------------------------------------------------------===// @@ -255,7 +243,7 @@ def AFGR64 : RegisterClass<"Mips", [f64], 64, // Reserved D15]> { - let SubRegClassList = [FGR32, FGR32]; + let SubRegClasses = [(FGR32 sub_fpeven, sub_fpodd)]; let MethodProtos = [{ iterator allocation_order_end(const MachineFunction &MF) const; }]; diff --git a/lib/Target/Mips/MipsSelectionDAGInfo.cpp b/lib/Target/Mips/MipsSelectionDAGInfo.cpp index 72c149d..e4d70fc 100644 --- a/lib/Target/Mips/MipsSelectionDAGInfo.cpp +++ b/lib/Target/Mips/MipsSelectionDAGInfo.cpp @@ -12,10 +12,11 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "mips-selectiondag-info" -#include "MipsSelectionDAGInfo.h" +#include "MipsTargetMachine.h" using namespace llvm; -MipsSelectionDAGInfo::MipsSelectionDAGInfo() { +MipsSelectionDAGInfo::MipsSelectionDAGInfo(const MipsTargetMachine &TM) + : TargetSelectionDAGInfo(TM) { } MipsSelectionDAGInfo::~MipsSelectionDAGInfo() { diff --git a/lib/Target/Mips/MipsSelectionDAGInfo.h b/lib/Target/Mips/MipsSelectionDAGInfo.h index 6eaf0c9..6cafb55 100644 --- a/lib/Target/Mips/MipsSelectionDAGInfo.h +++ b/lib/Target/Mips/MipsSelectionDAGInfo.h @@ -18,9 +18,11 @@ namespace llvm { +class MipsTargetMachine; + class MipsSelectionDAGInfo : public TargetSelectionDAGInfo { public: - MipsSelectionDAGInfo(); + explicit MipsSelectionDAGInfo(const MipsTargetMachine &TM); ~MipsSelectionDAGInfo(); }; diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp index 4724ff7..ad3eb9e 100644 --- a/lib/Target/Mips/MipsTargetMachine.cpp +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -42,7 +42,7 @@ MipsTargetMachine(const Target &T, const std::string &TT, const std::string &FS, std::string("E-p:32:32:32-i8:8:32-i16:16:32-n32")), InstrInfo(*this), 
FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0), - TLInfo(*this) { + TLInfo(*this), TSInfo(*this) { // Abicall enables PIC by default if (getRelocationModel() == Reloc::Default) { if (Subtarget.isABI_O32()) diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h index cd671cf..d63976f 100644 --- a/lib/Target/Mips/MipsTargetMachine.h +++ b/lib/Target/Mips/MipsTargetMachine.h @@ -17,6 +17,7 @@ #include "MipsSubtarget.h" #include "MipsInstrInfo.h" #include "MipsISelLowering.h" +#include "MipsSelectionDAGInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetFrameInfo.h" @@ -30,6 +31,7 @@ namespace llvm { MipsInstrInfo InstrInfo; TargetFrameInfo FrameInfo; MipsTargetLowering TLInfo; + MipsSelectionDAGInfo TSInfo; public: MipsTargetMachine(const Target &T, const std::string &TT, const std::string &FS, bool isLittle); @@ -51,6 +53,10 @@ namespace llvm { return &TLInfo; } + virtual const MipsSelectionDAGInfo* getSelectionDAGInfo() const { + return &TSInfo; + } + // Pass Pipeline Configuration virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); diff --git a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h index a424c27..aa2e1f4 100644 --- a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h +++ b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h @@ -29,7 +29,7 @@ #include <string> namespace llvm { - class VISIBILITY_HIDDEN PIC16AsmPrinter : public AsmPrinter { + class LLVM_LIBRARY_VISIBILITY PIC16AsmPrinter : public AsmPrinter { public: explicit PIC16AsmPrinter(TargetMachine &TM, MCStreamer &Streamer); private: diff --git a/lib/Target/PIC16/PIC16DebugInfo.cpp b/lib/Target/PIC16/PIC16DebugInfo.cpp index 5d86329..6a4d0d6 100644 --- a/lib/Target/PIC16/PIC16DebugInfo.cpp +++ b/lib/Target/PIC16/PIC16DebugInfo.cpp @@ -70,7 +70,7 @@ void PIC16DbgInfo::PopulateDerivedTypeInfo (DIType Ty, unsigned short &TypeNo, // We also need to encode the information about the base type of // pointer in TypeNo. - DIType BaseType = DIDerivedType(Ty.getNode()).getTypeDerivedFrom(); + DIType BaseType = DIDerivedType(Ty).getTypeDerivedFrom(); PopulateDebugInfo(BaseType, TypeNo, HasAux, Aux, TagName); } @@ -79,7 +79,7 @@ void PIC16DbgInfo::PopulateArrayTypeInfo (DIType Ty, unsigned short &TypeNo, bool &HasAux, int Aux[], std::string &TagName) { - DICompositeType CTy = DICompositeType(Ty.getNode()); + DICompositeType CTy = DICompositeType(Ty); DIArray Elements = CTy.getTypeArray(); unsigned short size = 1; unsigned short Dimension[4]={0,0,0,0}; @@ -88,7 +88,7 @@ void PIC16DbgInfo::PopulateArrayTypeInfo (DIType Ty, unsigned short &TypeNo, if (Element.getTag() == dwarf::DW_TAG_subrange_type) { TypeNo = TypeNo << PIC16Dbg::S_DERIVED; TypeNo = TypeNo | PIC16Dbg::DT_ARY; - DISubrange SubRange = DISubrange(Element.getNode()); + DISubrange SubRange = DISubrange(Element); Dimension[i] = SubRange.getHi() - SubRange.getLo() + 1; // Each dimension is represented by 2 bytes starting at byte 9. 
Aux[8+i*2+0] = Dimension[i]; @@ -111,7 +111,7 @@ void PIC16DbgInfo::PopulateStructOrUnionTypeInfo (DIType Ty, unsigned short &TypeNo, bool &HasAux, int Aux[], std::string &TagName) { - DICompositeType CTy = DICompositeType(Ty.getNode()); + DICompositeType CTy = DICompositeType(Ty); TypeNo = TypeNo << PIC16Dbg::S_BASIC; if (Ty.getTag() == dwarf::DW_TAG_structure_type) TypeNo = TypeNo | PIC16Dbg::T_STRUCT; @@ -124,7 +124,7 @@ void PIC16DbgInfo::PopulateStructOrUnionTypeInfo (DIType Ty, // llvm.dbg.composite* global variable. Since we need to revisit // PIC16DebugInfo implementation anyways after the MDNodes based // framework is done, let us continue with the way it is. - std::string UniqueSuffix = "." + Ty.getNode()->getNameStr().substr(18); + std::string UniqueSuffix = "." + Ty->getNameStr().substr(18); TagName += UniqueSuffix; unsigned short size = CTy.getSizeInBits()/8; // 7th and 8th byte represent size. @@ -303,7 +303,7 @@ void PIC16DbgInfo::EmitCompositeTypeElements (DICompositeType CTy, bool HasAux = false; int ElementAux[PIC16Dbg::AuxSize] = { 0 }; std::string TagName = ""; - DIDerivedType DITy(Element.getNode()); + DIDerivedType DITy(Element); unsigned short ElementSize = DITy.getSizeInBits()/8; // Get the mangled name for this structure/union element. std::string MangMemName = DITy.getName().str() + SuffixNo; @@ -336,7 +336,7 @@ void PIC16DbgInfo::EmitCompositeTypeDecls(Module &M) { CTy.getTag() == dwarf::DW_TAG_structure_type ) { // Get the number after llvm.dbg.composite and make UniqueSuffix from // it. - std::string DIVar = CTy.getNode()->getNameStr(); + std::string DIVar = CTy->getNameStr(); std::string UniqueSuffix = "." + DIVar.substr(18); std::string MangledCTyName = CTy.getName().str() + UniqueSuffix; unsigned short size = CTy.getSizeInBits()/8; diff --git a/lib/Target/PIC16/PIC16ISelDAGToDAG.h b/lib/Target/PIC16/PIC16ISelDAGToDAG.h index f1fcec5..ecaddd3 100644 --- a/lib/Target/PIC16/PIC16ISelDAGToDAG.h +++ b/lib/Target/PIC16/PIC16ISelDAGToDAG.h @@ -26,7 +26,7 @@ using namespace llvm; namespace { -class VISIBILITY_HIDDEN PIC16DAGToDAGISel : public SelectionDAGISel { +class LLVM_LIBRARY_VISIBILITY PIC16DAGToDAGISel : public SelectionDAGISel { /// TM - Keep a reference to PIC16TargetMachine.
const PIC16TargetMachine &TM; diff --git a/lib/Target/PIC16/PIC16InstrInfo.cpp b/lib/Target/PIC16/PIC16InstrInfo.cpp index 9e415e0..793dd9f 100644 --- a/lib/Target/PIC16/PIC16InstrInfo.cpp +++ b/lib/Target/PIC16/PIC16InstrInfo.cpp @@ -70,7 +70,8 @@ unsigned PIC16InstrInfo::isLoadFromStackSlot(const MachineInstr *MI, void PIC16InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned SrcReg, bool isKill, int FI, - const TargetRegisterClass *RC) const { + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { const PIC16TargetLowering *PTLI = TM.getTargetLowering(); DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); @@ -112,7 +113,8 @@ void PIC16InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, void PIC16InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, int FI, - const TargetRegisterClass *RC) const { + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { const PIC16TargetLowering *PTLI = TM.getTargetLowering(); DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); @@ -153,9 +155,8 @@ bool PIC16InstrInfo::copyRegToReg (MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const { - DebugLoc DL; - if (I != MBB.end()) DL = I->getDebugLoc(); + const TargetRegisterClass *SrcRC, + DebugLoc DL) const { if (DestRC == PIC16::FSR16RegisterClass) { BuildMI(MBB, I, DL, get(PIC16::copy_fsr), DestReg).addReg(SrcReg); diff --git a/lib/Target/PIC16/PIC16InstrInfo.h b/lib/Target/PIC16/PIC16InstrInfo.h index 56f51f0..40a4cb4 100644 --- a/lib/Target/PIC16/PIC16InstrInfo.h +++ b/lib/Target/PIC16/PIC16InstrInfo.h @@ -49,17 +49,20 @@ public: virtual void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC) const; + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg, int FrameIndex, - const TargetRegisterClass *RC) const; + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; virtual bool copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const; + const TargetRegisterClass *SrcRC, + DebugLoc DL) const; virtual bool isMoveInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DstReg, unsigned &SrcSubIdx, unsigned &DstSubIdx) const; diff --git a/lib/Target/PIC16/PIC16Section.h b/lib/Target/PIC16/PIC16Section.h index 9039ca7..5b33b51 100644 --- a/lib/Target/PIC16/PIC16Section.h +++ b/lib/Target/PIC16/PIC16Section.h @@ -44,7 +44,8 @@ namespace llvm { unsigned Size; PIC16Section(StringRef name, SectionKind K, StringRef addr, int color) - : MCSection(K), Name(name), Address(addr), Color(color), Size(0) { + : MCSection(SV_PIC16, K), Name(name), Address(addr), + Color(color), Size(0) { } public: @@ -86,6 +87,11 @@ namespace llvm { /// to a section. 
virtual void PrintSwitchToSection(const MCAsmInfo &MAI, raw_ostream &OS) const; + + static bool classof(const MCSection *S) { + return S->getVariant() == SV_PIC16; + } + static bool classof(const PIC16Section *) { return true; } }; } // end namespace llvm diff --git a/lib/Target/PIC16/PIC16SelectionDAGInfo.cpp b/lib/Target/PIC16/PIC16SelectionDAGInfo.cpp index 76c6c60..995955a 100644 --- a/lib/Target/PIC16/PIC16SelectionDAGInfo.cpp +++ b/lib/Target/PIC16/PIC16SelectionDAGInfo.cpp @@ -12,10 +12,11 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "pic16-selectiondag-info" -#include "PIC16SelectionDAGInfo.h" +#include "PIC16TargetMachine.h" using namespace llvm; -PIC16SelectionDAGInfo::PIC16SelectionDAGInfo() { +PIC16SelectionDAGInfo::PIC16SelectionDAGInfo(const PIC16TargetMachine &TM) + : TargetSelectionDAGInfo(TM) { } PIC16SelectionDAGInfo::~PIC16SelectionDAGInfo() { diff --git a/lib/Target/PIC16/PIC16SelectionDAGInfo.h b/lib/Target/PIC16/PIC16SelectionDAGInfo.h index 112480e5..c67fd8b 100644 --- a/lib/Target/PIC16/PIC16SelectionDAGInfo.h +++ b/lib/Target/PIC16/PIC16SelectionDAGInfo.h @@ -18,9 +18,11 @@ namespace llvm { +class PIC16TargetMachine; + class PIC16SelectionDAGInfo : public TargetSelectionDAGInfo { public: - PIC16SelectionDAGInfo(); + explicit PIC16SelectionDAGInfo(const PIC16TargetMachine &TM); ~PIC16SelectionDAGInfo(); }; diff --git a/lib/Target/PIC16/PIC16TargetMachine.cpp b/lib/Target/PIC16/PIC16TargetMachine.cpp index e2acb85..82b69be 100644 --- a/lib/Target/PIC16/PIC16TargetMachine.cpp +++ b/lib/Target/PIC16/PIC16TargetMachine.cpp @@ -35,7 +35,7 @@ PIC16TargetMachine::PIC16TargetMachine(const Target &T, const std::string &TT, : LLVMTargetMachine(T, TT), Subtarget(TT, FS, Trad), DataLayout("e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8-n8"), - InstrInfo(*this), TLInfo(*this), + InstrInfo(*this), TLInfo(*this), TSInfo(*this), FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0) { } diff --git a/lib/Target/PIC16/PIC16TargetMachine.h b/lib/Target/PIC16/PIC16TargetMachine.h index 849845a..dae5d31 100644 --- a/lib/Target/PIC16/PIC16TargetMachine.h +++ b/lib/Target/PIC16/PIC16TargetMachine.h @@ -17,6 +17,7 @@ #include "PIC16InstrInfo.h" #include "PIC16ISelLowering.h" +#include "PIC16SelectionDAGInfo.h" #include "PIC16RegisterInfo.h" #include "PIC16Subtarget.h" #include "llvm/Target/TargetData.h" @@ -32,6 +33,7 @@ class PIC16TargetMachine : public LLVMTargetMachine { const TargetData DataLayout; // Calculates type size & alignment PIC16InstrInfo InstrInfo; PIC16TargetLowering TLInfo; + PIC16SelectionDAGInfo TSInfo; // PIC16 does not have any call stack frame, therefore not having // any PIC16 specific FrameInfo class. 
@@ -54,6 +56,10 @@ public: return &TLInfo; } + virtual const PIC16SelectionDAGInfo* getSelectionDAGInfo() const { + return &TSInfo; + } + virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel); diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 3d9f8aa..00eebb8 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -712,8 +712,9 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { if (PPCSubTarget.isGigaProcessor() && OtherCondIdx == -1) IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg, CCReg), 0); - else - IntCR = SDValue(CurDAG->getMachineNode(PPC::MFCR, dl, MVT::i32, CCReg), 0); + else + IntCR = SDValue(CurDAG->getMachineNode(PPC::MFCRpseud, dl, MVT::i32, + CR7Reg, CCReg), 0); SDValue Ops[] = { IntCR, getI32Imm((32-(3-Idx)) & 31), getI32Imm(31), getI32Imm(31) }; @@ -848,7 +849,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { return CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, N->getOperand(0), InFlag); else - return CurDAG->getMachineNode(PPC::MFCR, dl, MVT::i32, InFlag); + return CurDAG->getMachineNode(PPC::MFCRpseud, dl, MVT::i32, + N->getOperand(0), InFlag); } case ISD::SDIV: { diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 6f11953..10b516a 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -5496,12 +5496,15 @@ bool PPCTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const { SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MFI->setReturnAddressIsTaken(true); + DebugLoc dl = Op.getDebugLoc(); unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); // Make sure the function does not optimize away the store of the RA to // the stack. - MachineFunction &MF = DAG.getMachineFunction(); PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); FuncInfo->setLRStoreRequired(); bool isPPC64 = PPCSubTarget.isPPC64(); diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 1d05f3d..6dcaf1e 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -111,9 +111,10 @@ namespace llvm { /// Return with a flag operand, matched by 'blr' RET_FLAG, - /// R32 = MFCR(CRREG, INFLAG) - Represents the MFCR/MFOCRF instructions. - /// This copies the bits corresponding to the specified CRREG into the - /// resultant GPR. Bits corresponding to other CR regs are undefined. + /// R32 = MFCR(CRREG, INFLAG) - Represents the MFCRpseud/MFOCRF + /// instructions. This copies the bits corresponding to the specified + /// CRREG into the resultant GPR. Bits corresponding to other CR regs + /// are undefined. 
MFCR, /// RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP* diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index ae1fbd8..1b7a778 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -203,8 +203,6 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const { DebugLoc DL; - if (MI != MBB.end()) DL = MI->getDebugLoc(); - BuildMI(MBB, MI, DL, get(PPC::NOP)); } @@ -347,15 +345,13 @@ bool PPCInstrInfo::copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const { + const TargetRegisterClass *SrcRC, + DebugLoc DL) const { if (DestRC != SrcRC) { // Not yet supported! return false; } - DebugLoc DL; - if (MI != MBB.end()) DL = MI->getDebugLoc(); - if (DestRC == PPC::GPRCRegisterClass) { BuildMI(MBB, MI, DL, get(PPC::OR), DestReg).addReg(SrcReg).addReg(SrcReg); } else if (DestRC == PPC::G8RCRegisterClass) { @@ -446,7 +442,8 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, // issue a MFCR to save all of the CRBits. unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ? PPC::R2 : PPC::R0; - NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFCR), ScratchReg)); + NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFCRpseud), ScratchReg) + .addReg(SrcReg, getKillRegState(isKill))); // If the saved register wasn't CR0, shift the bits left so that they are // in CR0's slot. @@ -520,7 +517,8 @@ void PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned SrcReg, bool isKill, int FrameIdx, - const TargetRegisterClass *RC) const { + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { MachineFunction &MF = *MBB.getParent(); SmallVector<MachineInstr*, 4> NewMIs; @@ -635,7 +633,8 @@ void PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, int FrameIdx, - const TargetRegisterClass *RC) const { + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { MachineFunction &MF = *MBB.getParent(); SmallVector<MachineInstr*, 4> NewMIs; DebugLoc DL; diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h index 9fb6e7d..7a9e11b 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.h +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -114,17 +114,20 @@ public: MachineBasicBlock::iterator MI, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const; + const TargetRegisterClass *SrcRC, + DebugLoc DL) const; virtual void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC) const; + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg, int FrameIndex, - const TargetRegisterClass *RC) const; + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx, diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 532a3ec..63b4581 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -662,7 +662,7 @@ def STWCX : XForm_1<31, 150, 
(outs), (ins GPRC:$rS, memrr:$dst), [(PPCstcx GPRC:$rS, xoaddr:$dst)]>, isDOT; -let isBarrier = 1, hasCtrlDep = 1 in +let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in def TRAP : XForm_24<31, 4, (outs), (ins), "trap", LdStGeneral, [(trap)]>; //===----------------------------------------------------------------------===// @@ -862,7 +862,6 @@ def STFDX : XForm_28<31, 727, (outs), (ins F8RC:$frS, memrr:$dst), [(store F8RC:$frS, xaddr:$dst)]>; } -let isBarrier = 1 in def SYNC : XForm_24_sync<31, 598, (outs), (ins), "sync", LdStSync, [(int_ppc_sync)]>; @@ -1118,14 +1117,17 @@ def MFVRSAVE : XFXForm_1_ext<31, 339, 256, (outs GPRC:$rT), (ins), def MTCRF : XFXForm_5<31, 144, (outs), (ins crbitm:$FXM, GPRC:$rS), "mtcrf $FXM, $rS", BrMCRX>, PPC970_MicroCode, PPC970_Unit_CRU; -// FIXME: this Uses all the CR registers. Marking it as such is -// necessary for DeadMachineInstructionElim to do the right thing. -// However, marking it also exposes PR 2964, and causes crashes in -// the Local RA because it doesn't like this sequence: + +// This is a pseudo for MFCR, which implicitly uses all 8 of its subregisters; +// declaring that here gives the local register allocator problems with this: // vreg = MCRF CR0 // MFCR <kill of whatever preg got assigned to vreg> -// For now DeadMachineInstructionElim is turned off, so don't do the marking. -def MFCR : XFXForm_3<31, 19, (outs GPRC:$rT), (ins), "mfcr $rT", SprMFCR>, +// while not declaring it breaks DeadMachineInstructionElimination. +// As it turns out, in all cases where we currently use this, +// we're only interested in one subregister of it. Represent this in the +// instruction to keep the register allocator from becoming confused. +def MFCRpseud: XFXForm_3<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM), + "mfcr $rT ${:comment} $FXM", SprMFCR>, PPC970_MicroCode, PPC970_Unit_CRU; def MFOCRF: XFXForm_5a<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM), "mfcr $rT, $FXM", SprMFCR>, diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 5f1e04e..0ff852c 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -689,19 +689,15 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II, const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; const TargetRegisterClass *RC = Subtarget.isPPC64() ? G8RC : GPRC; unsigned Reg = findScratchRegister(II, RS, RC, SPAdj); + unsigned SrcReg = MI.getOperand(0).getReg(); // We need to store the CR in the low 4-bits of the saved value. First, issue - // an MFCR to save all of the CRBits. Add an implicit kill of the CR. - if (!MI.getOperand(0).isKill()) - BuildMI(MBB, II, dl, TII.get(PPC::MFCR), Reg); - else - // Implicitly kill the CR register. - BuildMI(MBB, II, dl, TII.get(PPC::MFCR), Reg) - .addReg(MI.getOperand(0).getReg(), RegState::ImplicitKill); + // an MFCRpseud to save all of the CRBits and, if needed, kill the SrcReg. + BuildMI(MBB, II, dl, TII.get(PPC::MFCRpseud), Reg) + .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill())); // If the saved register wasn't CR0, shift the bits left so that they are in // CR0's slot. - unsigned SrcReg = MI.getOperand(0).getReg(); if (SrcReg != PPC::CR0) // rlwinm rA, rA, ShiftBits, 0, 31. BuildMI(MBB, II, dl, TII.get(PPC::RLWINM), Reg) @@ -1009,7 +1005,7 @@ void PPCRegisterInfo::determineFrameLayout(MachineFunction &MF) const { if (!DisableRedZone && FrameSize <= 224 && // Fits in red zone. !MFI->hasVarSizedObjects() && // No dynamic alloca. - !MFI->hasCalls() && // No calls.
+ !MFI->adjustsStack() && // No calls. (!ALIGN_STACK || MaxAlign <= TargetAlign)) { // No special alignment. // No need for frame MFI->setStackSize(0); diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td index 1cb7340..8604f54 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/lib/Target/PowerPC/PPCRegisterInfo.td @@ -10,6 +10,15 @@ // //===----------------------------------------------------------------------===// +let Namespace = "PPC" in { +def sub_lt : SubRegIndex; +def sub_gt : SubRegIndex; +def sub_eq : SubRegIndex; +def sub_un : SubRegIndex; +def sub_32 : SubRegIndex; +} + + class PPCReg<string n> : Register<n> { let Namespace = "PPC"; } @@ -25,6 +34,7 @@ class GPR<bits<5> num, string n> : PPCReg<n> { class GP8<GPR SubReg, string n> : PPCReg<n> { field bits<5> Num = SubReg.Num; let SubRegs = [SubReg]; + let SubRegIndices = [sub_32]; } // SPR - One of the 32-bit special-purpose registers @@ -225,6 +235,7 @@ def CR7EQ : CRBIT<30, "30">, DwarfRegNum<[0]>; def CR7UN : CRBIT<31, "31">, DwarfRegNum<[0]>; // Condition registers +let SubRegIndices = [sub_lt, sub_gt, sub_eq, sub_un] in { def CR0 : CR<0, "cr0", [CR0LT, CR0GT, CR0EQ, CR0UN]>, DwarfRegNum<[68]>; def CR1 : CR<1, "cr1", [CR1LT, CR1GT, CR1EQ, CR1UN]>, DwarfRegNum<[69]>; def CR2 : CR<2, "cr2", [CR2LT, CR2GT, CR2EQ, CR2UN]>, DwarfRegNum<[70]>; @@ -233,15 +244,7 @@ def CR4 : CR<4, "cr4", [CR4LT, CR4GT, CR4EQ, CR4UN]>, DwarfRegNum<[72]>; def CR5 : CR<5, "cr5", [CR5LT, CR5GT, CR5EQ, CR5UN]>, DwarfRegNum<[73]>; def CR6 : CR<6, "cr6", [CR6LT, CR6GT, CR6EQ, CR6UN]>, DwarfRegNum<[74]>; def CR7 : CR<7, "cr7", [CR7LT, CR7GT, CR7EQ, CR7UN]>, DwarfRegNum<[75]>; - -def : SubRegSet<1, [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7], - [CR0LT, CR1LT, CR2LT, CR3LT, CR4LT, CR5LT, CR6LT, CR7LT]>; -def : SubRegSet<2, [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7], - [CR0GT, CR1GT, CR2GT, CR3GT, CR4GT, CR5GT, CR6GT, CR7GT]>; -def : SubRegSet<3, [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7], - [CR0EQ, CR1EQ, CR2EQ, CR3EQ, CR4EQ, CR5EQ, CR6EQ, CR7EQ]>; -def : SubRegSet<4, [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7], - [CR0UN, CR1UN, CR2UN, CR3UN, CR4UN, CR5UN, CR6UN, CR7UN]>; +} // Link register def LR : SPR<8, "lr">, DwarfRegNum<[65]>; @@ -372,7 +375,7 @@ def CRBITRC : RegisterClass<"PPC", [i32], 32, def CRRC : RegisterClass<"PPC", [i32], 32, [CR0, CR1, CR5, CR6, CR7, CR2, CR3, CR4]> { - let SubRegClassList = [CRBITRC, CRBITRC, CRBITRC, CRBITRC]; + let SubRegClasses = [(CRBITRC sub_lt, sub_gt, sub_eq, sub_un)]; } def CTRRC : RegisterClass<"PPC", [i32], 32, [CTR]>; diff --git a/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp b/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp index c0004a9..d4258b4 100644 --- a/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp +++ b/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp @@ -12,10 +12,11 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "powerpc-selectiondag-info" -#include "PPCSelectionDAGInfo.h" +#include "PPCTargetMachine.h" using namespace llvm; -PPCSelectionDAGInfo::PPCSelectionDAGInfo() { +PPCSelectionDAGInfo::PPCSelectionDAGInfo(const PPCTargetMachine &TM) + : TargetSelectionDAGInfo(TM) { } PPCSelectionDAGInfo::~PPCSelectionDAGInfo() { diff --git a/lib/Target/PowerPC/PPCSelectionDAGInfo.h b/lib/Target/PowerPC/PPCSelectionDAGInfo.h index 3ad3418..341b69c 100644 --- a/lib/Target/PowerPC/PPCSelectionDAGInfo.h +++ b/lib/Target/PowerPC/PPCSelectionDAGInfo.h @@ -18,9 +18,11 @@ namespace llvm { +class PPCTargetMachine; + class PPCSelectionDAGInfo : public 
TargetSelectionDAGInfo { public: - PPCSelectionDAGInfo(); + explicit PPCSelectionDAGInfo(const PPCTargetMachine &TM); ~PPCSelectionDAGInfo(); }; diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index c4a7408..10cd10b 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -44,7 +44,8 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, const std::string &TT, : LLVMTargetMachine(T, TT), Subtarget(TT, FS, is64Bit), DataLayout(Subtarget.getTargetDataString()), InstrInfo(*this), - FrameInfo(*this, is64Bit), JITInfo(*this, is64Bit), TLInfo(*this), + FrameInfo(*this, is64Bit), JITInfo(*this, is64Bit), + TLInfo(*this), TSInfo(*this), InstrItins(Subtarget.getInstrItineraryData()) { if (getRelocationModel() == Reloc::Default) { diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h index 35e33a2..626ddbb 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.h +++ b/lib/Target/PowerPC/PPCTargetMachine.h @@ -19,6 +19,7 @@ #include "PPCJITInfo.h" #include "PPCInstrInfo.h" #include "PPCISelLowering.h" +#include "PPCSelectionDAGInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetData.h" @@ -35,6 +36,7 @@ class PPCTargetMachine : public LLVMTargetMachine { PPCFrameInfo FrameInfo; PPCJITInfo JITInfo; PPCTargetLowering TLInfo; + PPCSelectionDAGInfo TSInfo; InstrItineraryData InstrItins; public: @@ -47,6 +49,9 @@ public: virtual const PPCTargetLowering *getTargetLowering() const { return &TLInfo; } + virtual const PPCSelectionDAGInfo* getSelectionDAGInfo() const { + return &TSInfo; + } virtual const PPCRegisterInfo *getRegisterInfo() const { return &InstrInfo.getRegisterInfo(); } diff --git a/lib/Target/README.txt b/lib/Target/README.txt index 144bf5d..7fa73ed 100644 --- a/lib/Target/README.txt +++ b/lib/Target/README.txt @@ -1833,6 +1833,21 @@ entry: We should use DSE + llvm.lifetime.end to delete dead vtable pointer updates. See GCC PR34949 +Another interesting case is that something related could be used for variables +that go const after their ctor has finished. In these cases, globalopt (which +can statically run the constructor) could mark the global const (so it gets put +in the readonly section). A testcase would be: + +#include <complex> +using namespace std; +const complex<char> should_be_in_rodata (42,-42); +complex<char> should_be_in_data (42,-42); +complex<char> should_be_in_bss; + +Here we currently evaluate the ctors, but the globals don't become const because +the optimizer doesn't know they "become const" after the ctor is done. See +GCC PR4131 for more examples. + //===---------------------------------------------------------------------===// In this code: diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp index e494d7d..8e49eca 100644 --- a/lib/Target/Sparc/SparcInstrInfo.cpp +++ b/lib/Target/Sparc/SparcInstrInfo.cpp @@ -122,15 +122,13 @@ bool SparcInstrInfo::copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const { + const TargetRegisterClass *SrcRC, + DebugLoc DL) const { if (DestRC != SrcRC) { // Not yet supported!
return false; } - DebugLoc DL; - if (I != MBB.end()) DL = I->getDebugLoc(); - if (DestRC == SP::IntRegsRegisterClass) BuildMI(MBB, I, DL, get(SP::ORrr), DestReg).addReg(SP::G0).addReg(SrcReg); else if (DestRC == SP::FPRegsRegisterClass) @@ -148,7 +146,8 @@ bool SparcInstrInfo::copyRegToReg(MachineBasicBlock &MBB, void SparcInstrInfo:: storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned SrcReg, bool isKill, int FI, - const TargetRegisterClass *RC) const { + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); @@ -169,7 +168,8 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, void SparcInstrInfo:: loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, int FI, - const TargetRegisterClass *RC) const { + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); diff --git a/lib/Target/Sparc/SparcInstrInfo.h b/lib/Target/Sparc/SparcInstrInfo.h index 345674b..a00ba39 100644 --- a/lib/Target/Sparc/SparcInstrInfo.h +++ b/lib/Target/Sparc/SparcInstrInfo.h @@ -74,17 +74,20 @@ public: MachineBasicBlock::iterator I, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const; + const TargetRegisterClass *SrcRC, + DebugLoc DL) const; virtual void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC) const; + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg, int FrameIndex, - const TargetRegisterClass *RC) const; + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, MachineInstr* MI, diff --git a/lib/Target/Sparc/SparcRegisterInfo.td b/lib/Target/Sparc/SparcRegisterInfo.td index 2b05c19..fede929 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.td +++ b/lib/Target/Sparc/SparcRegisterInfo.td @@ -20,6 +20,11 @@ class SparcCtrlReg<string n>: Register<n> { let Namespace = "SP"; } +let Namespace = "SP" in { +def sub_even : SubRegIndex; +def sub_odd : SubRegIndex; +} + // Registers are identified with 5-bit ID numbers. 
// Ri - 32-bit integer registers class Ri<bits<5> num, string n> : SparcReg<n> { @@ -33,6 +38,7 @@ class Rf<bits<5> num, string n> : SparcReg<n> { class Rd<bits<5> num, string n, list<Register> subregs> : SparcReg<n> { let Num = num; let SubRegs = subregs; + let SubRegIndices = [sub_even, sub_odd]; } // Control Registers diff --git a/lib/Target/Sparc/SparcSelectionDAGInfo.cpp b/lib/Target/Sparc/SparcSelectionDAGInfo.cpp index 4825aa9..190c575 100644 --- a/lib/Target/Sparc/SparcSelectionDAGInfo.cpp +++ b/lib/Target/Sparc/SparcSelectionDAGInfo.cpp @@ -12,10 +12,11 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "sparc-selectiondag-info" -#include "SparcSelectionDAGInfo.h" +#include "SparcTargetMachine.h" using namespace llvm; -SparcSelectionDAGInfo::SparcSelectionDAGInfo() { +SparcSelectionDAGInfo::SparcSelectionDAGInfo(const SparcTargetMachine &TM) + : TargetSelectionDAGInfo(TM) { } SparcSelectionDAGInfo::~SparcSelectionDAGInfo() { diff --git a/lib/Target/Sparc/SparcSelectionDAGInfo.h b/lib/Target/Sparc/SparcSelectionDAGInfo.h index bc1b561..dcd4203 100644 --- a/lib/Target/Sparc/SparcSelectionDAGInfo.h +++ b/lib/Target/Sparc/SparcSelectionDAGInfo.h @@ -18,9 +18,11 @@ namespace llvm { +class SparcTargetMachine; + class SparcSelectionDAGInfo : public TargetSelectionDAGInfo { public: - SparcSelectionDAGInfo(); + explicit SparcSelectionDAGInfo(const SparcTargetMachine &TM); ~SparcSelectionDAGInfo(); }; diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp index a676623..b58d6ba 100644 --- a/lib/Target/Sparc/SparcTargetMachine.cpp +++ b/lib/Target/Sparc/SparcTargetMachine.cpp @@ -34,7 +34,7 @@ SparcTargetMachine::SparcTargetMachine(const Target &T, const std::string &TT, : LLVMTargetMachine(T, TT), Subtarget(TT, FS, is64bit), DataLayout(Subtarget.getDataLayout()), - TLInfo(*this), InstrInfo(Subtarget), + TLInfo(*this), TSInfo(*this), InstrInfo(Subtarget), FrameInfo(TargetFrameInfo::StackGrowsDown, 8, 0) { } diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h index 1367a31..322c82a 100644 --- a/lib/Target/Sparc/SparcTargetMachine.h +++ b/lib/Target/Sparc/SparcTargetMachine.h @@ -20,6 +20,7 @@ #include "SparcInstrInfo.h" #include "SparcSubtarget.h" #include "SparcISelLowering.h" +#include "SparcSelectionDAGInfo.h" namespace llvm { @@ -27,6 +28,7 @@ class SparcTargetMachine : public LLVMTargetMachine { SparcSubtarget Subtarget; const TargetData DataLayout; // Calculates type size & alignment SparcTargetLowering TLInfo; + SparcSelectionDAGInfo TSInfo; SparcInstrInfo InstrInfo; TargetFrameInfo FrameInfo; public: @@ -42,6 +44,9 @@ public: virtual const SparcTargetLowering* getTargetLowering() const { return &TLInfo; } + virtual const SparcSelectionDAGInfo* getSelectionDAGInfo() const { + return &TSInfo; + } virtual const TargetData *getTargetData() const { return &DataLayout; } // Pass Pipeline Configuration diff --git a/lib/Target/SubtargetFeature.cpp b/lib/Target/SubtargetFeature.cpp index 2094cc9..b35190a 100644 --- a/lib/Target/SubtargetFeature.cpp +++ b/lib/Target/SubtargetFeature.cpp @@ -359,29 +359,25 @@ void SubtargetFeatures::dump() const { print(dbgs()); } -/// getDefaultSubtargetFeatures - Return a string listing -/// the features associated with the target triple. +/// getDefaultSubtargetFeatures - Return a string listing the features +/// associated with the target triple. 
/// /// FIXME: This is an inelegant way of specifying the features of a /// subtarget. It would be better if we could encode this information /// into the IR. See <rdar://5972456>. /// -std::string SubtargetFeatures::getDefaultSubtargetFeatures( - const Triple& Triple) { - switch (Triple.getVendor()) { - case Triple::Apple: - switch (Triple.getArch()) { - case Triple::ppc: // powerpc-apple-* - return std::string("altivec"); - case Triple::ppc64: // powerpc64-apple-* - return std::string("64bit,altivec"); - default: - break; +void SubtargetFeatures::getDefaultSubtargetFeatures(const std::string &CPU, + const Triple& Triple) { + setCPU(CPU); + + if (Triple.getVendor() == Triple::Apple) { + if (Triple.getArch() == Triple::ppc) { + // powerpc-apple-* + AddFeature("altivec"); + } else if (Triple.getArch() == Triple::ppc64) { + // powerpc64-apple-* + AddFeature("64bit"); + AddFeature("altivec"); } - break; - default: - break; - } - - return std::string(""); + } } diff --git a/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp index 07cfb2c..90be222 100644 --- a/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp +++ b/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp @@ -124,9 +124,9 @@ void SystemZAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, unsigned Reg = MO.getReg(); if (Modifier && strncmp(Modifier, "subreg", 6) == 0) { if (strncmp(Modifier + 7, "even", 4) == 0) - Reg = TM.getRegisterInfo()->getSubReg(Reg, SystemZ::SUBREG_EVEN); + Reg = TM.getRegisterInfo()->getSubReg(Reg, SystemZ::subreg_even32); else if (strncmp(Modifier + 7, "odd", 3) == 0) - Reg = TM.getRegisterInfo()->getSubReg(Reg, SystemZ::SUBREG_ODD); + Reg = TM.getRegisterInfo()->getSubReg(Reg, SystemZ::subreg_odd32); else assert(0 && "Invalid subreg modifier"); } diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index 75d563b..bb2952a 100644 --- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -30,11 +30,6 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; -static const unsigned subreg_even32 = 1; -static const unsigned subreg_odd32 = 2; -static const unsigned subreg_even = 3; -static const unsigned subreg_odd = 4; - namespace { /// SystemZRRIAddressMode - This corresponds to rriaddr, but uses SDValue's /// instead of register numbers for the leaves of the matched tree. @@ -644,7 +639,7 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { Dividend = CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, dl, ResVT, SDValue(Tmp, 0), SDValue(Dividend, 0), - CurDAG->getTargetConstant(subreg_odd, MVT::i32)); + CurDAG->getTargetConstant(SystemZ::subreg_odd, MVT::i32)); SDNode *Result; SDValue DivVal = SDValue(Dividend, 0); @@ -660,7 +655,8 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { // Copy the division (odd subreg) result, if it is needed. if (!SDValue(Node, 0).use_empty()) { - unsigned SubRegIdx = (is32Bit ? subreg_odd32 : subreg_odd); + unsigned SubRegIdx = (is32Bit ? + SystemZ::subreg_odd32 : SystemZ::subreg_odd); SDNode *Div = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, NVT, SDValue(Result, 0), @@ -673,7 +669,8 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { // Copy the remainder (even subreg) result, if it is needed. if (!SDValue(Node, 1).use_empty()) { - unsigned SubRegIdx = (is32Bit ? subreg_even32 : subreg_even); + unsigned SubRegIdx = (is32Bit ? 
+ SystemZ::subreg_even32 : SystemZ::subreg_even); SDNode *Rem = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, NVT, SDValue(Result, 0), @@ -718,7 +715,8 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { SDNode *Tmp = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResVT); { - unsigned SubRegIdx = (is32Bit ? subreg_odd32 : subreg_odd); + unsigned SubRegIdx = (is32Bit ? + SystemZ::subreg_odd32 : SystemZ::subreg_odd); Dividend = CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, dl, ResVT, SDValue(Tmp, 0), SDValue(Dividend, 0), @@ -742,7 +740,8 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { // Copy the division (odd subreg) result, if it is needed. if (!SDValue(Node, 0).use_empty()) { - unsigned SubRegIdx = (is32Bit ? subreg_odd32 : subreg_odd); + unsigned SubRegIdx = (is32Bit ? + SystemZ::subreg_odd32 : SystemZ::subreg_odd); SDNode *Div = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, NVT, SDValue(Result, 0), @@ -754,7 +753,8 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { // Copy the remainder (even subreg) result, if it is needed. if (!SDValue(Node, 1).use_empty()) { - unsigned SubRegIdx = (is32Bit ? subreg_even32 : subreg_even); + unsigned SubRegIdx = (is32Bit ? + SystemZ::subreg_even32 : SystemZ::subreg_even); SDNode *Rem = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, NVT, SDValue(Result, 0), diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index e98f18b..76f2901 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -81,7 +81,7 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) : // LLVM's current latency-oriented scheduler can't handle physreg definitions // such as SystemZ has with PSW, so set this to the register-pressure // scheduler, because it can. - setSchedulingPreference(SchedulingForRegPressure); + setSchedulingPreference(Sched::RegPressure); setBooleanContents(ZeroOrOneBooleanContent); diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp index c92caa4..043686c 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -61,7 +61,8 @@ static inline bool isGVStub(GlobalValue *GV, SystemZTargetMachine &TM) { void SystemZInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned SrcReg, bool isKill, int FrameIdx, - const TargetRegisterClass *RC) const { + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { DebugLoc DL; if (MI != MBB.end()) DL = MI->getDebugLoc(); @@ -90,7 +91,8 @@ void SystemZInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, void SystemZInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, int FrameIdx, - const TargetRegisterClass *RC) const{ + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const{ DebugLoc DL; if (MI != MBB.end()) DL = MI->getDebugLoc(); @@ -119,9 +121,8 @@ bool SystemZInstrInfo::copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const { - DebugLoc DL; - if (I != MBB.end()) DL = I->getDebugLoc(); + const TargetRegisterClass *SrcRC, + DebugLoc DL) const { // Determine if DstRC and SrcRC have a common superclass. 
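// A minimal sketch, assuming the updated hook above: copyRegToReg no longer
// computes its own DebugLoc, so the caller derives one from the insertion
// point and passes it in. emitCopy is a hypothetical wrapper around the
// signature this patch introduces.
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/Target/TargetInstrInfo.h"

static bool emitCopy(const llvm::TargetInstrInfo &TII,
                     llvm::MachineBasicBlock &MBB,
                     llvm::MachineBasicBlock::iterator I,
                     unsigned DestReg, unsigned SrcReg,
                     const llvm::TargetRegisterClass *RC) {
  llvm::DebugLoc DL;
  if (I != MBB.end()) DL = I->getDebugLoc();  // location now owned by caller
  return TII.copyRegToReg(MBB, I, DestReg, SrcReg, RC, RC, DL);
}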
const TargetRegisterClass *CommonRC = DestRC; @@ -269,7 +270,8 @@ unsigned SystemZInstrInfo::isStoreToStackSlot(const MachineInstr *MI, bool SystemZInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI) const { + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { if (CSI.empty()) return false; @@ -333,7 +335,8 @@ SystemZInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, const TargetRegisterClass *RegClass = CSI[i].getRegClass(); if (RegClass == &SystemZ::FP64RegClass) { MBB.addLiveIn(Reg); - storeRegToStackSlot(MBB, MI, Reg, true, CSI[i].getFrameIdx(), RegClass); + storeRegToStackSlot(MBB, MI, Reg, true, CSI[i].getFrameIdx(), RegClass, + &RI); } } @@ -343,7 +346,8 @@ SystemZInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, bool SystemZInstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI) const { + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { if (CSI.empty()) return false; @@ -359,7 +363,7 @@ SystemZInstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, unsigned Reg = CSI[i].getReg(); const TargetRegisterClass *RegClass = CSI[i].getRegClass(); if (RegClass == &SystemZ::FP64RegClass) - loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RegClass); + loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RegClass, &RI); } // Restore GP registers diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h index ef3b39e..a753f14 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/lib/Target/SystemZ/SystemZInstrInfo.h @@ -63,7 +63,8 @@ public: bool copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const; + const TargetRegisterClass *SrcRC, + DebugLoc DL) const; bool isMoveInstr(const MachineInstr& MI, unsigned &SrcReg, unsigned &DstReg, @@ -75,18 +76,22 @@ public: MachineBasicBlock::iterator MI, unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC) const; + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, int FrameIdx, - const TargetRegisterClass *RC) const; + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI) const; + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const; virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI) const; + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const; bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const; diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h index 99e396a..42aa5dd 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.h +++ b/lib/Target/SystemZ/SystemZRegisterInfo.h @@ -1,4 +1,4 @@ -//===- SystemZRegisterInfo.h - SystemZ Register Information Impl ----*- C++ -*-===// +//===-- SystemZRegisterInfo.h - SystemZ Register Information ----*- C++ -*-===// // // 
The LLVM Compiler Infrastructure // @@ -19,15 +19,6 @@ namespace llvm { -namespace SystemZ { - /// SubregIndex - The index of various sized subregister classes. Note that - /// these indices must be kept in sync with the class indices in the - /// SystemZRegisterInfo.td file. - enum SubregIndex { - SUBREG_32BIT = 1, SUBREG_EVEN = 1, SUBREG_ODD = 2 - }; -} - class SystemZSubtarget; class SystemZInstrInfo; class Type; diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.td b/lib/Target/SystemZ/SystemZRegisterInfo.td index 8795847..b561744 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.td +++ b/lib/Target/SystemZ/SystemZRegisterInfo.td @@ -53,6 +53,14 @@ class FPRL<bits<4> num, string n, list<Register> subregs> field bits<4> Num = num; } +let Namespace = "SystemZ" in { +def subreg_32bit : SubRegIndex; +def subreg_even32 : SubRegIndex; +def subreg_odd32 : SubRegIndex; +def subreg_even : SubRegIndex; +def subreg_odd : SubRegIndex; +} + // General-purpose registers def R0W : GPR32< 0, "r0">, DwarfRegNum<[0]>; def R1W : GPR32< 1, "r1">, DwarfRegNum<[1]>; @@ -71,6 +79,7 @@ def R13W : GPR32<13, "r13">, DwarfRegNum<[13]>; def R14W : GPR32<14, "r14">, DwarfRegNum<[14]>; def R15W : GPR32<15, "r15">, DwarfRegNum<[15]>; +let SubRegIndices = [subreg_32bit] in { def R0D : GPR64< 0, "r0", [R0W]>, DwarfRegNum<[0]>; def R1D : GPR64< 1, "r1", [R1W]>, DwarfRegNum<[1]>; def R2D : GPR64< 2, "r2", [R2W]>, DwarfRegNum<[2]>; @@ -87,8 +96,10 @@ def R12D : GPR64<12, "r12", [R12W]>, DwarfRegNum<[12]>; def R13D : GPR64<13, "r13", [R13W]>, DwarfRegNum<[13]>; def R14D : GPR64<14, "r14", [R14W]>, DwarfRegNum<[14]>; def R15D : GPR64<15, "r15", [R15W]>, DwarfRegNum<[15]>; +} // Register pairs +let SubRegIndices = [subreg_even32, subreg_odd32] in { def R0P : GPR64< 0, "r0", [R0W, R1W], [R0D, R1D]>, DwarfRegNum<[0]>; def R2P : GPR64< 2, "r2", [R2W, R3W], [R2D, R3D]>, DwarfRegNum<[2]>; def R4P : GPR64< 4, "r4", [R4W, R5W], [R4D, R5D]>, DwarfRegNum<[4]>; @@ -97,7 +108,11 @@ def R8P : GPR64< 8, "r8", [R8W, R9W], [R8D, R9D]>, DwarfRegNum<[8]>; def R10P : GPR64<10, "r10", [R10W, R11W], [R10D, R11D]>, DwarfRegNum<[10]>; def R12P : GPR64<12, "r12", [R12W, R13W], [R12D, R13D]>, DwarfRegNum<[12]>; def R14P : GPR64<14, "r14", [R14W, R15W], [R14D, R15D]>, DwarfRegNum<[14]>; +} +let SubRegIndices = [subreg_even, subreg_odd], + CompositeIndices = [(subreg_even32 subreg_even, subreg_32bit), + (subreg_odd32 subreg_odd, subreg_32bit)] in { def R0Q : GPR128< 0, "r0", [R0D, R1D], [R0P]>, DwarfRegNum<[0]>; def R2Q : GPR128< 2, "r2", [R2D, R3D], [R2P]>, DwarfRegNum<[2]>; def R4Q : GPR128< 4, "r4", [R4D, R5D], [R4P]>, DwarfRegNum<[4]>; @@ -106,6 +121,7 @@ def R8Q : GPR128< 8, "r8", [R8D, R9D], [R8P]>, DwarfRegNum<[8]>; def R10Q : GPR128<10, "r10", [R10D, R11D], [R10P]>, DwarfRegNum<[10]>; def R12Q : GPR128<12, "r12", [R12D, R13D], [R12P]>, DwarfRegNum<[12]>; def R14Q : GPR128<14, "r14", [R14D, R15D], [R14P]>, DwarfRegNum<[14]>; +} // Floating-point registers def F0S : FPRS< 0, "f0">, DwarfRegNum<[16]>; @@ -125,6 +141,7 @@ def F13S : FPRS<13, "f13">, DwarfRegNum<[29]>; def F14S : FPRS<14, "f14">, DwarfRegNum<[30]>; def F15S : FPRS<15, "f15">, DwarfRegNum<[31]>; +let SubRegIndices = [subreg_32bit] in { def F0L : FPRL< 0, "f0", [F0S]>, DwarfRegNum<[16]>; def F1L : FPRL< 1, "f1", [F1S]>, DwarfRegNum<[17]>; def F2L : FPRL< 2, "f2", [F2S]>, DwarfRegNum<[18]>; @@ -141,39 +158,11 @@ def F12L : FPRL<12, "f12", [F12S]>, DwarfRegNum<[28]>; def F13L : FPRL<13, "f13", [F13S]>, DwarfRegNum<[29]>; def F14L : FPRL<14, "f14", [F14S]>, DwarfRegNum<[30]>; def 
F15L : FPRL<15, "f15", [F15S]>, DwarfRegNum<[31]>; +} // Status register def PSW : SystemZReg<"psw">; -def subreg_32bit : PatLeaf<(i32 1)>; -def subreg_even32 : PatLeaf<(i32 1)>; -def subreg_odd32 : PatLeaf<(i32 2)>; -def subreg_even : PatLeaf<(i32 3)>; -def subreg_odd : PatLeaf<(i32 4)>; - -def : SubRegSet<1, [R0D, R1D, R2D, R3D, R4D, R5D, R6D, R7D, - R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D], - [R0W, R1W, R2W, R3W, R4W, R5W, R6W, R7W, - R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W]>; - -def : SubRegSet<3, [R0Q, R2Q, R4Q, R6Q, R8Q, R10Q, R12Q, R14Q], - [R0D, R2D, R4D, R6D, R8D, R10D, R12D, R14D]>; - -def : SubRegSet<4, [R0Q, R2Q, R4Q, R6Q, R8Q, R10Q, R12Q, R14Q], - [R1D, R3D, R5D, R7D, R9D, R11D, R13D, R15D]>; - -def : SubRegSet<1, [R0P, R2P, R4P, R6P, R8P, R10P, R12P, R14P], - [R0W, R2W, R4W, R6W, R8W, R10W, R12W, R14W]>; - -def : SubRegSet<2, [R0P, R2P, R4P, R6P, R8P, R10P, R12P, R14P], - [R1W, R3W, R5W, R7W, R9W, R11W, R13W, R15W]>; - -def : SubRegSet<1, [R0Q, R2Q, R4Q, R6Q, R8Q, R10Q, R12Q, R14Q], - [R0W, R2W, R4W, R6W, R8W, R10W, R12W, R14W]>; - -def : SubRegSet<2, [R0Q, R2Q, R4Q, R6Q, R8Q, R10Q, R12Q, R14Q], - [R1W, R3W, R5W, R7W, R9W, R11W, R13W, R15W]>; - /// Register classes def GR32 : RegisterClass<"SystemZ", [i32], 32, // Volatile registers @@ -276,7 +265,7 @@ def GR64 : RegisterClass<"SystemZ", [i64], 64, // Volatile, but not allocable R14D, R15D]> { - let SubRegClassList = [GR32]; + let SubRegClasses = [(GR32 subreg_32bit)]; let MethodProtos = [{ iterator allocation_order_begin(const MachineFunction &MF) const; iterator allocation_order_end(const MachineFunction &MF) const; @@ -323,7 +312,7 @@ def ADDR64 : RegisterClass<"SystemZ", [i64], 64, // Volatile, but not allocable R14D, R15D]> { - let SubRegClassList = [ADDR32]; + let SubRegClasses = [(ADDR32 subreg_32bit)]; let MethodProtos = [{ iterator allocation_order_begin(const MachineFunction &MF) const; iterator allocation_order_end(const MachineFunction &MF) const; @@ -366,7 +355,7 @@ def ADDR64 : RegisterClass<"SystemZ", [i64], 64, def GR64P : RegisterClass<"SystemZ", [v2i32], 64, [R0P, R2P, R4P, R6P, R8P, R10P, R12P, R14P]> { - let SubRegClassList = [GR32, GR32]; + let SubRegClasses = [(GR32 subreg_even32, subreg_odd32)]; let MethodProtos = [{ iterator allocation_order_begin(const MachineFunction &MF) const; iterator allocation_order_end(const MachineFunction &MF) const; @@ -402,7 +391,8 @@ def GR64P : RegisterClass<"SystemZ", [v2i32], 64, def GR128 : RegisterClass<"SystemZ", [v2i64], 128, [R0Q, R2Q, R4Q, R6Q, R8Q, R10Q, R12Q, R14Q]> { - let SubRegClassList = [GR32, GR32, GR64, GR64]; + let SubRegClasses = [(GR32 subreg_even32, subreg_odd32), + (GR64 subreg_even, subreg_odd)]; let MethodProtos = [{ iterator allocation_order_begin(const MachineFunction &MF) const; iterator allocation_order_end(const MachineFunction &MF) const; @@ -462,7 +452,7 @@ def FP32 : RegisterClass<"SystemZ", [f32], 32, def FP64 : RegisterClass<"SystemZ", [f64], 64, [F0L, F1L, F2L, F3L, F4L, F5L, F6L, F7L, F8L, F9L, F10L, F11L, F12L, F13L, F14L, F15L]> { - let SubRegClassList = [FP32]; + let SubRegClasses = [(FP32 subreg_32bit)]; let MethodProtos = [{ iterator allocation_order_begin(const MachineFunction &MF) const; iterator allocation_order_end(const MachineFunction &MF) const; diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp index 87c831b..3eabcd2 100644 --- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp +++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp @@ -12,10 +12,11 @@ 
//===----------------------------------------------------------------------===// #define DEBUG_TYPE "systemz-selectiondag-info" -#include "SystemZSelectionDAGInfo.h" +#include "SystemZTargetMachine.h" using namespace llvm; -SystemZSelectionDAGInfo::SystemZSelectionDAGInfo() { +SystemZSelectionDAGInfo::SystemZSelectionDAGInfo(const SystemZTargetMachine &TM) + : TargetSelectionDAGInfo(TM) { } SystemZSelectionDAGInfo::~SystemZSelectionDAGInfo() { diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h index 5292de9..1450401 100644 --- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h +++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h @@ -18,9 +18,11 @@ namespace llvm { +class SystemZTargetMachine; + class SystemZSelectionDAGInfo : public TargetSelectionDAGInfo { public: - SystemZSelectionDAGInfo(); + explicit SystemZSelectionDAGInfo(const SystemZTargetMachine &TM); ~SystemZSelectionDAGInfo(); }; diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp index dfa26a1..f45827b 100644 --- a/lib/Target/SystemZ/SystemZTargetMachine.cpp +++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp @@ -29,7 +29,7 @@ SystemZTargetMachine::SystemZTargetMachine(const Target &T, Subtarget(TT, FS), DataLayout("E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32" "-f64:64:64-f128:128:128-a0:16:16-n32:64"), - InstrInfo(*this), TLInfo(*this), + InstrInfo(*this), TLInfo(*this), TSInfo(*this), FrameInfo(TargetFrameInfo::StackGrowsDown, 8, -160) { if (getRelocationModel() == Reloc::Default) diff --git a/lib/Target/SystemZ/SystemZTargetMachine.h b/lib/Target/SystemZ/SystemZTargetMachine.h index d3357cc..6af829b 100644 --- a/lib/Target/SystemZ/SystemZTargetMachine.h +++ b/lib/Target/SystemZ/SystemZTargetMachine.h @@ -17,6 +17,7 @@ #include "SystemZInstrInfo.h" #include "SystemZISelLowering.h" +#include "SystemZSelectionDAGInfo.h" #include "SystemZRegisterInfo.h" #include "SystemZSubtarget.h" #include "llvm/Target/TargetData.h" @@ -32,6 +33,7 @@ class SystemZTargetMachine : public LLVMTargetMachine { const TargetData DataLayout; // Calculates type size & alignment SystemZInstrInfo InstrInfo; SystemZTargetLowering TLInfo; + SystemZSelectionDAGInfo TSInfo; // SystemZ does not have any call stack frame, therefore not having // any SystemZ specific FrameInfo class. @@ -53,6 +55,10 @@ public: return &TLInfo; } + virtual const SystemZSelectionDAGInfo* getSelectionDAGInfo() const { + return &TSInfo; + } + virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); }; // SystemZTargetMachine. diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp index ac67c91..df52368 100644 --- a/lib/Target/TargetMachine.cpp +++ b/lib/Target/TargetMachine.cpp @@ -212,7 +212,8 @@ FunctionSections("ffunction-sections", // TargetMachine::TargetMachine(const Target &T) - : TheTarget(T), AsmInfo(0) { + : TheTarget(T), AsmInfo(0), + MCRelaxAll(false) { // Typically it will be subtargets that will adjust FloatABIType from Default // to Soft or Hard. if (UseSoftFloat) @@ -273,13 +274,14 @@ namespace llvm { /// DisableFramePointerElim - This returns true if frame pointer elimination /// optimization should be disabled for the given machine function. bool DisableFramePointerElim(const MachineFunction &MF) { - if (NoFramePointerElim) - return true; - if (NoFramePointerElimNonLeaf) { + // Check to see if we should eliminate non-leaf frame pointers and then + // check to see if we should eliminate all frame pointers. 
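// The control flow being introduced here, restated as a self-contained
// predicate (the booleans mirror the NoFramePointerElim and
// NoFramePointerElimNonLeaf options; HasCalls stands in for
// MFI->hasCalls()). A sketch, not the patch's code:
static bool disableFPElim(bool NoFPElim, bool NoFPElimNonLeaf, bool HasCalls) {
  if (NoFPElimNonLeaf && !NoFPElim)
    return HasCalls;  // keep the frame pointer only in non-leaf functions
  return NoFPElim;    // otherwise the blanket flag decides
}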
+ if (NoFramePointerElimNonLeaf && !NoFramePointerElim) { const MachineFrameInfo *MFI = MF.getFrameInfo(); return MFI->hasCalls(); } - return false; + + return NoFramePointerElim; } /// LessPreciseFPMAD - This flag return true when -enable-fp-mad option diff --git a/lib/Target/TargetRegisterInfo.cpp b/lib/Target/TargetRegisterInfo.cpp index 52983ff..dcc5f61 100644 --- a/lib/Target/TargetRegisterInfo.cpp +++ b/lib/Target/TargetRegisterInfo.cpp @@ -22,14 +22,14 @@ using namespace llvm; TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterDesc *D, unsigned NR, regclass_iterator RCB, regclass_iterator RCE, + const char *const *subregindexnames, int CFSO, int CFDO, const unsigned* subregs, const unsigned subregsize, - const unsigned* superregs, const unsigned superregsize, const unsigned* aliases, const unsigned aliasessize) : SubregHash(subregs), SubregHashSize(subregsize), - SuperregHash(superregs), SuperregHashSize(superregsize), AliasesHash(aliases), AliasesHashSize(aliasessize), - Desc(D), NumRegs(NR), RegClassBegin(RCB), RegClassEnd(RCE) { + Desc(D), SubRegIndexNames(subregindexnames), NumRegs(NR), + RegClassBegin(RCB), RegClassEnd(RCE) { assert(NumRegs < FirstVirtualRegister && "Target has too many physical registers!"); diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 6b403c1..40a6a7b 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -186,20 +186,73 @@ struct X86Operand : public MCParsedAsmOperand { bool isImm() const { return Kind == Immediate; } - bool isImmSExt8() const { - // Accept immediates which fit in 8 bits when sign extended, and - // non-absolute immediates. + bool isImmSExti16i8() const { if (!isImm()) return false; - if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm())) { - int64_t Value = CE->getValue(); - return Value == (int64_t) (int8_t) Value; - } + // If this isn't a constant expr, just assume it fits and let relaxation + // handle it. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) + return true; - return true; + // Otherwise, check the value is in a range that makes sense for this + // extension. + uint64_t Value = CE->getValue(); + return (( Value <= 0x000000000000007FULL)|| + (0x000000000000FF80ULL <= Value && Value <= 0x000000000000FFFFULL)|| + (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); } - + bool isImmSExti32i8() const { + if (!isImm()) + return false; + + // If this isn't a constant expr, just assume it fits and let relaxation + // handle it. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) + return true; + + // Otherwise, check the value is in a range that makes sense for this + // extension. + uint64_t Value = CE->getValue(); + return (( Value <= 0x000000000000007FULL)|| + (0x00000000FFFFFF80ULL <= Value && Value <= 0x00000000FFFFFFFFULL)|| + (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); + } + bool isImmSExti64i8() const { + if (!isImm()) + return false; + + // If this isn't a constant expr, just assume it fits and let relaxation + // handle it. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) + return true; + + // Otherwise, check the value is in a range that makes sense for this + // extension. 
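// For the i64i8 predicate whose body follows, the two accepted intervals
// are exactly the values that survive a round trip through a signed 8-bit
// truncation; the i16i8/i32i8 variants above additionally accept the same
// pattern confined to the low 16 or 32 bits. A self-contained restatement
// (two's-complement truncation assumed, as on all LLVM hosts):
#include <cstdint>
static bool fitsSExti64i8(uint64_t Value) {
  // Accepts [0, 0x7f] and [0xffffffffffffff80, 2^64 - 1], nothing else.
  return Value == uint64_t(int64_t(int8_t(Value)));
}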
+ uint64_t Value = CE->getValue(); + return (( Value <= 0x000000000000007FULL)|| + (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); + } + bool isImmSExti64i32() const { + if (!isImm()) + return false; + + // If this isn't a constant expr, just assume it fits and let relaxation + // handle it. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) + return true; + + // Otherwise, check the value is in a range that makes sense for this + // extension. + uint64_t Value = CE->getValue(); + return (( Value <= 0x000000007FFFFFFFULL)|| + (0xFFFFFFFF80000000ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); + } + bool isMem() const { return Kind == Memory; } bool isAbsMem() const { @@ -231,12 +284,6 @@ struct X86Operand : public MCParsedAsmOperand { addExpr(Inst, getImm()); } - void addImmSExt8Operands(MCInst &Inst, unsigned N) const { - // FIXME: Support user customization of the render method. - assert(N == 1 && "Invalid number of operands!"); - addExpr(Inst, getImm()); - } - void addMemOperands(MCInst &Inst, unsigned N) const { assert((N == 5) && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateReg(getMemBaseReg())); @@ -535,6 +582,21 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { bool X86ATTAsmParser:: ParseInstruction(const StringRef &Name, SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + // The various flavors of pushf and popf use Requires<In32BitMode> and + // Requires<In64BitMode>, but the assembler doesn't yet implement that. + // For now, just do a manual check to prevent silent misencoding. + if (Is64Bit) { + if (Name == "popfl") + return Error(NameLoc, "popfl cannot be encoded in 64-bit mode"); + else if (Name == "pushfl") + return Error(NameLoc, "pushfl cannot be encoded in 64-bit mode"); + } else { + if (Name == "popfq") + return Error(NameLoc, "popfq cannot be encoded in 32-bit mode"); + else if (Name == "pushfq") + return Error(NameLoc, "pushfq cannot be encoded in 32-bit mode"); + } + // FIXME: Hack to recognize "sal..." and "rep..." for now. We need a way to // represent alternative syntaxes in the .td file, without requiring // instruction duplication. @@ -547,9 +609,66 @@ ParseInstruction(const StringRef &Name, SMLoc NameLoc, .Case("repe", "rep") .Case("repz", "rep") .Case("repnz", "repne") + .Case("pushf", Is64Bit ? "pushfq" : "pushfl") + .Case("popf", Is64Bit ? "popfq" : "popfl") + .Case("retl", Is64Bit ? "retl" : "ret") + .Case("retq", Is64Bit ? "ret" : "retq") + .Case("setz", "sete") + .Case("setnz", "setne") + .Case("jz", "je") + .Case("jnz", "jne") + .Case("cmovcl", "cmovbl") + .Case("cmovcl", "cmovbl") + .Case("cmovnal", "cmovbel") + .Case("cmovnbl", "cmovael") + .Case("cmovnbel", "cmoval") + .Case("cmovncl", "cmovael") + .Case("cmovngl", "cmovlel") + .Case("cmovnl", "cmovgel") + .Case("cmovngl", "cmovlel") + .Case("cmovngel", "cmovll") + .Case("cmovnll", "cmovgel") + .Case("cmovnlel", "cmovgl") + .Case("cmovnzl", "cmovnel") + .Case("cmovzl", "cmovel") .Default(Name); + + // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}. 
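// The cmp pseudo-mnemonics fold the predicate into an immediate, e.g.
// "cmpltss %xmm1, %xmm0" becomes "cmpss $1, %xmm1, %xmm0". The same
// suffix-to-immediate table as plain C++, for illustration only (the patch
// itself uses llvm::StringSwitch):
#include <cstring>
static int sseComparisonCode(const char *Cond) {
  static const char *const Names[] =
      {"eq", "lt", "le", "unord", "neq", "nlt", "nle", "ord"};
  for (int i = 0; i != 8; ++i)
    if (std::strcmp(Cond, Names[i]) == 0)
      return i;  // the $imm operand for cmp{ss,sd,ps,pd}
  return -1;     // not a recognized SSE comparison predicate
}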
+ const MCExpr *ExtraImmOp = 0; + if (PatchedName.startswith("cmp") && + (PatchedName.endswith("ss") || PatchedName.endswith("sd") || + PatchedName.endswith("ps") || PatchedName.endswith("pd"))) { + unsigned SSEComparisonCode = StringSwitch<unsigned>( + PatchedName.slice(3, PatchedName.size() - 2)) + .Case("eq", 0) + .Case("lt", 1) + .Case("le", 2) + .Case("unord", 3) + .Case("neq", 4) + .Case("nlt", 5) + .Case("nle", 6) + .Case("ord", 7) + .Default(~0U); + if (SSEComparisonCode != ~0U) { + ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode, + getParser().getContext()); + if (PatchedName.endswith("ss")) { + PatchedName = "cmpss"; + } else if (PatchedName.endswith("sd")) { + PatchedName = "cmpsd"; + } else if (PatchedName.endswith("ps")) { + PatchedName = "cmpps"; + } else { + assert(PatchedName.endswith("pd") && "Unexpected mnemonic!"); + PatchedName = "cmppd"; + } + } + } Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc)); + if (ExtraImmOp) + Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc)); + if (getLexer().isNot(AsmToken::EndOfStatement)) { // Parse '*' modifier. @@ -564,7 +683,7 @@ ParseInstruction(const StringRef &Name, SMLoc NameLoc, Operands.push_back(Op); else return true; - + while (getLexer().is(AsmToken::Comma)) { Parser.Lex(); // Eat the comma. @@ -587,6 +706,17 @@ ParseInstruction(const StringRef &Name, SMLoc NameLoc, Operands.erase(Operands.begin() + 1); } + // FIXME: Hack to handle "f{mul*,add*,sub*,div*} $op, st(0)" the same as + // "f{mul*,add*,sub*,div*} $op" + if ((Name.startswith("fmul") || Name.startswith("fadd") || + Name.startswith("fsub") || Name.startswith("fdiv")) && + Operands.size() == 3 && + static_cast<X86Operand*>(Operands[2])->isReg() && + static_cast<X86Operand*>(Operands[2])->getReg() == X86::ST0) { + delete Operands[2]; + Operands.erase(Operands.begin() + 2); + } + return false; } @@ -622,6 +752,31 @@ bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { return false; } +/// LowerMOffset - Lower an 'moffset' form of an instruction, which just has a +/// imm operand, to having "rm" or "mr" operands with the offset in the disp +/// field. +static void LowerMOffset(MCInst &Inst, unsigned Opc, unsigned RegNo, + bool isMR) { + MCOperand Disp = Inst.getOperand(0); + + // Start over with an empty instruction. + Inst = MCInst(); + Inst.setOpcode(Opc); + + if (!isMR) + Inst.addOperand(MCOperand::CreateReg(RegNo)); + + // Add the mem operand. + Inst.addOperand(MCOperand::CreateReg(0)); // Segment + Inst.addOperand(MCOperand::CreateImm(1)); // Scale + Inst.addOperand(MCOperand::CreateReg(0)); // IndexReg + Inst.addOperand(Disp); // Displacement + Inst.addOperand(MCOperand::CreateReg(0)); // BaseReg + + if (isMR) + Inst.addOperand(MCOperand::CreateReg(RegNo)); +} + // FIXME: Custom X86 cleanup function to implement a temporary hack to handle // matching INCL/DECL correctly for x86_64. This needs to be replaced by a // proper mechanism for supporting (ambiguous) feature dependent instructions. @@ -637,6 +792,14 @@ void X86ATTAsmParser::InstructionCleanup(MCInst &Inst) { case X86::INC16m: Inst.setOpcode(X86::INC64_16m); break; case X86::INC32r: Inst.setOpcode(X86::INC64_32r); break; case X86::INC32m: Inst.setOpcode(X86::INC64_32m); break; + + // moffset instructions are x86-32 only. 
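// The moffset forms carry only a bare displacement, e.g. "movl 0x1234, %eax"
// (the one-byte A1 load). The cases that follow rewrite them into the
// generic rm/mr shape so later code sees one canonical five-part memory
// operand. A hypothetical helper mirroring LowerMOffset above, in the
// operand order the patch uses:
#include "llvm/MC/MCInst.h"
static void addAbsoluteMemOperands(llvm::MCInst &Inst,
                                   const llvm::MCOperand &Disp) {
  using llvm::MCOperand;
  Inst.addOperand(MCOperand::CreateReg(0));  // segment: none
  Inst.addOperand(MCOperand::CreateImm(1));  // scale: 1
  Inst.addOperand(MCOperand::CreateReg(0));  // index: none
  Inst.addOperand(Disp);                     // the absolute displacement
  Inst.addOperand(MCOperand::CreateReg(0));  // base: none
}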
+ case X86::MOV8o8a: LowerMOffset(Inst, X86::MOV8rm , X86::AL , false); break; + case X86::MOV16o16a: LowerMOffset(Inst, X86::MOV16rm, X86::AX , false); break; + case X86::MOV32o32a: LowerMOffset(Inst, X86::MOV32rm, X86::EAX, false); break; + case X86::MOV8ao8: LowerMOffset(Inst, X86::MOV8mr , X86::AL , true); break; + case X86::MOV16ao16: LowerMOffset(Inst, X86::MOV16mr, X86::AX , true); break; + case X86::MOV32ao32: LowerMOffset(Inst, X86::MOV32mr, X86::EAX, true); break; } } @@ -673,6 +836,8 @@ X86ATTAsmParser::MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> bool MatchW = MatchInstructionImpl(Operands, Inst); Tmp[Base.size()] = 'l'; bool MatchL = MatchInstructionImpl(Operands, Inst); + Tmp[Base.size()] = 'q'; + bool MatchQ = MatchInstructionImpl(Operands, Inst); // Restore the old token. Op->setTokenValue(Base); @@ -680,7 +845,7 @@ X86ATTAsmParser::MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> // If exactly one matched, then we treat that as a successful match (and the // instruction will already have been filled in correctly, since the failing // matches won't have modified it). - if (MatchB + MatchW + MatchL == 2) + if (MatchB + MatchW + MatchL + MatchQ == 3) return false; // Otherwise, the match failed. diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp index 8b0ed1c..183213d 100644 --- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp @@ -58,12 +58,11 @@ bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) { SetupMachineFunction(MF); if (Subtarget->isTargetCOFF()) { - const Function *F = MF.getFunction(); - OutStreamer.EmitRawText("\t.def\t " + Twine(CurrentFnSym->getName()) + - ";\t.scl\t" + - Twine(F->hasInternalLinkage() ? COFF::C_STAT : COFF::C_EXT) + - ";\t.type\t" + Twine(COFF::DT_FCN << COFF::N_BTSHFT) - + ";\t.endef"); + bool Intrn = MF.getFunction()->hasInternalLinkage(); + OutStreamer.BeginCOFFSymbolDef(CurrentFnSym); + OutStreamer.EmitCOFFSymbolStorageClass(Intrn ? COFF::C_STAT : COFF::C_EXT); + OutStreamer.EmitCOFFSymbolType(COFF::DT_FCN << COFF::N_BTSHFT); + OutStreamer.EndCOFFSymbolDef(); } // Have common code print out the function header with linkage info etc. 
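// The streamer calls introduced above replace hand-formatted raw text and
// expand to the usual COFF symbol definition directives, roughly
//   .def _foo ; .scl 2 ; .type 32 ; .endef
// (storage class 2 is COFF::C_EXT, 3 is C_STAT; 32 is DT_FCN << N_BTSHFT).
// A hypothetical helper bundling the sequence, assuming the MCStreamer and
// COFF declarations already in scope in this file:
static void emitCOFFFunctionDef(llvm::MCStreamer &OS, llvm::MCSymbol *Sym,
                                bool InternalLinkage) {
  OS.BeginCOFFSymbolDef(Sym);
  OS.EmitCOFFSymbolStorageClass(InternalLinkage ? COFF::C_STAT : COFF::C_EXT);
  OS.EmitCOFFSymbolType(COFF::DT_FCN << COFF::N_BTSHFT);
  OS.EndCOFFSymbolDef();
}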
@@ -571,44 +570,55 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { MMI->getObjFileInfo<X86COFFMachineModuleInfo>(); // Emit type information for external functions - for (X86COFFMachineModuleInfo::stub_iterator I = COFFMMI.stub_begin(), - E = COFFMMI.stub_end(); I != E; ++I) { - OutStreamer.EmitRawText("\t.def\t " + Twine(I->getKeyData()) + - ";\t.scl\t" + Twine(COFF::C_EXT) + - ";\t.type\t" + - Twine(COFF::DT_FCN << COFF::N_BTSHFT) + - ";\t.endef"); + typedef X86COFFMachineModuleInfo::externals_iterator externals_iterator; + for (externals_iterator I = COFFMMI.externals_begin(), + E = COFFMMI.externals_end(); + I != E; ++I) { + OutStreamer.BeginCOFFSymbolDef(CurrentFnSym); + OutStreamer.EmitCOFFSymbolStorageClass(COFF::C_EXT); + OutStreamer.EmitCOFFSymbolType(COFF::DT_FCN << COFF::N_BTSHFT); + OutStreamer.EndCOFFSymbolDef(); } - if (Subtarget->isTargetCygMing()) { - // Necessary for dllexport support - std::vector<const MCSymbol*> DLLExportedFns, DLLExportedGlobals; - - const TargetLoweringObjectFileCOFF &TLOFCOFF = - static_cast<const TargetLoweringObjectFileCOFF&>(getObjFileLowering()); - - for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) - if (I->hasDLLExportLinkage()) - DLLExportedFns.push_back(Mang->getSymbol(I)); - - for (Module::const_global_iterator I = M.global_begin(), - E = M.global_end(); I != E; ++I) - if (I->hasDLLExportLinkage()) - DLLExportedGlobals.push_back(Mang->getSymbol(I)); - - // Output linker support code for dllexported globals on windows. - if (!DLLExportedGlobals.empty() || !DLLExportedFns.empty()) { - OutStreamer.SwitchSection(TLOFCOFF.getCOFFSection(".section .drectve", - true, - SectionKind::getMetadata())); - for (unsigned i = 0, e = DLLExportedGlobals.size(); i != e; ++i) - OutStreamer.EmitRawText("\t.ascii \" -export:" + - Twine(DLLExportedGlobals[i]->getName()) + - ",data\""); - - for (unsigned i = 0, e = DLLExportedFns.size(); i != e; ++i) - OutStreamer.EmitRawText("\t.ascii \" -export:" + - Twine(DLLExportedFns[i]->getName()) + "\""); + // Necessary for dllexport support + std::vector<const MCSymbol*> DLLExportedFns, DLLExportedGlobals; + + const TargetLoweringObjectFileCOFF &TLOFCOFF = + static_cast<const TargetLoweringObjectFileCOFF&>(getObjFileLowering()); + + for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) + if (I->hasDLLExportLinkage()) + DLLExportedFns.push_back(Mang->getSymbol(I)); + + for (Module::const_global_iterator I = M.global_begin(), + E = M.global_end(); I != E; ++I) + if (I->hasDLLExportLinkage()) + DLLExportedGlobals.push_back(Mang->getSymbol(I)); + + // Output linker support code for dllexported globals on windows. 
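// The directive string assembled below differs by toolchain: MSVC's
// link.exe expects " /EXPORT:name[,DATA]" while the GNU driver used on
// Cygwin/MinGW takes " -export:name[,data]" (the data suffix is only for
// global variables). A self-contained sketch; makeExportDirective is a
// hypothetical name:
#include <string>
static std::string makeExportDirective(const std::string &Name,
                                       bool IsData, bool IsWindows) {
  std::string S = IsWindows ? " /EXPORT:" : " -export:";
  S += Name;
  if (IsData)
    S += IsWindows ? ",DATA" : ",data";
  return S;
}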
+ if (!DLLExportedGlobals.empty() || !DLLExportedFns.empty()) { + OutStreamer.SwitchSection(TLOFCOFF.getDrectveSection()); + SmallString<128> name; + for (unsigned i = 0, e = DLLExportedGlobals.size(); i != e; ++i) { + if (Subtarget->isTargetWindows()) + name = " /EXPORT:"; + else + name = " -export:"; + name += DLLExportedGlobals[i]->getName(); + if (Subtarget->isTargetWindows()) + name += ",DATA"; + else + name += ",data"; + OutStreamer.EmitBytes(name, 0); + } + + for (unsigned i = 0, e = DLLExportedFns.size(); i != e; ++i) { + if (Subtarget->isTargetWindows()) + name = " /EXPORT:"; + else + name = " -export:"; + name += DLLExportedFns[i]->getName(); + OutStreamer.EmitBytes(name, 0); } } } diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.h b/lib/Target/X86/AsmPrinter/X86AsmPrinter.h index 95984b2..b5a7f8d 100644 --- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.h +++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.h @@ -31,7 +31,7 @@ class MCInst; class MCStreamer; class MCSymbol; -class VISIBILITY_HIDDEN X86AsmPrinter : public AsmPrinter { +class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter { const X86Subtarget *Subtarget; public: explicit X86AsmPrinter(TargetMachine &TM, MCStreamer &Streamer) diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp index effc8ed..4edeca9 100644 --- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp +++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp @@ -224,6 +224,60 @@ static void LowerUnaryToTwoAddr(MCInst &OutMI, unsigned NewOpc) { OutMI.addOperand(OutMI.getOperand(0)); } +/// \brief Simplify FOO $imm, %{al,ax,eax,rax} to FOO $imm, for instruction with +/// a short fixed-register form. +static void SimplifyShortImmForm(MCInst &Inst, unsigned Opcode) { + unsigned ImmOp = Inst.getNumOperands() - 1; + assert(Inst.getOperand(0).isReg() && Inst.getOperand(ImmOp).isImm() && + ((Inst.getNumOperands() == 3 && Inst.getOperand(1).isReg() && + Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg()) || + Inst.getNumOperands() == 2) && "Unexpected instruction!"); + + // Check whether the destination register can be fixed. + unsigned Reg = Inst.getOperand(0).getReg(); + if (Reg != X86::AL && Reg != X86::AX && Reg != X86::EAX && Reg != X86::RAX) + return; + + // If so, rewrite the instruction. + MCOperand Saved = Inst.getOperand(ImmOp); + Inst = MCInst(); + Inst.setOpcode(Opcode); + Inst.addOperand(Saved); +} + +/// \brief Simplify things like MOV32rm to MOV32o32a. +static void SimplifyShortMoveForm(MCInst &Inst, unsigned Opcode) { + bool IsStore = Inst.getOperand(0).isReg() && Inst.getOperand(1).isReg(); + unsigned AddrBase = IsStore; + unsigned RegOp = IsStore ? 0 : 5; + unsigned AddrOp = AddrBase + 3; + assert(Inst.getNumOperands() == 6 && Inst.getOperand(RegOp).isReg() && + Inst.getOperand(AddrBase + 0).isReg() && // base + Inst.getOperand(AddrBase + 1).isImm() && // scale + Inst.getOperand(AddrBase + 2).isReg() && // index register + (Inst.getOperand(AddrOp).isExpr() || // address + Inst.getOperand(AddrOp).isImm())&& + Inst.getOperand(AddrBase + 4).isReg() && // segment + "Unexpected instruction!"); + + // Check whether the destination register can be fixed. + unsigned Reg = Inst.getOperand(RegOp).getReg(); + if (Reg != X86::AL && Reg != X86::AX && Reg != X86::EAX && Reg != X86::RAX) + return; + + // Check whether this is an absolute address. 
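// "Absolute" in the check that follows means the five-part memory operand
// degenerates to a bare displacement: no base, no index, no segment
// override, and scale 1 (register number 0 encodes "no register"). As a
// self-contained predicate:
#include <cstdint>
static bool isAbsoluteAddress(unsigned BaseReg, unsigned IndexReg,
                              unsigned SegReg, int64_t Scale) {
  return BaseReg == 0 && IndexReg == 0 && SegReg == 0 && Scale == 1;
}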
+ if (Inst.getOperand(AddrBase + 0).getReg() != 0 || + Inst.getOperand(AddrBase + 2).getReg() != 0 || + Inst.getOperand(AddrBase + 4).getReg() != 0 || + Inst.getOperand(AddrBase + 1).getImm() != 1) + return; + + // If so, rewrite the instruction. + MCOperand Saved = Inst.getOperand(AddrOp); + Inst = MCInst(); + Inst.setOpcode(Opcode); + Inst.addOperand(Saved); +} void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { OutMI.setOpcode(MI->getOpcode()); @@ -309,8 +363,32 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { LowerSubReg32_Op0(OutMI, X86::MOV32r0); // MOV64r0 -> MOV32r0 LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); // MOV32r0 -> XOR32rr break; - - + + // TAILJMPr, TAILJMPr64, CALL64r, CALL64pcrel32 - These instructions have + // register inputs modeled as normal uses instead of implicit uses. As such, + // truncate off all but the first operand (the callee). FIXME: Change isel. + case X86::TAILJMPr: + case X86::TAILJMPr64: + case X86::CALL64r: + case X86::CALL64pcrel32: { + unsigned Opcode = OutMI.getOpcode(); + MCOperand Saved = OutMI.getOperand(0); + OutMI = MCInst(); + OutMI.setOpcode(Opcode); + OutMI.addOperand(Saved); + break; + } + + // TAILJMPd, TAILJMPd64 - Lower to the correct jump instructions. + case X86::TAILJMPd: + case X86::TAILJMPd64: { + MCOperand Saved = OutMI.getOperand(0); + OutMI = MCInst(); + OutMI.setOpcode(X86::TAILJMP_1); + OutMI.addOperand(Saved); + break; + } + // The assembler backend wants to see branches in their small form and relax // them to their large form. The JIT can only handle the large form because // it does not do relaxation. For now, translate the large form to the @@ -332,6 +410,61 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { case X86::JGE_4: OutMI.setOpcode(X86::JGE_1); break; case X86::JLE_4: OutMI.setOpcode(X86::JLE_1); break; case X86::JG_4: OutMI.setOpcode(X86::JG_1); break; + + // We don't currently select the correct instruction form for instructions + // which have a short %eax, etc. form. Handle this by custom lowering, for + // now. 
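// Why the short forms are worth recovering: the accumulator-special
// encodings drop the ModRM byte. A worked example, assuming the standard
// IA-32 encodings:
//   addl $42, %eax  ->  05 2a 00 00 00      (ADD32i32, 5 bytes)
//   addl $42, %ecx  ->  81 c1 2a 00 00 00   (ADD32ri,  6 bytes)
static unsigned addImm32EncodedSize(bool DestIsEAX) {
  return DestIsEAX ? 5u   // opcode 05 + imm32, no ModRM byte
                   : 6u;  // opcode 81 /0 + ModRM + imm32
}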
+ // + // Note, we are currently not handling the following instructions: + // MOV64ao8, MOV64o8a + // XCHG16ar, XCHG32ar, XCHG64ar + case X86::MOV8mr_NOREX: + case X86::MOV8mr: SimplifyShortMoveForm(OutMI, X86::MOV8ao8); break; + case X86::MOV8rm_NOREX: + case X86::MOV8rm: SimplifyShortMoveForm(OutMI, X86::MOV8o8a); break; + case X86::MOV16mr: SimplifyShortMoveForm(OutMI, X86::MOV16ao16); break; + case X86::MOV16rm: SimplifyShortMoveForm(OutMI, X86::MOV16o16a); break; + case X86::MOV32mr: SimplifyShortMoveForm(OutMI, X86::MOV32ao32); break; + case X86::MOV32rm: SimplifyShortMoveForm(OutMI, X86::MOV32o32a); break; + case X86::MOV64mr: SimplifyShortMoveForm(OutMI, X86::MOV64ao64); break; + case X86::MOV64rm: SimplifyShortMoveForm(OutMI, X86::MOV64o64a); break; + + case X86::ADC8ri: SimplifyShortImmForm(OutMI, X86::ADC8i8); break; + case X86::ADC16ri: SimplifyShortImmForm(OutMI, X86::ADC16i16); break; + case X86::ADC32ri: SimplifyShortImmForm(OutMI, X86::ADC32i32); break; + case X86::ADC64ri32: SimplifyShortImmForm(OutMI, X86::ADC64i32); break; + case X86::ADD8ri: SimplifyShortImmForm(OutMI, X86::ADD8i8); break; + case X86::ADD16ri: SimplifyShortImmForm(OutMI, X86::ADD16i16); break; + case X86::ADD32ri: SimplifyShortImmForm(OutMI, X86::ADD32i32); break; + case X86::ADD64ri32: SimplifyShortImmForm(OutMI, X86::ADD64i32); break; + case X86::AND8ri: SimplifyShortImmForm(OutMI, X86::AND8i8); break; + case X86::AND16ri: SimplifyShortImmForm(OutMI, X86::AND16i16); break; + case X86::AND32ri: SimplifyShortImmForm(OutMI, X86::AND32i32); break; + case X86::AND64ri32: SimplifyShortImmForm(OutMI, X86::AND64i32); break; + case X86::CMP8ri: SimplifyShortImmForm(OutMI, X86::CMP8i8); break; + case X86::CMP16ri: SimplifyShortImmForm(OutMI, X86::CMP16i16); break; + case X86::CMP32ri: SimplifyShortImmForm(OutMI, X86::CMP32i32); break; + case X86::CMP64ri32: SimplifyShortImmForm(OutMI, X86::CMP64i32); break; + case X86::OR8ri: SimplifyShortImmForm(OutMI, X86::OR8i8); break; + case X86::OR16ri: SimplifyShortImmForm(OutMI, X86::OR16i16); break; + case X86::OR32ri: SimplifyShortImmForm(OutMI, X86::OR32i32); break; + case X86::OR64ri32: SimplifyShortImmForm(OutMI, X86::OR64i32); break; + case X86::SBB8ri: SimplifyShortImmForm(OutMI, X86::SBB8i8); break; + case X86::SBB16ri: SimplifyShortImmForm(OutMI, X86::SBB16i16); break; + case X86::SBB32ri: SimplifyShortImmForm(OutMI, X86::SBB32i32); break; + case X86::SBB64ri32: SimplifyShortImmForm(OutMI, X86::SBB64i32); break; + case X86::SUB8ri: SimplifyShortImmForm(OutMI, X86::SUB8i8); break; + case X86::SUB16ri: SimplifyShortImmForm(OutMI, X86::SUB16i16); break; + case X86::SUB32ri: SimplifyShortImmForm(OutMI, X86::SUB32i32); break; + case X86::SUB64ri32: SimplifyShortImmForm(OutMI, X86::SUB64i32); break; + case X86::TEST8ri: SimplifyShortImmForm(OutMI, X86::TEST8i8); break; + case X86::TEST16ri: SimplifyShortImmForm(OutMI, X86::TEST16i16); break; + case X86::TEST32ri: SimplifyShortImmForm(OutMI, X86::TEST32i32); break; + case X86::TEST64ri32: SimplifyShortImmForm(OutMI, X86::TEST64i32); break; + case X86::XOR8ri: SimplifyShortImmForm(OutMI, X86::XOR8i8); break; + case X86::XOR16ri: SimplifyShortImmForm(OutMI, X86::XOR16i16); break; + case X86::XOR32ri: SimplifyShortImmForm(OutMI, X86::XOR32i32); break; + case X86::XOR64ri32: SimplifyShortImmForm(OutMI, X86::XOR64i32); break; } } @@ -346,7 +479,7 @@ void X86AsmPrinter::PrintDebugValueComment(const MachineInstr *MI, // cast away const; DIetc do not take const operands for some reason. 
DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata())); if (V.getContext().isSubprogram()) - O << DISubprogram(V.getContext().getNode()).getDisplayName() << ":"; + O << DISubprogram(V.getContext()).getDisplayName() << ":"; O << V.getName(); O << " <- "; // Frame address. Currently handles register +- offset only. diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.h b/lib/Target/X86/AsmPrinter/X86MCInstLower.h index ebd23f6..9e5474f 100644 --- a/lib/Target/X86/AsmPrinter/X86MCInstLower.h +++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.h @@ -25,7 +25,7 @@ namespace llvm { class X86Subtarget; /// X86MCInstLower - This class is used to lower an MachineInstr into an MCInst. -class VISIBILITY_HIDDEN X86MCInstLower { +class LLVM_LIBRARY_VISIBILITY X86MCInstLower { MCContext &Ctx; Mangler *Mang; X86AsmPrinter &AsmPrinter; diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index da6bb91..1334820 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -39,7 +39,12 @@ set(sources if( CMAKE_CL_64 ) enable_language(ASM_MASM) - set(sources ${sources} X86CompilationCallback_Win64.asm) + ADD_CUSTOM_COMMAND( + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/X86CompilationCallback_Win64.obj + COMMAND ${CMAKE_ASM_MASM_COMPILER} /Fo ${CMAKE_CURRENT_BINARY_DIR}/X86CompilationCallback_Win64.obj /c ${CMAKE_CURRENT_SOURCE_DIR}/X86CompilationCallback_Win64.asm + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/X86CompilationCallback_Win64.asm + ) + set(sources ${sources} ${CMAKE_CURRENT_BINARY_DIR}/X86CompilationCallback_Win64.obj) endif() add_llvm_target(X86CodeGen ${sources}) diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index 62e7357..8a5a630 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -155,7 +155,57 @@ static void translateRegister(MCInst &mcInst, Reg reg) { /// /// @param mcInst - The MCInst to append to. /// @param immediate - The immediate value to append. -static void translateImmediate(MCInst &mcInst, uint64_t immediate) { +/// @param operand - The operand, as stored in the descriptor table. +/// @param insn - The internal instruction. +static void translateImmediate(MCInst &mcInst, + uint64_t immediate, + OperandSpecifier &operand, + InternalInstruction &insn) { + // Sign-extend the immediate if necessary. + + OperandType type = operand.type; + + if (type == TYPE_RELv) { + switch (insn.displacementSize) { + default: + break; + case 8: + type = TYPE_MOFFS8; + break; + case 16: + type = TYPE_MOFFS16; + break; + case 32: + type = TYPE_MOFFS32; + break; + case 64: + type = TYPE_MOFFS64; + break; + } + } + + switch (type) { + case TYPE_MOFFS8: + case TYPE_REL8: + if(immediate & 0x80) + immediate |= ~(0xffull); + break; + case TYPE_MOFFS16: + if(immediate & 0x8000) + immediate |= ~(0xffffull); + break; + case TYPE_MOFFS32: + case TYPE_REL32: + case TYPE_REL64: + if(immediate & 0x80000000) + immediate |= ~(0xffffffffull); + break; + case TYPE_MOFFS64: + default: + // operand is 64 bits wide. Do nothing. 
+ break; + } + mcInst.addOperand(MCOperand::CreateImm(immediate)); } @@ -370,8 +420,7 @@ static bool translateRM(MCInst &mcInst, case TYPE_XMM64: case TYPE_XMM128: case TYPE_DEBUGREG: - case TYPE_CR32: - case TYPE_CR64: + case TYPE_CONTROLREG: return translateRMRegister(mcInst, insn); case TYPE_M: case TYPE_M8: @@ -447,8 +496,10 @@ static bool translateOperand(MCInst &mcInst, case ENCODING_IO: case ENCODING_Iv: case ENCODING_Ia: - translateImmediate(mcInst, - insn.immediates[insn.numImmediatesTranslated++]); + translateImmediate(mcInst, + insn.immediates[insn.numImmediatesTranslated++], + operand, + insn); return false; case ENCODING_RB: case ENCODING_RW: diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c index 64f6b2d..6c3ff6b 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c @@ -1034,14 +1034,10 @@ static int readModRM(struct InternalInstruction* insn) { if (index > 7) \ *valid = 0; \ return prefix##_DR0 + index; \ - case TYPE_CR32: \ - if (index > 7) \ - *valid = 0; \ - return prefix##_ECR0 + index; \ - case TYPE_CR64: \ + case TYPE_CONTROLREG: \ if (index > 8) \ *valid = 0; \ - return prefix##_RCR0 + index; \ + return prefix##_CR0 + index; \ } \ } diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h index 462cf68..28ba86b 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h @@ -225,26 +225,16 @@ extern "C" { ENTRY(DR6) \ ENTRY(DR7) -#define REGS_CONTROL_32BIT \ - ENTRY(ECR0) \ - ENTRY(ECR1) \ - ENTRY(ECR2) \ - ENTRY(ECR3) \ - ENTRY(ECR4) \ - ENTRY(ECR5) \ - ENTRY(ECR6) \ - ENTRY(ECR7) - -#define REGS_CONTROL_64BIT \ - ENTRY(RCR0) \ - ENTRY(RCR1) \ - ENTRY(RCR2) \ - ENTRY(RCR3) \ - ENTRY(RCR4) \ - ENTRY(RCR5) \ - ENTRY(RCR6) \ - ENTRY(RCR7) \ - ENTRY(RCR8) +#define REGS_CONTROL \ + ENTRY(CR0) \ + ENTRY(CR1) \ + ENTRY(CR2) \ + ENTRY(CR3) \ + ENTRY(CR4) \ + ENTRY(CR5) \ + ENTRY(CR6) \ + ENTRY(CR7) \ + ENTRY(CR8) #define ALL_EA_BASES \ EA_BASES_16BIT \ @@ -264,8 +254,7 @@ extern "C" { REGS_XMM \ REGS_SEGMENT \ REGS_DEBUG \ - REGS_CONTROL_32BIT \ - REGS_CONTROL_64BIT \ + REGS_CONTROL \ ENTRY(RIP) /* diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h index 4a7cd57..0f33f52 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h @@ -280,8 +280,7 @@ struct ContextDecision { ENUM_ENTRY(TYPE_XMM0, "Implicit use of XMM0") \ ENUM_ENTRY(TYPE_SEGMENTREG, "Segment register operand") \ ENUM_ENTRY(TYPE_DEBUGREG, "Debug register operand") \ - ENUM_ENTRY(TYPE_CR32, "4-byte control register operand") \ - ENUM_ENTRY(TYPE_CR64, "8-byte") \ + ENUM_ENTRY(TYPE_CONTROLREG, "Control register operand") \ \ ENUM_ENTRY(TYPE_Mv, "Memory operand of operand size") \ ENUM_ENTRY(TYPE_Rv, "Register operand of operand size") \ diff --git a/lib/Target/X86/SSEDomainFix.cpp b/lib/Target/X86/SSEDomainFix.cpp index 5e80845..dab070e 100644 --- a/lib/Target/X86/SSEDomainFix.cpp +++ b/lib/Target/X86/SSEDomainFix.cpp @@ -155,9 +155,7 @@ char SSEDomainFixPass::ID = 0; /// Translate TRI register number to an index into our smaller tables of /// interesting registers. Return -1 for boring registers. int SSEDomainFixPass::RegIndex(unsigned reg) { - // Registers are sorted lexicographically. 
- // We just need them to be consecutive, ordering doesn't matter. - assert(X86::XMM9 == X86::XMM0+NumRegs-1 && "Unexpected sort"); + assert(X86::XMM15 == X86::XMM0+NumRegs-1 && "Unexpected sort"); reg -= X86::XMM0; return reg < NumRegs ? (int) reg : -1; } diff --git a/lib/Target/X86/X86AsmBackend.cpp b/lib/Target/X86/X86AsmBackend.cpp index ba9c1d0..151087f 100644 --- a/lib/Target/X86/X86AsmBackend.cpp +++ b/lib/Target/X86/X86AsmBackend.cpp @@ -12,6 +12,7 @@ #include "X86FixupKinds.h" #include "llvm/ADT/Twine.h" #include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" @@ -43,20 +44,19 @@ public: X86AsmBackend(const Target &T) : TargetAsmBackend(T) {} - void ApplyFixup(const MCAsmFixup &Fixup, MCDataFragment &DF, + void ApplyFixup(const MCFixup &Fixup, MCDataFragment &DF, uint64_t Value) const { - unsigned Size = 1 << getFixupKindLog2Size(Fixup.Kind); + unsigned Size = 1 << getFixupKindLog2Size(Fixup.getKind()); - assert(Fixup.Offset + Size <= DF.getContents().size() && + assert(Fixup.getOffset() + Size <= DF.getContents().size() && "Invalid fixup offset!"); for (unsigned i = 0; i != Size; ++i) - DF.getContents()[Fixup.Offset + i] = uint8_t(Value >> (i * 8)); + DF.getContents()[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8)); } - bool MayNeedRelaxation(const MCInst &Inst, - const SmallVectorImpl<MCAsmFixup> &Fixups) const; + bool MayNeedRelaxation(const MCInst &Inst) const; - void RelaxInstruction(const MCInstFragment *IF, MCInst &Res) const; + void RelaxInstruction(const MCInst &Inst, MCInst &Res) const; bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const; }; @@ -75,6 +75,7 @@ static unsigned getRelaxedOpcode(unsigned Op) { case X86::JG_1: return X86::JG_4; case X86::JLE_1: return X86::JLE_4; case X86::JL_1: return X86::JL_4; + case X86::TAILJMP_1: case X86::JMP_1: return X86::JMP_4; case X86::JNE_1: return X86::JNE_4; case X86::JNO_1: return X86::JNO_4; @@ -86,35 +87,33 @@ static unsigned getRelaxedOpcode(unsigned Op) { } } -bool X86AsmBackend::MayNeedRelaxation(const MCInst &Inst, - const SmallVectorImpl<MCAsmFixup> &Fixups) const { - // Check for a 1byte pcrel fixup, and enforce that we would know how to relax - // this instruction. - for (unsigned i = 0, e = Fixups.size(); i != e; ++i) { - if (unsigned(Fixups[i].Kind) == X86::reloc_pcrel_1byte) { - assert(getRelaxedOpcode(Inst.getOpcode()) != Inst.getOpcode()); - return true; - } - } +bool X86AsmBackend::MayNeedRelaxation(const MCInst &Inst) const { + // Check if this instruction is ever relaxable. + if (getRelaxedOpcode(Inst.getOpcode()) == Inst.getOpcode()) + return false; - return false; + // If so, just assume it can be relaxed. Once we support relaxing more complex + // instructions we should check that the instruction actually has symbolic + // operands before doing this, but we need to be careful about things like + // PCrel. + return true; } // FIXME: Can tblgen help at all here to verify there aren't other instructions // we can relax? -void X86AsmBackend::RelaxInstruction(const MCInstFragment *IF, - MCInst &Res) const { +void X86AsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const { // The only relaxations X86 does is from a 1byte pcrel to a 4byte pcrel. 
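// Relaxation in one line: a 1-byte pc-relative branch whose displacement no
// longer fits in a signed byte is rewritten to the 4-byte-immediate opcode
// (JMP_1 -> JMP_4 and friends, per getRelaxedOpcode above). The range test
// the assembler effectively applies, as a self-contained sketch:
#include <cstdint>
static bool needsWideBranch(int64_t PCRelOffset) {
  return PCRelOffset < INT8_MIN || PCRelOffset > INT8_MAX;  // outside [-128,127]
}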
- unsigned RelaxedOp = getRelaxedOpcode(IF->getInst().getOpcode()); + unsigned RelaxedOp = getRelaxedOpcode(Inst.getOpcode()); - if (RelaxedOp == IF->getInst().getOpcode()) { + if (RelaxedOp == Inst.getOpcode()) { SmallString<256> Tmp; raw_svector_ostream OS(Tmp); - IF->getInst().dump_pretty(OS); + Inst.dump_pretty(OS); + OS << "\n"; report_fatal_error("unexpected instruction to relax: " + OS.str()); } - Res = IF->getInst(); + Res = Inst; Res.setOpcode(RelaxedOp); } @@ -199,6 +198,18 @@ public: } }; +class ELFX86_32AsmBackend : public ELFX86AsmBackend { +public: + ELFX86_32AsmBackend(const Target &T) + : ELFX86AsmBackend(T) {} +}; + +class ELFX86_64AsmBackend : public ELFX86AsmBackend { +public: + ELFX86_64AsmBackend(const Target &T) + : ELFX86AsmBackend(T) {} +}; + class DarwinX86AsmBackend : public X86AsmBackend { public: DarwinX86AsmBackend(const Target &T) @@ -210,7 +221,8 @@ public: bool isVirtualSection(const MCSection &Section) const { const MCSectionMachO &SMO = static_cast<const MCSectionMachO&>(Section); return (SMO.getType() == MCSectionMachO::S_ZEROFILL || - SMO.getType() == MCSectionMachO::S_GB_ZEROFILL); + SMO.getType() == MCSectionMachO::S_GB_ZEROFILL || + SMO.getType() == MCSectionMachO::S_THREAD_LOCAL_ZEROFILL); } }; @@ -247,6 +259,26 @@ public: const MCSectionMachO &SMO = static_cast<const MCSectionMachO&>(Section); return SMO.getType() == MCSectionMachO::S_CSTRING_LITERALS; } + + virtual bool isSectionAtomizable(const MCSection &Section) const { + const MCSectionMachO &SMO = static_cast<const MCSectionMachO&>(Section); + // Fixed sized data sections are uniqued, they cannot be diced into atoms. + switch (SMO.getType()) { + default: + return true; + + case MCSectionMachO::S_4BYTE_LITERALS: + case MCSectionMachO::S_8BYTE_LITERALS: + case MCSectionMachO::S_16BYTE_LITERALS: + case MCSectionMachO::S_LITERAL_POINTERS: + case MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS: + case MCSectionMachO::S_LAZY_SYMBOL_POINTERS: + case MCSectionMachO::S_MOD_INIT_FUNC_POINTERS: + case MCSectionMachO::S_MOD_TERM_FUNC_POINTERS: + case MCSectionMachO::S_INTERPOSING: + return false; + } + } }; } @@ -257,7 +289,7 @@ TargetAsmBackend *llvm::createX86_32AsmBackend(const Target &T, case Triple::Darwin: return new DarwinX86_32AsmBackend(T); default: - return new ELFX86AsmBackend(T); + return new ELFX86_32AsmBackend(T); } } @@ -267,6 +299,6 @@ TargetAsmBackend *llvm::createX86_64AsmBackend(const Target &T, case Triple::Darwin: return new DarwinX86_64AsmBackend(T); default: - return new ELFX86AsmBackend(T); + return new ELFX86_64AsmBackend(T); } } diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.h b/lib/Target/X86/X86COFFMachineModuleInfo.h index eece462..98ab2a6 100644 --- a/lib/Target/X86/X86COFFMachineModuleInfo.h +++ b/lib/Target/X86/X86COFFMachineModuleInfo.h @@ -15,7 +15,7 @@ #define X86COFF_MACHINEMODULEINFO_H #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/ADT/StringSet.h" +#include "llvm/ADT/DenseSet.h" #include "X86MachineFunctionInfo.h" namespace llvm { @@ -25,18 +25,18 @@ namespace llvm { /// X86COFFMachineModuleInfo - This is a MachineModuleInfoImpl implementation /// for X86 COFF targets. 
class X86COFFMachineModuleInfo : public MachineModuleInfoImpl { - StringSet<> CygMingStubs; + DenseSet<MCSymbol const *> Externals; public: X86COFFMachineModuleInfo(const MachineModuleInfo &) {} virtual ~X86COFFMachineModuleInfo(); - void addExternalFunction(StringRef Name) { - CygMingStubs.insert(Name); + void addExternalFunction(MCSymbol* Symbol) { + Externals.insert(Symbol); } - typedef StringSet<>::const_iterator stub_iterator; - stub_iterator stub_begin() const { return CygMingStubs.begin(); } - stub_iterator stub_end() const { return CygMingStubs.end(); } + typedef DenseSet<MCSymbol const *>::const_iterator externals_iterator; + externals_iterator externals_begin() const { return Externals.begin(); } + externals_iterator externals_end() const { return Externals.end(); } }; diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td index fd15efd..a5774e1 100644 --- a/lib/Target/X86/X86CallingConv.td +++ b/lib/Target/X86/X86CallingConv.td @@ -307,6 +307,20 @@ def CC_X86_32_FastCall : CallingConv<[ CCDelegateTo<CC_X86_32_Common> ]>; +def CC_X86_32_ThisCall : CallingConv<[ + // Promote i8/i16 arguments to i32. + CCIfType<[i8, i16], CCPromoteToType<i32>>, + + // The 'nest' parameter, if any, is passed in EAX. + CCIfNest<CCAssignToReg<[EAX]>>, + + // The first integer argument is passed in ECX + CCIfType<[i32], CCAssignToReg<[ECX]>>, + + // Otherwise, same as everything else. + CCDelegateTo<CC_X86_32_Common> +]>; + def CC_X86_32_FastCC : CallingConv<[ // Handles byval parameters. Note that we can't rely on the delegation // to CC_X86_32_Common for this because that happens after code that diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index ff9208c..1bc5eb7 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -180,6 +180,8 @@ CCAssignFn *X86FastISel::CCAssignFnForCall(CallingConv::ID CC, if (CC == CallingConv::X86_FastCall) return CC_X86_32_FastCall; + else if (CC == CallingConv::X86_ThisCall) + return CC_X86_32_ThisCall; else if (CC == CallingConv::Fast) return CC_X86_32_FastCC; else if (CC == CallingConv::GHC) @@ -324,7 +326,8 @@ bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val, bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT, unsigned &ResultReg) { - unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc, Src); + unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc, + Src, /*TODO: Kill=*/false); if (RR != 0) { ResultReg = RR; @@ -416,7 +419,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { (S == 1 || S == 2 || S == 4 || S == 8)) { // Scaled-index addressing. Scale = S; - IndexReg = getRegForGEPIndex(Op); + IndexReg = getRegForGEPIndex(Op).first; if (IndexReg == 0) return false; } else @@ -802,7 +805,7 @@ bool X86FastISel::X86SelectZExt(const Instruction *I) { unsigned ResultReg = getRegForValue(I->getOperand(0)); if (ResultReg == 0) return false; // Set the high bits to zero. 
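The new CC_X86_32_ThisCall mirrors MSVC's __thiscall: the first integer argument, in practice the implicit 'this' pointer, travels in ECX, and everything else falls through to the common 32-bit rules. Roughly what that buys at the source level; this sketch uses the GCC/Clang attribute spelling, is illustrative only, and is meaningful on 32-bit x86 targets only:

    // Under thiscall, 'self' arrives in ECX and the callee pops any
    // stack-passed arguments (elsewhere the attribute is ignored).
    struct Point { int x, y; };

    __attribute__((thiscall)) static int getX(Point *self) {
      return self->x;
    }

    int main() {
      Point p = {7, 0};
      return getX(&p) == 7 ? 0 : 1;   // expect exit status 0
    }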
- ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg); + ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false); if (ResultReg == 0) return false; UpdateValueMap(I, ResultReg); return true; @@ -913,7 +916,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { RI = MBB->rbegin(), RE = MBB->rend(); RI != RE; ++RI) { const MachineInstr &MI = *RI; - if (MI.modifiesRegister(Reg)) { + if (MI.definesRegister(Reg)) { unsigned Src, Dst, SrcSR, DstSR; if (getInstrInfo()->isMoveInstr(MI, Src, Dst, SrcSR, DstSR)) { @@ -1019,14 +1022,14 @@ bool X86FastISel::X86SelectShift(const Instruction *I) { unsigned Op1Reg = getRegForValue(I->getOperand(1)); if (Op1Reg == 0) return false; - TII.copyRegToReg(*MBB, MBB->end(), CReg, Op1Reg, RC, RC); + TII.copyRegToReg(*MBB, MBB->end(), CReg, Op1Reg, RC, RC, DL); // The shift instruction uses X86::CL. If we defined a super-register // of X86::CL, emit an EXTRACT_SUBREG to precisely describe what // we're doing here. if (CReg != X86::CL) BuildMI(MBB, DL, TII.get(TargetOpcode::EXTRACT_SUBREG), X86::CL) - .addReg(CReg).addImm(X86::SUBREG_8BIT); + .addReg(CReg).addImm(X86::sub_8bit); unsigned ResultReg = createResultReg(RC); BuildMI(MBB, DL, TII.get(OpReg), ResultReg).addReg(Op0Reg); @@ -1133,7 +1136,8 @@ bool X86FastISel::X86SelectTrunc(const Instruction *I) { // Then issue an extract_subreg. unsigned ResultReg = FastEmitInst_extractsubreg(MVT::i8, - CopyReg, X86::SUBREG_8BIT); + CopyReg, /*Kill=*/true, + X86::sub_8bit); if (!ResultReg) return false; @@ -1436,7 +1440,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { } case CCValAssign::BCvt: { unsigned BC = FastEmit_r(ArgVT.getSimpleVT(), VA.getLocVT().getSimpleVT(), - ISD::BIT_CONVERT, Arg); + ISD::BIT_CONVERT, Arg, /*TODO: Kill=*/false); assert(BC != 0 && "Failed to emit a bitcast!"); Arg = BC; ArgVT = VA.getLocVT(); @@ -1447,7 +1451,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { if (VA.isRegLoc()) { TargetRegisterClass* RC = TLI.getRegClassFor(ArgVT); bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), VA.getLocReg(), - Arg, RC, RC); + Arg, RC, RC, DL); assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted; Emitted = true; RegArgs.push_back(VA.getLocReg()); @@ -1473,7 +1477,8 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { if (Subtarget->isPICStyleGOT()) { TargetRegisterClass *RC = X86::GR32RegisterClass; unsigned Base = getInstrInfo()->getGlobalBaseReg(&MF); - bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), X86::EBX, Base, RC, RC); + bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), X86::EBX, Base, RC, RC, + DL); assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted; Emitted = true; } @@ -1552,7 +1557,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { unsigned ResultReg = createResultReg(DstRC); bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, - RVLocs[0].getLocReg(), DstRC, SrcRC); + RVLocs[0].getLocReg(), DstRC, SrcRC, DL); assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted; Emitted = true; if (CopyVT != RVLocs[0].getValVT()) { diff --git a/lib/Target/X86/X86FloatingPointRegKill.cpp b/lib/Target/X86/X86FloatingPointRegKill.cpp index 541083f..747683d 100644 --- a/lib/Target/X86/X86FloatingPointRegKill.cpp +++ b/lib/Target/X86/X86FloatingPointRegKill.cpp @@ -14,7 +14,6 @@ #define DEBUG_TYPE "x86-codegen" #include "X86.h" #include "X86InstrInfo.h" -#include "X86Subtarget.h" #include "llvm/Instructions.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include 
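The X86SelectBranch tweak above from modifiesRegister to definesRegister matters because "modify" also covers implicit clobbers: the scan wants the instruction that actually produces the EFLAGS value, not one that merely trashes it. A toy operand model of the distinction (not the MachineInstr API):

    #include <vector>

    // A register is *defined* only by an explicit def operand, but
    // *modified* by a def or an implicit clobber.  A cmp defines EFLAGS;
    // a call merely clobbers it.
    struct Operand { int Reg; bool IsDef; };

    struct Inst {
      std::vector<Operand> Ops;
      std::vector<int> Clobbers;

      bool definesRegister(int Reg) const {
        for (const Operand &O : Ops)
          if (O.IsDef && O.Reg == Reg)
            return true;
        return false;
      }
      bool modifiesRegister(int Reg) const {
        if (definesRegister(Reg))
          return true;
        for (int C : Clobbers)
          if (C == Reg)
            return true;
        return false;
      }
    };

    int main() {
      const int EFLAGS = 1;
      Inst Cmp{{{EFLAGS, true}}, {}};
      Inst Call{{}, {EFLAGS}};
      return (Cmp.definesRegister(EFLAGS) && !Call.definesRegister(EFLAGS) &&
              Call.modifiesRegister(EFLAGS)) ? 0 : 1;
    }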
"llvm/CodeGen/MachineInstrBuilder.h" @@ -42,12 +41,70 @@ namespace { virtual bool runOnMachineFunction(MachineFunction &MF); - virtual const char *getPassName() const { return "X86 FP_REG_KILL inserter"; } + virtual const char *getPassName() const { + return "X86 FP_REG_KILL inserter"; + } }; char FPRegKiller::ID = 0; } -FunctionPass *llvm::createX87FPRegKillInserterPass() { return new FPRegKiller(); } +FunctionPass *llvm::createX87FPRegKillInserterPass() { + return new FPRegKiller(); +} + +/// isFPStackVReg - Return true if the specified vreg is from a fp stack +/// register class. +static bool isFPStackVReg(unsigned RegNo, const MachineRegisterInfo &MRI) { + if (!TargetRegisterInfo::isVirtualRegister(RegNo)) + return false; + + switch (MRI.getRegClass(RegNo)->getID()) { + default: return false; + case X86::RFP32RegClassID: + case X86::RFP64RegClassID: + case X86::RFP80RegClassID: + return true; + } +} + + +/// ContainsFPStackCode - Return true if the specific MBB has floating point +/// stack code, and thus needs an FP_REG_KILL. +static bool ContainsFPStackCode(MachineBasicBlock *MBB, + const MachineRegisterInfo &MRI) { + // Scan the block, looking for instructions that define fp stack vregs. + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); + I != E; ++I) { + if (I->getNumOperands() == 0 || !I->getOperand(0).isReg()) + continue; + + for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) { + if (!I->getOperand(op).isReg() || !I->getOperand(op).isDef()) + continue; + + if (isFPStackVReg(I->getOperand(op).getReg(), MRI)) + return true; + } + } + + // Check PHI nodes in successor blocks. These PHI's will be lowered to have + // a copy of the input value in this block, which is a definition of the + // value. + for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + E = MBB->succ_end(); SI != E; ++ SI) { + MachineBasicBlock *SuccBB = *SI; + for (MachineBasicBlock::iterator I = SuccBB->begin(), E = SuccBB->end(); + I != E; ++I) { + // All PHI nodes are at the top of the block. + if (!I->isPHI()) break; + + if (isFPStackVReg(I->getOperand(0).getReg(), MRI)) + return true; + } + } + + return false; +} bool FPRegKiller::runOnMachineFunction(MachineFunction &MF) { // If we are emitting FP stack code, scan the basic block to determine if this @@ -65,14 +122,13 @@ bool FPRegKiller::runOnMachineFunction(MachineFunction &MF) { // Fast-path: If nothing is using the x87 registers, we don't need to do // any scanning. 
- MachineRegisterInfo &MRI = MF.getRegInfo(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); if (MRI.getRegClassVirtRegs(X86::RFP80RegisterClass).empty() && MRI.getRegClassVirtRegs(X86::RFP64RegisterClass).empty() && MRI.getRegClassVirtRegs(X86::RFP32RegisterClass).empty()) return false; bool Changed = false; - const X86Subtarget &Subtarget = MF.getTarget().getSubtarget<X86Subtarget>(); MachineFunction::iterator MBBI = MF.begin(); MachineFunction::iterator EndMBB = MF.end(); for (; MBBI != EndMBB; ++MBBI) { @@ -87,48 +143,8 @@ bool FPRegKiller::runOnMachineFunction(MachineFunction &MF) { continue; } - bool ContainsFPCode = false; - for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); - !ContainsFPCode && I != E; ++I) { - if (I->getNumOperands() != 0 && I->getOperand(0).isReg()) { - const TargetRegisterClass *clas; - for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) { - if (I->getOperand(op).isReg() && I->getOperand(op).isDef() && - TargetRegisterInfo::isVirtualRegister(I->getOperand(op).getReg()) && - ((clas = MRI.getRegClass(I->getOperand(op).getReg())) == - X86::RFP32RegisterClass || - clas == X86::RFP64RegisterClass || - clas == X86::RFP80RegisterClass)) { - ContainsFPCode = true; - break; - } - } - } - } - // Check PHI nodes in successor blocks. These PHI's will be lowered to have - // a copy of the input value in this block. In SSE mode, we only care about - // 80-bit values. - if (!ContainsFPCode) { - // Final check, check LLVM BB's that are successors to the LLVM BB - // corresponding to BB for FP PHI nodes. - const BasicBlock *LLVMBB = MBB->getBasicBlock(); - const PHINode *PN; - for (succ_const_iterator SI = succ_begin(LLVMBB), E = succ_end(LLVMBB); - !ContainsFPCode && SI != E; ++SI) { - for (BasicBlock::const_iterator II = SI->begin(); - (PN = dyn_cast<PHINode>(II)); ++II) { - if (PN->getType()==Type::getX86_FP80Ty(LLVMBB->getContext()) || - (!Subtarget.hasSSE1() && PN->getType()->isFloatingPointTy()) || - (!Subtarget.hasSSE2() && - PN->getType()==Type::getDoubleTy(LLVMBB->getContext()))) { - ContainsFPCode = true; - break; - } - } - } - } - // Finally, if we found any FP code, emit the FP_REG_KILL instruction. - if (ContainsFPCode) { + // If we find any FP stack code, emit the FP_REG_KILL instruction. + if (ContainsFPStackCode(MBB, MRI)) { BuildMI(*MBB, MBBI->getFirstTerminator(), DebugLoc(), MF.getTarget().getInstrInfo()->get(X86::FP_REG_KILL)); ++NumFPKill; diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index fd8bb1e..0f64383 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -1693,7 +1693,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { Result, CurDAG->getTargetConstant(8, MVT::i8)), 0); // Then truncate it down to i8. - Result = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT, dl, + Result = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result); } else { Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, @@ -1834,7 +1834,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { CurDAG->getTargetConstant(8, MVT::i8)), 0); // Then truncate it down to i8. - Result = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT, dl, + Result = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result); } else { Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, @@ -1883,7 +1883,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { } // Extract the l-register. 
- SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT, dl, + SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Reg); // Emit a testb. @@ -1912,7 +1912,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { Reg.getValueType(), Reg, RC), 0); // Extract the h-register. - SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT_HI, dl, + SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit_hi, dl, MVT::i8, Reg); // Emit a testb. No special NOREX tricks are needed since there's @@ -1930,7 +1930,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDValue Reg = N0.getNode()->getOperand(0); // Extract the 16-bit subregister. - SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_16BIT, dl, + SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_16bit, dl, MVT::i16, Reg); // Emit a testw. @@ -1946,7 +1946,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDValue Reg = N0.getNode()->getOperand(0); // Extract the 32-bit subregister. - SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_32BIT, dl, + SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_32bit, dl, MVT::i32, Reg); // Emit a testl. diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 6ce9ab7..b02c33d 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -94,7 +94,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // X86 is weird, it always uses i8 for shift amounts and setcc results. setShiftAmountType(MVT::i8); setBooleanContents(ZeroOrOneBooleanContent); - setSchedulingPreference(SchedulingForRegPressure); + setSchedulingPreference(Sched::RegPressure); setStackPointerRegisterToSaveRestore(X86StackPtr); if (Subtarget->isTargetDarwin()) { @@ -145,13 +145,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote); setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Expand); } else if (!UseSoftFloat) { - if (X86ScalarSSEf64) { - // We have an impenetrably clever algorithm for ui64->double only. - setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom); - } + // We have an algorithm for SSE2->double, and we turn this into a + // 64-bit FILD followed by conditional FADD for other targets. + setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom); // We have an algorithm for SSE2, and we turn this into a 64-bit // FILD for other targets. - setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom); + setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom); } // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have @@ -215,9 +214,17 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) } // TODO: when we have SSE, these could be more efficient, by using movd/movq. - if (!X86ScalarSSEf64) { + if (!X86ScalarSSEf64) { setOperationAction(ISD::BIT_CONVERT , MVT::f32 , Expand); setOperationAction(ISD::BIT_CONVERT , MVT::i32 , Expand); + if (Subtarget->is64Bit()) { + setOperationAction(ISD::BIT_CONVERT , MVT::f64 , Expand); + // Without SSE, i64->f64 goes through memory; i64->MMX is Legal. 
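The renamed sub-register test selections above rest on a simple identity: when a mask clears everything above bit 7 (or 15, or 31), testing the masked value is the same as testing only the low sub-register, so the narrower testb/testw/testl is exact. Checkable in plain C++:

    #include <cassert>
    #include <cstdint>

    // Bits cleared by the mask can never affect the test, so the wide
    // and narrow forms agree for any mask confined to the low byte.
    static bool testLow8(uint32_t X, uint8_t Mask) {
      bool Wide   = (X & Mask) != 0;           // testl on the full register
      bool Narrow = (uint8_t(X) & Mask) != 0;  // testb on the 8-bit subreg
      assert(Wide == Narrow);
      return Narrow;
    }

    int main() {
      assert(testLow8(0x12345601u, 0xFF));
      assert(!testLow8(0xFFFFFF00u, 0xFF));
    }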
+ if (Subtarget->hasMMX() && !DisableMMX) + setOperationAction(ISD::BIT_CONVERT , MVT::i64 , Custom); + else + setOperationAction(ISD::BIT_CONVERT , MVT::i64 , Expand); + } } // Scalar integer divide and remainder are lowered to use operations that @@ -679,6 +686,14 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::VSETCC, MVT::v8i8, Custom); setOperationAction(ISD::VSETCC, MVT::v4i16, Custom); setOperationAction(ISD::VSETCC, MVT::v2i32, Custom); + + if (!X86ScalarSSEf64 && Subtarget->is64Bit()) { + setOperationAction(ISD::BIT_CONVERT, MVT::v8i8, Custom); + setOperationAction(ISD::BIT_CONVERT, MVT::v4i16, Custom); + setOperationAction(ISD::BIT_CONVERT, MVT::v2i32, Custom); + setOperationAction(ISD::BIT_CONVERT, MVT::v2f32, Custom); + setOperationAction(ISD::BIT_CONVERT, MVT::v1i64, Custom); + } } if (!UseSoftFloat && Subtarget->hasSSE1()) { @@ -1244,10 +1259,8 @@ X86TargetLowering::LowerReturn(SDValue Chain, MachineFunction &MF = DAG.getMachineFunction(); X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); unsigned Reg = FuncInfo->getSRetReturnReg(); - if (!Reg) { - Reg = MRI.createVirtualRegister(getRegClassFor(MVT::i64)); - FuncInfo->setSRetReturnReg(Reg); - } + assert(Reg && + "SRetReturnReg should have been set in LowerFormalArguments()."); SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy()); Chain = DAG.getCopyToReg(Chain, dl, X86::RAX, Val, Flag); @@ -1384,6 +1397,8 @@ bool X86TargetLowering::IsCalleePop(bool IsVarArg, return !Subtarget->is64Bit(); case CallingConv::X86_FastCall: return !Subtarget->is64Bit(); + case CallingConv::X86_ThisCall: + return !Subtarget->is64Bit(); case CallingConv::Fast: return GuaranteedTailCallOpt; case CallingConv::GHC: @@ -1405,6 +1420,8 @@ CCAssignFn *X86TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const { if (CC == CallingConv::X86_FastCall) return CC_X86_32_FastCall; + else if (CC == CallingConv::X86_ThisCall) + return CC_X86_32_ThisCall; else if (CC == CallingConv::Fast) return CC_X86_32_FastCC; else if (CC == CallingConv::GHC) @@ -1596,7 +1613,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, // If the function takes variable number of arguments, make a frame index for // the start of the first vararg value... for expansion of llvm.va_start. if (isVarArg) { - if (Is64Bit || CallConv != CallingConv::X86_FastCall) { + if (Is64Bit || (CallConv != CallingConv::X86_FastCall && + CallConv != CallingConv::X86_ThisCall)) { FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize, true, false)); } @@ -1716,7 +1734,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, if (!Is64Bit) { // RegSaveFrameIndex is X86-64 only. FuncInfo->setRegSaveFrameIndex(0xAAAAAAA); - if (CallConv == CallingConv::X86_FastCall) + if (CallConv == CallingConv::X86_FastCall || + CallConv == CallingConv::X86_ThisCall) // fastcc functions can't have varargs. FuncInfo->setVarArgsFrameIndex(0xAAAAAAA); } @@ -5272,7 +5291,7 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA, } // TLSADDR will be codegen'ed as call. Inform MFI that function has calls. 
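Adding X86_ThisCall to IsCalleePop follows from how callee-pop works: the callee returns with ret imm16, which requires a fixed argument size, so varargs are excluded and the convention only exists in 32-bit mode. A sketch of the dispatch under those assumptions (simplified; the real function also consults GuaranteedTailCallOpt for fastcc):

    enum CallConv { CC_C, CC_X86_FastCall, CC_X86_ThisCall };

    // Hypothetical reduction of IsCalleePop: varargs always disqualify,
    // and the 32-bit-only conventions disqualify in 64-bit mode.
    static bool isCalleePop(CallConv CC, bool IsVarArg, bool Is64Bit) {
      if (IsVarArg)
        return false;
      switch (CC) {
      case CC_X86_FastCall:
      case CC_X86_ThisCall:
        return !Is64Bit;
      default:
        return false;
      }
    }

    int main() {
      return isCalleePop(CC_X86_ThisCall, /*IsVarArg=*/false,
                         /*Is64Bit=*/false) ? 0 : 1;
    }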
- MFI->setHasCalls(true); + MFI->setAdjustsStack(true); SDValue Flag = Chain.getValue(1); return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Flag); @@ -5462,7 +5481,7 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, } SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, - SDValue StackSlot, + SDValue StackSlot, SelectionDAG &DAG) const { // Build the FILD DebugLoc dl = Op.getDebugLoc(); @@ -5636,35 +5655,72 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, SDValue N0 = Op.getOperand(0); DebugLoc dl = Op.getDebugLoc(); - // Now not UINT_TO_FP is legal (it's marked custom), dag combiner won't + // Since UINT_TO_FP is legal (it's marked custom), dag combiner won't // optimize it to a SINT_TO_FP when the sign bit is known zero. Perform // the optimization here. if (DAG.SignBitIsZero(N0)) return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(), N0); EVT SrcVT = N0.getValueType(); - if (SrcVT == MVT::i64) { - // We only handle SSE2 f64 target here; caller can expand the rest. - if (Op.getValueType() != MVT::f64 || !X86ScalarSSEf64) - return SDValue(); - + EVT DstVT = Op.getValueType(); + if (SrcVT == MVT::i64 && DstVT == MVT::f64 && X86ScalarSSEf64) return LowerUINT_TO_FP_i64(Op, DAG); - } else if (SrcVT == MVT::i32 && X86ScalarSSEf64) { + else if (SrcVT == MVT::i32 && X86ScalarSSEf64) return LowerUINT_TO_FP_i32(Op, DAG); - } - - assert(SrcVT == MVT::i32 && "Unknown UINT_TO_FP to lower!"); // Make a 64-bit buffer, and use it to build an FILD. SDValue StackSlot = DAG.CreateStackTemporary(MVT::i64); - SDValue WordOff = DAG.getConstant(4, getPointerTy()); - SDValue OffsetSlot = DAG.getNode(ISD::ADD, dl, - getPointerTy(), StackSlot, WordOff); - SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), + if (SrcVT == MVT::i32) { + SDValue WordOff = DAG.getConstant(4, getPointerTy()); + SDValue OffsetSlot = DAG.getNode(ISD::ADD, dl, + getPointerTy(), StackSlot, WordOff); + SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), + StackSlot, NULL, 0, false, false, 0); + SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, MVT::i32), + OffsetSlot, NULL, 0, false, false, 0); + SDValue Fild = BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG); + return Fild; + } + + assert(SrcVT == MVT::i64 && "Unexpected type in UINT_TO_FP"); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), StackSlot, NULL, 0, false, false, 0); - SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, MVT::i32), - OffsetSlot, NULL, 0, false, false, 0); - return BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG); + // For i64 source, we need to add the appropriate power of 2 if the input + // was negative. This is the same as the optimization in + // DAGTypeLegalizer::ExpandIntOp_UNIT_TO_FP, and for it to be safe here, + // we must be careful to do the computation in x87 extended precision, not + // in SSE. (The generic code can't know it's OK to do this, or how to.) + SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other); + SDValue Ops[] = { Store, StackSlot, DAG.getValueType(MVT::i64) }; + SDValue Fild = DAG.getNode(X86ISD::FILD, dl, Tys, Ops, 3); + + APInt FF(32, 0x5F800000ULL); + + // Check whether the sign bit is set. + SDValue SignSet = DAG.getSetCC(dl, getSetCCResultType(MVT::i64), + Op.getOperand(0), DAG.getConstant(0, MVT::i64), + ISD::SETLT); + + // Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits. 
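The new i64 UINT_TO_FP path above is worth restating in scalar terms: FILD interprets the 64 bits as signed, so when the sign bit was set the result is short by exactly 2^64, and 0x5F800000 is precisely 2^64 encoded as an IEEE-754 float, hence the constant-pool "fudge". The add is done in 80-bit x87 precision to dodge double rounding. A reference model, not the DAG code (the casts assume two's complement, and long double only approximates the x87 width on some hosts):

    #include <cassert>
    #include <cstdint>

    static double u64ToF64(uint64_t U) {
      long double D = (long double)(int64_t)U;   // the FILD
      if ((int64_t)U < 0)                        // sign bit was set
        D += 18446744073709551616.0L;            // add back 2^64 (the fudge)
      return (double)D;                          // the final FP_ROUND
    }

    int main() {
      assert(u64ToF64(0) == 0.0);
      assert(u64ToF64(1ULL << 63) == 9223372036854775808.0);     // 2^63
      assert(u64ToF64(UINT64_MAX) == 18446744073709551616.0);    // rounds up
    }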
+ SDValue FudgePtr = DAG.getConstantPool( + ConstantInt::get(*DAG.getContext(), FF.zext(64)), + getPointerTy()); + + // Get a pointer to FF if the sign bit was set, or to 0 otherwise. + SDValue Zero = DAG.getIntPtrConstant(0); + SDValue Four = DAG.getIntPtrConstant(4); + SDValue Offset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(), SignSet, + Zero, Four); + FudgePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(), FudgePtr, Offset); + + // Load the value out, extending it from f32 to f80. + // FIXME: Avoid the extend by constructing the right constant pool? + SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, MVT::f80, DAG.getEntryNode(), + FudgePtr, PseudoSourceValue::getConstantPool(), + 0, MVT::f32, false, false, 4); + // Extend everything to 80 bits to force it to be done on x87. + SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::f80, Fild, Fudge); + return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add, DAG.getIntPtrConstant(0)); } std::pair<SDValue,SDValue> X86TargetLowering:: @@ -6593,221 +6649,6 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, return DAG.getMergeValues(Ops1, 2, dl); } -SDValue -X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, - SDValue Chain, - SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, - bool isVolatile, - const Value *DstSV, - uint64_t DstSVOff) const { - ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); - - // If not DWORD aligned or size is more than the threshold, call the library. - // The libc version is likely to be faster for these cases. It can use the - // address value and run time information about the CPU. - if ((Align & 3) != 0 || - !ConstantSize || - ConstantSize->getZExtValue() > - getSubtarget()->getMaxInlineSizeThreshold()) { - SDValue InFlag(0, 0); - - // Check to see if there is a specialized entry-point for memory zeroing. - ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src); - - if (const char *bzeroEntry = V && - V->isNullValue() ? Subtarget->getBZeroEntry() : 0) { - EVT IntPtr = getPointerTy(); - const Type *IntPtrTy = TD->getIntPtrType(*DAG.getContext()); - TargetLowering::ArgListTy Args; - TargetLowering::ArgListEntry Entry; - Entry.Node = Dst; - Entry.Ty = IntPtrTy; - Args.push_back(Entry); - Entry.Node = Size; - Args.push_back(Entry); - std::pair<SDValue,SDValue> CallResult = - LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()), - false, false, false, false, - 0, CallingConv::C, false, /*isReturnValueUsed=*/false, - DAG.getExternalSymbol(bzeroEntry, IntPtr), Args, DAG, dl); - return CallResult.second; - } - - // Otherwise have the target-independent code call memset. - return SDValue(); - } - - uint64_t SizeVal = ConstantSize->getZExtValue(); - SDValue InFlag(0, 0); - EVT AVT; - SDValue Count; - ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Src); - unsigned BytesLeft = 0; - bool TwoRepStos = false; - if (ValC) { - unsigned ValReg; - uint64_t Val = ValC->getZExtValue() & 255; - - // If the value is a constant, then we can potentially use larger sets. 
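The alignment switch just below builds the rep;stos store value by splatting the constant byte across the unit width with successive shift-or doubling. The same computation, stand-alone:

    #include <cassert>
    #include <cstdint>

    // Widen a one-byte pattern to the rep;stos unit width, exactly the
    // doubling sequence in the deleted switch.
    static uint64_t splatByte(uint8_t B, unsigned Bytes) {
      uint64_t Val = B;
      if (Bytes >= 2) Val = (Val << 8)  | Val;  // 0xAB -> 0xABAB
      if (Bytes >= 4) Val = (Val << 16) | Val;  // -> 0xABABABAB
      if (Bytes >= 8) Val = (Val << 32) | Val;  // -> the full qword
      return Val;
    }

    int main() {
      assert(splatByte(0xAB, 4) == 0xABABABABULL);
      assert(splatByte(0xAB, 8) == 0xABABABABABABABABULL);
    }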
- switch (Align & 3) { - case 2: // WORD aligned - AVT = MVT::i16; - ValReg = X86::AX; - Val = (Val << 8) | Val; - break; - case 0: // DWORD aligned - AVT = MVT::i32; - ValReg = X86::EAX; - Val = (Val << 8) | Val; - Val = (Val << 16) | Val; - if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) { // QWORD aligned - AVT = MVT::i64; - ValReg = X86::RAX; - Val = (Val << 32) | Val; - } - break; - default: // Byte aligned - AVT = MVT::i8; - ValReg = X86::AL; - Count = DAG.getIntPtrConstant(SizeVal); - break; - } - - if (AVT.bitsGT(MVT::i8)) { - unsigned UBytes = AVT.getSizeInBits() / 8; - Count = DAG.getIntPtrConstant(SizeVal / UBytes); - BytesLeft = SizeVal % UBytes; - } - - Chain = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, AVT), - InFlag); - InFlag = Chain.getValue(1); - } else { - AVT = MVT::i8; - Count = DAG.getIntPtrConstant(SizeVal); - Chain = DAG.getCopyToReg(Chain, dl, X86::AL, Src, InFlag); - InFlag = Chain.getValue(1); - } - - Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX : - X86::ECX, - Count, InFlag); - InFlag = Chain.getValue(1); - Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI : - X86::EDI, - Dst, InFlag); - InFlag = Chain.getValue(1); - - SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); - SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag }; - Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops, array_lengthof(Ops)); - - if (TwoRepStos) { - InFlag = Chain.getValue(1); - Count = Size; - EVT CVT = Count.getValueType(); - SDValue Left = DAG.getNode(ISD::AND, dl, CVT, Count, - DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT)); - Chain = DAG.getCopyToReg(Chain, dl, (CVT == MVT::i64) ? X86::RCX : - X86::ECX, - Left, InFlag); - InFlag = Chain.getValue(1); - Tys = DAG.getVTList(MVT::Other, MVT::Flag); - SDValue Ops[] = { Chain, DAG.getValueType(MVT::i8), InFlag }; - Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops, array_lengthof(Ops)); - } else if (BytesLeft) { - // Handle the last 1 - 7 bytes. - unsigned Offset = SizeVal - BytesLeft; - EVT AddrVT = Dst.getValueType(); - EVT SizeVT = Size.getValueType(); - - Chain = DAG.getMemset(Chain, dl, - DAG.getNode(ISD::ADD, dl, AddrVT, Dst, - DAG.getConstant(Offset, AddrVT)), - Src, - DAG.getConstant(BytesLeft, SizeVT), - Align, isVolatile, DstSV, DstSVOff + Offset); - } - - // TODO: Use a Tokenfactor, as in memcpy, instead of a single chain. - return Chain; -} - -SDValue -X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, - SDValue Chain, SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, - bool isVolatile, bool AlwaysInline, - const Value *DstSV, - uint64_t DstSVOff, - const Value *SrcSV, - uint64_t SrcSVOff) const { - // This requires the copy size to be a constant, preferrably - // within a subtarget-specific limit. - ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); - if (!ConstantSize) - return SDValue(); - uint64_t SizeVal = ConstantSize->getZExtValue(); - if (!AlwaysInline && SizeVal > getSubtarget()->getMaxInlineSizeThreshold()) - return SDValue(); - - /// If not DWORD aligned, call the library. - if ((Align & 3) != 0) - return SDValue(); - - // DWORD aligned - EVT AVT = MVT::i32; - if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) // QWORD aligned - AVT = MVT::i64; - - unsigned UBytes = AVT.getSizeInBits() / 8; - unsigned CountVal = SizeVal / UBytes; - SDValue Count = DAG.getIntPtrConstant(CountVal); - unsigned BytesLeft = SizeVal % UBytes; - - SDValue InFlag(0, 0); - Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? 
X86::RCX : - X86::ECX, - Count, InFlag); - InFlag = Chain.getValue(1); - Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI : - X86::EDI, - Dst, InFlag); - InFlag = Chain.getValue(1); - Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RSI : - X86::ESI, - Src, InFlag); - InFlag = Chain.getValue(1); - - SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); - SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag }; - SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops, - array_lengthof(Ops)); - - SmallVector<SDValue, 4> Results; - Results.push_back(RepMovs); - if (BytesLeft) { - // Handle the last 1 - 7 bytes. - unsigned Offset = SizeVal - BytesLeft; - EVT DstVT = Dst.getValueType(); - EVT SrcVT = Src.getValueType(); - EVT SizeVT = Size.getValueType(); - Results.push_back(DAG.getMemcpy(Chain, dl, - DAG.getNode(ISD::ADD, dl, DstVT, Dst, - DAG.getConstant(Offset, DstVT)), - DAG.getNode(ISD::ADD, dl, SrcVT, Src, - DAG.getConstant(Offset, SrcVT)), - DAG.getConstant(BytesLeft, SizeVT), - Align, isVolatile, AlwaysInline, - DstSV, DstSVOff + Offset, - SrcSV, SrcSVOff + Offset)); - } - - return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &Results[0], Results.size()); -} - SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); @@ -7138,6 +6979,9 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const { + MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + MFI->setReturnAddressIsTaken(true); + unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); DebugLoc dl = Op.getDebugLoc(); @@ -7161,6 +7005,7 @@ SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op, SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); MFI->setFrameAddressIsTaken(true); + EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); @@ -7298,6 +7143,7 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, break; } case CallingConv::X86_FastCall: + case CallingConv::X86_ThisCall: case CallingConv::Fast: // Pass 'nest' parameter in EAX. // Must be kept in sync with X86CallingConv.td @@ -7630,6 +7476,27 @@ SDValue X86TargetLowering::LowerREADCYCLECOUNTER(SDValue Op, return DAG.getMergeValues(Ops, 2, dl); } +SDValue X86TargetLowering::LowerBIT_CONVERT(SDValue Op, + SelectionDAG &DAG) const { + EVT SrcVT = Op.getOperand(0).getValueType(); + EVT DstVT = Op.getValueType(); + assert((Subtarget->is64Bit() && !Subtarget->hasSSE2() && + Subtarget->hasMMX() && !DisableMMX) && + "Unexpected custom BIT_CONVERT"); + assert((DstVT == MVT::i64 || + (DstVT.isVector() && DstVT.getSizeInBits()==64)) && + "Unexpected custom BIT_CONVERT"); + // i64 <=> MMX conversions are Legal. + if (SrcVT==MVT::i64 && DstVT.isVector()) + return Op; + if (DstVT==MVT::i64 && SrcVT.isVector()) + return Op; + // MMX <=> MMX conversions are Legal. + if (SrcVT.isVector() && DstVT.isVector()) + return Op; + // All other conversions need to be expanded. 
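The retired inline-memcpy lowering above had a simple shape: one rep;movs of Size / UnitBytes units, then an ordinary copy of the Size % UnitBytes tail at the matching offset. A host-side model with memcpy standing in for the REP_MOVS node:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    static void copyInUnits(uint8_t *Dst, const uint8_t *Src, size_t Size,
                            size_t UnitBytes) {
      size_t Count     = Size / UnitBytes;      // rep;movs iterations
      size_t BytesLeft = Size % UnitBytes;      // 0 .. UnitBytes-1 tail
      std::memcpy(Dst, Src, Count * UnitBytes); // the rep;movs part
      if (BytesLeft) {
        size_t Offset = Size - BytesLeft;       // same Offset computation
        std::memcpy(Dst + Offset, Src + Offset, BytesLeft);
      }
    }

    int main() {
      uint8_t S[11] = "0123456789", D[11] = {0};
      copyInUnits(D, S, sizeof(S), 8);          // one qword + 3-byte tail
      assert(std::memcmp(S, D, sizeof(S)) == 0);
    }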
+ return SDValue(); +} SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const { SDNode *Node = Op.getNode(); DebugLoc dl = Node->getDebugLoc(); @@ -7699,6 +7566,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SMULO: case ISD::UMULO: return LowerXALUO(Op, DAG); case ISD::READCYCLECOUNTER: return LowerREADCYCLECOUNTER(Op, DAG); + case ISD::BIT_CONVERT: return LowerBIT_CONVERT(Op, DAG); } } @@ -8203,9 +8071,15 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, MachineOperand& dest1Oper = bInstr->getOperand(0); MachineOperand& dest2Oper = bInstr->getOperand(1); MachineOperand* argOpers[2 + X86AddrNumOperands]; - for (int i=0; i < 2 + X86AddrNumOperands; ++i) + for (int i=0; i < 2 + X86AddrNumOperands; ++i) { argOpers[i] = &bInstr->getOperand(i+2); + // We use some of the operands multiple times, so conservatively just + // clear any kill flags that might be present. + if (argOpers[i]->isReg() && argOpers[i]->isUse()) + argOpers[i]->setIsKill(false); + } + // x86 address has 5 operands: base, index, scale, displacement, and segment. int lastAddrIndx = X86AddrNumOperands - 1; // [0,3] diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 440601f9..1ef1a7b 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -563,7 +563,7 @@ namespace llvm { return !X86ScalarSSEf64 || VT == MVT::f80; } - virtual const X86Subtarget* getSubtarget() const { + const X86Subtarget* getSubtarget() const { return Subtarget; } @@ -677,6 +677,7 @@ namespace llvm { SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const; SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot, SelectionDAG &DAG) const; + SDValue LowerBIT_CONVERT(SDValue op, SelectionDAG &DAG) const; SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const; @@ -743,23 +744,6 @@ namespace llvm { void ReplaceATOMIC_BINARY_64(SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG, unsigned NewOp) const; - SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, - SDValue Chain, - SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, - bool isVolatile, - const Value *DstSV, - uint64_t DstSVOff) const; - SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, - SDValue Chain, - SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, - bool isVolatile, bool AlwaysInline, - const Value *DstSV, - uint64_t DstSVOff, - const Value *SrcSV, - uint64_t SrcSVOff) const; - /// Utility function to emit string processing sse4.2 instructions /// that return in xmm0. /// This takes the instruction to expand, the associated machine basic diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td index f5c3dbf..97eb17c 100644 --- a/lib/Target/X86/X86Instr64bit.td +++ b/lib/Target/X86/X86Instr64bit.td @@ -18,7 +18,9 @@ // // 64-bits but only 32 bits are significant. -def i64i32imm : Operand<i64>; +def i64i32imm : Operand<i64> { + let ParserMatchClass = ImmSExti64i32AsmOperand; +} // 64-bits but only 32 bits are significant, and those bits are treated as being // pc relative. @@ -30,7 +32,7 @@ def i64i32imm_pcrel : Operand<i64> { // 64-bits but only 8 bits are significant. 
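The kill-flag scrub in EmitAtomicBit6432WithCustomInserter above exists because the expansion reads some operands more than once; a stale kill flag on the first use would tell the allocator the value dies too early. A toy equivalent of the conservative pass over the use operands:

    #include <vector>

    // Strip the kill flag from every use we might repeat, so the
    // register stays live across all copies of the operand.
    struct Operand { int Reg; bool IsUse; bool IsKill; };

    static void clearKillsOnUses(std::vector<Operand> &Ops) {
      for (Operand &O : Ops)
        if (O.IsUse)
          O.IsKill = false;
    }

    int main() {
      std::vector<Operand> Ops{{5, true, true}, {6, false, false}};
      clearKillsOnUses(Ops);
      return Ops[0].IsKill ? 1 : 0;   // expect 0: flag was cleared
    }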
def i64i8imm : Operand<i64> { - let ParserMatchClass = ImmSExt8AsmOperand; + let ParserMatchClass = ImmSExti64i8AsmOperand; } // Special i64mem for addresses of load folding tail calls. These are not @@ -198,6 +200,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in def TCRETURNri64 : I<0, Pseudo, (outs), (ins GR64_TC:$dst, i32imm:$offset, variable_ops), "#TC_RETURN $dst $offset", []>; + let mayLoad = 1 in def TCRETURNmi64 : I<0, Pseudo, (outs), (ins i64mem_TC:$dst, i32imm:$offset, variable_ops), "#TC_RETURN $dst $offset", []>; @@ -208,6 +211,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins GR64_TC:$dst, variable_ops), "jmp{q}\t{*}$dst # TAILCALL", []>; + let mayLoad = 1 in def TAILJMPm64 : I<0xFF, MRM4m, (outs), (ins i64mem_TC:$dst, variable_ops), "jmp{q}\t{*}$dst # TAILCALL", []>; } @@ -241,6 +245,7 @@ def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr), def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "popcnt{q}\t{$src, $dst|$dst, $src}", []>, XS; +let mayLoad = 1 in def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "popcnt{q}\t{$src, $dst|$dst, $src}", []>, XS; @@ -267,14 +272,16 @@ def PUSH64i8 : Ii8<0x6a, RawFrm, (outs), (ins i8imm:$imm), "push{q}\t$imm", []>; def PUSH64i16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm), "push{q}\t$imm", []>; -def PUSH64i32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm), +def PUSH64i32 : Ii32<0x68, RawFrm, (outs), (ins i64i32imm:$imm), "push{q}\t$imm", []>; } -let Defs = [RSP, EFLAGS], Uses = [RSP], mayLoad = 1 in -def POPFQ : I<0x9D, RawFrm, (outs), (ins), "popf{q}", []>, REX_W; -let Defs = [RSP], Uses = [RSP, EFLAGS], mayStore = 1 in -def PUSHFQ64 : I<0x9C, RawFrm, (outs), (ins), "pushf{q}", []>; +let Defs = [RSP, EFLAGS], Uses = [RSP], mayLoad = 1, neverHasSideEffects=1 in +def POPF64 : I<0x9D, RawFrm, (outs), (ins), "popfq", []>, + Requires<[In64BitMode]>; +let Defs = [RSP], Uses = [RSP, EFLAGS], mayStore = 1, neverHasSideEffects=1 in +def PUSHF64 : I<0x9C, RawFrm, (outs), (ins), "pushfq", []>, + Requires<[In64BitMode]>; def LEA64_32r : I<0x8D, MRMSrcMem, (outs GR32:$dst), (ins lea64_32mem:$src), @@ -309,16 +316,22 @@ def BSR64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), } // Defs = [EFLAGS] // Repeat string ops -let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI] in +let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI], isCodeGenOnly = 1 in def REP_MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq|rep movsq}", [(X86rep_movs i64)]>, REP; -let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI] in +let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI], isCodeGenOnly = 1 in def REP_STOSQ : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}", [(X86rep_stos i64)]>, REP; -def SCAS64 : RI<0xAF, RawFrm, (outs), (ins), "scas{q}", []>; +let Defs = [EDI,ESI], Uses = [EDI,ESI,EFLAGS] in +def MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "movsq", []>; + +let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI,EFLAGS] in +def STOSQ : RI<0xAB, RawFrm, (outs), (ins), "stosq", []>; -def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmps{q}", []>; +def SCAS64 : RI<0xAF, RawFrm, (outs), (ins), "scasq", []>; + +def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmpsq", []>; // Fast system-call instructions def SYSEXIT64 : RI<0x35, RawFrm, @@ -341,8 +354,17 @@ def MOV64ri32 : RIi32<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src), [(set GR64:$dst, i64immSExt32:$src)]>; } +// The assembler accepts movq of a 64-bit immediate as an alternate spelling of +// movabsq. 
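The new ImmSExti64i8AsmOperand class, and the many i32imm to i64i32imm swaps in this file, all encode one predicate: a 64-bit instruction's imm8 or imm32 form is usable iff sign-extending the truncated immediate reproduces the full value. In plain C++ (the casts assume two's complement, which C++20 guarantees):

    #include <cassert>
    #include <cstdint>

    // "Fits when sign-extended": the short encoding is legal iff
    // extending the truncated immediate gives back the original.
    static bool isSExti64i8(int64_t V)  { return V == (int8_t)V;  }
    static bool isSExti64i32(int64_t V) { return V == (int32_t)V; }

    int main() {
      assert(isSExti64i8(-1)  && !isSExti64i8(128));
      assert(isSExti64i32(INT32_MIN) && !isSExti64i32(1LL << 40));
    }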
+let isAsmParserOnly = 1 in { +def MOV64ri_alt : RIi64<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64imm:$src), + "mov{q}\t{$src, $dst|$dst, $src}", []>; +} + +let isCodeGenOnly = 1 in { def MOV64rr_REV : RI<0x8B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "mov{q}\t{$src, $dst|$dst, $src}", []>; +} let canFoldAsLoad = 1, isReMaterializable = 1 in def MOV64rm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), @@ -398,9 +420,9 @@ def MOV64dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR64:$src), "mov{q}\t{$src, $dst|$dst, $src}", []>, TB; // Moves to and from control registers -def MOV64rc : I<0x20, MRMDestReg, (outs GR64:$dst), (ins CONTROL_REG_64:$src), +def MOV64rc : I<0x20, MRMDestReg, (outs GR64:$dst), (ins CONTROL_REG:$src), "mov{q}\t{$src, $dst|$dst, $src}", []>, TB; -def MOV64cr : I<0x22, MRMSrcReg, (outs CONTROL_REG_64:$dst), (ins GR64:$src), +def MOV64cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR64:$src), "mov{q}\t{$src, $dst|$dst, $src}", []>, TB; // Sign/Zero extenders @@ -478,7 +500,7 @@ def def32 : PatLeaf<(i32 GR32:$src), [{ // In the case of a 32-bit def that is known to implicitly zero-extend, // we can use a SUBREG_TO_REG. def : Pat<(i64 (zext def32:$src)), - (SUBREG_TO_REG (i64 0), GR32:$src, x86_subreg_32bit)>; + (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>; let neverHasSideEffects = 1 in { let Defs = [RAX], Uses = [EAX] in @@ -496,7 +518,7 @@ let neverHasSideEffects = 1 in { let Defs = [EFLAGS] in { -def ADD64i32 : RIi32<0x05, RawFrm, (outs), (ins i32imm:$src), +def ADD64i32 : RIi32<0x05, RawFrm, (outs), (ins i64i32imm:$src), "add{q}\t{$src, %rax|%rax, $src}", []>; let isTwoAddress = 1 in { @@ -555,7 +577,7 @@ def ADD64mi32 : RIi32<0x81, MRM0m, (outs), (ins i64mem:$dst, i64i32imm :$src2), let Uses = [EFLAGS] in { -def ADC64i32 : RIi32<0x15, RawFrm, (outs), (ins i32imm:$src), +def ADC64i32 : RIi32<0x15, RawFrm, (outs), (ins i64i32imm:$src), "adc{q}\t{$src, %rax|%rax, $src}", []>; let isTwoAddress = 1 in { @@ -565,9 +587,11 @@ def ADC64rr : RI<0x11, MRMDestReg, (outs GR64:$dst), "adc{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (adde GR64:$src1, GR64:$src2))]>; +let isCodeGenOnly = 1 in { def ADC64rr_REV : RI<0x13, MRMSrcReg , (outs GR32:$dst), (ins GR64:$src1, GR64:$src2), "adc{q}\t{$src2, $dst|$dst, $src2}", []>; +} def ADC64rm : RI<0x13, MRMSrcMem , (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), @@ -605,9 +629,11 @@ def SUB64rr : RI<0x29, MRMDestReg, (outs GR64:$dst), [(set GR64:$dst, EFLAGS, (X86sub_flag GR64:$src1, GR64:$src2))]>; +let isCodeGenOnly = 1 in { def SUB64rr_REV : RI<0x2B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), "sub{q}\t{$src2, $dst|$dst, $src2}", []>; +} // Register-Memory Subtraction def SUB64rm : RI<0x2B, MRMSrcMem, (outs GR64:$dst), @@ -629,7 +655,7 @@ def SUB64ri32 : RIi32<0x81, MRM5r, (outs GR64:$dst), (X86sub_flag GR64:$src1, i64immSExt32:$src2))]>; } // isTwoAddress -def SUB64i32 : RIi32<0x2D, RawFrm, (outs), (ins i32imm:$src), +def SUB64i32 : RIi32<0x2D, RawFrm, (outs), (ins i64i32imm:$src), "sub{q}\t{$src, %rax|%rax, $src}", []>; // Memory-Register Subtraction @@ -657,9 +683,11 @@ def SBB64rr : RI<0x19, MRMDestReg, (outs GR64:$dst), "sbb{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (sube GR64:$src1, GR64:$src2))]>; +let isCodeGenOnly = 1 in { def SBB64rr_REV : RI<0x1B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), "sbb{q}\t{$src2, $dst|$dst, $src2}", []>; +} def SBB64rm : RI<0x1B, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), @@ -676,7 +704,7 @@ def SBB64ri32 : 
RIi32<0x81, MRM3r, (outs GR64:$dst), [(set GR64:$dst, (sube GR64:$src1, i64immSExt32:$src2))]>; } // isTwoAddress -def SBB64i32 : RIi32<0x1D, RawFrm, (outs), (ins i32imm:$src), +def SBB64i32 : RIi32<0x1D, RawFrm, (outs), (ins i64i32imm:$src), "sbb{q}\t{$src, %rax|%rax, $src}", []>; def SBB64mr : RI<0x19, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), @@ -1076,7 +1104,7 @@ def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst", [(store (not (loadi64 addr:$dst)), addr:$dst)]>; let Defs = [EFLAGS] in { -def AND64i32 : RIi32<0x25, RawFrm, (outs), (ins i32imm:$src), +def AND64i32 : RIi32<0x25, RawFrm, (outs), (ins i64i32imm:$src), "and{q}\t{$src, %rax|%rax, $src}", []>; let isTwoAddress = 1 in { @@ -1086,9 +1114,11 @@ def AND64rr : RI<0x21, MRMDestReg, "and{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, EFLAGS, (X86and_flag GR64:$src1, GR64:$src2))]>; +let isCodeGenOnly = 1 in { def AND64rr_REV : RI<0x23, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), "and{q}\t{$src2, $dst|$dst, $src2}", []>; +} def AND64rm : RI<0x23, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), "and{q}\t{$src2, $dst|$dst, $src2}", @@ -1129,9 +1159,11 @@ def OR64rr : RI<0x09, MRMDestReg, (outs GR64:$dst), "or{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, EFLAGS, (X86or_flag GR64:$src1, GR64:$src2))]>; +let isCodeGenOnly = 1 in { def OR64rr_REV : RI<0x0B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), "or{q}\t{$src2, $dst|$dst, $src2}", []>; +} def OR64rm : RI<0x0B, MRMSrcMem , (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), "or{q}\t{$src2, $dst|$dst, $src2}", @@ -1162,7 +1194,7 @@ def OR64mi32 : RIi32<0x81, MRM1m, (outs), (ins i64mem:$dst, i64i32imm:$src), [(store (or (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst), (implicit EFLAGS)]>; -def OR64i32 : RIi32<0x0D, RawFrm, (outs), (ins i32imm:$src), +def OR64i32 : RIi32<0x0D, RawFrm, (outs), (ins i64i32imm:$src), "or{q}\t{$src, %rax|%rax, $src}", []>; let isTwoAddress = 1 in { @@ -1172,9 +1204,11 @@ def XOR64rr : RI<0x31, MRMDestReg, (outs GR64:$dst), "xor{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, EFLAGS, (X86xor_flag GR64:$src1, GR64:$src2))]>; +let isCodeGenOnly = 1 in { def XOR64rr_REV : RI<0x33, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), "xor{q}\t{$src2, $dst|$dst, $src2}", []>; +} def XOR64rm : RI<0x33, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), "xor{q}\t{$src2, $dst|$dst, $src2}", @@ -1205,7 +1239,7 @@ def XOR64mi32 : RIi32<0x81, MRM6m, (outs), (ins i64mem:$dst, i64i32imm:$src), [(store (xor (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst), (implicit EFLAGS)]>; -def XOR64i32 : RIi32<0x35, RawFrm, (outs), (ins i32imm:$src), +def XOR64i32 : RIi32<0x35, RawFrm, (outs), (ins i64i32imm:$src), "xor{q}\t{$src, %rax|%rax, $src}", []>; } // Defs = [EFLAGS] @@ -1216,7 +1250,7 @@ def XOR64i32 : RIi32<0x35, RawFrm, (outs), (ins i32imm:$src), // Integer comparison let Defs = [EFLAGS] in { -def TEST64i32 : RIi32<0xa9, RawFrm, (outs), (ins i32imm:$src), +def TEST64i32 : RIi32<0xa9, RawFrm, (outs), (ins i64i32imm:$src), "test{q}\t{$src, %rax|%rax, $src}", []>; let isCommutable = 1 in def TEST64rr : RI<0x85, MRMSrcReg, (outs), (ins GR64:$src1, GR64:$src2), @@ -1238,7 +1272,7 @@ def TEST64mi32 : RIi32<0xF7, MRM0m, (outs), i64immSExt32:$src2), 0))]>; -def CMP64i32 : RIi32<0x3D, RawFrm, (outs), (ins i32imm:$src), +def CMP64i32 : RIi32<0x3D, RawFrm, (outs), (ins i64i32imm:$src), "cmp{q}\t{$src, %rax|%rax, $src}", []>; def CMP64rr : RI<0x39, MRMDestReg, (outs), (ins GR64:$src1, 
GR64:$src2), "cmp{q}\t{$src2, $src1|$src1, $src2}", @@ -1293,7 +1327,8 @@ def BT64mr : RI<0xA3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), def BT64ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2), "bt{q}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))]>, TB; + [(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))]>, TB, + REX_W; // Note that these instructions don't need FastBTMem because that // only applies when the other operand is in a register. When it's // an immediate, bt is still fast. @@ -1714,11 +1749,13 @@ def XCHG64rr : RI<0x87, MRMSrcReg, (outs GR64:$dst), (ins GR64:$val,GR64:$src), def XADD64rr : RI<0xC1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src), "xadd{q}\t{$src, $dst|$dst, $src}", []>, TB; +let mayLoad = 1, mayStore = 1 in def XADD64rm : RI<0xC1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), "xadd{q}\t{$src, $dst|$dst, $src}", []>, TB; def CMPXCHG64rr : RI<0xB1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src), "cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB; +let mayLoad = 1, mayStore = 1 in def CMPXCHG64rm : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), "cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB; @@ -1730,7 +1767,7 @@ def XCHG64ar : RI<0x90, AddRegFrm, (outs), (ins GR64:$src), "xchg{q}\t{$src, %rax|%rax, $src}", []>; // Optimized codegen when the non-memory output is not used. -let Defs = [EFLAGS] in { +let Defs = [EFLAGS], mayLoad = 1, mayStore = 1 in { // FIXME: Use normal add / sub instructions and add lock prefix dynamically. def LOCK_ADD64mr : RI<0x03, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), "lock\n\t" @@ -1982,14 +2019,14 @@ def : Pat<(extloadi64i16 addr:$src), (MOVZX64rm16 addr:$src)>; // defined after an extload. def : Pat<(extloadi64i32 addr:$src), (SUBREG_TO_REG (i64 0), (MOV32rm addr:$src), - x86_subreg_32bit)>; + sub_32bit)>; // anyext. Define these to do an explicit zero-extend to // avoid partial-register updates. 
def : Pat<(i64 (anyext GR8 :$src)), (MOVZX64rr8 GR8 :$src)>; def : Pat<(i64 (anyext GR16:$src)), (MOVZX64rr16 GR16 :$src)>; def : Pat<(i64 (anyext GR32:$src)), - (SUBREG_TO_REG (i64 0), GR32:$src, x86_subreg_32bit)>; + (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>; //===----------------------------------------------------------------------===// // Some peepholes @@ -2016,54 +2053,54 @@ def : Pat<(and GR64:$src, i64immZExt32:$imm), (SUBREG_TO_REG (i64 0), (AND32ri - (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit), + (EXTRACT_SUBREG GR64:$src, sub_32bit), (i32 (GetLo32XForm imm:$imm))), - x86_subreg_32bit)>; + sub_32bit)>; // r & (2^32-1) ==> movz def : Pat<(and GR64:$src, 0x00000000FFFFFFFF), - (MOVZX64rr32 (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit))>; + (MOVZX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>; // r & (2^16-1) ==> movz def : Pat<(and GR64:$src, 0xffff), - (MOVZX64rr16 (i16 (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit)))>; + (MOVZX64rr16 (i16 (EXTRACT_SUBREG GR64:$src, sub_16bit)))>; // r & (2^8-1) ==> movz def : Pat<(and GR64:$src, 0xff), - (MOVZX64rr8 (i8 (EXTRACT_SUBREG GR64:$src, x86_subreg_8bit)))>; + (MOVZX64rr8 (i8 (EXTRACT_SUBREG GR64:$src, sub_8bit)))>; // r & (2^8-1) ==> movz def : Pat<(and GR32:$src1, 0xff), - (MOVZX32rr8 (EXTRACT_SUBREG GR32:$src1, x86_subreg_8bit))>, + (MOVZX32rr8 (EXTRACT_SUBREG GR32:$src1, sub_8bit))>, Requires<[In64BitMode]>; // r & (2^8-1) ==> movz def : Pat<(and GR16:$src1, 0xff), - (MOVZX16rr8 (i8 (EXTRACT_SUBREG GR16:$src1, x86_subreg_8bit)))>, + (MOVZX16rr8 (i8 (EXTRACT_SUBREG GR16:$src1, sub_8bit)))>, Requires<[In64BitMode]>; // sext_inreg patterns def : Pat<(sext_inreg GR64:$src, i32), - (MOVSX64rr32 (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit))>; + (MOVSX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>; def : Pat<(sext_inreg GR64:$src, i16), - (MOVSX64rr16 (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit))>; + (MOVSX64rr16 (EXTRACT_SUBREG GR64:$src, sub_16bit))>; def : Pat<(sext_inreg GR64:$src, i8), - (MOVSX64rr8 (EXTRACT_SUBREG GR64:$src, x86_subreg_8bit))>; + (MOVSX64rr8 (EXTRACT_SUBREG GR64:$src, sub_8bit))>; def : Pat<(sext_inreg GR32:$src, i8), - (MOVSX32rr8 (EXTRACT_SUBREG GR32:$src, x86_subreg_8bit))>, + (MOVSX32rr8 (EXTRACT_SUBREG GR32:$src, sub_8bit))>, Requires<[In64BitMode]>; def : Pat<(sext_inreg GR16:$src, i8), - (MOVSX16rr8 (i8 (EXTRACT_SUBREG GR16:$src, x86_subreg_8bit)))>, + (MOVSX16rr8 (i8 (EXTRACT_SUBREG GR16:$src, sub_8bit)))>, Requires<[In64BitMode]>; // trunc patterns def : Pat<(i32 (trunc GR64:$src)), - (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit)>; + (EXTRACT_SUBREG GR64:$src, sub_32bit)>; def : Pat<(i16 (trunc GR64:$src)), - (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit)>; + (EXTRACT_SUBREG GR64:$src, sub_16bit)>; def : Pat<(i8 (trunc GR64:$src)), - (EXTRACT_SUBREG GR64:$src, x86_subreg_8bit)>; + (EXTRACT_SUBREG GR64:$src, sub_8bit)>; def : Pat<(i8 (trunc GR32:$src)), - (EXTRACT_SUBREG GR32:$src, x86_subreg_8bit)>, + (EXTRACT_SUBREG GR32:$src, sub_8bit)>, Requires<[In64BitMode]>; def : Pat<(i8 (trunc GR16:$src)), - (EXTRACT_SUBREG GR16:$src, x86_subreg_8bit)>, + (EXTRACT_SUBREG GR16:$src, sub_8bit)>, Requires<[In64BitMode]>; // h-register tricks. 
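The renamed peephole patterns above encode textbook identities: masking with a low-bits-set constant is a zero extension of the matching sub-register, so each of these ANDs becomes a single movz. Verifiable directly:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t R = 0xAABBCCDDEEFF1122ULL;
      assert((R & 0xFF)       == (uint64_t)(uint8_t)R);   // movzbl
      assert((R & 0xFFFF)     == (uint64_t)(uint16_t)R);  // movzwl
      assert((R & 0xFFFFFFFF) == (uint64_t)(uint32_t)R);  // 32->64 mov
    }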
@@ -2079,67 +2116,67 @@ def : Pat<(and (srl_su GR64:$src, (i8 8)), (i64 255)), (i64 0), (MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)), - x86_subreg_8bit_hi)), - x86_subreg_32bit)>; + sub_8bit_hi)), + sub_32bit)>; def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)), (MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), - x86_subreg_8bit_hi))>, + sub_8bit_hi))>, Requires<[In64BitMode]>; def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)), (MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), - x86_subreg_8bit_hi))>, + sub_8bit_hi))>, Requires<[In64BitMode]>; def : Pat<(srl GR16:$src, (i8 8)), (EXTRACT_SUBREG (MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), - x86_subreg_8bit_hi)), - x86_subreg_16bit)>, + sub_8bit_hi)), + sub_16bit)>, Requires<[In64BitMode]>; def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))), (MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), - x86_subreg_8bit_hi))>, + sub_8bit_hi))>, Requires<[In64BitMode]>; def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))), (MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), - x86_subreg_8bit_hi))>, + sub_8bit_hi))>, Requires<[In64BitMode]>; def : Pat<(i64 (zext (srl_su GR16:$src, (i8 8)))), (SUBREG_TO_REG (i64 0), (MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), - x86_subreg_8bit_hi)), - x86_subreg_32bit)>; + sub_8bit_hi)), + sub_32bit)>; def : Pat<(i64 (anyext (srl_su GR16:$src, (i8 8)))), (SUBREG_TO_REG (i64 0), (MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), - x86_subreg_8bit_hi)), - x86_subreg_32bit)>; + sub_8bit_hi)), + sub_32bit)>; // h-register extract and store. 
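The h-register tricks above all exploit the fact that bits 8..15 of a legacy GPR are directly addressable as AH/BH/CH/DH, so a shift-by-8-then-mask is a single movzbl from the high byte, restricted to the ABCD register classes (and REX-free encodings in 64-bit mode). The computation being matched:

    #include <cassert>
    #include <cstdint>

    // One movzbl from the h-register instead of a shift plus a mask.
    static uint32_t highByteOfLowWord(uint32_t X) {
      return (X >> 8) & 0xFF;
    }

    int main() { assert(highByteOfLowWord(0x1234ABCDu) == 0xAB); }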
def : Pat<(store (i8 (trunc_su (srl_su GR64:$src, (i8 8)))), addr:$dst), (MOV8mr_NOREX addr:$dst, (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)), - x86_subreg_8bit_hi))>; + sub_8bit_hi))>; def : Pat<(store (i8 (trunc_su (srl_su GR32:$src, (i8 8)))), addr:$dst), (MOV8mr_NOREX addr:$dst, (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), - x86_subreg_8bit_hi))>, + sub_8bit_hi))>, Requires<[In64BitMode]>; def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst), (MOV8mr_NOREX addr:$dst, (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), - x86_subreg_8bit_hi))>, + sub_8bit_hi))>, Requires<[In64BitMode]>; // (shl x, 1) ==> (add x, x) diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index a21bfb9..34e12ca 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -744,17 +744,17 @@ X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI, case X86::MOVZX32rr8: case X86::MOVSX64rr8: case X86::MOVZX64rr8: - SubIdx = 1; + SubIdx = X86::sub_8bit; break; case X86::MOVSX32rr16: case X86::MOVZX32rr16: case X86::MOVSX64rr16: case X86::MOVZX64rr16: - SubIdx = 3; + SubIdx = X86::sub_16bit; break; case X86::MOVSX64rr32: case X86::MOVZX64rr32: - SubIdx = 4; + SubIdx = X86::sub_32bit; break; } return true; @@ -1065,7 +1065,7 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB, unsigned DestReg, unsigned SubIdx, const MachineInstr *Orig, const TargetRegisterInfo *TRI) const { - DebugLoc DL = MBB.findDebugLoc(I); + DebugLoc DL = Orig->getDebugLoc(); if (SubIdx && TargetRegisterInfo::isPhysicalRegister(DestReg)) { DestReg = TRI->getSubReg(DestReg, SubIdx); @@ -1154,7 +1154,7 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc, BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg) .addReg(leaInReg) .addReg(Src, getKillRegState(isKill)) - .addImm(X86::SUBREG_16BIT); + .addImm(X86::sub_16bit); MachineInstrBuilder MIB = BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(Opc), leaOutReg); @@ -1198,7 +1198,7 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc, BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg2) .addReg(leaInReg2) .addReg(Src2, getKillRegState(isKill2)) - .addImm(X86::SUBREG_16BIT); + .addImm(X86::sub_16bit); addRegReg(MIB, leaInReg, true, leaInReg2, true); } if (LV && isKill2 && InsMI2) @@ -1212,7 +1212,7 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc, BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::EXTRACT_SUBREG)) .addReg(Dest, RegState::Define | getDeadRegState(isDead)) .addReg(leaOutReg, RegState::Kill) - .addImm(X86::SUBREG_16BIT); + .addImm(X86::sub_16bit); if (LV) { // Update live variables @@ -1901,8 +1901,8 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const { - DebugLoc DL = MBB.findDebugLoc(MI); + const TargetRegisterClass *SrcRC, + DebugLoc DL) const { // Determine if DstRC and SrcRC have a common superclass in common. 
const TargetRegisterClass *CommonRC = DestRC; @@ -1993,12 +1993,12 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB, if (SrcReg != X86::EFLAGS) return false; if (DestRC == &X86::GR64RegClass || DestRC == &X86::GR64_NOSPRegClass) { - BuildMI(MBB, MI, DL, get(X86::PUSHFQ64)); + BuildMI(MBB, MI, DL, get(X86::PUSHF64)); BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg); return true; } else if (DestRC == &X86::GR32RegClass || DestRC == &X86::GR32_NOSPRegClass) { - BuildMI(MBB, MI, DL, get(X86::PUSHFD)); + BuildMI(MBB, MI, DL, get(X86::PUSHF32)); BuildMI(MBB, MI, DL, get(X86::POP32r), DestReg); return true; } @@ -2007,12 +2007,12 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB, return false; if (SrcRC == &X86::GR64RegClass || DestRC == &X86::GR64_NOSPRegClass) { BuildMI(MBB, MI, DL, get(X86::PUSH64r)).addReg(SrcReg); - BuildMI(MBB, MI, DL, get(X86::POPFQ)); + BuildMI(MBB, MI, DL, get(X86::POPF64)); return true; } else if (SrcRC == &X86::GR32RegClass || DestRC == &X86::GR32_NOSPRegClass) { BuildMI(MBB, MI, DL, get(X86::PUSH32r)).addReg(SrcReg); - BuildMI(MBB, MI, DL, get(X86::POPFD)); + BuildMI(MBB, MI, DL, get(X86::POPF32)); return true; } } @@ -2133,7 +2133,8 @@ static unsigned getStoreRegOpcode(unsigned SrcReg, void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned SrcReg, bool isKill, int FrameIdx, - const TargetRegisterClass *RC) const { + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { const MachineFunction &MF = *MBB.getParent(); bool isAligned = (RI.getStackAlignment() >= 16) || RI.canRealignStack(MF); unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM); @@ -2230,7 +2231,8 @@ static unsigned getLoadRegOpcode(unsigned DestReg, void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, int FrameIdx, - const TargetRegisterClass *RC) const{ + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { const MachineFunction &MF = *MBB.getParent(); bool isAligned = (RI.getStackAlignment() >= 16) || RI.canRealignStack(MF); unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM); @@ -2256,7 +2258,8 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg, bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI) const { + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { if (CSI.empty()) return false; @@ -2284,7 +2287,8 @@ bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, CalleeFrameSize += SlotSize; BuildMI(MBB, MI, DL, get(Opc)).addReg(Reg, RegState::Kill); } else { - storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), RegClass); + storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), RegClass, + &RI); } } @@ -2294,7 +2298,8 @@ bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI) const { + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { if (CSI.empty()) return false; @@ -2314,7 +2319,7 @@ bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, if (RegClass != &X86::VR128RegClass && !isWin64) { BuildMI(MBB, MI, DL, get(Opc), Reg); } else { - loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RegClass); + loadRegFromStackSlot(MBB, MI, Reg, 
CSI[i].getFrameIdx(), RegClass, &RI); } } return true; @@ -2478,9 +2483,9 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, unsigned DstReg = NewMI->getOperand(0).getReg(); if (TargetRegisterInfo::isPhysicalRegister(DstReg)) NewMI->getOperand(0).setReg(RI.getSubReg(DstReg, - 4/*x86_subreg_32bit*/)); + X86::sub_32bit)); else - NewMI->getOperand(0).setSubReg(4/*x86_subreg_32bit*/); + NewMI->getOperand(0).setSubReg(X86::sub_32bit); } return NewMI; } @@ -2526,9 +2531,9 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, switch (MI->getOpcode()) { default: return NULL; case X86::TEST8rr: NewOpc = X86::CMP8ri; RCSize = 1; break; - case X86::TEST16rr: NewOpc = X86::CMP16ri; RCSize = 2; break; - case X86::TEST32rr: NewOpc = X86::CMP32ri; RCSize = 4; break; - case X86::TEST64rr: NewOpc = X86::CMP64ri32; RCSize = 8; break; + case X86::TEST16rr: NewOpc = X86::CMP16ri8; RCSize = 2; break; + case X86::TEST32rr: NewOpc = X86::CMP32ri8; RCSize = 4; break; + case X86::TEST64rr: NewOpc = X86::CMP64ri8; RCSize = 8; break; } // Check if it's safe to fold the load. If the size of the object is // narrower than the load width, then it's not. @@ -2595,9 +2600,9 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, switch (MI->getOpcode()) { default: return NULL; case X86::TEST8rr: NewOpc = X86::CMP8ri; break; - case X86::TEST16rr: NewOpc = X86::CMP16ri; break; - case X86::TEST32rr: NewOpc = X86::CMP32ri; break; - case X86::TEST64rr: NewOpc = X86::CMP64ri32; break; + case X86::TEST16rr: NewOpc = X86::CMP16ri8; break; + case X86::TEST32rr: NewOpc = X86::CMP32ri8; break; + case X86::TEST64rr: NewOpc = X86::CMP64ri8; break; } // Change to CMPXXri r, 0 first. MI->setDesc(get(NewOpc)); @@ -2805,16 +2810,22 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, switch (DataMI->getOpcode()) { default: break; case X86::CMP64ri32: + case X86::CMP64ri8: case X86::CMP32ri: + case X86::CMP32ri8: case X86::CMP16ri: + case X86::CMP16ri8: case X86::CMP8ri: { MachineOperand &MO0 = DataMI->getOperand(0); MachineOperand &MO1 = DataMI->getOperand(1); if (MO1.getImm() == 0) { switch (DataMI->getOpcode()) { default: break; + case X86::CMP64ri8: case X86::CMP64ri32: NewOpc = X86::TEST64rr; break; + case X86::CMP32ri8: case X86::CMP32ri: NewOpc = X86::TEST32rr; break; + case X86::CMP16ri8: case X86::CMP16ri: NewOpc = X86::TEST16rr; break; case X86::CMP8ri: NewOpc = X86::TEST8rr; break; } diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index df99c7f..62d7c74 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -590,11 +590,13 @@ public: MachineBasicBlock::iterator MI, unsigned DestReg, unsigned SrcReg, const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const; + const TargetRegisterClass *SrcRC, + DebugLoc DL) const; virtual void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC) const; + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill, SmallVectorImpl<MachineOperand> &Addr, @@ -606,7 +608,8 @@ public: virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, int FrameIndex, - const TargetRegisterClass *RC) const; + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; virtual void loadRegFromAddr(MachineFunction &MF, 
unsigned DestReg, SmallVectorImpl<MachineOperand> &Addr, @@ -617,11 +620,13 @@ public: virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI) const; + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const; virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI) const; + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const; virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index a2754ea..0d59c42 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -195,15 +195,15 @@ def ptr_rc_nosp : PointerLikeRegClass<1>; // def X86MemAsmOperand : AsmOperandClass { let Name = "Mem"; - let SuperClass = ?; -} -def X86AbsMemAsmOperand : AsmOperandClass { - let Name = "AbsMem"; - let SuperClass = X86MemAsmOperand; + let SuperClasses = []; } def X86NoSegMemAsmOperand : AsmOperandClass { let Name = "NoSegMem"; - let SuperClass = X86MemAsmOperand; + let SuperClasses = [X86MemAsmOperand]; +} +def X86AbsMemAsmOperand : AsmOperandClass { + let Name = "AbsMem"; + let SuperClasses = [X86NoSegMemAsmOperand]; } class X86MemOperand<string printMethod> : Operand<iPTR> { let PrintMethod = printMethod; @@ -270,19 +270,49 @@ def SSECC : Operand<i8> { let PrintMethod = "printSSECC"; } -def ImmSExt8AsmOperand : AsmOperandClass { - let Name = "ImmSExt8"; - let SuperClass = ImmAsmOperand; +class ImmSExtAsmOperandClass : AsmOperandClass { + let SuperClasses = [ImmAsmOperand]; + let RenderMethod = "addImmOperands"; +} + +// Sign-extended immediate classes. We don't need to define the full lattice +// here because there is no instruction with an ambiguity between ImmSExti64i32 +// and ImmSExti32i8. +// +// The strange ranges come from the fact that the assembler always works with +// 64-bit immediates, but for a 16-bit target value we want to accept both "-1" +// (which will be a -1ULL), and "0xFFFF" (-1 in 16-bits). + +// [0, 0x7FFFFFFF] | [0xFFFFFFFF80000000, 0xFFFFFFFFFFFFFFFF] +def ImmSExti64i32AsmOperand : ImmSExtAsmOperandClass { + let Name = "ImmSExti64i32"; +} + +// [0, 0x0000007F] | [0x000000000000FF80, 0x000000000000FFFF] | [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF] +def ImmSExti16i8AsmOperand : ImmSExtAsmOperandClass { + let Name = "ImmSExti16i8"; + let SuperClasses = [ImmSExti64i32AsmOperand]; +} + +// [0, 0x0000007F] | [0x00000000FFFFFF80, 0x00000000FFFFFFFF] | [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF] +def ImmSExti32i8AsmOperand : ImmSExtAsmOperandClass { + let Name = "ImmSExti32i8"; +} + +// [0, 0x0000007F] | [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF] +def ImmSExti64i8AsmOperand : ImmSExtAsmOperandClass { + let Name = "ImmSExti64i8"; + let SuperClasses = [ImmSExti16i8AsmOperand, ImmSExti32i8AsmOperand, ImmSExti64i32AsmOperand]; } // A couple of more descriptive operand definitions. // 16-bits but only 8 bits are significant. def i16i8imm : Operand<i16> { let ParserMatchClass = ImmSExti16i8AsmOperand; } // 32-bits but only 8 bits are significant.
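(An illustrative aside, not part of the patch: the ranges in the comments above translate directly into a matcher predicate. A minimal C++ sketch for the ImmSExti16i8 case, using a hypothetical helper name; however the real matcher is implemented, it must accept exactly these values:

  // True if Value, parsed as a 64-bit immediate, can be encoded as the
  // sign-extended 8-bit immediate of a 16-bit instruction.
  static bool isImmSExti16i8Value(uint64_t Value) {
    return Value <= 0x000000000000007FULL ||
           (Value >= 0x000000000000FF80ULL &&
            Value <= 0x000000000000FFFFULL) ||
           Value >= 0xFFFFFFFFFFFFFF80ULL;
  }

The i32i8imm operand defined next attaches ImmSExti32i8AsmOperand in the same way.)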
def i32i8imm : Operand<i32> { - let ParserMatchClass = ImmSExt8AsmOperand; + let ParserMatchClass = ImmSExti32i8AsmOperand; } //===----------------------------------------------------------------------===// @@ -542,8 +572,10 @@ let neverHasSideEffects = 1 in { } // Trap -def INT3 : I<0xcc, RawFrm, (outs), (ins), "int\t3", []>; -def INT : I<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap", []>; +def INTO : I<0xce, RawFrm, (outs), (ins), "into", []>; +def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3", []>; +// FIXME: need to make sure that "int $3" matches int3 +def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap", []>; def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iret{w}", []>, OpSize; def IRET32 : I<0xcf, RawFrm, (outs), (ins), "iret{l}", []>; @@ -693,6 +725,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in def TCRETURNri : I<0, Pseudo, (outs), (ins GR32_TC:$dst, i32imm:$offset, variable_ops), "#TC_RETURN $dst $offset", []>; + let mayLoad = 1 in def TCRETURNmi : I<0, Pseudo, (outs), (ins i32mem_TC:$dst, i32imm:$offset, variable_ops), "#TC_RETURN $dst $offset", []>; @@ -706,8 +739,16 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32_TC:$dst, variable_ops), "jmp{l}\t{*}$dst # TAILCALL", []>; + let mayLoad = 1 in def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem_TC:$dst, variable_ops), "jmp{l}\t{*}$dst # TAILCALL", []>; + + // FIXME: This is a hack so that MCInst lowering can preserve the TAILCALL + // marker on instructions, while still being able to relax. + let isCodeGenOnly = 1 in { + def TAILJMP_1 : Ii8PCRel<0xEB, RawFrm, (outs), (ins brtarget8:$dst), + "jmp\t$dst # TAILCALL", []>; + } } //===----------------------------------------------------------------------===// @@ -719,10 +760,12 @@ def LEAVE : I<0xC9, RawFrm, def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), "popcnt{w}\t{$src, $dst|$dst, $src}", []>, OpSize, XS; +let mayLoad = 1 in def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "popcnt{w}\t{$src, $dst|$dst, $src}", []>, OpSize, XS; def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "popcnt{l}\t{$src, $dst|$dst, $src}", []>, XS; +let mayLoad = 1 in def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "popcnt{l}\t{$src, $dst|$dst, $src}", []>, XS; @@ -762,12 +805,14 @@ def PUSHi32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm), } let Defs = [ESP, EFLAGS], Uses = [ESP], mayLoad = 1, neverHasSideEffects=1 in { -def POPF : I<0x9D, RawFrm, (outs), (ins), "popf{w}", []>, OpSize; -def POPFD : I<0x9D, RawFrm, (outs), (ins), "popf{l}", []>; +def POPF16 : I<0x9D, RawFrm, (outs), (ins), "popf{w}", []>, OpSize; +def POPF32 : I<0x9D, RawFrm, (outs), (ins), "popf{l|d}", []>, + Requires<[In32BitMode]>; } let Defs = [ESP], Uses = [ESP, EFLAGS], mayStore = 1, neverHasSideEffects=1 in { -def PUSHF : I<0x9C, RawFrm, (outs), (ins), "pushf{w}", []>, OpSize; -def PUSHFD : I<0x9C, RawFrm, (outs), (ins), "pushf{l}", []>; +def PUSHF16 : I<0x9C, RawFrm, (outs), (ins), "pushf{w}", []>, OpSize; +def PUSHF32 : I<0x9C, RawFrm, (outs), (ins), "pushf{l|d}", []>, + Requires<[In32BitMode]>; } let isTwoAddress = 1 in // GR32 = bswap GR32 @@ -867,7 +912,7 @@ def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)]>, let Defs = [RAX, RCX, RDX] in def RDTSCP : I<0x01, MRM_F9, (outs), (ins), "rdtscp", []>, TB; -let isBarrier = 1, hasCtrlDep = 1 in { +let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in { def TRAP : 
I<0x0B, RawFrm, (outs), (ins), "ud2", [(trap)]>, TB; } @@ -966,36 +1011,47 @@ def MOV32mi : Ii32<0xC7, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src), "mov{l}\t{$src, $dst|$dst, $src}", [(store (i32 imm:$src), addr:$dst)]>; -def MOV8o8a : Ii8 <0xA0, RawFrm, (outs), (ins offset8:$src), +/// moffs8, moffs16 and moffs32 versions of moves. The immediate is a +/// 32-bit offset from the PC. These are only valid in x86-32 mode. +def MOV8o8a : Ii32 <0xA0, RawFrm, (outs), (ins offset8:$src), "mov{b}\t{$src, %al|%al, $src}", []>; -def MOV16o16a : Ii16 <0xA1, RawFrm, (outs), (ins offset16:$src), +def MOV16o16a : Ii32 <0xA1, RawFrm, (outs), (ins offset16:$src), "mov{w}\t{$src, %ax|%ax, $src}", []>, OpSize; def MOV32o32a : Ii32 <0xA1, RawFrm, (outs), (ins offset32:$src), "mov{l}\t{$src, %eax|%eax, $src}", []>; - -def MOV8ao8 : Ii8 <0xA2, RawFrm, (outs offset8:$dst), (ins), +def MOV8ao8 : Ii32 <0xA2, RawFrm, (outs offset8:$dst), (ins), "mov{b}\t{%al, $dst|$dst, %al}", []>; -def MOV16ao16 : Ii16 <0xA3, RawFrm, (outs offset16:$dst), (ins), +def MOV16ao16 : Ii32 <0xA3, RawFrm, (outs offset16:$dst), (ins), "mov{w}\t{%ax, $dst|$dst, %ax}", []>, OpSize; def MOV32ao32 : Ii32 <0xA3, RawFrm, (outs offset32:$dst), (ins), "mov{l}\t{%eax, $dst|$dst, %eax}", []>; - + // Moves to and from segment registers def MOV16rs : I<0x8C, MRMDestReg, (outs GR16:$dst), (ins SEGMENT_REG:$src), - "mov{w}\t{$src, $dst|$dst, $src}", []>; + "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize; +def MOV32rs : I<0x8C, MRMDestReg, (outs GR32:$dst), (ins SEGMENT_REG:$src), + "mov{l}\t{$src, $dst|$dst, $src}", []>; def MOV16ms : I<0x8C, MRMDestMem, (outs i16mem:$dst), (ins SEGMENT_REG:$src), - "mov{w}\t{$src, $dst|$dst, $src}", []>; + "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize; +def MOV32ms : I<0x8C, MRMDestMem, (outs i32mem:$dst), (ins SEGMENT_REG:$src), + "mov{l}\t{$src, $dst|$dst, $src}", []>; def MOV16sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR16:$src), - "mov{w}\t{$src, $dst|$dst, $src}", []>; + "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize; +def MOV32sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR32:$src), + "mov{l}\t{$src, $dst|$dst, $src}", []>; def MOV16sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i16mem:$src), - "mov{w}\t{$src, $dst|$dst, $src}", []>; + "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize; +def MOV32sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i32mem:$src), + "mov{l}\t{$src, $dst|$dst, $src}", []>; +let isCodeGenOnly = 1 in { def MOV8rr_REV : I<0x8A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src), "mov{b}\t{$src, $dst|$dst, $src}", []>; def MOV16rr_REV : I<0x8B, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize; def MOV32rr_REV : I<0x8B, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "mov{l}\t{$src, $dst|$dst, $src}", []>; +} let canFoldAsLoad = 1, isReMaterializable = 1 in { def MOV8rm : I<0x8A, MRMSrcMem, (outs GR8 :$dst), (ins i8mem :$src), @@ -1059,10 +1115,10 @@ def MOV32dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR32:$src), "mov{l}\t{$src, $dst|$dst, $src}", []>, TB; // Moves to and from control registers -def MOV32rc : I<0x20, MRMDestReg, (outs GR32:$dst), (ins CONTROL_REG_32:$src), - "mov{q}\t{$src, $dst|$dst, $src}", []>, TB; -def MOV32cr : I<0x22, MRMSrcReg, (outs CONTROL_REG_32:$dst), (ins GR32:$src), - "mov{q}\t{$src, $dst|$dst, $src}", []>, TB; +def MOV32rc : I<0x20, MRMDestReg, (outs GR32:$dst), (ins CONTROL_REG:$src), + "mov{l}\t{$src, $dst|$dst, $src}", []>, TB; +def MOV32cr : I<0x22, MRMSrcReg, (outs 
CONTROL_REG:$dst), (ins GR32:$src), + "mov{l}\t{$src, $dst|$dst, $src}", []>, TB; //===----------------------------------------------------------------------===// // Fixed-Register Multiplication and Division Instructions... @@ -1746,6 +1802,7 @@ def AND32rr : I<0x21, MRMDestReg, // AND instructions with the destination register in REG and the source register // in R/M. Included for the disassembler. +let isCodeGenOnly = 1 in { def AND8rr_REV : I<0x22, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2), "and{b}\t{$src2, $dst|$dst, $src2}", []>; def AND16rr_REV : I<0x23, MRMSrcReg, (outs GR16:$dst), @@ -1754,6 +1811,7 @@ def AND16rr_REV : I<0x23, MRMSrcReg, (outs GR16:$dst), def AND32rr_REV : I<0x23, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), "and{l}\t{$src2, $dst|$dst, $src2}", []>; +} def AND8rm : I<0x22, MRMSrcMem, (outs GR8 :$dst), (ins GR8 :$src1, i8mem :$src2), @@ -1872,6 +1930,7 @@ def OR32rr : I<0x09, MRMDestReg, (outs GR32:$dst), // OR instructions with the destination register in REG and the source register // in R/M. Included for the disassembler. +let isCodeGenOnly = 1 in { def OR8rr_REV : I<0x0A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2), "or{b}\t{$src2, $dst|$dst, $src2}", []>; def OR16rr_REV : I<0x0B, MRMSrcReg, (outs GR16:$dst), @@ -1880,6 +1939,7 @@ def OR16rr_REV : I<0x0B, MRMSrcReg, (outs GR16:$dst), def OR32rr_REV : I<0x0B, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), "or{l}\t{$src2, $dst|$dst, $src2}", []>; +} def OR8rm : I<0x0A, MRMSrcMem, (outs GR8 :$dst), (ins GR8 :$src1, i8mem :$src2), @@ -1988,6 +2048,7 @@ let isCommutable = 1 in { // X = XOR Y, Z --> X = XOR Z, Y // XOR instructions with the destination register in REG and the source register // in R/M. Included for the disassembler. 
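// For example, "or %eax, %ebx" has two valid encodings: 09 C3 (OR32rr,
// opcode 0x09, ModRM reg field = source) and 0B D8 (OR32rr_REV, opcode 0x0B,
// ModRM reg field = destination). The compiler only ever emits the first
// form, but a disassembler must decode both, hence these definitions.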
+let isCodeGenOnly = 1 in { def XOR8rr_REV : I<0x32, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2), "xor{b}\t{$src2, $dst|$dst, $src2}", []>; def XOR16rr_REV : I<0x33, MRMSrcReg, (outs GR16:$dst), @@ -1996,6 +2057,7 @@ def XOR16rr_REV : I<0x33, MRMSrcReg, (outs GR16:$dst), def XOR32rr_REV : I<0x33, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), "xor{l}\t{$src2, $dst|$dst, $src2}", []>; +} def XOR8rm : I<0x32, MRMSrcMem, (outs GR8 :$dst), (ins GR8:$src1, i8mem :$src2), @@ -2793,6 +2855,7 @@ def ADC32rr : I<0x11, MRMDestReg, (outs GR32:$dst), [(set GR32:$dst, (adde GR32:$src1, GR32:$src2))]>; } +let isCodeGenOnly = 1 in { def ADC8rr_REV : I<0x12, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2), "adc{b}\t{$src2, $dst|$dst, $src2}", []>; def ADC16rr_REV : I<0x13, MRMSrcReg, (outs GR16:$dst), @@ -2801,6 +2864,7 @@ def ADC16rr_REV : I<0x13, MRMSrcReg, (outs GR16:$dst), def ADC32rr_REV : I<0x13, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), "adc{l}\t{$src2, $dst|$dst, $src2}", []>; +} def ADC8rm : I<0x12, MRMSrcMem , (outs GR8:$dst), (ins GR8:$src1, i8mem:$src2), @@ -2888,6 +2952,7 @@ def SUB32rr : I<0x29, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1,GR32:$src2), [(set GR32:$dst, EFLAGS, (X86sub_flag GR32:$src1, GR32:$src2))]>; +let isCodeGenOnly = 1 in { def SUB8rr_REV : I<0x2A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2), "sub{b}\t{$src2, $dst|$dst, $src2}", []>; def SUB16rr_REV : I<0x2B, MRMSrcReg, (outs GR16:$dst), @@ -2896,6 +2961,7 @@ def SUB16rr_REV : I<0x2B, MRMSrcReg, (outs GR16:$dst), def SUB32rr_REV : I<0x2B, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), "sub{l}\t{$src2, $dst|$dst, $src2}", []>; +} // Register-Memory Subtraction def SUB8rm : I<0x2A, MRMSrcMem, (outs GR8 :$dst), @@ -3039,6 +3105,7 @@ let isTwoAddress = 0 in { "sbb{l}\t{$src, %eax|%eax, $src}", []>; } +let isCodeGenOnly = 1 in { def SBB8rr_REV : I<0x1A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2), "sbb{b}\t{$src2, $dst|$dst, $src2}", []>; def SBB16rr_REV : I<0x1B, MRMSrcReg, (outs GR16:$dst), @@ -3047,6 +3114,7 @@ def SBB16rr_REV : I<0x1B, MRMSrcReg, (outs GR16:$dst), def SBB32rr_REV : I<0x1B, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), "sbb{l}\t{$src2, $dst|$dst, $src2}", []>; +} def SBB8rm : I<0x1A, MRMSrcMem, (outs GR8:$dst), (ins GR8:$src1, i8mem:$src2), "sbb{b}\t{$src2, $dst|$dst, $src2}", @@ -3864,12 +3932,14 @@ def XADD16rr : I<0xC1, MRMDestReg, (outs GR16:$dst), (ins GR16:$src), def XADD32rr : I<0xC1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src), "xadd{l}\t{$src, $dst|$dst, $src}", []>, TB; +let mayLoad = 1, mayStore = 1 in { def XADD8rm : I<0xC0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src), "xadd{b}\t{$src, $dst|$dst, $src}", []>, TB; def XADD16rm : I<0xC1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src), "xadd{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; def XADD32rm : I<0xC1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), "xadd{l}\t{$src, $dst|$dst, $src}", []>, TB; +} def CMPXCHG8rr : I<0xB0, MRMDestReg, (outs GR8:$dst), (ins GR8:$src), "cmpxchg{b}\t{$src, $dst|$dst, $src}", []>, TB; @@ -3878,12 +3948,14 @@ def CMPXCHG16rr : I<0xB1, MRMDestReg, (outs GR16:$dst), (ins GR16:$src), def CMPXCHG32rr : I<0xB1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src), "cmpxchg{l}\t{$src, $dst|$dst, $src}", []>, TB; +let mayLoad = 1, mayStore = 1 in { def CMPXCHG8rm : I<0xB0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src), "cmpxchg{b}\t{$src, $dst|$dst, $src}", []>, TB; def CMPXCHG16rm : I<0xB1, MRMDestMem, (outs), (ins 
i16mem:$dst, GR16:$src), "cmpxchg{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; def CMPXCHG32rm : I<0xB1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), "cmpxchg{l}\t{$src, $dst|$dst, $src}", []>, TB; +} let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in def CMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$dst), @@ -3891,7 +3963,7 @@ def CMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$dst), // Optimized codegen when the non-memory output is not used. // FIXME: Use normal add / sub instructions and add lock prefix dynamically. -let Defs = [EFLAGS] in { +let Defs = [EFLAGS], mayLoad = 1, mayStore = 1 in { def LOCK_ADD8mr : I<0x00, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2), "lock\n\t" "add{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK; @@ -4453,7 +4525,7 @@ def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>; // Except for i16 -> i32 since isel expect i16 ops to be promoted to i32. def : Pat<(i32 (anyext GR16:$src)), - (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, x86_subreg_16bit)>; + (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, sub_16bit)>; //===----------------------------------------------------------------------===// @@ -4473,81 +4545,81 @@ def : Pat<(store (add (loadi32 addr:$dst), 128), addr:$dst), // r & (2^16-1) ==> movz def : Pat<(and GR32:$src1, 0xffff), - (MOVZX32rr16 (EXTRACT_SUBREG GR32:$src1, x86_subreg_16bit))>; + (MOVZX32rr16 (EXTRACT_SUBREG GR32:$src1, sub_16bit))>; // r & (2^8-1) ==> movz def : Pat<(and GR32:$src1, 0xff), (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src1, GR32_ABCD)), - x86_subreg_8bit))>, + sub_8bit))>, Requires<[In32BitMode]>; // r & (2^8-1) ==> movz def : Pat<(and GR16:$src1, 0xff), (MOVZX16rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src1, GR16_ABCD)), - x86_subreg_8bit))>, + sub_8bit))>, Requires<[In32BitMode]>; // sext_inreg patterns def : Pat<(sext_inreg GR32:$src, i16), - (MOVSX32rr16 (EXTRACT_SUBREG GR32:$src, x86_subreg_16bit))>; + (MOVSX32rr16 (EXTRACT_SUBREG GR32:$src, sub_16bit))>; def : Pat<(sext_inreg GR32:$src, i8), (MOVSX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), - x86_subreg_8bit))>, + sub_8bit))>, Requires<[In32BitMode]>; def : Pat<(sext_inreg GR16:$src, i8), (MOVSX16rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), - x86_subreg_8bit))>, + sub_8bit))>, Requires<[In32BitMode]>; // trunc patterns def : Pat<(i16 (trunc GR32:$src)), - (EXTRACT_SUBREG GR32:$src, x86_subreg_16bit)>; + (EXTRACT_SUBREG GR32:$src, sub_16bit)>; def : Pat<(i8 (trunc GR32:$src)), (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), - x86_subreg_8bit)>, + sub_8bit)>, Requires<[In32BitMode]>; def : Pat<(i8 (trunc GR16:$src)), (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), - x86_subreg_8bit)>, + sub_8bit)>, Requires<[In32BitMode]>; // h-register tricks def : Pat<(i8 (trunc (srl_su GR16:$src, (i8 8)))), (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), - x86_subreg_8bit_hi)>, + sub_8bit_hi)>, Requires<[In32BitMode]>; def : Pat<(i8 (trunc (srl_su GR32:$src, (i8 8)))), (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), - x86_subreg_8bit_hi)>, + sub_8bit_hi)>, Requires<[In32BitMode]>; def : Pat<(srl GR16:$src, (i8 8)), (EXTRACT_SUBREG (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), - x86_subreg_8bit_hi)), - x86_subreg_16bit)>, + sub_8bit_hi)), + sub_16bit)>, Requires<[In32BitMode]>; def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))), (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), - 
x86_subreg_8bit_hi))>, + sub_8bit_hi))>, Requires<[In32BitMode]>; def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))), (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), - x86_subreg_8bit_hi))>, + sub_8bit_hi))>, Requires<[In32BitMode]>; def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)), (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), - x86_subreg_8bit_hi))>, + sub_8bit_hi))>, Requires<[In32BitMode]>; def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)), (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), - x86_subreg_8bit_hi))>, + sub_8bit_hi))>, Requires<[In32BitMode]>; // (shl x, 1) ==> (add x, x) diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index 744af50..0952fc8 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -117,9 +117,6 @@ def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src), "movd\t{$src, $dst|$dst, $src}", []>; def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs), (ins GR32:$dst, VR64:$src), "movd\t{$src, $dst|$dst, $src}", []>; -def MMX_MOVQ64gmr : MMXRI<0x7F, MRMDestMem, (outs), - (ins i64mem:$dst, VR64:$src), - "movq\t{$src, $dst|$dst, $src}", []>; let neverHasSideEffects = 1 in def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src), @@ -133,10 +130,10 @@ let neverHasSideEffects = 1 in def MMX_MOVD64from64rr : MMXRI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR64:$src), "movd\t{$src, $dst|$dst, $src}", []>; -def MMX_MOVD64rrv164 : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src), - "movd\t{$src, $dst|$dst, $src}", - [(set VR64:$dst, - (v1i64 (scalar_to_vector GR64:$src)))]>; +def MMX_MOVD64rrv164 : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src), + "movd\t{$src, $dst|$dst, $src}", + [(set VR64:$dst, + (v1i64 (scalar_to_vector GR64:$src)))]>; let neverHasSideEffects = 1 in def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src), diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 2129580..5580ba7 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -117,17 +117,17 @@ def alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ return cast<LoadSDNode>(N)->getAlignment() >= 16; }]>; -def alignedloadfsf32 : PatFrag<(ops node:$ptr), +def alignedloadfsf32 : PatFrag<(ops node:$ptr), (f32 (alignedload node:$ptr))>; -def alignedloadfsf64 : PatFrag<(ops node:$ptr), +def alignedloadfsf64 : PatFrag<(ops node:$ptr), (f64 (alignedload node:$ptr))>; -def alignedloadv4f32 : PatFrag<(ops node:$ptr), +def alignedloadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (alignedload node:$ptr))>; -def alignedloadv2f64 : PatFrag<(ops node:$ptr), +def alignedloadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (alignedload node:$ptr))>; -def alignedloadv4i32 : PatFrag<(ops node:$ptr), +def alignedloadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (alignedload node:$ptr))>; -def alignedloadv2i64 : PatFrag<(ops node:$ptr), +def alignedloadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (alignedload node:$ptr))>; // Like 'load', but uses special alignment checks suitable for use in @@ -387,11 +387,11 @@ def MOVSSrr : SSI<0x10, MRMSrcReg, let AddedComplexity = 15 in def : Pat<(v4f32 (movl VR128:$src1, VR128:$src2)), (MOVSSrr (v4f32 VR128:$src1), - (EXTRACT_SUBREG (v4f32 VR128:$src2), x86_subreg_ss))>; + (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>; // Implicitly promote a 32-bit scalar to a vector. 
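// That is, an FR32 value becomes the low element of an otherwise undefined
// v4f32 through the new sub_ss index; the scalar and vector register classes
// occupy the same physical XMM registers, so no copy is needed: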
def : Pat<(v4f32 (scalar_to_vector FR32:$src)), - (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, x86_subreg_ss)>; + (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>; // Loading from memory automatically zeroing upper bits. let canFoldAsLoad = 1, isReMaterializable = 1 in @@ -403,11 +403,11 @@ def MOVSSrm : SSI<0x10, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src), // with SUBREG_TO_REG. let AddedComplexity = 20 in { def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))), - (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), x86_subreg_ss)>; + (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>; def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))), - (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), x86_subreg_ss)>; + (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>; def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))), - (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), x86_subreg_ss)>; + (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>; } // Store scalar value to memory. @@ -419,7 +419,7 @@ def MOVSSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src), def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), addr:$dst), (MOVSSmr addr:$dst, - (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss))>; + (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; // Conversion instructions def CVTTSS2SIrr : SSI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins FR32:$src), @@ -449,7 +449,7 @@ def Int_CVTSS2SIrm : SSI<0x2D, MRMSrcMem, (outs GR32:$dst), (ins f32mem:$src), [(set GR32:$dst, (int_x86_sse_cvtss2si (load addr:$src)))]>; -// Match intrinisics which expect MM and XMM operand(s). +// Match intrinsics which expect MM and XMM operand(s). def Int_CVTPS2PIrr : PSI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), "cvtps2pi\t{$src, $dst|$dst, $src}", [(set VR64:$dst, (int_x86_sse_cvtps2pi VR128:$src))]>; @@ -509,6 +509,17 @@ let mayLoad = 1 in def CMPSSrm : SSIi8<0xC2, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1, f32mem:$src, SSECC:$cc), "cmp${cc}ss\t{$src, $dst|$dst, $src}", []>; + + // Accept explicit immediate argument form instead of comparison code. +let isAsmParserOnly = 1 in { + def CMPSSrr_alt : SSIi8<0xC2, MRMSrcReg, + (outs FR32:$dst), (ins FR32:$src1, FR32:$src, i8imm:$src2), + "cmpss\t{$src2, $src, $dst|$dst, $src, $src2}", []>; +let mayLoad = 1 in + def CMPSSrm_alt : SSIi8<0xC2, MRMSrcMem, + (outs FR32:$dst), (ins FR32:$src1, f32mem:$src, i8imm:$src2), + "cmpss\t{$src2, $src, $dst|$dst, $src, $src2}", []>; +} } let Defs = [EFLAGS] in { @@ -518,25 +529,25 @@ def UCOMISSrr: PSI<0x2E, MRMSrcReg, (outs), (ins FR32:$src1, FR32:$src2), def UCOMISSrm: PSI<0x2E, MRMSrcMem, (outs), (ins FR32:$src1, f32mem:$src2), "ucomiss\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86cmp FR32:$src1, (loadf32 addr:$src2)))]>; - + def COMISSrr: PSI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), "comiss\t{$src2, $src1|$src1, $src2}", []>; def COMISSrm: PSI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), "comiss\t{$src2, $src1|$src1, $src2}", []>; - + } // Defs = [EFLAGS] // Aliases to match intrinsics which expect XMM operand(s). 
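// (The Int_ prefix marks definitions whose patterns match the intrinsic
// forms, which operate on whole VR128 vectors rather than the scalar
// FR32/FR64 values used by the plain instruction definitions.)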
let Constraints = "$src1 = $dst" in { def Int_CMPSSrr : SSIi8<0xC2, MRMSrcReg, - (outs VR128:$dst), + (outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc), "cmp${cc}ss\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse_cmp_ss + [(set VR128:$dst, (int_x86_sse_cmp_ss VR128:$src1, VR128:$src, imm:$cc))]>; def Int_CMPSSrm : SSIi8<0xC2, MRMSrcMem, - (outs VR128:$dst), + (outs VR128:$dst), (ins VR128:$src1, f32mem:$src, SSECC:$cc), "cmp${cc}ss\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse_cmp_ss VR128:$src1, @@ -1009,6 +1020,16 @@ let Constraints = "$src1 = $dst" in { "cmp${cc}ps\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse_cmp_ps VR128:$src1, (memop addr:$src), imm:$cc))]>; + + // Accept explicit immediate argument form instead of comparison code. +let isAsmParserOnly = 1 in { + def CMPPSrri_alt : PSIi8<0xC2, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src, i8imm:$src2), + "cmpps\t{$src2, $src, $dst|$dst, $src, $src2}", []>; + def CMPPSrmi_alt : PSIi8<0xC2, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, f128mem:$src, i8imm:$src2), + "cmpps\t{$src2, $src, $dst|$dst, $src, $src2}", []>; +} } def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), VR128:$src2, imm:$cc)), (CMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>; @@ -1102,7 +1123,8 @@ def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), } // Load, store, and memory fence -def SFENCE : PSI<0xAE, MRM7r, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>; +def SFENCE : I<0xAE, MRM_F8, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>, + TB, Requires<[HasSSE1]>; // MXCSR register def LDMXCSR : PSI<0xAE, MRM2m, (outs), (ins i32mem:$src), @@ -1130,7 +1152,7 @@ def : Pat<(v8i16 immAllZerosV), (V_SET0PI)>; def : Pat<(v16i8 immAllZerosV), (V_SET0PI)>; def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), - (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss))>; + (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; //===---------------------------------------------------------------------===// // SSE2 Instructions @@ -1152,11 +1174,11 @@ def MOVSDrr : SDI<0x10, MRMSrcReg, let AddedComplexity = 15 in def : Pat<(v2f64 (movl VR128:$src1, VR128:$src2)), (MOVSDrr (v2f64 VR128:$src1), - (EXTRACT_SUBREG (v2f64 VR128:$src2), x86_subreg_sd))>; + (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>; // Implicitly promote a 64-bit scalar to a vector. def : Pat<(v2f64 (scalar_to_vector FR64:$src)), - (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, x86_subreg_sd)>; + (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>; // Loading from memory automatically zeroing upper bits. let canFoldAsLoad = 1, isReMaterializable = 1, AddedComplexity = 20 in def MOVSDrm : SDI<0x10, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src), // with SUBREG_TO_REG.
let AddedComplexity = 20 in { def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>; + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>; + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>; + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>; + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; def : Pat<(v2f64 (X86vzload addr:$src)), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>; + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; } // Store scalar value to memory. @@ -1188,7 +1210,7 @@ def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src), def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), addr:$dst), (MOVSDmr addr:$dst, - (EXTRACT_SUBREG (v2f64 VR128:$src), x86_subreg_sd))>; + (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>; // Conversion instructions def CVTTSD2SIrr : SDI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins FR64:$src), @@ -1255,7 +1277,7 @@ def Int_CVTSD2SIrm : SDI<0x2D, MRMSrcMem, (outs GR32:$dst), (ins f128mem:$src), [(set GR32:$dst, (int_x86_sse2_cvtsd2si (load addr:$src)))]>; -// Match intrinisics which expect MM and XMM operand(s). +// Match intrinsics which expect MM and XMM operand(s). def Int_CVTPD2PIrr : PDI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), "cvtpd2pi\t{$src, $dst|$dst, $src}", [(set VR64:$dst, (int_x86_sse_cvtpd2pi VR128:$src))]>; @@ -1297,6 +1319,17 @@ let mayLoad = 1 in def CMPSDrm : SDIi8<0xC2, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1, f64mem:$src, SSECC:$cc), "cmp${cc}sd\t{$src, $dst|$dst, $src}", []>; + + // Accept explicit immediate argument form instead of comparison code. +let isAsmParserOnly = 1 in { + def CMPSDrr_alt : SDIi8<0xC2, MRMSrcReg, + (outs FR64:$dst), (ins FR64:$src1, FR64:$src, i8imm:$src2), + "cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}", []>; +let mayLoad = 1 in + def CMPSDrm_alt : SDIi8<0xC2, MRMSrcMem, + (outs FR64:$dst), (ins FR64:$src1, f64mem:$src, i8imm:$src2), + "cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}", []>; +} } let Defs = [EFLAGS] in { @@ -1311,13 +1344,13 @@ def UCOMISDrm: PDI<0x2E, MRMSrcMem, (outs), (ins FR64:$src1, f64mem:$src2), // Aliases to match intrinsics which expect XMM operand(s). 
let Constraints = "$src1 = $dst" in { def Int_CMPSDrr : SDIi8<0xC2, MRMSrcReg, - (outs VR128:$dst), + (outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc), "cmp${cc}sd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cmp_sd VR128:$src1, VR128:$src, imm:$cc))]>; def Int_CMPSDrm : SDIi8<0xC2, MRMSrcMem, - (outs VR128:$dst), + (outs VR128:$dst), (ins VR128:$src1, f64mem:$src, SSECC:$cc), "cmp${cc}sd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cmp_sd VR128:$src1, @@ -1655,7 +1688,7 @@ def CVTTPS2DQrm : SSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), def Int_CVTTPS2DQrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, + [(set VR128:$dst, (int_x86_sse2_cvttps2dq VR128:$src))]>, XS, Requires<[HasSSE2]>; def Int_CVTTPS2DQrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), @@ -1890,6 +1923,16 @@ let Constraints = "$src1 = $dst" in { "cmp${cc}pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cmp_pd VR128:$src1, (memop addr:$src), imm:$cc))]>; + + // Accept explicit immediate argument form instead of comparison code. +let isAsmParserOnly = 1 in { + def CMPPDrri_alt : PDIi8<0xC2, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src, i8imm:$src2), + "cmppd\t{$src2, $src, $dst|$dst, $src, $src2}", []>; + def CMPPDrmi_alt : PDIi8<0xC2, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, f128mem:$src, i8imm:$src2), + "cmppd\t{$src2, $src, $dst|$dst, $src, $src2}", []>; +} } def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), VR128:$src2, imm:$cc)), (CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>; @@ -1980,24 +2023,24 @@ let Constraints = "$src1 = $dst" in { multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId, bit Commutable = 0> { - def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), + def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]> { let isCommutable = Commutable; } - def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), + def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR128:$dst, (IntId VR128:$src1, - (bitconvert (memopv2i64 + (bitconvert (memopv2i64 addr:$src2))))]>; } multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm, string OpcodeStr, Intrinsic IntId, Intrinsic IntId2> { - def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), + def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>; @@ -2006,7 +2049,7 @@ multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR128:$dst, (IntId VR128:$src1, (bitconvert (memopv2i64 addr:$src2))))]>; - def ri : PDIi8<opc2, ImmForm, (outs VR128:$dst), + def ri : PDIi8<opc2, ImmForm, (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR128:$dst, (IntId2 VR128:$src1, (i32 imm:$src2)))]>; @@ -2015,13 +2058,13 @@ multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm, /// PDI_binop_rm - Simple SSE2 binary operator. 
multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, ValueType OpVT, bit Commutable = 0> { - def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), + def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]> { let isCommutable = Commutable; } - def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), + def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR128:$dst, (OpVT (OpNode VR128:$src1, @@ -2416,6 +2459,10 @@ def LFENCE : I<0xAE, MRM_E8, (outs), (ins), def MFENCE : I<0xAE, MRM_F0, (outs), (ins), "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>; +// Pause. This "instruction" is encoded as "rep; nop", so even though it +// was introduced with SSE2, it's backward compatible. +def PAUSE : I<0x90, RawFrm, (outs), (ins), "pause", []>, REP; + //TODO: custom lower this so as to never even generate the noop def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm), (i8 0)), (NOOP)>; @@ -2462,7 +2509,7 @@ def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), (iPTR 0))), addr:$dst)]>; def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), - (f64 (EXTRACT_SUBREG (v2f64 VR128:$src), x86_subreg_sd))>; + (f64 (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>; def MOVPDI2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src), "movd\t{$src, $dst|$dst, $src}", @@ -2903,7 +2950,7 @@ defm PMADDUBSW : SS3I_binop_rm_int_8 <0x04, "pmaddubsw", defm PMULHRSW : SS3I_binop_rm_int_16<0x0B, "pmulhrsw", int_x86_ssse3_pmul_hr_sw, int_x86_ssse3_pmul_hr_sw_128, 1>; - + defm PSHUFB : SS3I_binop_rm_int_8 <0x00, "pshufb", int_x86_ssse3_pshuf_b, int_x86_ssse3_pshuf_b_128>; @@ -3042,10 +3089,10 @@ def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))), (MOVSSrr (v4f32 (V_SET0PS)), FR32:$src)>; def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))), (MOVSSrr (v4f32 (V_SET0PS)), - (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss)))>; + (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)))>; def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))), (MOVSSrr (v4i32 (V_SET0PI)), - (EXTRACT_SUBREG (v4i32 VR128:$src), x86_subreg_ss))>; + (EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>; } // Splat v2f64 / v2i64 @@ -3181,17 +3228,17 @@ let AddedComplexity = 15 in { // Setting the lowest element in the vector. 
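// The movl fragment below is the "move lowest element" shuffle; with the
// named indices it lowers to a scalar MOVSS/MOVSD that merges the sub_ss or
// sub_sd lane of $src2 into $src1: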
def : Pat<(v4i32 (movl VR128:$src1, VR128:$src2)), (MOVSSrr (v4i32 VR128:$src1), - (EXTRACT_SUBREG (v4i32 VR128:$src2), x86_subreg_ss))>; + (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>; def : Pat<(v2i64 (movl VR128:$src1, VR128:$src2)), (MOVSDrr (v2i64 VR128:$src1), - (EXTRACT_SUBREG (v2i64 VR128:$src2), x86_subreg_sd))>; + (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>; // vector_shuffle v1, v2 <4, 5, 2, 3> using movsd def : Pat<(v4f32 (movlp VR128:$src1, VR128:$src2)), - (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, x86_subreg_sd))>, + (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>, Requires<[HasSSE2]>; def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)), - (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, x86_subreg_sd))>, + (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>, Requires<[HasSSE2]>; } @@ -3464,14 +3511,14 @@ let Constraints = "$src1 = $dst" in { let Constraints = "$src1 = $dst" in { multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, ValueType OpVT, bit Commutable = 0> { - def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), + def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]>, OpSize { let isCommutable = Commutable; } - def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), + def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR128:$dst, (OpNode VR128:$src1, @@ -3949,15 +3996,15 @@ let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in { def PCMPESTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst), (ins VR128:$src1, VR128:$src3, i8imm:$src5), "#PCMPESTRM128rr PSEUDO!", - [(set VR128:$dst, - (int_x86_sse42_pcmpestrm128 + [(set VR128:$dst, + (int_x86_sse42_pcmpestrm128 VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>, OpSize; def PCMPESTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src3, i8imm:$src5), "#PCMPESTRM128rm PSEUDO!", - [(set VR128:$dst, (int_x86_sse42_pcmpestrm128 - VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5))]>, + [(set VR128:$dst, (int_x86_sse42_pcmpestrm128 + VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5))]>, OpSize; } @@ -3972,7 +4019,7 @@ def PCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs), let Defs = [ECX, EFLAGS] in { multiclass SS42AI_pcmpistri<Intrinsic IntId128> { - def rr : SS42AI<0x63, MRMSrcReg, (outs), + def rr : SS42AI<0x63, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2, i8imm:$src3), "pcmpistri\t{$src3, $src2, $src1|$src1, $src2, $src3}", [(set ECX, (IntId128 VR128:$src1, VR128:$src2, imm:$src3)), @@ -4003,7 +4050,7 @@ let Uses = [EAX, EDX] in { def rm : SS42AI<0x61, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src3, i8imm:$src5), "pcmpestri\t{$src5, $src3, $src1|$src1, $src3, $src5}", - [(set ECX, + [(set ECX, (IntId128 VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5)), (implicit EFLAGS)]>, OpSize; } @@ -4081,16 +4128,15 @@ def AESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst), OpSize; def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, i32i8imm:$src2), + (ins VR128:$src1, i8imm:$src2), "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>, OpSize; def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst), - (ins i128mem:$src1, i32i8imm:$src2), + (ins i128mem:$src1, i8imm:$src2), 
"aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_aesni_aeskeygenassist (bitconvert (memopv2i64 addr:$src1)), imm:$src2))]>, OpSize; - diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index a3e04b0..98975ea 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -37,7 +37,6 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" using namespace llvm; @@ -145,6 +144,19 @@ unsigned X86RegisterInfo::getX86RegNum(unsigned RegNo) { case X86::XMM7: case X86::XMM15: case X86::MM7: return 7; + case X86::ES: + return 0; + case X86::CS: + return 1; + case X86::SS: + return 2; + case X86::DS: + return 3; + case X86::FS: + return 4; + case X86::GS: + return 5; + default: assert(isVirtualRegister(RegNo) && "Unknown physical register!"); llvm_unreachable("Register allocator hasn't allocated reg correctly yet!"); @@ -158,8 +170,7 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, unsigned SubIdx) const { switch (SubIdx) { default: return 0; - case 1: - // 8-bit + case X86::sub_8bit: if (B == &X86::GR8RegClass) { if (A->getSize() == 2 || A->getSize() == 4 || A->getSize() == 8) return A; @@ -191,12 +202,9 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, return &X86::GR16_NOREXRegClass; else if (A == &X86::GR16_ABCDRegClass) return &X86::GR16_ABCDRegClass; - } else if (B == &X86::FR32RegClass) { - return A; } break; - case 2: - // 8-bit hi + case X86::sub_8bit_hi: if (B == &X86::GR8_ABCD_HRegClass) { if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass || A == &X86::GR64_NOREXRegClass || @@ -209,12 +217,9 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, else if (A == &X86::GR16RegClass || A == &X86::GR16_ABCDRegClass || A == &X86::GR16_NOREXRegClass) return &X86::GR16_ABCDRegClass; - } else if (B == &X86::FR64RegClass) { - return A; } break; - case 3: - // 16-bit + case X86::sub_16bit: if (B == &X86::GR16RegClass) { if (A->getSize() == 4 || A->getSize() == 8) return A; @@ -238,12 +243,9 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, return &X86::GR32_NOREXRegClass; else if (A == &X86::GR32_ABCDRegClass) return &X86::GR64_ABCDRegClass; - } else if (B == &X86::VR128RegClass) { - return A; } break; - case 4: - // 32-bit + case X86::sub_32bit: if (B == &X86::GR32RegClass || B == &X86::GR32_NOSPRegClass) { if (A->getSize() == 8) return A; @@ -261,6 +263,18 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, return &X86::GR64_ABCDRegClass; } break; + case X86::sub_ss: + if (B == &X86::FR32RegClass) + return A; + break; + case X86::sub_sd: + if (B == &X86::FR64RegClass) + return A; + break; + case X86::sub_xmm: + if (B == &X86::VR128RegClass) + return A; + break; } return 0; } @@ -518,6 +532,30 @@ X86RegisterInfo::getFrameIndexOffset(const MachineFunction &MF, int FI) const { return Offset; } +static unsigned getSUBriOpcode(unsigned is64Bit, int64_t Imm) { + if (is64Bit) { + if (isInt<8>(Imm)) + return X86::SUB64ri8; + return X86::SUB64ri32; + } else { + if (isInt<8>(Imm)) + return X86::SUB32ri8; + return X86::SUB32ri; + } +} + +static unsigned getADDriOpcode(unsigned is64Bit, int64_t Imm) { + if (is64Bit) { + if (isInt<8>(Imm)) + return X86::ADD64ri8; + return X86::ADD64ri32; + } else { + if (isInt<8>(Imm)) + return X86::ADD32ri8; + return 
X86::ADD32ri; + } +} + void X86RegisterInfo:: eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { @@ -537,7 +575,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineInstr *New = 0; if (Old->getOpcode() == getCallFrameSetupOpcode()) { New = BuildMI(MF, Old->getDebugLoc(), - TII.get(Is64Bit ? X86::SUB64ri32 : X86::SUB32ri), + TII.get(getSUBriOpcode(Is64Bit, Amount)), StackPtr) .addReg(StackPtr) .addImm(Amount); @@ -549,9 +587,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, Amount -= CalleeAmt; if (Amount) { - unsigned Opc = (Amount < 128) ? - (Is64Bit ? X86::ADD64ri8 : X86::ADD32ri8) : - (Is64Bit ? X86::ADD64ri32 : X86::ADD32ri); + unsigned Opc = getADDriOpcode(Is64Bit, Amount); New = BuildMI(MF, Old->getDebugLoc(), TII.get(Opc), StackPtr) .addReg(StackPtr) .addImm(Amount); @@ -571,9 +607,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, // something off the stack pointer, add it back. We do this until we have // more advanced stack pointer tracking ability. if (uint64_t CalleeAmt = I->getOperand(1).getImm()) { - unsigned Opc = (CalleeAmt < 128) ? - (Is64Bit ? X86::SUB64ri8 : X86::SUB32ri8) : - (Is64Bit ? X86::SUB64ri32 : X86::SUB32ri); + unsigned Opc = getSUBriOpcode(Is64Bit, CalleeAmt); MachineInstr *Old = I; MachineInstr *New = BuildMI(MF, Old->getDebugLoc(), TII.get(Opc), @@ -691,13 +725,9 @@ void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const TargetInstrInfo &TII) { bool isSub = NumBytes < 0; uint64_t Offset = isSub ? -NumBytes : NumBytes; - unsigned Opc = isSub - ? ((Offset < 128) ? - (Is64Bit ? X86::SUB64ri8 : X86::SUB32ri8) : - (Is64Bit ? X86::SUB64ri32 : X86::SUB32ri)) - : ((Offset < 128) ? - (Is64Bit ? X86::ADD64ri8 : X86::ADD32ri8) : - (Is64Bit ? X86::ADD64ri32 : X86::ADD32ri)); + unsigned Opc = isSub ? + getSUBriOpcode(Is64Bit, Offset) : + getADDriOpcode(Is64Bit, Offset); uint64_t Chunk = (1LL << 31) - 1; DebugLoc DL = MBB.findDebugLoc(MBBI); @@ -899,7 +929,7 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { if (Is64Bit && !Fn->hasFnAttr(Attribute::NoRedZone) && !needsStackRealignment(MF) && !MFI->hasVarSizedObjects() && // No dynamic alloca. - !MFI->hasCalls() && // No calls. + !MFI->adjustsStack() && // No calls. !Subtarget->isTargetWin64()) { // Win64 has no Red Zone uint64_t MinSize = X86FI->getCalleeSavedFrameSize(); if (HasFP) MinSize += SlotSize; @@ -917,7 +947,8 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { // size is bigger than the callers. if (TailCallReturnAddrDelta < 0) { MachineInstr *MI = - BuildMI(MBB, MBBI, DL, TII.get(Is64Bit? X86::SUB64ri32 : X86::SUB32ri), + BuildMI(MBB, MBBI, DL, + TII.get(getSUBriOpcode(Is64Bit, -TailCallReturnAddrDelta)), StackPtr) .addReg(StackPtr) .addImm(-TailCallReturnAddrDelta); @@ -1307,7 +1338,7 @@ X86RegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves) const { // Calculate amount of bytes used for return address storing int stackGrowth = (Is64Bit ? -8 : -4); - // Initial state of the frame pointer is esp+4. + // Initial state of the frame pointer is esp+stackGrowth. 
MachineLocation Dst(MachineLocation::VirtualFP); MachineLocation Src(StackPtr, stackGrowth); Moves.push_back(MachineMove(0, Dst, Src)); diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h index ac96c4c..d0b82e2 100644 --- a/lib/Target/X86/X86RegisterInfo.h +++ b/lib/Target/X86/X86RegisterInfo.h @@ -30,16 +30,6 @@ namespace N86 { }; } -namespace X86 { - /// SubregIndex - The index of various sized subregister classes. Note that - /// these indices must be kept in sync with the class indices in the - /// X86RegisterInfo.td file. - enum SubregIndex { - SUBREG_8BIT = 1, SUBREG_8BIT_HI = 2, SUBREG_16BIT = 3, SUBREG_32BIT = 4, - SUBREG_SS = 1, SUBREG_SD = 2, SUBREG_XMM = 3 - }; -} - /// DWARFFlavour - Flavour of dwarf regnumbers /// namespace DWARFFlavour { diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td index 49a6ca0..91cfaa9 100644 --- a/lib/Target/X86/X86RegisterInfo.td +++ b/lib/Target/X86/X86RegisterInfo.td @@ -18,6 +18,17 @@ // let Namespace = "X86" in { + // Subregister indices. + def sub_8bit : SubRegIndex; + def sub_8bit_hi : SubRegIndex; + def sub_16bit : SubRegIndex; + def sub_32bit : SubRegIndex; + + def sub_ss : SubRegIndex; + def sub_sd : SubRegIndex; + def sub_xmm : SubRegIndex; + + // In the register alias definitions below, we define which registers alias // which others. We only specify which registers the small registers alias, // because the register file generator is smart enough to figure out that @@ -57,17 +68,22 @@ let Namespace = "X86" in { def BH : Register<"bh">, DwarfRegNum<[3, 3, 3]>; // 16-bit registers + let SubRegIndices = [sub_8bit, sub_8bit_hi] in { def AX : RegisterWithSubRegs<"ax", [AL,AH]>, DwarfRegNum<[0, 0, 0]>; def DX : RegisterWithSubRegs<"dx", [DL,DH]>, DwarfRegNum<[1, 2, 2]>; def CX : RegisterWithSubRegs<"cx", [CL,CH]>, DwarfRegNum<[2, 1, 1]>; def BX : RegisterWithSubRegs<"bx", [BL,BH]>, DwarfRegNum<[3, 3, 3]>; + } + let SubRegIndices = [sub_8bit] in { def SI : RegisterWithSubRegs<"si", [SIL]>, DwarfRegNum<[4, 6, 6]>; def DI : RegisterWithSubRegs<"di", [DIL]>, DwarfRegNum<[5, 7, 7]>; def BP : RegisterWithSubRegs<"bp", [BPL]>, DwarfRegNum<[6, 4, 5]>; def SP : RegisterWithSubRegs<"sp", [SPL]>, DwarfRegNum<[7, 5, 4]>; + } def IP : Register<"ip">, DwarfRegNum<[16]>; // X86-64 only + let SubRegIndices = [sub_8bit] in { def R8W : RegisterWithSubRegs<"r8w", [R8B]>, DwarfRegNum<[8, -2, -2]>; def R9W : RegisterWithSubRegs<"r9w", [R9B]>, DwarfRegNum<[9, -2, -2]>; def R10W : RegisterWithSubRegs<"r10w", [R10B]>, DwarfRegNum<[10, -2, -2]>; @@ -76,8 +92,9 @@ let Namespace = "X86" in { def R13W : RegisterWithSubRegs<"r13w", [R13B]>, DwarfRegNum<[13, -2, -2]>; def R14W : RegisterWithSubRegs<"r14w", [R14B]>, DwarfRegNum<[14, -2, -2]>; def R15W : RegisterWithSubRegs<"r15w", [R15B]>, DwarfRegNum<[15, -2, -2]>; - + } // 32-bit registers + let SubRegIndices = [sub_16bit] in { def EAX : RegisterWithSubRegs<"eax", [AX]>, DwarfRegNum<[0, 0, 0]>; def EDX : RegisterWithSubRegs<"edx", [DX]>, DwarfRegNum<[1, 2, 2]>; def ECX : RegisterWithSubRegs<"ecx", [CX]>, DwarfRegNum<[2, 1, 1]>; @@ -97,8 +114,10 @@ let Namespace = "X86" in { def R13D : RegisterWithSubRegs<"r13d", [R13W]>, DwarfRegNum<[13, -2, -2]>; def R14D : RegisterWithSubRegs<"r14d", [R14W]>, DwarfRegNum<[14, -2, -2]>; def R15D : RegisterWithSubRegs<"r15d", [R15W]>, DwarfRegNum<[15, -2, -2]>; + } // 64-bit registers, X86-64 only + let SubRegIndices = [sub_32bit] in { def RAX : RegisterWithSubRegs<"rax", [EAX]>, DwarfRegNum<[0, -2, -2]>; def RDX : 
RegisterWithSubRegs<"rdx", [EDX]>, DwarfRegNum<[1, -2, -2]>; def RCX : RegisterWithSubRegs<"rcx", [ECX]>, DwarfRegNum<[2, -2, -2]>; @@ -117,6 +136,7 @@ let Namespace = "X86" in { def R14 : RegisterWithSubRegs<"r14", [R14D]>, DwarfRegNum<[14, -2, -2]>; def R15 : RegisterWithSubRegs<"r15", [R15D]>, DwarfRegNum<[15, -2, -2]>; def RIP : RegisterWithSubRegs<"rip", [EIP]>, DwarfRegNum<[16, -2, -2]>; + } // MMX Registers. These are actually aliased to ST0 .. ST7 def MM0 : Register<"mm0">, DwarfRegNum<[41, 29, 29]>; @@ -137,7 +157,9 @@ let Namespace = "X86" in { def FP5 : Register<"fp5">; def FP6 : Register<"fp6">; - // XMM Registers, used by the various SSE instruction set extensions + // XMM Registers, used by the various SSE instruction set extensions. + // The sub_ss and sub_sd subregs are the same registers with another regclass. + let CompositeIndices = [(sub_ss), (sub_sd)] in { def XMM0: Register<"xmm0">, DwarfRegNum<[17, 21, 21]>; def XMM1: Register<"xmm1">, DwarfRegNum<[18, 22, 22]>; def XMM2: Register<"xmm2">, DwarfRegNum<[19, 23, 23]>; @@ -156,8 +178,10 @@ let Namespace = "X86" in { def XMM13: Register<"xmm13">, DwarfRegNum<[30, -2, -2]>; def XMM14: Register<"xmm14">, DwarfRegNum<[31, -2, -2]>; def XMM15: Register<"xmm15">, DwarfRegNum<[32, -2, -2]>; + } // YMM Registers, used by AVX instructions + let SubRegIndices = [sub_xmm] in { def YMM0: RegisterWithSubRegs<"ymm0", [XMM0]>, DwarfRegNum<[17, 21, 21]>; def YMM1: RegisterWithSubRegs<"ymm1", [XMM1]>, DwarfRegNum<[18, 22, 22]>; def YMM2: RegisterWithSubRegs<"ymm2", [XMM2]>, DwarfRegNum<[19, 23, 23]>; @@ -174,6 +198,7 @@ let Namespace = "X86" in { def YMM13: RegisterWithSubRegs<"ymm13", [XMM13]>, DwarfRegNum<[30, -2, -2]>; def YMM14: RegisterWithSubRegs<"ymm14", [XMM14]>, DwarfRegNum<[31, -2, -2]>; def YMM15: RegisterWithSubRegs<"ymm15", [XMM15]>, DwarfRegNum<[32, -2, -2]>; + } // Floating point stack registers def ST0 : Register<"st(0)">, DwarfRegNum<[33, 12, 11]>; @@ -207,106 +232,19 @@ let Namespace = "X86" in { def DR7 : Register<"dr7">; // Control registers - def ECR0 : Register<"ecr0">; - def ECR1 : Register<"ecr1">; - def ECR2 : Register<"ecr2">; - def ECR3 : Register<"ecr3">; - def ECR4 : Register<"ecr4">; - def ECR5 : Register<"ecr5">; - def ECR6 : Register<"ecr6">; - def ECR7 : Register<"ecr7">; - - def RCR0 : Register<"rcr0">; - def RCR1 : Register<"rcr1">; - def RCR2 : Register<"rcr2">; - def RCR3 : Register<"rcr3">; - def RCR4 : Register<"rcr4">; - def RCR5 : Register<"rcr5">; - def RCR6 : Register<"rcr6">; - def RCR7 : Register<"rcr7">; - def RCR8 : Register<"rcr8">; + def CR0 : Register<"cr0">; + def CR1 : Register<"cr1">; + def CR2 : Register<"cr2">; + def CR3 : Register<"cr3">; + def CR4 : Register<"cr4">; + def CR5 : Register<"cr5">; + def CR6 : Register<"cr6">; + def CR7 : Register<"cr7">; + def CR8 : Register<"cr8">; } //===----------------------------------------------------------------------===// -// Subregister Set Definitions... now that we have all of the pieces, define the -// sub registers for each register.
-// - -def x86_subreg_8bit : PatLeaf<(i32 1)>; -def x86_subreg_8bit_hi : PatLeaf<(i32 2)>; -def x86_subreg_16bit : PatLeaf<(i32 3)>; -def x86_subreg_32bit : PatLeaf<(i32 4)>; - -def x86_subreg_ss : PatLeaf<(i32 1)>; -def x86_subreg_sd : PatLeaf<(i32 2)>; -def x86_subreg_xmm : PatLeaf<(i32 3)>; - -def : SubRegSet<1, [AX, CX, DX, BX, SP, BP, SI, DI, - R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W], - [AL, CL, DL, BL, SPL, BPL, SIL, DIL, - R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>; - -def : SubRegSet<2, [AX, CX, DX, BX], - [AH, CH, DH, BH]>; - -def : SubRegSet<1, [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI, - R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D], - [AL, CL, DL, BL, SPL, BPL, SIL, DIL, - R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>; - -def : SubRegSet<2, [EAX, ECX, EDX, EBX], - [AH, CH, DH, BH]>; - -def : SubRegSet<3, [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI, - R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D], - [AX, CX, DX, BX, SP, BP, SI, DI, - R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W]>; - -def : SubRegSet<1, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI, - R8, R9, R10, R11, R12, R13, R14, R15], - [AL, CL, DL, BL, SPL, BPL, SIL, DIL, - R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>; - -def : SubRegSet<2, [RAX, RCX, RDX, RBX], - [AH, CH, DH, BH]>; - -def : SubRegSet<3, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI, - R8, R9, R10, R11, R12, R13, R14, R15], - [AX, CX, DX, BX, SP, BP, SI, DI, - R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W]>; - -def : SubRegSet<4, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI, - R8, R9, R10, R11, R12, R13, R14, R15], - [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI, - R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D]>; - -def : SubRegSet<1, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, - YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15], - [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, - XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>; - -def : SubRegSet<2, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, - YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15], - [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, - XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>; - -def : SubRegSet<3, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, - YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15], - [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, - XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>; - -def : SubRegSet<1, [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, - XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15], - [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, - XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>; - -def : SubRegSet<2, [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, - XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15], - [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, - XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>; - -//===----------------------------------------------------------------------===// // Register Class Definitions... now that we have all of the pieces, define the // top-level register classes. The order specified in the register list is // implicitly defined to be the register allocation order. 
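[The hunk above is the heart of the register-info change: the hard-coded SubregIndex enum, the numeric PatLeaf constants, and the hand-maintained SubRegSet tables are all replaced by named SubRegIndex definitions attached directly to the registers, which TableGen turns into X86::sub_* enumerators. As a rough illustration of what that buys on the C++ side — a minimal sketch, not part of this commit, assuming the generated enumerators are visible through the usual generated register-info header:

    #include "llvm/Target/TargetRegisterInfo.h"

    // Sketch: resolve the low 8-bit piece of a GPR by name instead of by
    // magic number, e.g. X86::EAX -> X86::AL. getSubReg returns 0 when the
    // register has no such sub-register.
    static unsigned getLow8BitSubReg(const TargetRegisterInfo &TRI,
                                     unsigned Reg) {
      return TRI.getSubReg(Reg, X86::sub_8bit); // previously: getSubReg(Reg, 1)
    }

Because each index is a first-class definition rather than an integer convention, the earlier "must be kept in sync" comment in X86RegisterInfo.h becomes unnecessary; TableGen checks the mapping itself.]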
@@ -370,7 +308,7 @@ def GR8 : RegisterClass<"X86", [i8], 8,
 def GR16 : RegisterClass<"X86", [i16], 16,
                          [AX, CX, DX, SI, DI, BX, BP, SP,
                           R8W, R9W, R10W, R11W, R14W, R15W, R12W, R13W]> {
-  let SubRegClassList = [GR8, GR8];
+  let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi)];
   let MethodProtos = [{
     iterator allocation_order_begin(const MachineFunction &MF) const;
     iterator allocation_order_end(const MachineFunction &MF) const;
@@ -422,7 +360,7 @@ def GR16 : RegisterClass<"X86", [i16], 16,
 def GR32 : RegisterClass<"X86", [i32], 32,
                          [EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP,
                           R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D]> {
-  let SubRegClassList = [GR8, GR8, GR16];
+  let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)];
   let MethodProtos = [{
     iterator allocation_order_begin(const MachineFunction &MF) const;
     iterator allocation_order_end(const MachineFunction &MF) const;
@@ -477,7 +415,9 @@ def GR32 : RegisterClass<"X86", [i32], 32,
 def GR64 : RegisterClass<"X86", [i64], 64,
                          [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
                           RBX, R14, R15, R12, R13, RBP, RSP, RIP]> {
-  let SubRegClassList = [GR8, GR8, GR16, GR32];
+  let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi),
+                       (GR16 sub_16bit),
+                       (GR32 sub_32bit)];
   let MethodProtos = [{
     iterator allocation_order_end(const MachineFunction &MF) const;
   }];
@@ -511,14 +451,8 @@ def DEBUG_REG : RegisterClass<"X86", [i32], 32,
 }
 
 // Control registers.
-def CONTROL_REG_32 : RegisterClass<"X86", [i32], 32,
-                                   [ECR0, ECR1, ECR2, ECR3, ECR4, ECR5, ECR6,
-                                    ECR7]> {
-}
-
-def CONTROL_REG_64 : RegisterClass<"X86", [i64], 64,
-                                   [RCR0, RCR1, RCR2, RCR3, RCR4, RCR5, RCR6,
-                                    RCR7, RCR8]> {
+def CONTROL_REG : RegisterClass<"X86", [i64], 64,
+                                [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7, CR8]> {
 }
 
 // GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD, GR32_ABCD, GR64_ABCD - Subclasses of
@@ -532,20 +466,27 @@ def GR8_ABCD_L : RegisterClass<"X86", [i8], 8, [AL, CL, DL, BL]> {
 def GR8_ABCD_H : RegisterClass<"X86", [i8], 8, [AH, CH, DH, BH]> {
 }
 def GR16_ABCD : RegisterClass<"X86", [i16], 16, [AX, CX, DX, BX]> {
-  let SubRegClassList = [GR8_ABCD_L, GR8_ABCD_H];
+  let SubRegClasses = [(GR8_ABCD_L sub_8bit), (GR8_ABCD_H sub_8bit_hi)];
 }
 def GR32_ABCD : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX, EBX]> {
-  let SubRegClassList = [GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD];
+  let SubRegClasses = [(GR8_ABCD_L sub_8bit),
+                       (GR8_ABCD_H sub_8bit_hi),
+                       (GR16_ABCD sub_16bit)];
 }
 def GR64_ABCD : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RBX]> {
-  let SubRegClassList = [GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD, GR32_ABCD];
+  let SubRegClasses = [(GR8_ABCD_L sub_8bit),
+                       (GR8_ABCD_H sub_8bit_hi),
+                       (GR16_ABCD sub_16bit),
+                       (GR32_ABCD sub_32bit)];
 }
 def GR32_TC : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX]> {
-  let SubRegClassList = [GR8, GR8, GR16];
+  let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)];
 }
 def GR64_TC : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RSI, RDI,
                                                R8, R9, R11]> {
-  let SubRegClassList = [GR8, GR8, GR16, GR32_TC];
+  let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi),
+                       (GR16 sub_16bit),
+                       (GR32_TC sub_32bit)];
 }
 
 // GR8_NOREX - GR8 registers which do not require a REX prefix.
@@ -585,7 +526,7 @@ def GR8_NOREX : RegisterClass<"X86", [i8], 8,
 
 // GR16_NOREX - GR16 registers which do not require a REX prefix.
 def GR16_NOREX : RegisterClass<"X86", [i16], 16,
                                [AX, CX, DX, SI, DI, BX, BP, SP]> {
-  let SubRegClassList = [GR8_NOREX, GR8_NOREX];
+  let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi)];
   let MethodProtos = [{
     iterator allocation_order_end(const MachineFunction &MF) const;
   }];
@@ -608,7 +549,8 @@ def GR16_NOREX : RegisterClass<"X86", [i16], 16,
 
 // GR32_NOREX - GR32 registers which do not require a REX prefix.
 def GR32_NOREX : RegisterClass<"X86", [i32], 32,
                                [EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP]> {
-  let SubRegClassList = [GR8_NOREX, GR8_NOREX, GR16_NOREX];
+  let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi),
+                       (GR16_NOREX sub_16bit)];
   let MethodProtos = [{
     iterator allocation_order_end(const MachineFunction &MF) const;
   }];
@@ -631,7 +573,9 @@ def GR32_NOREX : RegisterClass<"X86", [i32], 32,
 
 // GR64_NOREX - GR64 registers which do not require a REX prefix.
 def GR64_NOREX : RegisterClass<"X86", [i64], 64,
                                [RAX, RCX, RDX, RSI, RDI, RBX, RBP, RSP, RIP]> {
-  let SubRegClassList = [GR8_NOREX, GR8_NOREX, GR16_NOREX, GR32_NOREX];
+  let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi),
+                       (GR16_NOREX sub_16bit),
+                       (GR32_NOREX sub_32bit)];
   let MethodProtos = [{
     iterator allocation_order_end(const MachineFunction &MF) const;
   }];
@@ -656,7 +600,7 @@ def GR64_NOREX : RegisterClass<"X86", [i64], 64,
 def GR32_NOSP : RegisterClass<"X86", [i32], 32,
                               [EAX, ECX, EDX, ESI, EDI, EBX, EBP,
                                R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D]> {
-  let SubRegClassList = [GR8, GR8, GR16];
+  let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)];
   let MethodProtos = [{
     iterator allocation_order_begin(const MachineFunction &MF) const;
     iterator allocation_order_end(const MachineFunction &MF) const;
@@ -709,7 +653,9 @@ def GR32_NOSP : RegisterClass<"X86", [i32], 32,
 def GR64_NOSP : RegisterClass<"X86", [i64], 64,
                               [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
                                RBX, R14, R15, R12, R13, RBP]> {
-  let SubRegClassList = [GR8, GR8, GR16, GR32_NOSP];
+  let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi),
+                       (GR16 sub_16bit),
+                       (GR32_NOSP sub_32bit)];
   let MethodProtos = [{
     iterator allocation_order_end(const MachineFunction &MF) const;
   }];
@@ -734,7 +680,9 @@ def GR64_NOSP : RegisterClass<"X86", [i64], 64,
 
 // GR64_NOREX_NOSP - GR64_NOREX registers except RSP.
 def GR64_NOREX_NOSP : RegisterClass<"X86", [i64], 64,
                                     [RAX, RCX, RDX, RSI, RDI, RBX, RBP]> {
-  let SubRegClassList = [GR8_NOREX, GR8_NOREX, GR16_NOREX, GR32_NOREX];
+  let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi),
+                       (GR16_NOREX sub_16bit),
+                       (GR32_NOREX sub_32bit)];
   let MethodProtos = [{
     iterator allocation_order_end(const MachineFunction &MF) const;
   }];
@@ -758,7 +706,9 @@ def GR64_NOREX_NOSP : RegisterClass<"X86", [i64], 64,
 
 // A class to support the 'A' assembler constraint: EAX then EDX.
 def GR32_AD : RegisterClass<"X86", [i32], 32, [EAX, EDX]> {
-  let SubRegClassList = [GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD];
+  let SubRegClasses = [(GR8_ABCD_L sub_8bit),
+                       (GR8_ABCD_H sub_8bit_hi),
+                       (GR16_ABCD sub_16bit)];
 }
 
 // Scalar SSE2 floating point registers.
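[With SubRegClasses, each register class names the sub-register class for a specific index, rather than relying on list position as SubRegClassList did, so a mapping like "the sub_16bit piece of a GR32 lives in GR16" becomes explicit. A hedged sketch of the kind of query this supports (the helper name is ours; getMatchingSuperReg is the existing TargetRegisterInfo lookup):

    #include "llvm/Target/TargetRegisterInfo.h"

    // Sketch: recover the 32-bit super-register whose sub_16bit piece is
    // Reg16, e.g. X86::AX -> X86::EAX. Returns 0 if no register in the
    // given class has Reg16 at that index.
    static unsigned widenTo32(const TargetRegisterInfo &TRI,
                              const TargetRegisterClass *GR32RC,
                              unsigned Reg16) {
      return TRI.getMatchingSuperReg(Reg16, X86::sub_16bit, GR32RC);
    }
]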
@@ -836,7 +786,8 @@ def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128,
                           [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
                            XMM8, XMM9, XMM10, XMM11,
                            XMM12, XMM13, XMM14, XMM15]> {
-  let SubRegClassList = [FR32, FR64];
+  let SubRegClasses = [(FR32 sub_ss), (FR64 sub_sd)];
+
   let MethodProtos = [{
     iterator allocation_order_end(const MachineFunction &MF) const;
   }];
@@ -856,7 +807,7 @@ def VR256 : RegisterClass<"X86", [ v8i32, v4i64, v8f32, v4f64],256,
                           [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
                            YMM8, YMM9, YMM10, YMM11,
                            YMM12, YMM13, YMM14, YMM15]> {
-  let SubRegClassList = [FR32, FR64, VR128];
+  let SubRegClasses = [(FR32 sub_ss), (FR64 sub_sd), (VR128 sub_xmm)];
 }
 
 // Status flags registers.
diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp
index cd87b82..6297a27 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -12,11 +12,232 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "x86-selectiondag-info"
-#include "X86SelectionDAGInfo.h"
+#include "X86TargetMachine.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/SelectionDAG.h"
 using namespace llvm;
 
-X86SelectionDAGInfo::X86SelectionDAGInfo() {
+X86SelectionDAGInfo::X86SelectionDAGInfo(const X86TargetMachine &TM) :
+  TargetSelectionDAGInfo(TM),
+  Subtarget(&TM.getSubtarget<X86Subtarget>()),
+  TLI(*TM.getTargetLowering()) {
 }
 
 X86SelectionDAGInfo::~X86SelectionDAGInfo() {
 }
+
+SDValue
+X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
+                                             SDValue Chain,
+                                             SDValue Dst, SDValue Src,
+                                             SDValue Size, unsigned Align,
+                                             bool isVolatile,
+                                             const Value *DstSV,
+                                             uint64_t DstSVOff) const {
+  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+
+  // If not DWORD aligned or size is more than the threshold, call the library.
+  // The libc version is likely to be faster for these cases. It can use the
+  // address value and run time information about the CPU.
+  if ((Align & 3) != 0 ||
+      !ConstantSize ||
+      ConstantSize->getZExtValue() >
+        Subtarget->getMaxInlineSizeThreshold()) {
+    SDValue InFlag(0, 0);
+
+    // Check to see if there is a specialized entry-point for memory zeroing.
+    ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src);
+
+    if (const char *bzeroEntry = V &&
+        V->isNullValue() ? Subtarget->getBZeroEntry() : 0) {
+      EVT IntPtr = TLI.getPointerTy();
+      const Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext());
+      TargetLowering::ArgListTy Args;
+      TargetLowering::ArgListEntry Entry;
+      Entry.Node = Dst;
+      Entry.Ty = IntPtrTy;
+      Args.push_back(Entry);
+      Entry.Node = Size;
+      Args.push_back(Entry);
+      std::pair<SDValue,SDValue> CallResult =
+        TLI.LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()),
+                        false, false, false, false,
+                        0, CallingConv::C, false, /*isReturnValueUsed=*/false,
+                        DAG.getExternalSymbol(bzeroEntry, IntPtr), Args,
+                        DAG, dl);
+      return CallResult.second;
+    }
+
+    // Otherwise have the target-independent code call memset.
+    return SDValue();
+  }
+
+  uint64_t SizeVal = ConstantSize->getZExtValue();
+  SDValue InFlag(0, 0);
+  EVT AVT;
+  SDValue Count;
+  ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Src);
+  unsigned BytesLeft = 0;
+  bool TwoRepStos = false;
+  if (ValC) {
+    unsigned ValReg;
+    uint64_t Val = ValC->getZExtValue() & 255;
+
+    // If the value is a constant, then we can potentially use larger sets.
+    switch (Align & 3) {
+    case 2:   // WORD aligned
+      AVT = MVT::i16;
+      ValReg = X86::AX;
+      Val = (Val << 8) | Val;
+      break;
+    case 0:   // DWORD aligned
+      AVT = MVT::i32;
+      ValReg = X86::EAX;
+      Val = (Val << 8)  | Val;
+      Val = (Val << 16) | Val;
+      if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) {  // QWORD aligned
+        AVT = MVT::i64;
+        ValReg = X86::RAX;
+        Val = (Val << 32) | Val;
+      }
+      break;
+    default:  // Byte aligned
+      AVT = MVT::i8;
+      ValReg = X86::AL;
+      Count = DAG.getIntPtrConstant(SizeVal);
+      break;
+    }
+
+    if (AVT.bitsGT(MVT::i8)) {
+      unsigned UBytes = AVT.getSizeInBits() / 8;
+      Count = DAG.getIntPtrConstant(SizeVal / UBytes);
+      BytesLeft = SizeVal % UBytes;
+    }
+
+    Chain = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, AVT),
+                             InFlag);
+    InFlag = Chain.getValue(1);
+  } else {
+    AVT = MVT::i8;
+    Count = DAG.getIntPtrConstant(SizeVal);
+    Chain = DAG.getCopyToReg(Chain, dl, X86::AL, Src, InFlag);
+    InFlag = Chain.getValue(1);
+  }
+
+  Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX :
+                                                             X86::ECX,
+                           Count, InFlag);
+  InFlag = Chain.getValue(1);
+  Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI :
+                                                             X86::EDI,
+                           Dst, InFlag);
+  InFlag = Chain.getValue(1);
+
+  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+  SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
+  Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops, array_lengthof(Ops));
+
+  if (TwoRepStos) {
+    InFlag = Chain.getValue(1);
+    Count = Size;
+    EVT CVT = Count.getValueType();
+    SDValue Left = DAG.getNode(ISD::AND, dl, CVT, Count,
+                               DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
+    Chain = DAG.getCopyToReg(Chain, dl, (CVT == MVT::i64) ? X86::RCX :
+                                                            X86::ECX,
+                             Left, InFlag);
+    InFlag = Chain.getValue(1);
+    Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+    SDValue Ops[] = { Chain, DAG.getValueType(MVT::i8), InFlag };
+    Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops, array_lengthof(Ops));
+  } else if (BytesLeft) {
+    // Handle the last 1 - 7 bytes.
+    unsigned Offset = SizeVal - BytesLeft;
+    EVT AddrVT = Dst.getValueType();
+    EVT SizeVT = Size.getValueType();
+
+    Chain = DAG.getMemset(Chain, dl,
+                          DAG.getNode(ISD::ADD, dl, AddrVT, Dst,
+                                      DAG.getConstant(Offset, AddrVT)),
+                          Src,
+                          DAG.getConstant(BytesLeft, SizeVT),
+                          Align, isVolatile, DstSV, DstSVOff + Offset);
+  }
+
+  // TODO: Use a Tokenfactor, as in memcpy, instead of a single chain.
+  return Chain;
+}
+
+SDValue
+X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+                                             SDValue Chain, SDValue Dst, SDValue Src,
+                                             SDValue Size, unsigned Align,
+                                             bool isVolatile, bool AlwaysInline,
+                                             const Value *DstSV,
+                                             uint64_t DstSVOff,
+                                             const Value *SrcSV,
+                                             uint64_t SrcSVOff) const {
+  // This requires the copy size to be a constant, preferably
+  // within a subtarget-specific limit.
+  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+  if (!ConstantSize)
+    return SDValue();
+  uint64_t SizeVal = ConstantSize->getZExtValue();
+  if (!AlwaysInline && SizeVal > Subtarget->getMaxInlineSizeThreshold())
+    return SDValue();
+
+  /// If not DWORD aligned, call the library.
+  if ((Align & 3) != 0)
+    return SDValue();
+
+  // DWORD aligned
+  EVT AVT = MVT::i32;
+  if (Subtarget->is64Bit() && ((Align & 0x7) == 0))  // QWORD aligned
+    AVT = MVT::i64;
+
+  unsigned UBytes = AVT.getSizeInBits() / 8;
+  unsigned CountVal = SizeVal / UBytes;
+  SDValue Count = DAG.getIntPtrConstant(CountVal);
+  unsigned BytesLeft = SizeVal % UBytes;
+
+  SDValue InFlag(0, 0);
+  Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX :
+                                                             X86::ECX,
+                           Count, InFlag);
+  InFlag = Chain.getValue(1);
+  Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI :
+                                                             X86::EDI,
+                           Dst, InFlag);
+  InFlag = Chain.getValue(1);
+  Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RSI :
+                                                             X86::ESI,
+                           Src, InFlag);
+  InFlag = Chain.getValue(1);
+
+  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+  SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
+  SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops,
+                                array_lengthof(Ops));
+
+  SmallVector<SDValue, 4> Results;
+  Results.push_back(RepMovs);
+  if (BytesLeft) {
+    // Handle the last 1 - 7 bytes.
+    unsigned Offset = SizeVal - BytesLeft;
+    EVT DstVT = Dst.getValueType();
+    EVT SrcVT = Src.getValueType();
+    EVT SizeVT = Size.getValueType();
+    Results.push_back(DAG.getMemcpy(Chain, dl,
+                                    DAG.getNode(ISD::ADD, dl, DstVT, Dst,
+                                                DAG.getConstant(Offset, DstVT)),
+                                    DAG.getNode(ISD::ADD, dl, SrcVT, Src,
+                                                DAG.getConstant(Offset, SrcVT)),
+                                    DAG.getConstant(BytesLeft, SizeVT),
+                                    Align, isVolatile, AlwaysInline,
+                                    DstSV, DstSVOff + Offset,
+                                    SrcSV, SrcSVOff + Offset));
+  }
+
+  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                     &Results[0], Results.size());
+}
diff --git a/lib/Target/X86/X86SelectionDAGInfo.h b/lib/Target/X86/X86SelectionDAGInfo.h
index 9834754..4f30f31 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.h
+++ b/lib/Target/X86/X86SelectionDAGInfo.h
@@ -18,10 +18,40 @@
 
 namespace llvm {
 
+class X86TargetLowering;
+class X86TargetMachine;
+class X86Subtarget;
+
 class X86SelectionDAGInfo : public TargetSelectionDAGInfo {
+  /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
+  /// make the right decision when generating code for different targets.
+  const X86Subtarget *Subtarget;
+
+  const X86TargetLowering &TLI;
+
 public:
-  X86SelectionDAGInfo();
+  explicit X86SelectionDAGInfo(const X86TargetMachine &TM);
   ~X86SelectionDAGInfo();
+
+  virtual
+  SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
+                                  SDValue Chain,
+                                  SDValue Dst, SDValue Src,
+                                  SDValue Size, unsigned Align,
+                                  bool isVolatile,
+                                  const Value *DstSV,
+                                  uint64_t DstSVOff) const;
+
+  virtual
+  SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+                                  SDValue Chain,
+                                  SDValue Dst, SDValue Src,
+                                  SDValue Size, unsigned Align,
+                                  bool isVolatile, bool AlwaysInline,
+                                  const Value *DstSV,
+                                  uint64_t DstSVOff,
+                                  const Value *SrcSV,
+                                  uint64_t SrcSVOff) const;
 };
 
 }
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index f39904e..f2c5058 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -17,17 +17,13 @@
 #include "llvm/PassManager.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCStreamer.h"
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Target/TargetRegistry.h"
-#include "llvm/Support/CommandLine.h"
-
 using namespace llvm;
 
-static cl::opt<bool> DisableSSEDomain("disable-sse-domain",
-                                      cl::init(false), cl::Hidden,
-                                      cl::desc("Disable SSE Domain Fixing"));
-
 static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
   Triple TheTriple(TT);
   switch (TheTriple.getOS()) {
@@ -43,6 +39,18 @@ static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
   }
 }
 
+static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
+                                    MCContext &Ctx, TargetAsmBackend &TAB,
+                                    raw_ostream &_OS,
+                                    MCCodeEmitter *_Emitter,
+                                    bool RelaxAll) {
+  Triple TheTriple(TT);
+  switch (TheTriple.getOS()) {
+  default:
+    return createMachOStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll);
+  }
+}
+
 extern "C" void LLVMInitializeX86Target() {
   // Register the target.
   RegisterTargetMachine<X86_32TargetMachine> X(TheX86_32Target);
@@ -63,6 +71,12 @@ extern "C" void LLVMInitializeX86Target() {
                                        createX86_32AsmBackend);
   TargetRegistry::RegisterAsmBackend(TheX86_64Target,
                                      createX86_64AsmBackend);
+
+  // Register the object streamer.
+  TargetRegistry::RegisterObjectStreamer(TheX86_32Target,
+                                         createMCStreamer);
+  TargetRegistry::RegisterObjectStreamer(TheX86_64Target,
+                                         createMCStreamer);
 }
 
 
@@ -88,7 +102,8 @@ X86TargetMachine::X86TargetMachine(const Target &T, const std::string &TT,
                        Subtarget.getStackAlignment(),
                        (Subtarget.isTargetWin64() ? -40 :
                         (Subtarget.is64Bit() ? -8 : -4))),
-    InstrInfo(*this), JITInfo(*this), TLInfo(*this), ELFWriterInfo(*this) {
+    InstrInfo(*this), JITInfo(*this), TLInfo(*this), TSInfo(*this),
+    ELFWriterInfo(*this) {
   DefRelocModel = getRelocationModel();
 
   // If no relocation model was picked, default as appropriate for the target.
@@ -178,8 +193,7 @@ bool X86TargetMachine::addPostRegAlloc(PassManagerBase &PM,
 
 bool X86TargetMachine::addPreEmitPass(PassManagerBase &PM,
                                       CodeGenOpt::Level OptLevel) {
-  if (OptLevel != CodeGenOpt::None && Subtarget.hasSSE2() &&
-      !DisableSSEDomain) {
+  if (OptLevel != CodeGenOpt::None && Subtarget.hasSSE2()) {
     PM.add(createSSEDomainFixPass());
     return true;
   }
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index dc4234c..f9fb424 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -23,6 +23,7 @@
 #include "X86JITInfo.h"
 #include "X86Subtarget.h"
 #include "X86ISelLowering.h"
+#include "X86SelectionDAGInfo.h"
 
 namespace llvm {
 
@@ -35,6 +36,7 @@ class X86TargetMachine : public LLVMTargetMachine {
   X86InstrInfo      InstrInfo;
   X86JITInfo        JITInfo;
   X86TargetLowering TLInfo;
+  X86SelectionDAGInfo TSInfo;
   X86ELFWriterInfo  ELFWriterInfo;
   Reloc::Model      DefRelocModel; // Reloc model before it's overridden.
 
@@ -54,6 +56,9 @@ public:
   virtual const X86TargetLowering *getTargetLowering() const {
     return &TLInfo;
   }
+  virtual const X86SelectionDAGInfo *getSelectionDAGInfo() const {
+    return &TSInfo;
+  }
   virtual const X86RegisterInfo *getRegisterInfo() const {
     return &InstrInfo.getRegisterInfo();
   }
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index 3990b8b..b230572 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -80,7 +80,7 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
   setShiftAmountType(MVT::i32);
   setStackPointerRegisterToSaveRestore(XCore::SP);
 
-  setSchedulingPreference(SchedulingForRegPressure);
+  setSchedulingPreference(Sched::RegPressure);
 
   // Use i32 for setcc operations results (slt, sgt, ...).
   setBooleanContents(ZeroOrOneBooleanContent);
diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp
index c983112..5260258 100644
--- a/lib/Target/XCore/XCoreInstrInfo.cpp
+++ b/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -361,9 +361,8 @@ bool XCoreInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator I,
                                   unsigned DestReg, unsigned SrcReg,
                                   const TargetRegisterClass *DestRC,
-                                  const TargetRegisterClass *SrcRC) const {
-  DebugLoc DL;
-  if (I != MBB.end()) DL = I->getDebugLoc();
+                                  const TargetRegisterClass *SrcRC,
+                                  DebugLoc DL) const {
 
   if (DestRC == SrcRC) {
     if (DestRC == XCore::GRRegsRegisterClass) {
@@ -395,7 +394,8 @@ void XCoreInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator I,
                                          unsigned SrcReg, bool isKill,
                                          int FrameIndex,
-                                         const TargetRegisterClass *RC) const
+                                         const TargetRegisterClass *RC,
+                                         const TargetRegisterInfo *TRI) const
 {
   DebugLoc DL;
   if (I != MBB.end()) DL = I->getDebugLoc();
@@ -408,7 +408,8 @@ void XCoreInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                           MachineBasicBlock::iterator I,
                                           unsigned DestReg, int FrameIndex,
-                                          const TargetRegisterClass *RC) const
+                                          const TargetRegisterClass *RC,
+                                          const TargetRegisterInfo *TRI) const
 {
   DebugLoc DL;
   if (I != MBB.end()) DL = I->getDebugLoc();
@@ -419,7 +420,8 @@
 bool XCoreInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
                                                MachineBasicBlock::iterator MI,
-                                               const std::vector<CalleeSavedInfo> &CSI) const {
+                                               const std::vector<CalleeSavedInfo> &CSI,
+                                               const TargetRegisterInfo *TRI) const {
   if (CSI.empty()) {
     return true;
   }
@@ -437,7 +439,7 @@ bool XCoreInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
     MBB.addLiveIn(it->getReg());
 
     storeRegToStackSlot(MBB, MI, it->getReg(), true,
-                        it->getFrameIdx(), it->getRegClass());
+                        it->getFrameIdx(), it->getRegClass(), &RI);
     if (emitFrameMoves) {
       MCSymbol *SaveLabel = MF->getContext().CreateTempSymbol();
       BuildMI(MBB, MI, DL, get(XCore::DBG_LABEL)).addSym(SaveLabel);
@@ -449,7 +451,8 @@ bool XCoreInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
 
 bool XCoreInstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
                                                  MachineBasicBlock::iterator MI,
-                                                 const std::vector<CalleeSavedInfo> &CSI) const
+                                                 const std::vector<CalleeSavedInfo> &CSI,
+                                                 const TargetRegisterInfo *TRI) const
 {
   bool AtStart = MI == MBB.begin();
   MachineBasicBlock::iterator BeforeI = MI;
@@ -460,7 +463,7 @@ bool XCoreInstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
     loadRegFromStackSlot(MBB, MI, it->getReg(),
                          it->getFrameIdx(),
-                         it->getRegClass());
+                         it->getRegClass(), &RI);
     assert(MI != MBB.begin() &&
            "loadRegFromStackSlot didn't insert any code!");
     // Insert in reverse order. loadRegFromStackSlot can insert multiple
diff --git a/lib/Target/XCore/XCoreInstrInfo.h b/lib/Target/XCore/XCoreInstrInfo.h
index 3e0a765..9035ea9 100644
--- a/lib/Target/XCore/XCoreInstrInfo.h
+++ b/lib/Target/XCore/XCoreInstrInfo.h
@@ -67,25 +67,30 @@ public:
                             MachineBasicBlock::iterator I,
                             unsigned DestReg, unsigned SrcReg,
                             const TargetRegisterClass *DestRC,
-                            const TargetRegisterClass *SrcRC) const;
+                            const TargetRegisterClass *SrcRC,
+                            DebugLoc DL) const;
 
   virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MI,
                                    unsigned SrcReg, bool isKill, int FrameIndex,
-                                   const TargetRegisterClass *RC) const;
+                                   const TargetRegisterClass *RC,
+                                   const TargetRegisterInfo *TRI) const;
 
   virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MI,
                                     unsigned DestReg, int FrameIndex,
-                                    const TargetRegisterClass *RC) const;
+                                    const TargetRegisterClass *RC,
+                                    const TargetRegisterInfo *TRI) const;
 
   virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator MI,
-                                         const std::vector<CalleeSavedInfo> &CSI) const;
+                                         const std::vector<CalleeSavedInfo> &CSI,
+                                         const TargetRegisterInfo *TRI) const;
 
   virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
                                            MachineBasicBlock::iterator MI,
-                                           const std::vector<CalleeSavedInfo> &CSI) const;
+                                           const std::vector<CalleeSavedInfo> &CSI,
+                                           const TargetRegisterInfo *TRI) const;
 
   virtual bool ReverseBranchCondition(
     SmallVectorImpl<MachineOperand> &Cond) const;
diff --git a/lib/Target/XCore/XCoreSelectionDAGInfo.cpp b/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
index 6aac237..44aeb60 100644
--- a/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
+++ b/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
@@ -12,10 +12,11 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "xcore-selectiondag-info"
-#include "XCoreSelectionDAGInfo.h"
+#include "XCoreTargetMachine.h"
 using namespace llvm;
 
-XCoreSelectionDAGInfo::XCoreSelectionDAGInfo() {
+XCoreSelectionDAGInfo::XCoreSelectionDAGInfo(const XCoreTargetMachine &TM)
+  : TargetSelectionDAGInfo(TM) {
 }
 
 XCoreSelectionDAGInfo::~XCoreSelectionDAGInfo() {
diff --git a/lib/Target/XCore/XCoreSelectionDAGInfo.h b/lib/Target/XCore/XCoreSelectionDAGInfo.h
index fd96716..0386968 100644
--- a/lib/Target/XCore/XCoreSelectionDAGInfo.h
+++ b/lib/Target/XCore/XCoreSelectionDAGInfo.h
@@ -18,9 +18,11 @@
 
 namespace llvm {
 
+class XCoreTargetMachine;
+
 class XCoreSelectionDAGInfo : public TargetSelectionDAGInfo {
 public:
-  XCoreSelectionDAGInfo();
+  explicit XCoreSelectionDAGInfo(const XCoreTargetMachine &TM);
   ~XCoreSelectionDAGInfo();
 };
 
diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp
index 267f46a..b0013eb 100644
--- a/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -28,7 +28,8 @@ XCoreTargetMachine::XCoreTargetMachine(const Target &T, const std::string &TT,
                "i16:16:32-i32:32:32-i64:32:32-n32"),
     InstrInfo(),
     FrameInfo(*this),
-    TLInfo(*this) {
+    TLInfo(*this),
+    TSInfo(*this) {
 }
 
 bool XCoreTargetMachine::addInstSelector(PassManagerBase &PM,
diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h
index 701a6f1..14073ba 100644
--- a/lib/Target/XCore/XCoreTargetMachine.h
+++ b/lib/Target/XCore/XCoreTargetMachine.h
@@ -20,6 +20,7 @@
 #include "XCoreSubtarget.h"
 #include "XCoreInstrInfo.h"
 #include "XCoreISelLowering.h"
+#include "XCoreSelectionDAGInfo.h"
 
 namespace llvm {
 
@@ -29,6 +30,7 @@ class XCoreTargetMachine : public LLVMTargetMachine {
   XCoreInstrInfo InstrInfo;
   XCoreFrameInfo FrameInfo;
   XCoreTargetLowering TLInfo;
+  XCoreSelectionDAGInfo TSInfo;
 public:
   XCoreTargetMachine(const Target &T, const std::string &TT,
                      const std::string &FS);
@@ -40,6 +42,10 @@ public:
     return &TLInfo;
   }
 
+  virtual const XCoreSelectionDAGInfo* getSelectionDAGInfo() const {
+    return &TSInfo;
+  }
+
   virtual const TargetRegisterInfo *getRegisterInfo() const {
     return &InstrInfo.getRegisterInfo();
   }
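[Taken together, the X86 and XCore changes move the memcpy/memset expansion hooks into a per-target TargetSelectionDAGInfo that the TargetMachine constructs once and exposes through getSelectionDAGInfo(). Roughly, the generic lowering gives the target first refusal and falls back to a libcall when the hook declines. A simplified sketch of that consuming side — the real call site lives inside SelectionDAG, and the exact plumbing shown here is assumed, not quoted from this commit:

    // Sketch: ask the target to expand a memset; a null result means
    // "emit the ordinary memset libcall instead".
    SDValue Result = TM.getSelectionDAGInfo()->EmitTargetCodeForMemset(
        DAG, dl, Chain, Dst, Src, Size, Align, isVolatile, DstSV, DstSVOff);
    if (Result.getNode())
      return Result;   // target emitted rep;stos or a bzero call
    // ...otherwise fall through to the generic libcall path.

Keeping the subtarget-dependent decisions (inline-size threshold, bzero entry point, rep-prefix element width) behind this interface is what lets the X86 implementation above stay out of X86ISelLowering entirely.]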