author    rdivacky <rdivacky@FreeBSD.org>  2010-05-27 15:15:58 +0000
committer rdivacky <rdivacky@FreeBSD.org>  2010-05-27 15:15:58 +0000
commit    1e3dec662ea18131c495db50caccc57f77b7a5fe (patch)
tree      9fad9a5d5dd8c4ff54af48edad9c8cc26dd5fda1 /lib/Target
parent    377552607e51dc1d3e6ff33833f9620bcfe815ac (diff)
Update LLVM to r104832.
Diffstat (limited to 'lib/Target')
-rw-r--r--  lib/Target/ARM/ARM.h | 8
-rw-r--r--  lib/Target/ARM/ARM.td | 12
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.cpp | 259
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.h | 14
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.cpp | 248
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.h | 9
-rw-r--r--  lib/Target/ARM/ARMCodeEmitter.cpp | 170
-rw-r--r--  lib/Target/ARM/ARMExpandPseudoInsts.cpp | 124
-rw-r--r--  lib/Target/ARM/ARMISelDAGToDAG.cpp | 510
-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp | 388
-rw-r--r--  lib/Target/ARM/ARMISelLowering.h | 21
-rw-r--r--  lib/Target/ARM/ARMInstrFormats.td | 24
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.td | 78
-rw-r--r--  lib/Target/ARM/ARMInstrNEON.td | 98
-rw-r--r--  lib/Target/ARM/ARMInstrThumb.td | 64
-rw-r--r--  lib/Target/ARM/ARMInstrThumb2.td | 143
-rw-r--r--  lib/Target/ARM/ARMInstrVFP.td | 16
-rw-r--r--  lib/Target/ARM/ARMJITInfo.cpp | 12
-rw-r--r--  lib/Target/ARM/ARMRegisterInfo.h | 10
-rw-r--r--  lib/Target/ARM/ARMRegisterInfo.td | 180
-rw-r--r--  lib/Target/ARM/ARMRelocations.h | 8
-rw-r--r--  lib/Target/ARM/ARMSelectionDAGInfo.cpp | 116
-rw-r--r--  lib/Target/ARM/ARMSelectionDAGInfo.h | 17
-rw-r--r--  lib/Target/ARM/ARMSubtarget.cpp | 4
-rw-r--r--  lib/Target/ARM/ARMSubtarget.h | 13
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.cpp | 6
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.h | 11
-rw-r--r--  lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp | 35
-rw-r--r--  lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp | 7
-rw-r--r--  lib/Target/ARM/AsmPrinter/ARMMCInstLower.h | 2
-rw-r--r--  lib/Target/ARM/NEONMoveFix.cpp | 2
-rw-r--r--  lib/Target/ARM/NEONPreAllocPass.cpp | 128
-rw-r--r--  lib/Target/ARM/Thumb1InstrInfo.cpp | 50
-rw-r--r--  lib/Target/ARM/Thumb1InstrInfo.h | 15
-rw-r--r--  lib/Target/ARM/Thumb2InstrInfo.cpp | 30
-rw-r--r--  lib/Target/ARM/Thumb2InstrInfo.h | 9
-rw-r--r--  lib/Target/Alpha/AlphaInstrInfo.cpp | 19
-rw-r--r--  lib/Target/Alpha/AlphaInstrInfo.h | 9
-rw-r--r--  lib/Target/Alpha/AlphaInstrInfo.td | 2
-rw-r--r--  lib/Target/Alpha/AlphaSelectionDAGInfo.cpp | 5
-rw-r--r--  lib/Target/Alpha/AlphaSelectionDAGInfo.h | 4
-rw-r--r--  lib/Target/Alpha/AlphaTargetMachine.cpp | 3
-rw-r--r--  lib/Target/Alpha/AlphaTargetMachine.h | 5
-rw-r--r--  lib/Target/Blackfin/BlackfinInstrInfo.cpp | 11
-rw-r--r--  lib/Target/Blackfin/BlackfinInstrInfo.h | 9
-rw-r--r--  lib/Target/Blackfin/BlackfinInstrInfo.td | 30
-rw-r--r--  lib/Target/Blackfin/BlackfinRegisterInfo.cpp | 10
-rw-r--r--  lib/Target/Blackfin/BlackfinRegisterInfo.h | 7
-rw-r--r--  lib/Target/Blackfin/BlackfinRegisterInfo.td | 54
-rw-r--r--  lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp | 6
-rw-r--r--  lib/Target/Blackfin/BlackfinSelectionDAGInfo.h | 4
-rw-r--r--  lib/Target/Blackfin/BlackfinTargetMachine.cpp | 1
-rw-r--r--  lib/Target/Blackfin/BlackfinTargetMachine.h | 5
-rw-r--r--  lib/Target/CBackend/CBackend.cpp | 13
-rw-r--r--  lib/Target/CBackend/CTargetMachine.h | 11
-rw-r--r--  lib/Target/CellSPU/README.txt | 2
-rw-r--r--  lib/Target/CellSPU/SPUISelLowering.cpp | 2
-rw-r--r--  lib/Target/CellSPU/SPUInstrInfo.cpp | 23
-rw-r--r--  lib/Target/CellSPU/SPUInstrInfo.h | 9
-rw-r--r--  lib/Target/CellSPU/SPUInstrInfo.td | 6
-rw-r--r--  lib/Target/CellSPU/SPURegisterInfo.cpp | 4
-rw-r--r--  lib/Target/CellSPU/SPUSelectionDAGInfo.cpp | 5
-rw-r--r--  lib/Target/CellSPU/SPUSelectionDAGInfo.h | 4
-rw-r--r--  lib/Target/CellSPU/SPUTargetMachine.cpp | 1
-rw-r--r--  lib/Target/CellSPU/SPUTargetMachine.h | 6
-rw-r--r--  lib/Target/CppBackend/CPPBackend.cpp | 15
-rw-r--r--  lib/Target/CppBackend/CPPTargetMachine.h | 11
-rw-r--r--  lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp | 2
-rw-r--r--  lib/Target/MBlaze/MBlazeInstrInfo.cpp | 14
-rw-r--r--  lib/Target/MBlaze/MBlazeInstrInfo.h | 9
-rw-r--r--  lib/Target/MBlaze/MBlazeRegisterInfo.cpp | 12
-rw-r--r--  lib/Target/MBlaze/MBlazeRegisterInfo.td | 10
-rw-r--r--  lib/Target/MBlaze/MBlazeSelectionDAGInfo.cpp | 5
-rw-r--r--  lib/Target/MBlaze/MBlazeSelectionDAGInfo.h | 4
-rw-r--r--  lib/Target/MBlaze/MBlazeTargetMachine.cpp | 2
-rw-r--r--  lib/Target/MBlaze/MBlazeTargetMachine.h | 5
-rw-r--r--  lib/Target/MSIL/MSILWriter.cpp | 23
-rw-r--r--  lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.h | 2
-rw-r--r--  lib/Target/MSP430/MSP430ISelLowering.cpp | 6
-rw-r--r--  lib/Target/MSP430/MSP430InstrInfo.cpp | 18
-rw-r--r--  lib/Target/MSP430/MSP430InstrInfo.h | 15
-rw-r--r--  lib/Target/MSP430/MSP430RegisterInfo.td | 13
-rw-r--r--  lib/Target/MSP430/MSP430SelectionDAGInfo.cpp | 5
-rw-r--r--  lib/Target/MSP430/MSP430SelectionDAGInfo.h | 4
-rw-r--r--  lib/Target/MSP430/MSP430TargetMachine.cpp | 2
-rw-r--r--  lib/Target/MSP430/MSP430TargetMachine.h | 6
-rw-r--r--  lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp | 2
-rw-r--r--  lib/Target/Mips/MipsISelDAGToDAG.cpp | 12
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.cpp | 16
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.h | 9
-rw-r--r--  lib/Target/Mips/MipsRegisterInfo.cpp | 10
-rw-r--r--  lib/Target/Mips/MipsRegisterInfo.h | 9
-rw-r--r--  lib/Target/Mips/MipsRegisterInfo.td | 26
-rw-r--r--  lib/Target/Mips/MipsSelectionDAGInfo.cpp | 5
-rw-r--r--  lib/Target/Mips/MipsSelectionDAGInfo.h | 4
-rw-r--r--  lib/Target/Mips/MipsTargetMachine.cpp | 2
-rw-r--r--  lib/Target/Mips/MipsTargetMachine.h | 6
-rw-r--r--  lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h | 2
-rw-r--r--  lib/Target/PIC16/PIC16DebugInfo.cpp | 14
-rw-r--r--  lib/Target/PIC16/PIC16ISelDAGToDAG.h | 2
-rw-r--r--  lib/Target/PIC16/PIC16InstrInfo.cpp | 11
-rw-r--r--  lib/Target/PIC16/PIC16InstrInfo.h | 9
-rw-r--r--  lib/Target/PIC16/PIC16Section.h | 8
-rw-r--r--  lib/Target/PIC16/PIC16SelectionDAGInfo.cpp | 5
-rw-r--r--  lib/Target/PIC16/PIC16SelectionDAGInfo.h | 4
-rw-r--r--  lib/Target/PIC16/PIC16TargetMachine.cpp | 2
-rw-r--r--  lib/Target/PIC16/PIC16TargetMachine.h | 6
-rw-r--r--  lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 8
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.cpp | 5
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.h | 7
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.cpp | 17
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.h | 9
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.td | 18
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.cpp | 14
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.td | 23
-rw-r--r--  lib/Target/PowerPC/PPCSelectionDAGInfo.cpp | 5
-rw-r--r--  lib/Target/PowerPC/PPCSelectionDAGInfo.h | 4
-rw-r--r--  lib/Target/PowerPC/PPCTargetMachine.cpp | 3
-rw-r--r--  lib/Target/PowerPC/PPCTargetMachine.h | 5
-rw-r--r--  lib/Target/README.txt | 15
-rw-r--r--  lib/Target/Sparc/SparcInstrInfo.cpp | 12
-rw-r--r--  lib/Target/Sparc/SparcInstrInfo.h | 9
-rw-r--r--  lib/Target/Sparc/SparcRegisterInfo.td | 6
-rw-r--r--  lib/Target/Sparc/SparcSelectionDAGInfo.cpp | 5
-rw-r--r--  lib/Target/Sparc/SparcSelectionDAGInfo.h | 4
-rw-r--r--  lib/Target/Sparc/SparcTargetMachine.cpp | 2
-rw-r--r--  lib/Target/Sparc/SparcTargetMachine.h | 5
-rw-r--r--  lib/Target/SubtargetFeature.cpp | 34
-rw-r--r--  lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp | 4
-rw-r--r--  lib/Target/SystemZ/SystemZISelDAGToDAG.cpp | 22
-rw-r--r--  lib/Target/SystemZ/SystemZISelLowering.cpp | 2
-rw-r--r--  lib/Target/SystemZ/SystemZInstrInfo.cpp | 22
-rw-r--r--  lib/Target/SystemZ/SystemZInstrInfo.h | 15
-rw-r--r--  lib/Target/SystemZ/SystemZRegisterInfo.h | 11
-rw-r--r--  lib/Target/SystemZ/SystemZRegisterInfo.td | 58
-rw-r--r--  lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp | 5
-rw-r--r--  lib/Target/SystemZ/SystemZSelectionDAGInfo.h | 4
-rw-r--r--  lib/Target/SystemZ/SystemZTargetMachine.cpp | 2
-rw-r--r--  lib/Target/SystemZ/SystemZTargetMachine.h | 6
-rw-r--r--  lib/Target/TargetMachine.cpp | 12
-rw-r--r--  lib/Target/TargetRegisterInfo.cpp | 6
-rw-r--r--  lib/Target/X86/AsmParser/X86AsmParser.cpp | 199
-rw-r--r--  lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp | 94
-rw-r--r--  lib/Target/X86/AsmPrinter/X86AsmPrinter.h | 2
-rw-r--r--  lib/Target/X86/AsmPrinter/X86MCInstLower.cpp | 139
-rw-r--r--  lib/Target/X86/AsmPrinter/X86MCInstLower.h | 2
-rw-r--r--  lib/Target/X86/CMakeLists.txt | 7
-rw-r--r--  lib/Target/X86/Disassembler/X86Disassembler.cpp | 61
-rw-r--r--  lib/Target/X86/Disassembler/X86DisassemblerDecoder.c | 8
-rw-r--r--  lib/Target/X86/Disassembler/X86DisassemblerDecoder.h | 33
-rw-r--r--  lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h | 3
-rw-r--r--  lib/Target/X86/SSEDomainFix.cpp | 4
-rw-r--r--  lib/Target/X86/X86AsmBackend.cpp | 86
-rw-r--r--  lib/Target/X86/X86COFFMachineModuleInfo.h | 14
-rw-r--r--  lib/Target/X86/X86CallingConv.td | 14
-rw-r--r--  lib/Target/X86/X86FastISel.cpp | 27
-rw-r--r--  lib/Target/X86/X86FloatingPointRegKill.cpp | 110
-rw-r--r--  lib/Target/X86/X86ISelDAGToDAG.cpp | 12
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp | 370
-rw-r--r--  lib/Target/X86/X86ISelLowering.h | 20
-rw-r--r--  lib/Target/X86/X86Instr64bit.td | 155
-rw-r--r--  lib/Target/X86/X86InstrInfo.cpp | 65
-rw-r--r--  lib/Target/X86/X86InstrInfo.h | 15
-rw-r--r--  lib/Target/X86/X86InstrInfo.td | 174
-rw-r--r--  lib/Target/X86/X86InstrMMX.td | 11
-rw-r--r--  lib/Target/X86/X86InstrSSE.td | 162
-rw-r--r--  lib/Target/X86/X86RegisterInfo.cpp | 95
-rw-r--r--  lib/Target/X86/X86RegisterInfo.h | 10
-rw-r--r--  lib/Target/X86/X86RegisterInfo.td | 197
-rw-r--r--  lib/Target/X86/X86SelectionDAGInfo.cpp | 225
-rw-r--r--  lib/Target/X86/X86SelectionDAGInfo.h | 32
-rw-r--r--  lib/Target/X86/X86TargetMachine.cpp | 32
-rw-r--r--  lib/Target/X86/X86TargetMachine.h | 5
-rw-r--r--  lib/Target/XCore/XCoreISelLowering.cpp | 2
-rw-r--r--  lib/Target/XCore/XCoreInstrInfo.cpp | 21
-rw-r--r--  lib/Target/XCore/XCoreInstrInfo.h | 15
-rw-r--r--  lib/Target/XCore/XCoreSelectionDAGInfo.cpp | 5
-rw-r--r--  lib/Target/XCore/XCoreSelectionDAGInfo.h | 4
-rw-r--r--  lib/Target/XCore/XCoreTargetMachine.cpp | 3
-rw-r--r--  lib/Target/XCore/XCoreTargetMachine.h | 6
180 files changed, 4305 insertions, 1971 deletions
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index b08f942..ae7ae59 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -48,7 +48,7 @@ namespace ARMCC {
AL
};
- inline static CondCodes getOppositeCondition(CondCodes CC){
+ inline static CondCodes getOppositeCondition(CondCodes CC) {
switch (CC) {
default: llvm_unreachable("Unknown condition code");
case EQ: return NE;
@@ -67,7 +67,7 @@ namespace ARMCC {
case LE: return GT;
}
}
-}
+} // namespace ARMCC
inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) {
switch (CC) {
@@ -90,6 +90,10 @@ inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) {
}
}
+/// ModelWithRegSequence - Return true if isel should use REG_SEQUENCE to model
+/// operations involving sub-registers.
+bool ModelWithRegSequence();
+
FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM,
CodeGenOpt::Level OptLevel);
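ModelWithRegSequence() is only declared here; its definition lives in ARMISelDAGToDAG.cpp, which this patch also changes to add the backing -neon-reg-sequence flag (see that hunk below). A minimal sketch of the presumed wiring, assuming the function simply forwards the cl::opt:

    // Sketch, not part of this hunk (assumed to mirror the cl::opt
    // added in ARMISelDAGToDAG.cpp below):
    bool llvm::ModelWithRegSequence() {
      return UseRegSeq; // the "neon-reg-sequence" command-line flag
    }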
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index b4dec0c..f1e6a9f 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -32,6 +32,8 @@ def ArchV6T2 : SubtargetFeature<"v6t2", "ARMArchVersion", "V6T2",
"ARM v6t2">;
def ArchV7A : SubtargetFeature<"v7a", "ARMArchVersion", "V7A",
"ARM v7A">;
+def ArchV7M : SubtargetFeature<"v7m", "ARMArchVersion", "V7M",
+ "ARM v7M">;
def FeatureVFP2 : SubtargetFeature<"vfp2", "ARMFPUType", "VFPv2",
"Enable VFP2 instructions">;
def FeatureVFP3 : SubtargetFeature<"vfp3", "ARMFPUType", "VFPv3",
@@ -42,6 +44,10 @@ def FeatureThumb2 : SubtargetFeature<"thumb2", "ThumbMode", "Thumb2",
"Enable Thumb2 instructions">;
def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true",
"Enable half-precision floating point">;
+def FeatureHWDiv : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true",
+ "Enable divide instructions">;
+def FeatureT2ExtractPack: SubtargetFeature<"t2xtpk", "HasT2ExtractPack", "true",
+ "Enable Thumb2 extract and pack instructions">;
// Some processors have multiply-accumulate instructions that don't
// play nicely with other VFP instructions, and it's generally better
@@ -123,9 +129,11 @@ def : Processor<"arm1156t2f-s", ARMV6Itineraries,
// V7 Processors.
def : Processor<"cortex-a8", CortexA8Itineraries,
[ArchV7A, FeatureThumb2, FeatureNEON, FeatureHasSlowVMLx,
- FeatureNEONForFP]>;
+ FeatureNEONForFP, FeatureT2ExtractPack]>;
def : Processor<"cortex-a9", CortexA9Itineraries,
- [ArchV7A, FeatureThumb2, FeatureNEON]>;
+ [ArchV7A, FeatureThumb2, FeatureNEON, FeatureT2ExtractPack]>;
+def : ProcNoItin<"cortex-m3", [ArchV7M, FeatureThumb2, FeatureHWDiv]>;
+def : ProcNoItin<"cortex-m4", [ArchV7M, FeatureThumb2, FeatureHWDiv]>;
//===----------------------------------------------------------------------===//
// Register File Description
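Each SubtargetFeature's second template argument names the ARMSubtarget member it sets, so FeatureHWDiv and FeatureT2ExtractPack imply HasHardwareDivide and HasT2ExtractPack fields (the diffstat shows ARMSubtarget.h gaining 13 lines). A hedged sketch of the subtarget side; the accessor spellings are assumptions:

    // In ARMSubtarget (sketch, accessor names assumed):
    bool HasHardwareDivide;  // set by FeatureHWDiv ("hwdiv")
    bool HasT2ExtractPack;   // set by FeatureT2ExtractPack ("t2xtpk")

    bool hasHardwareDivide() const { return HasHardwareDivide; }
    bool hasT2ExtractPack() const { return HasT2ExtractPack; }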
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index a193858..2528854 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -28,6 +28,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/CommandLine.h"
@@ -196,6 +197,42 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
return NewMIs[0];
}
+bool
+ARMBaseInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ bool isKill = true;
+
+ // Add the callee-saved register as live-in unless it's LR and
+ // @llvm.returnaddress is called. If LR is returned for @llvm.returnaddress
+ // then it's already added to the function and entry block live-in sets.
+ if (Reg == ARM::LR) {
+ MachineFunction &MF = *MBB.getParent();
+ if (MF.getFrameInfo()->isReturnAddressTaken() &&
+ MF.getRegInfo().isLiveIn(Reg))
+ isKill = false;
+ }
+
+ if (isKill)
+ MBB.addLiveIn(Reg);
+
+ // Insert the spill to the stack frame. The register is killed at the spill.
+ storeRegToStackSlot(MBB, MI, Reg, isKill,
+ CSI[i].getFrameIdx(), CSI[i].getRegClass(), TRI);
+ }
+ return true;
+}
+
// Branch analysis.
bool
ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
@@ -481,6 +518,10 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
// If this machine instr is a constant pool entry, its size is recorded as
// operand #2.
return MI->getOperand(2).getImm();
+ case ARM::Int_eh_sjlj_longjmp:
+ return 16;
+ case ARM::tInt_eh_sjlj_longjmp:
+ return 10;
case ARM::Int_eh_sjlj_setjmp:
case ARM::Int_eh_sjlj_setjmp_nofp:
return 24;
@@ -540,16 +581,17 @@ bool
ARMBaseInstrInfo::isMoveInstr(const MachineInstr &MI,
unsigned &SrcReg, unsigned &DstReg,
unsigned& SrcSubIdx, unsigned& DstSubIdx) const {
- SrcSubIdx = DstSubIdx = 0; // No sub-registers.
-
switch (MI.getOpcode()) {
default: break;
case ARM::VMOVS:
case ARM::VMOVD:
case ARM::VMOVDneon:
- case ARM::VMOVQ: {
+ case ARM::VMOVQ:
+ case ARM::VMOVQQ : {
SrcReg = MI.getOperand(1).getReg();
DstReg = MI.getOperand(0).getReg();
+ SrcSubIdx = MI.getOperand(1).getSubReg();
+ DstSubIdx = MI.getOperand(0).getSubReg();
return true;
}
case ARM::MOVr:
@@ -564,6 +606,8 @@ ARMBaseInstrInfo::isMoveInstr(const MachineInstr &MI,
"Invalid ARM MOV instruction");
SrcReg = MI.getOperand(1).getReg();
DstReg = MI.getOperand(0).getReg();
+ SrcSubIdx = MI.getOperand(1).getSubReg();
+ DstSubIdx = MI.getOperand(0).getSubReg();
return true;
}
}
@@ -654,10 +698,8 @@ ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const {
- DebugLoc DL;
- if (I != MBB.end()) DL = I->getDebugLoc();
-
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const {
// tGPR is used sometimes in ARM instructions that need to avoid using
// certain registers. Just treat it as GPR here.
if (DestRC == ARM::tGPRRegisterClass)
@@ -679,6 +721,12 @@ ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
SrcRC == ARM::QPR_8RegisterClass)
SrcRC = ARM::QPRRegisterClass;
+ // Allow QQPR / QQPR_VFP2 cross-class copies.
+ if (DestRC == ARM::QQPR_VFP2RegisterClass)
+ DestRC = ARM::QQPRRegisterClass;
+ if (SrcRC == ARM::QQPR_VFP2RegisterClass)
+ SrcRC = ARM::QQPRRegisterClass;
+
// Disallow copies of unequal sizes.
if (DestRC != SrcRC && DestRC->getSize() != SrcRC->getSize())
return false;
@@ -703,20 +751,36 @@ ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
Opc = ARM::VMOVDneon;
else if (DestRC == ARM::QPRRegisterClass)
Opc = ARM::VMOVQ;
+ else if (DestRC == ARM::QQPRRegisterClass)
+ Opc = ARM::VMOVQQ;
+ else if (DestRC == ARM::QQQQPRRegisterClass)
+ Opc = ARM::VMOVQQQQ;
else
return false;
- AddDefaultPred(BuildMI(MBB, I, DL, get(Opc), DestReg)
- .addReg(SrcReg));
+ AddDefaultPred(BuildMI(MBB, I, DL, get(Opc), DestReg).addReg(SrcReg));
}
return true;
}
+static const
+MachineInstrBuilder &AddDReg(MachineInstrBuilder &MIB,
+ unsigned Reg, unsigned SubIdx, unsigned State,
+ const TargetRegisterInfo *TRI) {
+ if (!SubIdx)
+ return MIB.addReg(Reg, State);
+
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
+ return MIB.addReg(Reg, State, SubIdx);
+}
+
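AddDReg lets one builder chain name a D slice of either a physical or a virtual super-register: physical registers are resolved to the concrete sub-register immediately via TRI, while virtual registers carry the sub-register index so the allocator resolves it later. A hypothetical pair of calls (the register name QQ0 and the virtual register VReg are illustrative only):

    // Physical: appends the concrete D register (QQ0's dsub_1 slice).
    MIB = AddDReg(MIB, ARM::QQ0, ARM::dsub_1, 0, TRI);
    // Virtual: appends VReg with dsub_1 attached as a sub-register
    // index, deferring resolution to register allocation.
    MIB = AddDReg(MIB, VReg, ARM::dsub_1, 0, TRI);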
void ARMBaseInstrInfo::
storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned SrcReg, bool isKill, int FI,
- const TargetRegisterClass *RC) const {
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
MachineFunction &MF = *MBB.getParent();
@@ -738,45 +802,82 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STR))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO));
+ } else if (RC == ARM::SPRRegisterClass) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
} else if (RC == ARM::DPRRegisterClass ||
RC == ARM::DPR_VFP2RegisterClass ||
RC == ARM::DPR_8RegisterClass) {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
- } else if (RC == ARM::SPRRegisterClass) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS))
- .addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
- } else {
- assert((RC == ARM::QPRRegisterClass ||
- RC == ARM::QPR_VFP2RegisterClass) && "Unknown regclass!");
+ } else if (RC == ARM::QPRRegisterClass ||
+ RC == ARM::QPR_VFP2RegisterClass ||
+ RC == ARM::QPR_8RegisterClass) {
// FIXME: Neon instructions should support predicates
- if (Align >= 16 && (getRegisterInfo().canRealignStack(MF))) {
+ if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q))
.addFrameIndex(FI).addImm(128)
- .addMemOperand(MMO)
- .addReg(SrcReg, getKillRegState(isKill)));
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addMemOperand(MMO));
} else {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQ)).
- addReg(SrcReg, getKillRegState(isKill))
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQ))
+ .addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI)
.addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))
.addMemOperand(MMO));
}
+ } else if (RC == ARM::QQPRRegisterClass || RC == ARM::QQPR_VFP2RegisterClass){
+ if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+ // FIXME: It's possible to only store part of the QQ register if the
+ // spilled def has a sub-register index.
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VST2q32))
+ .addFrameIndex(FI).addImm(128);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
+ AddDefaultPred(MIB.addMemOperand(MMO));
+ } else {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMD))
+ .addFrameIndex(FI)
+ .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)))
+ .addMemOperand(MMO);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
+ AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
+ }
+ } else {
+ assert(RC == ARM::QQQQPRRegisterClass && "Unknown regclass!");
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMD))
+ .addFrameIndex(FI)
+ .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)))
+ .addMemOperand(MMO);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
+ AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
}
}
void ARMBaseInstrInfo::
loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned DestReg, int FI,
- const TargetRegisterClass *RC) const {
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
unsigned Align = MFI.getObjectAlignment(FI);
-
MachineMemOperand *MMO =
MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI),
MachineMemOperand::MOLoad, 0,
@@ -791,20 +892,18 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
if (RC == ARM::GPRRegisterClass) {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDR), DestReg)
.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO));
+ } else if (RC == ARM::SPRRegisterClass) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
} else if (RC == ARM::DPRRegisterClass ||
RC == ARM::DPR_VFP2RegisterClass ||
RC == ARM::DPR_8RegisterClass) {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
- } else if (RC == ARM::SPRRegisterClass) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
- .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
- } else {
- assert((RC == ARM::QPRRegisterClass ||
- RC == ARM::QPR_VFP2RegisterClass ||
- RC == ARM::QPR_8RegisterClass) && "Unknown regclass!");
- if (Align >= 16
- && (getRegisterInfo().canRealignStack(MF))) {
+ } else if (RC == ARM::QPRRegisterClass ||
+ RC == ARM::QPR_VFP2RegisterClass ||
+ RC == ARM::QPR_8RegisterClass) {
+ if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q), DestReg)
.addFrameIndex(FI).addImm(128)
.addMemOperand(MMO));
@@ -814,6 +913,40 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
.addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))
.addMemOperand(MMO));
}
+ } else if (RC == ARM::QQPRRegisterClass || RC == ARM::QQPR_VFP2RegisterClass){
+ if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLD2q32));
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
+ AddDefaultPred(MIB.addFrameIndex(FI).addImm(128).addMemOperand(MMO));
+ } else {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMD))
+ .addFrameIndex(FI)
+ .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)))
+ .addMemOperand(MMO);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
+ AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
+ }
+ } else {
+ assert(RC == ARM::QQQQPRRegisterClass && "Unknown regclass!");
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMD))
+ .addFrameIndex(FI)
+ .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)))
+ .addMemOperand(MMO);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::Define, TRI);
+ AddDReg(MIB, DestReg, ARM::dsub_7, RegState::Define, TRI);
}
}
@@ -930,8 +1063,7 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
DstSubReg)
.addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg);
}
- }
- else if (Opc == ARM::VMOVD) {
+ } else if (Opc == ARM::VMOVD || Opc == ARM::VMOVDneon) {
unsigned Pred = MI->getOperand(2).getImm();
unsigned PredReg = MI->getOperand(3).getReg();
if (OpNum == 0) { // move -> store
@@ -957,6 +1089,56 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
DstSubReg)
.addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg);
}
+ } else if (Opc == ARM::VMOVQ) {
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ unsigned Pred = MI->getOperand(2).getImm();
+ unsigned PredReg = MI->getOperand(3).getReg();
+ if (OpNum == 0) { // move -> store
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ unsigned SrcSubReg = MI->getOperand(1).getSubReg();
+ bool isKill = MI->getOperand(1).isKill();
+ bool isUndef = MI->getOperand(1).isUndef();
+ if (MFI.getObjectAlignment(FI) >= 16 &&
+ getRegisterInfo().canRealignStack(MF)) {
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VST1q))
+ .addFrameIndex(FI).addImm(128)
+ .addReg(SrcReg,
+ getKillRegState(isKill) | getUndefRegState(isUndef),
+ SrcSubReg)
+ .addImm(Pred).addReg(PredReg);
+ } else {
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VSTMQ))
+ .addReg(SrcReg,
+ getKillRegState(isKill) | getUndefRegState(isUndef),
+ SrcSubReg)
+ .addFrameIndex(FI).addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))
+ .addImm(Pred).addReg(PredReg);
+ }
+ } else { // move -> load
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned DstSubReg = MI->getOperand(0).getSubReg();
+ bool isDead = MI->getOperand(0).isDead();
+ bool isUndef = MI->getOperand(0).isUndef();
+ if (MFI.getObjectAlignment(FI) >= 16 &&
+ getRegisterInfo().canRealignStack(MF)) {
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VLD1q))
+ .addReg(DstReg,
+ RegState::Define |
+ getDeadRegState(isDead) |
+ getUndefRegState(isUndef),
+ DstSubReg)
+ .addFrameIndex(FI).addImm(128).addImm(Pred).addReg(PredReg);
+ } else {
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VLDMQ))
+ .addReg(DstReg,
+ RegState::Define |
+ getDeadRegState(isDead) |
+ getUndefRegState(isUndef),
+ DstSubReg)
+ .addFrameIndex(FI).addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))
+ .addImm(Pred).addReg(PredReg);
+ }
+ }
}
return NewMI;
@@ -985,12 +1167,13 @@ ARMBaseInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
Opc == ARM::tMOVtgpr2gpr ||
Opc == ARM::tMOVgpr2tgpr) {
return true;
- } else if (Opc == ARM::VMOVS || Opc == ARM::VMOVD) {
+ } else if (Opc == ARM::VMOVS || Opc == ARM::VMOVD ||
+ Opc == ARM::VMOVDneon || Opc == ARM::VMOVQ) {
return true;
- } else if (Opc == ARM::VMOVDneon || Opc == ARM::VMOVQ) {
- return false; // FIXME
}
+ // FIXME: VMOVQQ and VMOVQQQQ?
+
return false;
}
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 7a5630e..b566271 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -200,6 +200,11 @@ public:
virtual const ARMBaseRegisterInfo &getRegisterInfo() const =0;
const ARMSubtarget &getSubtarget() const { return Subtarget; }
+ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
// Branch analysis.
virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
@@ -257,17 +262,20 @@ public:
MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const;
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const;
virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned SrcReg, bool isKill, int FrameIndex,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF,
int FrameIx,
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index bc12187..82458d2 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -259,10 +259,10 @@ ARMBaseRegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
unsigned SubIdx) const {
switch (SubIdx) {
default: return 0;
- case 1:
- case 2:
- case 3:
- case 4:
+ case ARM::ssub_0:
+ case ARM::ssub_1:
+ case ARM::ssub_2:
+ case ARM::ssub_3: {
// S sub-registers.
if (A->getSize() == 8) {
if (B == &ARM::SPR_8RegClass)
@@ -273,22 +273,201 @@ ARMBaseRegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
return &ARM::DPR_VFP2RegClass;
}
- assert(A->getSize() == 16 && "Expecting a Q register class!");
- if (B == &ARM::SPR_8RegClass)
- return &ARM::QPR_8RegClass;
- return &ARM::QPR_VFP2RegClass;
- case 5:
- case 6:
- // D sub-registers.
- if (B == &ARM::DPR_VFP2RegClass)
+ if (A->getSize() == 16) {
+ if (B == &ARM::SPR_8RegClass)
+ return &ARM::QPR_8RegClass;
return &ARM::QPR_VFP2RegClass;
- if (B == &ARM::DPR_8RegClass)
- return &ARM::QPR_8RegClass;
+ }
+
+ if (A->getSize() == 32) {
+ if (B == &ARM::SPR_8RegClass)
+ return 0; // Do not allow coalescing!
+ return &ARM::QQPR_VFP2RegClass;
+ }
+
+ assert(A->getSize() == 64 && "Expecting a QQQQ register class!");
+ return 0; // Do not allow coalescing!
+ }
+ case ARM::dsub_0:
+ case ARM::dsub_1:
+ case ARM::dsub_2:
+ case ARM::dsub_3: {
+ // D sub-registers.
+ if (A->getSize() == 16) {
+ if (B == &ARM::DPR_VFP2RegClass)
+ return &ARM::QPR_VFP2RegClass;
+ if (B == &ARM::DPR_8RegClass)
+ return 0; // Do not allow coalescing!
+ return A;
+ }
+
+ if (A->getSize() == 32) {
+ if (B == &ARM::DPR_VFP2RegClass)
+ return &ARM::QQPR_VFP2RegClass;
+ if (B == &ARM::DPR_8RegClass)
+ return 0; // Do not allow coalescing!
+ return A;
+ }
+
+ assert(A->getSize() == 64 && "Expecting a QQQQ register class!");
+ if (B != &ARM::DPRRegClass)
+ return 0; // Do not allow coalescing!
return A;
}
+ case ARM::dsub_4:
+ case ARM::dsub_5:
+ case ARM::dsub_6:
+ case ARM::dsub_7: {
+ // D sub-registers of QQQQ registers.
+ if (A->getSize() == 64 && B == &ARM::DPRRegClass)
+ return A;
+ return 0; // Do not allow coalescing!
+ }
+
+ case ARM::qsub_0:
+ case ARM::qsub_1: {
+ // Q sub-registers.
+ if (A->getSize() == 32) {
+ if (B == &ARM::QPR_VFP2RegClass)
+ return &ARM::QQPR_VFP2RegClass;
+ if (B == &ARM::QPR_8RegClass)
+ return 0; // Do not allow coalescing!
+ return A;
+ }
+
+ assert(A->getSize() == 64 && "Expecting a QQQQ register class!");
+ if (B == &ARM::QPRRegClass)
+ return A;
+ return 0; // Do not allow coalescing!
+ }
+ case ARM::qsub_2:
+ case ARM::qsub_3: {
+ // Q sub-registers of QQQQ registers.
+ if (A->getSize() == 64 && B == &ARM::QPRRegClass)
+ return A;
+ return 0; // Do not allow coalescing!
+ }
+ }
return 0;
}
+bool
+ARMBaseRegisterInfo::canCombinedSubRegIndex(const TargetRegisterClass *RC,
+ SmallVectorImpl<unsigned> &SubIndices,
+ unsigned &NewSubIdx) const {
+
+ unsigned Size = RC->getSize() * 8;
+ if (Size < 6)
+ return 0;
+
+ NewSubIdx = 0; // Whole register.
+ unsigned NumRegs = SubIndices.size();
+ if (NumRegs == 8) {
+ // 8 D registers -> 1 QQQQ register.
+ return (Size == 512 &&
+ SubIndices[0] == ARM::dsub_0 &&
+ SubIndices[1] == ARM::dsub_1 &&
+ SubIndices[2] == ARM::dsub_2 &&
+ SubIndices[3] == ARM::dsub_3 &&
+ SubIndices[4] == ARM::dsub_4 &&
+ SubIndices[5] == ARM::dsub_5 &&
+ SubIndices[6] == ARM::dsub_6 &&
+ SubIndices[7] == ARM::dsub_7);
+ } else if (NumRegs == 4) {
+ if (SubIndices[0] == ARM::qsub_0) {
+ // 4 Q registers -> 1 QQQQ register.
+ return (Size == 512 &&
+ SubIndices[1] == ARM::qsub_1 &&
+ SubIndices[2] == ARM::qsub_2 &&
+ SubIndices[3] == ARM::qsub_3);
+ } else if (SubIndices[0] == ARM::dsub_0) {
+ // 4 D registers -> 1 QQ register.
+ if (Size >= 256 &&
+ SubIndices[1] == ARM::dsub_1 &&
+ SubIndices[2] == ARM::dsub_2 &&
+ SubIndices[3] == ARM::dsub_3) {
+ if (Size == 512)
+ NewSubIdx = ARM::qqsub_0;
+ return true;
+ }
+ } else if (SubIndices[0] == ARM::dsub_4) {
+ // 4 D registers -> 1 QQ register (2nd).
+ if (Size == 512 &&
+ SubIndices[1] == ARM::dsub_5 &&
+ SubIndices[2] == ARM::dsub_6 &&
+ SubIndices[3] == ARM::dsub_7) {
+ NewSubIdx = ARM::qqsub_1;
+ return true;
+ }
+ } else if (SubIndices[0] == ARM::ssub_0) {
+ // 4 S registers -> 1 Q register.
+ if (Size >= 128 &&
+ SubIndices[1] == ARM::ssub_1 &&
+ SubIndices[2] == ARM::ssub_2 &&
+ SubIndices[3] == ARM::ssub_3) {
+ if (Size >= 256)
+ NewSubIdx = ARM::qsub_0;
+ return true;
+ }
+ }
+ } else if (NumRegs == 2) {
+ if (SubIndices[0] == ARM::qsub_0) {
+ // 2 Q registers -> 1 QQ register.
+ if (Size >= 256 && SubIndices[1] == ARM::qsub_1) {
+ if (Size == 512)
+ NewSubIdx = ARM::qqsub_0;
+ return true;
+ }
+ } else if (SubIndices[0] == ARM::qsub_2) {
+ // 2 Q registers -> 1 QQ register (2nd).
+ if (Size == 512 && SubIndices[1] == ARM::qsub_3) {
+ NewSubIdx = ARM::qqsub_1;
+ return true;
+ }
+ } else if (SubIndices[0] == ARM::dsub_0) {
+ // 2 D registers -> 1 Q register.
+ if (Size >= 128 && SubIndices[1] == ARM::dsub_1) {
+ if (Size >= 256)
+ NewSubIdx = ARM::qsub_0;
+ return true;
+ }
+ } else if (SubIndices[0] == ARM::dsub_2) {
+ // 2 D registers -> 1 Q register (2nd).
+ if (Size >= 256 && SubIndices[1] == ARM::dsub_3) {
+ NewSubIdx = ARM::qsub_1;
+ return true;
+ }
+ } else if (SubIndices[0] == ARM::dsub_4) {
+ // 2 D registers -> 1 Q register (3rd).
+ if (Size == 512 && SubIndices[1] == ARM::dsub_5) {
+ NewSubIdx = ARM::qsub_2;
+ return true;
+ }
+ } else if (SubIndices[0] == ARM::dsub_6) {
+ // 2 D registers -> 1 Q register (3rd).
+ if (Size == 512 && SubIndices[1] == ARM::dsub_7) {
+ NewSubIdx = ARM::qsub_3;
+ return true;
+ }
+ } else if (SubIndices[0] == ARM::ssub_0) {
+ // 2 S registers -> 1 D register.
+ if (SubIndices[1] == ARM::ssub_1) {
+ if (Size >= 128)
+ NewSubIdx = ARM::dsub_0;
+ return true;
+ }
+ } else if (SubIndices[0] == ARM::ssub_2) {
+ // 2 S registers -> 1 D register (2nd).
+ if (Size >= 128 && SubIndices[1] == ARM::ssub_3) {
+ NewSubIdx = ARM::dsub_1;
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+
const TargetRegisterClass *
ARMBaseRegisterInfo::getPointerRegClass(unsigned Kind) const {
return ARM::GPRRegisterClass;
@@ -481,7 +660,7 @@ ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
///
bool ARMBaseRegisterInfo::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
- return ((DisableFramePointerElim(MF) && MFI->hasCalls())||
+ return ((DisableFramePointerElim(MF) && MFI->adjustsStack())||
needsStackRealignment(MF) ||
MFI->hasVarSizedObjects() ||
MFI->isFrameAddressTaken());
@@ -509,7 +688,7 @@ needsStackRealignment(const MachineFunction &MF) const {
bool ARMBaseRegisterInfo::
cannotEliminateFrame(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
- if (DisableFramePointerElim(MF) && MFI->hasCalls())
+ if (DisableFramePointerElim(MF) && MFI->adjustsStack())
return true;
return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken()
|| needsStackRealignment(MF);
@@ -545,24 +724,25 @@ ARMBaseRegisterInfo::estimateRSStackSizeLimit(MachineFunction &MF) const {
I != E; ++I) {
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
if (!I->getOperand(i).isFI()) continue;
-
- const TargetInstrDesc &Desc = TII.get(I->getOpcode());
- unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
- if (AddrMode == ARMII::AddrMode3 ||
- AddrMode == ARMII::AddrModeT2_i8)
- return (1 << 8) - 1;
-
- if (AddrMode == ARMII::AddrMode5 ||
- AddrMode == ARMII::AddrModeT2_i8s4)
+ switch (I->getDesc().TSFlags & ARMII::AddrModeMask) {
+ case ARMII::AddrMode3:
+ case ARMII::AddrModeT2_i8:
+ Limit = std::min(Limit, (1U << 8) - 1);
+ break;
+ case ARMII::AddrMode5:
+ case ARMII::AddrModeT2_i8s4:
Limit = std::min(Limit, ((1U << 8) - 1) * 4);
-
- if (AddrMode == ARMII::AddrModeT2_i12 && hasFP(MF))
- // When the stack offset is negative, we will end up using
- // the i8 instructions instead.
- return (1 << 8) - 1;
-
- if (AddrMode == ARMII::AddrMode6)
+ break;
+ case ARMII::AddrModeT2_i12:
+ if (hasFP(MF)) Limit = std::min(Limit, (1U << 8) - 1);
+ break;
+ case ARMII::AddrMode6:
+ // Addressing mode 6 (load/store) instructions can't encode an
+ // immediate offset for stack references.
return 0;
+ default:
+ break;
+ }
break; // At most one FI per instruction
}
}
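The rewrite replaces the old early returns with std::min folding, so the tightest addressing-mode constraint seen anywhere in the function now wins. A worked instance of the new logic:

    unsigned Limit = ~0U;
    Limit = std::min(Limit, ((1U << 8) - 1) * 4); // a VLDR (AddrMode5): 1020
    Limit = std::min(Limit, (1U << 8) - 1);       // an LDRD (AddrMode3): 255
    // Limit == 255; a single AddrMode6 (NEON) stack access would
    // instead force the limit to 0 via the early return.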
@@ -750,7 +930,9 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
while (NumExtras && !UnspilledCS1GPRs.empty()) {
unsigned Reg = UnspilledCS1GPRs.back();
UnspilledCS1GPRs.pop_back();
- if (!isReservedReg(MF, Reg)) {
+ if (!isReservedReg(MF, Reg) &&
+ (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg) ||
+ Reg == ARM::LR)) {
Extras.push_back(Reg);
NumExtras--;
}
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index 456c392..2c9c82d 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -81,6 +81,15 @@ public:
getMatchingSuperRegClass(const TargetRegisterClass *A,
const TargetRegisterClass *B, unsigned Idx) const;
+ /// canCombinedSubRegIndex - Given a register class and a list of sub-register
+ /// indices, return true if it's possible to combine the sub-register indices
+ /// into one that corresponds to a larger sub-register. Return the new sub-
+ /// register index by reference. Note the new index may be zero if the given
+ /// sub-registers combine to form the whole register.
+ virtual bool canCombinedSubRegIndex(const TargetRegisterClass *RC,
+ SmallVectorImpl<unsigned> &SubIndices,
+ unsigned &NewSubIdx) const;
+
const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const;
std::pair<TargetRegisterClass::iterator,TargetRegisterClass::iterator>
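A hypothetical query against the tables in the .cpp hunk above, where ARI is an ARMBaseRegisterInfo reference:

    SmallVector<unsigned, 8> Indices;
    Indices.push_back(ARM::dsub_0);
    Indices.push_back(ARM::dsub_1);
    unsigned NewSubIdx;
    // QQPR is 32 bytes (256 bits), so the NumRegs == 2 / dsub_0 branch
    // returns true with NewSubIdx == ARM::qsub_0.
    bool CanCombine = ARI.canCombinedSubRegIndex(ARM::QQPRRegisterClass,
                                                 Indices, NewSubIdx);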
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index f84f85a..f2730fc 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -88,6 +88,7 @@ namespace {
void emitWordLE(unsigned Binary);
void emitDWordLE(uint64_t Binary);
void emitConstPoolInstruction(const MachineInstr &MI);
+ void emitMOVi32immInstruction(const MachineInstr &MI);
void emitMOVi2piecesInstruction(const MachineInstr &MI);
void emitLEApcrelJTInstruction(const MachineInstr &MI);
void emitPseudoMoveInstruction(const MachineInstr &MI);
@@ -145,6 +146,15 @@ namespace {
return getMachineOpValue(MI, MI.getOperand(OpIdx));
}
+ /// getMovi32Value - Return binary encoding of operand for movw/movt. If the
+ /// machine operand requires relocation, record the relocation and return zero.
+ unsigned getMovi32Value(const MachineInstr &MI,const MachineOperand &MO,
+ unsigned Reloc);
+ unsigned getMovi32Value(const MachineInstr &MI, unsigned OpIdx,
+ unsigned Reloc) {
+ return getMovi32Value(MI, MI.getOperand(OpIdx), Reloc);
+ }
+
/// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value.
///
unsigned getShiftOp(unsigned Imm) const ;
@@ -217,6 +227,31 @@ unsigned ARMCodeEmitter::getShiftOp(unsigned Imm) const {
return 0;
}
+/// getMovi32Value - Return binary encoding of operand for movw/movt. If the
+/// machine operand requires relocation, record the relocation and return zero.
+unsigned ARMCodeEmitter::getMovi32Value(const MachineInstr &MI,
+ const MachineOperand &MO,
+ unsigned Reloc) {
+ assert(((Reloc == ARM::reloc_arm_movt) || (Reloc == ARM::reloc_arm_movw))
+ && "Relocation to this function should be for movt or movw");
+
+ if (MO.isImm())
+ return static_cast<unsigned>(MO.getImm());
+ else if (MO.isGlobal())
+ emitGlobalAddress(MO.getGlobal(), Reloc, true, false);
+ else if (MO.isSymbol())
+ emitExternalSymbolAddress(MO.getSymbolName(), Reloc);
+ else if (MO.isMBB())
+ emitMachineBasicBlock(MO.getMBB(), Reloc);
+ else {
+#ifndef NDEBUG
+ errs() << MO;
+#endif
+ llvm_unreachable("Unsupported operand type for movw/movt");
+ }
+ return 0;
+}
+
/// getMachineOpValue - Return binary encoding of operand. If the machine
/// operand requires relocation, record the relocation and return zero.
unsigned ARMCodeEmitter::getMachineOpValue(const MachineInstr &MI,
@@ -438,6 +473,42 @@ void ARMCodeEmitter::emitConstPoolInstruction(const MachineInstr &MI) {
}
}
+void ARMCodeEmitter::emitMOVi32immInstruction(const MachineInstr &MI) {
+ const MachineOperand &MO0 = MI.getOperand(0);
+ const MachineOperand &MO1 = MI.getOperand(1);
+
+ // Emit the 'movw' instruction.
+ unsigned Binary = 0x30 << 20; // mov: Insts{27-20} = 0b00110000
+
+ unsigned Lo16 = getMovi32Value(MI, MO1, ARM::reloc_arm_movw) & 0xFFFF;
+
+ // Set the conditional execution predicate.
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode Rd.
+ Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift;
+
+ // Encode imm16 as imm4:imm12
+ Binary |= Lo16 & 0xFFF; // Insts{11-0} = imm12
+ Binary |= ((Lo16 >> 12) & 0xF) << 16; // Insts{19-16} = imm4
+ emitWordLE(Binary);
+
+ unsigned Hi16 = getMovi32Value(MI, MO1, ARM::reloc_arm_movt) >> 16;
+ // Emit the 'movt' instruction.
+ Binary = 0x34 << 20; // movt: Insts{27-20} = 0b00110100
+
+ // Set the conditional execution predicate.
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode Rd.
+ Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift;
+
+ // Encode imm16 as imm4:imm12, same as movw above.
+ Binary |= Hi16 & 0xFFF;
+ Binary |= ((Hi16 >> 12) & 0xF) << 16;
+ emitWordLE(Binary);
+}
+
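A worked instance of the imm4:imm12 split used for both halves (pure bit arithmetic, independent of the rest of the encoder):

    unsigned Lo16 = 0xABCD;
    unsigned Enc = (Lo16 & 0xFFF)                 // Insts{11-0}  = 0xBCD
                 | (((Lo16 >> 12) & 0xF) << 16);  // Insts{19-16} = 0xA
    // Enc == 0x000A0BCD before the opcode, predicate and Rd fields
    // are OR'd in.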
void ARMCodeEmitter::emitMOVi2piecesInstruction(const MachineInstr &MI) {
const MachineOperand &MO0 = MI.getOperand(0);
const MachineOperand &MO1 = MI.getOperand(1);
@@ -557,7 +628,6 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) {
switch (Opcode) {
default:
llvm_unreachable("ARMCodeEmitter::emitPseudoInstruction");
- // FIXME: Add support for MOVimm32.
case TargetOpcode::INLINEASM: {
// We allow inline assembler nodes with empty bodies - they can
// implicitly define registers, which is ok for JIT.
@@ -604,6 +674,11 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) {
emitMiscLoadStoreInstruction(MI, ARM::PC);
break;
}
+
+ case ARM::MOVi32imm:
+ emitMOVi32immInstruction(MI);
+ break;
+
case ARM::MOVi2pieces:
// Two instructions to materialize a constant.
emitMOVi2piecesInstruction(MI);
@@ -706,10 +781,6 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI,
unsigned ImplicitRn) {
const TargetInstrDesc &TID = MI.getDesc();
- if (TID.Opcode == ARM::BFC) {
- report_fatal_error("ARMv6t2 JIT is not yet supported.");
- }
-
// Part of binary is determined by TableGn.
unsigned Binary = getBinaryCodeForInstr(MI);
@@ -729,6 +800,45 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI,
Binary |= (ARMRegisterInfo::getRegisterNumbering(ImplicitRd)
<< ARMII::RegRdShift);
+ if (TID.Opcode == ARM::MOVi16) {
+ // Get immediate from MI.
+ unsigned Lo16 = getMovi32Value(MI, MI.getOperand(OpIdx),
+ ARM::reloc_arm_movw);
+ // Encode imm which is the same as in emitMOVi32immInstruction().
+ Binary |= Lo16 & 0xFFF;
+ Binary |= ((Lo16 >> 12) & 0xF) << 16;
+ emitWordLE(Binary);
+ return;
+ } else if(TID.Opcode == ARM::MOVTi16) {
+ unsigned Hi16 = (getMovi32Value(MI, MI.getOperand(OpIdx),
+ ARM::reloc_arm_movt) >> 16);
+ Binary |= Hi16 & 0xFFF;
+ Binary |= ((Hi16 >> 12) & 0xF) << 16;
+ emitWordLE(Binary);
+ return;
+ } else if ((TID.Opcode == ARM::BFC) || (TID.Opcode == ARM::BFI)) {
+ uint32_t v = ~MI.getOperand(2).getImm();
+ int32_t lsb = CountTrailingZeros_32(v);
+ int32_t msb = (32 - CountLeadingZeros_32(v)) - 1;
+ // Instr{20-16} = msb, Instr{11-7} = lsb
+ Binary |= (msb & 0x1F) << 16;
+ Binary |= (lsb & 0x1F) << 7;
+ emitWordLE(Binary);
+ return;
+ } else if ((TID.Opcode == ARM::UBFX) || (TID.Opcode == ARM::SBFX)) {
+ // Encode Rn in Instr{0-3}
+ Binary |= getMachineOpValue(MI, OpIdx++);
+
+ uint32_t lsb = MI.getOperand(OpIdx++).getImm();
+ uint32_t widthm1 = MI.getOperand(OpIdx++).getImm() - 1;
+
+ // Instr{20-16} = widthm1, Instr{11-7} = lsb
+ Binary |= (widthm1 & 0x1F) << 16;
+ Binary |= (lsb & 0x1F) << 7;
+ emitWordLE(Binary);
+ return;
+ }
+
// If this is a two-address operand, skip it. e.g. MOVCCr operand 1.
if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)
++OpIdx;
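For BFC/BFI the operand holds the mask of preserved bits, so the encoder inverts it to locate the field being cleared or inserted. A worked example for clearing bits 8-15:

    uint32_t imm = 0xFFFF00FF;                          // keep all but bits 8-15
    uint32_t v   = ~imm;                                // 0x0000FF00
    int32_t lsb  = CountTrailingZeros_32(v);            // 8
    int32_t msb  = (32 - CountLeadingZeros_32(v)) - 1;  // 15
    // Instr{20-16} = 15, Instr{11-7} = 8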
@@ -1366,18 +1476,66 @@ ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI) {
break;
++NumRegs;
}
- Binary |= NumRegs * 2;
+ // Bit 8 will be set if <list> is consecutive 64-bit registers (e.g., D0)
+ // Otherwise, it will be 0, in the case of 32-bit registers.
+ if(Binary & 0x100)
+ Binary |= NumRegs * 2;
+ else
+ Binary |= NumRegs;
emitWordLE(Binary);
}
void ARMCodeEmitter::emitMiscInstruction(const MachineInstr &MI) {
+ unsigned Opcode = MI.getDesc().Opcode;
// Part of binary is determined by TableGn.
unsigned Binary = getBinaryCodeForInstr(MI);
// Set the conditional execution predicate
Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+ switch(Opcode) {
+ default:
+ llvm_unreachable("ARMCodeEmitter::emitMiscInstruction");
+
+ case ARM::FMSTAT:
+ // No further encoding needed.
+ break;
+
+ case ARM::VMRS:
+ case ARM::VMSR: {
+ const MachineOperand &MO0 = MI.getOperand(0);
+ // Encode Rt.
+ Binary |= ARMRegisterInfo::getRegisterNumbering(MO0.getReg())
+ << ARMII::RegRdShift;
+ break;
+ }
+
+ case ARM::FCONSTD:
+ case ARM::FCONSTS: {
+ // Encode Dd / Sd.
+ Binary |= encodeVFPRd(MI, 0);
+
+ // Encode imm., Table A7-18 VFP modified immediate constants
+ const MachineOperand &MO1 = MI.getOperand(1);
+ unsigned Imm = static_cast<unsigned>(MO1.getFPImm()->getValueAPF()
+ .bitcastToAPInt().getHiBits(32).getLimitedValue());
+ unsigned ModifiedImm;
+
+ if(Opcode == ARM::FCONSTS)
+ ModifiedImm = (Imm & 0x80000000) >> 24 | // a
+ (Imm & 0x03F80000) >> 19; // bcdefgh
+ else // Opcode == ARM::FCONSTD
+ ModifiedImm = (Imm & 0x80000000) >> 24 | // a
+ (Imm & 0x007F0000) >> 16; // bcdefgh
+
+ // Insts{19-16} = abcd, Insts{3-0} = efgh
+ Binary |= ((ModifiedImm & 0xF0) >> 4) << 16;
+ Binary |= (ModifiedImm & 0xF);
+ break;
+ }
+ }
+
emitWordLE(Binary);
}
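A worked instance of the FCONSTS path for the constant 1.0f (IEEE bit pattern 0x3F800000):

    unsigned Imm = 0x3F800000;
    unsigned ModifiedImm = (Imm & 0x80000000) >> 24     // a       = 0
                         | (Imm & 0x03F80000) >> 19;    // bcdefgh = 0x70
    // ModifiedImm == 0x70, so Insts{19-16} = 0x7 and Insts{3-0} = 0x0:
    // the VFP modified-immediate encoding of +1.0 per Table A7-18.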
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 845d088..c87f5d7 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -29,6 +29,7 @@ namespace {
ARMExpandPseudo() : MachineFunctionPass(&ID) {}
const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
virtual bool runOnMachineFunction(MachineFunction &Fn);
@@ -37,11 +38,31 @@ namespace {
}
private:
+ void TransferImpOps(MachineInstr &OldMI,
+ MachineInstrBuilder &UseMI, MachineInstrBuilder &DefMI);
bool ExpandMBB(MachineBasicBlock &MBB);
};
char ARMExpandPseudo::ID = 0;
}
+/// TransferImpOps - Transfer implicit operands on the pseudo instruction to
+/// the instructions created from the expansion.
+void ARMExpandPseudo::TransferImpOps(MachineInstr &OldMI,
+ MachineInstrBuilder &UseMI,
+ MachineInstrBuilder &DefMI) {
+ const TargetInstrDesc &Desc = OldMI.getDesc();
+ for (unsigned i = Desc.getNumOperands(), e = OldMI.getNumOperands();
+ i != e; ++i) {
+ const MachineOperand &MO = OldMI.getOperand(i);
+ assert(MO.isReg() && MO.getReg());
+ if (MO.isUse())
+ UseMI.addReg(MO.getReg(), getKillRegState(MO.isKill()));
+ else
+ DefMI.addReg(MO.getReg(),
+ getDefRegState(true) | getDeadRegState(MO.isDead()));
+ }
+}
+
bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
bool Modified = false;
@@ -58,52 +79,82 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
unsigned NewLdOpc = (Opcode == ARM::tLDRpci_pic)
? ARM::tLDRpci : ARM::t2LDRpci;
unsigned DstReg = MI.getOperand(0).getReg();
- if (!MI.getOperand(0).isDead()) {
- MachineInstr *NewMI =
- AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
- TII->get(NewLdOpc), DstReg)
- .addOperand(MI.getOperand(1)));
- NewMI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tPICADD))
- .addReg(DstReg, getDefRegState(true))
- .addReg(DstReg)
- .addOperand(MI.getOperand(2));
- }
+ bool DstIsDead = MI.getOperand(0).isDead();
+ MachineInstrBuilder MIB1 =
+ AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(NewLdOpc), DstReg)
+ .addOperand(MI.getOperand(1)));
+ (*MIB1).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MachineInstrBuilder MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(ARM::tPICADD))
+ .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead))
+ .addReg(DstReg)
+ .addOperand(MI.getOperand(2));
+ TransferImpOps(MI, MIB1, MIB2);
MI.eraseFromParent();
Modified = true;
break;
}
+
case ARM::t2MOVi32imm: {
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = llvm::getInstrPredicate(&MI, PredReg);
unsigned DstReg = MI.getOperand(0).getReg();
- if (!MI.getOperand(0).isDead()) {
- const MachineOperand &MO = MI.getOperand(1);
- MachineInstrBuilder LO16, HI16;
-
- LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::t2MOVi16),
- DstReg);
- HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::t2MOVTi16))
- .addReg(DstReg, getDefRegState(true)).addReg(DstReg);
-
- if (MO.isImm()) {
- unsigned Imm = MO.getImm();
- unsigned Lo16 = Imm & 0xffff;
- unsigned Hi16 = (Imm >> 16) & 0xffff;
- LO16 = LO16.addImm(Lo16);
- HI16 = HI16.addImm(Hi16);
- } else {
- const GlobalValue *GV = MO.getGlobal();
- unsigned TF = MO.getTargetFlags();
- LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16);
- HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16);
- // FIXME: What's about memoperands?
- }
- AddDefaultPred(LO16);
- AddDefaultPred(HI16);
+ bool DstIsDead = MI.getOperand(0).isDead();
+ const MachineOperand &MO = MI.getOperand(1);
+ MachineInstrBuilder LO16, HI16;
+
+ LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::t2MOVi16),
+ DstReg);
+ HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::t2MOVTi16))
+ .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead))
+ .addReg(DstReg);
+
+ if (MO.isImm()) {
+ unsigned Imm = MO.getImm();
+ unsigned Lo16 = Imm & 0xffff;
+ unsigned Hi16 = (Imm >> 16) & 0xffff;
+ LO16 = LO16.addImm(Lo16);
+ HI16 = HI16.addImm(Hi16);
+ } else {
+ const GlobalValue *GV = MO.getGlobal();
+ unsigned TF = MO.getTargetFlags();
+ LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16);
+ HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16);
}
+ (*LO16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ (*HI16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ LO16.addImm(Pred).addReg(PredReg);
+ HI16.addImm(Pred).addReg(PredReg);
+ TransferImpOps(MI, LO16, HI16);
+ MI.eraseFromParent();
+ Modified = true;
+ break;
+ }
+
+ case ARM::VMOVQQ: {
+ unsigned DstReg = MI.getOperand(0).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ unsigned EvenDst = TRI->getSubReg(DstReg, ARM::qsub_0);
+ unsigned OddDst = TRI->getSubReg(DstReg, ARM::qsub_1);
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ bool SrcIsKill = MI.getOperand(1).isKill();
+ unsigned EvenSrc = TRI->getSubReg(SrcReg, ARM::qsub_0);
+ unsigned OddSrc = TRI->getSubReg(SrcReg, ARM::qsub_1);
+ MachineInstrBuilder Even =
+ AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(ARM::VMOVQ))
+ .addReg(EvenDst, getDefRegState(true) | getDeadRegState(DstIsDead))
+ .addReg(EvenSrc, getKillRegState(SrcIsKill)));
+ MachineInstrBuilder Odd =
+ AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(ARM::VMOVQ))
+ .addReg(OddDst, getDefRegState(true) | getDeadRegState(DstIsDead))
+ .addReg(OddSrc, getKillRegState(SrcIsKill)));
+ TransferImpOps(MI, Even, Odd);
MI.eraseFromParent();
Modified = true;
}
- // FIXME: expand t2MOVi32imm
}
MBBI = NMBBI;
}
@@ -113,6 +164,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
TII = MF.getTarget().getInstrInfo();
+ TRI = MF.getTarget().getRegisterInfo();
bool Modified = false;
for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E;
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 616942c..9baef6b 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -37,7 +37,8 @@ using namespace llvm;
static cl::opt<bool>
UseRegSeq("neon-reg-sequence", cl::Hidden,
- cl::desc("Use reg_sequence to model ld / st of multiple neon regs"));
+ cl::desc("Use reg_sequence to model ld / st of multiple neon regs"),
+ cl::init(true));
//===--------------------------------------------------------------------===//
/// ARMDAGToDAGISel - ARM specific code to select ARM machine
@@ -164,15 +165,34 @@ private:
ARMCC::CondCodes CCVal, SDValue CCR,
SDValue InFlag);
+ SDNode *SelectConcatVector(SDNode *N);
+
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
char ConstraintCode,
std::vector<SDValue> &OutOps);
- /// PairDRegs - Insert a pair of double registers into an implicit def to
- /// form a quad register.
+ /// PairDRegs - Form a quad register from a pair of D registers.
+ ///
SDNode *PairDRegs(EVT VT, SDValue V0, SDValue V1);
+
+ /// PairQRegs - Form a quad register pair from a pair of Q registers.
+ ///
+ SDNode *PairQRegs(EVT VT, SDValue V0, SDValue V1);
+
+ /// QuadDRegs - Form a quad register pair from a quad of D registers.
+ ///
+ SDNode *QuadDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
+
+ /// QuadQRegs - Form 4 consecutive Q registers.
+ ///
+ SDNode *QuadQRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
+
+ /// OctoDRegs - Form 8 consecutive D registers.
+ ///
+ SDNode *OctoDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3,
+ SDValue V4, SDValue V5, SDValue V6, SDValue V7);
};
}
@@ -940,13 +960,13 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) {
return NULL;
}
-/// PairDRegs - Insert a pair of double registers into an implicit def to
-/// form a quad register.
+/// PairDRegs - Form a quad register from a pair of D registers.
+///
SDNode *ARMDAGToDAGISel::PairDRegs(EVT VT, SDValue V0, SDValue V1) {
DebugLoc dl = V0.getNode()->getDebugLoc();
- SDValue SubReg0 = CurDAG->getTargetConstant(ARM::DSUBREG_0, MVT::i32);
- SDValue SubReg1 = CurDAG->getTargetConstant(ARM::DSUBREG_1, MVT::i32);
- if (UseRegSeq) {
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
+ if (llvm::ModelWithRegSequence()) {
const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 };
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4);
}
@@ -958,6 +978,62 @@ SDNode *ARMDAGToDAGISel::PairDRegs(EVT VT, SDValue V0, SDValue V1) {
VT, SDValue(Pair, 0), V1, SubReg1);
}
+/// PairQRegs - Form 4 consecutive D registers from a pair of Q registers.
+///
+SDNode *ARMDAGToDAGISel::PairQRegs(EVT VT, SDValue V0, SDValue V1) {
+ DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32);
+ const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4);
+}
+
+/// QuadDRegs - Form 4 consecutive D registers.
+///
+SDNode *ARMDAGToDAGISel::QuadDRegs(EVT VT, SDValue V0, SDValue V1,
+ SDValue V2, SDValue V3) {
+ DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
+ SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, MVT::i32);
+ SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, MVT::i32);
+ const SDValue Ops[] = { V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 8);
+}
+
+/// QuadQRegs - Form 4 consecutive Q registers.
+///
+SDNode *ARMDAGToDAGISel::QuadQRegs(EVT VT, SDValue V0, SDValue V1,
+ SDValue V2, SDValue V3) {
+ DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32);
+ SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, MVT::i32);
+ SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, MVT::i32);
+ const SDValue Ops[] = { V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 8);
+}
+
+/// OctoDRegs - Form 8 consecutive D registers.
+///
+SDNode *ARMDAGToDAGISel::OctoDRegs(EVT VT, SDValue V0, SDValue V1,
+ SDValue V2, SDValue V3,
+ SDValue V4, SDValue V5,
+ SDValue V6, SDValue V7) {
+ DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
+ SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, MVT::i32);
+ SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, MVT::i32);
+ SDValue SubReg4 = CurDAG->getTargetConstant(ARM::dsub_4, MVT::i32);
+ SDValue SubReg5 = CurDAG->getTargetConstant(ARM::dsub_5, MVT::i32);
+ SDValue SubReg6 = CurDAG->getTargetConstant(ARM::dsub_6, MVT::i32);
+ SDValue SubReg7 = CurDAG->getTargetConstant(ARM::dsub_7, MVT::i32);
+ const SDValue Ops[] = { V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3,
+ V4, SubReg4, V5, SubReg5, V6, SubReg6, V7, SubReg7 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 16);
+}
+
/// GetNEONSubregVT - Given a type for a 128-bit NEON vector, return the type
/// for a 64-bit subregister of the vector.
static EVT GetNEONSubregVT(EVT VT) {
@@ -1011,7 +1087,34 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain };
std::vector<EVT> ResTys(NumVecs, VT);
ResTys.push_back(MVT::Other);
- return CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5);
+ SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5);
+ if (!llvm::ModelWithRegSequence() || NumVecs < 2)
+ return VLd;
+
+ SDValue RegSeq;
+ SDValue V0 = SDValue(VLd, 0);
+ SDValue V1 = SDValue(VLd, 1);
+
+ // Form a REG_SEQUENCE to force register allocation.
+ if (NumVecs == 2)
+ RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
+ else {
+ SDValue V2 = SDValue(VLd, 2);
+ // If it's a vld3, form a quad D-register but discard the last part.
+ SDValue V3 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
+ : SDValue(VLd, 3);
+ RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
+ }
+
+ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
+ SDValue D = CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec,
+ dl, VT, RegSeq);
+ ReplaceUses(SDValue(N, Vec), D);
+ }
+ ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, NumVecs));
+ return NULL;
}
EVT RegVT = GetNEONSubregVT(VT);
@@ -1026,9 +1129,24 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
Chain = SDValue(VLd, 2 * NumVecs);
// Combine the even and odd subregs to produce the result.
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
- SDNode *Q = PairDRegs(VT, SDValue(VLd, 2*Vec), SDValue(VLd, 2*Vec+1));
- ReplaceUses(SDValue(N, Vec), SDValue(Q, 0));
+ if (llvm::ModelWithRegSequence()) {
+ if (NumVecs == 1) {
+ SDNode *Q = PairDRegs(VT, SDValue(VLd, 0), SDValue(VLd, 1));
+ ReplaceUses(SDValue(N, 0), SDValue(Q, 0));
+ } else {
+ SDValue QQ = SDValue(QuadDRegs(MVT::v4i64,
+ SDValue(VLd, 0), SDValue(VLd, 1),
+ SDValue(VLd, 2), SDValue(VLd, 3)), 0);
+ SDValue Q0 = CurDAG->getTargetExtractSubreg(ARM::qsub_0, dl, VT, QQ);
+ SDValue Q1 = CurDAG->getTargetExtractSubreg(ARM::qsub_1, dl, VT, QQ);
+ ReplaceUses(SDValue(N, 0), Q0);
+ ReplaceUses(SDValue(N, 1), Q1);
+ }
+ } else {
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
+ SDNode *Q = PairDRegs(VT, SDValue(VLd, 2*Vec), SDValue(VLd, 2*Vec+1));
+ ReplaceUses(SDValue(N, Vec), SDValue(Q, 0));
+ }
}
} else {
// Otherwise, quad registers are loaded with two separate instructions,
@@ -1051,10 +1169,37 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 6);
Chain = SDValue(VLdB, NumVecs+1);
- // Combine the even and odd subregs to produce the result.
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
- SDNode *Q = PairDRegs(VT, SDValue(VLdA, Vec), SDValue(VLdB, Vec));
- ReplaceUses(SDValue(N, Vec), SDValue(Q, 0));
+ if (llvm::ModelWithRegSequence()) {
+ SDValue V0 = SDValue(VLdA, 0);
+ SDValue V1 = SDValue(VLdB, 0);
+ SDValue V2 = SDValue(VLdA, 1);
+ SDValue V3 = SDValue(VLdB, 1);
+ SDValue V4 = SDValue(VLdA, 2);
+ SDValue V5 = SDValue(VLdB, 2);
+ SDValue V6 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT),
+ 0)
+ : SDValue(VLdA, 3);
+ SDValue V7 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT),
+ 0)
+ : SDValue(VLdB, 3);
+ SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V0, V1, V2, V3,
+ V4, V5, V6, V7), 0);
+
+ // Extract the 3 or 4 Q registers.
+ assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
+ SDValue Q = CurDAG->getTargetExtractSubreg(ARM::qsub_0+Vec,
+ dl, VT, RegSeq);
+ ReplaceUses(SDValue(N, Vec), Q);
+ }
+ } else {
+ // Combine the even and odd subregs to produce the result.
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
+ SDNode *Q = PairDRegs(VT, SDValue(VLdA, Vec), SDValue(VLdB, Vec));
+ ReplaceUses(SDValue(N, Vec), SDValue(Q, 0));
+ }
}
}
ReplaceUses(SDValue(N, NumVecs), Chain);
@@ -1102,12 +1247,43 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
Ops.push_back(Align);
if (is64BitVector) {
- unsigned Opc = DOpcodes[OpcodeIndex];
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- Ops.push_back(N->getOperand(Vec+3));
+ if (llvm::ModelWithRegSequence() && NumVecs >= 2) {
+ SDValue RegSeq;
+ SDValue V0 = N->getOperand(0+3);
+ SDValue V1 = N->getOperand(1+3);
+
+ // Form a REG_SEQUENCE to force register allocation.
+ if (NumVecs == 2)
+ RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
+ else {
+ SDValue V2 = N->getOperand(2+3);
+ // If it's a vst3, form a quad D-register and leave the last part as
+ // an undef.
+ SDValue V3 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
+ : N->getOperand(3+3);
+ RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
+ }
+
+ // Now extract the D registers back out.
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT,
+ RegSeq));
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, VT,
+ RegSeq));
+ if (NumVecs > 2)
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, VT,
+ RegSeq));
+ if (NumVecs > 3)
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT,
+ RegSeq));
+ } else {
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ Ops.push_back(N->getOperand(Vec+3));
+ }
Ops.push_back(Pred);
Ops.push_back(Reg0); // predicate register
Ops.push_back(Chain);
+ unsigned Opc = DOpcodes[OpcodeIndex];
return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+5);
}
@@ -1116,48 +1292,114 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
// Quad registers are directly supported for VST1 and VST2,
// storing pairs of D regs.
unsigned Opc = QOpcodes0[OpcodeIndex];
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT,
- N->getOperand(Vec+3)));
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
- N->getOperand(Vec+3)));
+ if (llvm::ModelWithRegSequence() && NumVecs == 2) {
+ // First extract the pair of Q registers.
+ SDValue Q0 = N->getOperand(3);
+ SDValue Q1 = N->getOperand(4);
+
+ // Form a QQ register.
+ SDValue QQ = SDValue(PairQRegs(MVT::v4i64, Q0, Q1), 0);
+
+ // Now extract the D registers back out.
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT,
+ QQ));
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT,
+ QQ));
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, RegVT,
+ QQ));
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, RegVT,
+ QQ));
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0); // predicate register
+ Ops.push_back(Chain);
+ return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 5 + 4);
+ } else {
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT,
+ N->getOperand(Vec+3)));
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT,
+ N->getOperand(Vec+3)));
+ }
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0); // predicate register
+ Ops.push_back(Chain);
+ return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(),
+ 5 + 2 * NumVecs);
}
- Ops.push_back(Pred);
- Ops.push_back(Reg0); // predicate register
- Ops.push_back(Chain);
- return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(),
- 5 + 2 * NumVecs);
}
// Otherwise, quad registers are stored with two separate instructions,
// where one stores the even registers and the other stores the odd registers.
+ if (llvm::ModelWithRegSequence()) {
+ // Form the QQQQ REG_SEQUENCE.
+ SDValue V[8];
+ for (unsigned Vec = 0, i = 0; Vec < NumVecs; ++Vec, i+=2) {
+ V[i] = CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT,
+ N->getOperand(Vec+3));
+ V[i+1] = CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT,
+ N->getOperand(Vec+3));
+ }
+ if (NumVecs == 3)
+ V[6] = V[7] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
+ dl, RegVT), 0);
- Ops.push_back(Reg0); // post-access address offset
+ SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V[0], V[1], V[2], V[3],
+ V[4], V[5], V[6], V[7]), 0);
- // Store the even subregs.
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT,
- N->getOperand(Vec+3)));
- Ops.push_back(Pred);
- Ops.push_back(Reg0); // predicate register
- Ops.push_back(Chain);
- unsigned Opc = QOpcodes0[OpcodeIndex];
- SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
- MVT::Other, Ops.data(), NumVecs+6);
- Chain = SDValue(VStA, 1);
-
- // Store the odd subregs.
- Ops[0] = SDValue(VStA, 0); // MemAddr
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
- N->getOperand(Vec+3));
- Ops[NumVecs+5] = Chain;
- Opc = QOpcodes1[OpcodeIndex];
- SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
- MVT::Other, Ops.data(), NumVecs+6);
- Chain = SDValue(VStB, 1);
- ReplaceUses(SDValue(N, 0), Chain);
- return NULL;
+ // Store the even D registers.
+ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+ Ops.push_back(Reg0); // post-access address offset
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec*2, dl,
+ RegVT, RegSeq));
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0); // predicate register
+ Ops.push_back(Chain);
+ unsigned Opc = QOpcodes0[OpcodeIndex];
+ SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
+ MVT::Other, Ops.data(), NumVecs+6);
+ Chain = SDValue(VStA, 1);
+
+ // Store the odd D registers.
+ Ops[0] = SDValue(VStA, 0); // MemAddr
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::dsub_1+Vec*2, dl,
+ RegVT, RegSeq);
+ Ops[NumVecs+5] = Chain;
+ Opc = QOpcodes1[OpcodeIndex];
+ SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
+ MVT::Other, Ops.data(), NumVecs+6);
+ Chain = SDValue(VStB, 1);
+ ReplaceUses(SDValue(N, 0), Chain);
+ return NULL;
+ } else {
+ Ops.push_back(Reg0); // post-access address offset
+
+ // Store the even subregs.
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT,
+ N->getOperand(Vec+3)));
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0); // predicate register
+ Ops.push_back(Chain);
+ unsigned Opc = QOpcodes0[OpcodeIndex];
+ SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
+ MVT::Other, Ops.data(), NumVecs+6);
+ Chain = SDValue(VStA, 1);
+
+ // Store the odd subregs.
+ Ops[0] = SDValue(VStA, 0); // MemAddr
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT,
+ N->getOperand(Vec+3));
+ Ops[NumVecs+5] = Chain;
+ Opc = QOpcodes1[OpcodeIndex];
+ SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
+ MVT::Other, Ops.data(), NumVecs+6);
+ Chain = SDValue(VStB, 1);
+ ReplaceUses(SDValue(N, 0), Chain);
+ return NULL;
+ }
}
SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
@@ -1180,11 +1422,13 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
// Quad registers are handled by load/store of subregs. Find the subreg info.
unsigned NumElts = 0;
int SubregIdx = 0;
+ bool Even = false;
EVT RegVT = VT;
if (!is64BitVector) {
RegVT = GetNEONSubregVT(VT);
NumElts = RegVT.getVectorNumElements();
- SubregIdx = (Lane < NumElts) ? ARM::DSUBREG_0 : ARM::DSUBREG_1;
+ SubregIdx = (Lane < NumElts) ? ARM::dsub_0 : ARM::dsub_1;
+ Even = Lane < NumElts;
}
unsigned OpcodeIndex;
@@ -1211,8 +1455,35 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
unsigned Opc = 0;
if (is64BitVector) {
Opc = DOpcodes[OpcodeIndex];
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- Ops.push_back(N->getOperand(Vec+3));
+ if (llvm::ModelWithRegSequence()) {
+ SDValue RegSeq;
+ SDValue V0 = N->getOperand(0+3);
+ SDValue V1 = N->getOperand(1+3);
+ if (NumVecs == 2) {
+ RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
+ } else {
+ SDValue V2 = N->getOperand(2+3);
+ SDValue V3 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
+ : N->getOperand(3+3);
+ RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
+ }
+
+ // Now extract the D registers back out.
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT,
+ RegSeq));
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, VT,
+ RegSeq));
+ if (NumVecs > 2)
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, VT,
+ RegSeq));
+ if (NumVecs > 3)
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT,
+ RegSeq));
+ } else {
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ Ops.push_back(N->getOperand(Vec+3));
+ }
} else {
// Check if this is loading the even or odd subreg of a Q register.
if (Lane < NumElts) {
@@ -1221,10 +1492,32 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
Lane -= NumElts;
Opc = QOpcodes1[OpcodeIndex];
}
- // Extract the subregs of the input vector.
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- Ops.push_back(CurDAG->getTargetExtractSubreg(SubregIdx, dl, RegVT,
- N->getOperand(Vec+3)));
+
+ if (llvm::ModelWithRegSequence()) {
+ SDValue RegSeq;
+ SDValue V0 = N->getOperand(0+3);
+ SDValue V1 = N->getOperand(1+3);
+ if (NumVecs == 2) {
+ RegSeq = SDValue(PairQRegs(MVT::v4i64, V0, V1), 0);
+ } else {
+ SDValue V2 = N->getOperand(2+3);
+ SDValue V3 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
+ : N->getOperand(3+3);
+ RegSeq = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0);
+ }
+
+ // Extract the subregs of the input vector.
+ unsigned SubIdx = Even ? ARM::dsub_0 : ARM::dsub_1;
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ Ops.push_back(CurDAG->getTargetExtractSubreg(SubIdx+Vec*2, dl, RegVT,
+ RegSeq));
+ } else {
+ // Extract the subregs of the input vector.
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ Ops.push_back(CurDAG->getTargetExtractSubreg(SubregIdx, dl, RegVT,
+ N->getOperand(Vec+3)));
+ }
}
Ops.push_back(getI32Imm(Lane));
Ops.push_back(Pred);
@@ -1236,8 +1529,60 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
std::vector<EVT> ResTys(NumVecs, RegVT);
ResTys.push_back(MVT::Other);
- SDNode *VLdLn =
- CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), NumVecs+6);
+ SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(),NumVecs+6);
+
+ if (llvm::ModelWithRegSequence()) {
+ // Form a REG_SEQUENCE to force register allocation.
+ SDValue RegSeq;
+ if (is64BitVector) {
+ SDValue V0 = SDValue(VLdLn, 0);
+ SDValue V1 = SDValue(VLdLn, 1);
+ if (NumVecs == 2) {
+ RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
+ } else {
+ SDValue V2 = SDValue(VLdLn, 2);
+ // If it's a vld3, form a quad D-register but discard the last part.
+ SDValue V3 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
+ : SDValue(VLdLn, 3);
+ RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
+ }
+ } else {
+ // For 128-bit vectors, take the 64-bit results of the load and insert them
+ // as subregs into the result.
+ SDValue V[8];
+ for (unsigned Vec = 0, i = 0; Vec < NumVecs; ++Vec, i+=2) {
+ if (Even) {
+ V[i] = SDValue(VLdLn, Vec);
+ V[i+1] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
+ dl, RegVT), 0);
+ } else {
+ V[i] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
+ dl, RegVT), 0);
+ V[i+1] = SDValue(VLdLn, Vec);
+ }
+ }
+ if (NumVecs == 3)
+ V[6] = V[7] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
+ dl, RegVT), 0);
+
+ if (NumVecs == 2)
+ RegSeq = SDValue(QuadDRegs(MVT::v4i64, V[0], V[1], V[2], V[3]), 0);
+ else
+ RegSeq = SDValue(OctoDRegs(MVT::v8i64, V[0], V[1], V[2], V[3],
+ V[4], V[5], V[6], V[7]), 0);
+ }
+
+ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+ assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
+ unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ ReplaceUses(SDValue(N, Vec),
+ CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, RegSeq));
+ ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, NumVecs));
+ return NULL;
+ }
+
// For a 64-bit vector load to D registers, nothing more needs to be done.
if (is64BitVector)
return VLdLn;
@@ -1481,6 +1826,21 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) {
return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 5);
}
+SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
+ // The only time a CONCAT_VECTORS operation can have legal types is when
+ // two 64-bit vectors are concatenated to a 128-bit vector.
+ EVT VT = N->getValueType(0);
+ if (!VT.is128BitVector() || N->getNumOperands() != 2)
+ llvm_unreachable("unexpected CONCAT_VECTORS");
+ DebugLoc dl = N->getDebugLoc();
+ SDValue V0 = N->getOperand(0);
+ SDValue V1 = N->getOperand(1);
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
+ const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4);
+}
+
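A minimal sketch of the selection this enables (illustrative DAG shapes, not verbatim isel output):

   // Sketch: with REG_SEQUENCE modeling enabled,
   //   (v4f32 concat_vectors (v2f32 a), (v2f32 b))
   // selects to
   //   (v4f32 REG_SEQUENCE a, dsub_0, b, dsub_1)
   // placing the two 64-bit halves directly into a Q register with no
   // copies, which is why CONCAT_VECTORS can be marked Legal in
   // ARMISelLowering.cpp below.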
SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
@@ -1695,8 +2055,12 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDValue Pred = getAL(CurDAG);
SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
SDValue Ops[] = { N->getOperand(1), AM5Opc, Pred, PredReg, Chain };
- return CurDAG->getMachineNode(ARM::VLDMQ, dl, MVT::v2f64, MVT::Other,
- Ops, 5);
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
+ SDNode *Ret = CurDAG->getMachineNode(ARM::VLDMQ, dl,
+ MVT::v2f64, MVT::Other, Ops, 5);
+ cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
+ return Ret;
}
// Other cases are autogenerated.
break;
@@ -1712,7 +2076,11 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
SDValue Ops[] = { N->getOperand(1), N->getOperand(2),
AM5Opc, Pred, PredReg, Chain };
- return CurDAG->getMachineNode(ARM::VSTMQ, dl, MVT::Other, Ops, 6);
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
+ SDNode *Ret = CurDAG->getMachineNode(ARM::VSTMQ, dl, MVT::Other, Ops, 6);
+ cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
+ return Ret;
}
// Other cases are autogenerated.
break;
@@ -1971,7 +2339,11 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
return SelectVLDSTLane(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
}
}
+ break;
}
+
+ case ISD::CONCAT_VECTORS:
+ return SelectConcatVector(N);
}
return SelectCode(N);
@@ -1995,3 +2367,9 @@ FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
CodeGenOpt::Level OptLevel) {
return new ARMDAGToDAGISel(TM, OptLevel);
}
+
+/// ModelWithRegSequence - Return true if isel should use REG_SEQUENCE to model
+/// operations involving sub-registers.
+bool llvm::ModelWithRegSequence() {
+ return UseRegSeq;
+}
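As a minimal usage sketch of the REG_SEQUENCE helpers above (assuming the same SelectionDAG APIs the patch already uses; D0, D1, dl and the chosen value types are illustrative only), a caller can pair two 64-bit values and pull one half back out:

   // Sketch: combine two v2f32 values into one v4f32 Q register via
   // PairDRegs, then recover the low half as a dsub_0 subregister copy.
   SDNode *QNode = PairDRegs(MVT::v4f32, D0, D1); // D0 -> dsub_0, D1 -> dsub_1
   SDValue Lo = CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, MVT::v2f32,
                                               SDValue(QNode, 0));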
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index d3842a6..b8126a3 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -94,7 +94,10 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
}
setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom);
- setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Custom);
+ if (llvm::ModelWithRegSequence())
+ setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal);
+ else
+ setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Expand);
setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand);
setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand);
@@ -360,8 +363,11 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
// These are expanded into libcalls.
- setOperationAction(ISD::SDIV, MVT::i32, Expand);
- setOperationAction(ISD::UDIV, MVT::i32, Expand);
+ if (!Subtarget->hasDivide()) {
+ // v7M has a hardware divider
+ setOperationAction(ISD::SDIV, MVT::i32, Expand);
+ setOperationAction(ISD::UDIV, MVT::i32, Expand);
+ }
setOperationAction(ISD::SREM, MVT::i32, Expand);
setOperationAction(ISD::UREM, MVT::i32, Expand);
setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
@@ -373,6 +379,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
+ setOperationAction(ISD::TRAP, MVT::Other, Legal);
+
// Use the default implementation.
setOperationAction(ISD::VASTART, MVT::Other, Custom);
setOperationAction(ISD::VAARG, MVT::Other, Expand);
@@ -387,7 +395,11 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
- if (!Subtarget->hasV6Ops() && !Subtarget->isThumb2()) {
+ // If the subtarget does not have extract instructions, sign_extend_inreg
+ // needs to be expanded. Extract is available in ARM mode on v6 and up,
+ // and on most Thumb2 implementations.
+ if ((!Subtarget->isThumb() && !Subtarget->hasV6Ops())
+ || (Subtarget->isThumb2() && !Subtarget->hasT2ExtractPack())) {
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
}
@@ -400,6 +412,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+ setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
+ setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
setOperationAction(ISD::SETCC, MVT::i32, Expand);
setOperationAction(ISD::SETCC, MVT::f32, Expand);
@@ -451,9 +465,14 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
// ARMISD::VMOVRRD - No need to call setTargetDAGCombine
setTargetDAGCombine(ISD::ADD);
setTargetDAGCombine(ISD::SUB);
+ setTargetDAGCombine(ISD::MUL);
setStackPointerRegisterToSaveRestore(ARM::SP);
- setSchedulingPreference(SchedulingForRegPressure);
+
+ if (UseSoftFloat || Subtarget->isThumb1Only() || !Subtarget->hasVFP2())
+ setSchedulingPreference(Sched::RegPressure);
+ else
+ setSchedulingPreference(Sched::Hybrid);
// FIXME: If-converter should use instruction latency to determine
// profitability rather than relying on fixed limits.
@@ -567,11 +586,35 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
}
}
+/// getRegClassFor - Return the register class that should be used for the
+/// specified value type.
+TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const {
+ // Map v4i64 to QQ registers but do not make the type legal. Similarly map
+ // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
+ // load / store 4 to 8 consecutive D registers.
+ if (Subtarget->hasNEON()) {
+ if (VT == MVT::v4i64)
+ return ARM::QQPRRegisterClass;
+ else if (VT == MVT::v8i64)
+ return ARM::QQQQPRRegisterClass;
+ }
+ return TargetLowering::getRegClassFor(VT);
+}
+
/// getFunctionAlignment - Return the Log2 alignment of this function.
unsigned ARMTargetLowering::getFunctionAlignment(const Function *F) const {
return getTargetMachine().getSubtarget<ARMSubtarget>().isThumb() ? 0 : 1;
}
+Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
+ EVT VT = N->getValueType(i);
+ if (VT.isFloatingPoint() || VT.isVector())
+ return Sched::Latency;
+ }
+ return Sched::RegPressure;
+}
+
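In tabular form, the mapping getRegClassFor adds (a sketch; these types never become legal, they exist only to give REG_SEQUENCE nodes a register class):

   //   MVT::v4i64 -> ARM::QQPRRegisterClass    (QQ:   4 consecutive D regs)
   //   MVT::v8i64 -> ARM::QQQQPRRegisterClass  (QQQQ: 8 consecutive D regs)
   //   anything else -> TargetLowering::getRegClassFor(VT)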
//===----------------------------------------------------------------------===//
// Lowering Code
//===----------------------------------------------------------------------===//
@@ -1507,6 +1550,23 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
}
SDValue
+ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Val = Subtarget->isThumb() ?
+ DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::SP, MVT::i32) :
+ DAG.getConstant(0, MVT::i32);
+ return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, Op.getOperand(0),
+ Op.getOperand(1), Val);
+}
+
+SDValue
+ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
+ Op.getOperand(1), DAG.getConstant(0, MVT::i32));
+}
+
+SDValue
ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *Subtarget)
const {
@@ -1545,12 +1605,6 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
}
return Result;
}
- case Intrinsic::eh_sjlj_setjmp:
- SDValue Val = Subtarget->isThumb() ?
- DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::SP, MVT::i32) :
- DAG.getConstant(0, MVT::i32);
- return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, Op.getOperand(1),
- Val);
}
}
@@ -1652,7 +1706,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
RC = ARM::GPRRegisterClass;
// Transform the arguments stored in physical registers into virtual ones.
- unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
SDValue ArgValue2;
@@ -2092,9 +2146,31 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMCC, CCR, Cmp);
}
+SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MFI->setReturnAddressIsTaken(true);
+
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ if (Depth) {
+ SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
+ SDValue Offset = DAG.getConstant(4, MVT::i32);
+ return DAG.getLoad(VT, dl, DAG.getEntryNode(),
+ DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
+ NULL, 0, false, false, 0);
+ }
+
+ // Return LR, which contains the return address. Mark it an implicit live-in.
+ unsigned Reg = MF.addLiveIn(ARM::LR, ARM::GPRRegisterClass);
+ return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
+}
+
SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
MFI->setFrameAddressIsTaken(true);
+
EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
@@ -2107,116 +2183,6 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
return FrameAddr;
}
-SDValue
-ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
- SDValue Chain,
- SDValue Dst, SDValue Src,
- SDValue Size, unsigned Align,
- bool isVolatile, bool AlwaysInline,
- const Value *DstSV,
- uint64_t DstSVOff,
- const Value *SrcSV,
- uint64_t SrcSVOff) const {
- // Do repeated 4-byte loads and stores. To be improved.
- // This requires 4-byte alignment.
- if ((Align & 3) != 0)
- return SDValue();
- // This requires the copy size to be a constant, preferrably
- // within a subtarget-specific limit.
- ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
- if (!ConstantSize)
- return SDValue();
- uint64_t SizeVal = ConstantSize->getZExtValue();
- if (!AlwaysInline && SizeVal > getSubtarget()->getMaxInlineSizeThreshold())
- return SDValue();
-
- unsigned BytesLeft = SizeVal & 3;
- unsigned NumMemOps = SizeVal >> 2;
- unsigned EmittedNumMemOps = 0;
- EVT VT = MVT::i32;
- unsigned VTSize = 4;
- unsigned i = 0;
- const unsigned MAX_LOADS_IN_LDM = 6;
- SDValue TFOps[MAX_LOADS_IN_LDM];
- SDValue Loads[MAX_LOADS_IN_LDM];
- uint64_t SrcOff = 0, DstOff = 0;
-
- // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the
- // same number of stores. The loads and stores will get combined into
- // ldm/stm later on.
- while (EmittedNumMemOps < NumMemOps) {
- for (i = 0;
- i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
- Loads[i] = DAG.getLoad(VT, dl, Chain,
- DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
- DAG.getConstant(SrcOff, MVT::i32)),
- SrcSV, SrcSVOff + SrcOff, isVolatile, false, 0);
- TFOps[i] = Loads[i].getValue(1);
- SrcOff += VTSize;
- }
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
-
- for (i = 0;
- i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
- TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
- DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
- DAG.getConstant(DstOff, MVT::i32)),
- DstSV, DstSVOff + DstOff, isVolatile, false, 0);
- DstOff += VTSize;
- }
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
-
- EmittedNumMemOps += i;
- }
-
- if (BytesLeft == 0)
- return Chain;
-
- // Issue loads / stores for the trailing (1 - 3) bytes.
- unsigned BytesLeftSave = BytesLeft;
- i = 0;
- while (BytesLeft) {
- if (BytesLeft >= 2) {
- VT = MVT::i16;
- VTSize = 2;
- } else {
- VT = MVT::i8;
- VTSize = 1;
- }
-
- Loads[i] = DAG.getLoad(VT, dl, Chain,
- DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
- DAG.getConstant(SrcOff, MVT::i32)),
- SrcSV, SrcSVOff + SrcOff, false, false, 0);
- TFOps[i] = Loads[i].getValue(1);
- ++i;
- SrcOff += VTSize;
- BytesLeft -= VTSize;
- }
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
-
- i = 0;
- BytesLeft = BytesLeftSave;
- while (BytesLeft) {
- if (BytesLeft >= 2) {
- VT = MVT::i16;
- VTSize = 2;
- } else {
- VT = MVT::i8;
- VTSize = 1;
- }
-
- TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
- DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
- DAG.getConstant(DstOff, MVT::i32)),
- DstSV, DstSVOff + DstOff, false, false, 0);
- ++i;
- DstOff += VTSize;
- BytesLeft -= VTSize;
- }
- return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
-}
-
/// ExpandBIT_CONVERT - If the target supports VFP, this function is called to
/// expand a bit convert where either the source or destination type is i64 to
/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
@@ -2434,9 +2400,9 @@ static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
// Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr.
SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
- DAG.getConstant(0, MVT::i32));
+ DAG.getConstant(0, MVT::i32));
SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
- DAG.getConstant(1, MVT::i32));
+ DAG.getConstant(1, MVT::i32));
// First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
// captures the result into a carry flag.
@@ -2879,21 +2845,60 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
}
}
- // If there are only 2 elements in a 128-bit vector, insert them into an
- // undef vector. This handles the common case for 128-bit vector argument
- // passing, where the insertions should be translated to subreg accesses
- // with no real instructions.
- if (VT.is128BitVector() && Op.getNumOperands() == 2) {
- SDValue Val = DAG.getUNDEF(VT);
- SDValue Op0 = Op.getOperand(0);
- SDValue Op1 = Op.getOperand(1);
- if (Op0.getOpcode() != ISD::UNDEF)
- Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, Op0,
- DAG.getIntPtrConstant(0));
- if (Op1.getOpcode() != ISD::UNDEF)
- Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, Op1,
- DAG.getIntPtrConstant(1));
- return Val;
+ // Scan through the operands to see if only one value is used.
+ unsigned NumElts = VT.getVectorNumElements();
+ bool isOnlyLowElement = true;
+ bool usesOnlyOneValue = true;
+ bool isConstant = true;
+ SDValue Value;
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue V = Op.getOperand(i);
+ if (V.getOpcode() == ISD::UNDEF)
+ continue;
+ if (i > 0)
+ isOnlyLowElement = false;
+ if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
+ isConstant = false;
+
+ if (!Value.getNode())
+ Value = V;
+ else if (V != Value)
+ usesOnlyOneValue = false;
+ }
+
+ if (!Value.getNode())
+ return DAG.getUNDEF(VT);
+
+ if (isOnlyLowElement)
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
+
+ // If all elements are constants, fall back to the default expansion, which
+ // will generate a load from the constant pool.
+ if (isConstant)
+ return SDValue();
+
+ // Use VDUP for non-constant splats.
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ if (usesOnlyOneValue && EltSize <= 32)
+ return DAG.getNode(ARMISD::VDUP, dl, VT, Value);
+
+ // Vectors with 32- or 64-bit elements can be built by directly assigning
+ // the subregisters.
+ if (EltSize >= 32) {
+ // Do the expansion with floating-point types, since that is what the VFP
+ // registers are defined to use, and since i64 is not legal.
+ EVT EltVT = EVT::getFloatingPointVT(EltSize);
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
+ SDValue Val = DAG.getUNDEF(VecVT);
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue Elt = Op.getOperand(i);
+ if (Elt.getOpcode() == ISD::UNDEF)
+ continue;
+ Elt = DAG.getNode(ISD::BIT_CONVERT, dl, EltVT, Elt);
+ Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Val, Elt,
+ DAG.getConstant(i, MVT::i32));
+ }
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val);
}
return SDValue();
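Illustrative inputs for the BUILD_VECTOR cases above (a sketch of the intended mappings, not verbatim output):

   //   (v4i32 build_vector x, x, x, x)     -> (ARMISD::VDUP x)
   //                                          (non-constant splat, elt <= 32)
   //   (v4i32 build_vector x, undef, undef, undef)
   //                                       -> (scalar_to_vector x)
   //   (v4i32 build_vector c0, c1, c2, c3) -> SDValue(), i.e. the default
   //                                          constant-pool expansion
   //   (v2f64 build_vector a, b)           -> insert_vector_elt chain over an
   //                                          undef vector (elt size >= 32)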
@@ -3083,8 +3088,8 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
// If the shuffle is not directly supported and it has 4 elements, use
// the PerfectShuffle-generated table to synthesize it from other shuffles.
- if (VT.getVectorNumElements() == 4 &&
- (VT.is128BitVector() || VT.is64BitVector())) {
+ unsigned NumElts = VT.getVectorNumElements();
+ if (NumElts == 4) {
unsigned PFIndexes[4];
for (unsigned i = 0; i != 4; ++i) {
if (ShuffleMask[i] < 0)
@@ -3096,7 +3101,6 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
// Compute the index in the perfect shuffle table.
unsigned PFTableIndex =
PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
-
unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
unsigned Cost = (PFEntry >> 30);
@@ -3104,6 +3108,29 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
}
+ // Implement shuffles with 32- or 64-bit elements as subreg copies.
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ if (EltSize >= 32) {
+ // Do the expansion with floating-point types, since that is what the VFP
+ // registers are defined to use, and since i64 is not legal.
+ EVT EltVT = EVT::getFloatingPointVT(EltSize);
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
+ V1 = DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, V1);
+ V2 = DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, V2);
+ SDValue Val = DAG.getUNDEF(VecVT);
+ for (unsigned i = 0; i < NumElts; ++i) {
+ if (ShuffleMask[i] < 0)
+ continue;
+ SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
+ ShuffleMask[i] < (int)NumElts ? V1 : V2,
+ DAG.getConstant(ShuffleMask[i] & (NumElts-1),
+ MVT::i32));
+ Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Val,
+ Elt, DAG.getConstant(i, MVT::i32));
+ }
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val);
+ }
+
return SDValue();
}
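A worked instance of the subreg-copy shuffle path above (sketch):

   //   (v2f64 vector_shuffle V1, V2, <0, 3>)
   //     -> Elt0 = (extract_vector_elt V1, 0)
   //        Elt1 = (extract_vector_elt V2, 1)   ; 3 & (NumElts-1) = 1
   //        then both are inserted into an undef v2f64 and the result is
   //        bit_convert'ed back to the original vector type.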
@@ -3158,9 +3185,11 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
- case ISD::RETURNADDR: break;
+ case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
+ case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
+ case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
Subtarget);
case ISD::BIT_CONVERT: return ExpandBIT_CONVERT(Op.getNode(), DAG);
@@ -3667,6 +3696,62 @@ static SDValue PerformSUBCombine(SDNode *N,
return SDValue();
}
+static SDValue PerformMULCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ SelectionDAG &DAG = DCI.DAG;
+
+ if (Subtarget->isThumb1Only())
+ return SDValue();
+
+ if (DAG.getMachineFunction().
+ getFunction()->hasFnAttr(Attribute::OptimizeForSize))
+ return SDValue();
+
+ if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i32)
+ return SDValue();
+
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!C)
+ return SDValue();
+
+ uint64_t MulAmt = C->getZExtValue();
+ unsigned ShiftAmt = CountTrailingZeros_64(MulAmt);
+ ShiftAmt = ShiftAmt & (32 - 1);
+ SDValue V = N->getOperand(0);
+ DebugLoc DL = N->getDebugLoc();
+
+ SDValue Res;
+ MulAmt >>= ShiftAmt;
+ if (isPowerOf2_32(MulAmt - 1)) {
+ // (mul x, 2^N + 1) => (add (shl x, N), x)
+ Res = DAG.getNode(ISD::ADD, DL, VT,
+ V, DAG.getNode(ISD::SHL, DL, VT,
+ V, DAG.getConstant(Log2_32(MulAmt-1),
+ MVT::i32)));
+ } else if (isPowerOf2_32(MulAmt + 1)) {
+ // (mul x, 2^N - 1) => (sub (shl x, N), x)
+ Res = DAG.getNode(ISD::SUB, DL, VT,
+ DAG.getNode(ISD::SHL, DL, VT,
+ V, DAG.getConstant(Log2_32(MulAmt+1),
+ MVT::i32)),
+ V);
+ } else
+ return SDValue();
+
+ if (ShiftAmt != 0)
+ Res = DAG.getNode(ISD::SHL, DL, VT, Res,
+ DAG.getConstant(ShiftAmt, MVT::i32));
+
+ // Do not add new nodes to DAG combiner worklist.
+ DCI.CombineTo(N, Res, false);
+ return SDValue();
+}
+
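A worked instance of the combine above (the arithmetic, shown as a sketch):

   // For (mul x, 10): MulAmt = 10 has one trailing zero, so ShiftAmt = 1 and
   // MulAmt >>= 1 leaves 5.  Since 5 - 1 = 4 is a power of two (Log2 = 2):
   //   (mul x, 10) => (shl (add (shl x, 2), x), 1)
   // i.e. x * 10 = ((x << 2) + x) << 1, two shifts and an add instead of a
   // multiply; on ARM the shifts can fold into shifter operands.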
/// PerformVMOVRRDCombine - Target-specific dag combine xforms for
/// ARMISD::VMOVRRD.
static SDValue PerformVMOVRRDCombine(SDNode *N,
@@ -4053,6 +4138,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
default: break;
case ISD::ADD: return PerformADDCombine(N, DCI);
case ISD::SUB: return PerformSUBCombine(N, DCI);
+ case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);
case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
case ISD::SHL:
@@ -4432,9 +4518,11 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
bool isSEXTLoad = false;
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
VT = LD->getMemoryVT();
+ Ptr = LD->getBasePtr();
isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
VT = ST->getMemoryVT();
+ Ptr = ST->getBasePtr();
} else
return false;
@@ -4442,13 +4530,25 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
bool isLegal = false;
if (Subtarget->isThumb2())
isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
- isInc, DAG);
+ isInc, DAG);
else
isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
isInc, DAG);
if (!isLegal)
return false;
+ if (Ptr != Base) {
+ // Swap base ptr and offset to catch more post-index load / store when
+ // it's legal. In Thumb2 mode, offset must be an immediate.
+ if (Ptr == Offset && Op->getOpcode() == ISD::ADD &&
+ !Subtarget->isThumb2())
+ std::swap(Base, Offset);
+
+ // Post-indexed load / store update the base pointer.
+ if (Ptr != Base)
+ return false;
+ }
+
AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
return true;
}
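A concrete case for the base/offset swap above (a sketch; register names are illustrative):

   // For a post-indexed load like "ldr r0, [r1], r2", the address update is
   // (add r1, r2), but the matcher may have bound Base = r2 and Offset = r1.
   // A post-indexed access must write back into the pointer register Ptr, so
   // when Ptr == Offset the operands are swapped (only for ISD::ADD, and not
   // in Thumb2 mode, where the offset must be an immediate).  If Ptr is still
   // not the base afterwards, post-indexing cannot apply and we return false.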
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index d8a230f..9c7517c 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -236,13 +236,19 @@ namespace llvm {
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const;
- virtual const ARMSubtarget* getSubtarget() const {
+ const ARMSubtarget* getSubtarget() const {
return Subtarget;
}
+ /// getRegClassFor - Return the register class that should be used for the
+ /// specified value type.
+ virtual TargetRegisterClass *getRegClassFor(EVT VT) const;
+
/// getFunctionAlignment - Return the Log2 alignment of this function.
virtual unsigned getFunctionAlignment(const Function *F) const;
+ Sched::Preference getSchedulingPreference(SDNode *N) const;
+
bool isShuffleMaskLegal(const SmallVectorImpl<int> &M, EVT VT) const;
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
@@ -281,7 +287,8 @@ namespace llvm {
DebugLoc dl, SelectionDAG &DAG,
const CCValAssign &VA,
ISD::ArgFlagsTy Flags) const;
- SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
@@ -296,20 +303,12 @@ namespace llvm {
SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
- SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
- SDValue Chain,
- SDValue Dst, SDValue Src,
- SDValue Size, unsigned Align,
- bool isVolatile, bool AlwaysInline,
- const Value *DstSV,
- uint64_t DstSVOff,
- const Value *SrcSV,
- uint64_t SrcSVOff) const;
SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index b466d0d..d487df1 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -228,7 +228,7 @@ class PseudoInst<dag oops, dag iops, InstrItinClass itin,
"", itin> {
let OutOperandList = oops;
let InOperandList = iops;
- let AsmString = asm;
+ let AsmString = asm;
let Pattern = pattern;
}
@@ -240,7 +240,7 @@ class I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
: InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
let OutOperandList = oops;
let InOperandList = !con(iops, (ins pred:$p));
- let AsmString = !strconcat(opc, !strconcat("${p}", asm));
+ let AsmString = !strconcat(opc, !strconcat("${p}", asm));
let Pattern = pattern;
list<Predicate> Predicates = [IsARM];
}
@@ -252,7 +252,7 @@ class InoP<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
: InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
let OutOperandList = oops;
let InOperandList = iops;
- let AsmString = !strconcat(opc, asm);
+ let AsmString = !strconcat(opc, asm);
let Pattern = pattern;
let isPredicable = 0;
list<Predicate> Predicates = [IsARM];
@@ -268,7 +268,7 @@ class sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
: InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
let OutOperandList = oops;
let InOperandList = !con(iops, (ins pred:$p, cc_out:$s));
- let AsmString = !strconcat(opc, !strconcat("${p}${s}", asm));
+ let AsmString = !strconcat(opc, !strconcat("${p}${s}", asm));
let Pattern = pattern;
list<Predicate> Predicates = [IsARM];
}
@@ -280,7 +280,7 @@ class XI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
: InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
let OutOperandList = oops;
let InOperandList = iops;
- let AsmString = asm;
+ let AsmString = asm;
let Pattern = pattern;
list<Predicate> Predicates = [IsARM];
}
@@ -959,7 +959,7 @@ class ThumbI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
: InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
let OutOperandList = oops;
let InOperandList = iops;
- let AsmString = asm;
+ let AsmString = asm;
let Pattern = pattern;
list<Predicate> Predicates = [IsThumb];
}
@@ -995,7 +995,7 @@ class Thumb1I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
: InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
let OutOperandList = oops;
let InOperandList = iops;
- let AsmString = asm;
+ let AsmString = asm;
let Pattern = pattern;
list<Predicate> Predicates = [IsThumb1Only];
}
@@ -1140,7 +1140,7 @@ class Thumb2sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
: InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
let OutOperandList = oops;
let InOperandList = !con(iops, (ins pred:$p, cc_out:$s));
- let AsmString = !strconcat(opc, !strconcat("${s}${p}", asm));
+ let AsmString = !strconcat(opc, !strconcat("${s}${p}", asm));
let Pattern = pattern;
list<Predicate> Predicates = [IsThumb2];
}
@@ -1152,7 +1152,7 @@ class Thumb2XI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
: InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
let OutOperandList = oops;
let InOperandList = iops;
- let AsmString = asm;
+ let AsmString = asm;
let Pattern = pattern;
list<Predicate> Predicates = [IsThumb2];
}
@@ -1163,7 +1163,7 @@ class ThumbXI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
: InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
let OutOperandList = oops;
let InOperandList = iops;
- let AsmString = asm;
+ let AsmString = asm;
let Pattern = pattern;
list<Predicate> Predicates = [IsThumb1Only];
}
@@ -1280,7 +1280,7 @@ class VFPI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
: InstARM<am, sz, im, f, VFPDomain, cstr, itin> {
let OutOperandList = oops;
let InOperandList = !con(iops, (ins pred:$p));
- let AsmString = !strconcat(opc, !strconcat("${p}", asm));
+ let AsmString = !strconcat(opc, !strconcat("${p}", asm));
let Pattern = pattern;
list<Predicate> Predicates = [HasVFP2];
}
@@ -1292,7 +1292,7 @@ class VFPXI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
: InstARM<am, sz, im, f, VFPDomain, cstr, itin> {
let OutOperandList = oops;
let InOperandList = iops;
- let AsmString = asm;
+ let AsmString = asm;
let Pattern = pattern;
list<Predicate> Predicates = [HasVFP2];
}
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index ce5f2f8..f3156d9 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -46,6 +46,7 @@ def SDT_ARMPICAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
def SDT_ARMThreadPointer : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>;
def SDT_ARMEH_SJLJ_Setjmp : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisPtrTy<1>,
SDTCisInt<2>]>;
+def SDT_ARMEH_SJLJ_Longjmp: SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisInt<1>]>;
def SDT_ARMMEMBARRIERV7 : SDTypeProfile<0, 0, []>;
def SDT_ARMSYNCBARRIERV7 : SDTypeProfile<0, 0, []>;
@@ -100,7 +101,10 @@ def ARMsra_flag : SDNode<"ARMISD::SRA_FLAG", SDTIntUnaryOp, [SDNPOutFlag]>;
def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOp, [SDNPInFlag ]>;
def ARMthread_pointer: SDNode<"ARMISD::THREAD_POINTER", SDT_ARMThreadPointer>;
-def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP", SDT_ARMEH_SJLJ_Setjmp>;
+def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP",
+ SDT_ARMEH_SJLJ_Setjmp, [SDNPHasChain]>;
+def ARMeh_sjlj_longjmp: SDNode<"ARMISD::EH_SJLJ_LONGJMP",
+ SDT_ARMEH_SJLJ_Longjmp, [SDNPHasChain]>;
def ARMMemBarrierV7 : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIERV7,
[SDNPHasChain]>;
@@ -128,6 +132,8 @@ def NoVFP : Predicate<"!Subtarget->hasVFP2()">;
def HasVFP2 : Predicate<"Subtarget->hasVFP2()">;
def HasVFP3 : Predicate<"Subtarget->hasVFP3()">;
def HasNEON : Predicate<"Subtarget->hasNEON()">;
+def HasDivide : Predicate<"Subtarget->hasDivide()">;
+def HasT2ExtractPack : Predicate<"Subtarget->hasT2ExtractPack()">;
def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">;
def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">;
def IsThumb : Predicate<"Subtarget->isThumb()">;
@@ -654,12 +660,12 @@ PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx,
let Defs = [SP], Uses = [SP], hasSideEffects = 1 in {
def ADJCALLSTACKUP :
PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2, pred:$p), NoItinerary,
- "@ ADJCALLSTACKUP $amt1",
+ "${:comment} ADJCALLSTACKUP $amt1",
[(ARMcallseq_end timm:$amt1, timm:$amt2)]>;
def ADJCALLSTACKDOWN :
PseudoInst<(outs), (ins i32imm:$amt, pred:$p), NoItinerary,
- "@ ADJCALLSTACKDOWN $amt",
+ "${:comment} ADJCALLSTACKDOWN $amt",
[(ARMcallseq_start timm:$amt)]>;
}
@@ -789,8 +795,11 @@ def DBG : AI<(outs), (ins i32imm:$opt), MiscFrm, NoItinerary, "dbg", "\t$opt",
}
// A5.4 Permanently UNDEFINED instructions.
-def TRAP : AI<(outs), (ins), MiscFrm, NoItinerary, "trap", "",
- [/* For disassembly only; pattern left blank */]>,
+// FIXME: Temporarily emitted as raw bytes until this pseudo-op is added to
+// binutils.
+let isBarrier = 1, isTerminator = 1 in
+def TRAP : AXI<(outs), (ins), MiscFrm, NoItinerary,
+ ".long 0xe7ffdefe ${:comment} trap", [(trap)]>,
Requires<[IsARM]> {
let Inst{27-25} = 0b011;
let Inst{24-20} = 0b11111;
@@ -843,25 +852,19 @@ def PICSTRB : AXI2stb<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
// LEApcrel - Load a pc-relative address into a register without offending the
// assembler.
+let neverHasSideEffects = 1 in {
+let isReMaterializable = 1 in
def LEApcrel : AXI1<0x0, (outs GPR:$dst), (ins i32imm:$label, pred:$p),
Pseudo, IIC_iALUi,
- !strconcat(!strconcat(".set ${:private}PCRELV${:uid}, ($label-(",
- "${:private}PCRELL${:uid}+8))\n"),
- !strconcat("${:private}PCRELL${:uid}:\n\t",
- "add$p\t$dst, pc, #${:private}PCRELV${:uid}")),
- []>;
+ "adr$p\t$dst, #$label", []>;
def LEApcrelJT : AXI1<0x0, (outs GPR:$dst),
(ins i32imm:$label, nohash_imm:$id, pred:$p),
- Pseudo, IIC_iALUi,
- !strconcat(!strconcat(".set ${:private}PCRELV${:uid}, "
- "(${label}_${id}-(",
- "${:private}PCRELL${:uid}+8))\n"),
- !strconcat("${:private}PCRELL${:uid}:\n\t",
- "add$p\t$dst, pc, #${:private}PCRELV${:uid}")),
- []> {
+ Pseudo, IIC_iALUi,
+ "adr$p\t$dst, #${label}_${id}", []> {
let Inst{25} = 1;
}
+} // neverHasSideEffects
//===----------------------------------------------------------------------===//
// Control Flow Instructions.
@@ -1134,7 +1137,8 @@ def LDR : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, IIC_iLoadr,
[(set GPR:$dst, (load addrmode2:$addr))]>;
// Special LDR for loads from non-pc-relative constpools.
-let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1 in
+let canFoldAsLoad = 1, mayLoad = 1, neverHasSideEffects = 1,
+ isReMaterializable = 1 in
def LDRcp : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, IIC_iLoadr,
"ldr", "\t$dst, $addr", []>;
@@ -1156,7 +1160,7 @@ def LDRSB : AI3ldsb<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm,
IIC_iLoadr, "ldrsb", "\t$dst, $addr",
[(set GPR:$dst, (sextloadi8 addrmode3:$addr))]>;
-let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
// Load doubleword
def LDRD : AI3ldd<(outs GPR:$dst1, GPR:$dst2), (ins addrmode3:$addr), LdMiscFrm,
IIC_iLoadr, "ldrd", "\t$dst1, $addr",
@@ -1215,7 +1219,7 @@ def LDRD_POST : AI3lddpo<(outs GPR:$dst1, GPR:$dst2, GPR:$base_wb),
"ldrd", "\t$dst1, $dst2, [$base], $offset", "$base = $base_wb", []>,
Requires<[IsARM, HasV5TE]>;
-}
+} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
// LDRT, LDRBT, LDRSBT, LDRHT, LDRSHT are for disassembly only.
@@ -1264,7 +1268,7 @@ def STRB : AI2stb<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, IIC_iStorer,
[(truncstorei8 GPR:$src, addrmode2:$addr)]>;
// Store doubleword
-let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in
def STRD : AI3std<(outs), (ins GPR:$src1, GPR:$src2, addrmode3:$addr),
StMiscFrm, IIC_iStorer,
"strd", "\t$src1, $addr", []>, Requires<[IsARM, HasV5TE]>;
@@ -1356,7 +1360,7 @@ def STRHT: AI3sthpo<(outs GPR:$base_wb),
// Load / store multiple Instructions.
//
-let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
def LDM : AXI4ld<(outs), (ins addrmode4:$addr, pred:$p,
reglist:$dsts, variable_ops),
IndexModeNone, LdStMulFrm, IIC_iLoadm,
@@ -1367,9 +1371,9 @@ def LDM_UPD : AXI4ld<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
IndexModeUpd, LdStMulFrm, IIC_iLoadm,
"ldm${addr:submode}${p}\t$addr!, $dsts",
"$addr.addr = $wb", []>;
-} // mayLoad, hasExtraDefRegAllocReq
+} // mayLoad, neverHasSideEffects, hasExtraDefRegAllocReq
-let mayStore = 1, hasExtraSrcRegAllocReq = 1 in {
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
def STM : AXI4st<(outs), (ins addrmode4:$addr, pred:$p,
reglist:$srcs, variable_ops),
IndexModeNone, LdStMulFrm, IIC_iStorem,
@@ -1380,7 +1384,7 @@ def STM_UPD : AXI4st<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
IndexModeUpd, LdStMulFrm, IIC_iStorem,
"stm${addr:submode}${p}\t$addr!, $srcs",
"$addr.addr = $wb", []>;
-} // mayStore, hasExtraSrcRegAllocReq
+} // mayStore, neverHasSideEffects, hasExtraSrcRegAllocReq
//===----------------------------------------------------------------------===//
// Move Instructions.
@@ -2198,6 +2202,7 @@ def : ARMPat<(ARMcmpZ GPR:$src, so_imm_neg:$imm),
// Conditional moves
// FIXME: should be able to write a pattern for ARMcmov, but can't use
// a two-value operand where a dag node expects two operands. :(
+let neverHasSideEffects = 1 in {
def MOVCCr : AI1<0b1101, (outs GPR:$dst), (ins GPR:$false, GPR:$true), DPFrm,
IIC_iCMOVr, "mov", "\t$dst, $true",
[/*(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc, CCR:$ccr))*/]>,
@@ -2221,6 +2226,7 @@ def MOVCCi : AI1<0b1101, (outs GPR:$dst),
RegConstraint<"$false = $dst">, UnaryDP {
let Inst{25} = 1;
}
+} // neverHasSideEffects
//===----------------------------------------------------------------------===//
// Atomic operations intrinsics
@@ -2528,12 +2534,12 @@ let Defs =
def Int_eh_sjlj_setjmp : XI<(outs), (ins GPR:$src, GPR:$val),
AddrModeNone, SizeSpecial, IndexModeNone,
Pseudo, NoItinerary,
- "str\tsp, [$src, #+8] @ eh_setjmp begin\n\t"
+ "str\tsp, [$src, #+8] ${:comment} eh_setjmp begin\n\t"
"add\t$val, pc, #8\n\t"
"str\t$val, [$src, #+4]\n\t"
"mov\tr0, #0\n\t"
"add\tpc, pc, #0\n\t"
- "mov\tr0, #1 @ eh_setjmp end", "",
+ "mov\tr0, #1 ${:comment} eh_setjmp end", "",
[(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>,
Requires<[IsARM, HasVFP2]>;
}
@@ -2543,16 +2549,30 @@ let Defs =
def Int_eh_sjlj_setjmp_nofp : XI<(outs), (ins GPR:$src, GPR:$val),
AddrModeNone, SizeSpecial, IndexModeNone,
Pseudo, NoItinerary,
- "str\tsp, [$src, #+8] @ eh_setjmp begin\n\t"
+ "str\tsp, [$src, #+8] ${:comment} eh_setjmp begin\n\t"
"add\t$val, pc, #8\n\t"
"str\t$val, [$src, #+4]\n\t"
"mov\tr0, #0\n\t"
"add\tpc, pc, #0\n\t"
- "mov\tr0, #1 @ eh_setjmp end", "",
+ "mov\tr0, #1 ${:comment} eh_setjmp end", "",
[(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>,
Requires<[IsARM, NoVFP]>;
}
+// FIXME: Non-Darwin version(s)
+let isBarrier = 1, hasSideEffects = 1, isTerminator = 1,
+ Defs = [ R7, LR, SP ] in {
+def Int_eh_sjlj_longjmp : XI<(outs), (ins GPR:$src, GPR:$scratch),
+ AddrModeNone, SizeSpecial, IndexModeNone,
+ Pseudo, NoItinerary,
+ "ldr\tsp, [$src, #8]\n\t"
+ "ldr\t$scratch, [$src, #4]\n\t"
+ "ldr\tr7, [$src]\n\t"
+ "bx\t$scratch", "",
+ [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>,
+ Requires<[IsARM, IsDarwin]>;
+}
+
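For reference, the three slots these SjLj sequences touch form a small buffer: setjmp stores sp at [$src, #8] and the resume address at [$src, #4], and longjmp additionally reloads r7 from [$src, #0]. A minimal C++ sketch of that layout (struct and field names are illustrative, not from the source):

  struct SjLjJmpBuf {    // hypothetical name; layout per the asm strings above
    void *FramePtr;      // [$src, #0]: r7, restored by longjmp
    void *ResumePC;      // [$src, #4]: address the setjmp sequence resumes at
    void *StackPtr;      // [$src, #8]: sp, saved first by setjmp
  };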
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index d5ce2b8..197ec16 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -115,7 +115,7 @@ def h64imm : Operand<i64> {
// NEON load / store instructions
//===----------------------------------------------------------------------===//
-let mayLoad = 1 in {
+let mayLoad = 1, neverHasSideEffects = 1 in {
// Use vldmia to load a Q register as a D register pair.
// This is equivalent to VLDMD except that it has a Q register operand
// instead of a pair of D registers.
@@ -123,11 +123,6 @@ def VLDMQ
: AXDI5<(outs QPR:$dst), (ins addrmode5:$addr, pred:$p),
IndexModeNone, IIC_fpLoadm,
"vldm${addr:submode}${p}\t${addr:base}, ${dst:dregpair}", "", []>;
-def VLDMQ_UPD
- : AXDI5<(outs QPR:$dst, GPR:$wb), (ins addrmode5:$addr, pred:$p),
- IndexModeUpd, IIC_fpLoadm,
- "vldm${addr:submode}${p}\t${addr:base}!, ${dst:dregpair}",
- "$addr.base = $wb", []>;
// Use vld1 to load a Q register as a D register pair.
// This alternative to VLDMQ allows an alignment to be specified.
@@ -135,13 +130,9 @@ def VLDMQ_UPD
def VLD1q
: NLdSt<0,0b10,0b1010,0b1100, (outs QPR:$dst), (ins addrmode6:$addr),
IIC_VLD1, "vld1", "64", "${dst:dregpair}, $addr", "", []>;
-def VLD1q_UPD
- : NLdSt<0,0b10,0b1010,0b1100, (outs QPR:$dst, GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, "vld1", "64",
- "${dst:dregpair}, $addr$offset", "$addr.addr = $wb", []>;
-} // mayLoad = 1
+} // mayLoad = 1, neverHasSideEffects = 1
-let mayStore = 1 in {
+let mayStore = 1, neverHasSideEffects = 1 in {
// Use vstmia to store a Q register as a D register pair.
// This is equivalent to VSTMD except that it has a Q register operand
// instead of a pair of D registers.
@@ -149,11 +140,6 @@ def VSTMQ
: AXDI5<(outs), (ins QPR:$src, addrmode5:$addr, pred:$p),
IndexModeNone, IIC_fpStorem,
"vstm${addr:submode}${p}\t${addr:base}, ${src:dregpair}", "", []>;
-def VSTMQ_UPD
- : AXDI5<(outs GPR:$wb), (ins QPR:$src, addrmode5:$addr, pred:$p),
- IndexModeUpd, IIC_fpStorem,
- "vstm${addr:submode}${p}\t${addr:base}!, ${src:dregpair}",
- "$addr.base = $wb", []>;
// Use vst1 to store a Q register as a D register pair.
// This alternative to VSTMQ allows an alignment to be specified.
@@ -161,14 +147,9 @@ def VSTMQ_UPD
def VST1q
: NLdSt<0,0b00,0b1010,0b1100, (outs), (ins addrmode6:$addr, QPR:$src),
IIC_VST, "vst1", "64", "${src:dregpair}, $addr", "", []>;
-def VST1q_UPD
- : NLdSt<0,0b00,0b1010,0b1100, (outs GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset, QPR:$src),
- IIC_VST, "vst1", "64", "{$src:dregpair}, $addr$offset",
- "$addr.addr = $wb", []>;
-} // mayStore = 1
+} // mayStore = 1, neverHasSideEffects = 1
-let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
// VLD1 : Vector Load (multiple single elements)
class VLD1D<bits<4> op7_4, string Dt>
@@ -492,9 +473,9 @@ def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32">;
// VLD3DUP : Vector Load (single 3-element structure to all lanes)
// VLD4DUP : Vector Load (single 4-element structure to all lanes)
// FIXME: Not yet implemented.
-} // mayLoad = 1, hasExtraDefRegAllocReq = 1
+} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
-let mayStore = 1, hasExtraSrcRegAllocReq = 1 in {
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
// VST1 : Vector Store (multiple single elements)
class VST1D<bits<4> op7_4, string Dt>
@@ -807,7 +788,7 @@ def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32">;
def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16">;
def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32">;
-} // mayStore = 1, hasExtraSrcRegAllocReq = 1
+} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1
//===----------------------------------------------------------------------===//
@@ -815,27 +796,32 @@ def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32">;
//===----------------------------------------------------------------------===//
// Extract D sub-registers of Q registers.
-// (arm_dsubreg_0 is 5; arm_dsubreg_1 is 6)
def DSubReg_i8_reg : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(5 + N->getZExtValue() / 8, MVT::i32);
+ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+ return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/8, MVT::i32);
}]>;
def DSubReg_i16_reg : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(5 + N->getZExtValue() / 4, MVT::i32);
+ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+ return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/4, MVT::i32);
}]>;
def DSubReg_i32_reg : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(5 + N->getZExtValue() / 2, MVT::i32);
+ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+ return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/2, MVT::i32);
}]>;
def DSubReg_f64_reg : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(5 + N->getZExtValue(), MVT::i32);
+ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+ return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue(), MVT::i32);
}]>;
def DSubReg_f64_other_reg : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(5 + (1 - N->getZExtValue()), MVT::i32);
+ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+ return CurDAG->getTargetConstant(ARM::dsub_0 + (1 - N->getZExtValue()),
+ MVT::i32);
}]>;
// Extract S sub-registers of Q/D registers.
-// (arm_ssubreg_0 is 1; arm_ssubreg_1 is 2; etc.)
def SSubReg_f32_reg : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(1 + N->getZExtValue(), MVT::i32);
+ assert(ARM::ssub_3 == ARM::ssub_0+3 && "Unexpected subreg numbering");
+ return CurDAG->getTargetConstant(ARM::ssub_0 + N->getZExtValue(), MVT::i32);
}]>;
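Concretely, each xform divides the element index by the number of elements per D register and offsets from the first subregister index; the asserts guard the consecutive-numbering assumption. A standalone sketch of the i8 case (enum values are illustrative; only their consecutiveness matters):

  #include <cassert>

  enum { dsub_0 = 0, dsub_1, dsub_2, dsub_3, dsub_4, dsub_5, dsub_6, dsub_7 };

  // Byte lane 0-15 of a Q register lives in D subregister (lane / 8).
  unsigned dsubForByteLane(unsigned Lane) {
    assert(dsub_7 == dsub_0 + 7 && "Unexpected subreg numbering");
    return dsub_0 + Lane / 8;
  }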
// Translate lane numbers from Q registers to D subregs.
@@ -2829,11 +2815,21 @@ def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
// VMOV : Vector Move (Register)
+let neverHasSideEffects = 1 in {
def VMOVDneon: N3VX<0, 0, 0b10, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src),
N3RegFrm, IIC_VMOVD, "vmov", "$dst, $src", "", []>;
def VMOVQ : N3VX<0, 0, 0b10, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src),
N3RegFrm, IIC_VMOVD, "vmov", "$dst, $src", "", []>;
+// Pseudo vector move instructions for QQ and QQQQ registers. These should
+// be expanded after register allocation is completed.
+def VMOVQQ : PseudoInst<(outs QQPR:$dst), (ins QQPR:$src),
+ NoItinerary, "${:comment} vmov\t$dst, $src", []>;
+
+def VMOVQQQQ : PseudoInst<(outs QQQQPR:$dst), (ins QQQQPR:$src),
+ NoItinerary, "${:comment} vmov\t$dst, $src", []>;
+} // neverHasSideEffects
+
// VMOV : Vector Move (Immediate)
// VMOV_get_imm8 xform function: convert build_vector to VMOV.i8 imm.
@@ -2871,6 +2867,7 @@ def vmovImm64 : PatLeaf<(build_vector), [{
// Note: Some of the cmode bits in the following VMOV instructions need to
// be encoded based on the immed values.
+let isReMaterializable = 1 in {
def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$dst),
(ins h8imm:$SIMM), IIC_VMOVImm,
"vmov", "i8", "$dst, $SIMM", "",
@@ -2906,6 +2903,7 @@ def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$dst),
(ins h64imm:$SIMM), IIC_VMOVImm,
"vmov", "i64", "$dst, $SIMM", "",
[(set QPR:$dst, (v2i64 vmovImm64:$SIMM))]>;
+} // isReMaterializable
// VMOV : Vector Get Lane (move scalar to ARM core register)
@@ -3018,11 +3016,11 @@ def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
(INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
def : Pat<(v2f32 (scalar_to_vector SPR:$src)),
- (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, arm_ssubreg_0)>;
+ (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))),
- (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, arm_dsubreg_0)>;
+ (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
- (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, arm_ssubreg_0)>;
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
def : Pat<(v8i8 (scalar_to_vector GPR:$src)),
(VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
@@ -3034,15 +3032,15 @@ def : Pat<(v2i32 (scalar_to_vector GPR:$src)),
def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
- arm_dsubreg_0)>;
+ dsub_0)>;
def : Pat<(v8i16 (scalar_to_vector GPR:$src)),
(INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
(VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
- arm_dsubreg_0)>;
+ dsub_0)>;
def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
(INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
(VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
- arm_dsubreg_0)>;
+ dsub_0)>;
// VDUP : Vector Duplicate (from ARM core register to all elements)
@@ -3376,27 +3374,27 @@ def VTBX4
class N2VSPat<SDNode OpNode, ValueType ResTy, ValueType OpTy, NeonI Inst>
: NEONFPPat<(ResTy (OpNode SPR:$a)),
(EXTRACT_SUBREG (OpTy (Inst (INSERT_SUBREG (OpTy (IMPLICIT_DEF)),
- SPR:$a, arm_ssubreg_0))),
- arm_ssubreg_0)>;
+ SPR:$a, ssub_0))),
+ ssub_0)>;
class N3VSPat<SDNode OpNode, NeonI Inst>
: NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
(EXTRACT_SUBREG (v2f32
(Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
- SPR:$a, arm_ssubreg_0),
+ SPR:$a, ssub_0),
(INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
- SPR:$b, arm_ssubreg_0))),
- arm_ssubreg_0)>;
+ SPR:$b, ssub_0))),
+ ssub_0)>;
class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
: NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
(EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
- SPR:$acc, arm_ssubreg_0),
+ SPR:$acc, ssub_0),
(INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
- SPR:$a, arm_ssubreg_0),
+ SPR:$a, ssub_0),
(INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
- SPR:$b, arm_ssubreg_0)),
- arm_ssubreg_0)>;
+ SPR:$b, ssub_0)),
+ ssub_0)>;
// These need separate instructions because they must use DPR_VFP2 register
// class which have SPR sub-registers.
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index e3ca536..40f924b 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -127,12 +127,12 @@ def t_addrmode_sp : Operand<i32>,
let Defs = [SP], Uses = [SP], hasSideEffects = 1 in {
def tADJCALLSTACKUP :
PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2), NoItinerary,
- "@ tADJCALLSTACKUP $amt1",
+ "${:comment} tADJCALLSTACKUP $amt1",
[(ARMcallseq_end imm:$amt1, imm:$amt2)]>, Requires<[IsThumb1Only]>;
def tADJCALLSTACKDOWN :
PseudoInst<(outs), (ins i32imm:$amt), NoItinerary,
- "@ tADJCALLSTACKDOWN $amt",
+ "${:comment} tADJCALLSTACKDOWN $amt",
[(ARMcallseq_start imm:$amt)]>, Requires<[IsThumb1Only]>;
}
@@ -254,14 +254,14 @@ def tADDspr : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
// Pseudo instruction that will expand into a tSUBspi + a copy.
let usesCustomInserter = 1 in { // Expanded after instruction selection.
def tSUBspi_ : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, t_imm_s4:$rhs),
- NoItinerary, "@ sub\t$dst, $rhs", []>;
+ NoItinerary, "${:comment} sub\t$dst, $rhs", []>;
def tADDspr_ : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs),
- NoItinerary, "@ add\t$dst, $rhs", []>;
+ NoItinerary, "${:comment} add\t$dst, $rhs", []>;
let Defs = [CPSR] in
def tANDsp : PseudoInst<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
- NoItinerary, "@ and\t$dst, $rhs", []>;
+ NoItinerary, "${:comment} and\t$dst, $rhs", []>;
} // usesCustomInserter
//===----------------------------------------------------------------------===//
@@ -374,7 +374,7 @@ let isBranch = 1, isTerminator = 1 in {
// Far jump
let Defs = [LR] in
def tBfar : TIx2<0b11110, 0b11, 1, (outs), (ins brtarget:$target), IIC_Br,
- "bl\t$target\t@ far jump",[]>;
+ "bl\t$target\t${:comment} far jump",[]>;
def tBR_JTr : T1JTI<(outs),
(ins tGPR:$target, jtblock_operand:$jt, i32imm:$id),
@@ -417,9 +417,13 @@ def tSVC : T1pI<(outs), (ins i32imm:$svc), IIC_Br, "svc", "\t$svc", []>,
}
}
-// A8.6.16 B: Encoding T1 -- for disassembly only
+// A8.6.16 B: Encoding T1
// If Inst{11-8} == 0b1110 then UNDEFINED
-def tTRAP : T1I<(outs), (ins), IIC_Br, "trap", []>, Encoding16 {
+// FIXME: Temporarily emitted as raw bytes until this pseudo-op is added to
+// binutils.
+let isBarrier = 1, isTerminator = 1 in
+def tTRAP : TI<(outs), (ins), IIC_Br,
+ ".short 0xdefe ${:comment} trap", [(trap)]>, Encoding16 {
let Inst{15-12} = 0b1101;
let Inst{11-8} = 0b1110;
}
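The raw halfword agrees with the encoding fields pinned above: bits 15-12 = 0b1101 and bits 11-8 = 0b1110 give the 0xde byte, and 0xfe in the low byte selects the permanently undefined slot. A quick standalone C++ check (not from the source):

  #include <cassert>
  #include <cstdint>

  int main() {
    // Inst{15-12} = 0b1101, Inst{11-8} = 0b1110, low byte 0xfe.
    uint16_t Trap = (0xDu << 12) | (0xEu << 8) | 0xFEu;
    assert(Trap == 0xdefe);  // matches the ".short 0xdefe" directive
    return 0;
  }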
@@ -476,7 +480,7 @@ def tLDRspi : T1pIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), IIC_iLoadi,
// Special instruction for restore. It cannot clobber condition register
// when it's expanded by eliminateCallFramePseudoInstr().
-let canFoldAsLoad = 1, mayLoad = 1 in
+let canFoldAsLoad = 1, mayLoad = 1, neverHasSideEffects = 1 in
def tRestore : T1pIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), IIC_iLoadi,
"ldr", "\t$dst, $addr", []>,
T1LdStSP<{1,?,?}>;
@@ -490,7 +494,8 @@ def tLDRpci : T1pIs<(outs tGPR:$dst), (ins i32imm:$addr), IIC_iLoadi,
T1Encoding<{0,1,0,0,1,?}>; // A6.2 & A8.6.59
// Special LDR for loads from non-pc-relative constpools.
-let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1 in
+let canFoldAsLoad = 1, mayLoad = 1, neverHasSideEffects = 1,
+ isReMaterializable = 1 in
def tLDRcp : T1pIs<(outs tGPR:$dst), (ins i32imm:$addr), IIC_iLoadi,
"ldr", "\t$dst, $addr", []>,
T1LdStSP<{1,?,?}>;
@@ -527,7 +532,7 @@ def tSTRspi : T1pIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), IIC_iStorei,
[(store tGPR:$src, t_addrmode_sp:$addr)]>,
T1LdStSP<{0,?,?}>;
-let mayStore = 1 in {
+let mayStore = 1, neverHasSideEffects = 1 in {
// Special instruction for spill. It cannot clobber condition register
// when it's expanded by eliminateCallFramePseudoInstr().
def tSpill : T1pIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), IIC_iStorei,
@@ -540,7 +545,7 @@ def tSpill : T1pIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), IIC_iStorei,
//
// These require the base address to be written back or one of the loaded regs.
-let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
def tLDM : T1I<(outs),
(ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops),
IIC_iLoadm,
@@ -553,9 +558,9 @@ def tLDM_UPD : T1It<(outs tGPR:$wb),
"ldm${addr:submode}${p}\t$addr!, $dsts",
"$addr.addr = $wb", []>,
T1Encoding<{1,1,0,0,1,?}>; // A6.2 & A8.6.53
-} // mayLoad, hasExtraDefRegAllocReq
+} // mayLoad, neverHasSideEffects, hasExtraDefRegAllocReq
-let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in
def tSTM_UPD : T1It<(outs tGPR:$wb),
(ins addrmode4:$addr, pred:$p, reglist:$srcs, variable_ops),
IIC_iStorem,
@@ -866,11 +871,12 @@ def tUXTH : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr,
let usesCustomInserter = 1 in // Expanded after instruction selection.
def tMOVCCr_pseudo :
PseudoInst<(outs tGPR:$dst), (ins tGPR:$false, tGPR:$true, pred:$cc),
- NoItinerary, "@ tMOVCCr $cc",
+ NoItinerary, "${:comment} tMOVCCr $cc",
[/*(set tGPR:$dst, (ARMcmov tGPR:$false, tGPR:$true, imm:$cc))*/]>;
// 16-bit movcc in IT blocks for Thumb2.
+let neverHasSideEffects = 1 in {
def tMOVCCr : T1pIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iCMOVr,
"mov", "\t$dst, $rhs", []>,
T1Special<{1,0,?,?}>;
@@ -878,9 +884,12 @@ def tMOVCCr : T1pIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iCMOVr,
def tMOVCCi : T1pIt<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iCMOVi,
"mov", "\t$dst, $rhs", []>,
T1General<{1,0,0,?,?}>;
+} // neverHasSideEffects
// tLEApcrel - Load a pc-relative address into a register without offending the
// assembler.
+let neverHasSideEffects = 1 in {
+let isReMaterializable = 1 in
def tLEApcrel : T1I<(outs tGPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi,
"adr$p\t$dst, #$label", []>,
T1Encoding<{1,0,1,0,0,?}>; // A6.2 & A8.6.10
@@ -889,6 +898,7 @@ def tLEApcrelJT : T1I<(outs tGPR:$dst),
(ins i32imm:$label, nohash_imm:$id, pred:$p),
IIC_iALUi, "adr$p\t$dst, #${label}_${id}", []>,
T1Encoding<{1,0,1,0,0,?}>; // A6.2 & A8.6.10
+} // neverHasSideEffects
//===----------------------------------------------------------------------===//
// TLS Instructions
@@ -918,16 +928,32 @@ let Defs =
[ R0, R1, R2, R3, R4, R5, R6, R7, R12 ] in {
def tInt_eh_sjlj_setjmp : ThumbXI<(outs),(ins tGPR:$src, tGPR:$val),
AddrModeNone, SizeSpecial, NoItinerary,
- "str\t$val, [$src, #8]\t@ begin eh.setjmp\n"
+ "str\t$val, [$src, #8]\t${:comment} begin eh.setjmp\n"
"\tmov\t$val, pc\n"
- "\tadds\t$val, #9\n"
+ "\tadds\t$val, #7\n"
"\tstr\t$val, [$src, #4]\n"
"\tmovs\tr0, #0\n"
"\tb\t1f\n"
- "\tmovs\tr0, #1\t@ end eh.setjmp\n"
+ "\tmovs\tr0, #1\t${:comment} end eh.setjmp\n"
"1:", "",
[(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>;
}
+
+// FIXME: Non-Darwin version(s)
+let isBarrier = 1, hasSideEffects = 1, isTerminator = 1,
+ Defs = [ R7, LR, SP ] in {
+def tInt_eh_sjlj_longjmp : XI<(outs), (ins GPR:$src, GPR:$scratch),
+ AddrModeNone, SizeSpecial, IndexModeNone,
+ Pseudo, NoItinerary,
+ "ldr\t$scratch, [$src, #8]\n\t"
+ "mov\tsp, $scratch\n\t"
+ "ldr\t$scratch, [$src, #4]\n\t"
+ "ldr\tr7, [$src]\n\t"
+ "bx\t$scratch", "",
+ [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>,
+ Requires<[IsThumb, IsDarwin]>;
+}
+
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//
@@ -1011,7 +1037,7 @@ def : T1Pat<(i32 imm0_255_comp:$src),
// scheduling.
let isReMaterializable = 1 in
def tLDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp),
- NoItinerary, "@ ldr.n\t$dst, $addr\n$cp:\n\tadd\t$dst, pc",
+ NoItinerary, "${:comment} ldr.n\t$dst, $addr\n$cp:\n\tadd\t$dst, pc",
[(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)),
imm:$cp))]>,
Requires<[IsThumb1Only]>;
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 742bd40..b91c089 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -185,8 +185,8 @@ multiclass T2I_un_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{15} = 0;
}
// register
- def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVr,
- opc, ".w\t$dst, $src",
+ def r : T2sI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVr,
+ opc, ".w\t$dst, $src",
[(set GPR:$dst, (opnode GPR:$src))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -198,9 +198,9 @@ multiclass T2I_un_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{5-4} = 0b00; // type
}
// shifted register
- def s : T2I<(outs GPR:$dst), (ins t2_so_reg:$src), IIC_iMOVsi,
- opc, ".w\t$dst, $src",
- [(set GPR:$dst, (opnode t2_so_reg:$src))]> {
+ def s : T2sI<(outs GPR:$dst), (ins t2_so_reg:$src), IIC_iMOVsi,
+ opc, ".w\t$dst, $src",
+ [(set GPR:$dst, (opnode t2_so_reg:$src))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
@@ -210,7 +210,7 @@ multiclass T2I_un_irs<bits<4> opcod, string opc, PatFrag opnode,
}
/// T2I_bin_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a
-// binary operation that produces a value. These are predicable and can be
+/// binary operation that produces a value. These are predicable and can be
/// changed to modify CPSR.
multiclass T2I_bin_irs<bits<4> opcod, string opc, PatFrag opnode,
bit Commutable = 0, string wide =""> {
@@ -259,23 +259,23 @@ multiclass T2I_bin_w_irs<bits<4> opcod, string opc, PatFrag opnode,
/// T2I_bin_irs counterpart.
multiclass T2I_rbin_is<bits<4> opcod, string opc, PatFrag opnode> {
// shifted imm
- def ri : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs), IIC_iALUi,
- opc, ".w\t$dst, $rhs, $lhs",
- [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]> {
+ def ri : T2sI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs), IIC_iALUi,
+ opc, ".w\t$dst, $rhs, $lhs",
+ [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24-21} = opcod;
- let Inst{20} = 0; // The S bit.
+ let Inst{20} = ?; // The S bit.
let Inst{15} = 0;
}
// shifted register
- def rs : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs), IIC_iALUsi,
- opc, "\t$dst, $rhs, $lhs",
- [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]> {
+ def rs : T2sI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs), IIC_iALUsi,
+ opc, "\t$dst, $rhs, $lhs",
+ [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
- let Inst{20} = 0; // The S bit.
+ let Inst{20} = ?; // The S bit.
}
}
@@ -461,10 +461,9 @@ multiclass T2I_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode,
let Defs = [CPSR] in {
multiclass T2I_rbin_s_is<bits<4> opcod, string opc, PatFrag opnode> {
// shifted imm
- def ri : T2XI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs, cc_out:$s),
- IIC_iALUi,
- !strconcat(opc, "${s}.w\t$dst, $rhs, $lhs"),
- [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]> {
+ def ri : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs), IIC_iALUi,
+ !strconcat(opc, "s"), ".w\t$dst, $rhs, $lhs",
+ [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24-21} = opcod;
@@ -472,10 +471,9 @@ multiclass T2I_rbin_s_is<bits<4> opcod, string opc, PatFrag opnode> {
let Inst{15} = 0;
}
// shifted register
- def rs : T2XI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs, cc_out:$s),
- IIC_iALUsi,
- !strconcat(opc, "${s}\t$dst, $rhs, $lhs"),
- [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]> {
+ def rs : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs), IIC_iALUsi,
+ !strconcat(opc, "s"), "\t$dst, $rhs, $lhs",
+ [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
@@ -639,7 +637,8 @@ multiclass T2I_st<bits<2> opcod, string opc, PatFrag opnode> {
multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> {
def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
opc, ".w\t$dst, $src",
- [(set GPR:$dst, (opnode GPR:$src))]> {
+ [(set GPR:$dst, (opnode GPR:$src))]>,
+ Requires<[HasT2ExtractPack]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
let Inst{22-20} = opcod;
@@ -650,7 +649,8 @@ multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> {
}
def r_rot : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$rot), IIC_iUNAsi,
opc, ".w\t$dst, $src, ror $rot",
- [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]> {
+ [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]>,
+ Requires<[HasT2ExtractPack]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
let Inst{22-20} = opcod;
@@ -665,7 +665,8 @@ multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> {
multiclass T2I_unary_rrot_nw<bits<3> opcod, string opc, PatFrag opnode> {
def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
opc, "\t$dst, $src",
- [(set GPR:$dst, (opnode GPR:$src))]> {
+ [(set GPR:$dst, (opnode GPR:$src))]>,
+ Requires<[HasT2ExtractPack]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
let Inst{22-20} = opcod;
@@ -676,7 +677,8 @@ multiclass T2I_unary_rrot_nw<bits<3> opcod, string opc, PatFrag opnode> {
}
def r_rot : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$rot), IIC_iUNAsi,
opc, "\t$dst, $src, ror $rot",
- [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]> {
+ [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]>,
+ Requires<[HasT2ExtractPack]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
let Inst{22-20} = opcod;
@@ -717,7 +719,8 @@ multiclass T2I_unary_rrot_DO<bits<3> opcod, string opc> {
multiclass T2I_bin_rrot<bits<3> opcod, string opc, PatFrag opnode> {
def rr : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS), IIC_iALUr,
opc, "\t$dst, $LHS, $RHS",
- [(set GPR:$dst, (opnode GPR:$LHS, GPR:$RHS))]> {
+ [(set GPR:$dst, (opnode GPR:$LHS, GPR:$RHS))]>,
+ Requires<[HasT2ExtractPack]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
let Inst{22-20} = opcod;
@@ -728,7 +731,8 @@ multiclass T2I_bin_rrot<bits<3> opcod, string opc, PatFrag opnode> {
def rr_rot : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS, i32imm:$rot),
IIC_iALUsr, opc, "\t$dst, $LHS, $RHS, ror $rot",
[(set GPR:$dst, (opnode GPR:$LHS,
- (rotr GPR:$RHS, rot_imm:$rot)))]> {
+ (rotr GPR:$RHS, rot_imm:$rot)))]>,
+ Requires<[HasT2ExtractPack]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
let Inst{22-20} = opcod;
@@ -771,6 +775,8 @@ multiclass T2I_bin_rrot_DO<bits<3> opcod, string opc> {
// LEApcrel - Load a pc-relative address into a register without offending the
// assembler.
+let neverHasSideEffects = 1 in {
+let isReMaterializable = 1 in
def t2LEApcrel : T2XI<(outs GPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi,
"adr$p.w\t$dst, #$label", []> {
let Inst{31-27} = 0b11110;
@@ -792,6 +798,7 @@ def t2LEApcrelJT : T2XI<(outs GPR:$dst),
let Inst{19-16} = 0b1111; // Rn
let Inst{15} = 0;
}
+} // neverHasSideEffects
// ADD r, sp, {so_imm|i12}
def t2ADDrSPi : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm),
@@ -856,9 +863,11 @@ def t2SUBrSPs : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs),
let Inst{15} = 0;
}
-// Signed and unsigned division, for disassembly only
+// Signed and unsigned division on v7-M
def t2SDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi,
- "sdiv", "\t$dst, $a, $b", []> {
+ "sdiv", "\t$dst, $a, $b",
+ [(set GPR:$dst, (sdiv GPR:$a, GPR:$b))]>,
+ Requires<[HasDivide]> {
let Inst{31-27} = 0b11111;
let Inst{26-21} = 0b011100;
let Inst{20} = 0b1;
@@ -867,7 +876,9 @@ def t2SDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi,
}
def t2UDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi,
- "udiv", "\t$dst, $a, $b", []> {
+ "udiv", "\t$dst, $a, $b",
+ [(set GPR:$dst, (udiv GPR:$a, GPR:$b))]>,
+ Requires<[HasDivide]> {
let Inst{31-27} = 0b11111;
let Inst{26-21} = 0b011101;
let Inst{20} = 0b1;
@@ -878,11 +889,11 @@ def t2UDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi,
// Pseudo instruction that will expand into a t2SUBrSPi + a copy.
let usesCustomInserter = 1 in { // Expanded after instruction selection.
def t2SUBrSPi_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm),
- NoItinerary, "@ sub.w\t$dst, $sp, $imm", []>;
+ NoItinerary, "${:comment} sub.w\t$dst, $sp, $imm", []>;
def t2SUBrSPi12_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, imm0_4095:$imm),
- NoItinerary, "@ subw\t$dst, $sp, $imm", []>;
+ NoItinerary, "${:comment} subw\t$dst, $sp, $imm", []>;
def t2SUBrSPs_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs),
- NoItinerary, "@ sub\t$dst, $sp, $rhs", []>;
+ NoItinerary, "${:comment} sub\t$dst, $sp, $rhs", []>;
} // usesCustomInserter
@@ -902,7 +913,7 @@ defm t2LDRB : T2I_ld<0, 0b00, "ldrb", UnOpFrag<(zextloadi8 node:$Src)>>;
defm t2LDRSH : T2I_ld<1, 0b01, "ldrsh", UnOpFrag<(sextloadi16 node:$Src)>>;
defm t2LDRSB : T2I_ld<1, 0b00, "ldrsb", UnOpFrag<(sextloadi8 node:$Src)>>;
-let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
// Load doubleword
def t2LDRDi8 : T2Ii8s4<1, 0, 1, (outs GPR:$dst1, GPR:$dst2),
(ins t2addrmode_imm8s4:$addr),
@@ -912,7 +923,7 @@ def t2LDRDpci : T2Ii8s4<1, 0, 1, (outs GPR:$dst1, GPR:$dst2),
"ldrd", "\t$dst1, $addr", []> {
let Inst{19-16} = 0b1111; // Rn
}
-}
+} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
// zextload i1 -> zextload i8
def : T2Pat<(zextloadi1 t2addrmode_imm12:$addr),
@@ -955,7 +966,7 @@ def : T2Pat<(extloadi16 (ARMWrapper tconstpool:$addr)),
(t2LDRHpci tconstpool:$addr)>;
// Indexed loads
-let mayLoad = 1 in {
+let mayLoad = 1, neverHasSideEffects = 1 in {
def t2LDR_PRE : T2Iidxldst<0, 0b10, 1, 1, (outs GPR:$dst, GPR:$base_wb),
(ins t2addrmode_imm8:$addr),
AddrModeT2_i8, IndexModePre, IIC_iLoadiu,
@@ -1011,7 +1022,7 @@ def t2LDRSH_POST : T2Iidxldst<1, 0b01, 1, 0, (outs GPR:$dst, GPR:$base_wb),
AddrModeT2_i8, IndexModePost, IIC_iLoadiu,
"ldrsh", "\t$dst, [$base], $offset", "$base = $base_wb",
[]>;
-}
+} // mayLoad = 1, neverHasSideEffects = 1
// LDRT, LDRBT, LDRHT, LDRSBT, LDRSHT all have offset mode (PUW=0b110) and are
// for disassembly only.
@@ -1041,7 +1052,7 @@ defm t2STRB:T2I_st<0b00,"strb",BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
defm t2STRH:T2I_st<0b01,"strh",BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>;
// Store doubleword
-let mayLoad = 1, hasExtraSrcRegAllocReq = 1 in
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in
def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs),
(ins GPR:$src1, GPR:$src2, t2addrmode_imm8s4:$addr),
IIC_iStorer, "strd", "\t$src1, $addr", []>;
@@ -1204,7 +1215,7 @@ defm t2PLI : T2Ipl<1, 0, "pli">;
// Load / store multiple Instructions.
//
-let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
def t2LDM : T2XI<(outs), (ins addrmode4:$addr, pred:$p,
reglist:$dsts, variable_ops), IIC_iLoadm,
"ldm${addr:submode}${p}${addr:wide}\t$addr, $dsts", []> {
@@ -1227,9 +1238,9 @@ def t2LDM_UPD : T2XIt<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
let Inst{21} = 1; // The W bit.
let Inst{20} = 1; // Load
}
-} // mayLoad, hasExtraDefRegAllocReq
+} // mayLoad, neverHasSideEffects, hasExtraDefRegAllocReq
-let mayStore = 1, hasExtraSrcRegAllocReq = 1 in {
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
def t2STM : T2XI<(outs), (ins addrmode4:$addr, pred:$p,
reglist:$srcs, variable_ops), IIC_iStorem,
"stm${addr:submode}${p}${addr:wide}\t$addr, $srcs", []> {
@@ -1253,7 +1264,7 @@ def t2STM_UPD : T2XIt<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
let Inst{21} = 1; // The W bit.
let Inst{20} = 0; // Store
}
-} // mayStore, hasExtraSrcRegAllocReq
+} // mayStore, neverHasSideEffects, hasExtraSrcRegAllocReq
//===----------------------------------------------------------------------===//
// Move Instructions.
@@ -1564,9 +1575,9 @@ def t2MOVrx : T2sI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
}
let Defs = [CPSR] in {
-def t2MOVsrl_flag : T2XI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
- "lsrs.w\t$dst, $src, #1",
- [(set GPR:$dst, (ARMsrl_flag GPR:$src))]> {
+def t2MOVsrl_flag : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
+ "lsrs", ".w\t$dst, $src, #1",
+ [(set GPR:$dst, (ARMsrl_flag GPR:$src))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = 0b0010;
@@ -1577,9 +1588,9 @@ def t2MOVsrl_flag : T2XI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
let Inst{14-12} = 0b000;
let Inst{7-6} = 0b01;
}
-def t2MOVsra_flag : T2XI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
- "asrs.w\t$dst, $src, #1",
- [(set GPR:$dst, (ARMsra_flag GPR:$src))]> {
+def t2MOVsra_flag : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
+ "asrs", ".w\t$dst, $src, #1",
+ [(set GPR:$dst, (ARMsra_flag GPR:$src))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = 0b0010;
@@ -2058,7 +2069,8 @@ def t2PKHBT : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt),
IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2, lsl $shamt",
[(set GPR:$dst, (or (and GPR:$src1, 0xFFFF),
(and (shl GPR:$src2, (i32 imm:$shamt)),
- 0xFFFF0000)))]> {
+ 0xFFFF0000)))]>,
+ Requires<[HasT2ExtractPack]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-20} = 0b01100;
@@ -2068,15 +2080,18 @@ def t2PKHBT : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt),
// Alternate cases for PKHBT where identities eliminate some nodes.
def : T2Pat<(or (and GPR:$src1, 0xFFFF), (and GPR:$src2, 0xFFFF0000)),
- (t2PKHBT GPR:$src1, GPR:$src2, 0)>;
+ (t2PKHBT GPR:$src1, GPR:$src2, 0)>,
+ Requires<[HasT2ExtractPack]>;
def : T2Pat<(or (and GPR:$src1, 0xFFFF), (shl GPR:$src2, imm16_31:$shamt)),
- (t2PKHBT GPR:$src1, GPR:$src2, imm16_31:$shamt)>;
+ (t2PKHBT GPR:$src1, GPR:$src2, imm16_31:$shamt)>,
+ Requires<[HasT2ExtractPack]>;
def t2PKHTB : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt),
IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2, asr $shamt",
[(set GPR:$dst, (or (and GPR:$src1, 0xFFFF0000),
(and (sra GPR:$src2, imm16_31:$shamt),
- 0xFFFF)))]> {
+ 0xFFFF)))]>,
+ Requires<[HasT2ExtractPack]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-20} = 0b01100;
@@ -2087,10 +2102,12 @@ def t2PKHTB : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt),
// Alternate cases for PKHTB where identities eliminate some nodes. Note that
// a shift amount of 0 is *not legal* here, it is PKHBT instead.
def : T2Pat<(or (and GPR:$src1, 0xFFFF0000), (srl GPR:$src2, (i32 16))),
- (t2PKHTB GPR:$src1, GPR:$src2, 16)>;
+ (t2PKHTB GPR:$src1, GPR:$src2, 16)>,
+ Requires<[HasT2ExtractPack]>;
def : T2Pat<(or (and GPR:$src1, 0xFFFF0000),
(and (srl GPR:$src2, imm1_15:$shamt), 0xFFFF)),
- (t2PKHTB GPR:$src1, GPR:$src2, imm1_15:$shamt)>;
+ (t2PKHTB GPR:$src1, GPR:$src2, imm1_15:$shamt)>,
+ Requires<[HasT2ExtractPack]>;
//===----------------------------------------------------------------------===//
// Comparison Instructions...
@@ -2127,6 +2144,7 @@ defm t2TEQ : T2I_cmp_irs<0b0100, "teq",
// Conditional moves
// FIXME: should be able to write a pattern for ARMcmov, but can't use
// a two-value operand where a dag node expects two operands. :(
+let neverHasSideEffects = 1 in {
def t2MOVCCr : T2I<(outs GPR:$dst), (ins GPR:$false, GPR:$true), IIC_iCMOVr,
"mov", ".w\t$dst, $true",
[/*(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc, CCR:$ccr))*/]>,
@@ -2178,6 +2196,7 @@ def t2MOVCCror : T2I_movcc_sh<0b11, (outs GPR:$dst),
(ins GPR:$false, GPR:$true, i32imm:$rhs),
IIC_iCMOVsi, "ror", ".w\t$dst, $true, $rhs", []>,
RegConstraint<"$false = $dst">;
+} // neverHasSideEffects
//===----------------------------------------------------------------------===//
// Atomic operations intrinsics
@@ -2378,13 +2397,13 @@ let Defs =
D31 ] in {
def t2Int_eh_sjlj_setjmp : Thumb2XI<(outs), (ins GPR:$src, tGPR:$val),
AddrModeNone, SizeSpecial, NoItinerary,
- "str\t$val, [$src, #8]\t@ begin eh.setjmp\n"
+ "str\t$val, [$src, #8]\t${:comment} begin eh.setjmp\n"
"\tmov\t$val, pc\n"
- "\tadds\t$val, #9\n"
+ "\tadds\t$val, #7\n"
"\tstr\t$val, [$src, #4]\n"
"\tmovs\tr0, #0\n"
"\tb\t1f\n"
- "\tmovs\tr0, #1\t@ end eh.setjmp\n"
+ "\tmovs\tr0, #1\t${:comment} end eh.setjmp\n"
"1:", "",
[(set R0, (ARMeh_sjlj_setjmp GPR:$src, tGPR:$val))]>,
Requires<[IsThumb2, HasVFP2]>;
@@ -2394,13 +2413,13 @@ let Defs =
[ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR ] in {
def t2Int_eh_sjlj_setjmp_nofp : Thumb2XI<(outs), (ins GPR:$src, tGPR:$val),
AddrModeNone, SizeSpecial, NoItinerary,
- "str\t$val, [$src, #8]\t@ begin eh.setjmp\n"
+ "str\t$val, [$src, #8]\t${:comment} begin eh.setjmp\n"
"\tmov\t$val, pc\n"
- "\tadds\t$val, #9\n"
+ "\tadds\t$val, #7\n"
"\tstr\t$val, [$src, #4]\n"
"\tmovs\tr0, #0\n"
"\tb\t1f\n"
- "\tmovs\tr0, #1\t@ end eh.setjmp\n"
+ "\tmovs\tr0, #1\t${:comment} end eh.setjmp\n"
"1:", "",
[(set R0, (ARMeh_sjlj_setjmp GPR:$src, tGPR:$val))]>,
Requires<[IsThumb2, NoVFP]>;
@@ -2672,7 +2691,7 @@ def : T2Pat<(ARMWrapperJT tjumptable:$dst, imm:$id),
// scheduling.
let canFoldAsLoad = 1, isReMaterializable = 1 in
def t2LDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp),
- NoItinerary, "@ ldr.w\t$dst, $addr\n$cp:\n\tadd\t$dst, pc",
+ NoItinerary, "${:comment} ldr.w\t$dst, $addr\n$cp:\n\tadd\t$dst, pc",
[(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)),
imm:$cp))]>,
Requires<[IsThumb2]>;
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index 36fcaa1..54474cf 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -76,7 +76,7 @@ def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$src, addrmode5:$addr),
// Load / store multiple Instructions.
//
-let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
def VLDMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$dsts,
variable_ops), IndexModeNone, IIC_fpLoadm,
"vldm${addr:submode}${p}\t${addr:base}, $dsts", "", []> {
@@ -104,9 +104,9 @@ def VLDMS_UPD : AXSI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p,
"$addr.base = $wb", []> {
let Inst{20} = 1;
}
-} // mayLoad, hasExtraDefRegAllocReq
+} // mayLoad, neverHasSideEffects, hasExtraDefRegAllocReq
-let mayStore = 1, hasExtraSrcRegAllocReq = 1 in {
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
def VSTMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$srcs,
variable_ops), IndexModeNone, IIC_fpStorem,
"vstm${addr:submode}${p}\t${addr:base}, $srcs", "", []> {
@@ -134,7 +134,7 @@ def VSTMS_UPD : AXSI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p,
"$addr.base = $wb", []> {
let Inst{20} = 0;
}
-} // mayStore, hasExtraSrcRegAllocReq
+} // mayStore, neverHasSideEffects, hasExtraSrcRegAllocReq
// FLDMX, FSTMX - mixing S/D registers for pre-armv6 cores
@@ -313,6 +313,7 @@ def VMOVSR : AVConv4I<0b11100000, 0b1010, (outs SPR:$dst), (ins GPR:$src),
IIC_fpMOVIS, "vmov", "\t$dst, $src",
[(set SPR:$dst, (bitconvert GPR:$src))]>;
+let neverHasSideEffects = 1 in {
def VMOVRRD : AVConv3I<0b11000101, 0b1011,
(outs GPR:$wb, GPR:$dst2), (ins DPR:$src),
IIC_fpMOVDI, "vmov", "\t$wb, $dst2, $src",
@@ -326,6 +327,7 @@ def VMOVRRS : AVConv3I<0b11000101, 0b1010,
[/* For disassembly only; pattern left blank */]> {
let Inst{7-6} = 0b00;
}
+} // neverHasSideEffects
// FMDHR: GPR -> SPR
// FMDLR: GPR -> SPR
@@ -337,6 +339,7 @@ def VMOVDRR : AVConv5I<0b11000100, 0b1011,
let Inst{7-6} = 0b00;
}
+let neverHasSideEffects = 1 in
def VMOVSRR : AVConv5I<0b11000100, 0b1010,
(outs SPR:$dst1, SPR:$dst2), (ins GPR:$src1, GPR:$src2),
IIC_fpMOVID, "vmov", "\t$dst1, $dst2, $src1, $src2",
@@ -606,6 +609,7 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
// FP Conditional moves.
//
+let neverHasSideEffects = 1 in {
def VMOVDcc : ADuI<0b11101, 0b11, 0b0000, 0b01, 0,
(outs DPR:$dst), (ins DPR:$false, DPR:$true),
IIC_fpUNA64, "vmov", ".f64\t$dst, $true",
@@ -629,7 +633,7 @@ def VNEGScc : ASuI<0b11101, 0b11, 0b0001, 0b01, 0,
IIC_fpUNA32, "vneg", ".f32\t$dst, $true",
[/*(set SPR:$dst, (ARMcneg SPR:$false, SPR:$true, imm:$cc))*/]>,
RegConstraint<"$false = $dst">;
-
+} // neverHasSideEffects
//===----------------------------------------------------------------------===//
// Misc.
@@ -651,6 +655,7 @@ def FMSTAT : VFPAI<(outs), (ins), VFPMiscFrm, IIC_fpSTAT, "vmrs",
// FPSCR <-> GPR (for disassembly only)
+let neverHasSideEffects = 1 in {
let Uses = [FPSCR] in {
def VMRS : VFPAI<(outs GPR:$dst), (ins), VFPMiscFrm, IIC_fpSTAT, "vmrs",
"\t$dst, fpscr",
@@ -674,6 +679,7 @@ def VMSR : VFPAI<(outs), (ins GPR:$src), VFPMiscFrm, IIC_fpSTAT, "vmsr",
let Inst{4} = 1;
}
}
+} // neverHasSideEffects
// Materialize FP immediates. VFP3 only.
let isReMaterializable = 1 in {
diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp
index b31a4fa..5f6d7ee 100644
--- a/lib/Target/ARM/ARMJITInfo.cpp
+++ b/lib/Target/ARM/ARMJITInfo.cpp
@@ -318,6 +318,18 @@ void ARMJITInfo::relocate(void *Function, MachineRelocation *MR,
*((intptr_t*)RelocPos) |= ResultPtr;
break;
}
+ case ARM::reloc_arm_movw: {
+ ResultPtr = ResultPtr & 0xFFFF;
+ *((intptr_t*)RelocPos) |= ResultPtr & 0xFFF;
+ *((intptr_t*)RelocPos) |= ((ResultPtr >> 12) & 0xF) << 16;
+ break;
+ }
+ case ARM::reloc_arm_movt: {
+ ResultPtr = (ResultPtr >> 16) & 0xFFFF;
+ *((intptr_t*)RelocPos) |= ResultPtr & 0xFFF;
+ *((intptr_t*)RelocPos) |= ((ResultPtr >> 12) & 0xF) << 16;
+ break;
+ }
}
}
}
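Both new cases split a 16-bit value into the MOVW/MOVT immediate fields: imm12 occupies bits 11:0 of the instruction word and imm4 bits 19:16. A self-contained sketch of the same packing (function name is illustrative):

  #include <cassert>
  #include <cstdint>

  // Pack a 16-bit immediate into the split imm4:imm12 fields, mirroring
  // the reloc_arm_movw / reloc_arm_movt cases above.
  uint32_t packMovImm16(uint32_t Insn, uint32_t Val16) {
    Insn |= Val16 & 0xFFF;                // imm12 -> bits 11:0
    Insn |= ((Val16 >> 12) & 0xF) << 16;  // imm4  -> bits 19:16
    return Insn;
  }

  int main() {
    assert(packMovImm16(0, 0xABCD) == ((0xAu << 16) | 0xBCD));
    return 0;
  }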
diff --git a/lib/Target/ARM/ARMRegisterInfo.h b/lib/Target/ARM/ARMRegisterInfo.h
index 041afd0..8edfb9a 100644
--- a/lib/Target/ARM/ARMRegisterInfo.h
+++ b/lib/Target/ARM/ARMRegisterInfo.h
@@ -23,16 +23,6 @@ namespace llvm {
class ARMBaseInstrInfo;
class Type;
-namespace ARM {
- /// SubregIndex - The index of various subregister classes. Note that
- /// these indices must be kept in sync with the class indices in the
- /// ARMRegisterInfo.td file.
- enum SubregIndex {
- SSUBREG_0 = 1, SSUBREG_1 = 2, SSUBREG_2 = 3, SSUBREG_3 = 4,
- DSUBREG_0 = 5, DSUBREG_1 = 6
- };
-}
-
struct ARMRegisterInfo : public ARMBaseRegisterInfo {
public:
ARMRegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &STI);
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index 0d4200c..6beca8b 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -23,6 +23,44 @@ class ARMFReg<bits<6> num, string n> : Register<n> {
let Namespace = "ARM";
}
+// Subregister indices.
+let Namespace = "ARM" in {
+// Note: Code depends on these having consecutive numbers.
+def ssub_0 : SubRegIndex;
+def ssub_1 : SubRegIndex;
+def ssub_2 : SubRegIndex; // In a Q reg.
+def ssub_3 : SubRegIndex;
+def ssub_4 : SubRegIndex; // In a QQ reg.
+def ssub_5 : SubRegIndex;
+def ssub_6 : SubRegIndex;
+def ssub_7 : SubRegIndex;
+def ssub_8 : SubRegIndex; // In a QQQQ reg.
+def ssub_9 : SubRegIndex;
+def ssub_10 : SubRegIndex;
+def ssub_11 : SubRegIndex;
+def ssub_12 : SubRegIndex;
+def ssub_13 : SubRegIndex;
+def ssub_14 : SubRegIndex;
+def ssub_15 : SubRegIndex;
+
+def dsub_0 : SubRegIndex;
+def dsub_1 : SubRegIndex;
+def dsub_2 : SubRegIndex;
+def dsub_3 : SubRegIndex;
+def dsub_4 : SubRegIndex;
+def dsub_5 : SubRegIndex;
+def dsub_6 : SubRegIndex;
+def dsub_7 : SubRegIndex;
+
+def qsub_0 : SubRegIndex;
+def qsub_1 : SubRegIndex;
+def qsub_2 : SubRegIndex;
+def qsub_3 : SubRegIndex;
+
+def qqsub_0 : SubRegIndex;
+def qqsub_1 : SubRegIndex;
+}
+
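The consecutive numbering lets code address an S lane of a Q register either directly (ssub_0..ssub_3) or through a D pair, which is what composite indices such as (ssub_2 dsub_1, ssub_0) express further down. A sketch of that mapping (plain C++, names illustrative):

  // S lane k of a Q register is S lane (k % 2) within D subregister (k / 2),
  // e.g. lane 2 -> {dsub_1, ssub_0}, matching the composite index defs below.
  struct SubRegPath { unsigned DSub, SSub; };

  SubRegPath qLaneToDPath(unsigned SLane) {  // SLane in [0, 3]
    return SubRegPath{SLane / 2, SLane % 2};
  }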
// Integer registers
def R0 : ARMReg< 0, "r0">, DwarfRegNum<[0]>;
def R1 : ARMReg< 1, "r1">, DwarfRegNum<[1]>;
@@ -58,9 +96,9 @@ def S24 : ARMFReg<24, "s24">; def S25 : ARMFReg<25, "s25">;
def S26 : ARMFReg<26, "s26">; def S27 : ARMFReg<27, "s27">;
def S28 : ARMFReg<28, "s28">; def S29 : ARMFReg<29, "s29">;
def S30 : ARMFReg<30, "s30">; def S31 : ARMFReg<31, "s31">;
-def SDummy : ARMFReg<63, "sINVALID">;
// Aliases of the F* registers used to hold 64-bit fp values (doubles)
+let SubRegIndices = [ssub_0, ssub_1] in {
def D0 : ARMReg< 0, "d0", [S0, S1]>;
def D1 : ARMReg< 1, "d1", [S2, S3]>;
def D2 : ARMReg< 2, "d2", [S4, S5]>;
@@ -77,6 +115,7 @@ def D12 : ARMReg<12, "d12", [S24, S25]>;
def D13 : ARMReg<13, "d13", [S26, S27]>;
def D14 : ARMReg<14, "d14", [S28, S29]>;
def D15 : ARMReg<15, "d15", [S30, S31]>;
+}
// VFP3 defines 16 additional double registers
def D16 : ARMFReg<16, "d16">; def D17 : ARMFReg<17, "d17">;
@@ -89,6 +128,9 @@ def D28 : ARMFReg<28, "d28">; def D29 : ARMFReg<29, "d29">;
def D30 : ARMFReg<30, "d30">; def D31 : ARMFReg<31, "d31">;
// Advanced SIMD (NEON) defines 16 quad-word aliases
+let SubRegIndices = [dsub_0, dsub_1],
+ CompositeIndices = [(ssub_2 dsub_1, ssub_0),
+ (ssub_3 dsub_1, ssub_1)] in {
def Q0 : ARMReg< 0, "q0", [D0, D1]>;
def Q1 : ARMReg< 1, "q1", [D2, D3]>;
def Q2 : ARMReg< 2, "q2", [D4, D5]>;
@@ -97,6 +139,8 @@ def Q4 : ARMReg< 4, "q4", [D8, D9]>;
def Q5 : ARMReg< 5, "q5", [D10, D11]>;
def Q6 : ARMReg< 6, "q6", [D12, D13]>;
def Q7 : ARMReg< 7, "q7", [D14, D15]>;
+}
+let SubRegIndices = [dsub_0, dsub_1] in {
def Q8 : ARMReg< 8, "q8", [D16, D17]>;
def Q9 : ARMReg< 9, "q9", [D18, D19]>;
def Q10 : ARMReg<10, "q10", [D20, D21]>;
@@ -105,6 +149,51 @@ def Q12 : ARMReg<12, "q12", [D24, D25]>;
def Q13 : ARMReg<13, "q13", [D26, D27]>;
def Q14 : ARMReg<14, "q14", [D28, D29]>;
def Q15 : ARMReg<15, "q15", [D30, D31]>;
+}
+
+// Pseudo 256-bit registers to represent pairs of Q registers. These should
+// never be present in the emitted code.
+// These are used for NEON load / store instructions, e.g. vld4, vst3.
+// NOTE: It's possible to define more QQ registers since technically the
+// starting D register number doesn't have to be a multiple of 4, e.g.
+// D1, D2, D3, D4 would be a legal quad. But that would make the
+// sub-register modeling very messy.
+let SubRegIndices = [qsub_0, qsub_1] in {
+let CompositeIndices = [(dsub_2 qsub_1, dsub_0), (dsub_3 qsub_1, dsub_1),
+ (ssub_4 qsub_1, ssub_0), (ssub_5 qsub_1, ssub_1),
+ (ssub_6 qsub_1, ssub_2), (ssub_7 qsub_1, ssub_3)] in {
+def QQ0 : ARMReg<0, "qq0", [Q0, Q1]>;
+def QQ1 : ARMReg<1, "qq1", [Q2, Q3]>;
+def QQ2 : ARMReg<2, "qq2", [Q4, Q5]>;
+def QQ3 : ARMReg<3, "qq3", [Q6, Q7]>;
+}
+let CompositeIndices = [(dsub_2 qsub_1, dsub_0), (dsub_3 qsub_1, dsub_1)] in {
+def QQ4 : ARMReg<4, "qq4", [Q8, Q9]>;
+def QQ5 : ARMReg<5, "qq5", [Q10, Q11]>;
+def QQ6 : ARMReg<6, "qq6", [Q12, Q13]>;
+def QQ7 : ARMReg<7, "qq7", [Q14, Q15]>;
+}
+}
+
+// Pseudo 512-bit registers to represent four consecutive Q registers.
+let SubRegIndices = [qqsub_0, qqsub_1] in {
+let CompositeIndices = [(qsub_2 qqsub_1, qsub_0), (qsub_3 qqsub_1, qsub_1),
+ (dsub_4 qqsub_1, dsub_0), (dsub_5 qqsub_1, dsub_1),
+ (dsub_6 qqsub_1, dsub_2), (dsub_7 qqsub_1, dsub_3),
+ (ssub_8 qqsub_1, ssub_0), (ssub_9 qqsub_1, ssub_1),
+ (ssub_10 qqsub_1, ssub_2), (ssub_11 qqsub_1, ssub_3),
+ (ssub_12 qqsub_1, ssub_4), (ssub_13 qqsub_1, ssub_5),
+ (ssub_14 qqsub_1, ssub_6), (ssub_15 qqsub_1, ssub_7)] in {
+def QQQQ0 : ARMReg<0, "qqqq0", [QQ0, QQ1]>;
+def QQQQ1 : ARMReg<1, "qqqq1", [QQ2, QQ3]>;
+}
+let CompositeIndices = [(qsub_2 qqsub_1, qsub_0), (qsub_3 qqsub_1, qsub_1),
+ (dsub_4 qqsub_1, dsub_0), (dsub_5 qqsub_1, dsub_1),
+ (dsub_6 qqsub_1, dsub_2), (dsub_7 qqsub_1, dsub_3)] in {
+def QQQQ2 : ARMReg<2, "qqqq2", [QQ4, QQ5]>;
+def QQQQ3 : ARMReg<3, "qqqq3", [QQ6, QQ7]>;
+}
+}
// Current Program Status Register.
def CPSR : ARMReg<0, "cpsr">;
@@ -270,11 +359,6 @@ def SPR_8 : RegisterClass<"ARM", [f32], 32,
[S0, S1, S2, S3, S4, S5, S6, S7,
S8, S9, S10, S11, S12, S13, S14, S15]>;
-// Dummy f32 regclass to represent impossible subreg indices.
-def SPR_INVALID : RegisterClass<"ARM", [f32], 32, [SDummy]> {
- let CopyCost = -1;
-}
-
// Scalar double precision floating point / generic 64-bit vector register
// class.
// ARM requires only word alignment for double. It's more performant if it
@@ -284,7 +368,6 @@ def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
D8, D9, D10, D11, D12, D13, D14, D15,
D16, D17, D18, D19, D20, D21, D22, D23,
D24, D25, D26, D27, D28, D29, D30, D31]> {
- let SubRegClassList = [SPR_INVALID, SPR_INVALID];
let MethodProtos = [{
iterator allocation_order_begin(const MachineFunction &MF) const;
iterator allocation_order_end(const MachineFunction &MF) const;
@@ -332,79 +415,68 @@ def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
def DPR_VFP2 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
[D0, D1, D2, D3, D4, D5, D6, D7,
D8, D9, D10, D11, D12, D13, D14, D15]> {
- let SubRegClassList = [SPR, SPR];
+ let SubRegClasses = [(SPR ssub_0, ssub_1)];
}
// Subset of DPR which can be used as a source of NEON scalars for 16-bit
// operations
def DPR_8 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
[D0, D1, D2, D3, D4, D5, D6, D7]> {
- let SubRegClassList = [SPR_8, SPR_8];
+ let SubRegClasses = [(SPR_8 ssub_0, ssub_1)];
}
// Generic 128-bit vector register class.
def QPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128,
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7,
Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15]> {
- let SubRegClassList = [SPR_INVALID, SPR_INVALID, SPR_INVALID, SPR_INVALID,
- DPR, DPR];
+ let SubRegClasses = [(DPR dsub_0, dsub_1)];
}
// Subset of QPR that have 32-bit SPR subregs.
def QPR_VFP2 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
128,
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]> {
- let SubRegClassList = [SPR, SPR, SPR, SPR, DPR_VFP2, DPR_VFP2];
+ let SubRegClasses = [(SPR ssub_0, ssub_1, ssub_2, ssub_3),
+ (DPR_VFP2 dsub_0, dsub_1)];
}
// Subset of QPR that have DPR_8 and SPR_8 subregs.
def QPR_8 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
128,
[Q0, Q1, Q2, Q3]> {
- let SubRegClassList = [SPR_8, SPR_8, SPR_8, SPR_8, DPR_8, DPR_8];
+ let SubRegClasses = [(SPR_8 ssub_0, ssub_1, ssub_2, ssub_3),
+ (DPR_8 dsub_0, dsub_1)];
+}
+
+// Pseudo 256-bit vector register class to model pairs of Q registers
+// (4 consecutive D registers).
+def QQPR : RegisterClass<"ARM", [v4i64],
+ 256,
+ [QQ0, QQ1, QQ2, QQ3, QQ4, QQ5, QQ6, QQ7]> {
+ let SubRegClasses = [(DPR dsub_0, dsub_1, dsub_2, dsub_3),
+ (QPR qsub_0, qsub_1)];
+}
+
+// Subset of QQPR that have 32-bit SPR subregs.
+def QQPR_VFP2 : RegisterClass<"ARM", [v4i64],
+ 256,
+ [QQ0, QQ1, QQ2, QQ3]> {
+ let SubRegClasses = [(SPR ssub_0, ssub_1, ssub_2, ssub_3),
+ (DPR_VFP2 dsub_0, dsub_1, dsub_2, dsub_3),
+ (QPR_VFP2 qsub_0, qsub_1)];
+}
+
+// Pseudo 512-bit vector register class to model 4 consecutive Q registers
+// (8 consecutive D registers).
+def QQQQPR : RegisterClass<"ARM", [v8i64],
+ 256,
+ [QQQQ0, QQQQ1, QQQQ2, QQQQ3]> {
+ let SubRegClasses = [(DPR dsub_0, dsub_1, dsub_2, dsub_3,
+ dsub_4, dsub_5, dsub_6, dsub_7),
+ (QPR qsub_0, qsub_1, qsub_2, qsub_3)];
}
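As a rule of thumb for these pseudo classes: QQn aliases Q(2n) and Q(2n+1), i.e. D(4n) through D(4n+3), and QQQQn covers D(8n) through D(8n+7). A tiny sketch (function names illustrative):

  // First D register covered by a QQ / QQQQ pseudo register.
  unsigned firstDRegOfQQ(unsigned N)   { return 4 * N; }  // QQ4   -> D16
  unsigned firstDRegOfQQQQ(unsigned N) { return 8 * N; }  // QQQQ2 -> D16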
// Condition code registers.
def CCR : RegisterClass<"ARM", [i32], 32, [CPSR]>;
-//===----------------------------------------------------------------------===//
-// Subregister Set Definitions... now that we have all of the pieces, define the
-// sub registers for each register.
-//
-
-def arm_ssubreg_0 : PatLeaf<(i32 1)>;
-def arm_ssubreg_1 : PatLeaf<(i32 2)>;
-def arm_ssubreg_2 : PatLeaf<(i32 3)>;
-def arm_ssubreg_3 : PatLeaf<(i32 4)>;
-def arm_dsubreg_0 : PatLeaf<(i32 5)>;
-def arm_dsubreg_1 : PatLeaf<(i32 6)>;
-
-// S sub-registers of D registers.
-def : SubRegSet<1, [D0, D1, D2, D3, D4, D5, D6, D7,
- D8, D9, D10, D11, D12, D13, D14, D15],
- [S0, S2, S4, S6, S8, S10, S12, S14,
- S16, S18, S20, S22, S24, S26, S28, S30]>;
-def : SubRegSet<2, [D0, D1, D2, D3, D4, D5, D6, D7,
- D8, D9, D10, D11, D12, D13, D14, D15],
- [S1, S3, S5, S7, S9, S11, S13, S15,
- S17, S19, S21, S23, S25, S27, S29, S31]>;
-
-// S sub-registers of Q registers.
-def : SubRegSet<1, [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7],
- [S0, S4, S8, S12, S16, S20, S24, S28]>;
-def : SubRegSet<2, [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7],
- [S1, S5, S9, S13, S17, S21, S25, S29]>;
-def : SubRegSet<3, [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7],
- [S2, S6, S10, S14, S18, S22, S26, S30]>;
-def : SubRegSet<4, [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7],
- [S3, S7, S11, S15, S19, S23, S27, S31]>;
-
-// D sub-registers of Q registers.
-def : SubRegSet<5, [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7,
- Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15],
- [D0, D2, D4, D6, D8, D10, D12, D14,
- D16, D18, D20, D22, D24, D26, D28, D30]>;
-def : SubRegSet<6, [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7,
- Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15],
- [D1, D3, D5, D7, D9, D11, D13, D15,
- D17, D19, D21, D23, D25, D27, D29, D31]>;
diff --git a/lib/Target/ARM/ARMRelocations.h b/lib/Target/ARM/ARMRelocations.h
index 2cc2950..86e7206 100644
--- a/lib/Target/ARM/ARMRelocations.h
+++ b/lib/Target/ARM/ARMRelocations.h
@@ -47,7 +47,13 @@ namespace llvm {
reloc_arm_pic_jt,
// reloc_arm_branch - Branch address relocation.
- reloc_arm_branch
+ reloc_arm_branch,
+
+ // reloc_arm_movt - MOVT immediate relocation.
+ reloc_arm_movt,
+
+ // reloc_arm_movw - MOVW immediate relocation.
+ reloc_arm_movw
};
}
}
diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
index c04ee38..a289407 100644
--- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -12,11 +12,123 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "arm-selectiondag-info"
-#include "ARMSelectionDAGInfo.h"
+#include "ARMTargetMachine.h"
using namespace llvm;
-ARMSelectionDAGInfo::ARMSelectionDAGInfo() {
+ARMSelectionDAGInfo::ARMSelectionDAGInfo(const TargetMachine &TM)
+ : TargetSelectionDAGInfo(TM),
+ Subtarget(&TM.getSubtarget<ARMSubtarget>()) {
}
ARMSelectionDAGInfo::~ARMSelectionDAGInfo() {
}
+
+SDValue
+ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile, bool AlwaysInline,
+ const Value *DstSV,
+ uint64_t DstSVOff,
+ const Value *SrcSV,
+ uint64_t SrcSVOff) const {
+ // Do repeated 4-byte loads and stores. To be improved.
+ // This requires 4-byte alignment.
+ if ((Align & 3) != 0)
+ return SDValue();
+  // This requires the copy size to be a constant, preferably
+ // within a subtarget-specific limit.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (!ConstantSize)
+ return SDValue();
+ uint64_t SizeVal = ConstantSize->getZExtValue();
+ if (!AlwaysInline && SizeVal > Subtarget->getMaxInlineSizeThreshold())
+ return SDValue();
+
+ unsigned BytesLeft = SizeVal & 3;
+ unsigned NumMemOps = SizeVal >> 2;
+ unsigned EmittedNumMemOps = 0;
+ EVT VT = MVT::i32;
+ unsigned VTSize = 4;
+ unsigned i = 0;
+ const unsigned MAX_LOADS_IN_LDM = 6;
+ SDValue TFOps[MAX_LOADS_IN_LDM];
+ SDValue Loads[MAX_LOADS_IN_LDM];
+ uint64_t SrcOff = 0, DstOff = 0;
+
+ // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the
+ // same number of stores. The loads and stores will get combined into
+ // ldm/stm later on.
+ while (EmittedNumMemOps < NumMemOps) {
+ for (i = 0;
+ i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
+ Loads[i] = DAG.getLoad(VT, dl, Chain,
+ DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
+ DAG.getConstant(SrcOff, MVT::i32)),
+ SrcSV, SrcSVOff + SrcOff, isVolatile, false, 0);
+ TFOps[i] = Loads[i].getValue(1);
+ SrcOff += VTSize;
+ }
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+
+ for (i = 0;
+ i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
+ TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
+ DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
+ DAG.getConstant(DstOff, MVT::i32)),
+ DstSV, DstSVOff + DstOff, isVolatile, false, 0);
+ DstOff += VTSize;
+ }
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+
+ EmittedNumMemOps += i;
+ }
+
+ if (BytesLeft == 0)
+ return Chain;
+
+  // Issue loads / stores for the trailing (1 to 3) bytes.
+ unsigned BytesLeftSave = BytesLeft;
+ i = 0;
+ while (BytesLeft) {
+ if (BytesLeft >= 2) {
+ VT = MVT::i16;
+ VTSize = 2;
+ } else {
+ VT = MVT::i8;
+ VTSize = 1;
+ }
+
+ Loads[i] = DAG.getLoad(VT, dl, Chain,
+ DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
+ DAG.getConstant(SrcOff, MVT::i32)),
+ SrcSV, SrcSVOff + SrcOff, false, false, 0);
+ TFOps[i] = Loads[i].getValue(1);
+ ++i;
+ SrcOff += VTSize;
+ BytesLeft -= VTSize;
+ }
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+
+ i = 0;
+ BytesLeft = BytesLeftSave;
+ while (BytesLeft) {
+ if (BytesLeft >= 2) {
+ VT = MVT::i16;
+ VTSize = 2;
+ } else {
+ VT = MVT::i8;
+ VTSize = 1;
+ }
+
+ TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
+ DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
+ DAG.getConstant(DstOff, MVT::i32)),
+ DstSV, DstSVOff + DstOff, false, false, 0);
+ ++i;
+ DstOff += VTSize;
+ BytesLeft -= VTSize;
+ }
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+}
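The control flow above reduces a constant-size copy to batches of at most six word-sized load/store pairs, plus a halfword/byte tail. A standalone sketch of the size decomposition it relies on:

  #include <cassert>

  // Mirror of how EmitTargetCodeForMemcpy slices SizeVal: batches of up to
  // six 4-byte ops, then 2- and 1-byte ops for the remaining 0-3 bytes.
  void checkSlicing(unsigned SizeVal) {
    const unsigned MaxLoadsInLDM = 6;  // same cap as MAX_LOADS_IN_LDM
    unsigned NumMemOps = SizeVal >> 2;
    unsigned BytesLeft = SizeVal & 3;
    unsigned Emitted = 0;
    while (Emitted < NumMemOps) {
      unsigned Batch = NumMemOps - Emitted;
      if (Batch > MaxLoadsInLDM)
        Batch = MaxLoadsInLDM;         // one load group, then one store group
      Emitted += Batch;
    }
    assert(Emitted * 4 + BytesLeft == SizeVal);
  }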
diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.h b/lib/Target/ARM/ARMSelectionDAGInfo.h
index afe9a47..d7d00c2 100644
--- a/lib/Target/ARM/ARMSelectionDAGInfo.h
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.h
@@ -19,9 +19,24 @@
namespace llvm {
class ARMSelectionDAGInfo : public TargetSelectionDAGInfo {
+ /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const ARMSubtarget *Subtarget;
+
public:
- ARMSelectionDAGInfo();
+ explicit ARMSelectionDAGInfo(const TargetMachine &TM);
~ARMSelectionDAGInfo();
+
+ virtual
+ SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile, bool AlwaysInline,
+ const Value *DstSV,
+ uint64_t DstSVOff,
+ const Value *SrcSV,
+ uint64_t SrcSVOff) const;
};
}
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index b11580a..10fd257 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -39,6 +39,8 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
, IsR9Reserved(ReserveR9)
, UseMovt(UseMOVT)
, HasFP16(false)
+ , HasHardwareDivide(false)
+ , HasT2ExtractPack(false)
, stackAlignment(4)
, CPUString("generic")
, TargetType(isELF) // Default to ELF unless otherwise specified.
@@ -73,6 +75,8 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
unsigned SubVer = TT[Idx];
if (SubVer >= '7' && SubVer <= '9') {
ARMArchVersion = V7A;
+ if (Len >= Idx+2 && TT[Idx+1] == 'm')
+ ARMArchVersion = V7M;
} else if (SubVer == '6') {
ARMArchVersion = V6;
if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == '2')
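The subarch scan now distinguishes v7-M from v7-A by the character following the version digit, alongside the existing v6t2 check. A simplified sketch of the same logic (standalone; the real code walks the full target triple):

  #include <string>

  enum ArchVer { V6, V6T2, V7A, V7M };

  // Classify the substring following "armv", e.g. "7m" -> V7M, "6t2" -> V6T2.
  ArchVer classifySubArch(const std::string &S) {
    if (!S.empty() && S[0] >= '7' && S[0] <= '9')
      return (S.size() >= 2 && S[1] == 'm') ? V7M : V7A;
    if (!S.empty() && S[0] == '6')
      return (S.size() >= 3 && S[1] == 't' && S[2] == '2') ? V6T2 : V6;
    return V6;  // fallback for illustration only
  }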
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 288a19a..8332bba 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -26,7 +26,7 @@ class GlobalValue;
class ARMSubtarget : public TargetSubtarget {
protected:
enum ARMArchEnum {
- V4, V4T, V5T, V5TE, V6, V6T2, V7A
+ V4, V4T, V5T, V5TE, V6, V6T2, V7A, V7M
};
enum ARMFPEnum {
@@ -39,7 +39,7 @@ protected:
};
/// ARMArchVersion - ARM architecture version: V4, V4T (base), V5T, V5TE,
- /// V6, V6T2, V7A.
+ /// V6, V6T2, V7A, V7M.
ARMArchEnum ARMArchVersion;
/// ARMFPUType - Floating Point Unit type.
@@ -74,6 +74,13 @@ protected:
/// only so far)
bool HasFP16;
+ /// HasHardwareDivide - True if the subtarget supports [su]div.
+ bool HasHardwareDivide;
+
+ /// HasT2ExtractPack - True if the subtarget supports Thumb2 extract/pack
+ /// instructions.
+ bool HasT2ExtractPack;
+
/// stackAlignment - The minimum alignment of the stack frame known to hold on
/// entry to the function and which must be maintained by every function.
unsigned stackAlignment;
@@ -123,6 +130,8 @@ protected:
bool hasNEON() const { return ARMFPUType >= NEON; }
bool useNEONForSinglePrecisionFP() const {
return hasNEON() && UseNEONForSinglePrecisionFP; }
+ bool hasDivide() const { return HasHardwareDivide; }
+ bool hasT2ExtractPack() const { return HasT2ExtractPack; }
bool useVMLx() const {return hasVFP2() && !SlowVMLx; }
bool hasFP16() const { return HasFP16; }
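As context for the two new feature bits, a hypothetical lowering-setup fragment showing how such predicates are typically consumed; only hasDivide() and hasT2ExtractPack() come from the patch, the call sites are illustrative:

  // Expand 32-bit division unless the subtarget has hardware [su]div.
  if (!Subtarget->hasDivide()) {
    setOperationAction(ISD::SDIV, MVT::i32, Expand);
    setOperationAction(ISD::UDIV, MVT::i32, Expand);
  }
  // Extract/pack selection (e.g. PKHBT-style patterns) would likewise
  // be gated on Subtarget->hasT2ExtractPack().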
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 662e61e..b4a9252 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -62,7 +62,8 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, const std::string &TT,
DataLayout(Subtarget.isAPCS_ABI() ?
std::string("e-p:32:32-f64:32:32-i64:32:32-n32") :
std::string("e-p:32:32-f64:64:64-i64:64:64-n32")),
- TLInfo(*this) {
+ TLInfo(*this),
+ TSInfo(*this) {
}
ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT,
@@ -76,7 +77,8 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT,
"i16:16:32-i8:8:32-i1:8:32-a:0:32-n32") :
std::string("e-p:32:32-f64:64:64-i64:64:64-"
"i16:16:32-i8:8:32-i1:8:32-a:0:32-n32")),
- TLInfo(*this) {
+ TLInfo(*this),
+ TSInfo(*this) {
}
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index 4e205df..a222e57 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -21,6 +21,7 @@
#include "ARMJITInfo.h"
#include "ARMSubtarget.h"
#include "ARMISelLowering.h"
+#include "ARMSelectionDAGInfo.h"
#include "Thumb1InstrInfo.h"
#include "Thumb2InstrInfo.h"
#include "llvm/ADT/OwningPtr.h"
@@ -63,6 +64,7 @@ class ARMTargetMachine : public ARMBaseTargetMachine {
ARMInstrInfo InstrInfo;
const TargetData DataLayout; // Calculates type size & alignment
ARMTargetLowering TLInfo;
+ ARMSelectionDAGInfo TSInfo;
public:
ARMTargetMachine(const Target &T, const std::string &TT,
const std::string &FS);
@@ -75,6 +77,10 @@ public:
return &TLInfo;
}
+ virtual const ARMSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+
virtual const ARMInstrInfo *getInstrInfo() const { return &InstrInfo; }
virtual const TargetData *getTargetData() const { return &DataLayout; }
};
@@ -88,6 +94,7 @@ class ThumbTargetMachine : public ARMBaseTargetMachine {
OwningPtr<ARMBaseInstrInfo> InstrInfo;
const TargetData DataLayout; // Calculates type size & alignment
ARMTargetLowering TLInfo;
+ ARMSelectionDAGInfo TSInfo;
public:
ThumbTargetMachine(const Target &T, const std::string &TT,
const std::string &FS);
@@ -101,6 +108,10 @@ public:
return &TLInfo;
}
+ virtual const ARMSelectionDAGInfo *getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+
/// returns either Thumb1InstrInfo or Thumb2InstrInfo
virtual const ARMBaseInstrInfo *getInstrInfo() const {
return InstrInfo.get();
diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
index 80a9d2d..d95efdb 100644
--- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
@@ -319,16 +319,16 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
unsigned Reg = MO.getReg();
assert(TargetRegisterInfo::isPhysicalRegister(Reg));
if (Modifier && strcmp(Modifier, "dregpair") == 0) {
- unsigned DRegLo = TM.getRegisterInfo()->getSubReg(Reg, 5);// arm_dsubreg_0
- unsigned DRegHi = TM.getRegisterInfo()->getSubReg(Reg, 6);// arm_dsubreg_1
+ unsigned DRegLo = TM.getRegisterInfo()->getSubReg(Reg, ARM::dsub_0);
+ unsigned DRegHi = TM.getRegisterInfo()->getSubReg(Reg, ARM::dsub_1);
O << '{'
<< getRegisterName(DRegLo) << ',' << getRegisterName(DRegHi)
<< '}';
} else if (Modifier && strcmp(Modifier, "lane") == 0) {
unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(Reg);
unsigned DReg =
- TM.getRegisterInfo()->getMatchingSuperReg(Reg, RegNum & 1 ? 2 : 1,
- &ARM::DPR_VFP2RegClass);
+ TM.getRegisterInfo()->getMatchingSuperReg(Reg,
+ RegNum & 1 ? ARM::ssub_1 : ARM::ssub_0, &ARM::DPR_VFP2RegClass);
O << getRegisterName(DReg) << '[' << (RegNum & 1) << ']';
} else {
assert(!MO.getSubReg() && "Subregs should be eliminated!");
@@ -1375,13 +1375,32 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) {
case ARM::MOVi32imm: { // FIXME: Remove asmstring from td file.
// This is a hack that lowers as a two-instruction sequence.
unsigned DstReg = MI->getOperand(0).getReg();
- unsigned ImmVal = (unsigned)MI->getOperand(1).getImm();
-
+ const MachineOperand &MO = MI->getOperand(1);
+ MCOperand V1, V2;
+ if (MO.isImm()) {
+ unsigned ImmVal = (unsigned)MI->getOperand(1).getImm();
+ V1 = MCOperand::CreateImm(ImmVal & 65535);
+ V2 = MCOperand::CreateImm(ImmVal >> 16);
+ } else if (MO.isGlobal()) {
+ MCSymbol *Symbol = MCInstLowering.GetGlobalAddressSymbol(MO);
+ const MCSymbolRefExpr *SymRef1 =
+ MCSymbolRefExpr::Create(Symbol,
+ MCSymbolRefExpr::VK_ARM_LO16, OutContext);
+ const MCSymbolRefExpr *SymRef2 =
+ MCSymbolRefExpr::Create(Symbol,
+ MCSymbolRefExpr::VK_ARM_HI16, OutContext);
+ V1 = MCOperand::CreateExpr(SymRef1);
+ V2 = MCOperand::CreateExpr(SymRef2);
+ } else {
+ MI->dump();
+ llvm_unreachable("cannot handle this operand");
+ }
+
{
MCInst TmpInst;
TmpInst.setOpcode(ARM::MOVi16);
TmpInst.addOperand(MCOperand::CreateReg(DstReg)); // dstreg
- TmpInst.addOperand(MCOperand::CreateImm(ImmVal & 65535)); // lower16(imm)
+ TmpInst.addOperand(V1); // lower16(imm)
// Predicate.
TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm()));
@@ -1395,7 +1414,7 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) {
TmpInst.setOpcode(ARM::MOVTi16);
TmpInst.addOperand(MCOperand::CreateReg(DstReg)); // dstreg
TmpInst.addOperand(MCOperand::CreateReg(DstReg)); // srcreg
- TmpInst.addOperand(MCOperand::CreateImm(ImmVal >> 16)); // upper16(imm)
+ TmpInst.addOperand(V2); // upper16(imm)
// Predicate.
TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm()));
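The MOVi32imm change above generalizes the movw/movt operand split from plain immediates to global addresses. A worked example of the immediate case (the value is illustrative):

  // How V1/V2 split a 32-bit immediate for the two-instruction pair:
  unsigned ImmVal = 0x12345678u;
  unsigned Lo = ImmVal & 65535;  // 0x5678 -> MOVi16  (movw, lower16)
  unsigned Hi = ImmVal >> 16;    // 0x1234 -> MOVTi16 (movt, upper16)
  // For a global-address operand the same two slots instead carry
  // :lower16:/:upper16: symbol references (VK_ARM_LO16 / VK_ARM_HI16).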
diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
index ac6331f..2b94b76 100644
--- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
@@ -195,8 +195,8 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
// FIXME: Breaks e.g. ARM/vmul.ll.
assert(0);
/*
- unsigned DRegLo = TRI->getSubReg(Reg, 5); // arm_dsubreg_0
- unsigned DRegHi = TRI->getSubReg(Reg, 6); // arm_dsubreg_1
+ unsigned DRegLo = TRI->getSubReg(Reg, ARM::dsub_0);
+ unsigned DRegHi = TRI->getSubReg(Reg, ARM::dsub_1);
O << '{'
<< getRegisterName(DRegLo) << ',' << getRegisterName(DRegHi)
<< '}';*/
@@ -217,7 +217,8 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported"));
O << '#' << Op.getImm();
} else {
- assert((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported");
+ if (Modifier && Modifier[0] != 0 && strcmp(Modifier, "call") != 0)
+ llvm_unreachable("Unsupported modifier");
assert(Op.isExpr() && "unknown operand kind in printOperand");
O << *Op.getExpr();
}
diff --git a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.h b/lib/Target/ARM/AsmPrinter/ARMMCInstLower.h
index 383d30d..b81a306 100644
--- a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.h
+++ b/lib/Target/ARM/AsmPrinter/ARMMCInstLower.h
@@ -26,7 +26,7 @@ namespace llvm {
//class ARMSubtarget;
/// ARMMCInstLower - This class is used to lower a MachineInstr into an MCInst.
-class VISIBILITY_HIDDEN ARMMCInstLower {
+class LLVM_LIBRARY_VISIBILITY ARMMCInstLower {
MCContext &Ctx;
Mangler &Mang;
AsmPrinter &Printer;
diff --git a/lib/Target/ARM/NEONMoveFix.cpp b/lib/Target/ARM/NEONMoveFix.cpp
index 3c0414d..0a4400c 100644
--- a/lib/Target/ARM/NEONMoveFix.cpp
+++ b/lib/Target/ARM/NEONMoveFix.cpp
@@ -118,7 +118,7 @@ bool NEONMoveFixPass::runOnMachineFunction(MachineFunction &Fn) {
ARMFunctionInfo *AFI = Fn.getInfo<ARMFunctionInfo>();
const TargetMachine &TM = Fn.getTarget();
- if (AFI->isThumbFunction())
+ if (AFI->isThumb1OnlyFunction())
return false;
TRI = TM.getRegisterInfo();
diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp
index ef6bf3a..a725898 100644
--- a/lib/Target/ARM/NEONPreAllocPass.cpp
+++ b/lib/Target/ARM/NEONPreAllocPass.cpp
@@ -33,7 +33,8 @@ namespace {
private:
bool FormsRegSequence(MachineInstr *MI,
- unsigned FirstOpnd, unsigned NumRegs);
+ unsigned FirstOpnd, unsigned NumRegs,
+ unsigned Offset, unsigned Stride) const;
bool PreAllocNEONRegisters(MachineBasicBlock &MBB);
};
@@ -338,24 +339,122 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
return false;
}
-bool NEONPreAllocPass::FormsRegSequence(MachineInstr *MI,
- unsigned FirstOpnd, unsigned NumRegs) {
- MachineInstr *RegSeq = 0;
+bool
+NEONPreAllocPass::FormsRegSequence(MachineInstr *MI,
+ unsigned FirstOpnd, unsigned NumRegs,
+ unsigned Offset, unsigned Stride) const {
+ MachineOperand &FMO = MI->getOperand(FirstOpnd);
+ assert(FMO.isReg() && FMO.getSubReg() == 0 && "unexpected operand");
+ unsigned VirtReg = FMO.getReg();
+ (void)VirtReg;
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+ "expected a virtual register");
+
+ unsigned LastSubIdx = 0;
+ if (FMO.isDef()) {
+ MachineInstr *RegSeq = 0;
+ for (unsigned R = 0; R < NumRegs; ++R) {
+ const MachineOperand &MO = MI->getOperand(FirstOpnd + R);
+ assert(MO.isReg() && MO.getSubReg() == 0 && "unexpected operand");
+ unsigned VirtReg = MO.getReg();
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+ "expected a virtual register");
+ // Feeding into a REG_SEQUENCE.
+ if (!MRI->hasOneNonDBGUse(VirtReg))
+ return false;
+ MachineInstr *UseMI = &*MRI->use_nodbg_begin(VirtReg);
+ if (!UseMI->isRegSequence())
+ return false;
+ if (RegSeq && RegSeq != UseMI)
+ return false;
+ unsigned OpIdx = 1 + (Offset + R * Stride) * 2;
+ if (UseMI->getOperand(OpIdx).getReg() != VirtReg)
+ llvm_unreachable("Malformed REG_SEQUENCE instruction!");
+ unsigned SubIdx = UseMI->getOperand(OpIdx + 1).getImm();
+ if (LastSubIdx) {
+ if (LastSubIdx != SubIdx-Stride)
+ return false;
+ } else {
+ // Must start from dsub_0 or qsub_0.
+ if (SubIdx != (ARM::dsub_0+Offset) &&
+ SubIdx != (ARM::qsub_0+Offset))
+ return false;
+ }
+ RegSeq = UseMI;
+ LastSubIdx = SubIdx;
+ }
+
+ // In the case of vld3, etc., make sure the trailing operand of
+ // REG_SEQUENCE is an undef.
+ if (NumRegs == 3) {
+ unsigned OpIdx = 1 + (Offset + 3 * Stride) * 2;
+ const MachineOperand &MO = RegSeq->getOperand(OpIdx);
+ unsigned VirtReg = MO.getReg();
+ MachineInstr *DefMI = MRI->getVRegDef(VirtReg);
+ if (!DefMI || !DefMI->isImplicitDef())
+ return false;
+ }
+ return true;
+ }
+
+ unsigned LastSrcReg = 0;
+ SmallVector<unsigned, 4> SubIds;
for (unsigned R = 0; R < NumRegs; ++R) {
- MachineOperand &MO = MI->getOperand(FirstOpnd + R);
+ const MachineOperand &MO = MI->getOperand(FirstOpnd + R);
assert(MO.isReg() && MO.getSubReg() == 0 && "unexpected operand");
unsigned VirtReg = MO.getReg();
assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
"expected a virtual register");
- if (!MRI->hasOneNonDBGUse(VirtReg))
+ // Extracting from a Q or QQ register.
+ MachineInstr *DefMI = MRI->getVRegDef(VirtReg);
+ if (!DefMI || !DefMI->isExtractSubreg())
return false;
- MachineInstr *UseMI = &*MRI->use_nodbg_begin(VirtReg);
- if (UseMI->getOpcode() != TargetOpcode::REG_SEQUENCE)
+ VirtReg = DefMI->getOperand(1).getReg();
+ if (LastSrcReg && LastSrcReg != VirtReg)
return false;
- if (RegSeq && RegSeq != UseMI)
+ LastSrcReg = VirtReg;
+ const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
+ if (RC != ARM::QPRRegisterClass &&
+ RC != ARM::QQPRRegisterClass &&
+ RC != ARM::QQQQPRRegisterClass)
return false;
- RegSeq = UseMI;
+ unsigned SubIdx = DefMI->getOperand(2).getImm();
+ if (LastSubIdx) {
+ if (LastSubIdx != SubIdx-Stride)
+ return false;
+ } else {
+ // Must start from dsub_0 or qsub_0.
+ if (SubIdx != (ARM::dsub_0+Offset) &&
+ SubIdx != (ARM::qsub_0+Offset))
+ return false;
+ }
+ SubIds.push_back(SubIdx);
+ LastSubIdx = SubIdx;
}
+
+ // FIXME: Updating the uses of EXTRACT_SUBREG from REG_SEQUENCE is
+ // currently required for correctness, e.g.
+ // %reg1041<def> = REG_SEQUENCE %reg1040<kill>, 5, %reg1035<kill>, 6
+ // %reg1042<def> = EXTRACT_SUBREG %reg1041, 6
+ // %reg1043<def> = EXTRACT_SUBREG %reg1041, 5
+ // VST1q16 %reg1025<kill>, 0, %reg1043<kill>, %reg1042<kill>,
+ // reg1025 and reg1043 should be replaced with reg1041:6 and reg1041:5
+ // respectively.
+ // We need to change how we model uses of REG_SEQUENCE.
+ for (unsigned R = 0; R < NumRegs; ++R) {
+ MachineOperand &MO = MI->getOperand(FirstOpnd + R);
+ unsigned OldReg = MO.getReg();
+ MachineInstr *DefMI = MRI->getVRegDef(OldReg);
+ assert(DefMI->isExtractSubreg());
+ MO.setReg(LastSrcReg);
+ MO.setSubReg(SubIds[R]);
+ if (R != 0)
+ MO.setIsKill(false);
+ // Delete the EXTRACT_SUBREG if its result is now dead.
+ if (MRI->use_empty(OldReg))
+ DefMI->eraseFromParent();
+ }
+
return true;
}
@@ -368,7 +467,8 @@ bool NEONPreAllocPass::PreAllocNEONRegisters(MachineBasicBlock &MBB) {
unsigned FirstOpnd, NumRegs, Offset, Stride;
if (!isNEONMultiRegOp(MI->getOpcode(), FirstOpnd, NumRegs, Offset, Stride))
continue;
- if (FormsRegSequence(MI, FirstOpnd, NumRegs))
+ if (llvm::ModelWithRegSequence() &&
+ FormsRegSequence(MI, FirstOpnd, NumRegs, Offset, Stride))
continue;
MachineBasicBlock::iterator NextI = llvm::next(MBBI);
@@ -390,7 +490,8 @@ bool NEONPreAllocPass::PreAllocNEONRegisters(MachineBasicBlock &MBB) {
if (MO.isUse()) {
// Insert a copy from VirtReg.
TII->copyRegToReg(MBB, MBBI, MO.getReg(), VirtReg,
- ARM::DPRRegisterClass, ARM::DPRRegisterClass);
+ ARM::DPRRegisterClass, ARM::DPRRegisterClass,
+ DebugLoc());
if (MO.isKill()) {
MachineInstr *CopyMI = prior(MBBI);
CopyMI->findRegisterUseOperand(VirtReg)->setIsKill();
@@ -399,7 +500,8 @@ bool NEONPreAllocPass::PreAllocNEONRegisters(MachineBasicBlock &MBB) {
} else if (MO.isDef() && !MO.isDead()) {
// Add a copy to VirtReg.
TII->copyRegToReg(MBB, NextI, VirtReg, MO.getReg(),
- ARM::DPRRegisterClass, ARM::DPRRegisterClass);
+ ARM::DPRRegisterClass, ARM::DPRRegisterClass,
+ DebugLoc());
}
}
}
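The operand arithmetic in FormsRegSequence above depends on the REG_SEQUENCE operand layout; a short reference sketch (the helper name is made up):

  // REG_SEQUENCE operands: [0] is the def, followed by (register,
  // subreg-index) pairs. The register feeding lane (Offset + R*Stride)
  // therefore sits at operand 1 + (Offset + R*Stride)*2, with its
  // subreg index immediately after it.
  static unsigned regSequenceUseIdx(unsigned Offset, unsigned R,
                                    unsigned Stride) {
    return 1 + (Offset + R * Stride) * 2;
  }
  // e.g. Offset = 0, Stride = 1: registers at operands 1, 3, 5, ...,
  // subreg indices at operands 2, 4, 6, ...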
diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp
index b10c3f7..fae84d4 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -17,6 +17,7 @@
#include "ARMMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/ADT/SmallVector.h"
@@ -36,10 +37,8 @@ bool Thumb1InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const {
- DebugLoc DL;
- if (I != MBB.end()) DL = I->getDebugLoc();
-
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const {
if (DestRC == ARM::GPRRegisterClass) {
if (SrcRC == ARM::GPRRegisterClass) {
BuildMI(MBB, I, DL, get(ARM::tMOVgpr2gpr), DestReg).addReg(SrcReg);
@@ -97,10 +96,8 @@ canFoldMemoryOperand(const MachineInstr *MI,
void Thumb1InstrInfo::
storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned SrcReg, bool isKill, int FI,
- const TargetRegisterClass *RC) const {
- DebugLoc DL;
- if (I != MBB.end()) DL = I->getDebugLoc();
-
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
assert((RC == ARM::tGPRRegisterClass ||
(TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
isARMLowRegister(SrcReg))) && "Unknown regclass!");
@@ -108,6 +105,9 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
if (RC == ARM::tGPRRegisterClass ||
(TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
isARMLowRegister(SrcReg))) {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
MachineMemOperand *MMO =
@@ -124,10 +124,8 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
void Thumb1InstrInfo::
loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned DestReg, int FI,
- const TargetRegisterClass *RC) const {
- DebugLoc DL;
- if (I != MBB.end()) DL = I->getDebugLoc();
-
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
assert((RC == ARM::tGPRRegisterClass ||
(TargetRegisterInfo::isPhysicalRegister(DestReg) &&
isARMLowRegister(DestReg))) && "Unknown regclass!");
@@ -135,6 +133,9 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
if (RC == ARM::tGPRRegisterClass ||
(TargetRegisterInfo::isPhysicalRegister(DestReg) &&
isARMLowRegister(DestReg))) {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
MachineMemOperand *MMO =
@@ -150,7 +151,8 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
bool Thumb1InstrInfo::
spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI) const {
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
if (CSI.empty())
return false;
@@ -161,9 +163,22 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
AddDefaultPred(MIB);
for (unsigned i = CSI.size(); i != 0; --i) {
unsigned Reg = CSI[i-1].getReg();
- // Add the callee-saved register as live-in. It's killed at the spill.
- MBB.addLiveIn(Reg);
- MIB.addReg(Reg, RegState::Kill);
+ bool isKill = true;
+
+ // Add the callee-saved register as live-in unless it's LR and
+ // @llvm.returnaddress is called. If LR is returned for @llvm.returnaddress
+ // then it's already added to the function and entry block live-in sets.
+ if (Reg == ARM::LR) {
+ MachineFunction &MF = *MBB.getParent();
+ if (MF.getFrameInfo()->isReturnAddressTaken() &&
+ MF.getRegInfo().isLiveIn(Reg))
+ isKill = false;
+ }
+
+ if (isKill) {
+ MBB.addLiveIn(Reg);
+ MIB.addReg(Reg, RegState::Kill);
+ }
}
return true;
}
@@ -171,7 +186,8 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
bool Thumb1InstrInfo::
restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI) const {
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
MachineFunction &MF = *MBB.getParent();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
if (CSI.empty())
diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h
index 516ddf1..c937296 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.h
+++ b/lib/Target/ARM/Thumb1InstrInfo.h
@@ -39,25 +39,30 @@ public:
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI) const;
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI) const;
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
bool copyRegToReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const;
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const;
void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned SrcReg, bool isKill, int FrameIndex,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
bool canFoldMemoryOperand(const MachineInstr *MI,
const SmallVectorImpl<unsigned> &Ops) const;
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index b143bd9..531d5e9 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -40,10 +40,8 @@ Thumb2InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const {
- DebugLoc DL;
- if (I != MBB.end()) DL = I->getDebugLoc();
-
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const {
if (DestRC == ARM::GPRRegisterClass) {
if (SrcRC == ARM::GPRRegisterClass) {
BuildMI(MBB, I, DL, get(ARM::tMOVgpr2gpr), DestReg).addReg(SrcReg);
@@ -63,17 +61,18 @@ Thumb2InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
}
// Handle SPR, DPR, and QPR copies.
- return ARMBaseInstrInfo::copyRegToReg(MBB, I, DestReg, SrcReg, DestRC, SrcRC);
+ return ARMBaseInstrInfo::copyRegToReg(MBB, I, DestReg, SrcReg, DestRC, SrcRC, DL);
}
void Thumb2InstrInfo::
storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned SrcReg, bool isKill, int FI,
- const TargetRegisterClass *RC) const {
- DebugLoc DL;
- if (I != MBB.end()) DL = I->getDebugLoc();
-
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass) {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
MachineMemOperand *MMO =
@@ -87,17 +86,18 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
return;
}
- ARMBaseInstrInfo::storeRegToStackSlot(MBB, I, SrcReg, isKill, FI, RC);
+ ARMBaseInstrInfo::storeRegToStackSlot(MBB, I, SrcReg, isKill, FI, RC, TRI);
}
void Thumb2InstrInfo::
loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned DestReg, int FI,
- const TargetRegisterClass *RC) const {
- DebugLoc DL;
- if (I != MBB.end()) DL = I->getDebugLoc();
-
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass) {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
MachineMemOperand *MMO =
@@ -110,7 +110,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
return;
}
- ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC);
+ ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC, TRI);
}
void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h
index a0f89a6..2948770 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/lib/Target/ARM/Thumb2InstrInfo.h
@@ -35,17 +35,20 @@ public:
MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const;
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const;
void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned SrcReg, bool isKill, int FrameIndex,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
/// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
/// such, whenever a client has an instance of instruction info, it should
diff --git a/lib/Target/Alpha/AlphaInstrInfo.cpp b/lib/Target/Alpha/AlphaInstrInfo.cpp
index ba403e2..3aba363 100644
--- a/lib/Target/Alpha/AlphaInstrInfo.cpp
+++ b/lib/Target/Alpha/AlphaInstrInfo.cpp
@@ -146,16 +146,14 @@ bool AlphaInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const {
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const {
//cerr << "copyRegToReg " << DestReg << " <- " << SrcReg << "\n";
if (DestRC != SrcRC) {
// Not yet supported!
return false;
}
- DebugLoc DL;
- if (MI != MBB.end()) DL = MI->getDebugLoc();
-
if (DestRC == Alpha::GPRCRegisterClass) {
BuildMI(MBB, MI, DL, get(Alpha::BISr), DestReg)
.addReg(SrcReg)
@@ -180,7 +178,8 @@ void
AlphaInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned SrcReg, bool isKill, int FrameIdx,
- const TargetRegisterClass *RC) const {
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
//cerr << "Trying to store " << getPrettyName(SrcReg) << " to "
// << FrameIdx << "\n";
//BuildMI(MBB, MI, Alpha::WTF, 0).addReg(SrcReg);
@@ -208,7 +207,8 @@ void
AlphaInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned DestReg, int FrameIdx,
- const TargetRegisterClass *RC) const {
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
//cerr << "Trying to load " << getPrettyName(DestReg) << " to "
// << FrameIdx << "\n";
DebugLoc DL;
@@ -399,7 +399,6 @@ unsigned AlphaInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
void AlphaInstrInfo::insertNoop(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const {
DebugLoc DL;
- if (MI != MBB.end()) DL = MI->getDebugLoc();
BuildMI(MBB, MI, DL, get(Alpha::BISr), Alpha::R31)
.addReg(Alpha::R31)
.addReg(Alpha::R31);
@@ -430,7 +429,8 @@ unsigned AlphaInstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
GlobalBaseReg = RegInfo.createVirtualRegister(&Alpha::GPRCRegClass);
bool Ok = TII->copyRegToReg(FirstMBB, MBBI, GlobalBaseReg, Alpha::R29,
- &Alpha::GPRCRegClass, &Alpha::GPRCRegClass);
+ &Alpha::GPRCRegClass, &Alpha::GPRCRegClass,
+ DebugLoc());
assert(Ok && "Couldn't assign to global base register!");
Ok = Ok; // Silence warning when assertions are turned off.
RegInfo.addLiveIn(Alpha::R29);
@@ -457,7 +457,8 @@ unsigned AlphaInstrInfo::getGlobalRetAddr(MachineFunction *MF) const {
GlobalRetAddr = RegInfo.createVirtualRegister(&Alpha::GPRCRegClass);
bool Ok = TII->copyRegToReg(FirstMBB, MBBI, GlobalRetAddr, Alpha::R26,
- &Alpha::GPRCRegClass, &Alpha::GPRCRegClass);
+ &Alpha::GPRCRegClass, &Alpha::GPRCRegClass,
+ DebugLoc());
assert(Ok && "Couldn't assign to global return address register!");
Ok = Ok; // Silence warning when assertions are turned off.
RegInfo.addLiveIn(Alpha::R26);
diff --git a/lib/Target/Alpha/AlphaInstrInfo.h b/lib/Target/Alpha/AlphaInstrInfo.h
index c3b6044..7d7365b 100644
--- a/lib/Target/Alpha/AlphaInstrInfo.h
+++ b/lib/Target/Alpha/AlphaInstrInfo.h
@@ -48,16 +48,19 @@ public:
MachineBasicBlock::iterator MI,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const;
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const;
virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned SrcReg, bool isKill, int FrameIndex,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr* MI,
diff --git a/lib/Target/Alpha/AlphaInstrInfo.td b/lib/Target/Alpha/AlphaInstrInfo.td
index d5d5e02..a47a29b 100644
--- a/lib/Target/Alpha/AlphaInstrInfo.td
+++ b/lib/Target/Alpha/AlphaInstrInfo.td
@@ -836,7 +836,7 @@ class br_fcc<bits<6> opc, string asmstr>
!strconcat(asmstr, " $R,$dst"), s_fbr>;
let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in {
-let Ra = 31 in
+let Ra = 31, isBarrier = 1 in
def BR : BFormD<0x30, "br $$31,$DISP", [(br bb:$DISP)], s_ubr>;
def COND_BRANCH_I : BFormN<0, (ins u64imm:$opc, GPRC:$R, target:$dst),
diff --git a/lib/Target/Alpha/AlphaSelectionDAGInfo.cpp b/lib/Target/Alpha/AlphaSelectionDAGInfo.cpp
index 0eb7b8f..f1958fe 100644
--- a/lib/Target/Alpha/AlphaSelectionDAGInfo.cpp
+++ b/lib/Target/Alpha/AlphaSelectionDAGInfo.cpp
@@ -12,10 +12,11 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "alpha-selectiondag-info"
-#include "AlphaSelectionDAGInfo.h"
+#include "AlphaTargetMachine.h"
using namespace llvm;
-AlphaSelectionDAGInfo::AlphaSelectionDAGInfo() {
+AlphaSelectionDAGInfo::AlphaSelectionDAGInfo(const AlphaTargetMachine &TM)
+ : TargetSelectionDAGInfo(TM) {
}
AlphaSelectionDAGInfo::~AlphaSelectionDAGInfo() {
diff --git a/lib/Target/Alpha/AlphaSelectionDAGInfo.h b/lib/Target/Alpha/AlphaSelectionDAGInfo.h
index 70889ae..3405cc0 100644
--- a/lib/Target/Alpha/AlphaSelectionDAGInfo.h
+++ b/lib/Target/Alpha/AlphaSelectionDAGInfo.h
@@ -18,9 +18,11 @@
namespace llvm {
+class AlphaTargetMachine;
+
class AlphaSelectionDAGInfo : public TargetSelectionDAGInfo {
public:
- AlphaSelectionDAGInfo();
+ explicit AlphaSelectionDAGInfo(const AlphaTargetMachine &TM);
~AlphaSelectionDAGInfo();
};
diff --git a/lib/Target/Alpha/AlphaTargetMachine.cpp b/lib/Target/Alpha/AlphaTargetMachine.cpp
index 5169a01..fc9be03 100644
--- a/lib/Target/Alpha/AlphaTargetMachine.cpp
+++ b/lib/Target/Alpha/AlphaTargetMachine.cpp
@@ -32,7 +32,8 @@ AlphaTargetMachine::AlphaTargetMachine(const Target &T, const std::string &TT,
FrameInfo(TargetFrameInfo::StackGrowsDown, 16, 0),
JITInfo(*this),
Subtarget(TT, FS),
- TLInfo(*this) {
+ TLInfo(*this),
+ TSInfo(*this) {
setRelocationModel(Reloc::PIC_);
}
diff --git a/lib/Target/Alpha/AlphaTargetMachine.h b/lib/Target/Alpha/AlphaTargetMachine.h
index 0990f6d..153944e 100644
--- a/lib/Target/Alpha/AlphaTargetMachine.h
+++ b/lib/Target/Alpha/AlphaTargetMachine.h
@@ -20,6 +20,7 @@
#include "AlphaInstrInfo.h"
#include "AlphaJITInfo.h"
#include "AlphaISelLowering.h"
+#include "AlphaSelectionDAGInfo.h"
#include "AlphaSubtarget.h"
namespace llvm {
@@ -33,6 +34,7 @@ class AlphaTargetMachine : public LLVMTargetMachine {
AlphaJITInfo JITInfo;
AlphaSubtarget Subtarget;
AlphaTargetLowering TLInfo;
+ AlphaSelectionDAGInfo TSInfo;
public:
AlphaTargetMachine(const Target &T, const std::string &TT,
@@ -47,6 +49,9 @@ public:
virtual const AlphaTargetLowering* getTargetLowering() const {
return &TLInfo;
}
+ virtual const AlphaSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
virtual const TargetData *getTargetData() const { return &DataLayout; }
virtual AlphaJITInfo* getJITInfo() {
return &JITInfo;
diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.cpp b/lib/Target/Blackfin/BlackfinInstrInfo.cpp
index cf1901b..73924b7 100644
--- a/lib/Target/Blackfin/BlackfinInstrInfo.cpp
+++ b/lib/Target/Blackfin/BlackfinInstrInfo.cpp
@@ -138,9 +138,8 @@ bool BlackfinInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
unsigned DestReg,
unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const {
- DebugLoc DL;
-
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const {
if (inClass(BF::ALLRegClass, DestReg, DestRC) &&
inClass(BF::ALLRegClass, SrcReg, SrcRC)) {
BuildMI(MBB, I, DL, get(BF::MOVE), DestReg).addReg(SrcReg);
@@ -196,7 +195,8 @@ BlackfinInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
unsigned SrcReg,
bool isKill,
int FI,
- const TargetRegisterClass *RC) const {
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
if (inClass(BF::DPRegClass, SrcReg, RC)) {
@@ -242,7 +242,8 @@ BlackfinInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned DestReg,
int FI,
- const TargetRegisterClass *RC) const {
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
if (inClass(BF::DPRegClass, DestReg, RC)) {
BuildMI(MBB, I, DL, get(BF::LOAD32fi), DestReg)
diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.h b/lib/Target/Blackfin/BlackfinInstrInfo.h
index ea3429c..c1dcd58 100644
--- a/lib/Target/Blackfin/BlackfinInstrInfo.h
+++ b/lib/Target/Blackfin/BlackfinInstrInfo.h
@@ -50,13 +50,15 @@ namespace llvm {
MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const;
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const;
virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned SrcReg, bool isKill,
int FrameIndex,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
virtual void storeRegToAddr(MachineFunction &MF,
unsigned SrcReg, bool isKill,
@@ -67,7 +69,8 @@ namespace llvm {
virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
SmallVectorImpl<MachineOperand> &Addr,
diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.td b/lib/Target/Blackfin/BlackfinInstrInfo.td
index 2471688..5cf350a 100644
--- a/lib/Target/Blackfin/BlackfinInstrInfo.td
+++ b/lib/Target/Blackfin/BlackfinInstrInfo.td
@@ -301,9 +301,9 @@ def LOAD32p_8z: F1<(outs D:$dst), (ins P:$ptr),
def : Pat<(i32 (extloadi8 P:$ptr)), (LOAD32p_8z P:$ptr)>;
def : Pat<(i16 (extloadi8 P:$ptr)),
- (EXTRACT_SUBREG (LOAD32p_8z P:$ptr), bfin_subreg_lo16)>;
+ (EXTRACT_SUBREG (LOAD32p_8z P:$ptr), lo16)>;
def : Pat<(i16 (zextloadi8 P:$ptr)),
- (EXTRACT_SUBREG (LOAD32p_8z P:$ptr), bfin_subreg_lo16)>;
+ (EXTRACT_SUBREG (LOAD32p_8z P:$ptr), lo16)>;
def LOAD32p_imm16_8z: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off),
"$dst = b[$ptr + $off] (z);",
@@ -313,17 +313,17 @@ def : Pat<(i32 (extloadi8 (add P:$ptr, imm16:$off))),
(LOAD32p_imm16_8z P:$ptr, imm:$off)>;
def : Pat<(i16 (extloadi8 (add P:$ptr, imm16:$off))),
(EXTRACT_SUBREG (LOAD32p_imm16_8z P:$ptr, imm:$off),
- bfin_subreg_lo16)>;
+ lo16)>;
def : Pat<(i16 (zextloadi8 (add P:$ptr, imm16:$off))),
(EXTRACT_SUBREG (LOAD32p_imm16_8z P:$ptr, imm:$off),
- bfin_subreg_lo16)>;
+ lo16)>;
def LOAD32p_8s: F1<(outs D:$dst), (ins P:$ptr),
"$dst = b[$ptr] (x);",
[(set D:$dst, (sextloadi8 P:$ptr))]>;
def : Pat<(i16 (sextloadi8 P:$ptr)),
- (EXTRACT_SUBREG (LOAD32p_8s P:$ptr), bfin_subreg_lo16)>;
+ (EXTRACT_SUBREG (LOAD32p_8s P:$ptr), lo16)>;
def LOAD32p_imm16_8s: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off),
"$dst = b[$ptr + $off] (x);",
@@ -331,7 +331,7 @@ def LOAD32p_imm16_8s: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off),
def : Pat<(i16 (sextloadi8 (add P:$ptr, imm16:$off))),
(EXTRACT_SUBREG (LOAD32p_imm16_8s P:$ptr, imm:$off),
- bfin_subreg_lo16)>;
+ lo16)>;
// Memory loads without patterns
let mayLoad = 1 in {
@@ -468,16 +468,16 @@ def STORE32i_post: F1<(outs I:$ptr_wb), (ins D:$val, I:$ptr, M:$off),
def : Pat<(truncstorei16 D:$val, PI:$ptr),
(STORE16pi (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS D:$val, D)),
- bfin_subreg_lo16), PI:$ptr)>;
+ lo16), PI:$ptr)>;
def : Pat<(truncstorei16 (srl D:$val, (i16 16)), PI:$ptr),
(STORE16pi (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS D:$val, D)),
- bfin_subreg_hi16), PI:$ptr)>;
+ hi16), PI:$ptr)>;
def : Pat<(truncstorei8 D16L:$val, P:$ptr),
(STORE8p (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
(i16 (COPY_TO_REGCLASS D16L:$val, D16L)),
- bfin_subreg_lo16),
+ lo16),
P:$ptr)>;
//===----------------------------------------------------------------------===//
@@ -516,19 +516,19 @@ def : Pat<(sext_inreg D16L:$src, i8),
(EXTRACT_SUBREG (MOVEsext8
(INSERT_SUBREG (i32 (IMPLICIT_DEF)),
D16L:$src,
- bfin_subreg_lo16)),
- bfin_subreg_lo16)>;
+ lo16)),
+ lo16)>;
def : Pat<(sext_inreg D:$src, i16),
- (MOVEsext (EXTRACT_SUBREG D:$src, bfin_subreg_lo16))>;
+ (MOVEsext (EXTRACT_SUBREG D:$src, lo16))>;
def : Pat<(and D:$src, 0xffff),
- (MOVEzext (EXTRACT_SUBREG D:$src, bfin_subreg_lo16))>;
+ (MOVEzext (EXTRACT_SUBREG D:$src, lo16))>;
def : Pat<(i32 (anyext D16L:$src)),
(INSERT_SUBREG (i32 (IMPLICIT_DEF)),
(i16 (COPY_TO_REGCLASS D16L:$src, D16L)),
- bfin_subreg_lo16)>;
+ lo16)>;
// TODO Dreg = Dreg_byte (X/Z)
@@ -859,4 +859,4 @@ def : Pat<(BfinCall (i32 tglobaladdr:$dst)),
def : Pat<(BfinCall (i32 texternalsym:$dst)),
(CALLa texternalsym:$dst)>;
def : Pat<(i16 (trunc D:$src)),
- (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS D:$src, D)), bfin_subreg_lo16)>;
+ (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS D:$src, D)), lo16)>;
diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
index 2512c9b..5153ace 100644
--- a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
+++ b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
@@ -111,7 +111,7 @@ BlackfinRegisterInfo::getPhysicalRegisterRegClass(unsigned reg, EVT VT) const {
bool BlackfinRegisterInfo::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
return DisableFramePointerElim(MF) ||
- MFI->hasCalls() || MFI->hasVarSizedObjects();
+ MFI->adjustsStack() || MFI->hasVarSizedObjects();
}
bool BlackfinRegisterInfo::
@@ -177,11 +177,11 @@ void BlackfinRegisterInfo::loadConstant(MachineBasicBlock &MBB,
// We must split into halves
BuildMI(MBB, I, DL,
- TII.get(BF::LOAD16i), getSubReg(Reg, bfin_subreg_hi16))
+ TII.get(BF::LOAD16i), getSubReg(Reg, BF::hi16))
.addImm((value >> 16) & 0xffff)
.addReg(Reg, RegState::ImplicitDefine);
BuildMI(MBB, I, DL,
- TII.get(BF::LOAD16i), getSubReg(Reg, bfin_subreg_lo16))
+ TII.get(BF::LOAD16i), getSubReg(Reg, BF::lo16))
.addImm(value & 0xffff)
.addReg(Reg, RegState::ImplicitKill)
.addReg(Reg, RegState::ImplicitDefine);
@@ -394,7 +394,7 @@ void BlackfinRegisterInfo::emitPrologue(MachineFunction &MF) const {
}
if (!hasFP(MF)) {
- assert(!MFI->hasCalls() &&
+ assert(!MFI->adjustsStack() &&
"FP elimination on a non-leaf function is not supported");
adjustRegister(MBB, MBBI, dl, BF::SP, BF::P1, -FrameSize);
return;
@@ -435,7 +435,7 @@ void BlackfinRegisterInfo::emitEpilogue(MachineFunction &MF,
assert(FrameSize%4 == 0 && "Misaligned frame size");
if (!hasFP(MF)) {
- assert(!MFI->hasCalls() &&
+ assert(!MFI->adjustsStack() &&
"FP elimination on a non-leaf function is not supported");
adjustRegister(MBB, MBBI, dl, BF::SP, BF::P1, FrameSize);
return;
diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.h b/lib/Target/Blackfin/BlackfinRegisterInfo.h
index 7cfb120..03c5450 100644
--- a/lib/Target/Blackfin/BlackfinRegisterInfo.h
+++ b/lib/Target/Blackfin/BlackfinRegisterInfo.h
@@ -24,13 +24,6 @@ namespace llvm {
class TargetInstrInfo;
class Type;
- // Subregister indices, keep in sync with BlackfinRegisterInfo.td
- enum BfinSubregIdx {
- bfin_subreg_lo16 = 1,
- bfin_subreg_hi16 = 2,
- bfin_subreg_lo32 = 3
- };
-
struct BlackfinRegisterInfo : public BlackfinGenRegisterInfo {
BlackfinSubtarget &Subtarget;
const TargetInstrInfo &TII;
diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.td b/lib/Target/Blackfin/BlackfinRegisterInfo.td
index d396cc8..e1cfae9 100644
--- a/lib/Target/Blackfin/BlackfinRegisterInfo.td
+++ b/lib/Target/Blackfin/BlackfinRegisterInfo.td
@@ -11,8 +11,18 @@
// Declarations that describe the Blackfin register file
//===----------------------------------------------------------------------===//
-// Registers are identified with 3-bit group and 3-bit ID numbers.
+// Subregs are:
+// 1: .L
+// 2: .H
+// 3: .W (32 low bits of 40-bit accu)
+let Namespace = "BF" in {
+def lo16 : SubRegIndex;
+def hi16 : SubRegIndex;
+def lo32 : SubRegIndex;
+def hi32 : SubRegIndex;
+}
+// Registers are identified with 3-bit group and 3-bit ID numbers.
class BlackfinReg<string n> : Register<n> {
field bits<3> Group;
field bits<3> Num;
@@ -40,6 +50,7 @@ class Ri<bits<3> group, bits<3> num, string n> : BlackfinReg<n> {
// Ra 40-bit accumulator registers
class Ra<bits<3> num, string n, list<Register> subs> : BlackfinReg<n> {
let SubRegs = subs;
+ let SubRegIndices = [hi32, lo32];
let Group = 4;
let Num = num;
}
@@ -54,6 +65,7 @@ multiclass Rss<bits<3> group, bits<3> num, string n> {
class Rii<bits<3> group, bits<3> num, string n, list<Register> subs>
: BlackfinReg<n> {
let SubRegs = subs;
+ let SubRegIndices = [hi16, lo16];
let Group = group;
let Num = num;
}
@@ -164,7 +176,7 @@ def RETN : Ri<7, 5, "retn">, DwarfRegNum<[38]>;
def RETE : Ri<7, 6, "rete">, DwarfRegNum<[39]>;
def ASTAT : Ri<4, 6, "astat">, DwarfRegNum<[40]> {
- let SubRegs = [AZ, AN, CC, NCC, AQ, AC0, AC1, AV0, AV0S, AV1, AV1S, V, VS];
+ let Aliases = [AZ, AN, CC, NCC, AQ, AC0, AC1, AV0, AV0S, AV1, AV1S, V, VS];
}
def SEQSTAT : Ri<7, 1, "seqstat">, DwarfRegNum<[41]>;
@@ -182,38 +194,6 @@ def LC1 : Ri<6, 3, "lc1">, DwarfRegNum<[47]>;
def LB0 : Ri<6, 2, "lb0">, DwarfRegNum<[48]>;
def LB1 : Ri<6, 5, "lb1">, DwarfRegNum<[49]>;
-// Subregs are:
-// 1: .L
-// 2: .H
-// 3: .W (32 low bits of 40-bit accu)
-// Keep in sync with enum in BlackfinRegisterInfo.h
-def bfin_subreg_lo16 : PatLeaf<(i32 1)>;
-def bfin_subreg_hi16 : PatLeaf<(i32 2)>;
-def bfin_subreg_32bit : PatLeaf<(i32 3)>;
-
-def : SubRegSet<1,
- [R0, R1, R2, R3, R4, R5, R6, R7,
- P0, P1, P2, P3, P4, P5, SP, FP,
- I0, I1, I2, I3, M0, M1, M2, M3,
- B0, B1, B2, B3, L0, L1, L2, L3],
- [R0L, R1L, R2L, R3L, R4L, R5L, R6L, R7L,
- P0L, P1L, P2L, P3L, P4L, P5L, SPL, FPL,
- I0L, I1L, I2L, I3L, M0L, M1L, M2L, M3L,
- B0L, B1L, B2L, B3L, L0L, L1L, L2L, L3L]>;
-
-def : SubRegSet<2,
- [R0, R1, R2, R3, R4, R5, R6, R7,
- P0, P1, P2, P3, P4, P5, SP, FP,
- I0, I1, I2, I3, M0, M1, M2, M3,
- B0, B1, B2, B3, L0, L1, L2, L3],
- [R0H, R1H, R2H, R3H, R4H, R5H, R6H, R7H,
- P0H, P1H, P2H, P3H, P4H, P5H, SPH, FPH,
- I0H, I1H, I2H, I3H, M0H, M1H, M2H, M3H,
- B0H, B1H, B2H, B3H, L0H, L1H, L2H, L3H]>;
-
-def : SubRegSet<1, [A0, A0W, A1, A1W], [A0L, A0L, A1L, A1L]>;
-def : SubRegSet<2, [A0, A0W, A1, A1W], [A0H, A0H, A1H, A1H]>;
-
// Register classes.
def D16 : RegisterClass<"BF", [i16], 16,
[R0H, R0L, R1H, R1L, R2H, R2L, R3H, R3L,
@@ -260,11 +240,11 @@ def GR16 : RegisterClass<"BF", [i16], 16,
L0H, L0L, L1H, L1L, L2H, L2L, L3H, L3L]>;
def D : RegisterClass<"BF", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, R7]> {
- let SubRegClassList = [D16L, D16H];
+ let SubRegClasses = [(D16L lo16), (D16H hi16)];
}
def P : RegisterClass<"BF", [i32], 32, [P0, P1, P2, P3, P4, P5, FP, SP]> {
- let SubRegClassList = [P16L, P16H];
+ let SubRegClasses = [(P16L lo16), (P16H hi16)];
let MethodProtos = [{
iterator allocation_order_end(const MachineFunction &MF) const;
}];
@@ -287,7 +267,7 @@ def L : RegisterClass<"BF", [i32], 32, [L0, L1, L2, L3]>;
def DP : RegisterClass<"BF", [i32], 32,
[R0, R1, R2, R3, R4, R5, R6, R7,
P0, P1, P2, P3, P4, P5, FP, SP]> {
- let SubRegClassList = [DP16L, DP16H];
+ let SubRegClasses = [(DP16L lo16), (DP16H hi16)];
let MethodProtos = [{
iterator allocation_order_end(const MachineFunction &MF) const;
}];
diff --git a/lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp b/lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp
index f4bb25f..a21f696 100644
--- a/lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp
+++ b/lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp
@@ -12,10 +12,12 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "blackfin-selectiondag-info"
-#include "BlackfinSelectionDAGInfo.h"
+#include "BlackfinTargetMachine.h"
using namespace llvm;
-BlackfinSelectionDAGInfo::BlackfinSelectionDAGInfo() {
+BlackfinSelectionDAGInfo::BlackfinSelectionDAGInfo(
+ const BlackfinTargetMachine &TM)
+ : TargetSelectionDAGInfo(TM) {
}
BlackfinSelectionDAGInfo::~BlackfinSelectionDAGInfo() {
diff --git a/lib/Target/Blackfin/BlackfinSelectionDAGInfo.h b/lib/Target/Blackfin/BlackfinSelectionDAGInfo.h
index a620330..f1ce348 100644
--- a/lib/Target/Blackfin/BlackfinSelectionDAGInfo.h
+++ b/lib/Target/Blackfin/BlackfinSelectionDAGInfo.h
@@ -18,9 +18,11 @@
namespace llvm {
+class BlackfinTargetMachine;
+
class BlackfinSelectionDAGInfo : public TargetSelectionDAGInfo {
public:
- BlackfinSelectionDAGInfo();
+ explicit BlackfinSelectionDAGInfo(const BlackfinTargetMachine &TM);
~BlackfinSelectionDAGInfo();
};
diff --git a/lib/Target/Blackfin/BlackfinTargetMachine.cpp b/lib/Target/Blackfin/BlackfinTargetMachine.cpp
index 45d7c35..66a2f68 100644
--- a/lib/Target/Blackfin/BlackfinTargetMachine.cpp
+++ b/lib/Target/Blackfin/BlackfinTargetMachine.cpp
@@ -31,6 +31,7 @@ BlackfinTargetMachine::BlackfinTargetMachine(const Target &T,
DataLayout("e-p:32:32-i64:32-f64:32-n32"),
Subtarget(TT, FS),
TLInfo(*this),
+ TSInfo(*this),
InstrInfo(Subtarget),
FrameInfo(TargetFrameInfo::StackGrowsDown, 4, 0) {
}
diff --git a/lib/Target/Blackfin/BlackfinTargetMachine.h b/lib/Target/Blackfin/BlackfinTargetMachine.h
index 07e7394..a63aa54 100644
--- a/lib/Target/Blackfin/BlackfinTargetMachine.h
+++ b/lib/Target/Blackfin/BlackfinTargetMachine.h
@@ -20,6 +20,7 @@
#include "BlackfinInstrInfo.h"
#include "BlackfinSubtarget.h"
#include "BlackfinISelLowering.h"
+#include "BlackfinSelectionDAGInfo.h"
#include "BlackfinIntrinsicInfo.h"
namespace llvm {
@@ -28,6 +29,7 @@ namespace llvm {
const TargetData DataLayout;
BlackfinSubtarget Subtarget;
BlackfinTargetLowering TLInfo;
+ BlackfinSelectionDAGInfo TSInfo;
BlackfinInstrInfo InstrInfo;
TargetFrameInfo FrameInfo;
BlackfinIntrinsicInfo IntrinsicInfo;
@@ -46,6 +48,9 @@ namespace llvm {
virtual const BlackfinTargetLowering* getTargetLowering() const {
return &TLInfo;
}
+ virtual const BlackfinSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
virtual const TargetData *getTargetData() const { return &DataLayout; }
virtual bool addInstSelector(PassManagerBase &PM,
CodeGenOpt::Level OptLevel);
diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp
index 67f513b..55b8aaa 100644
--- a/lib/Target/CBackend/CBackend.cpp
+++ b/lib/Target/CBackend/CBackend.cpp
@@ -2165,6 +2165,9 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) {
case CallingConv::X86_FastCall:
Out << "__attribute__((fastcall)) ";
break;
+ case CallingConv::X86_ThisCall:
+ Out << "__attribute__((thiscall)) ";
+ break;
default:
break;
}
@@ -3554,11 +3557,11 @@ void CWriter::visitExtractValueInst(ExtractValueInst &EVI) {
// External Interface declaration
//===----------------------------------------------------------------------===//
-bool CTargetMachine::addPassesToEmitWholeFile(PassManager &PM,
- formatted_raw_ostream &o,
- CodeGenFileType FileType,
- CodeGenOpt::Level OptLevel,
- bool DisableVerify) {
+bool CTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
+ formatted_raw_ostream &o,
+ CodeGenFileType FileType,
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify) {
if (FileType != TargetMachine::CGFT_AssemblyFile) return true;
PM.add(createGCLoweringPass());
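After this change the C backend is driven through the common addPassesToEmitFile hook instead of the WantsWholeFile special case. A hypothetical caller-side fragment; only the hook's signature comes from the patch:

  // addPassesToEmitFile returns true on failure (e.g. a non-assembly
  // file type); the driver objects below are illustrative.
  PassManager PM;
  if (TM.addPassesToEmitFile(PM, FOS, TargetMachine::CGFT_AssemblyFile,
                             CodeGenOpt::Default, /*DisableVerify=*/true))
    errs() << "target does not support C source emission\n";
  else
    PM.run(Mod);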
diff --git a/lib/Target/CBackend/CTargetMachine.h b/lib/Target/CBackend/CTargetMachine.h
index d178e7f..6fed195 100644
--- a/lib/Target/CBackend/CTargetMachine.h
+++ b/lib/Target/CBackend/CTargetMachine.h
@@ -23,12 +23,11 @@ struct CTargetMachine : public TargetMachine {
CTargetMachine(const Target &T, const std::string &TT, const std::string &FS)
: TargetMachine(T) {}
- virtual bool WantsWholeFile() const { return true; }
- virtual bool addPassesToEmitWholeFile(PassManager &PM,
- formatted_raw_ostream &Out,
- CodeGenFileType FileType,
- CodeGenOpt::Level OptLevel,
- bool DisableVerify);
+ virtual bool addPassesToEmitFile(PassManagerBase &PM,
+ formatted_raw_ostream &Out,
+ CodeGenFileType FileType,
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify);
virtual const TargetData *getTargetData() const { return 0; }
};
diff --git a/lib/Target/CellSPU/README.txt b/lib/Target/CellSPU/README.txt
index 4783dd5..0e7ad35 100644
--- a/lib/Target/CellSPU/README.txt
+++ b/lib/Target/CellSPU/README.txt
@@ -10,6 +10,8 @@ Department in The Aerospace Corporation:
- Chandler Carruth (LLVM expertise)
- Nehal Desai (debugging, i32 operations, RoadRunner SPU expertise)
+Some minor fixes added by Kalle Raiskila.
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
index 5e04454..081e8d0 100644
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -485,7 +485,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
// Set pre-RA register scheduler default to BURR, which produces slightly
// better code than the default (could also be TDRR, but TargetLowering.h
// needs a mod to support that model):
- setSchedulingPreference(SchedulingForRegPressure);
+ setSchedulingPreference(Sched::RegPressure);
}
const char *
diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp
index 423da3b..4c53c98 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.cpp
+++ b/lib/Target/CellSPU/SPUInstrInfo.cpp
@@ -255,16 +255,14 @@ bool SPUInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const
{
// We support cross register class moves for our aliases, such as R3 in any
// reg class to any other reg class containing R3. This is required because
// we instruction select bitconvert i64 -> f64 as a noop for example, so our
// types have no specific meaning.
- DebugLoc DL;
- if (MI != MBB.end()) DL = MI->getDebugLoc();
-
if (DestRC == SPU::R8CRegisterClass) {
BuildMI(MBB, MI, DL, get(SPU::LRr8), DestReg).addReg(SrcReg);
} else if (DestRC == SPU::R16CRegisterClass) {
@@ -291,9 +289,10 @@ bool SPUInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
void
SPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned SrcReg, bool isKill, int FrameIdx,
- const TargetRegisterClass *RC) const
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const
{
unsigned opc;
bool isValidFrameIdx = (FrameIdx < SPUFrameInfo::maxFrameOffset());
@@ -325,9 +324,10 @@ SPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
void
SPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned DestReg, int FrameIdx,
- const TargetRegisterClass *RC) const
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const
{
unsigned opc;
bool isValidFrameIdx = (FrameIdx < SPUFrameInfo::maxFrameOffset());
@@ -467,6 +467,9 @@ SPUInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
// If there is only one terminator instruction, process it.
if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
if (isUncondBranch(LastInst)) {
+ // Check for jump tables
+ if (!LastInst->getOperand(0).isMBB())
+ return true;
TBB = LastInst->getOperand(0).getMBB();
return false;
} else if (isCondBranch(LastInst)) {
diff --git a/lib/Target/CellSPU/SPUInstrInfo.h b/lib/Target/CellSPU/SPUInstrInfo.h
index 42677fc..6dabd7c 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.h
+++ b/lib/Target/CellSPU/SPUInstrInfo.h
@@ -60,19 +60,22 @@ namespace llvm {
MachineBasicBlock::iterator MI,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const;
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const;
//! Store a register to a stack slot, based on its register class.
virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned SrcReg, bool isKill, int FrameIndex,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
//! Load a register from a stack slot, based on its register class.
virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
//! Return true if the specified load or store can be folded
virtual
diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td
index 6d1f87d..a7fb14c 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.td
+++ b/lib/Target/CellSPU/SPUInstrInfo.td
@@ -655,7 +655,7 @@ def SFHvec:
def SFHr16:
RRForm<0b00010010000, (outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
"sfh\t$rT, $rA, $rB", IntegerOp,
- [(set R16C:$rT, (sub R16C:$rA, R16C:$rB))]>;
+ [(set R16C:$rT, (sub R16C:$rB, R16C:$rA))]>;
def SFHIvec:
RI10Form<0b10110000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
@@ -670,11 +670,11 @@ def SFHIr16 : RI10Form<0b10110000, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
def SFvec : RRForm<0b00000010000, (outs VECREG:$rT),
(ins VECREG:$rA, VECREG:$rB),
"sf\t$rT, $rA, $rB", IntegerOp,
- [(set (v4i32 VECREG:$rT), (sub (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+ [(set (v4i32 VECREG:$rT), (sub (v4i32 VECREG:$rB), (v4i32 VECREG:$rA)))]>;
def SFr32 : RRForm<0b00000010000, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
"sf\t$rT, $rA, $rB", IntegerOp,
- [(set R32C:$rT, (sub R32C:$rA, R32C:$rB))]>;
+ [(set R32C:$rT, (sub R32C:$rB, R32C:$rA))]>;
def SFIvec:
RI10Form<0b00110000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
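The three pattern fixes in SPUInstrInfo.td above correct the operand order for SPU's "subtract from" instructions, whose semantics invert the usual order. A one-line reference, with a worked example in the comments:

  // SPU "subtract from": sf $rT, $rA, $rB computes rT = rB - rA, so a
  // DAG (sub x, y) must bind y to $rA and x to $rB.
  // Worked example: sub(10, 3) selects rA = 3, rB = 10; sf yields 7.
  static int spuSubtractFrom(int rA, int rB) { return rB - rA; }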
diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp
index fdbe10f..d8937ec 100644
--- a/lib/Target/CellSPU/SPURegisterInfo.cpp
+++ b/lib/Target/CellSPU/SPURegisterInfo.cpp
@@ -469,7 +469,7 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const
&& "SPURegisterInfo::emitPrologue: FrameSize not aligned");
// the "empty" frame size is 16 - just the register scavenger spill slot
- if (FrameSize > 16 || MFI->hasCalls()) {
+ if (FrameSize > 16 || MFI->adjustsStack()) {
FrameSize = -(FrameSize + SPUFrameInfo::minStackSize());
if (hasDebugInfo) {
// Mark effective beginning of when frame pointer becomes valid.
@@ -569,7 +569,7 @@ SPURegisterInfo::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const
&& "SPURegisterInfo::emitEpilogue: FrameSize not aligned");
// the "empty" frame size is 16 - just the register scavenger spill slot
- if (FrameSize > 16 || MFI->hasCalls()) {
+ if (FrameSize > 16 || MFI->adjustsStack()) {
FrameSize = FrameSize + SPUFrameInfo::minStackSize();
if (isInt<10>(FrameSize + LinkSlotOffset)) {
// Reload $lr, adjust $sp by required amount
diff --git a/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp b/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp
index ca2a4bf..5732fd4 100644
--- a/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp
+++ b/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp
@@ -12,10 +12,11 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "cellspu-selectiondag-info"
-#include "SPUSelectionDAGInfo.h"
+#include "SPUTargetMachine.h"
using namespace llvm;
-SPUSelectionDAGInfo::SPUSelectionDAGInfo() {
+SPUSelectionDAGInfo::SPUSelectionDAGInfo(const SPUTargetMachine &TM)
+ : TargetSelectionDAGInfo(TM) {
}
SPUSelectionDAGInfo::~SPUSelectionDAGInfo() {
diff --git a/lib/Target/CellSPU/SPUSelectionDAGInfo.h b/lib/Target/CellSPU/SPUSelectionDAGInfo.h
index 0a6b4c1..39257d9 100644
--- a/lib/Target/CellSPU/SPUSelectionDAGInfo.h
+++ b/lib/Target/CellSPU/SPUSelectionDAGInfo.h
@@ -18,9 +18,11 @@
namespace llvm {
+class SPUTargetMachine;
+
class SPUSelectionDAGInfo : public TargetSelectionDAGInfo {
public:
- SPUSelectionDAGInfo();
+ explicit SPUSelectionDAGInfo(const SPUTargetMachine &TM);
~SPUSelectionDAGInfo();
};
diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp
index 6500067..480ec3f 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.cpp
+++ b/lib/Target/CellSPU/SPUTargetMachine.cpp
@@ -42,6 +42,7 @@ SPUTargetMachine::SPUTargetMachine(const Target &T, const std::string &TT,
InstrInfo(*this),
FrameInfo(*this),
TLInfo(*this),
+ TSInfo(*this),
InstrItins(Subtarget.getInstrItineraryData()) {
// For the time being, use static relocations, since there's really no
// support for PIC yet.
diff --git a/lib/Target/CellSPU/SPUTargetMachine.h b/lib/Target/CellSPU/SPUTargetMachine.h
index 37e7cd2..7e02701 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.h
+++ b/lib/Target/CellSPU/SPUTargetMachine.h
@@ -17,6 +17,7 @@
#include "SPUSubtarget.h"
#include "SPUInstrInfo.h"
#include "SPUISelLowering.h"
+#include "SPUSelectionDAGInfo.h"
#include "SPUFrameInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetData.h"
@@ -34,6 +35,7 @@ class SPUTargetMachine : public LLVMTargetMachine {
SPUInstrInfo InstrInfo;
SPUFrameInfo FrameInfo;
SPUTargetLowering TLInfo;
+ SPUSelectionDAGInfo TSInfo;
InstrItineraryData InstrItins;
public:
SPUTargetMachine(const Target &T, const std::string &TT,
@@ -61,6 +63,10 @@ public:
return &TLInfo;
}
+ virtual const SPUSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+
virtual const SPURegisterInfo *getRegisterInfo() const {
return &InstrInfo.getRegisterInfo();
}
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index e739b26..45a0c84 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -1038,6 +1038,11 @@ namespace {
Out << ");";
nl(Out);
}
+ if (GV->isThreadLocal()) {
+ printCppName(GV);
+ Out << "->setThreadLocal(true);";
+ nl(Out);
+ }
if (is_inline) {
out(); Out << "}"; nl(Out);
}
@@ -2007,11 +2012,11 @@ char CppWriter::ID = 0;
// External Interface declaration
//===----------------------------------------------------------------------===//
-bool CPPTargetMachine::addPassesToEmitWholeFile(PassManager &PM,
- formatted_raw_ostream &o,
- CodeGenFileType FileType,
- CodeGenOpt::Level OptLevel,
- bool DisableVerify) {
+bool CPPTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
+ formatted_raw_ostream &o,
+ CodeGenFileType FileType,
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify) {
if (FileType != TargetMachine::CGFT_AssemblyFile) return true;
PM.add(new CppWriter(o));
return false;
diff --git a/lib/Target/CppBackend/CPPTargetMachine.h b/lib/Target/CppBackend/CPPTargetMachine.h
index b7aae91..e42166e 100644
--- a/lib/Target/CppBackend/CPPTargetMachine.h
+++ b/lib/Target/CppBackend/CPPTargetMachine.h
@@ -26,12 +26,11 @@ struct CPPTargetMachine : public TargetMachine {
const std::string &FS)
: TargetMachine(T) {}
- virtual bool WantsWholeFile() const { return true; }
- virtual bool addPassesToEmitWholeFile(PassManager &PM,
- formatted_raw_ostream &Out,
- CodeGenFileType FileType,
- CodeGenOpt::Level OptLevel,
- bool DisableVerify);
+ virtual bool addPassesToEmitFile(PassManagerBase &PM,
+ formatted_raw_ostream &Out,
+ CodeGenFileType FileType,
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify);
virtual const TargetData *getTargetData() const { return 0; }
};
diff --git a/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp b/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp
index 04dfb0a..e42e9b3 100644
--- a/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp
+++ b/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp
@@ -155,7 +155,7 @@ void MBlazeAsmPrinter::printSavedRegsBitmask(raw_ostream &O) {
CPUBitmask |= (1 << MBlazeRegisterInfo::
getRegisterNumbering(RI.getFrameRegister(*MF)));
- if (MFI->hasCalls())
+ if (MFI->adjustsStack())
CPUBitmask |= (1 << MBlazeRegisterInfo::
getRegisterNumbering(RI.getRARegister()));
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.cpp b/lib/Target/MBlaze/MBlazeInstrInfo.cpp
index 01f3174..4c4d86b 100644
--- a/lib/Target/MBlaze/MBlazeInstrInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.cpp
@@ -107,7 +107,6 @@ isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const {
void MBlazeInstrInfo::
insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const {
DebugLoc DL;
- if (MI != MBB.end()) DL = MI->getDebugLoc();
BuildMI(MBB, MI, DL, get(MBlaze::NOP));
}
@@ -115,8 +114,8 @@ bool MBlazeInstrInfo::
copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const {
- DebugLoc DL;
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const {
llvm::BuildMI(MBB, I, DL, get(MBlaze::ADD), DestReg)
.addReg(SrcReg).addReg(MBlaze::R0);
return true;
@@ -125,7 +124,8 @@ copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
void MBlazeInstrInfo::
storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned SrcReg, bool isKill, int FI,
- const TargetRegisterClass *RC) const {
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
DebugLoc DL;
BuildMI(MBB, I, DL, get(MBlaze::SWI)).addReg(SrcReg,getKillRegState(isKill))
.addImm(0).addFrameIndex(FI);
@@ -134,7 +134,8 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
void MBlazeInstrInfo::
loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned DestReg, int FI,
- const TargetRegisterClass *RC) const {
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
DebugLoc DL;
BuildMI(MBB, I, DL, get(MBlaze::LWI), DestReg)
.addImm(0).addFrameIndex(FI);
@@ -210,7 +211,8 @@ unsigned MBlazeInstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
GlobalBaseReg = RegInfo.createVirtualRegister(MBlaze::CPURegsRegisterClass);
bool Ok = TII->copyRegToReg(FirstMBB, MBBI, GlobalBaseReg, MBlaze::R20,
MBlaze::CPURegsRegisterClass,
- MBlaze::CPURegsRegisterClass);
+ MBlaze::CPURegsRegisterClass,
+ DebugLoc());
assert(Ok && "Couldn't assign to global base register!");
Ok = Ok; // Silence warning when assertions are turned off.
RegInfo.addLiveIn(MBlaze::R20);
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.h b/lib/Target/MBlaze/MBlazeInstrInfo.h
index 4f79f1c..c9fdc88 100644
--- a/lib/Target/MBlaze/MBlazeInstrInfo.h
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.h
@@ -203,16 +203,19 @@ public:
MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const;
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const;
virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned SrcReg, bool isKill, int FrameIndex,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr* MI,
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
index e15176e..f15eea9 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
@@ -220,7 +220,7 @@ void MBlazeRegisterInfo::adjustMBlazeStackFrame(MachineFunction &MF) const {
StackOffset += RegSize;
}
- if (MFI->hasCalls()) {
+ if (MFI->adjustsStack()) {
MBlazeFI->setRAStackOffset(0);
MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true),
StackOffset);
@@ -311,8 +311,8 @@ emitPrologue(MachineFunction &MF) const {
unsigned StackSize = MFI->getStackSize();
// No need to allocate space on the stack.
- if (StackSize == 0 && !MFI->hasCalls()) return;
- if (StackSize < 28 && MFI->hasCalls()) StackSize = 28;
+ if (StackSize == 0 && !MFI->adjustsStack()) return;
+ if (StackSize < 28 && MFI->adjustsStack()) StackSize = 28;
int FPOffset = MBlazeFI->getFPStackOffset();
int RAOffset = MBlazeFI->getRAStackOffset();
@@ -323,7 +323,7 @@ emitPrologue(MachineFunction &MF) const {
// Save the return address only if the function isn't a leaf one.
// swi R15, R1, stack_loc
- if (MFI->hasCalls()) {
+ if (MFI->adjustsStack()) {
BuildMI(MBB, MBBI, DL, TII.get(MBlaze::SWI))
.addReg(MBlaze::R15).addImm(RAOffset).addReg(MBlaze::R1);
}
@@ -366,14 +366,14 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {
// Restore the return address only if the function isn't a leaf one.
// lwi R15, R1, stack_loc
- if (MFI->hasCalls()) {
+ if (MFI->adjustsStack()) {
BuildMI(MBB, MBBI, dl, TII.get(MBlaze::LWI), MBlaze::R15)
.addImm(RAOffset).addReg(MBlaze::R1);
}
// Get the number of bytes from FrameInfo
int StackSize = (int) MFI->getStackSize();
- if (StackSize < 28 && MFI->hasCalls()) StackSize = 28;
+ if (StackSize < 28 && MFI->adjustsStack()) StackSize = 28;
// adjust stack.
// addi R1, R1, imm
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.td b/lib/Target/MBlaze/MBlazeRegisterInfo.td
index 96a5c98..d0a1e75 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.td
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.td
@@ -17,21 +17,15 @@ class MBlazeReg<string n> : Register<n> {
let Namespace = "MBlaze";
}
-class MBlazeRegWithSubRegs<string n, list<Register> subregs>
- : RegisterWithSubRegs<n, subregs> {
- field bits<5> Num;
- let Namespace = "MBlaze";
-}
-
// MBlaze CPU Registers
class MBlazeGPRReg<bits<5> num, string n> : MBlazeReg<n> {
let Num = num;
}
// MBlaze 32-bit (aliased) FPU Registers
-class FPR<bits<5> num, string n, list<Register> subregs>
- : MBlazeRegWithSubRegs<n, subregs> {
+class FPR<bits<5> num, string n, list<Register> aliases> : MBlazeReg<n> {
let Num = num;
+ let Aliases = aliases;
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/MBlaze/MBlazeSelectionDAGInfo.cpp b/lib/Target/MBlaze/MBlazeSelectionDAGInfo.cpp
index 105e42a..6a115b2 100644
--- a/lib/Target/MBlaze/MBlazeSelectionDAGInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeSelectionDAGInfo.cpp
@@ -12,10 +12,11 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "mblaze-selectiondag-info"
-#include "MBlazeSelectionDAGInfo.h"
+#include "MBlazeTargetMachine.h"
using namespace llvm;
-MBlazeSelectionDAGInfo::MBlazeSelectionDAGInfo() {
+MBlazeSelectionDAGInfo::MBlazeSelectionDAGInfo(const MBlazeTargetMachine &TM)
+ : TargetSelectionDAGInfo(TM) {
}
MBlazeSelectionDAGInfo::~MBlazeSelectionDAGInfo() {
diff --git a/lib/Target/MBlaze/MBlazeSelectionDAGInfo.h b/lib/Target/MBlaze/MBlazeSelectionDAGInfo.h
index 11e6879..9f8e2aa 100644
--- a/lib/Target/MBlaze/MBlazeSelectionDAGInfo.h
+++ b/lib/Target/MBlaze/MBlazeSelectionDAGInfo.h
@@ -18,9 +18,11 @@
namespace llvm {
+class MBlazeTargetMachine;
+
class MBlazeSelectionDAGInfo : public TargetSelectionDAGInfo {
public:
- MBlazeSelectionDAGInfo();
+ explicit MBlazeSelectionDAGInfo(const MBlazeTargetMachine &TM);
~MBlazeSelectionDAGInfo();
};
diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
index 9eba2b3..4252953 100644
--- a/lib/Target/MBlaze/MBlazeTargetMachine.cpp
+++ b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
@@ -39,7 +39,7 @@ MBlazeTargetMachine(const Target &T, const std::string &TT,
"f64:32:32-v64:32:32-v128:32:32-n32"),
InstrInfo(*this),
FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0),
- TLInfo(*this) {
+ TLInfo(*this), TSInfo(*this) {
if (getRelocationModel() == Reloc::Default) {
setRelocationModel(Reloc::Static);
}
diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.h b/lib/Target/MBlaze/MBlazeTargetMachine.h
index 9bf9898..6a57e58 100644
--- a/lib/Target/MBlaze/MBlazeTargetMachine.h
+++ b/lib/Target/MBlaze/MBlazeTargetMachine.h
@@ -17,6 +17,7 @@
#include "MBlazeSubtarget.h"
#include "MBlazeInstrInfo.h"
#include "MBlazeISelLowering.h"
+#include "MBlazeSelectionDAGInfo.h"
#include "MBlazeIntrinsicInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetData.h"
@@ -31,6 +32,7 @@ namespace llvm {
MBlazeInstrInfo InstrInfo;
TargetFrameInfo FrameInfo;
MBlazeTargetLowering TLInfo;
+ MBlazeSelectionDAGInfo TSInfo;
MBlazeIntrinsicInfo IntrinsicInfo;
public:
MBlazeTargetMachine(const Target &T, const std::string &TT,
@@ -54,6 +56,9 @@ namespace llvm {
virtual const MBlazeTargetLowering *getTargetLowering() const
{ return &TLInfo; }
+ virtual const MBlazeSelectionDAGInfo* getSelectionDAGInfo() const
+ { return &TSInfo; }
+
const TargetIntrinsicInfo *getIntrinsicInfo() const
{ return &IntrinsicInfo; }
diff --git a/lib/Target/MSIL/MSILWriter.cpp b/lib/Target/MSIL/MSILWriter.cpp
index 15d16ec..3de173c 100644
--- a/lib/Target/MSIL/MSILWriter.cpp
+++ b/lib/Target/MSIL/MSILWriter.cpp
@@ -34,12 +34,11 @@ namespace llvm {
MSILTarget(const Target &T, const std::string &TT, const std::string &FS)
: TargetMachine(T) {}
- virtual bool WantsWholeFile() const { return true; }
- virtual bool addPassesToEmitWholeFile(PassManager &PM,
- formatted_raw_ostream &Out,
- CodeGenFileType FileType,
- CodeGenOpt::Level OptLevel,
- bool DisableVerify);
+ virtual bool addPassesToEmitFile(PassManagerBase &PM,
+ formatted_raw_ostream &Out,
+ CodeGenFileType FileType,
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify);
virtual const TargetData *getTargetData() const { return 0; }
};
@@ -279,6 +278,8 @@ std::string MSILWriter::getConvModopt(CallingConv::ID CallingConvID) {
return "modopt([mscorlib]System.Runtime.CompilerServices.CallConvFastcall) ";
case CallingConv::X86_StdCall:
return "modopt([mscorlib]System.Runtime.CompilerServices.CallConvStdcall) ";
+ case CallingConv::X86_ThisCall:
+ return "modopt([mscorlib]System.Runtime.CompilerServices.CallConvThiscall) ";
default:
errs() << "CallingConvID = " << CallingConvID << '\n';
llvm_unreachable("Unsupported calling convention");
@@ -1686,11 +1687,11 @@ void MSILWriter::printExternals() {
// External Interface declaration
//===----------------------------------------------------------------------===//
-bool MSILTarget::addPassesToEmitWholeFile(PassManager &PM,
- formatted_raw_ostream &o,
- CodeGenFileType FileType,
- CodeGenOpt::Level OptLevel,
- bool DisableVerify)
+bool MSILTarget::addPassesToEmitFile(PassManagerBase &PM,
+ formatted_raw_ostream &o,
+ CodeGenFileType FileType,
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify)
{
if (FileType != TargetMachine::CGFT_AssemblyFile) return true;
MSILWriter* Writer = new MSILWriter(o);
diff --git a/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.h b/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.h
index f9620e8..e937696 100644
--- a/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.h
+++ b/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.h
@@ -26,7 +26,7 @@ namespace llvm {
/// MSP430MCInstLower - This class is used to lower a MachineInstr
/// into an MCInst.
-class VISIBILITY_HIDDEN MSP430MCInstLower {
+class LLVM_LIBRARY_VISIBILITY MSP430MCInstLower {
MCContext &Ctx;
Mangler &Mang;
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp
index c3e2bdf7..403400e 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -83,7 +83,7 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) :
setStackPointerRegisterToSaveRestore(MSP430::SPW);
setBooleanContents(ZeroOrOneBooleanContent);
- setSchedulingPreference(SchedulingForLatency);
+ setSchedulingPreference(Sched::Latency);
// We have post-incremented loads / stores.
setIndexedLoadAction(ISD::POST_INC, MVT::i8, Legal);
@@ -897,6 +897,9 @@ MSP430TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
SDValue MSP430TargetLowering::LowerRETURNADDR(SDValue Op,
SelectionDAG &DAG) const {
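+ // Record that the return address is taken so later passes preserve it.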
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setReturnAddressIsTaken(true);
+
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
DebugLoc dl = Op.getDebugLoc();
@@ -920,6 +923,7 @@ SDValue MSP430TargetLowering::LowerFRAMEADDR(SDValue Op,
SelectionDAG &DAG) const {
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
MFI->setFrameAddressIsTaken(true);
+
EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp
index 2b09b3d..18226ab 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -32,7 +32,8 @@ MSP430InstrInfo::MSP430InstrInfo(MSP430TargetMachine &tm)
void MSP430InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned SrcReg, bool isKill, int FrameIdx,
- const TargetRegisterClass *RC) const {
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
DebugLoc DL;
if (MI != MBB.end()) DL = MI->getDebugLoc();
MachineFunction &MF = *MBB.getParent();
@@ -59,7 +60,8 @@ void MSP430InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
void MSP430InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned DestReg, int FrameIdx,
- const TargetRegisterClass *RC) const{
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const{
DebugLoc DL;
if (MI != MBB.end()) DL = MI->getDebugLoc();
MachineFunction &MF = *MBB.getParent();
@@ -85,10 +87,8 @@ bool MSP430InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const {
- DebugLoc DL;
- if (I != MBB.end()) DL = I->getDebugLoc();
-
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const {
if (DestRC == SrcRC) {
unsigned Opc;
if (DestRC == &MSP430::GR16RegClass) {
@@ -130,7 +130,8 @@ MSP430InstrInfo::isMoveInstr(const MachineInstr& MI,
bool
MSP430InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI) const {
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
if (CSI.empty())
return false;
@@ -154,7 +155,8 @@ MSP430InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
bool
MSP430InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI) const {
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
if (CSI.empty())
return false;
diff --git a/lib/Target/MSP430/MSP430InstrInfo.h b/lib/Target/MSP430/MSP430InstrInfo.h
index 6ef4b0a..842b4cb 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.h
+++ b/lib/Target/MSP430/MSP430InstrInfo.h
@@ -52,7 +52,8 @@ public:
bool copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const;
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const;
bool isMoveInstr(const MachineInstr& MI,
unsigned &SrcReg, unsigned &DstReg,
@@ -62,18 +63,22 @@ public:
MachineBasicBlock::iterator MI,
unsigned SrcReg, bool isKill,
int FrameIndex,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned DestReg, int FrameIdx,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI) const;
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI) const;
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
unsigned GetInstSizeInBytes(const MachineInstr *MI) const;
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.td b/lib/Target/MSP430/MSP430RegisterInfo.td
index 4078626..f8aec66 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.td
+++ b/lib/Target/MSP430/MSP430RegisterInfo.td
@@ -43,6 +43,9 @@ def R13B : MSP430Reg<13, "r13">;
def R14B : MSP430Reg<14, "r14">;
def R15B : MSP430Reg<15, "r15">;
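+// Index of the low 8-bit subregister inside each 16-bit register.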
+def subreg_8bit : SubRegIndex { let Namespace = "MSP430"; }
+
+let SubRegIndices = [subreg_8bit] in {
def PCW : MSP430RegWithSubregs<0, "r0", [PCB]>;
def SPW : MSP430RegWithSubregs<1, "r1", [SPB]>;
def SRW : MSP430RegWithSubregs<2, "r2", [SRB]>;
@@ -59,13 +62,7 @@ def R12W : MSP430RegWithSubregs<12, "r12", [R12B]>;
def R13W : MSP430RegWithSubregs<13, "r13", [R13B]>;
def R14W : MSP430RegWithSubregs<14, "r14", [R14B]>;
def R15W : MSP430RegWithSubregs<15, "r15", [R15B]>;
-
-def : SubRegSet<1, [PCW, SPW, SRW, CGW, FPW,
- R5W, R6W, R7W, R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W],
- [PCB, SPB, SRB, CGB, FPB,
- R5B, R6B, R7B, R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>;
-
-def subreg_8bit : PatLeaf<(i32 1)>;
+}
def GR8 : RegisterClass<"MSP430", [i8], 8,
// Volatile registers
@@ -101,7 +98,7 @@ def GR16 : RegisterClass<"MSP430", [i16], 16,
// Volatile, but not allocable
PCW, SPW, SRW, CGW]>
{
- let SubRegClassList = [GR8];
+ let SubRegClasses = [(GR8 subreg_8bit)];
let MethodProtos = [{
iterator allocation_order_end(const MachineFunction &MF) const;
}];
diff --git a/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp b/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp
index a54c929..24f45fa 100644
--- a/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp
+++ b/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp
@@ -12,10 +12,11 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "msp430-selectiondag-info"
-#include "MSP430SelectionDAGInfo.h"
+#include "MSP430TargetMachine.h"
using namespace llvm;
-MSP430SelectionDAGInfo::MSP430SelectionDAGInfo() {
+MSP430SelectionDAGInfo::MSP430SelectionDAGInfo(const MSP430TargetMachine &TM)
+ : TargetSelectionDAGInfo(TM) {
}
MSP430SelectionDAGInfo::~MSP430SelectionDAGInfo() {
diff --git a/lib/Target/MSP430/MSP430SelectionDAGInfo.h b/lib/Target/MSP430/MSP430SelectionDAGInfo.h
index c952ab7..fa81948 100644
--- a/lib/Target/MSP430/MSP430SelectionDAGInfo.h
+++ b/lib/Target/MSP430/MSP430SelectionDAGInfo.h
@@ -18,9 +18,11 @@
namespace llvm {
+class MSP430TargetMachine;
+
class MSP430SelectionDAGInfo : public TargetSelectionDAGInfo {
public:
- MSP430SelectionDAGInfo();
+ explicit MSP430SelectionDAGInfo(const MSP430TargetMachine &TM);
~MSP430SelectionDAGInfo();
};
diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp
index a0dbac2..99877c8 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -33,7 +33,7 @@ MSP430TargetMachine::MSP430TargetMachine(const Target &T,
Subtarget(TT, FS),
// FIXME: Check TargetData string.
DataLayout("e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"),
- InstrInfo(*this), TLInfo(*this),
+ InstrInfo(*this), TLInfo(*this), TSInfo(*this),
FrameInfo(TargetFrameInfo::StackGrowsDown, 2, -2) { }
diff --git a/lib/Target/MSP430/MSP430TargetMachine.h b/lib/Target/MSP430/MSP430TargetMachine.h
index 68bde9a..b93edfd 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.h
+++ b/lib/Target/MSP430/MSP430TargetMachine.h
@@ -17,6 +17,7 @@
#include "MSP430InstrInfo.h"
#include "MSP430ISelLowering.h"
+#include "MSP430SelectionDAGInfo.h"
#include "MSP430RegisterInfo.h"
#include "MSP430Subtarget.h"
#include "llvm/Target/TargetData.h"
@@ -32,6 +33,7 @@ class MSP430TargetMachine : public LLVMTargetMachine {
const TargetData DataLayout; // Calculates type size & alignment
MSP430InstrInfo InstrInfo;
MSP430TargetLowering TLInfo;
+ MSP430SelectionDAGInfo TSInfo;
// MSP430 does not have any call stack frame, therefore not having
// any MSP430 specific FrameInfo class.
@@ -54,6 +56,10 @@ public:
return &TLInfo;
}
+ virtual const MSP430SelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+
virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
}; // MSP430TargetMachine.
diff --git a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
index d269153..4d7fe4c 100644
--- a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
@@ -145,7 +145,7 @@ void MipsAsmPrinter::printSavedRegsBitmask(raw_ostream &O) {
CPUBitmask |= (1 << MipsRegisterInfo::
getRegisterNumbering(RI.getFrameRegister(*MF)));
- if (MFI->hasCalls())
+ if (MFI->adjustsStack())
CPUBitmask |= (1 << MipsRegisterInfo::
getRegisterNumbering(RI.getRARegister()));
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index ee85a3f3..3888bbf 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -225,12 +225,12 @@ SDNode *MipsDAGToDAGISel::SelectLoadFp64(SDNode *N) {
MVT::Other, Offset0, Base, Chain);
SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
dl, NVT), 0);
- SDValue I0 = CurDAG->getTargetInsertSubreg(Mips::SUBREG_FPEVEN, dl,
+ SDValue I0 = CurDAG->getTargetInsertSubreg(Mips::sub_fpeven, dl,
MVT::f64, Undef, SDValue(LD0, 0));
SDNode *LD1 = CurDAG->getMachineNode(Mips::LWC1, dl, MVT::f32,
MVT::Other, Offset1, Base, SDValue(LD0, 1));
- SDValue I1 = CurDAG->getTargetInsertSubreg(Mips::SUBREG_FPODD, dl,
+ SDValue I1 = CurDAG->getTargetInsertSubreg(Mips::sub_fpodd, dl,
MVT::f64, I0, SDValue(LD1, 0));
ReplaceUses(SDValue(N, 0), I1);
@@ -266,9 +266,9 @@ SDNode *MipsDAGToDAGISel::SelectStoreFp64(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
// Get the even and odd part from the f64 register
- SDValue FPOdd = CurDAG->getTargetExtractSubreg(Mips::SUBREG_FPODD,
+ SDValue FPOdd = CurDAG->getTargetExtractSubreg(Mips::sub_fpodd,
dl, MVT::f32, N1);
- SDValue FPEven = CurDAG->getTargetExtractSubreg(Mips::SUBREG_FPEVEN,
+ SDValue FPEven = CurDAG->getTargetExtractSubreg(Mips::sub_fpeven,
dl, MVT::f32, N1);
// The second store starts 4 bytes after the first.
@@ -438,9 +438,9 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
SDValue Undef = SDValue(
CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::f64), 0);
SDNode *MTC = CurDAG->getMachineNode(Mips::MTC1, dl, MVT::f32, Zero);
- SDValue I0 = CurDAG->getTargetInsertSubreg(Mips::SUBREG_FPEVEN, dl,
+ SDValue I0 = CurDAG->getTargetInsertSubreg(Mips::sub_fpeven, dl,
MVT::f64, Undef, SDValue(MTC, 0));
- SDValue I1 = CurDAG->getTargetInsertSubreg(Mips::SUBREG_FPODD, dl,
+ SDValue I1 = CurDAG->getTargetInsertSubreg(Mips::sub_fpodd, dl,
MVT::f64, I0, SDValue(MTC, 0));
ReplaceUses(SDValue(Node, 0), I1);
return I1.getNode();
diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp
index dbd3c24..4005e35 100644
--- a/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/lib/Target/Mips/MipsInstrInfo.cpp
@@ -124,7 +124,6 @@ void MipsInstrInfo::
insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const
{
DebugLoc DL;
- if (MI != MBB.end()) DL = MI->getDebugLoc();
BuildMI(MBB, MI, DL, get(Mips::NOP));
}
@@ -132,10 +131,8 @@ bool MipsInstrInfo::
copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const {
- DebugLoc DL;
-
- if (I != MBB.end()) DL = I->getDebugLoc();
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const {
if (DestRC != SrcRC) {
@@ -190,7 +187,8 @@ copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
void MipsInstrInfo::
storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned SrcReg, bool isKill, int FI,
- const TargetRegisterClass *RC) const {
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
@@ -223,7 +221,8 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
void MipsInstrInfo::
loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned DestReg, int FI,
- const TargetRegisterClass *RC) const
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const
{
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
@@ -624,7 +623,8 @@ unsigned MipsInstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
GlobalBaseReg = RegInfo.createVirtualRegister(Mips::CPURegsRegisterClass);
bool Ok = TII->copyRegToReg(FirstMBB, MBBI, GlobalBaseReg, Mips::GP,
Mips::CPURegsRegisterClass,
- Mips::CPURegsRegisterClass);
+ Mips::CPURegsRegisterClass,
+ DebugLoc());
assert(Ok && "Couldn't assign to global base register!");
Ok = Ok; // Silence warning when assertions are turned off.
RegInfo.addLiveIn(Mips::GP);
diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h
index ab8dc59..7919d9a 100644
--- a/lib/Target/Mips/MipsInstrInfo.h
+++ b/lib/Target/Mips/MipsInstrInfo.h
@@ -209,16 +209,19 @@ public:
MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const;
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const;
virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned SrcReg, bool isKill, int FrameIndex,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr* MI,
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
index 478da84..5e719af 100644
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -288,7 +288,7 @@ void MipsRegisterInfo::adjustMipsStackFrame(MachineFunction &MF) const
// Stack locations for FP and RA. If only one of them is used,
// the space must be allocated for both, otherwise no space at all.
- if (hasFP(MF) || MFI->hasCalls()) {
+ if (hasFP(MF) || MFI->adjustsStack()) {
// FP stack location
MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true),
StackOffset);
@@ -302,7 +302,7 @@ void MipsRegisterInfo::adjustMipsStackFrame(MachineFunction &MF) const
MipsFI->setRAStackOffset(StackOffset);
StackOffset += RegSize;
- if (MFI->hasCalls())
+ if (MFI->adjustsStack())
TopCPUSavedRegOff += RegSize;
}
@@ -407,7 +407,7 @@ emitPrologue(MachineFunction &MF) const
unsigned StackSize = MFI->getStackSize();
// No need to allocate space on the stack.
- if (StackSize == 0 && !MFI->hasCalls()) return;
+ if (StackSize == 0 && !MFI->adjustsStack()) return;
int FPOffset = MipsFI->getFPStackOffset();
int RAOffset = MipsFI->getRAStackOffset();
@@ -425,7 +425,7 @@ emitPrologue(MachineFunction &MF) const
// Save the return address only if the function isn't a leaf one.
// sw $ra, stack_loc($sp)
- if (MFI->hasCalls()) {
+ if (MFI->adjustsStack()) {
BuildMI(MBB, MBBI, dl, TII.get(Mips::SW))
.addReg(Mips::RA).addImm(RAOffset).addReg(Mips::SP);
}
@@ -477,7 +477,7 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const
// Restore the return address only if the function isn't a leaf one.
// lw $ra, stack_loc($sp)
- if (MFI->hasCalls()) {
+ if (MFI->adjustsStack()) {
BuildMI(MBB, MBBI, dl, TII.get(Mips::LW), Mips::RA)
.addImm(RAOffset).addReg(Mips::SP);
}
diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h
index 9fd044c..bc857b8 100644
--- a/lib/Target/Mips/MipsRegisterInfo.h
+++ b/lib/Target/Mips/MipsRegisterInfo.h
@@ -23,15 +23,6 @@ class MipsSubtarget;
class TargetInstrInfo;
class Type;
-namespace Mips {
- /// SubregIndex - The index of various sized subregister classes. Note that
- /// these indices must be kept in sync with the class indices in the
- /// MipsRegisterInfo.td file.
- enum SubregIndex {
- SUBREG_FPEVEN = 1, SUBREG_FPODD = 2
- };
-}
-
struct MipsRegisterInfo : public MipsGenRegisterInfo {
const MipsSubtarget &Subtarget;
const TargetInstrInfo &TII;
diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td
index 00e7723..be78a22 100644
--- a/lib/Target/Mips/MipsRegisterInfo.td
+++ b/lib/Target/Mips/MipsRegisterInfo.td
@@ -34,9 +34,14 @@ class FPR<bits<5> num, string n> : MipsReg<n> {
}
// Mips 64-bit (aliased) FPU Registers
-class AFPR<bits<5> num, string n, list<Register> subregs>
+let Namespace = "Mips" in {
+def sub_fpeven : SubRegIndex;
+def sub_fpodd : SubRegIndex;
+}
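+// sub_fpeven and sub_fpodd name the even and odd f32 halves of an f64 pair.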
+class AFPR<bits<5> num, string n, list<Register> subregs>
: MipsRegWithSubRegs<n, subregs> {
let Num = num;
+ let SubRegIndices = [sub_fpeven, sub_fpodd];
}
//===----------------------------------------------------------------------===//
@@ -141,23 +146,6 @@ let Namespace = "Mips" in {
}
//===----------------------------------------------------------------------===//
-// Subregister Set Definitions
-//===----------------------------------------------------------------------===//
-
-def mips_subreg_fpeven : PatLeaf<(i32 1)>;
-def mips_subreg_fpodd : PatLeaf<(i32 2)>;
-
-def : SubRegSet<1, [D0, D1, D2, D3, D4, D5, D6, D7,
- D8, D9, D10, D11, D12, D13, D14, D15],
- [F0, F2, F4, F6, F8, F10, F12, F14,
- F16, F18, F20, F22, F24, F26, F28, F30]>;
-
-def : SubRegSet<2, [D0, D1, D2, D3, D4, D5, D6, D7,
- D8, D9, D10, D11, D12, D13, D14, D15],
- [F1, F3, F5, F7, F9, F11, F13, F15,
- F17, F19, F21, F23, F25, F27, F29, F31]>;
-
-//===----------------------------------------------------------------------===//
// Register Classes
//===----------------------------------------------------------------------===//
@@ -255,7 +243,7 @@ def AFGR64 : RegisterClass<"Mips", [f64], 64,
// Reserved
D15]>
{
- let SubRegClassList = [FGR32, FGR32];
+ let SubRegClasses = [(FGR32 sub_fpeven, sub_fpodd)];
let MethodProtos = [{
iterator allocation_order_end(const MachineFunction &MF) const;
}];
diff --git a/lib/Target/Mips/MipsSelectionDAGInfo.cpp b/lib/Target/Mips/MipsSelectionDAGInfo.cpp
index 72c149d..e4d70fc 100644
--- a/lib/Target/Mips/MipsSelectionDAGInfo.cpp
+++ b/lib/Target/Mips/MipsSelectionDAGInfo.cpp
@@ -12,10 +12,11 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "mips-selectiondag-info"
-#include "MipsSelectionDAGInfo.h"
+#include "MipsTargetMachine.h"
using namespace llvm;
-MipsSelectionDAGInfo::MipsSelectionDAGInfo() {
+MipsSelectionDAGInfo::MipsSelectionDAGInfo(const MipsTargetMachine &TM)
+ : TargetSelectionDAGInfo(TM) {
}
MipsSelectionDAGInfo::~MipsSelectionDAGInfo() {
diff --git a/lib/Target/Mips/MipsSelectionDAGInfo.h b/lib/Target/Mips/MipsSelectionDAGInfo.h
index 6eaf0c9..6cafb55 100644
--- a/lib/Target/Mips/MipsSelectionDAGInfo.h
+++ b/lib/Target/Mips/MipsSelectionDAGInfo.h
@@ -18,9 +18,11 @@
namespace llvm {
+class MipsTargetMachine;
+
class MipsSelectionDAGInfo : public TargetSelectionDAGInfo {
public:
- MipsSelectionDAGInfo();
+ explicit MipsSelectionDAGInfo(const MipsTargetMachine &TM);
~MipsSelectionDAGInfo();
};
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index 4724ff7..ad3eb9e 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -42,7 +42,7 @@ MipsTargetMachine(const Target &T, const std::string &TT, const std::string &FS,
std::string("E-p:32:32:32-i8:8:32-i16:16:32-n32")),
InstrInfo(*this),
FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0),
- TLInfo(*this) {
+ TLInfo(*this), TSInfo(*this) {
// Abicall enables PIC by default
if (getRelocationModel() == Reloc::Default) {
if (Subtarget.isABI_O32())
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h
index cd671cf..d63976f 100644
--- a/lib/Target/Mips/MipsTargetMachine.h
+++ b/lib/Target/Mips/MipsTargetMachine.h
@@ -17,6 +17,7 @@
#include "MipsSubtarget.h"
#include "MipsInstrInfo.h"
#include "MipsISelLowering.h"
+#include "MipsSelectionDAGInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetFrameInfo.h"
@@ -30,6 +31,7 @@ namespace llvm {
MipsInstrInfo InstrInfo;
TargetFrameInfo FrameInfo;
MipsTargetLowering TLInfo;
+ MipsSelectionDAGInfo TSInfo;
public:
MipsTargetMachine(const Target &T, const std::string &TT,
const std::string &FS, bool isLittle);
@@ -51,6 +53,10 @@ namespace llvm {
return &TLInfo;
}
+ virtual const MipsSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+
// Pass Pipeline Configuration
virtual bool addInstSelector(PassManagerBase &PM,
CodeGenOpt::Level OptLevel);
diff --git a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h
index a424c27..aa2e1f4 100644
--- a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h
+++ b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h
@@ -29,7 +29,7 @@
#include <string>
namespace llvm {
- class VISIBILITY_HIDDEN PIC16AsmPrinter : public AsmPrinter {
+ class LLVM_LIBRARY_VISIBILITY PIC16AsmPrinter : public AsmPrinter {
public:
explicit PIC16AsmPrinter(TargetMachine &TM, MCStreamer &Streamer);
private:
diff --git a/lib/Target/PIC16/PIC16DebugInfo.cpp b/lib/Target/PIC16/PIC16DebugInfo.cpp
index 5d86329..6a4d0d6 100644
--- a/lib/Target/PIC16/PIC16DebugInfo.cpp
+++ b/lib/Target/PIC16/PIC16DebugInfo.cpp
@@ -70,7 +70,7 @@ void PIC16DbgInfo::PopulateDerivedTypeInfo (DIType Ty, unsigned short &TypeNo,
// We also need to encode the information about the base type of
// pointer in TypeNo.
- DIType BaseType = DIDerivedType(Ty.getNode()).getTypeDerivedFrom();
+ DIType BaseType = DIDerivedType(Ty).getTypeDerivedFrom();
PopulateDebugInfo(BaseType, TypeNo, HasAux, Aux, TagName);
}
@@ -79,7 +79,7 @@ void PIC16DbgInfo::PopulateArrayTypeInfo (DIType Ty, unsigned short &TypeNo,
bool &HasAux, int Aux[],
std::string &TagName) {
- DICompositeType CTy = DICompositeType(Ty.getNode());
+ DICompositeType CTy = DICompositeType(Ty);
DIArray Elements = CTy.getTypeArray();
unsigned short size = 1;
unsigned short Dimension[4]={0,0,0,0};
@@ -88,7 +88,7 @@ void PIC16DbgInfo::PopulateArrayTypeInfo (DIType Ty, unsigned short &TypeNo,
if (Element.getTag() == dwarf::DW_TAG_subrange_type) {
TypeNo = TypeNo << PIC16Dbg::S_DERIVED;
TypeNo = TypeNo | PIC16Dbg::DT_ARY;
- DISubrange SubRange = DISubrange(Element.getNode());
+ DISubrange SubRange = DISubrange(Element);
Dimension[i] = SubRange.getHi() - SubRange.getLo() + 1;
// Each dimension is represented by 2 bytes starting at byte 9.
Aux[8+i*2+0] = Dimension[i];
@@ -111,7 +111,7 @@ void PIC16DbgInfo::PopulateStructOrUnionTypeInfo (DIType Ty,
unsigned short &TypeNo,
bool &HasAux, int Aux[],
std::string &TagName) {
- DICompositeType CTy = DICompositeType(Ty.getNode());
+ DICompositeType CTy = DICompositeType(Ty);
TypeNo = TypeNo << PIC16Dbg::S_BASIC;
if (Ty.getTag() == dwarf::DW_TAG_structure_type)
TypeNo = TypeNo | PIC16Dbg::T_STRUCT;
@@ -124,7 +124,7 @@ void PIC16DbgInfo::PopulateStructOrUnionTypeInfo (DIType Ty,
// llvm.dbg.composite* global variable. Since we need to revisit
// PIC16DebugInfo implementation anyway after the MDNodes based
// framework is done, let us continue with the way it is.
- std::string UniqueSuffix = "." + Ty.getNode()->getNameStr().substr(18);
+ std::string UniqueSuffix = "." + Ty->getNameStr().substr(18);
TagName += UniqueSuffix;
unsigned short size = CTy.getSizeInBits()/8;
// 7th and 8th byte represent size.
@@ -303,7 +303,7 @@ void PIC16DbgInfo::EmitCompositeTypeElements (DICompositeType CTy,
bool HasAux = false;
int ElementAux[PIC16Dbg::AuxSize] = { 0 };
std::string TagName = "";
- DIDerivedType DITy(Element.getNode());
+ DIDerivedType DITy(Element);
unsigned short ElementSize = DITy.getSizeInBits()/8;
// Get mangled name for this structure/union element.
std::string MangMemName = DITy.getName().str() + SuffixNo;
@@ -336,7 +336,7 @@ void PIC16DbgInfo::EmitCompositeTypeDecls(Module &M) {
CTy.getTag() == dwarf::DW_TAG_structure_type ) {
// Get the number after llvm.dbg.composite and make UniqueSuffix from
// it.
- std::string DIVar = CTy.getNode()->getNameStr();
+ std::string DIVar = CTy->getNameStr();
std::string UniqueSuffix = "." + DIVar.substr(18);
std::string MangledCTyName = CTy.getName().str() + UniqueSuffix;
unsigned short size = CTy.getSizeInBits()/8;
diff --git a/lib/Target/PIC16/PIC16ISelDAGToDAG.h b/lib/Target/PIC16/PIC16ISelDAGToDAG.h
index f1fcec5..ecaddd3 100644
--- a/lib/Target/PIC16/PIC16ISelDAGToDAG.h
+++ b/lib/Target/PIC16/PIC16ISelDAGToDAG.h
@@ -26,7 +26,7 @@ using namespace llvm;
namespace {
-class VISIBILITY_HIDDEN PIC16DAGToDAGISel : public SelectionDAGISel {
+class LLVM_LIBRARY_VISIBILITY PIC16DAGToDAGISel : public SelectionDAGISel {
/// TM - Keep a reference to PIC16TargetMachine.
const PIC16TargetMachine &TM;
diff --git a/lib/Target/PIC16/PIC16InstrInfo.cpp b/lib/Target/PIC16/PIC16InstrInfo.cpp
index 9e415e0..793dd9f 100644
--- a/lib/Target/PIC16/PIC16InstrInfo.cpp
+++ b/lib/Target/PIC16/PIC16InstrInfo.cpp
@@ -70,7 +70,8 @@ unsigned PIC16InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
void PIC16InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned SrcReg, bool isKill, int FI,
- const TargetRegisterClass *RC) const {
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
const PIC16TargetLowering *PTLI = TM.getTargetLowering();
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
@@ -112,7 +113,8 @@ void PIC16InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
void PIC16InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned DestReg, int FI,
- const TargetRegisterClass *RC) const {
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
const PIC16TargetLowering *PTLI = TM.getTargetLowering();
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
@@ -153,9 +155,8 @@ bool PIC16InstrInfo::copyRegToReg (MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const {
- DebugLoc DL;
- if (I != MBB.end()) DL = I->getDebugLoc();
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const {
if (DestRC == PIC16::FSR16RegisterClass) {
BuildMI(MBB, I, DL, get(PIC16::copy_fsr), DestReg).addReg(SrcReg);
diff --git a/lib/Target/PIC16/PIC16InstrInfo.h b/lib/Target/PIC16/PIC16InstrInfo.h
index 56f51f0..40a4cb4 100644
--- a/lib/Target/PIC16/PIC16InstrInfo.h
+++ b/lib/Target/PIC16/PIC16InstrInfo.h
@@ -49,17 +49,20 @@ public:
virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned SrcReg, bool isKill, int FrameIndex,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
virtual bool copyRegToReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const;
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const;
virtual bool isMoveInstr(const MachineInstr &MI,
unsigned &SrcReg, unsigned &DstReg,
unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
diff --git a/lib/Target/PIC16/PIC16Section.h b/lib/Target/PIC16/PIC16Section.h
index 9039ca7..5b33b51 100644
--- a/lib/Target/PIC16/PIC16Section.h
+++ b/lib/Target/PIC16/PIC16Section.h
@@ -44,7 +44,8 @@ namespace llvm {
unsigned Size;
PIC16Section(StringRef name, SectionKind K, StringRef addr, int color)
- : MCSection(K), Name(name), Address(addr), Color(color), Size(0) {
+ : MCSection(SV_PIC16, K), Name(name), Address(addr),
+ Color(color), Size(0) {
}
public:
@@ -86,6 +87,11 @@ namespace llvm {
/// to a section.
virtual void PrintSwitchToSection(const MCAsmInfo &MAI,
raw_ostream &OS) const;
+
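+ // LLVM-style RTTI: PIC16 sections are tagged with the SV_PIC16 variant.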
+ static bool classof(const MCSection *S) {
+ return S->getVariant() == SV_PIC16;
+ }
+ static bool classof(const PIC16Section *) { return true; }
};
} // end namespace llvm
diff --git a/lib/Target/PIC16/PIC16SelectionDAGInfo.cpp b/lib/Target/PIC16/PIC16SelectionDAGInfo.cpp
index 76c6c60..995955a 100644
--- a/lib/Target/PIC16/PIC16SelectionDAGInfo.cpp
+++ b/lib/Target/PIC16/PIC16SelectionDAGInfo.cpp
@@ -12,10 +12,11 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "pic16-selectiondag-info"
-#include "PIC16SelectionDAGInfo.h"
+#include "PIC16TargetMachine.h"
using namespace llvm;
-PIC16SelectionDAGInfo::PIC16SelectionDAGInfo() {
+PIC16SelectionDAGInfo::PIC16SelectionDAGInfo(const PIC16TargetMachine &TM)
+ : TargetSelectionDAGInfo(TM) {
}
PIC16SelectionDAGInfo::~PIC16SelectionDAGInfo() {
diff --git a/lib/Target/PIC16/PIC16SelectionDAGInfo.h b/lib/Target/PIC16/PIC16SelectionDAGInfo.h
index 112480e5..c67fd8b 100644
--- a/lib/Target/PIC16/PIC16SelectionDAGInfo.h
+++ b/lib/Target/PIC16/PIC16SelectionDAGInfo.h
@@ -18,9 +18,11 @@
namespace llvm {
+class PIC16TargetMachine;
+
class PIC16SelectionDAGInfo : public TargetSelectionDAGInfo {
public:
- PIC16SelectionDAGInfo();
+ explicit PIC16SelectionDAGInfo(const PIC16TargetMachine &TM);
~PIC16SelectionDAGInfo();
};
diff --git a/lib/Target/PIC16/PIC16TargetMachine.cpp b/lib/Target/PIC16/PIC16TargetMachine.cpp
index e2acb85..82b69be 100644
--- a/lib/Target/PIC16/PIC16TargetMachine.cpp
+++ b/lib/Target/PIC16/PIC16TargetMachine.cpp
@@ -35,7 +35,7 @@ PIC16TargetMachine::PIC16TargetMachine(const Target &T, const std::string &TT,
: LLVMTargetMachine(T, TT),
Subtarget(TT, FS, Trad),
DataLayout("e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8-n8"),
- InstrInfo(*this), TLInfo(*this),
+ InstrInfo(*this), TLInfo(*this), TSInfo(*this),
FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0) { }
diff --git a/lib/Target/PIC16/PIC16TargetMachine.h b/lib/Target/PIC16/PIC16TargetMachine.h
index 849845a..dae5d31 100644
--- a/lib/Target/PIC16/PIC16TargetMachine.h
+++ b/lib/Target/PIC16/PIC16TargetMachine.h
@@ -17,6 +17,7 @@
#include "PIC16InstrInfo.h"
#include "PIC16ISelLowering.h"
+#include "PIC16SelectionDAGInfo.h"
#include "PIC16RegisterInfo.h"
#include "PIC16Subtarget.h"
#include "llvm/Target/TargetData.h"
@@ -32,6 +33,7 @@ class PIC16TargetMachine : public LLVMTargetMachine {
const TargetData DataLayout; // Calculates type size & alignment
PIC16InstrInfo InstrInfo;
PIC16TargetLowering TLInfo;
+ PIC16SelectionDAGInfo TSInfo;
// PIC16 does not have any call stack frame, therefore not having
// any PIC16 specific FrameInfo class.
@@ -54,6 +56,10 @@ public:
return &TLInfo;
}
+ virtual const PIC16SelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+
virtual bool addInstSelector(PassManagerBase &PM,
CodeGenOpt::Level OptLevel);
virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 3d9f8aa..00eebb8 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -712,8 +712,9 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
if (PPCSubTarget.isGigaProcessor() && OtherCondIdx == -1)
IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg,
CCReg), 0);
- else
- IntCR = SDValue(CurDAG->getMachineNode(PPC::MFCR, dl, MVT::i32, CCReg), 0);
+ else
+ IntCR = SDValue(CurDAG->getMachineNode(PPC::MFCRpseud, dl, MVT::i32,
+ CR7Reg, CCReg), 0);
SDValue Ops[] = { IntCR, getI32Imm((32-(3-Idx)) & 31),
getI32Imm(31), getI32Imm(31) };
@@ -848,7 +849,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
return CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32,
N->getOperand(0), InFlag);
else
- return CurDAG->getMachineNode(PPC::MFCR, dl, MVT::i32, InFlag);
+ return CurDAG->getMachineNode(PPC::MFCRpseud, dl, MVT::i32,
+ N->getOperand(0), InFlag);
}
case ISD::SDIV: {
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 6f11953..10b516a 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -5496,12 +5496,15 @@ bool PPCTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MFI->setReturnAddressIsTaken(true);
+
DebugLoc dl = Op.getDebugLoc();
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
// Make sure the function does not optimize away the store of the RA to
// the stack.
- MachineFunction &MF = DAG.getMachineFunction();
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
FuncInfo->setLRStoreRequired();
bool isPPC64 = PPCSubTarget.isPPC64();
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 1d05f3d..6dcaf1e 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -111,9 +111,10 @@ namespace llvm {
/// Return with a flag operand, matched by 'blr'
RET_FLAG,
- /// R32 = MFCR(CRREG, INFLAG) - Represents the MFCR/MFOCRF instructions.
- /// This copies the bits corresponding to the specified CRREG into the
- /// resultant GPR. Bits corresponding to other CR regs are undefined.
+ /// R32 = MFCR(CRREG, INFLAG) - Represents the MFCRpseud/MFOCRF
+ /// instructions. This copies the bits corresponding to the specified
+ /// CRREG into the resultant GPR. Bits corresponding to other CR regs
+ /// are undefined.
MFCR,
/// RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP*
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index ae1fbd8..1b7a778 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -203,8 +203,6 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const {
DebugLoc DL;
- if (MI != MBB.end()) DL = MI->getDebugLoc();
-
BuildMI(MBB, MI, DL, get(PPC::NOP));
}
@@ -347,15 +345,13 @@ bool PPCInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const {
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const {
if (DestRC != SrcRC) {
// Not yet supported!
return false;
}
- DebugLoc DL;
- if (MI != MBB.end()) DL = MI->getDebugLoc();
-
if (DestRC == PPC::GPRCRegisterClass) {
BuildMI(MBB, MI, DL, get(PPC::OR), DestReg).addReg(SrcReg).addReg(SrcReg);
} else if (DestRC == PPC::G8RCRegisterClass) {
@@ -446,7 +442,8 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
// issue a MFCR to save all of the CRBits.
unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ?
PPC::R2 : PPC::R0;
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFCR), ScratchReg));
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFCRpseud), ScratchReg)
+ .addReg(SrcReg, getKillRegState(isKill)));
// If the saved register wasn't CR0, shift the bits left so that they are
// in CR0's slot.
@@ -520,7 +517,8 @@ void
PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned SrcReg, bool isKill, int FrameIdx,
- const TargetRegisterClass *RC) const {
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
MachineFunction &MF = *MBB.getParent();
SmallVector<MachineInstr*, 4> NewMIs;
@@ -635,7 +633,8 @@ void
PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned DestReg, int FrameIdx,
- const TargetRegisterClass *RC) const {
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
MachineFunction &MF = *MBB.getParent();
SmallVector<MachineInstr*, 4> NewMIs;
DebugLoc DL;
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index 9fb6e7d..7a9e11b 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -114,17 +114,20 @@ public:
MachineBasicBlock::iterator MI,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const;
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const;
virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned SrcReg, bool isKill, int FrameIndex,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF,
int FrameIx,
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 532a3ec..63b4581 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -662,7 +662,7 @@ def STWCX : XForm_1<31, 150, (outs), (ins GPRC:$rS, memrr:$dst),
[(PPCstcx GPRC:$rS, xoaddr:$dst)]>,
isDOT;
-let isBarrier = 1, hasCtrlDep = 1 in
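+// TRAP never falls through; mark it a terminator as well as a barrier.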
+let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in
def TRAP : XForm_24<31, 4, (outs), (ins), "trap", LdStGeneral, [(trap)]>;
//===----------------------------------------------------------------------===//
@@ -862,7 +862,6 @@ def STFDX : XForm_28<31, 727, (outs), (ins F8RC:$frS, memrr:$dst),
[(store F8RC:$frS, xaddr:$dst)]>;
}
-let isBarrier = 1 in
def SYNC : XForm_24_sync<31, 598, (outs), (ins),
"sync", LdStSync,
[(int_ppc_sync)]>;
@@ -1118,14 +1117,17 @@ def MFVRSAVE : XFXForm_1_ext<31, 339, 256, (outs GPRC:$rT), (ins),
def MTCRF : XFXForm_5<31, 144, (outs), (ins crbitm:$FXM, GPRC:$rS),
"mtcrf $FXM, $rS", BrMCRX>,
PPC970_MicroCode, PPC970_Unit_CRU;
-// FIXME: this Uses all the CR registers. Marking it as such is
-// necessary for DeadMachineInstructionElim to do the right thing.
-// However, marking it also exposes PR 2964, and causes crashes in
-// the Local RA because it doesn't like this sequence:
+
+// This is a pseudo for MFCR, which implicitly uses all 8 of its subregisters;
+// declaring that here gives the local register allocator problems with this:
// vreg = MCRF CR0
// MFCR <kill of whatever preg got assigned to vreg>
-// For now DeadMachineInstructionElim is turned off, so don't do the marking.
-def MFCR : XFXForm_3<31, 19, (outs GPRC:$rT), (ins), "mfcr $rT", SprMFCR>,
+// while not declaring it breaks DeadMachineInstructionElimination.
+// As it turns out, in all cases where we currently use this,
+// we're only interested in one subregister of it. Represent this in the
+// instruction to keep the register allocator from becoming confused.
+def MFCRpseud: XFXForm_3<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM),
+ "mfcr $rT ${:comment} $FXM", SprMFCR>,
PPC970_MicroCode, PPC970_Unit_CRU;
def MFOCRF: XFXForm_5a<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM),
"mfcr $rT, $FXM", SprMFCR>,
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 5f1e04e..0ff852c 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -689,19 +689,15 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
const TargetRegisterClass *RC = Subtarget.isPPC64() ? G8RC : GPRC;
unsigned Reg = findScratchRegister(II, RS, RC, SPAdj);
+ unsigned SrcReg = MI.getOperand(0).getReg();
// We need to store the CR in the low 4-bits of the saved value. First, issue
- // an MFCR to save all of the CRBits. Add an implicit kill of the CR.
- if (!MI.getOperand(0).isKill())
- BuildMI(MBB, II, dl, TII.get(PPC::MFCR), Reg);
- else
- // Implicitly kill the CR register.
- BuildMI(MBB, II, dl, TII.get(PPC::MFCR), Reg)
- .addReg(MI.getOperand(0).getReg(), RegState::ImplicitKill);
+ // an MFCRpseud to save all of the CRBits and, if needed, kill the SrcReg.
+ BuildMI(MBB, II, dl, TII.get(PPC::MFCRpseud), Reg)
+ .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill()));
// If the saved register wasn't CR0, shift the bits left so that they are in
// CR0's slot.
- unsigned SrcReg = MI.getOperand(0).getReg();
if (SrcReg != PPC::CR0)
// rlwinm rA, rA, ShiftBits, 0, 31.
BuildMI(MBB, II, dl, TII.get(PPC::RLWINM), Reg)
@@ -1009,7 +1005,7 @@ void PPCRegisterInfo::determineFrameLayout(MachineFunction &MF) const {
if (!DisableRedZone &&
FrameSize <= 224 && // Fits in red zone.
!MFI->hasVarSizedObjects() && // No dynamic alloca.
- !MFI->hasCalls() && // No calls.
+ !MFI->adjustsStack() && // No calls.
(!ALIGN_STACK || MaxAlign <= TargetAlign)) { // No special alignment.
// No need for frame
MFI->setStackSize(0);
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td
index 1cb7340..8604f54 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -10,6 +10,15 @@
//
//===----------------------------------------------------------------------===//
+let Namespace = "PPC" in {
+def sub_lt : SubRegIndex;
+def sub_gt : SubRegIndex;
+def sub_eq : SubRegIndex;
+def sub_un : SubRegIndex;
+def sub_32 : SubRegIndex;
+}
+
+
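
These named indices replace the old numeric SubRegSet scheme; on the C++ side, lookups go through the generated enum, for example (register names assume the usual generated PPC enum, not shown in this hunk):

    // unsigned EQBit = TRI->getSubReg(PPC::CR0, PPC::sub_eq);  // PPC::CR0EQ
    // unsigned Low32 = TRI->getSubReg(PPC::X3,  PPC::sub_32);  // PPC::R3
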
class PPCReg<string n> : Register<n> {
let Namespace = "PPC";
}
@@ -25,6 +34,7 @@ class GPR<bits<5> num, string n> : PPCReg<n> {
class GP8<GPR SubReg, string n> : PPCReg<n> {
field bits<5> Num = SubReg.Num;
let SubRegs = [SubReg];
+ let SubRegIndices = [sub_32];
}
// SPR - One of the 32-bit special-purpose registers
@@ -225,6 +235,7 @@ def CR7EQ : CRBIT<30, "30">, DwarfRegNum<[0]>;
def CR7UN : CRBIT<31, "31">, DwarfRegNum<[0]>;
// Condition registers
+let SubRegIndices = [sub_lt, sub_gt, sub_eq, sub_un] in {
def CR0 : CR<0, "cr0", [CR0LT, CR0GT, CR0EQ, CR0UN]>, DwarfRegNum<[68]>;
def CR1 : CR<1, "cr1", [CR1LT, CR1GT, CR1EQ, CR1UN]>, DwarfRegNum<[69]>;
def CR2 : CR<2, "cr2", [CR2LT, CR2GT, CR2EQ, CR2UN]>, DwarfRegNum<[70]>;
@@ -233,15 +244,7 @@ def CR4 : CR<4, "cr4", [CR4LT, CR4GT, CR4EQ, CR4UN]>, DwarfRegNum<[72]>;
def CR5 : CR<5, "cr5", [CR5LT, CR5GT, CR5EQ, CR5UN]>, DwarfRegNum<[73]>;
def CR6 : CR<6, "cr6", [CR6LT, CR6GT, CR6EQ, CR6UN]>, DwarfRegNum<[74]>;
def CR7 : CR<7, "cr7", [CR7LT, CR7GT, CR7EQ, CR7UN]>, DwarfRegNum<[75]>;
-
-def : SubRegSet<1, [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7],
- [CR0LT, CR1LT, CR2LT, CR3LT, CR4LT, CR5LT, CR6LT, CR7LT]>;
-def : SubRegSet<2, [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7],
- [CR0GT, CR1GT, CR2GT, CR3GT, CR4GT, CR5GT, CR6GT, CR7GT]>;
-def : SubRegSet<3, [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7],
- [CR0EQ, CR1EQ, CR2EQ, CR3EQ, CR4EQ, CR5EQ, CR6EQ, CR7EQ]>;
-def : SubRegSet<4, [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7],
- [CR0UN, CR1UN, CR2UN, CR3UN, CR4UN, CR5UN, CR6UN, CR7UN]>;
+}
// Link register
def LR : SPR<8, "lr">, DwarfRegNum<[65]>;
@@ -372,7 +375,7 @@ def CRBITRC : RegisterClass<"PPC", [i32], 32,
def CRRC : RegisterClass<"PPC", [i32], 32, [CR0, CR1, CR5, CR6, CR7, CR2,
CR3, CR4]>
{
- let SubRegClassList = [CRBITRC, CRBITRC, CRBITRC, CRBITRC];
+ let SubRegClasses = [(CRBITRC sub_lt, sub_gt, sub_eq, sub_un)];
}
def CTRRC : RegisterClass<"PPC", [i32], 32, [CTR]>;
diff --git a/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp b/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp
index c0004a9..d4258b4 100644
--- a/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp
+++ b/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp
@@ -12,10 +12,11 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "powerpc-selectiondag-info"
-#include "PPCSelectionDAGInfo.h"
+#include "PPCTargetMachine.h"
using namespace llvm;
-PPCSelectionDAGInfo::PPCSelectionDAGInfo() {
+PPCSelectionDAGInfo::PPCSelectionDAGInfo(const PPCTargetMachine &TM)
+ : TargetSelectionDAGInfo(TM) {
}
PPCSelectionDAGInfo::~PPCSelectionDAGInfo() {
diff --git a/lib/Target/PowerPC/PPCSelectionDAGInfo.h b/lib/Target/PowerPC/PPCSelectionDAGInfo.h
index 3ad3418..341b69c 100644
--- a/lib/Target/PowerPC/PPCSelectionDAGInfo.h
+++ b/lib/Target/PowerPC/PPCSelectionDAGInfo.h
@@ -18,9 +18,11 @@
namespace llvm {
+class PPCTargetMachine;
+
class PPCSelectionDAGInfo : public TargetSelectionDAGInfo {
public:
- PPCSelectionDAGInfo();
+ explicit PPCSelectionDAGInfo(const PPCTargetMachine &TM);
~PPCSelectionDAGInfo();
};
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index c4a7408..10cd10b 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -44,7 +44,8 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, const std::string &TT,
: LLVMTargetMachine(T, TT),
Subtarget(TT, FS, is64Bit),
DataLayout(Subtarget.getTargetDataString()), InstrInfo(*this),
- FrameInfo(*this, is64Bit), JITInfo(*this, is64Bit), TLInfo(*this),
+ FrameInfo(*this, is64Bit), JITInfo(*this, is64Bit),
+ TLInfo(*this), TSInfo(*this),
InstrItins(Subtarget.getInstrItineraryData()) {
if (getRelocationModel() == Reloc::Default) {
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
index 35e33a2..626ddbb 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -19,6 +19,7 @@
#include "PPCJITInfo.h"
#include "PPCInstrInfo.h"
#include "PPCISelLowering.h"
+#include "PPCSelectionDAGInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetData.h"
@@ -35,6 +36,7 @@ class PPCTargetMachine : public LLVMTargetMachine {
PPCFrameInfo FrameInfo;
PPCJITInfo JITInfo;
PPCTargetLowering TLInfo;
+ PPCSelectionDAGInfo TSInfo;
InstrItineraryData InstrItins;
public:
@@ -47,6 +49,9 @@ public:
virtual const PPCTargetLowering *getTargetLowering() const {
return &TLInfo;
}
+ virtual const PPCSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
virtual const PPCRegisterInfo *getRegisterInfo() const {
return &InstrInfo.getRegisterInfo();
}
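
Consumers reach the new hook through the TargetMachine; a minimal usage sketch (caller code is illustrative, not part of this patch):

    // SelectionDAG-side code can now ask the target for its DAG info:
    const TargetSelectionDAGInfo *TSI = TM.getSelectionDAGInfo();
    (void)TSI; // e.g. used by SelectionDAG when lowering target memory ops
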
diff --git a/lib/Target/README.txt b/lib/Target/README.txt
index 144bf5d..7fa73ed 100644
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -1833,6 +1833,21 @@ entry:
We should use DSE + llvm.lifetime.end to delete dead vtable pointer updates.
See GCC PR34949
+Another interesting case is that something related could be used for variables
+that go const after their ctor has finished. In these cases, globalopt (which
+can statically run the constructor) could mark the global const (so it gets put
+in the readonly section). A testcase would be:
+
+#include <complex>
+using namespace std;
+const complex<char> should_be_in_rodata (42,-42);
+complex<char> should_be_in_data (42,-42);
+complex<char> should_be_in_bss;
+
+Where we currently evaluate the ctors but the globals don't become const because
+the optimizer doesn't know they "become const" after the ctor is done. See
+GCC PR4131 for more examples.
+
//===---------------------------------------------------------------------===//
In this code:
diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp
index e494d7d..8e49eca 100644
--- a/lib/Target/Sparc/SparcInstrInfo.cpp
+++ b/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -122,15 +122,13 @@ bool SparcInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const {
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const {
if (DestRC != SrcRC) {
// Not yet supported!
return false;
}
- DebugLoc DL;
- if (I != MBB.end()) DL = I->getDebugLoc();
-
if (DestRC == SP::IntRegsRegisterClass)
BuildMI(MBB, I, DL, get(SP::ORrr), DestReg).addReg(SP::G0).addReg(SrcReg);
else if (DestRC == SP::FPRegsRegisterClass)
@@ -148,7 +146,8 @@ bool SparcInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
void SparcInstrInfo::
storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned SrcReg, bool isKill, int FI,
- const TargetRegisterClass *RC) const {
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
@@ -169,7 +168,8 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
void SparcInstrInfo::
loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned DestReg, int FI,
- const TargetRegisterClass *RC) const {
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
diff --git a/lib/Target/Sparc/SparcInstrInfo.h b/lib/Target/Sparc/SparcInstrInfo.h
index 345674b..a00ba39 100644
--- a/lib/Target/Sparc/SparcInstrInfo.h
+++ b/lib/Target/Sparc/SparcInstrInfo.h
@@ -74,17 +74,20 @@ public:
MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const;
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const;
virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned SrcReg, bool isKill, int FrameIndex,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr* MI,
diff --git a/lib/Target/Sparc/SparcRegisterInfo.td b/lib/Target/Sparc/SparcRegisterInfo.td
index 2b05c19..fede929 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.td
+++ b/lib/Target/Sparc/SparcRegisterInfo.td
@@ -20,6 +20,11 @@ class SparcCtrlReg<string n>: Register<n> {
let Namespace = "SP";
}
+let Namespace = "SP" in {
+def sub_even : SubRegIndex;
+def sub_odd : SubRegIndex;
+}
+
// Registers are identified with 5-bit ID numbers.
// Ri - 32-bit integer registers
class Ri<bits<5> num, string n> : SparcReg<n> {
@@ -33,6 +38,7 @@ class Rf<bits<5> num, string n> : SparcReg<n> {
class Rd<bits<5> num, string n, list<Register> subregs> : SparcReg<n> {
let Num = num;
let SubRegs = subregs;
+ let SubRegIndices = [sub_even, sub_odd];
}
// Control Registers
diff --git a/lib/Target/Sparc/SparcSelectionDAGInfo.cpp b/lib/Target/Sparc/SparcSelectionDAGInfo.cpp
index 4825aa9..190c575 100644
--- a/lib/Target/Sparc/SparcSelectionDAGInfo.cpp
+++ b/lib/Target/Sparc/SparcSelectionDAGInfo.cpp
@@ -12,10 +12,11 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "sparc-selectiondag-info"
-#include "SparcSelectionDAGInfo.h"
+#include "SparcTargetMachine.h"
using namespace llvm;
-SparcSelectionDAGInfo::SparcSelectionDAGInfo() {
+SparcSelectionDAGInfo::SparcSelectionDAGInfo(const SparcTargetMachine &TM)
+ : TargetSelectionDAGInfo(TM) {
}
SparcSelectionDAGInfo::~SparcSelectionDAGInfo() {
diff --git a/lib/Target/Sparc/SparcSelectionDAGInfo.h b/lib/Target/Sparc/SparcSelectionDAGInfo.h
index bc1b561..dcd4203 100644
--- a/lib/Target/Sparc/SparcSelectionDAGInfo.h
+++ b/lib/Target/Sparc/SparcSelectionDAGInfo.h
@@ -18,9 +18,11 @@
namespace llvm {
+class SparcTargetMachine;
+
class SparcSelectionDAGInfo : public TargetSelectionDAGInfo {
public:
- SparcSelectionDAGInfo();
+ explicit SparcSelectionDAGInfo(const SparcTargetMachine &TM);
~SparcSelectionDAGInfo();
};
diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp
index a676623..b58d6ba 100644
--- a/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -34,7 +34,7 @@ SparcTargetMachine::SparcTargetMachine(const Target &T, const std::string &TT,
: LLVMTargetMachine(T, TT),
Subtarget(TT, FS, is64bit),
DataLayout(Subtarget.getDataLayout()),
- TLInfo(*this), InstrInfo(Subtarget),
+ TLInfo(*this), TSInfo(*this), InstrInfo(Subtarget),
FrameInfo(TargetFrameInfo::StackGrowsDown, 8, 0) {
}
diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h
index 1367a31..322c82a 100644
--- a/lib/Target/Sparc/SparcTargetMachine.h
+++ b/lib/Target/Sparc/SparcTargetMachine.h
@@ -20,6 +20,7 @@
#include "SparcInstrInfo.h"
#include "SparcSubtarget.h"
#include "SparcISelLowering.h"
+#include "SparcSelectionDAGInfo.h"
namespace llvm {
@@ -27,6 +28,7 @@ class SparcTargetMachine : public LLVMTargetMachine {
SparcSubtarget Subtarget;
const TargetData DataLayout; // Calculates type size & alignment
SparcTargetLowering TLInfo;
+ SparcSelectionDAGInfo TSInfo;
SparcInstrInfo InstrInfo;
TargetFrameInfo FrameInfo;
public:
@@ -42,6 +44,9 @@ public:
virtual const SparcTargetLowering* getTargetLowering() const {
return &TLInfo;
}
+ virtual const SparcSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
virtual const TargetData *getTargetData() const { return &DataLayout; }
// Pass Pipeline Configuration
diff --git a/lib/Target/SubtargetFeature.cpp b/lib/Target/SubtargetFeature.cpp
index 2094cc9..b35190a 100644
--- a/lib/Target/SubtargetFeature.cpp
+++ b/lib/Target/SubtargetFeature.cpp
@@ -359,29 +359,25 @@ void SubtargetFeatures::dump() const {
print(dbgs());
}
-/// getDefaultSubtargetFeatures - Return a string listing
-/// the features associated with the target triple.
+/// getDefaultSubtargetFeatures - Return a string listing the features
+/// associated with the target triple.
///
/// FIXME: This is an inelegant way of specifying the features of a
/// subtarget. It would be better if we could encode this information
/// into the IR. See <rdar://5972456>.
///
-std::string SubtargetFeatures::getDefaultSubtargetFeatures(
- const Triple& Triple) {
- switch (Triple.getVendor()) {
- case Triple::Apple:
- switch (Triple.getArch()) {
- case Triple::ppc: // powerpc-apple-*
- return std::string("altivec");
- case Triple::ppc64: // powerpc64-apple-*
- return std::string("64bit,altivec");
- default:
- break;
+void SubtargetFeatures::getDefaultSubtargetFeatures(const std::string &CPU,
+ const Triple& Triple) {
+ setCPU(CPU);
+
+ if (Triple.getVendor() == Triple::Apple) {
+ if (Triple.getArch() == Triple::ppc) {
+ // powerpc-apple-*
+ AddFeature("altivec");
+ } else if (Triple.getArch() == Triple::ppc64) {
+ // powerpc64-apple-*
+ AddFeature("64bit");
+ AddFeature("altivec");
}
- break;
- default:
- break;
- }
-
- return std::string("");
+ }
}
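
The interface changes from returning a feature string to mutating the SubtargetFeatures object; a minimal usage sketch, with an illustrative triple:

    SubtargetFeatures Features;
    Features.getDefaultSubtargetFeatures("", Triple("powerpc64-apple-darwin"));
    // The accumulated list now holds "64bit" and "altivec";
    // Features.getString() folds them back into the comma-separated form.
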
diff --git a/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp
index 07cfb2c..90be222 100644
--- a/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp
+++ b/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp
@@ -124,9 +124,9 @@ void SystemZAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
unsigned Reg = MO.getReg();
if (Modifier && strncmp(Modifier, "subreg", 6) == 0) {
if (strncmp(Modifier + 7, "even", 4) == 0)
- Reg = TM.getRegisterInfo()->getSubReg(Reg, SystemZ::SUBREG_EVEN);
+ Reg = TM.getRegisterInfo()->getSubReg(Reg, SystemZ::subreg_even32);
else if (strncmp(Modifier + 7, "odd", 3) == 0)
- Reg = TM.getRegisterInfo()->getSubReg(Reg, SystemZ::SUBREG_ODD);
+ Reg = TM.getRegisterInfo()->getSubReg(Reg, SystemZ::subreg_odd32);
else
assert(0 && "Invalid subreg modifier");
}
diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index 75d563b..bb2952a 100644
--- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -30,11 +30,6 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-static const unsigned subreg_even32 = 1;
-static const unsigned subreg_odd32 = 2;
-static const unsigned subreg_even = 3;
-static const unsigned subreg_odd = 4;
-
namespace {
/// SystemZRRIAddressMode - This corresponds to rriaddr, but uses SDValue's
/// instead of register numbers for the leaves of the matched tree.
@@ -644,7 +639,7 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
Dividend =
CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, dl, ResVT,
SDValue(Tmp, 0), SDValue(Dividend, 0),
- CurDAG->getTargetConstant(subreg_odd, MVT::i32));
+ CurDAG->getTargetConstant(SystemZ::subreg_odd, MVT::i32));
SDNode *Result;
SDValue DivVal = SDValue(Dividend, 0);
@@ -660,7 +655,8 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
// Copy the division (odd subreg) result, if it is needed.
if (!SDValue(Node, 0).use_empty()) {
- unsigned SubRegIdx = (is32Bit ? subreg_odd32 : subreg_odd);
+ unsigned SubRegIdx = (is32Bit ?
+ SystemZ::subreg_odd32 : SystemZ::subreg_odd);
SDNode *Div = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
dl, NVT,
SDValue(Result, 0),
@@ -673,7 +669,8 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
// Copy the remainder (even subreg) result, if it is needed.
if (!SDValue(Node, 1).use_empty()) {
- unsigned SubRegIdx = (is32Bit ? subreg_even32 : subreg_even);
+ unsigned SubRegIdx = (is32Bit ?
+ SystemZ::subreg_even32 : SystemZ::subreg_even);
SDNode *Rem = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
dl, NVT,
SDValue(Result, 0),
@@ -718,7 +715,8 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
SDNode *Tmp = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
dl, ResVT);
{
- unsigned SubRegIdx = (is32Bit ? subreg_odd32 : subreg_odd);
+ unsigned SubRegIdx = (is32Bit ?
+ SystemZ::subreg_odd32 : SystemZ::subreg_odd);
Dividend =
CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, dl, ResVT,
SDValue(Tmp, 0), SDValue(Dividend, 0),
@@ -742,7 +740,8 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
// Copy the division (odd subreg) result, if it is needed.
if (!SDValue(Node, 0).use_empty()) {
- unsigned SubRegIdx = (is32Bit ? subreg_odd32 : subreg_odd);
+ unsigned SubRegIdx = (is32Bit ?
+ SystemZ::subreg_odd32 : SystemZ::subreg_odd);
SDNode *Div = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
dl, NVT,
SDValue(Result, 0),
@@ -754,7 +753,8 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
// Copy the remainder (even subreg) result, if it is needed.
if (!SDValue(Node, 1).use_empty()) {
- unsigned SubRegIdx = (is32Bit ? subreg_even32 : subreg_even);
+ unsigned SubRegIdx = (is32Bit ?
+ SystemZ::subreg_even32 : SystemZ::subreg_even);
SDNode *Rem = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
dl, NVT,
SDValue(Result, 0),
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index e98f18b..76f2901 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -81,7 +81,7 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) :
// LLVM's current latency-oriented scheduler can't handle physreg definitions
// such as SystemZ has with PSW, so set this to the register-pressure
// scheduler, because it can.
- setSchedulingPreference(SchedulingForRegPressure);
+ setSchedulingPreference(Sched::RegPressure);
setBooleanContents(ZeroOrOneBooleanContent);
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp
index c92caa4..043686c 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -61,7 +61,8 @@ static inline bool isGVStub(GlobalValue *GV, SystemZTargetMachine &TM) {
void SystemZInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned SrcReg, bool isKill, int FrameIdx,
- const TargetRegisterClass *RC) const {
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
DebugLoc DL;
if (MI != MBB.end()) DL = MI->getDebugLoc();
@@ -90,7 +91,8 @@ void SystemZInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
void SystemZInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned DestReg, int FrameIdx,
- const TargetRegisterClass *RC) const{
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const{
DebugLoc DL;
if (MI != MBB.end()) DL = MI->getDebugLoc();
@@ -119,9 +121,8 @@ bool SystemZInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const {
- DebugLoc DL;
- if (I != MBB.end()) DL = I->getDebugLoc();
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const {
// Determine if DstRC and SrcRC have a common superclass.
const TargetRegisterClass *CommonRC = DestRC;
@@ -269,7 +270,8 @@ unsigned SystemZInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
bool
SystemZInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI) const {
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
if (CSI.empty())
return false;
@@ -333,7 +335,8 @@ SystemZInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
const TargetRegisterClass *RegClass = CSI[i].getRegClass();
if (RegClass == &SystemZ::FP64RegClass) {
MBB.addLiveIn(Reg);
- storeRegToStackSlot(MBB, MI, Reg, true, CSI[i].getFrameIdx(), RegClass);
+ storeRegToStackSlot(MBB, MI, Reg, true, CSI[i].getFrameIdx(), RegClass,
+ &RI);
}
}
@@ -343,7 +346,8 @@ SystemZInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
bool
SystemZInstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI) const {
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
if (CSI.empty())
return false;
@@ -359,7 +363,7 @@ SystemZInstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
unsigned Reg = CSI[i].getReg();
const TargetRegisterClass *RegClass = CSI[i].getRegClass();
if (RegClass == &SystemZ::FP64RegClass)
- loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RegClass);
+ loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RegClass, &RI);
}
// Restore GP registers
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h
index ef3b39e..a753f14 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -63,7 +63,8 @@ public:
bool copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const;
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const;
bool isMoveInstr(const MachineInstr& MI,
unsigned &SrcReg, unsigned &DstReg,
@@ -75,18 +76,22 @@ public:
MachineBasicBlock::iterator MI,
unsigned SrcReg, bool isKill,
int FrameIndex,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned DestReg, int FrameIdx,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI) const;
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI) const;
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const;
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h
index 99e396a..42aa5dd 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.h
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -1,4 +1,4 @@
-//===- SystemZRegisterInfo.h - SystemZ Register Information Impl ----*- C++ -*-===//
+//===-- SystemZRegisterInfo.h - SystemZ Register Information ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -19,15 +19,6 @@
namespace llvm {
-namespace SystemZ {
- /// SubregIndex - The index of various sized subregister classes. Note that
- /// these indices must be kept in sync with the class indices in the
- /// SystemZRegisterInfo.td file.
- enum SubregIndex {
- SUBREG_32BIT = 1, SUBREG_EVEN = 1, SUBREG_ODD = 2
- };
-}
-
class SystemZSubtarget;
class SystemZInstrInfo;
class Type;
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.td b/lib/Target/SystemZ/SystemZRegisterInfo.td
index 8795847..b561744 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.td
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.td
@@ -53,6 +53,14 @@ class FPRL<bits<4> num, string n, list<Register> subregs>
field bits<4> Num = num;
}
+let Namespace = "SystemZ" in {
+def subreg_32bit : SubRegIndex;
+def subreg_even32 : SubRegIndex;
+def subreg_odd32 : SubRegIndex;
+def subreg_even : SubRegIndex;
+def subreg_odd : SubRegIndex;
+}
+
// General-purpose registers
def R0W : GPR32< 0, "r0">, DwarfRegNum<[0]>;
def R1W : GPR32< 1, "r1">, DwarfRegNum<[1]>;
@@ -71,6 +79,7 @@ def R13W : GPR32<13, "r13">, DwarfRegNum<[13]>;
def R14W : GPR32<14, "r14">, DwarfRegNum<[14]>;
def R15W : GPR32<15, "r15">, DwarfRegNum<[15]>;
+let SubRegIndices = [subreg_32bit] in {
def R0D : GPR64< 0, "r0", [R0W]>, DwarfRegNum<[0]>;
def R1D : GPR64< 1, "r1", [R1W]>, DwarfRegNum<[1]>;
def R2D : GPR64< 2, "r2", [R2W]>, DwarfRegNum<[2]>;
@@ -87,8 +96,10 @@ def R12D : GPR64<12, "r12", [R12W]>, DwarfRegNum<[12]>;
def R13D : GPR64<13, "r13", [R13W]>, DwarfRegNum<[13]>;
def R14D : GPR64<14, "r14", [R14W]>, DwarfRegNum<[14]>;
def R15D : GPR64<15, "r15", [R15W]>, DwarfRegNum<[15]>;
+}
// Register pairs
+let SubRegIndices = [subreg_even32, subreg_odd32] in {
def R0P : GPR64< 0, "r0", [R0W, R1W], [R0D, R1D]>, DwarfRegNum<[0]>;
def R2P : GPR64< 2, "r2", [R2W, R3W], [R2D, R3D]>, DwarfRegNum<[2]>;
def R4P : GPR64< 4, "r4", [R4W, R5W], [R4D, R5D]>, DwarfRegNum<[4]>;
@@ -97,7 +108,11 @@ def R8P : GPR64< 8, "r8", [R8W, R9W], [R8D, R9D]>, DwarfRegNum<[8]>;
def R10P : GPR64<10, "r10", [R10W, R11W], [R10D, R11D]>, DwarfRegNum<[10]>;
def R12P : GPR64<12, "r12", [R12W, R13W], [R12D, R13D]>, DwarfRegNum<[12]>;
def R14P : GPR64<14, "r14", [R14W, R15W], [R14D, R15D]>, DwarfRegNum<[14]>;
+}
+let SubRegIndices = [subreg_even, subreg_odd],
+ CompositeIndices = [(subreg_even32 subreg_even, subreg_32bit),
+ (subreg_odd32 subreg_odd, subreg_32bit)] in {
def R0Q : GPR128< 0, "r0", [R0D, R1D], [R0P]>, DwarfRegNum<[0]>;
def R2Q : GPR128< 2, "r2", [R2D, R3D], [R2P]>, DwarfRegNum<[2]>;
def R4Q : GPR128< 4, "r4", [R4D, R5D], [R4P]>, DwarfRegNum<[4]>;
@@ -106,6 +121,7 @@ def R8Q : GPR128< 8, "r8", [R8D, R9D], [R8P]>, DwarfRegNum<[8]>;
def R10Q : GPR128<10, "r10", [R10D, R11D], [R10P]>, DwarfRegNum<[10]>;
def R12Q : GPR128<12, "r12", [R12D, R13D], [R12P]>, DwarfRegNum<[12]>;
def R14Q : GPR128<14, "r14", [R14D, R15D], [R14P]>, DwarfRegNum<[14]>;
+}
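
The CompositeIndices entries spell out how the pair indices compose; concretely for R0Q, readable off the definitions above (the getSubReg calls are illustrative):

    // subreg_even32 is subreg_even followed by subreg_32bit:
    //   getSubReg(R0Q, subreg_even)   == R0D   (even 64-bit half)
    //   getSubReg(R0D, subreg_32bit)  == R0W   (its 32-bit subregister)
    //   getSubReg(R0Q, subreg_even32) == R0W   (same answer via the composite)
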
// Floating-point registers
def F0S : FPRS< 0, "f0">, DwarfRegNum<[16]>;
@@ -125,6 +141,7 @@ def F13S : FPRS<13, "f13">, DwarfRegNum<[29]>;
def F14S : FPRS<14, "f14">, DwarfRegNum<[30]>;
def F15S : FPRS<15, "f15">, DwarfRegNum<[31]>;
+let SubRegIndices = [subreg_32bit] in {
def F0L : FPRL< 0, "f0", [F0S]>, DwarfRegNum<[16]>;
def F1L : FPRL< 1, "f1", [F1S]>, DwarfRegNum<[17]>;
def F2L : FPRL< 2, "f2", [F2S]>, DwarfRegNum<[18]>;
@@ -141,39 +158,11 @@ def F12L : FPRL<12, "f12", [F12S]>, DwarfRegNum<[28]>;
def F13L : FPRL<13, "f13", [F13S]>, DwarfRegNum<[29]>;
def F14L : FPRL<14, "f14", [F14S]>, DwarfRegNum<[30]>;
def F15L : FPRL<15, "f15", [F15S]>, DwarfRegNum<[31]>;
+}
// Status register
def PSW : SystemZReg<"psw">;
-def subreg_32bit : PatLeaf<(i32 1)>;
-def subreg_even32 : PatLeaf<(i32 1)>;
-def subreg_odd32 : PatLeaf<(i32 2)>;
-def subreg_even : PatLeaf<(i32 3)>;
-def subreg_odd : PatLeaf<(i32 4)>;
-
-def : SubRegSet<1, [R0D, R1D, R2D, R3D, R4D, R5D, R6D, R7D,
- R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D],
- [R0W, R1W, R2W, R3W, R4W, R5W, R6W, R7W,
- R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W]>;
-
-def : SubRegSet<3, [R0Q, R2Q, R4Q, R6Q, R8Q, R10Q, R12Q, R14Q],
- [R0D, R2D, R4D, R6D, R8D, R10D, R12D, R14D]>;
-
-def : SubRegSet<4, [R0Q, R2Q, R4Q, R6Q, R8Q, R10Q, R12Q, R14Q],
- [R1D, R3D, R5D, R7D, R9D, R11D, R13D, R15D]>;
-
-def : SubRegSet<1, [R0P, R2P, R4P, R6P, R8P, R10P, R12P, R14P],
- [R0W, R2W, R4W, R6W, R8W, R10W, R12W, R14W]>;
-
-def : SubRegSet<2, [R0P, R2P, R4P, R6P, R8P, R10P, R12P, R14P],
- [R1W, R3W, R5W, R7W, R9W, R11W, R13W, R15W]>;
-
-def : SubRegSet<1, [R0Q, R2Q, R4Q, R6Q, R8Q, R10Q, R12Q, R14Q],
- [R0W, R2W, R4W, R6W, R8W, R10W, R12W, R14W]>;
-
-def : SubRegSet<2, [R0Q, R2Q, R4Q, R6Q, R8Q, R10Q, R12Q, R14Q],
- [R1W, R3W, R5W, R7W, R9W, R11W, R13W, R15W]>;
-
/// Register classes
def GR32 : RegisterClass<"SystemZ", [i32], 32,
// Volatile registers
@@ -276,7 +265,7 @@ def GR64 : RegisterClass<"SystemZ", [i64], 64,
// Volatile, but not allocable
R14D, R15D]>
{
- let SubRegClassList = [GR32];
+ let SubRegClasses = [(GR32 subreg_32bit)];
let MethodProtos = [{
iterator allocation_order_begin(const MachineFunction &MF) const;
iterator allocation_order_end(const MachineFunction &MF) const;
@@ -323,7 +312,7 @@ def ADDR64 : RegisterClass<"SystemZ", [i64], 64,
// Volatile, but not allocable
R14D, R15D]>
{
- let SubRegClassList = [ADDR32];
+ let SubRegClasses = [(ADDR32 subreg_32bit)];
let MethodProtos = [{
iterator allocation_order_begin(const MachineFunction &MF) const;
iterator allocation_order_end(const MachineFunction &MF) const;
@@ -366,7 +355,7 @@ def ADDR64 : RegisterClass<"SystemZ", [i64], 64,
def GR64P : RegisterClass<"SystemZ", [v2i32], 64,
[R0P, R2P, R4P, R6P, R8P, R10P, R12P, R14P]>
{
- let SubRegClassList = [GR32, GR32];
+ let SubRegClasses = [(GR32 subreg_even32, subreg_odd32)];
let MethodProtos = [{
iterator allocation_order_begin(const MachineFunction &MF) const;
iterator allocation_order_end(const MachineFunction &MF) const;
@@ -402,7 +391,8 @@ def GR64P : RegisterClass<"SystemZ", [v2i32], 64,
def GR128 : RegisterClass<"SystemZ", [v2i64], 128,
[R0Q, R2Q, R4Q, R6Q, R8Q, R10Q, R12Q, R14Q]>
{
- let SubRegClassList = [GR32, GR32, GR64, GR64];
+ let SubRegClasses = [(GR32 subreg_even32, subreg_odd32),
+ (GR64 subreg_even, subreg_odd)];
let MethodProtos = [{
iterator allocation_order_begin(const MachineFunction &MF) const;
iterator allocation_order_end(const MachineFunction &MF) const;
@@ -462,7 +452,7 @@ def FP32 : RegisterClass<"SystemZ", [f32], 32,
def FP64 : RegisterClass<"SystemZ", [f64], 64,
[F0L, F1L, F2L, F3L, F4L, F5L, F6L, F7L,
F8L, F9L, F10L, F11L, F12L, F13L, F14L, F15L]> {
- let SubRegClassList = [FP32];
+ let SubRegClasses = [(FP32 subreg_32bit)];
let MethodProtos = [{
iterator allocation_order_begin(const MachineFunction &MF) const;
iterator allocation_order_end(const MachineFunction &MF) const;
diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
index 87c831b..3eabcd2 100644
--- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
+++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
@@ -12,10 +12,11 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "systemz-selectiondag-info"
-#include "SystemZSelectionDAGInfo.h"
+#include "SystemZTargetMachine.h"
using namespace llvm;
-SystemZSelectionDAGInfo::SystemZSelectionDAGInfo() {
+SystemZSelectionDAGInfo::SystemZSelectionDAGInfo(const SystemZTargetMachine &TM)
+ : TargetSelectionDAGInfo(TM) {
}
SystemZSelectionDAGInfo::~SystemZSelectionDAGInfo() {
diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
index 5292de9..1450401 100644
--- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
+++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
@@ -18,9 +18,11 @@
namespace llvm {
+class SystemZTargetMachine;
+
class SystemZSelectionDAGInfo : public TargetSelectionDAGInfo {
public:
- SystemZSelectionDAGInfo();
+ explicit SystemZSelectionDAGInfo(const SystemZTargetMachine &TM);
~SystemZSelectionDAGInfo();
};
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp
index dfa26a1..f45827b 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -29,7 +29,7 @@ SystemZTargetMachine::SystemZTargetMachine(const Target &T,
Subtarget(TT, FS),
DataLayout("E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32"
"-f64:64:64-f128:128:128-a0:16:16-n32:64"),
- InstrInfo(*this), TLInfo(*this),
+ InstrInfo(*this), TLInfo(*this), TSInfo(*this),
FrameInfo(TargetFrameInfo::StackGrowsDown, 8, -160) {
if (getRelocationModel() == Reloc::Default)
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.h b/lib/Target/SystemZ/SystemZTargetMachine.h
index d3357cc..6af829b 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.h
+++ b/lib/Target/SystemZ/SystemZTargetMachine.h
@@ -17,6 +17,7 @@
#include "SystemZInstrInfo.h"
#include "SystemZISelLowering.h"
+#include "SystemZSelectionDAGInfo.h"
#include "SystemZRegisterInfo.h"
#include "SystemZSubtarget.h"
#include "llvm/Target/TargetData.h"
@@ -32,6 +33,7 @@ class SystemZTargetMachine : public LLVMTargetMachine {
const TargetData DataLayout; // Calculates type size & alignment
SystemZInstrInfo InstrInfo;
SystemZTargetLowering TLInfo;
+ SystemZSelectionDAGInfo TSInfo;
// SystemZ does not have any call stack frame, therefore not having
// any SystemZ specific FrameInfo class.
@@ -53,6 +55,10 @@ public:
return &TLInfo;
}
+ virtual const SystemZSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+
virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
}; // SystemZTargetMachine.
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
index ac67c91..df52368 100644
--- a/lib/Target/TargetMachine.cpp
+++ b/lib/Target/TargetMachine.cpp
@@ -212,7 +212,8 @@ FunctionSections("ffunction-sections",
//
TargetMachine::TargetMachine(const Target &T)
- : TheTarget(T), AsmInfo(0) {
+ : TheTarget(T), AsmInfo(0),
+ MCRelaxAll(false) {
// Typically it will be subtargets that will adjust FloatABIType from Default
// to Soft or Hard.
if (UseSoftFloat)
@@ -273,13 +274,14 @@ namespace llvm {
/// DisableFramePointerElim - This returns true if frame pointer elimination
/// optimization should be disabled for the given machine function.
bool DisableFramePointerElim(const MachineFunction &MF) {
- if (NoFramePointerElim)
- return true;
- if (NoFramePointerElimNonLeaf) {
+ // Check to see if we should eliminate non-leaf frame pointers and then
+ // check to see if we should eliminate all frame pointers.
+ if (NoFramePointerElimNonLeaf && !NoFramePointerElim) {
const MachineFrameInfo *MFI = MF.getFrameInfo();
return MFI->hasCalls();
}
- return false;
+
+ return NoFramePointerElim;
}
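
The reordering preserves the old behavior; enumerating the flag combinations as a quick check:

    // NoFramePointerElim  NoFramePointerElimNonLeaf  hasCalls()  ->  result
    //       true                    any                 any          true
    //       false                   true                true         true
    //       false                   true                false        false
    //       false                   false               any          false
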
/// LessPreciseFPMAD - This flag return true when -enable-fp-mad option
diff --git a/lib/Target/TargetRegisterInfo.cpp b/lib/Target/TargetRegisterInfo.cpp
index 52983ff..dcc5f61 100644
--- a/lib/Target/TargetRegisterInfo.cpp
+++ b/lib/Target/TargetRegisterInfo.cpp
@@ -22,14 +22,14 @@ using namespace llvm;
TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterDesc *D, unsigned NR,
regclass_iterator RCB, regclass_iterator RCE,
+ const char *const *subregindexnames,
int CFSO, int CFDO,
const unsigned* subregs, const unsigned subregsize,
- const unsigned* superregs, const unsigned superregsize,
const unsigned* aliases, const unsigned aliasessize)
: SubregHash(subregs), SubregHashSize(subregsize),
- SuperregHash(superregs), SuperregHashSize(superregsize),
AliasesHash(aliases), AliasesHashSize(aliasessize),
- Desc(D), NumRegs(NR), RegClassBegin(RCB), RegClassEnd(RCE) {
+ Desc(D), SubRegIndexNames(subregindexnames), NumRegs(NR),
+ RegClassBegin(RCB), RegClassEnd(RCE) {
assert(NumRegs < FirstVirtualRegister &&
"Target has too many physical registers!");
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 6b403c1..40a6a7b 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -186,20 +186,73 @@ struct X86Operand : public MCParsedAsmOperand {
bool isImm() const { return Kind == Immediate; }
- bool isImmSExt8() const {
- // Accept immediates which fit in 8 bits when sign extended, and
- // non-absolute immediates.
+ bool isImmSExti16i8() const {
if (!isImm())
return false;
- if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm())) {
- int64_t Value = CE->getValue();
- return Value == (int64_t) (int8_t) Value;
- }
+ // If this isn't a constant expr, just assume it fits and let relaxation
+ // handle it.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE)
+ return true;
- return true;
+ // Otherwise, check the value is in a range that makes sense for this
+ // extension.
+ uint64_t Value = CE->getValue();
+ return (( Value <= 0x000000000000007FULL)||
+ (0x000000000000FF80ULL <= Value && Value <= 0x000000000000FFFFULL)||
+ (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
}
-
+ bool isImmSExti32i8() const {
+ if (!isImm())
+ return false;
+
+ // If this isn't a constant expr, just assume it fits and let relaxation
+ // handle it.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE)
+ return true;
+
+ // Otherwise, check the value is in a range that makes sense for this
+ // extension.
+ uint64_t Value = CE->getValue();
+ return (( Value <= 0x000000000000007FULL)||
+ (0x00000000FFFFFF80ULL <= Value && Value <= 0x00000000FFFFFFFFULL)||
+ (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
+ }
+ bool isImmSExti64i8() const {
+ if (!isImm())
+ return false;
+
+ // If this isn't a constant expr, just assume it fits and let relaxation
+ // handle it.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE)
+ return true;
+
+ // Otherwise, check the value is in a range that makes sense for this
+ // extension.
+ uint64_t Value = CE->getValue();
+ return (( Value <= 0x000000000000007FULL)||
+ (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
+ }
+ bool isImmSExti64i32() const {
+ if (!isImm())
+ return false;
+
+ // If this isn't a constant expr, just assume it fits and let relaxation
+ // handle it.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE)
+ return true;
+
+ // Otherwise, check the value is in a range that makes sense for this
+ // extension.
+ uint64_t Value = CE->getValue();
+ return (( Value <= 0x000000007FFFFFFFULL)||
+ (0xFFFFFFFF80000000ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
+ }
+
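
Each predicate accepts three disjoint ranges: small positives that fit the 8-bit immediate directly, the sign-extended negatives at the operand's width, and already sign-extended 64-bit negatives. Spelled out for isImmSExti16i8 (a reading of the code above, not new behavior):

    // accepted: 0x0000..0x007F    fits a signed imm8 as-is
    //           0xFF80..0xFFFF    16-bit pattern whose low byte sign-extends
    //                             back to the same 16 bits
    //           0xFFFFFFFFFFFFFF80..0xFFFFFFFFFFFFFFFF
    //                             already sign-extended 64-bit negative
    // rejected: 0x0080 (128)      changes value under 8-bit sign extension
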
bool isMem() const { return Kind == Memory; }
bool isAbsMem() const {
@@ -231,12 +284,6 @@ struct X86Operand : public MCParsedAsmOperand {
addExpr(Inst, getImm());
}
- void addImmSExt8Operands(MCInst &Inst, unsigned N) const {
- // FIXME: Support user customization of the render method.
- assert(N == 1 && "Invalid number of operands!");
- addExpr(Inst, getImm());
- }
-
void addMemOperands(MCInst &Inst, unsigned N) const {
assert((N == 5) && "Invalid number of operands!");
Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
@@ -535,6 +582,21 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
bool X86ATTAsmParser::
ParseInstruction(const StringRef &Name, SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // The various flavors of pushf and popf use Requires<In32BitMode> and
+ // Requires<In64BitMode>, but the assembler doesn't yet implement that.
+ // For now, just do a manual check to prevent silent misencoding.
+ if (Is64Bit) {
+ if (Name == "popfl")
+ return Error(NameLoc, "popfl cannot be encoded in 64-bit mode");
+ else if (Name == "pushfl")
+ return Error(NameLoc, "pushfl cannot be encoded in 64-bit mode");
+ } else {
+ if (Name == "popfq")
+ return Error(NameLoc, "popfq cannot be encoded in 32-bit mode");
+ else if (Name == "pushfq")
+ return Error(NameLoc, "pushfq cannot be encoded in 32-bit mode");
+ }
+
// FIXME: Hack to recognize "sal..." and "rep..." for now. We need a way to
// represent alternative syntaxes in the .td file, without requiring
// instruction duplication.
@@ -547,9 +609,66 @@ ParseInstruction(const StringRef &Name, SMLoc NameLoc,
.Case("repe", "rep")
.Case("repz", "rep")
.Case("repnz", "repne")
+ .Case("pushf", Is64Bit ? "pushfq" : "pushfl")
+ .Case("popf", Is64Bit ? "popfq" : "popfl")
+ .Case("retl", Is64Bit ? "retl" : "ret")
+ .Case("retq", Is64Bit ? "ret" : "retq")
+ .Case("setz", "sete")
+ .Case("setnz", "setne")
+ .Case("jz", "je")
+ .Case("jnz", "jne")
+    .Case("cmovcl", "cmovbl")
+ .Case("cmovnal", "cmovbel")
+ .Case("cmovnbl", "cmovael")
+ .Case("cmovnbel", "cmoval")
+ .Case("cmovncl", "cmovael")
+ .Case("cmovngl", "cmovlel")
+ .Case("cmovnl", "cmovgel")
+ .Case("cmovngel", "cmovll")
+ .Case("cmovnll", "cmovgel")
+ .Case("cmovnlel", "cmovgl")
+ .Case("cmovnzl", "cmovnel")
+ .Case("cmovzl", "cmovel")
.Default(Name);
+
+ // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
+ const MCExpr *ExtraImmOp = 0;
+ if (PatchedName.startswith("cmp") &&
+ (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
+ PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
+ unsigned SSEComparisonCode = StringSwitch<unsigned>(
+ PatchedName.slice(3, PatchedName.size() - 2))
+ .Case("eq", 0)
+ .Case("lt", 1)
+ .Case("le", 2)
+ .Case("unord", 3)
+ .Case("neq", 4)
+ .Case("nlt", 5)
+ .Case("nle", 6)
+ .Case("ord", 7)
+ .Default(~0U);
+ if (SSEComparisonCode != ~0U) {
+ ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
+ getParser().getContext());
+ if (PatchedName.endswith("ss")) {
+ PatchedName = "cmpss";
+ } else if (PatchedName.endswith("sd")) {
+ PatchedName = "cmpsd";
+ } else if (PatchedName.endswith("ps")) {
+ PatchedName = "cmpps";
+ } else {
+ assert(PatchedName.endswith("pd") && "Unexpected mnemonic!");
+ PatchedName = "cmppd";
+ }
+ }
+ }
Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
+ if (ExtraImmOp)
+ Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
+
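
The net effect of the hack, by example (mnemonic rewriting only, no new semantics): "cmpltps %xmm1, %xmm0" becomes the token "cmpps" plus a leading immediate operand, as if the user had written:

    // cmpps $1, %xmm1, %xmm0     ("lt" maps to comparison code 1 above)
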
if (getLexer().isNot(AsmToken::EndOfStatement)) {
// Parse '*' modifier.
@@ -564,7 +683,7 @@ ParseInstruction(const StringRef &Name, SMLoc NameLoc,
Operands.push_back(Op);
else
return true;
-
+
while (getLexer().is(AsmToken::Comma)) {
Parser.Lex(); // Eat the comma.
@@ -587,6 +706,17 @@ ParseInstruction(const StringRef &Name, SMLoc NameLoc,
Operands.erase(Operands.begin() + 1);
}
+ // FIXME: Hack to handle "f{mul*,add*,sub*,div*} $op, st(0)" the same as
+ // "f{mul*,add*,sub*,div*} $op"
+ if ((Name.startswith("fmul") || Name.startswith("fadd") ||
+ Name.startswith("fsub") || Name.startswith("fdiv")) &&
+ Operands.size() == 3 &&
+ static_cast<X86Operand*>(Operands[2])->isReg() &&
+ static_cast<X86Operand*>(Operands[2])->getReg() == X86::ST0) {
+ delete Operands[2];
+ Operands.erase(Operands.begin() + 2);
+ }
+
return false;
}
@@ -622,6 +752,31 @@ bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
return false;
}
+/// LowerMOffset - Lower an 'moffset' form of an instruction, which just has an
+/// imm operand, to having "rm" or "mr" operands with the offset in the disp
+/// field.
+static void LowerMOffset(MCInst &Inst, unsigned Opc, unsigned RegNo,
+ bool isMR) {
+ MCOperand Disp = Inst.getOperand(0);
+
+ // Start over with an empty instruction.
+ Inst = MCInst();
+ Inst.setOpcode(Opc);
+
+ if (!isMR)
+ Inst.addOperand(MCOperand::CreateReg(RegNo));
+
+ // Add the mem operand.
+ Inst.addOperand(MCOperand::CreateReg(0)); // Segment
+ Inst.addOperand(MCOperand::CreateImm(1)); // Scale
+ Inst.addOperand(MCOperand::CreateReg(0)); // IndexReg
+ Inst.addOperand(Disp); // Displacement
+ Inst.addOperand(MCOperand::CreateReg(0)); // BaseReg
+
+ if (isMR)
+ Inst.addOperand(MCOperand::CreateReg(RegNo));
+}
+
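
A worked example of the lowering, following the operand order in the comments above (instruction text illustrative):

    // "movl 0x1234, %eax" matches MOV32o32a with the single operand Disp=0x1234;
    // LowerMOffset rebuilds it as MOV32rm EAX over a five-part memory operand,
    //   segment=0, scale=1, index=0, disp=0x1234, base=0
    // i.e. a plain absolute load the ordinary encoder already handles.
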
// FIXME: Custom X86 cleanup function to implement a temporary hack to handle
// matching INCL/DECL correctly for x86_64. This needs to be replaced by a
// proper mechanism for supporting (ambiguous) feature dependent instructions.
@@ -637,6 +792,14 @@ void X86ATTAsmParser::InstructionCleanup(MCInst &Inst) {
case X86::INC16m: Inst.setOpcode(X86::INC64_16m); break;
case X86::INC32r: Inst.setOpcode(X86::INC64_32r); break;
case X86::INC32m: Inst.setOpcode(X86::INC64_32m); break;
+
+ // moffset instructions are x86-32 only.
+ case X86::MOV8o8a: LowerMOffset(Inst, X86::MOV8rm , X86::AL , false); break;
+ case X86::MOV16o16a: LowerMOffset(Inst, X86::MOV16rm, X86::AX , false); break;
+ case X86::MOV32o32a: LowerMOffset(Inst, X86::MOV32rm, X86::EAX, false); break;
+ case X86::MOV8ao8: LowerMOffset(Inst, X86::MOV8mr , X86::AL , true); break;
+ case X86::MOV16ao16: LowerMOffset(Inst, X86::MOV16mr, X86::AX , true); break;
+ case X86::MOV32ao32: LowerMOffset(Inst, X86::MOV32mr, X86::EAX, true); break;
}
}
@@ -673,6 +836,8 @@ X86ATTAsmParser::MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*>
bool MatchW = MatchInstructionImpl(Operands, Inst);
Tmp[Base.size()] = 'l';
bool MatchL = MatchInstructionImpl(Operands, Inst);
+ Tmp[Base.size()] = 'q';
+ bool MatchQ = MatchInstructionImpl(Operands, Inst);
// Restore the old token.
Op->setTokenValue(Base);
@@ -680,7 +845,7 @@ X86ATTAsmParser::MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*>
// If exactly one matched, then we treat that as a successful match (and the
// instruction will already have been filled in correctly, since the failing
// matches won't have modified it).
- if (MatchB + MatchW + MatchL == 2)
+ if (MatchB + MatchW + MatchL + MatchQ == 3)
return false;
// Otherwise, the match failed.
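
MatchInstructionImpl returns true on failure, so the widened test still means "exactly one suffix matched": with four candidates, three failures leave one success. Restated (illustrative):

    // bool Failed[] = { MatchB, MatchW, MatchL, MatchQ };   // true == no match
    // accept iff std::count(Failed, Failed + 4, true) == 3
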
diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
index 8b0ed1c..183213d 100644
--- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
@@ -58,12 +58,11 @@ bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
SetupMachineFunction(MF);
if (Subtarget->isTargetCOFF()) {
- const Function *F = MF.getFunction();
- OutStreamer.EmitRawText("\t.def\t " + Twine(CurrentFnSym->getName()) +
- ";\t.scl\t" +
- Twine(F->hasInternalLinkage() ? COFF::C_STAT : COFF::C_EXT) +
- ";\t.type\t" + Twine(COFF::DT_FCN << COFF::N_BTSHFT)
- + ";\t.endef");
+ bool Intrn = MF.getFunction()->hasInternalLinkage();
+ OutStreamer.BeginCOFFSymbolDef(CurrentFnSym);
+ OutStreamer.EmitCOFFSymbolStorageClass(Intrn ? COFF::C_STAT : COFF::C_EXT);
+ OutStreamer.EmitCOFFSymbolType(COFF::DT_FCN << COFF::N_BTSHFT);
+ OutStreamer.EndCOFFSymbolDef();
}
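
The structured calls emit the same directives the raw text built by hand, roughly (symbol name illustrative):

    // .def  _main;
    //    .scl  2;      // COFF::C_EXT
    //    .type 32;     // COFF::DT_FCN << COFF::N_BTSHFT  (2 << 4)
    // .endef
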
// Have common code print out the function header with linkage info etc.
@@ -571,44 +570,55 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
MMI->getObjFileInfo<X86COFFMachineModuleInfo>();
// Emit type information for external functions
- for (X86COFFMachineModuleInfo::stub_iterator I = COFFMMI.stub_begin(),
- E = COFFMMI.stub_end(); I != E; ++I) {
- OutStreamer.EmitRawText("\t.def\t " + Twine(I->getKeyData()) +
- ";\t.scl\t" + Twine(COFF::C_EXT) +
- ";\t.type\t" +
- Twine(COFF::DT_FCN << COFF::N_BTSHFT) +
- ";\t.endef");
+ typedef X86COFFMachineModuleInfo::externals_iterator externals_iterator;
+ for (externals_iterator I = COFFMMI.externals_begin(),
+ E = COFFMMI.externals_end();
+ I != E; ++I) {
+ OutStreamer.BeginCOFFSymbolDef(CurrentFnSym);
+ OutStreamer.EmitCOFFSymbolStorageClass(COFF::C_EXT);
+ OutStreamer.EmitCOFFSymbolType(COFF::DT_FCN << COFF::N_BTSHFT);
+ OutStreamer.EndCOFFSymbolDef();
}
- if (Subtarget->isTargetCygMing()) {
- // Necessary for dllexport support
- std::vector<const MCSymbol*> DLLExportedFns, DLLExportedGlobals;
-
- const TargetLoweringObjectFileCOFF &TLOFCOFF =
- static_cast<const TargetLoweringObjectFileCOFF&>(getObjFileLowering());
-
- for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I)
- if (I->hasDLLExportLinkage())
- DLLExportedFns.push_back(Mang->getSymbol(I));
-
- for (Module::const_global_iterator I = M.global_begin(),
- E = M.global_end(); I != E; ++I)
- if (I->hasDLLExportLinkage())
- DLLExportedGlobals.push_back(Mang->getSymbol(I));
-
- // Output linker support code for dllexported globals on windows.
- if (!DLLExportedGlobals.empty() || !DLLExportedFns.empty()) {
- OutStreamer.SwitchSection(TLOFCOFF.getCOFFSection(".section .drectve",
- true,
- SectionKind::getMetadata()));
- for (unsigned i = 0, e = DLLExportedGlobals.size(); i != e; ++i)
- OutStreamer.EmitRawText("\t.ascii \" -export:" +
- Twine(DLLExportedGlobals[i]->getName()) +
- ",data\"");
-
- for (unsigned i = 0, e = DLLExportedFns.size(); i != e; ++i)
- OutStreamer.EmitRawText("\t.ascii \" -export:" +
- Twine(DLLExportedFns[i]->getName()) + "\"");
+ // Necessary for dllexport support
+ std::vector<const MCSymbol*> DLLExportedFns, DLLExportedGlobals;
+
+ const TargetLoweringObjectFileCOFF &TLOFCOFF =
+ static_cast<const TargetLoweringObjectFileCOFF&>(getObjFileLowering());
+
+ for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (I->hasDLLExportLinkage())
+ DLLExportedFns.push_back(Mang->getSymbol(I));
+
+ for (Module::const_global_iterator I = M.global_begin(),
+ E = M.global_end(); I != E; ++I)
+ if (I->hasDLLExportLinkage())
+ DLLExportedGlobals.push_back(Mang->getSymbol(I));
+
+ // Output linker support code for dllexported globals on windows.
+ if (!DLLExportedGlobals.empty() || !DLLExportedFns.empty()) {
+ OutStreamer.SwitchSection(TLOFCOFF.getDrectveSection());
+ SmallString<128> name;
+ for (unsigned i = 0, e = DLLExportedGlobals.size(); i != e; ++i) {
+ if (Subtarget->isTargetWindows())
+ name = " /EXPORT:";
+ else
+ name = " -export:";
+ name += DLLExportedGlobals[i]->getName();
+ if (Subtarget->isTargetWindows())
+ name += ",DATA";
+ else
+ name += ",data";
+ OutStreamer.EmitBytes(name, 0);
+ }
+
+ for (unsigned i = 0, e = DLLExportedFns.size(); i != e; ++i) {
+ if (Subtarget->isTargetWindows())
+ name = " /EXPORT:";
+ else
+ name = " -export:";
+ name += DLLExportedFns[i]->getName();
+ OutStreamer.EmitBytes(name, 0);
}
}
}
diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.h b/lib/Target/X86/AsmPrinter/X86AsmPrinter.h
index 95984b2..b5a7f8d 100644
--- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.h
+++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.h
@@ -31,7 +31,7 @@ class MCInst;
class MCStreamer;
class MCSymbol;
-class VISIBILITY_HIDDEN X86AsmPrinter : public AsmPrinter {
+class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
const X86Subtarget *Subtarget;
public:
explicit X86AsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
index effc8ed..4edeca9 100644
--- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
+++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
@@ -224,6 +224,60 @@ static void LowerUnaryToTwoAddr(MCInst &OutMI, unsigned NewOpc) {
OutMI.addOperand(OutMI.getOperand(0));
}
+/// \brief Simplify FOO $imm, %{al,ax,eax,rax} to FOO $imm, for instructions with
+/// a short fixed-register form.
+static void SimplifyShortImmForm(MCInst &Inst, unsigned Opcode) {
+ unsigned ImmOp = Inst.getNumOperands() - 1;
+ assert(Inst.getOperand(0).isReg() && Inst.getOperand(ImmOp).isImm() &&
+ ((Inst.getNumOperands() == 3 && Inst.getOperand(1).isReg() &&
+ Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg()) ||
+ Inst.getNumOperands() == 2) && "Unexpected instruction!");
+
+ // Check whether the destination register can be fixed.
+ unsigned Reg = Inst.getOperand(0).getReg();
+ if (Reg != X86::AL && Reg != X86::AX && Reg != X86::EAX && Reg != X86::RAX)
+ return;
+
+ // If so, rewrite the instruction.
+ MCOperand Saved = Inst.getOperand(ImmOp);
+ Inst = MCInst();
+ Inst.setOpcode(Opcode);
+ Inst.addOperand(Saved);
+}
+
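
Effect by example (illustrative): a two-address immediate op on the accumulator collapses to the short fixed-register encoding:

    // ADD32ri EAX, EAX, 42   ->   ADD32i32 42
    // Any destination other than AL/AX/EAX/RAX takes the early return above
    // and keeps its long form.
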
+/// \brief Simplify things like MOV32rm to MOV32o32a.
+static void SimplifyShortMoveForm(MCInst &Inst, unsigned Opcode) {
+ bool IsStore = Inst.getOperand(0).isReg() && Inst.getOperand(1).isReg();
+ unsigned AddrBase = IsStore;
+ unsigned RegOp = IsStore ? 0 : 5;
+ unsigned AddrOp = AddrBase + 3;
+ assert(Inst.getNumOperands() == 6 && Inst.getOperand(RegOp).isReg() &&
+ Inst.getOperand(AddrBase + 0).isReg() && // base
+ Inst.getOperand(AddrBase + 1).isImm() && // scale
+ Inst.getOperand(AddrBase + 2).isReg() && // index register
+ (Inst.getOperand(AddrOp).isExpr() || // address
+          Inst.getOperand(AddrOp).isImm()) &&
+ Inst.getOperand(AddrBase + 4).isReg() && // segment
+ "Unexpected instruction!");
+
+ // Check whether the destination register can be fixed.
+ unsigned Reg = Inst.getOperand(RegOp).getReg();
+ if (Reg != X86::AL && Reg != X86::AX && Reg != X86::EAX && Reg != X86::RAX)
+ return;
+
+ // Check whether this is an absolute address.
+ if (Inst.getOperand(AddrBase + 0).getReg() != 0 ||
+ Inst.getOperand(AddrBase + 2).getReg() != 0 ||
+ Inst.getOperand(AddrBase + 4).getReg() != 0 ||
+ Inst.getOperand(AddrBase + 1).getImm() != 1)
+ return;
+
+ // If so, rewrite the instruction.
+ MCOperand Saved = Inst.getOperand(AddrOp);
+ Inst = MCInst();
+ Inst.setOpcode(Opcode);
+ Inst.addOperand(Saved);
+}
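
This is the inverse of the assembler's LowerMOffset hack earlier in this patch: a full memory form whose address is absolute and whose register is the accumulator folds back to the short moffset form (illustrative):

    // MOV32rm EAX, base=0, scale=1, index=0, disp=X, seg=0   ->   MOV32o32a X
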
void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
OutMI.setOpcode(MI->getOpcode());
@@ -309,8 +363,32 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
LowerSubReg32_Op0(OutMI, X86::MOV32r0); // MOV64r0 -> MOV32r0
LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); // MOV32r0 -> XOR32rr
break;
-
-
+
+ // TAILJMPr, TAILJMPr64, CALL64r, CALL64pcrel32 - These instructions have
+ // register inputs modeled as normal uses instead of implicit uses. As such,
+ // truncate off all but the first operand (the callee). FIXME: Change isel.
+ case X86::TAILJMPr:
+ case X86::TAILJMPr64:
+ case X86::CALL64r:
+ case X86::CALL64pcrel32: {
+ unsigned Opcode = OutMI.getOpcode();
+ MCOperand Saved = OutMI.getOperand(0);
+ OutMI = MCInst();
+ OutMI.setOpcode(Opcode);
+ OutMI.addOperand(Saved);
+ break;
+ }
+
+ // TAILJMPd, TAILJMPd64 - Lower to the correct jump instructions.
+ case X86::TAILJMPd:
+ case X86::TAILJMPd64: {
+ MCOperand Saved = OutMI.getOperand(0);
+ OutMI = MCInst();
+ OutMI.setOpcode(X86::TAILJMP_1);
+ OutMI.addOperand(Saved);
+ break;
+ }
+
// The assembler backend wants to see branches in their small form and relax
// them to their large form. The JIT can only handle the large form because
// it does not do relaxation. For now, translate the large form to the
@@ -332,6 +410,61 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
case X86::JGE_4: OutMI.setOpcode(X86::JGE_1); break;
case X86::JLE_4: OutMI.setOpcode(X86::JLE_1); break;
case X86::JG_4: OutMI.setOpcode(X86::JG_1); break;
+
+ // We don't currently select the correct instruction form for instructions
+ // which have a short %eax, etc. form. Handle this by custom lowering, for
+ // now.
+ //
+ // Note, we are currently not handling the following instructions:
+ // MOV64ao8, MOV64o8a
+ // XCHG16ar, XCHG32ar, XCHG64ar
+ case X86::MOV8mr_NOREX:
+ case X86::MOV8mr: SimplifyShortMoveForm(OutMI, X86::MOV8ao8); break;
+ case X86::MOV8rm_NOREX:
+ case X86::MOV8rm: SimplifyShortMoveForm(OutMI, X86::MOV8o8a); break;
+ case X86::MOV16mr: SimplifyShortMoveForm(OutMI, X86::MOV16ao16); break;
+ case X86::MOV16rm: SimplifyShortMoveForm(OutMI, X86::MOV16o16a); break;
+ case X86::MOV32mr: SimplifyShortMoveForm(OutMI, X86::MOV32ao32); break;
+ case X86::MOV32rm: SimplifyShortMoveForm(OutMI, X86::MOV32o32a); break;
+ case X86::MOV64mr: SimplifyShortMoveForm(OutMI, X86::MOV64ao64); break;
+ case X86::MOV64rm: SimplifyShortMoveForm(OutMI, X86::MOV64o64a); break;
+
+ case X86::ADC8ri: SimplifyShortImmForm(OutMI, X86::ADC8i8); break;
+ case X86::ADC16ri: SimplifyShortImmForm(OutMI, X86::ADC16i16); break;
+ case X86::ADC32ri: SimplifyShortImmForm(OutMI, X86::ADC32i32); break;
+ case X86::ADC64ri32: SimplifyShortImmForm(OutMI, X86::ADC64i32); break;
+ case X86::ADD8ri: SimplifyShortImmForm(OutMI, X86::ADD8i8); break;
+ case X86::ADD16ri: SimplifyShortImmForm(OutMI, X86::ADD16i16); break;
+ case X86::ADD32ri: SimplifyShortImmForm(OutMI, X86::ADD32i32); break;
+ case X86::ADD64ri32: SimplifyShortImmForm(OutMI, X86::ADD64i32); break;
+ case X86::AND8ri: SimplifyShortImmForm(OutMI, X86::AND8i8); break;
+ case X86::AND16ri: SimplifyShortImmForm(OutMI, X86::AND16i16); break;
+ case X86::AND32ri: SimplifyShortImmForm(OutMI, X86::AND32i32); break;
+ case X86::AND64ri32: SimplifyShortImmForm(OutMI, X86::AND64i32); break;
+ case X86::CMP8ri: SimplifyShortImmForm(OutMI, X86::CMP8i8); break;
+ case X86::CMP16ri: SimplifyShortImmForm(OutMI, X86::CMP16i16); break;
+ case X86::CMP32ri: SimplifyShortImmForm(OutMI, X86::CMP32i32); break;
+ case X86::CMP64ri32: SimplifyShortImmForm(OutMI, X86::CMP64i32); break;
+ case X86::OR8ri: SimplifyShortImmForm(OutMI, X86::OR8i8); break;
+ case X86::OR16ri: SimplifyShortImmForm(OutMI, X86::OR16i16); break;
+ case X86::OR32ri: SimplifyShortImmForm(OutMI, X86::OR32i32); break;
+ case X86::OR64ri32: SimplifyShortImmForm(OutMI, X86::OR64i32); break;
+ case X86::SBB8ri: SimplifyShortImmForm(OutMI, X86::SBB8i8); break;
+ case X86::SBB16ri: SimplifyShortImmForm(OutMI, X86::SBB16i16); break;
+ case X86::SBB32ri: SimplifyShortImmForm(OutMI, X86::SBB32i32); break;
+ case X86::SBB64ri32: SimplifyShortImmForm(OutMI, X86::SBB64i32); break;
+ case X86::SUB8ri: SimplifyShortImmForm(OutMI, X86::SUB8i8); break;
+ case X86::SUB16ri: SimplifyShortImmForm(OutMI, X86::SUB16i16); break;
+ case X86::SUB32ri: SimplifyShortImmForm(OutMI, X86::SUB32i32); break;
+ case X86::SUB64ri32: SimplifyShortImmForm(OutMI, X86::SUB64i32); break;
+ case X86::TEST8ri: SimplifyShortImmForm(OutMI, X86::TEST8i8); break;
+ case X86::TEST16ri: SimplifyShortImmForm(OutMI, X86::TEST16i16); break;
+ case X86::TEST32ri: SimplifyShortImmForm(OutMI, X86::TEST32i32); break;
+ case X86::TEST64ri32: SimplifyShortImmForm(OutMI, X86::TEST64i32); break;
+ case X86::XOR8ri: SimplifyShortImmForm(OutMI, X86::XOR8i8); break;
+ case X86::XOR16ri: SimplifyShortImmForm(OutMI, X86::XOR16i16); break;
+ case X86::XOR32ri: SimplifyShortImmForm(OutMI, X86::XOR32i32); break;
+ case X86::XOR64ri32: SimplifyShortImmForm(OutMI, X86::XOR64i32); break;
}
}
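
Background on the block above: the short accumulator forms exist because x86 gives AL/AX/EAX/RAX-based ALU ops a dedicated encoding with the immediate following a one-byte opcode directly (e.g. ADD EAX, imm32 is 05 id instead of 81 /0 id), saving a byte. SimplifyShortImmForm is defined earlier in this patch; the following is only a plausible sketch of such a rewrite, not the patch's actual code — the operand layout and the clear() call are assumptions about this era of the MCInst API:

    // Hypothetical sketch: rewrite "op reg, reg, imm" into the short
    // accumulator form "op imm", which leaves AL/AX/EAX/RAX implicit and
    // keeps only the immediate operand.
    static void SimplifyShortImmFormSketch(MCInst &Inst, unsigned NewOpc) {
      MCOperand Imm = Inst.getOperand(Inst.getNumOperands() - 1);
      Inst.clear();             // drop the explicit register operands
      Inst.setOpcode(NewOpc);   // e.g. X86::ADD32i32, which implies EAX
      Inst.addOperand(Imm);
    }
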
@@ -346,7 +479,7 @@ void X86AsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
// cast away const; DIetc do not take const operands for some reason.
DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata()));
if (V.getContext().isSubprogram())
- O << DISubprogram(V.getContext().getNode()).getDisplayName() << ":";
+ O << DISubprogram(V.getContext()).getDisplayName() << ":";
O << V.getName();
O << " <- ";
// Frame address. Currently handles register +- offset only.
diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.h b/lib/Target/X86/AsmPrinter/X86MCInstLower.h
index ebd23f6..9e5474f 100644
--- a/lib/Target/X86/AsmPrinter/X86MCInstLower.h
+++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.h
@@ -25,7 +25,7 @@ namespace llvm {
class X86Subtarget;
/// X86MCInstLower - This class is used to lower an MachineInstr into an MCInst.
-class VISIBILITY_HIDDEN X86MCInstLower {
+class LLVM_LIBRARY_VISIBILITY X86MCInstLower {
MCContext &Ctx;
Mangler *Mang;
X86AsmPrinter &AsmPrinter;
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
index da6bb91..1334820 100644
--- a/lib/Target/X86/CMakeLists.txt
+++ b/lib/Target/X86/CMakeLists.txt
@@ -39,7 +39,12 @@ set(sources
if( CMAKE_CL_64 )
enable_language(ASM_MASM)
- set(sources ${sources} X86CompilationCallback_Win64.asm)
+ ADD_CUSTOM_COMMAND(
+ OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/X86CompilationCallback_Win64.obj
+ COMMAND ${CMAKE_ASM_MASM_COMPILER} /Fo ${CMAKE_CURRENT_BINARY_DIR}/X86CompilationCallback_Win64.obj /c ${CMAKE_CURRENT_SOURCE_DIR}/X86CompilationCallback_Win64.asm
+ DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/X86CompilationCallback_Win64.asm
+ )
+ set(sources ${sources} ${CMAKE_CURRENT_BINARY_DIR}/X86CompilationCallback_Win64.obj)
endif()
add_llvm_target(X86CodeGen ${sources})
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 62e7357..8a5a630 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -155,7 +155,57 @@ static void translateRegister(MCInst &mcInst, Reg reg) {
///
/// @param mcInst - The MCInst to append to.
/// @param immediate - The immediate value to append.
-static void translateImmediate(MCInst &mcInst, uint64_t immediate) {
+/// @param operand - The operand, as stored in the descriptor table.
+/// @param insn - The internal instruction.
+static void translateImmediate(MCInst &mcInst,
+ uint64_t immediate,
+ OperandSpecifier &operand,
+ InternalInstruction &insn) {
+ // Sign-extend the immediate if necessary.
+
+ OperandType type = operand.type;
+
+ if (type == TYPE_RELv) {
+ switch (insn.displacementSize) {
+ default:
+ break;
+ case 8:
+ type = TYPE_MOFFS8;
+ break;
+ case 16:
+ type = TYPE_MOFFS16;
+ break;
+ case 32:
+ type = TYPE_MOFFS32;
+ break;
+ case 64:
+ type = TYPE_MOFFS64;
+ break;
+ }
+ }
+
+ switch (type) {
+ case TYPE_MOFFS8:
+ case TYPE_REL8:
+ if(immediate & 0x80)
+ immediate |= ~(0xffull);
+ break;
+ case TYPE_MOFFS16:
+ if(immediate & 0x8000)
+ immediate |= ~(0xffffull);
+ break;
+ case TYPE_MOFFS32:
+ case TYPE_REL32:
+ case TYPE_REL64:
+ if(immediate & 0x80000000)
+ immediate |= ~(0xffffffffull);
+ break;
+ case TYPE_MOFFS64:
+ default:
+ // operand is 64 bits wide. Do nothing.
+ break;
+ }
+
mcInst.addOperand(MCOperand::CreateImm(immediate));
}
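
The OR-with-complement idiom above is manual sign extension on a value kept in a uint64_t: once the sign bit of the narrow field is set, all higher bits are forced to one. Worked through for the 8-bit case:

    // Sign-extending an 8-bit immediate stored zero-extended in a uint64_t:
    uint64_t imm = 0x80;        // the 8-bit value -128
    if (imm & 0x80)             // sign bit of the 8-bit field is set
      imm |= ~0xffull;          // OR ones over bits 8..63
    // imm == 0xffffffffffffff80, i.e. (uint64_t)(int64_t)-128
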
@@ -370,8 +420,7 @@ static bool translateRM(MCInst &mcInst,
case TYPE_XMM64:
case TYPE_XMM128:
case TYPE_DEBUGREG:
- case TYPE_CR32:
- case TYPE_CR64:
+ case TYPE_CONTROLREG:
return translateRMRegister(mcInst, insn);
case TYPE_M:
case TYPE_M8:
@@ -447,8 +496,10 @@ static bool translateOperand(MCInst &mcInst,
case ENCODING_IO:
case ENCODING_Iv:
case ENCODING_Ia:
- translateImmediate(mcInst,
- insn.immediates[insn.numImmediatesTranslated++]);
+ translateImmediate(mcInst,
+ insn.immediates[insn.numImmediatesTranslated++],
+ operand,
+ insn);
return false;
case ENCODING_RB:
case ENCODING_RW:
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
index 64f6b2d..6c3ff6b 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
@@ -1034,14 +1034,10 @@ static int readModRM(struct InternalInstruction* insn) {
if (index > 7) \
*valid = 0; \
return prefix##_DR0 + index; \
- case TYPE_CR32: \
- if (index > 7) \
- *valid = 0; \
- return prefix##_ECR0 + index; \
- case TYPE_CR64: \
+ case TYPE_CONTROLREG: \
if (index > 8) \
*valid = 0; \
- return prefix##_RCR0 + index; \
+ return prefix##_CR0 + index; \
} \
}
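
Collapsing TYPE_CR32/TYPE_CR64 into TYPE_CONTROLREG matches the hardware: there is a single CR0..CR8 namespace, and CR8 is only addressable in 64-bit mode through the REX.R prefix bit, which is why the unified case accepts indices up to 8. A sketch of how that index is formed (variable names are illustrative):

    unsigned regField = 0;                       // ModRM.reg, bits 5:3
    unsigned rexR     = 1;                       // REX.R prefix bit
    unsigned index    = (rexR << 3) | regField;  // == 8 -> CR8
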
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
index 462cf68..28ba86b 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
@@ -225,26 +225,16 @@ extern "C" {
ENTRY(DR6) \
ENTRY(DR7)
-#define REGS_CONTROL_32BIT \
- ENTRY(ECR0) \
- ENTRY(ECR1) \
- ENTRY(ECR2) \
- ENTRY(ECR3) \
- ENTRY(ECR4) \
- ENTRY(ECR5) \
- ENTRY(ECR6) \
- ENTRY(ECR7)
-
-#define REGS_CONTROL_64BIT \
- ENTRY(RCR0) \
- ENTRY(RCR1) \
- ENTRY(RCR2) \
- ENTRY(RCR3) \
- ENTRY(RCR4) \
- ENTRY(RCR5) \
- ENTRY(RCR6) \
- ENTRY(RCR7) \
- ENTRY(RCR8)
+#define REGS_CONTROL \
+ ENTRY(CR0) \
+ ENTRY(CR1) \
+ ENTRY(CR2) \
+ ENTRY(CR3) \
+ ENTRY(CR4) \
+ ENTRY(CR5) \
+ ENTRY(CR6) \
+ ENTRY(CR7) \
+ ENTRY(CR8)
#define ALL_EA_BASES \
EA_BASES_16BIT \
@@ -264,8 +254,7 @@ extern "C" {
REGS_XMM \
REGS_SEGMENT \
REGS_DEBUG \
- REGS_CONTROL_32BIT \
- REGS_CONTROL_64BIT \
+ REGS_CONTROL \
ENTRY(RIP)
/*
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
index 4a7cd57..0f33f52 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
@@ -280,8 +280,7 @@ struct ContextDecision {
ENUM_ENTRY(TYPE_XMM0, "Implicit use of XMM0") \
ENUM_ENTRY(TYPE_SEGMENTREG, "Segment register operand") \
ENUM_ENTRY(TYPE_DEBUGREG, "Debug register operand") \
- ENUM_ENTRY(TYPE_CR32, "4-byte control register operand") \
- ENUM_ENTRY(TYPE_CR64, "8-byte") \
+ ENUM_ENTRY(TYPE_CONTROLREG, "Control register operand") \
\
ENUM_ENTRY(TYPE_Mv, "Memory operand of operand size") \
ENUM_ENTRY(TYPE_Rv, "Register operand of operand size") \
diff --git a/lib/Target/X86/SSEDomainFix.cpp b/lib/Target/X86/SSEDomainFix.cpp
index 5e80845..dab070e 100644
--- a/lib/Target/X86/SSEDomainFix.cpp
+++ b/lib/Target/X86/SSEDomainFix.cpp
@@ -155,9 +155,7 @@ char SSEDomainFixPass::ID = 0;
/// Translate TRI register number to an index into our smaller tables of
/// interesting registers. Return -1 for boring registers.
int SSEDomainFixPass::RegIndex(unsigned reg) {
- // Registers are sorted lexicographically.
- // We just need them to be consecutive, ordering doesn't matter.
- assert(X86::XMM9 == X86::XMM0+NumRegs-1 && "Unexpected sort");
+ assert(X86::XMM15 == X86::XMM0+NumRegs-1 && "Unexpected sort");
reg -= X86::XMM0;
return reg < NumRegs ? (int) reg : -1;
}
diff --git a/lib/Target/X86/X86AsmBackend.cpp b/lib/Target/X86/X86AsmBackend.cpp
index ba9c1d0..151087f 100644
--- a/lib/Target/X86/X86AsmBackend.cpp
+++ b/lib/Target/X86/X86AsmBackend.cpp
@@ -12,6 +12,7 @@
#include "X86FixupKinds.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
@@ -43,20 +44,19 @@ public:
X86AsmBackend(const Target &T)
: TargetAsmBackend(T) {}
- void ApplyFixup(const MCAsmFixup &Fixup, MCDataFragment &DF,
+ void ApplyFixup(const MCFixup &Fixup, MCDataFragment &DF,
uint64_t Value) const {
- unsigned Size = 1 << getFixupKindLog2Size(Fixup.Kind);
+ unsigned Size = 1 << getFixupKindLog2Size(Fixup.getKind());
- assert(Fixup.Offset + Size <= DF.getContents().size() &&
+ assert(Fixup.getOffset() + Size <= DF.getContents().size() &&
"Invalid fixup offset!");
for (unsigned i = 0; i != Size; ++i)
- DF.getContents()[Fixup.Offset + i] = uint8_t(Value >> (i * 8));
+ DF.getContents()[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8));
}
- bool MayNeedRelaxation(const MCInst &Inst,
- const SmallVectorImpl<MCAsmFixup> &Fixups) const;
+ bool MayNeedRelaxation(const MCInst &Inst) const;
- void RelaxInstruction(const MCInstFragment *IF, MCInst &Res) const;
+ void RelaxInstruction(const MCInst &Inst, MCInst &Res) const;
bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const;
};
@@ -75,6 +75,7 @@ static unsigned getRelaxedOpcode(unsigned Op) {
case X86::JG_1: return X86::JG_4;
case X86::JLE_1: return X86::JLE_4;
case X86::JL_1: return X86::JL_4;
+ case X86::TAILJMP_1:
case X86::JMP_1: return X86::JMP_4;
case X86::JNE_1: return X86::JNE_4;
case X86::JNO_1: return X86::JNO_4;
@@ -86,35 +87,33 @@ static unsigned getRelaxedOpcode(unsigned Op) {
}
}
-bool X86AsmBackend::MayNeedRelaxation(const MCInst &Inst,
- const SmallVectorImpl<MCAsmFixup> &Fixups) const {
- // Check for a 1byte pcrel fixup, and enforce that we would know how to relax
- // this instruction.
- for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
- if (unsigned(Fixups[i].Kind) == X86::reloc_pcrel_1byte) {
- assert(getRelaxedOpcode(Inst.getOpcode()) != Inst.getOpcode());
- return true;
- }
- }
+bool X86AsmBackend::MayNeedRelaxation(const MCInst &Inst) const {
+ // Check if this instruction is ever relaxable.
+ if (getRelaxedOpcode(Inst.getOpcode()) == Inst.getOpcode())
+ return false;
- return false;
+ // If so, just assume it can be relaxed. Once we support relaxing more complex
+ // instructions, we should check that the instruction actually has symbolic
+ // operands before doing this, but we need to be careful about things like
+ // PCrel.
+ return true;
}
// FIXME: Can tblgen help at all here to verify there aren't other instructions
// we can relax?
-void X86AsmBackend::RelaxInstruction(const MCInstFragment *IF,
- MCInst &Res) const {
+void X86AsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const {
// The only relaxations X86 does is from a 1byte pcrel to a 4byte pcrel.
- unsigned RelaxedOp = getRelaxedOpcode(IF->getInst().getOpcode());
+ unsigned RelaxedOp = getRelaxedOpcode(Inst.getOpcode());
- if (RelaxedOp == IF->getInst().getOpcode()) {
+ if (RelaxedOp == Inst.getOpcode()) {
SmallString<256> Tmp;
raw_svector_ostream OS(Tmp);
- IF->getInst().dump_pretty(OS);
+ Inst.dump_pretty(OS);
+ OS << "\n";
report_fatal_error("unexpected instruction to relax: " + OS.str());
}
- Res = IF->getInst();
+ Res = Inst;
Res.setOpcode(RelaxedOp);
}
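
For scale, the relaxation performed here trades the 2-byte short conditional branch for its 6-byte near form (unconditional jmp goes from 2 to 5 bytes). The byte patterns behind the JCC_1 -> JCC_4 / JMP_1 -> JMP_4 mapping, with zeroed displacements:

    const unsigned char JNE_1[] = { 0x75, 0x00 };              // jne rel8
    const unsigned char JNE_4[] = { 0x0F, 0x85, 0, 0, 0, 0 };  // jne rel32
    const unsigned char JMP_1[] = { 0xEB, 0x00 };              // jmp rel8
    const unsigned char JMP_4[] = { 0xE9, 0, 0, 0, 0 };        // jmp rel32
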
@@ -199,6 +198,18 @@ public:
}
};
+class ELFX86_32AsmBackend : public ELFX86AsmBackend {
+public:
+ ELFX86_32AsmBackend(const Target &T)
+ : ELFX86AsmBackend(T) {}
+};
+
+class ELFX86_64AsmBackend : public ELFX86AsmBackend {
+public:
+ ELFX86_64AsmBackend(const Target &T)
+ : ELFX86AsmBackend(T) {}
+};
+
class DarwinX86AsmBackend : public X86AsmBackend {
public:
DarwinX86AsmBackend(const Target &T)
@@ -210,7 +221,8 @@ public:
bool isVirtualSection(const MCSection &Section) const {
const MCSectionMachO &SMO = static_cast<const MCSectionMachO&>(Section);
return (SMO.getType() == MCSectionMachO::S_ZEROFILL ||
- SMO.getType() == MCSectionMachO::S_GB_ZEROFILL);
+ SMO.getType() == MCSectionMachO::S_GB_ZEROFILL ||
+ SMO.getType() == MCSectionMachO::S_THREAD_LOCAL_ZEROFILL);
}
};
@@ -247,6 +259,26 @@ public:
const MCSectionMachO &SMO = static_cast<const MCSectionMachO&>(Section);
return SMO.getType() == MCSectionMachO::S_CSTRING_LITERALS;
}
+
+ virtual bool isSectionAtomizable(const MCSection &Section) const {
+ const MCSectionMachO &SMO = static_cast<const MCSectionMachO&>(Section);
+ // Fixed-size data sections are uniqued; they cannot be diced into atoms.
+ switch (SMO.getType()) {
+ default:
+ return true;
+
+ case MCSectionMachO::S_4BYTE_LITERALS:
+ case MCSectionMachO::S_8BYTE_LITERALS:
+ case MCSectionMachO::S_16BYTE_LITERALS:
+ case MCSectionMachO::S_LITERAL_POINTERS:
+ case MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS:
+ case MCSectionMachO::S_LAZY_SYMBOL_POINTERS:
+ case MCSectionMachO::S_MOD_INIT_FUNC_POINTERS:
+ case MCSectionMachO::S_MOD_TERM_FUNC_POINTERS:
+ case MCSectionMachO::S_INTERPOSING:
+ return false;
+ }
+ }
};
}
@@ -257,7 +289,7 @@ TargetAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
case Triple::Darwin:
return new DarwinX86_32AsmBackend(T);
default:
- return new ELFX86AsmBackend(T);
+ return new ELFX86_32AsmBackend(T);
}
}
@@ -267,6 +299,6 @@ TargetAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
case Triple::Darwin:
return new DarwinX86_64AsmBackend(T);
default:
- return new ELFX86AsmBackend(T);
+ return new ELFX86_64AsmBackend(T);
}
}
diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.h b/lib/Target/X86/X86COFFMachineModuleInfo.h
index eece462..98ab2a6 100644
--- a/lib/Target/X86/X86COFFMachineModuleInfo.h
+++ b/lib/Target/X86/X86COFFMachineModuleInfo.h
@@ -15,7 +15,7 @@
#define X86COFF_MACHINEMODULEINFO_H
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/ADT/StringSet.h"
+#include "llvm/ADT/DenseSet.h"
#include "X86MachineFunctionInfo.h"
namespace llvm {
@@ -25,18 +25,18 @@ namespace llvm {
/// X86COFFMachineModuleInfo - This is a MachineModuleInfoImpl implementation
/// for X86 COFF targets.
class X86COFFMachineModuleInfo : public MachineModuleInfoImpl {
- StringSet<> CygMingStubs;
+ DenseSet<MCSymbol const *> Externals;
public:
X86COFFMachineModuleInfo(const MachineModuleInfo &) {}
virtual ~X86COFFMachineModuleInfo();
- void addExternalFunction(StringRef Name) {
- CygMingStubs.insert(Name);
+ void addExternalFunction(MCSymbol* Symbol) {
+ Externals.insert(Symbol);
}
- typedef StringSet<>::const_iterator stub_iterator;
- stub_iterator stub_begin() const { return CygMingStubs.begin(); }
- stub_iterator stub_end() const { return CygMingStubs.end(); }
+ typedef DenseSet<MCSymbol const *>::const_iterator externals_iterator;
+ externals_iterator externals_begin() const { return Externals.begin(); }
+ externals_iterator externals_end() const { return Externals.end(); }
};
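
The switch from StringSet to DenseSet works because MCSymbols are uniqued by their MCContext: two lookups of the same name hand back the same pointer, so pointer identity substitutes for string comparison and the set never re-hashes names. A minimal sketch, assuming the MCContext symbol-interning API of this era:

    MCSymbol *A = Ctx.GetOrCreateSymbol(StringRef("_foo"));
    MCSymbol *B = Ctx.GetOrCreateSymbol(StringRef("_foo"));
    assert(A == B);                      // uniqued: same pointer
    DenseSet<const MCSymbol *> Externals;
    Externals.insert(A);                 // O(1) pointer hash, no string copy
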
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index fd15efd..a5774e1 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -307,6 +307,20 @@ def CC_X86_32_FastCall : CallingConv<[
CCDelegateTo<CC_X86_32_Common>
]>;
+def CC_X86_32_ThisCall : CallingConv<[
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // The 'nest' parameter, if any, is passed in EAX.
+ CCIfNest<CCAssignToReg<[EAX]>>,
+
+ // The first integer argument is passed in ECX
+ CCIfType<[i32], CCAssignToReg<[ECX]>>,
+
+ // Otherwise, same as everything else.
+ CCDelegateTo<CC_X86_32_Common>
+]>;
+
def CC_X86_32_FastCC : CallingConv<[
// Handles byval parameters. Note that we can't rely on the delegation
// to CC_X86_32_Common for this because that happens after code that
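
CC_X86_32_ThisCall models the MSVC thiscall convention: the first i32 argument (normally the implicit `this` pointer) travels in ECX, everything else goes on the stack, and — per the IsCalleePop change further down in this patch — the callee pops its own arguments. Conceptually:

    struct S { int get(int k); };
    // s->get(42) under thiscall lowers roughly to:
    //   push 42          ; stack argument
    //   mov  ecx, s      ; first i32 arg (`this`) in ECX
    //   call S::get      ; callee returns with `ret 4`
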
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index ff9208c..1bc5eb7 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -180,6 +180,8 @@ CCAssignFn *X86FastISel::CCAssignFnForCall(CallingConv::ID CC,
if (CC == CallingConv::X86_FastCall)
return CC_X86_32_FastCall;
+ else if (CC == CallingConv::X86_ThisCall)
+ return CC_X86_32_ThisCall;
else if (CC == CallingConv::Fast)
return CC_X86_32_FastCC;
else if (CC == CallingConv::GHC)
@@ -324,7 +326,8 @@ bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
unsigned Src, EVT SrcVT,
unsigned &ResultReg) {
- unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc, Src);
+ unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
+ Src, /*TODO: Kill=*/false);
if (RR != 0) {
ResultReg = RR;
@@ -416,7 +419,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
(S == 1 || S == 2 || S == 4 || S == 8)) {
// Scaled-index addressing.
Scale = S;
- IndexReg = getRegForGEPIndex(Op);
+ IndexReg = getRegForGEPIndex(Op).first;
if (IndexReg == 0)
return false;
} else
@@ -802,7 +805,7 @@ bool X86FastISel::X86SelectZExt(const Instruction *I) {
unsigned ResultReg = getRegForValue(I->getOperand(0));
if (ResultReg == 0) return false;
// Set the high bits to zero.
- ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg);
+ ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
if (ResultReg == 0) return false;
UpdateValueMap(I, ResultReg);
return true;
@@ -913,7 +916,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
RI = MBB->rbegin(), RE = MBB->rend(); RI != RE; ++RI) {
const MachineInstr &MI = *RI;
- if (MI.modifiesRegister(Reg)) {
+ if (MI.definesRegister(Reg)) {
unsigned Src, Dst, SrcSR, DstSR;
if (getInstrInfo()->isMoveInstr(MI, Src, Dst, SrcSR, DstSR)) {
@@ -1019,14 +1022,14 @@ bool X86FastISel::X86SelectShift(const Instruction *I) {
unsigned Op1Reg = getRegForValue(I->getOperand(1));
if (Op1Reg == 0) return false;
- TII.copyRegToReg(*MBB, MBB->end(), CReg, Op1Reg, RC, RC);
+ TII.copyRegToReg(*MBB, MBB->end(), CReg, Op1Reg, RC, RC, DL);
// The shift instruction uses X86::CL. If we defined a super-register
// of X86::CL, emit an EXTRACT_SUBREG to precisely describe what
// we're doing here.
if (CReg != X86::CL)
BuildMI(MBB, DL, TII.get(TargetOpcode::EXTRACT_SUBREG), X86::CL)
- .addReg(CReg).addImm(X86::SUBREG_8BIT);
+ .addReg(CReg).addImm(X86::sub_8bit);
unsigned ResultReg = createResultReg(RC);
BuildMI(MBB, DL, TII.get(OpReg), ResultReg).addReg(Op0Reg);
@@ -1133,7 +1136,8 @@ bool X86FastISel::X86SelectTrunc(const Instruction *I) {
// Then issue an extract_subreg.
unsigned ResultReg = FastEmitInst_extractsubreg(MVT::i8,
- CopyReg, X86::SUBREG_8BIT);
+ CopyReg, /*Kill=*/true,
+ X86::sub_8bit);
if (!ResultReg)
return false;
@@ -1436,7 +1440,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
}
case CCValAssign::BCvt: {
unsigned BC = FastEmit_r(ArgVT.getSimpleVT(), VA.getLocVT().getSimpleVT(),
- ISD::BIT_CONVERT, Arg);
+ ISD::BIT_CONVERT, Arg, /*TODO: Kill=*/false);
assert(BC != 0 && "Failed to emit a bitcast!");
Arg = BC;
ArgVT = VA.getLocVT();
@@ -1447,7 +1451,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
if (VA.isRegLoc()) {
TargetRegisterClass* RC = TLI.getRegClassFor(ArgVT);
bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), VA.getLocReg(),
- Arg, RC, RC);
+ Arg, RC, RC, DL);
assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted;
Emitted = true;
RegArgs.push_back(VA.getLocReg());
@@ -1473,7 +1477,8 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
if (Subtarget->isPICStyleGOT()) {
TargetRegisterClass *RC = X86::GR32RegisterClass;
unsigned Base = getInstrInfo()->getGlobalBaseReg(&MF);
- bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), X86::EBX, Base, RC, RC);
+ bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), X86::EBX, Base, RC, RC,
+ DL);
assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted;
Emitted = true;
}
@@ -1552,7 +1557,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
unsigned ResultReg = createResultReg(DstRC);
bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
- RVLocs[0].getLocReg(), DstRC, SrcRC);
+ RVLocs[0].getLocReg(), DstRC, SrcRC, DL);
assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted;
Emitted = true;
if (CopyVT != RVLocs[0].getValVT()) {
diff --git a/lib/Target/X86/X86FloatingPointRegKill.cpp b/lib/Target/X86/X86FloatingPointRegKill.cpp
index 541083f..747683d 100644
--- a/lib/Target/X86/X86FloatingPointRegKill.cpp
+++ b/lib/Target/X86/X86FloatingPointRegKill.cpp
@@ -14,7 +14,6 @@
#define DEBUG_TYPE "x86-codegen"
#include "X86.h"
#include "X86InstrInfo.h"
-#include "X86Subtarget.h"
#include "llvm/Instructions.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -42,12 +41,70 @@ namespace {
virtual bool runOnMachineFunction(MachineFunction &MF);
- virtual const char *getPassName() const { return "X86 FP_REG_KILL inserter"; }
+ virtual const char *getPassName() const {
+ return "X86 FP_REG_KILL inserter";
+ }
};
char FPRegKiller::ID = 0;
}
-FunctionPass *llvm::createX87FPRegKillInserterPass() { return new FPRegKiller(); }
+FunctionPass *llvm::createX87FPRegKillInserterPass() {
+ return new FPRegKiller();
+}
+
+/// isFPStackVReg - Return true if the specified vreg is from a fp stack
+/// register class.
+static bool isFPStackVReg(unsigned RegNo, const MachineRegisterInfo &MRI) {
+ if (!TargetRegisterInfo::isVirtualRegister(RegNo))
+ return false;
+
+ switch (MRI.getRegClass(RegNo)->getID()) {
+ default: return false;
+ case X86::RFP32RegClassID:
+ case X86::RFP64RegClassID:
+ case X86::RFP80RegClassID:
+ return true;
+ }
+}
+
+
+/// ContainsFPStackCode - Return true if the specific MBB has floating point
+/// stack code, and thus needs an FP_REG_KILL.
+static bool ContainsFPStackCode(MachineBasicBlock *MBB,
+ const MachineRegisterInfo &MRI) {
+ // Scan the block, looking for instructions that define fp stack vregs.
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ if (I->getNumOperands() == 0 || !I->getOperand(0).isReg())
+ continue;
+
+ for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
+ if (!I->getOperand(op).isReg() || !I->getOperand(op).isDef())
+ continue;
+
+ if (isFPStackVReg(I->getOperand(op).getReg(), MRI))
+ return true;
+ }
+ }
+
+ // Check PHI nodes in successor blocks. These PHI's will be lowered to have
+ // a copy of the input value in this block, which is a definition of the
+ // value.
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ E = MBB->succ_end(); SI != E; ++ SI) {
+ MachineBasicBlock *SuccBB = *SI;
+ for (MachineBasicBlock::iterator I = SuccBB->begin(), E = SuccBB->end();
+ I != E; ++I) {
+ // All PHI nodes are at the top of the block.
+ if (!I->isPHI()) break;
+
+ if (isFPStackVReg(I->getOperand(0).getReg(), MRI))
+ return true;
+ }
+ }
+
+ return false;
+}
bool FPRegKiller::runOnMachineFunction(MachineFunction &MF) {
// If we are emitting FP stack code, scan the basic block to determine if this
@@ -65,14 +122,13 @@ bool FPRegKiller::runOnMachineFunction(MachineFunction &MF) {
// Fast-path: If nothing is using the x87 registers, we don't need to do
// any scanning.
- MachineRegisterInfo &MRI = MF.getRegInfo();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
if (MRI.getRegClassVirtRegs(X86::RFP80RegisterClass).empty() &&
MRI.getRegClassVirtRegs(X86::RFP64RegisterClass).empty() &&
MRI.getRegClassVirtRegs(X86::RFP32RegisterClass).empty())
return false;
bool Changed = false;
- const X86Subtarget &Subtarget = MF.getTarget().getSubtarget<X86Subtarget>();
MachineFunction::iterator MBBI = MF.begin();
MachineFunction::iterator EndMBB = MF.end();
for (; MBBI != EndMBB; ++MBBI) {
@@ -87,48 +143,8 @@ bool FPRegKiller::runOnMachineFunction(MachineFunction &MF) {
continue;
}
- bool ContainsFPCode = false;
- for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
- !ContainsFPCode && I != E; ++I) {
- if (I->getNumOperands() != 0 && I->getOperand(0).isReg()) {
- const TargetRegisterClass *clas;
- for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
- if (I->getOperand(op).isReg() && I->getOperand(op).isDef() &&
- TargetRegisterInfo::isVirtualRegister(I->getOperand(op).getReg()) &&
- ((clas = MRI.getRegClass(I->getOperand(op).getReg())) ==
- X86::RFP32RegisterClass ||
- clas == X86::RFP64RegisterClass ||
- clas == X86::RFP80RegisterClass)) {
- ContainsFPCode = true;
- break;
- }
- }
- }
- }
- // Check PHI nodes in successor blocks. These PHI's will be lowered to have
- // a copy of the input value in this block. In SSE mode, we only care about
- // 80-bit values.
- if (!ContainsFPCode) {
- // Final check, check LLVM BB's that are successors to the LLVM BB
- // corresponding to BB for FP PHI nodes.
- const BasicBlock *LLVMBB = MBB->getBasicBlock();
- const PHINode *PN;
- for (succ_const_iterator SI = succ_begin(LLVMBB), E = succ_end(LLVMBB);
- !ContainsFPCode && SI != E; ++SI) {
- for (BasicBlock::const_iterator II = SI->begin();
- (PN = dyn_cast<PHINode>(II)); ++II) {
- if (PN->getType()==Type::getX86_FP80Ty(LLVMBB->getContext()) ||
- (!Subtarget.hasSSE1() && PN->getType()->isFloatingPointTy()) ||
- (!Subtarget.hasSSE2() &&
- PN->getType()==Type::getDoubleTy(LLVMBB->getContext()))) {
- ContainsFPCode = true;
- break;
- }
- }
- }
- }
- // Finally, if we found any FP code, emit the FP_REG_KILL instruction.
- if (ContainsFPCode) {
+ // If we find any FP stack code, emit the FP_REG_KILL instruction.
+ if (ContainsFPStackCode(MBB, MRI)) {
BuildMI(*MBB, MBBI->getFirstTerminator(), DebugLoc(),
MF.getTarget().getInstrInfo()->get(X86::FP_REG_KILL));
++NumFPKill;
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index fd8bb1e..0f64383 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -1693,7 +1693,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
Result,
CurDAG->getTargetConstant(8, MVT::i8)), 0);
// Then truncate it down to i8.
- Result = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT, dl,
+ Result = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl,
MVT::i8, Result);
} else {
Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
@@ -1834,7 +1834,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
CurDAG->getTargetConstant(8, MVT::i8)),
0);
// Then truncate it down to i8.
- Result = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT, dl,
+ Result = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl,
MVT::i8, Result);
} else {
Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
@@ -1883,7 +1883,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
}
// Extract the l-register.
- SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT, dl,
+ SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl,
MVT::i8, Reg);
// Emit a testb.
@@ -1912,7 +1912,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
Reg.getValueType(), Reg, RC), 0);
// Extract the h-register.
- SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT_HI, dl,
+ SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit_hi, dl,
MVT::i8, Reg);
// Emit a testb. No special NOREX tricks are needed since there's
@@ -1930,7 +1930,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
SDValue Reg = N0.getNode()->getOperand(0);
// Extract the 16-bit subregister.
- SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_16BIT, dl,
+ SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_16bit, dl,
MVT::i16, Reg);
// Emit a testw.
@@ -1946,7 +1946,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
SDValue Reg = N0.getNode()->getOperand(0);
// Extract the 32-bit subregister.
- SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_32BIT, dl,
+ SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_32bit, dl,
MVT::i32, Reg);
// Emit a testl.
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 6ce9ab7..b02c33d 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -94,7 +94,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// X86 is weird, it always uses i8 for shift amounts and setcc results.
setShiftAmountType(MVT::i8);
setBooleanContents(ZeroOrOneBooleanContent);
- setSchedulingPreference(SchedulingForRegPressure);
+ setSchedulingPreference(Sched::RegPressure);
setStackPointerRegisterToSaveRestore(X86StackPtr);
if (Subtarget->isTargetDarwin()) {
@@ -145,13 +145,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Expand);
} else if (!UseSoftFloat) {
- if (X86ScalarSSEf64) {
- // We have an impenetrably clever algorithm for ui64->double only.
- setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
- }
+ // We have an algorithm for SSE2->double, and we turn this into a
+ // 64-bit FILD followed by conditional FADD for other targets.
+ setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
// We have an algorithm for SSE2, and we turn this into a 64-bit
// FILD for other targets.
- setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
+ setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
}
// Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
@@ -215,9 +214,17 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
}
// TODO: when we have SSE, these could be more efficient, by using movd/movq.
- if (!X86ScalarSSEf64) {
+ if (!X86ScalarSSEf64) {
setOperationAction(ISD::BIT_CONVERT , MVT::f32 , Expand);
setOperationAction(ISD::BIT_CONVERT , MVT::i32 , Expand);
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::BIT_CONVERT , MVT::f64 , Expand);
+ // Without SSE, i64->f64 goes through memory; i64->MMX is Legal.
+ if (Subtarget->hasMMX() && !DisableMMX)
+ setOperationAction(ISD::BIT_CONVERT , MVT::i64 , Custom);
+ else
+ setOperationAction(ISD::BIT_CONVERT , MVT::i64 , Expand);
+ }
}
// Scalar integer divide and remainder are lowered to use operations that
@@ -679,6 +686,14 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::VSETCC, MVT::v8i8, Custom);
setOperationAction(ISD::VSETCC, MVT::v4i16, Custom);
setOperationAction(ISD::VSETCC, MVT::v2i32, Custom);
+
+ if (!X86ScalarSSEf64 && Subtarget->is64Bit()) {
+ setOperationAction(ISD::BIT_CONVERT, MVT::v8i8, Custom);
+ setOperationAction(ISD::BIT_CONVERT, MVT::v4i16, Custom);
+ setOperationAction(ISD::BIT_CONVERT, MVT::v2i32, Custom);
+ setOperationAction(ISD::BIT_CONVERT, MVT::v2f32, Custom);
+ setOperationAction(ISD::BIT_CONVERT, MVT::v1i64, Custom);
+ }
}
if (!UseSoftFloat && Subtarget->hasSSE1()) {
@@ -1244,10 +1259,8 @@ X86TargetLowering::LowerReturn(SDValue Chain,
MachineFunction &MF = DAG.getMachineFunction();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
unsigned Reg = FuncInfo->getSRetReturnReg();
- if (!Reg) {
- Reg = MRI.createVirtualRegister(getRegClassFor(MVT::i64));
- FuncInfo->setSRetReturnReg(Reg);
- }
+ assert(Reg &&
+ "SRetReturnReg should have been set in LowerFormalArguments().");
SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
Chain = DAG.getCopyToReg(Chain, dl, X86::RAX, Val, Flag);
@@ -1384,6 +1397,8 @@ bool X86TargetLowering::IsCalleePop(bool IsVarArg,
return !Subtarget->is64Bit();
case CallingConv::X86_FastCall:
return !Subtarget->is64Bit();
+ case CallingConv::X86_ThisCall:
+ return !Subtarget->is64Bit();
case CallingConv::Fast:
return GuaranteedTailCallOpt;
case CallingConv::GHC:
@@ -1405,6 +1420,8 @@ CCAssignFn *X86TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const {
if (CC == CallingConv::X86_FastCall)
return CC_X86_32_FastCall;
+ else if (CC == CallingConv::X86_ThisCall)
+ return CC_X86_32_ThisCall;
else if (CC == CallingConv::Fast)
return CC_X86_32_FastCC;
else if (CC == CallingConv::GHC)
@@ -1596,7 +1613,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
// If the function takes variable number of arguments, make a frame index for
// the start of the first vararg value... for expansion of llvm.va_start.
if (isVarArg) {
- if (Is64Bit || CallConv != CallingConv::X86_FastCall) {
+ if (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
+ CallConv != CallingConv::X86_ThisCall)) {
FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize,
true, false));
}
@@ -1716,7 +1734,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
if (!Is64Bit) {
// RegSaveFrameIndex is X86-64 only.
FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
- if (CallConv == CallingConv::X86_FastCall)
+ if (CallConv == CallingConv::X86_FastCall ||
+ CallConv == CallingConv::X86_ThisCall)
// fastcc functions can't have varargs.
FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
}
@@ -5272,7 +5291,7 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
}
// TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
- MFI->setHasCalls(true);
+ MFI->setAdjustsStack(true);
SDValue Flag = Chain.getValue(1);
return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Flag);
@@ -5462,7 +5481,7 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
}
SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
- SDValue StackSlot,
+ SDValue StackSlot,
SelectionDAG &DAG) const {
// Build the FILD
DebugLoc dl = Op.getDebugLoc();
@@ -5636,35 +5655,72 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
SDValue N0 = Op.getOperand(0);
DebugLoc dl = Op.getDebugLoc();
- // Now not UINT_TO_FP is legal (it's marked custom), dag combiner won't
+ // Since UINT_TO_FP is legal (it's marked custom), dag combiner won't
// optimize it to a SINT_TO_FP when the sign bit is known zero. Perform
// the optimization here.
if (DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(), N0);
EVT SrcVT = N0.getValueType();
- if (SrcVT == MVT::i64) {
- // We only handle SSE2 f64 target here; caller can expand the rest.
- if (Op.getValueType() != MVT::f64 || !X86ScalarSSEf64)
- return SDValue();
-
+ EVT DstVT = Op.getValueType();
+ if (SrcVT == MVT::i64 && DstVT == MVT::f64 && X86ScalarSSEf64)
return LowerUINT_TO_FP_i64(Op, DAG);
- } else if (SrcVT == MVT::i32 && X86ScalarSSEf64) {
+ else if (SrcVT == MVT::i32 && X86ScalarSSEf64)
return LowerUINT_TO_FP_i32(Op, DAG);
- }
-
- assert(SrcVT == MVT::i32 && "Unknown UINT_TO_FP to lower!");
// Make a 64-bit buffer, and use it to build an FILD.
SDValue StackSlot = DAG.CreateStackTemporary(MVT::i64);
- SDValue WordOff = DAG.getConstant(4, getPointerTy());
- SDValue OffsetSlot = DAG.getNode(ISD::ADD, dl,
- getPointerTy(), StackSlot, WordOff);
- SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
+ if (SrcVT == MVT::i32) {
+ SDValue WordOff = DAG.getConstant(4, getPointerTy());
+ SDValue OffsetSlot = DAG.getNode(ISD::ADD, dl,
+ getPointerTy(), StackSlot, WordOff);
+ SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
+ StackSlot, NULL, 0, false, false, 0);
+ SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, MVT::i32),
+ OffsetSlot, NULL, 0, false, false, 0);
+ SDValue Fild = BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG);
+ return Fild;
+ }
+
+ assert(SrcVT == MVT::i64 && "Unexpected type in UINT_TO_FP");
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
StackSlot, NULL, 0, false, false, 0);
- SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, MVT::i32),
- OffsetSlot, NULL, 0, false, false, 0);
- return BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG);
+ // For i64 source, we need to add the appropriate power of 2 if the input
+ // was negative. This is the same as the optimization in
+ // DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP, and for it to be safe here,
+ // we must be careful to do the computation in x87 extended precision, not
+ // in SSE. (The generic code can't know it's OK to do this, or how to.)
+ SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other);
+ SDValue Ops[] = { Store, StackSlot, DAG.getValueType(MVT::i64) };
+ SDValue Fild = DAG.getNode(X86ISD::FILD, dl, Tys, Ops, 3);
+
+ APInt FF(32, 0x5F800000ULL);
+
+ // Check whether the sign bit is set.
+ SDValue SignSet = DAG.getSetCC(dl, getSetCCResultType(MVT::i64),
+ Op.getOperand(0), DAG.getConstant(0, MVT::i64),
+ ISD::SETLT);
+
+ // Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits.
+ SDValue FudgePtr = DAG.getConstantPool(
+ ConstantInt::get(*DAG.getContext(), FF.zext(64)),
+ getPointerTy());
+
+ // Get a pointer to FF if the sign bit was set, or to 0 otherwise.
+ SDValue Zero = DAG.getIntPtrConstant(0);
+ SDValue Four = DAG.getIntPtrConstant(4);
+ SDValue Offset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(), SignSet,
+ Zero, Four);
+ FudgePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(), FudgePtr, Offset);
+
+ // Load the value out, extending it from f32 to f80.
+ // FIXME: Avoid the extend by constructing the right constant pool?
+ SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, MVT::f80, DAG.getEntryNode(),
+ FudgePtr, PseudoSourceValue::getConstantPool(),
+ 0, MVT::f32, false, false, 4);
+ // Extend everything to 80 bits to force it to be done on x87.
+ SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::f80, Fild, Fudge);
+ return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add, DAG.getIntPtrConstant(0));
}
std::pair<SDValue,SDValue> X86TargetLowering::
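
The magic number in the new i64 path deserves a note: FILD interprets its 64-bit memory operand as signed, so when the original unsigned input had its top bit set the loaded value is exactly 2^64 too small. 0x5F800000 is the IEEE-754 single-precision bit pattern of 2^64 (exponent field 0xBF = 191, 191 - 127 = 64, mantissa 0), and the select picks it only when the sign bit was set; the add happens at f80 precision so no double rounding sneaks in before the final FP_ROUND. A quick standalone check of the constant:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
      uint32_t bits = 0x5F800000u;
      float f;
      std::memcpy(&f, &bits, sizeof f);
      assert(f == 18446744073709551616.0f);   // 2^64
      return 0;
    }
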
@@ -6593,221 +6649,6 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
return DAG.getMergeValues(Ops1, 2, dl);
}
-SDValue
-X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
- SDValue Chain,
- SDValue Dst, SDValue Src,
- SDValue Size, unsigned Align,
- bool isVolatile,
- const Value *DstSV,
- uint64_t DstSVOff) const {
- ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
-
- // If not DWORD aligned or size is more than the threshold, call the library.
- // The libc version is likely to be faster for these cases. It can use the
- // address value and run time information about the CPU.
- if ((Align & 3) != 0 ||
- !ConstantSize ||
- ConstantSize->getZExtValue() >
- getSubtarget()->getMaxInlineSizeThreshold()) {
- SDValue InFlag(0, 0);
-
- // Check to see if there is a specialized entry-point for memory zeroing.
- ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src);
-
- if (const char *bzeroEntry = V &&
- V->isNullValue() ? Subtarget->getBZeroEntry() : 0) {
- EVT IntPtr = getPointerTy();
- const Type *IntPtrTy = TD->getIntPtrType(*DAG.getContext());
- TargetLowering::ArgListTy Args;
- TargetLowering::ArgListEntry Entry;
- Entry.Node = Dst;
- Entry.Ty = IntPtrTy;
- Args.push_back(Entry);
- Entry.Node = Size;
- Args.push_back(Entry);
- std::pair<SDValue,SDValue> CallResult =
- LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()),
- false, false, false, false,
- 0, CallingConv::C, false, /*isReturnValueUsed=*/false,
- DAG.getExternalSymbol(bzeroEntry, IntPtr), Args, DAG, dl);
- return CallResult.second;
- }
-
- // Otherwise have the target-independent code call memset.
- return SDValue();
- }
-
- uint64_t SizeVal = ConstantSize->getZExtValue();
- SDValue InFlag(0, 0);
- EVT AVT;
- SDValue Count;
- ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Src);
- unsigned BytesLeft = 0;
- bool TwoRepStos = false;
- if (ValC) {
- unsigned ValReg;
- uint64_t Val = ValC->getZExtValue() & 255;
-
- // If the value is a constant, then we can potentially use larger sets.
- switch (Align & 3) {
- case 2: // WORD aligned
- AVT = MVT::i16;
- ValReg = X86::AX;
- Val = (Val << 8) | Val;
- break;
- case 0: // DWORD aligned
- AVT = MVT::i32;
- ValReg = X86::EAX;
- Val = (Val << 8) | Val;
- Val = (Val << 16) | Val;
- if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) { // QWORD aligned
- AVT = MVT::i64;
- ValReg = X86::RAX;
- Val = (Val << 32) | Val;
- }
- break;
- default: // Byte aligned
- AVT = MVT::i8;
- ValReg = X86::AL;
- Count = DAG.getIntPtrConstant(SizeVal);
- break;
- }
-
- if (AVT.bitsGT(MVT::i8)) {
- unsigned UBytes = AVT.getSizeInBits() / 8;
- Count = DAG.getIntPtrConstant(SizeVal / UBytes);
- BytesLeft = SizeVal % UBytes;
- }
-
- Chain = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, AVT),
- InFlag);
- InFlag = Chain.getValue(1);
- } else {
- AVT = MVT::i8;
- Count = DAG.getIntPtrConstant(SizeVal);
- Chain = DAG.getCopyToReg(Chain, dl, X86::AL, Src, InFlag);
- InFlag = Chain.getValue(1);
- }
-
- Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX :
- X86::ECX,
- Count, InFlag);
- InFlag = Chain.getValue(1);
- Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI :
- X86::EDI,
- Dst, InFlag);
- InFlag = Chain.getValue(1);
-
- SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
- SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
- Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops, array_lengthof(Ops));
-
- if (TwoRepStos) {
- InFlag = Chain.getValue(1);
- Count = Size;
- EVT CVT = Count.getValueType();
- SDValue Left = DAG.getNode(ISD::AND, dl, CVT, Count,
- DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
- Chain = DAG.getCopyToReg(Chain, dl, (CVT == MVT::i64) ? X86::RCX :
- X86::ECX,
- Left, InFlag);
- InFlag = Chain.getValue(1);
- Tys = DAG.getVTList(MVT::Other, MVT::Flag);
- SDValue Ops[] = { Chain, DAG.getValueType(MVT::i8), InFlag };
- Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops, array_lengthof(Ops));
- } else if (BytesLeft) {
- // Handle the last 1 - 7 bytes.
- unsigned Offset = SizeVal - BytesLeft;
- EVT AddrVT = Dst.getValueType();
- EVT SizeVT = Size.getValueType();
-
- Chain = DAG.getMemset(Chain, dl,
- DAG.getNode(ISD::ADD, dl, AddrVT, Dst,
- DAG.getConstant(Offset, AddrVT)),
- Src,
- DAG.getConstant(BytesLeft, SizeVT),
- Align, isVolatile, DstSV, DstSVOff + Offset);
- }
-
- // TODO: Use a Tokenfactor, as in memcpy, instead of a single chain.
- return Chain;
-}
-
-SDValue
-X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
- SDValue Chain, SDValue Dst, SDValue Src,
- SDValue Size, unsigned Align,
- bool isVolatile, bool AlwaysInline,
- const Value *DstSV,
- uint64_t DstSVOff,
- const Value *SrcSV,
- uint64_t SrcSVOff) const {
- // This requires the copy size to be a constant, preferrably
- // within a subtarget-specific limit.
- ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
- if (!ConstantSize)
- return SDValue();
- uint64_t SizeVal = ConstantSize->getZExtValue();
- if (!AlwaysInline && SizeVal > getSubtarget()->getMaxInlineSizeThreshold())
- return SDValue();
-
- /// If not DWORD aligned, call the library.
- if ((Align & 3) != 0)
- return SDValue();
-
- // DWORD aligned
- EVT AVT = MVT::i32;
- if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) // QWORD aligned
- AVT = MVT::i64;
-
- unsigned UBytes = AVT.getSizeInBits() / 8;
- unsigned CountVal = SizeVal / UBytes;
- SDValue Count = DAG.getIntPtrConstant(CountVal);
- unsigned BytesLeft = SizeVal % UBytes;
-
- SDValue InFlag(0, 0);
- Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX :
- X86::ECX,
- Count, InFlag);
- InFlag = Chain.getValue(1);
- Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI :
- X86::EDI,
- Dst, InFlag);
- InFlag = Chain.getValue(1);
- Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RSI :
- X86::ESI,
- Src, InFlag);
- InFlag = Chain.getValue(1);
-
- SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
- SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
- SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops,
- array_lengthof(Ops));
-
- SmallVector<SDValue, 4> Results;
- Results.push_back(RepMovs);
- if (BytesLeft) {
- // Handle the last 1 - 7 bytes.
- unsigned Offset = SizeVal - BytesLeft;
- EVT DstVT = Dst.getValueType();
- EVT SrcVT = Src.getValueType();
- EVT SizeVT = Size.getValueType();
- Results.push_back(DAG.getMemcpy(Chain, dl,
- DAG.getNode(ISD::ADD, dl, DstVT, Dst,
- DAG.getConstant(Offset, DstVT)),
- DAG.getNode(ISD::ADD, dl, SrcVT, Src,
- DAG.getConstant(Offset, SrcVT)),
- DAG.getConstant(BytesLeft, SizeVT),
- Align, isVolatile, AlwaysInline,
- DstSV, DstSVOff + Offset,
- SrcSV, SrcSVOff + Offset));
- }
-
- return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &Results[0], Results.size());
-}
-
SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
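
One detail worth keeping in mind from the memset expansion deleted above: it splats the constant fill byte across the rep-store width with repeated shift-and-or steps (the `Val = (Val << 8) | Val;` sequence). Worked through for a fill byte of 0x41:

    uint64_t Val = 0x41;
    Val = (Val << 8)  | Val;   // 0x4141             (word store)
    Val = (Val << 16) | Val;   // 0x41414141         (dword store)
    Val = (Val << 32) | Val;   // 0x4141414141414141 (qword store)
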
@@ -7138,6 +6979,9 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op,
SelectionDAG &DAG) const {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setReturnAddressIsTaken(true);
+
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
DebugLoc dl = Op.getDebugLoc();
@@ -7161,6 +7005,7 @@ SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op,
SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
MFI->setFrameAddressIsTaken(true);
+
EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
@@ -7298,6 +7143,7 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op,
break;
}
case CallingConv::X86_FastCall:
+ case CallingConv::X86_ThisCall:
case CallingConv::Fast:
// Pass 'nest' parameter in EAX.
// Must be kept in sync with X86CallingConv.td
@@ -7630,6 +7476,27 @@ SDValue X86TargetLowering::LowerREADCYCLECOUNTER(SDValue Op,
return DAG.getMergeValues(Ops, 2, dl);
}
+SDValue X86TargetLowering::LowerBIT_CONVERT(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT SrcVT = Op.getOperand(0).getValueType();
+ EVT DstVT = Op.getValueType();
+ assert((Subtarget->is64Bit() && !Subtarget->hasSSE2() &&
+ Subtarget->hasMMX() && !DisableMMX) &&
+ "Unexpected custom BIT_CONVERT");
+ assert((DstVT == MVT::i64 ||
+ (DstVT.isVector() && DstVT.getSizeInBits()==64)) &&
+ "Unexpected custom BIT_CONVERT");
+ // i64 <=> MMX conversions are Legal.
+ if (SrcVT==MVT::i64 && DstVT.isVector())
+ return Op;
+ if (DstVT==MVT::i64 && SrcVT.isVector())
+ return Op;
+ // MMX <=> MMX conversions are Legal.
+ if (SrcVT.isVector() && DstVT.isVector())
+ return Op;
+ // All other conversions need to be expanded.
+ return SDValue();
+}
SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const {
SDNode *Node = Op.getNode();
DebugLoc dl = Node->getDebugLoc();
@@ -7699,6 +7566,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SMULO:
case ISD::UMULO: return LowerXALUO(Op, DAG);
case ISD::READCYCLECOUNTER: return LowerREADCYCLECOUNTER(Op, DAG);
+ case ISD::BIT_CONVERT: return LowerBIT_CONVERT(Op, DAG);
}
}
@@ -8203,9 +8071,15 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,
MachineOperand& dest1Oper = bInstr->getOperand(0);
MachineOperand& dest2Oper = bInstr->getOperand(1);
MachineOperand* argOpers[2 + X86AddrNumOperands];
- for (int i=0; i < 2 + X86AddrNumOperands; ++i)
+ for (int i=0; i < 2 + X86AddrNumOperands; ++i) {
argOpers[i] = &bInstr->getOperand(i+2);
+ // We use some of the operands multiple times, so conservatively just
+ // clear any kill flags that might be present.
+ if (argOpers[i]->isReg() && argOpers[i]->isUse())
+ argOpers[i]->setIsKill(false);
+ }
+
// x86 address has 5 operands: base, index, scale, displacement, and segment.
int lastAddrIndx = X86AddrNumOperands - 1; // [0,3]
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 440601f9..1ef1a7b 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -563,7 +563,7 @@ namespace llvm {
return !X86ScalarSSEf64 || VT == MVT::f80;
}
- virtual const X86Subtarget* getSubtarget() const {
+ const X86Subtarget* getSubtarget() const {
return Subtarget;
}
@@ -677,6 +677,7 @@ namespace llvm {
SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const;
SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
SelectionDAG &DAG) const;
+ SDValue LowerBIT_CONVERT(SDValue op, SelectionDAG &DAG) const;
SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const;
@@ -743,23 +744,6 @@ namespace llvm {
void ReplaceATOMIC_BINARY_64(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG, unsigned NewOp) const;
- SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
- SDValue Chain,
- SDValue Dst, SDValue Src,
- SDValue Size, unsigned Align,
- bool isVolatile,
- const Value *DstSV,
- uint64_t DstSVOff) const;
- SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
- SDValue Chain,
- SDValue Dst, SDValue Src,
- SDValue Size, unsigned Align,
- bool isVolatile, bool AlwaysInline,
- const Value *DstSV,
- uint64_t DstSVOff,
- const Value *SrcSV,
- uint64_t SrcSVOff) const;
-
/// Utility function to emit string processing sse4.2 instructions
/// that return in xmm0.
/// This takes the instruction to expand, the associated machine basic
diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td
index f5c3dbf..97eb17c 100644
--- a/lib/Target/X86/X86Instr64bit.td
+++ b/lib/Target/X86/X86Instr64bit.td
@@ -18,7 +18,9 @@
//
// 64-bits but only 32 bits are significant.
-def i64i32imm : Operand<i64>;
+def i64i32imm : Operand<i64> {
+ let ParserMatchClass = ImmSExti64i32AsmOperand;
+}
// 64-bits but only 32 bits are significant, and those bits are treated as being
// pc relative.
@@ -30,7 +32,7 @@ def i64i32imm_pcrel : Operand<i64> {
// 64-bits but only 8 bits are significant.
def i64i8imm : Operand<i64> {
- let ParserMatchClass = ImmSExt8AsmOperand;
+ let ParserMatchClass = ImmSExti64i8AsmOperand;
}
// Special i64mem for addresses of load folding tail calls. These are not
@@ -198,6 +200,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
def TCRETURNri64 : I<0, Pseudo, (outs), (ins GR64_TC:$dst, i32imm:$offset,
variable_ops),
"#TC_RETURN $dst $offset", []>;
+ let mayLoad = 1 in
def TCRETURNmi64 : I<0, Pseudo, (outs),
(ins i64mem_TC:$dst, i32imm:$offset, variable_ops),
"#TC_RETURN $dst $offset", []>;
@@ -208,6 +211,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins GR64_TC:$dst, variable_ops),
"jmp{q}\t{*}$dst # TAILCALL", []>;
+ let mayLoad = 1 in
def TAILJMPm64 : I<0xFF, MRM4m, (outs), (ins i64mem_TC:$dst, variable_ops),
"jmp{q}\t{*}$dst # TAILCALL", []>;
}
@@ -241,6 +245,7 @@ def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr),
def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
"popcnt{q}\t{$src, $dst|$dst, $src}", []>, XS;
+let mayLoad = 1 in
def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
"popcnt{q}\t{$src, $dst|$dst, $src}", []>, XS;
@@ -267,14 +272,16 @@ def PUSH64i8 : Ii8<0x6a, RawFrm, (outs), (ins i8imm:$imm),
"push{q}\t$imm", []>;
def PUSH64i16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm),
"push{q}\t$imm", []>;
-def PUSH64i32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm),
+def PUSH64i32 : Ii32<0x68, RawFrm, (outs), (ins i64i32imm:$imm),
"push{q}\t$imm", []>;
}
-let Defs = [RSP, EFLAGS], Uses = [RSP], mayLoad = 1 in
-def POPFQ : I<0x9D, RawFrm, (outs), (ins), "popf{q}", []>, REX_W;
-let Defs = [RSP], Uses = [RSP, EFLAGS], mayStore = 1 in
-def PUSHFQ64 : I<0x9C, RawFrm, (outs), (ins), "pushf{q}", []>;
+let Defs = [RSP, EFLAGS], Uses = [RSP], mayLoad = 1, neverHasSideEffects=1 in
+def POPF64 : I<0x9D, RawFrm, (outs), (ins), "popfq", []>,
+ Requires<[In64BitMode]>;
+let Defs = [RSP], Uses = [RSP, EFLAGS], mayStore = 1, neverHasSideEffects=1 in
+def PUSHF64 : I<0x9C, RawFrm, (outs), (ins), "pushfq", []>,
+ Requires<[In64BitMode]>;
def LEA64_32r : I<0x8D, MRMSrcMem,
(outs GR32:$dst), (ins lea64_32mem:$src),
@@ -309,16 +316,22 @@ def BSR64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
} // Defs = [EFLAGS]
// Repeat string ops
-let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI] in
+let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI], isCodeGenOnly = 1 in
def REP_MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq|rep movsq}",
[(X86rep_movs i64)]>, REP;
-let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI] in
+let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI], isCodeGenOnly = 1 in
def REP_STOSQ : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}",
[(X86rep_stos i64)]>, REP;
-def SCAS64 : RI<0xAF, RawFrm, (outs), (ins), "scas{q}", []>;
+let Defs = [EDI,ESI], Uses = [EDI,ESI,EFLAGS] in
+def MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "movsq", []>;
+
+let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI,EFLAGS] in
+def STOSQ : RI<0xAB, RawFrm, (outs), (ins), "stosq", []>;
-def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmps{q}", []>;
+def SCAS64 : RI<0xAF, RawFrm, (outs), (ins), "scasq", []>;
+
+def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmpsq", []>;
// Fast system-call instructions
def SYSEXIT64 : RI<0x35, RawFrm,
@@ -341,8 +354,17 @@ def MOV64ri32 : RIi32<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src),
[(set GR64:$dst, i64immSExt32:$src)]>;
}
+// The assembler accepts movq of a 64-bit immediate as an alternate spelling of
+// movabsq.
+let isAsmParserOnly = 1 in {
+def MOV64ri_alt : RIi64<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64imm:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", []>;
+}
+
+let isCodeGenOnly = 1 in {
def MOV64rr_REV : RI<0x8B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
"mov{q}\t{$src, $dst|$dst, $src}", []>;
+}
let canFoldAsLoad = 1, isReMaterializable = 1 in
def MOV64rm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
@@ -398,9 +420,9 @@ def MOV64dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR64:$src),
"mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
// Moves to and from control registers
-def MOV64rc : I<0x20, MRMDestReg, (outs GR64:$dst), (ins CONTROL_REG_64:$src),
+def MOV64rc : I<0x20, MRMDestReg, (outs GR64:$dst), (ins CONTROL_REG:$src),
"mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
-def MOV64cr : I<0x22, MRMSrcReg, (outs CONTROL_REG_64:$dst), (ins GR64:$src),
+def MOV64cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR64:$src),
"mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
// Sign/Zero extenders
@@ -478,7 +500,7 @@ def def32 : PatLeaf<(i32 GR32:$src), [{
// In the case of a 32-bit def that is known to implicitly zero-extend,
// we can use a SUBREG_TO_REG.
def : Pat<(i64 (zext def32:$src)),
- (SUBREG_TO_REG (i64 0), GR32:$src, x86_subreg_32bit)>;
+ (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;
let neverHasSideEffects = 1 in {
let Defs = [RAX], Uses = [EAX] in
@@ -496,7 +518,7 @@ let neverHasSideEffects = 1 in {
let Defs = [EFLAGS] in {
-def ADD64i32 : RIi32<0x05, RawFrm, (outs), (ins i32imm:$src),
+def ADD64i32 : RIi32<0x05, RawFrm, (outs), (ins i64i32imm:$src),
"add{q}\t{$src, %rax|%rax, $src}", []>;
let isTwoAddress = 1 in {
@@ -555,7 +577,7 @@ def ADD64mi32 : RIi32<0x81, MRM0m, (outs), (ins i64mem:$dst, i64i32imm :$src2),
let Uses = [EFLAGS] in {
-def ADC64i32 : RIi32<0x15, RawFrm, (outs), (ins i32imm:$src),
+def ADC64i32 : RIi32<0x15, RawFrm, (outs), (ins i64i32imm:$src),
"adc{q}\t{$src, %rax|%rax, $src}", []>;
let isTwoAddress = 1 in {
@@ -565,9 +587,11 @@ def ADC64rr : RI<0x11, MRMDestReg, (outs GR64:$dst),
"adc{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, (adde GR64:$src1, GR64:$src2))]>;
+let isCodeGenOnly = 1 in {
def ADC64rr_REV : RI<0x13, MRMSrcReg , (outs GR32:$dst),
(ins GR64:$src1, GR64:$src2),
"adc{q}\t{$src2, $dst|$dst, $src2}", []>;
+}
def ADC64rm : RI<0x13, MRMSrcMem , (outs GR64:$dst),
(ins GR64:$src1, i64mem:$src2),
@@ -605,9 +629,11 @@ def SUB64rr : RI<0x29, MRMDestReg, (outs GR64:$dst),
[(set GR64:$dst, EFLAGS,
(X86sub_flag GR64:$src1, GR64:$src2))]>;
+let isCodeGenOnly = 1 in {
def SUB64rr_REV : RI<0x2B, MRMSrcReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
"sub{q}\t{$src2, $dst|$dst, $src2}", []>;
+}
// Register-Memory Subtraction
def SUB64rm : RI<0x2B, MRMSrcMem, (outs GR64:$dst),
@@ -629,7 +655,7 @@ def SUB64ri32 : RIi32<0x81, MRM5r, (outs GR64:$dst),
(X86sub_flag GR64:$src1, i64immSExt32:$src2))]>;
} // isTwoAddress
-def SUB64i32 : RIi32<0x2D, RawFrm, (outs), (ins i32imm:$src),
+def SUB64i32 : RIi32<0x2D, RawFrm, (outs), (ins i64i32imm:$src),
"sub{q}\t{$src, %rax|%rax, $src}", []>;
// Memory-Register Subtraction
@@ -657,9 +683,11 @@ def SBB64rr : RI<0x19, MRMDestReg, (outs GR64:$dst),
"sbb{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, (sube GR64:$src1, GR64:$src2))]>;
+let isCodeGenOnly = 1 in {
def SBB64rr_REV : RI<0x1B, MRMSrcReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
"sbb{q}\t{$src2, $dst|$dst, $src2}", []>;
+}
def SBB64rm : RI<0x1B, MRMSrcMem, (outs GR64:$dst),
(ins GR64:$src1, i64mem:$src2),
@@ -676,7 +704,7 @@ def SBB64ri32 : RIi32<0x81, MRM3r, (outs GR64:$dst),
[(set GR64:$dst, (sube GR64:$src1, i64immSExt32:$src2))]>;
} // isTwoAddress
-def SBB64i32 : RIi32<0x1D, RawFrm, (outs), (ins i32imm:$src),
+def SBB64i32 : RIi32<0x1D, RawFrm, (outs), (ins i64i32imm:$src),
"sbb{q}\t{$src, %rax|%rax, $src}", []>;
def SBB64mr : RI<0x19, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
@@ -1076,7 +1104,7 @@ def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst",
[(store (not (loadi64 addr:$dst)), addr:$dst)]>;
let Defs = [EFLAGS] in {
-def AND64i32 : RIi32<0x25, RawFrm, (outs), (ins i32imm:$src),
+def AND64i32 : RIi32<0x25, RawFrm, (outs), (ins i64i32imm:$src),
"and{q}\t{$src, %rax|%rax, $src}", []>;
let isTwoAddress = 1 in {
@@ -1086,9 +1114,11 @@ def AND64rr : RI<0x21, MRMDestReg,
"and{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, EFLAGS,
(X86and_flag GR64:$src1, GR64:$src2))]>;
+let isCodeGenOnly = 1 in {
def AND64rr_REV : RI<0x23, MRMSrcReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
"and{q}\t{$src2, $dst|$dst, $src2}", []>;
+}
def AND64rm : RI<0x23, MRMSrcMem,
(outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
"and{q}\t{$src2, $dst|$dst, $src2}",
@@ -1129,9 +1159,11 @@ def OR64rr : RI<0x09, MRMDestReg, (outs GR64:$dst),
"or{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, EFLAGS,
(X86or_flag GR64:$src1, GR64:$src2))]>;
+let isCodeGenOnly = 1 in {
def OR64rr_REV : RI<0x0B, MRMSrcReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
"or{q}\t{$src2, $dst|$dst, $src2}", []>;
+}
def OR64rm : RI<0x0B, MRMSrcMem , (outs GR64:$dst),
(ins GR64:$src1, i64mem:$src2),
"or{q}\t{$src2, $dst|$dst, $src2}",
@@ -1162,7 +1194,7 @@ def OR64mi32 : RIi32<0x81, MRM1m, (outs), (ins i64mem:$dst, i64i32imm:$src),
[(store (or (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst),
(implicit EFLAGS)]>;
-def OR64i32 : RIi32<0x0D, RawFrm, (outs), (ins i32imm:$src),
+def OR64i32 : RIi32<0x0D, RawFrm, (outs), (ins i64i32imm:$src),
"or{q}\t{$src, %rax|%rax, $src}", []>;
let isTwoAddress = 1 in {
@@ -1172,9 +1204,11 @@ def XOR64rr : RI<0x31, MRMDestReg, (outs GR64:$dst),
"xor{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, EFLAGS,
(X86xor_flag GR64:$src1, GR64:$src2))]>;
+let isCodeGenOnly = 1 in {
def XOR64rr_REV : RI<0x33, MRMSrcReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
"xor{q}\t{$src2, $dst|$dst, $src2}", []>;
+}
def XOR64rm : RI<0x33, MRMSrcMem, (outs GR64:$dst),
(ins GR64:$src1, i64mem:$src2),
"xor{q}\t{$src2, $dst|$dst, $src2}",
@@ -1205,7 +1239,7 @@ def XOR64mi32 : RIi32<0x81, MRM6m, (outs), (ins i64mem:$dst, i64i32imm:$src),
[(store (xor (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst),
(implicit EFLAGS)]>;
-def XOR64i32 : RIi32<0x35, RawFrm, (outs), (ins i32imm:$src),
+def XOR64i32 : RIi32<0x35, RawFrm, (outs), (ins i64i32imm:$src),
"xor{q}\t{$src, %rax|%rax, $src}", []>;
} // Defs = [EFLAGS]
@@ -1216,7 +1250,7 @@ def XOR64i32 : RIi32<0x35, RawFrm, (outs), (ins i32imm:$src),
// Integer comparison
let Defs = [EFLAGS] in {
-def TEST64i32 : RIi32<0xa9, RawFrm, (outs), (ins i32imm:$src),
+def TEST64i32 : RIi32<0xa9, RawFrm, (outs), (ins i64i32imm:$src),
"test{q}\t{$src, %rax|%rax, $src}", []>;
let isCommutable = 1 in
def TEST64rr : RI<0x85, MRMSrcReg, (outs), (ins GR64:$src1, GR64:$src2),
@@ -1238,7 +1272,7 @@ def TEST64mi32 : RIi32<0xF7, MRM0m, (outs),
i64immSExt32:$src2), 0))]>;
-def CMP64i32 : RIi32<0x3D, RawFrm, (outs), (ins i32imm:$src),
+def CMP64i32 : RIi32<0x3D, RawFrm, (outs), (ins i64i32imm:$src),
"cmp{q}\t{$src, %rax|%rax, $src}", []>;
def CMP64rr : RI<0x39, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
"cmp{q}\t{$src2, $src1|$src1, $src2}",
@@ -1293,7 +1327,8 @@ def BT64mr : RI<0xA3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
def BT64ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2),
"bt{q}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))]>, TB;
+ [(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))]>, TB,
+ REX_W;
// Note that these instructions don't need FastBTMem because that
// only applies when the other operand is in a register. When it's
// an immediate, bt is still fast.
@@ -1714,11 +1749,13 @@ def XCHG64rr : RI<0x87, MRMSrcReg, (outs GR64:$dst), (ins GR64:$val,GR64:$src),
def XADD64rr : RI<0xC1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
"xadd{q}\t{$src, $dst|$dst, $src}", []>, TB;
+let mayLoad = 1, mayStore = 1 in
def XADD64rm : RI<0xC1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
"xadd{q}\t{$src, $dst|$dst, $src}", []>, TB;
def CMPXCHG64rr : RI<0xB1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
"cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB;
+let mayLoad = 1, mayStore = 1 in
def CMPXCHG64rm : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
"cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB;
@@ -1730,7 +1767,7 @@ def XCHG64ar : RI<0x90, AddRegFrm, (outs), (ins GR64:$src),
"xchg{q}\t{$src, %rax|%rax, $src}", []>;
// Optimized codegen when the non-memory output is not used.
-let Defs = [EFLAGS] in {
+let Defs = [EFLAGS], mayLoad = 1, mayStore = 1 in {
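// (The locked read-modify-write forms have no ISel patterns from which the
// memory behavior could be inferred, so mayLoad/mayStore must be stated
// explicitly.)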
// FIXME: Use normal add / sub instructions and add lock prefix dynamically.
def LOCK_ADD64mr : RI<0x03, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
"lock\n\t"
@@ -1982,14 +2019,14 @@ def : Pat<(extloadi64i16 addr:$src), (MOVZX64rm16 addr:$src)>;
// defined after an extload.
def : Pat<(extloadi64i32 addr:$src),
(SUBREG_TO_REG (i64 0), (MOV32rm addr:$src),
- x86_subreg_32bit)>;
+ sub_32bit)>;
// anyext. Define these to do an explicit zero-extend to
// avoid partial-register updates.
def : Pat<(i64 (anyext GR8 :$src)), (MOVZX64rr8 GR8 :$src)>;
def : Pat<(i64 (anyext GR16:$src)), (MOVZX64rr16 GR16 :$src)>;
def : Pat<(i64 (anyext GR32:$src)),
- (SUBREG_TO_REG (i64 0), GR32:$src, x86_subreg_32bit)>;
+ (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;
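// (movzx writes the full destination register, so a later read never
// depends on the stale upper bits that a partial write would leave behind.)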
//===----------------------------------------------------------------------===//
// Some peepholes
@@ -2016,54 +2053,54 @@ def : Pat<(and GR64:$src, i64immZExt32:$imm),
(SUBREG_TO_REG
(i64 0),
(AND32ri
- (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit),
+ (EXTRACT_SUBREG GR64:$src, sub_32bit),
(i32 (GetLo32XForm imm:$imm))),
- x86_subreg_32bit)>;
+ sub_32bit)>;
// r & (2^32-1) ==> movz
def : Pat<(and GR64:$src, 0x00000000FFFFFFFF),
- (MOVZX64rr32 (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit))>;
+ (MOVZX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>;
// r & (2^16-1) ==> movz
def : Pat<(and GR64:$src, 0xffff),
- (MOVZX64rr16 (i16 (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit)))>;
+ (MOVZX64rr16 (i16 (EXTRACT_SUBREG GR64:$src, sub_16bit)))>;
// r & (2^8-1) ==> movz
def : Pat<(and GR64:$src, 0xff),
- (MOVZX64rr8 (i8 (EXTRACT_SUBREG GR64:$src, x86_subreg_8bit)))>;
+ (MOVZX64rr8 (i8 (EXTRACT_SUBREG GR64:$src, sub_8bit)))>;
// r & (2^8-1) ==> movz
def : Pat<(and GR32:$src1, 0xff),
- (MOVZX32rr8 (EXTRACT_SUBREG GR32:$src1, x86_subreg_8bit))>,
+ (MOVZX32rr8 (EXTRACT_SUBREG GR32:$src1, sub_8bit))>,
Requires<[In64BitMode]>;
// r & (2^8-1) ==> movz
def : Pat<(and GR16:$src1, 0xff),
- (MOVZX16rr8 (i8 (EXTRACT_SUBREG GR16:$src1, x86_subreg_8bit)))>,
+ (MOVZX16rr8 (i8 (EXTRACT_SUBREG GR16:$src1, sub_8bit)))>,
Requires<[In64BitMode]>;
// sext_inreg patterns
def : Pat<(sext_inreg GR64:$src, i32),
- (MOVSX64rr32 (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit))>;
+ (MOVSX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>;
def : Pat<(sext_inreg GR64:$src, i16),
- (MOVSX64rr16 (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit))>;
+ (MOVSX64rr16 (EXTRACT_SUBREG GR64:$src, sub_16bit))>;
def : Pat<(sext_inreg GR64:$src, i8),
- (MOVSX64rr8 (EXTRACT_SUBREG GR64:$src, x86_subreg_8bit))>;
+ (MOVSX64rr8 (EXTRACT_SUBREG GR64:$src, sub_8bit))>;
def : Pat<(sext_inreg GR32:$src, i8),
- (MOVSX32rr8 (EXTRACT_SUBREG GR32:$src, x86_subreg_8bit))>,
+ (MOVSX32rr8 (EXTRACT_SUBREG GR32:$src, sub_8bit))>,
Requires<[In64BitMode]>;
def : Pat<(sext_inreg GR16:$src, i8),
- (MOVSX16rr8 (i8 (EXTRACT_SUBREG GR16:$src, x86_subreg_8bit)))>,
+ (MOVSX16rr8 (i8 (EXTRACT_SUBREG GR16:$src, sub_8bit)))>,
Requires<[In64BitMode]>;
// trunc patterns
def : Pat<(i32 (trunc GR64:$src)),
- (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit)>;
+ (EXTRACT_SUBREG GR64:$src, sub_32bit)>;
def : Pat<(i16 (trunc GR64:$src)),
- (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit)>;
+ (EXTRACT_SUBREG GR64:$src, sub_16bit)>;
def : Pat<(i8 (trunc GR64:$src)),
- (EXTRACT_SUBREG GR64:$src, x86_subreg_8bit)>;
+ (EXTRACT_SUBREG GR64:$src, sub_8bit)>;
def : Pat<(i8 (trunc GR32:$src)),
- (EXTRACT_SUBREG GR32:$src, x86_subreg_8bit)>,
+ (EXTRACT_SUBREG GR32:$src, sub_8bit)>,
Requires<[In64BitMode]>;
def : Pat<(i8 (trunc GR16:$src)),
- (EXTRACT_SUBREG GR16:$src, x86_subreg_8bit)>,
+ (EXTRACT_SUBREG GR16:$src, sub_8bit)>,
Requires<[In64BitMode]>;
// h-register tricks.
@@ -2079,67 +2116,67 @@ def : Pat<(and (srl_su GR64:$src, (i8 8)), (i64 255)),
(i64 0),
(MOVZX32_NOREXrr8
(EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)),
- x86_subreg_8bit_hi)),
- x86_subreg_32bit)>;
+ sub_8bit_hi)),
+ sub_32bit)>;
def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
(MOVZX32_NOREXrr8
(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
- x86_subreg_8bit_hi))>,
+ sub_8bit_hi))>,
Requires<[In64BitMode]>;
def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),
(MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
GR32_ABCD)),
- x86_subreg_8bit_hi))>,
+ sub_8bit_hi))>,
Requires<[In64BitMode]>;
def : Pat<(srl GR16:$src, (i8 8)),
(EXTRACT_SUBREG
(MOVZX32_NOREXrr8
(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
- x86_subreg_8bit_hi)),
- x86_subreg_16bit)>,
+ sub_8bit_hi)),
+ sub_16bit)>,
Requires<[In64BitMode]>;
def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
(MOVZX32_NOREXrr8
(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
- x86_subreg_8bit_hi))>,
+ sub_8bit_hi))>,
Requires<[In64BitMode]>;
def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),
(MOVZX32_NOREXrr8
(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
- x86_subreg_8bit_hi))>,
+ sub_8bit_hi))>,
Requires<[In64BitMode]>;
def : Pat<(i64 (zext (srl_su GR16:$src, (i8 8)))),
(SUBREG_TO_REG
(i64 0),
(MOVZX32_NOREXrr8
(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
- x86_subreg_8bit_hi)),
- x86_subreg_32bit)>;
+ sub_8bit_hi)),
+ sub_32bit)>;
def : Pat<(i64 (anyext (srl_su GR16:$src, (i8 8)))),
(SUBREG_TO_REG
(i64 0),
(MOVZX32_NOREXrr8
(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
- x86_subreg_8bit_hi)),
- x86_subreg_32bit)>;
+ sub_8bit_hi)),
+ sub_32bit)>;
// h-register extract and store.
def : Pat<(store (i8 (trunc_su (srl_su GR64:$src, (i8 8)))), addr:$dst),
(MOV8mr_NOREX
addr:$dst,
(EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)),
- x86_subreg_8bit_hi))>;
+ sub_8bit_hi))>;
def : Pat<(store (i8 (trunc_su (srl_su GR32:$src, (i8 8)))), addr:$dst),
(MOV8mr_NOREX
addr:$dst,
(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
- x86_subreg_8bit_hi))>,
+ sub_8bit_hi))>,
Requires<[In64BitMode]>;
def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst),
(MOV8mr_NOREX
addr:$dst,
(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
- x86_subreg_8bit_hi))>,
+ sub_8bit_hi))>,
Requires<[In64BitMode]>;
// (shl x, 1) ==> (add x, x)
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index a21bfb9..34e12ca 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -744,17 +744,17 @@ X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
case X86::MOVZX32rr8:
case X86::MOVSX64rr8:
case X86::MOVZX64rr8:
- SubIdx = 1;
+ SubIdx = X86::sub_8bit;
break;
case X86::MOVSX32rr16:
case X86::MOVZX32rr16:
case X86::MOVSX64rr16:
case X86::MOVZX64rr16:
- SubIdx = 3;
+ SubIdx = X86::sub_16bit;
break;
case X86::MOVSX64rr32:
case X86::MOVZX64rr32:
- SubIdx = 4;
+ SubIdx = X86::sub_32bit;
break;
}
return true;
@@ -1065,7 +1065,7 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
unsigned DestReg, unsigned SubIdx,
const MachineInstr *Orig,
const TargetRegisterInfo *TRI) const {
- DebugLoc DL = MBB.findDebugLoc(I);
+ DebugLoc DL = Orig->getDebugLoc();
if (SubIdx && TargetRegisterInfo::isPhysicalRegister(DestReg)) {
DestReg = TRI->getSubReg(DestReg, SubIdx);
@@ -1154,7 +1154,7 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg)
.addReg(leaInReg)
.addReg(Src, getKillRegState(isKill))
- .addImm(X86::SUBREG_16BIT);
+ .addImm(X86::sub_16bit);
MachineInstrBuilder MIB = BuildMI(*MFI, MBBI, MI->getDebugLoc(),
get(Opc), leaOutReg);
@@ -1198,7 +1198,7 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg2)
.addReg(leaInReg2)
.addReg(Src2, getKillRegState(isKill2))
- .addImm(X86::SUBREG_16BIT);
+ .addImm(X86::sub_16bit);
addRegReg(MIB, leaInReg, true, leaInReg2, true);
}
if (LV && isKill2 && InsMI2)
@@ -1212,7 +1212,7 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::EXTRACT_SUBREG))
.addReg(Dest, RegState::Define | getDeadRegState(isDead))
.addReg(leaOutReg, RegState::Kill)
- .addImm(X86::SUBREG_16BIT);
+ .addImm(X86::sub_16bit);
if (LV) {
// Update live variables
@@ -1901,8 +1901,8 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const {
- DebugLoc DL = MBB.findDebugLoc(MI);
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const {
// Determine if DstRC and SrcRC have a common superclass.
const TargetRegisterClass *CommonRC = DestRC;
@@ -1993,12 +1993,12 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
if (SrcReg != X86::EFLAGS)
return false;
if (DestRC == &X86::GR64RegClass || DestRC == &X86::GR64_NOSPRegClass) {
- BuildMI(MBB, MI, DL, get(X86::PUSHFQ64));
+ BuildMI(MBB, MI, DL, get(X86::PUSHF64));
BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg);
return true;
} else if (DestRC == &X86::GR32RegClass ||
DestRC == &X86::GR32_NOSPRegClass) {
- BuildMI(MBB, MI, DL, get(X86::PUSHFD));
+ BuildMI(MBB, MI, DL, get(X86::PUSHF32));
BuildMI(MBB, MI, DL, get(X86::POP32r), DestReg);
return true;
}
@@ -2007,12 +2007,12 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
return false;
if (SrcRC == &X86::GR64RegClass || DestRC == &X86::GR64_NOSPRegClass) {
BuildMI(MBB, MI, DL, get(X86::PUSH64r)).addReg(SrcReg);
- BuildMI(MBB, MI, DL, get(X86::POPFQ));
+ BuildMI(MBB, MI, DL, get(X86::POPF64));
return true;
} else if (SrcRC == &X86::GR32RegClass ||
DestRC == &X86::GR32_NOSPRegClass) {
BuildMI(MBB, MI, DL, get(X86::PUSH32r)).addReg(SrcReg);
- BuildMI(MBB, MI, DL, get(X86::POPFD));
+ BuildMI(MBB, MI, DL, get(X86::POPF32));
return true;
}
}
@@ -2133,7 +2133,8 @@ static unsigned getStoreRegOpcode(unsigned SrcReg,
void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned SrcReg, bool isKill, int FrameIdx,
- const TargetRegisterClass *RC) const {
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
const MachineFunction &MF = *MBB.getParent();
bool isAligned = (RI.getStackAlignment() >= 16) || RI.canRealignStack(MF);
unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
@@ -2230,7 +2231,8 @@ static unsigned getLoadRegOpcode(unsigned DestReg,
void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned DestReg, int FrameIdx,
- const TargetRegisterClass *RC) const{
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
const MachineFunction &MF = *MBB.getParent();
bool isAligned = (RI.getStackAlignment() >= 16) || RI.canRealignStack(MF);
unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
@@ -2256,7 +2258,8 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI) const {
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
if (CSI.empty())
return false;
@@ -2284,7 +2287,8 @@ bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
CalleeFrameSize += SlotSize;
BuildMI(MBB, MI, DL, get(Opc)).addReg(Reg, RegState::Kill);
} else {
- storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), RegClass);
+ storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), RegClass,
+ &RI);
}
}
@@ -2294,7 +2298,8 @@ bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI) const {
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
if (CSI.empty())
return false;
@@ -2314,7 +2319,7 @@ bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
if (RegClass != &X86::VR128RegClass && !isWin64) {
BuildMI(MBB, MI, DL, get(Opc), Reg);
} else {
- loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RegClass);
+ loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RegClass, &RI);
}
}
return true;
@@ -2478,9 +2483,9 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
unsigned DstReg = NewMI->getOperand(0).getReg();
if (TargetRegisterInfo::isPhysicalRegister(DstReg))
NewMI->getOperand(0).setReg(RI.getSubReg(DstReg,
- 4/*x86_subreg_32bit*/));
+ X86::sub_32bit));
else
- NewMI->getOperand(0).setSubReg(4/*x86_subreg_32bit*/);
+ NewMI->getOperand(0).setSubReg(X86::sub_32bit);
}
return NewMI;
}
@@ -2526,9 +2531,9 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
switch (MI->getOpcode()) {
default: return NULL;
case X86::TEST8rr: NewOpc = X86::CMP8ri; RCSize = 1; break;
- case X86::TEST16rr: NewOpc = X86::CMP16ri; RCSize = 2; break;
- case X86::TEST32rr: NewOpc = X86::CMP32ri; RCSize = 4; break;
- case X86::TEST64rr: NewOpc = X86::CMP64ri32; RCSize = 8; break;
+ case X86::TEST16rr: NewOpc = X86::CMP16ri8; RCSize = 2; break;
+ case X86::TEST32rr: NewOpc = X86::CMP32ri8; RCSize = 4; break;
+ case X86::TEST64rr: NewOpc = X86::CMP64ri8; RCSize = 8; break;
}
// Check if it's safe to fold the load. If the size of the object is
// narrower than the load width, then it's not.
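// A sketch of that guard, assuming the spill-slot size comes from a
// MachineFrameInfo query (illustrative, not the verbatim code):
//   if (MF.getFrameInfo()->getObjectSize(FrameIndex) < RCSize)
//     return NULL; // folding would widen the access beyond the object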
@@ -2595,9 +2600,9 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
switch (MI->getOpcode()) {
default: return NULL;
case X86::TEST8rr: NewOpc = X86::CMP8ri; break;
- case X86::TEST16rr: NewOpc = X86::CMP16ri; break;
- case X86::TEST32rr: NewOpc = X86::CMP32ri; break;
- case X86::TEST64rr: NewOpc = X86::CMP64ri32; break;
+ case X86::TEST16rr: NewOpc = X86::CMP16ri8; break;
+ case X86::TEST32rr: NewOpc = X86::CMP32ri8; break;
+ case X86::TEST64rr: NewOpc = X86::CMP64ri8; break;
}
// Change to CMPXXri r, 0 first.
MI->setDesc(get(NewOpc));
@@ -2805,16 +2810,22 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
switch (DataMI->getOpcode()) {
default: break;
case X86::CMP64ri32:
+ case X86::CMP64ri8:
case X86::CMP32ri:
+ case X86::CMP32ri8:
case X86::CMP16ri:
+ case X86::CMP16ri8:
case X86::CMP8ri: {
MachineOperand &MO0 = DataMI->getOperand(0);
MachineOperand &MO1 = DataMI->getOperand(1);
if (MO1.getImm() == 0) {
switch (DataMI->getOpcode()) {
default: break;
+ case X86::CMP64ri8:
case X86::CMP64ri32: NewOpc = X86::TEST64rr; break;
+ case X86::CMP32ri8:
case X86::CMP32ri: NewOpc = X86::TEST32rr; break;
+ case X86::CMP16ri8:
case X86::CMP16ri: NewOpc = X86::TEST16rr; break;
case X86::CMP8ri: NewOpc = X86::TEST8rr; break;
}
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index df99c7f..62d7c74 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -590,11 +590,13 @@ public:
MachineBasicBlock::iterator MI,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const;
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const;
virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned SrcReg, bool isKill, int FrameIndex,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
SmallVectorImpl<MachineOperand> &Addr,
@@ -606,7 +608,8 @@ public:
virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
SmallVectorImpl<MachineOperand> &Addr,
@@ -617,11 +620,13 @@ public:
virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI) const;
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI) const;
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
virtual
MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF,
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index a2754ea..0d59c42 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -195,15 +195,15 @@ def ptr_rc_nosp : PointerLikeRegClass<1>;
//
def X86MemAsmOperand : AsmOperandClass {
let Name = "Mem";
- let SuperClass = ?;
-}
-def X86AbsMemAsmOperand : AsmOperandClass {
- let Name = "AbsMem";
- let SuperClass = X86MemAsmOperand;
+ let SuperClasses = [];
}
def X86NoSegMemAsmOperand : AsmOperandClass {
let Name = "NoSegMem";
- let SuperClass = X86MemAsmOperand;
+ let SuperClasses = [X86MemAsmOperand];
+}
+def X86AbsMemAsmOperand : AsmOperandClass {
+ let Name = "AbsMem";
+ let SuperClasses = [X86NoSegMemAsmOperand];
}
class X86MemOperand<string printMethod> : Operand<iPTR> {
let PrintMethod = printMethod;
@@ -270,19 +270,49 @@ def SSECC : Operand<i8> {
let PrintMethod = "printSSECC";
}
-def ImmSExt8AsmOperand : AsmOperandClass {
- let Name = "ImmSExt8";
- let SuperClass = ImmAsmOperand;
+class ImmSExtAsmOperandClass : AsmOperandClass {
+ let SuperClasses = [ImmAsmOperand];
+ let RenderMethod = "addImmOperands";
+}
+
+// Sign-extended immediate classes. We don't need to define the full lattice
+// here because there is no instruction with an ambiguity between ImmSExti64i32
+// and ImmSExti32i8.
+//
+// The strange ranges come from the fact that the assembler always works with
+// 64-bit immediates, but for a 16-bit target value we want to accept both "-1"
+// (which will be a -1ULL), and "0xFFFF" (-1 in 16-bits); a sketch of the
+// resulting range test follows the class definitions below.
+
+// [0, 0x7FFFFFFF] | [0xFFFFFFFF80000000, 0xFFFFFFFFFFFFFFFF]
+def ImmSExti64i32AsmOperand : ImmSExtAsmOperandClass {
+ let Name = "ImmSExti64i32";
+}
+
+// [0, 0x0000007F] | [0x000000000000FF80, 0x000000000000FFFF] |
+// [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]
+def ImmSExti16i8AsmOperand : ImmSExtAsmOperandClass {
+ let Name = "ImmSExti16i8";
+ let SuperClasses = [ImmSExti64i32AsmOperand];
+}
+
+// [0, 0x0000007F] | [0x00000000FFFFFF80, 0x00000000FFFFFFFF] | [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]
+def ImmSExti32i8AsmOperand : ImmSExtAsmOperandClass {
+ let Name = "ImmSExti32i8";
+}
+
+// [0, 0x0000007F] | [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]
+def ImmSExti64i8AsmOperand : ImmSExtAsmOperandClass {
+ let Name = "ImmSExti64i8";
+  let SuperClasses = [ImmSExti16i8AsmOperand, ImmSExti32i8AsmOperand,
+                      ImmSExti64i32AsmOperand];
}
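// A minimal standalone sketch, with a hypothetical helper name (the real
// matcher code is generated by TableGen from the classes above), of the
// range test that ImmSExti16i8 implies:
#include <cstdint>
static bool isImmSExti16i8Value(uint64_t V) {
  return V <= 0x000000000000007FULL ||                        // [0, 0x7F]
         (V >= 0x000000000000FF80ULL &&
          V <= 0x000000000000FFFFULL) ||                      // [0xFF80, 0xFFFF]
         V >= 0xFFFFFFFFFFFFFF80ULL;                          // 64-bit -128..-1
}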
// A couple more descriptive operand definitions.
// 16-bits but only 8 bits are significant.
def i16i8imm : Operand<i16> {
- let ParserMatchClass = ImmSExt8AsmOperand;
+ let ParserMatchClass = ImmSExti16i8AsmOperand;
}
// 32-bits but only 8 bits are significant.
def i32i8imm : Operand<i32> {
- let ParserMatchClass = ImmSExt8AsmOperand;
+ let ParserMatchClass = ImmSExti32i8AsmOperand;
}
//===----------------------------------------------------------------------===//
@@ -542,8 +572,10 @@ let neverHasSideEffects = 1 in {
}
// Trap
-def INT3 : I<0xcc, RawFrm, (outs), (ins), "int\t3", []>;
-def INT : I<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap", []>;
+def INTO : I<0xce, RawFrm, (outs), (ins), "into", []>;
+def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3", []>;
+// FIXME: need to make sure that "int $3" matches int3
+def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap", []>;
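// ("int3" is the dedicated one-byte 0xCC breakpoint encoding; "int $3"
// would otherwise assemble to the two-byte 0xCD 0x03 form.)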
def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iret{w}", []>, OpSize;
def IRET32 : I<0xcf, RawFrm, (outs), (ins), "iret{l}", []>;
@@ -693,6 +725,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
def TCRETURNri : I<0, Pseudo, (outs),
(ins GR32_TC:$dst, i32imm:$offset, variable_ops),
"#TC_RETURN $dst $offset", []>;
+ let mayLoad = 1 in
def TCRETURNmi : I<0, Pseudo, (outs),
(ins i32mem_TC:$dst, i32imm:$offset, variable_ops),
"#TC_RETURN $dst $offset", []>;
@@ -706,8 +739,16 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32_TC:$dst, variable_ops),
"jmp{l}\t{*}$dst # TAILCALL",
[]>;
+ let mayLoad = 1 in
def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem_TC:$dst, variable_ops),
"jmp{l}\t{*}$dst # TAILCALL", []>;
+
+ // FIXME: This is a hack so that MCInst lowering can preserve the TAILCALL
+ // marker on instructions, while still being able to relax.
+ let isCodeGenOnly = 1 in {
+ def TAILJMP_1 : Ii8PCRel<0xEB, RawFrm, (outs), (ins brtarget8:$dst),
+ "jmp\t$dst # TAILCALL", []>;
+ }
}
//===----------------------------------------------------------------------===//
@@ -719,10 +760,12 @@ def LEAVE : I<0xC9, RawFrm,
def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
"popcnt{w}\t{$src, $dst|$dst, $src}", []>, OpSize, XS;
+let mayLoad = 1 in
def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"popcnt{w}\t{$src, $dst|$dst, $src}", []>, OpSize, XS;
def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"popcnt{l}\t{$src, $dst|$dst, $src}", []>, XS;
+let mayLoad = 1 in
def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"popcnt{l}\t{$src, $dst|$dst, $src}", []>, XS;
@@ -762,12 +805,14 @@ def PUSHi32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm),
}
let Defs = [ESP, EFLAGS], Uses = [ESP], mayLoad = 1, neverHasSideEffects=1 in {
-def POPF : I<0x9D, RawFrm, (outs), (ins), "popf{w}", []>, OpSize;
-def POPFD : I<0x9D, RawFrm, (outs), (ins), "popf{l}", []>;
+def POPF16 : I<0x9D, RawFrm, (outs), (ins), "popf{w}", []>, OpSize;
+def POPF32 : I<0x9D, RawFrm, (outs), (ins), "popf{l|d}", []>,
+ Requires<[In32BitMode]>;
}
let Defs = [ESP], Uses = [ESP, EFLAGS], mayStore = 1, neverHasSideEffects=1 in {
-def PUSHF : I<0x9C, RawFrm, (outs), (ins), "pushf{w}", []>, OpSize;
-def PUSHFD : I<0x9C, RawFrm, (outs), (ins), "pushf{l}", []>;
+def PUSHF16 : I<0x9C, RawFrm, (outs), (ins), "pushf{w}", []>, OpSize;
+def PUSHF32 : I<0x9C, RawFrm, (outs), (ins), "pushf{l|d}", []>,
+ Requires<[In32BitMode]>;
}
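// (The {l|d} syntax picks the mnemonic per dialect: AT&T prints
// pushfl/popfl, Intel prints pushfd/popfd.)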
let isTwoAddress = 1 in // GR32 = bswap GR32
@@ -867,7 +912,7 @@ def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)]>,
let Defs = [RAX, RCX, RDX] in
def RDTSCP : I<0x01, MRM_F9, (outs), (ins), "rdtscp", []>, TB;
-let isBarrier = 1, hasCtrlDep = 1 in {
+let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in {
def TRAP : I<0x0B, RawFrm, (outs), (ins), "ud2", [(trap)]>, TB;
}
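// (ud2 raises #UD and never falls through, so it must also end its basic
// block; hence the added isTerminator.)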
@@ -966,36 +1011,47 @@ def MOV32mi : Ii32<0xC7, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src),
"mov{l}\t{$src, $dst|$dst, $src}",
[(store (i32 imm:$src), addr:$dst)]>;
-def MOV8o8a : Ii8 <0xA0, RawFrm, (outs), (ins offset8:$src),
+/// moffs8, moffs16 and moffs32 versions of moves. The immediate is a
+/// 32-bit absolute offset from the segment base, not a PC-relative
+/// displacement. These are only valid in x86-32 mode.
+def MOV8o8a : Ii32 <0xA0, RawFrm, (outs), (ins offset8:$src),
"mov{b}\t{$src, %al|%al, $src}", []>;
-def MOV16o16a : Ii16 <0xA1, RawFrm, (outs), (ins offset16:$src),
+def MOV16o16a : Ii32 <0xA1, RawFrm, (outs), (ins offset16:$src),
"mov{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
def MOV32o32a : Ii32 <0xA1, RawFrm, (outs), (ins offset32:$src),
"mov{l}\t{$src, %eax|%eax, $src}", []>;
-
-def MOV8ao8 : Ii8 <0xA2, RawFrm, (outs offset8:$dst), (ins),
+def MOV8ao8 : Ii32 <0xA2, RawFrm, (outs offset8:$dst), (ins),
"mov{b}\t{%al, $dst|$dst, %al}", []>;
-def MOV16ao16 : Ii16 <0xA3, RawFrm, (outs offset16:$dst), (ins),
+def MOV16ao16 : Ii32 <0xA3, RawFrm, (outs offset16:$dst), (ins),
"mov{w}\t{%ax, $dst|$dst, %ax}", []>, OpSize;
def MOV32ao32 : Ii32 <0xA3, RawFrm, (outs offset32:$dst), (ins),
"mov{l}\t{%eax, $dst|$dst, %eax}", []>;
-
+
// Moves to and from segment registers
def MOV16rs : I<0x8C, MRMDestReg, (outs GR16:$dst), (ins SEGMENT_REG:$src),
- "mov{w}\t{$src, $dst|$dst, $src}", []>;
+ "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def MOV32rs : I<0x8C, MRMDestReg, (outs GR32:$dst), (ins SEGMENT_REG:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>;
def MOV16ms : I<0x8C, MRMDestMem, (outs i16mem:$dst), (ins SEGMENT_REG:$src),
- "mov{w}\t{$src, $dst|$dst, $src}", []>;
+ "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def MOV32ms : I<0x8C, MRMDestMem, (outs i32mem:$dst), (ins SEGMENT_REG:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>;
def MOV16sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR16:$src),
- "mov{w}\t{$src, $dst|$dst, $src}", []>;
+ "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def MOV32sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR32:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>;
def MOV16sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i16mem:$src),
- "mov{w}\t{$src, $dst|$dst, $src}", []>;
+ "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def MOV32sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i32mem:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>;
+let isCodeGenOnly = 1 in {
def MOV8rr_REV : I<0x8A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src),
"mov{b}\t{$src, $dst|$dst, $src}", []>;
def MOV16rr_REV : I<0x8B, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
"mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
def MOV32rr_REV : I<0x8B, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"mov{l}\t{$src, $dst|$dst, $src}", []>;
+}
let canFoldAsLoad = 1, isReMaterializable = 1 in {
def MOV8rm : I<0x8A, MRMSrcMem, (outs GR8 :$dst), (ins i8mem :$src),
@@ -1059,10 +1115,10 @@ def MOV32dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR32:$src),
"mov{l}\t{$src, $dst|$dst, $src}", []>, TB;
// Moves to and from control registers
-def MOV32rc : I<0x20, MRMDestReg, (outs GR32:$dst), (ins CONTROL_REG_32:$src),
- "mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
-def MOV32cr : I<0x22, MRMSrcReg, (outs CONTROL_REG_32:$dst), (ins GR32:$src),
- "mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
+def MOV32rc : I<0x20, MRMDestReg, (outs GR32:$dst), (ins CONTROL_REG:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def MOV32cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR32:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>, TB;
//===----------------------------------------------------------------------===//
// Fixed-Register Multiplication and Division Instructions...
@@ -1746,6 +1802,7 @@ def AND32rr : I<0x21, MRMDestReg,
// AND instructions with the destination register in REG and the source register
// in R/M. Included for the disassembler.
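// (isCodeGenOnly keeps these alternate encodings out of the generated
// assembly matcher, where they would be ambiguous with the MRMDestReg
// forms, while the disassembler tables still include them.)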
+let isCodeGenOnly = 1 in {
def AND8rr_REV : I<0x22, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
"and{b}\t{$src2, $dst|$dst, $src2}", []>;
def AND16rr_REV : I<0x23, MRMSrcReg, (outs GR16:$dst),
@@ -1754,6 +1811,7 @@ def AND16rr_REV : I<0x23, MRMSrcReg, (outs GR16:$dst),
def AND32rr_REV : I<0x23, MRMSrcReg, (outs GR32:$dst),
(ins GR32:$src1, GR32:$src2),
"and{l}\t{$src2, $dst|$dst, $src2}", []>;
+}
def AND8rm : I<0x22, MRMSrcMem,
(outs GR8 :$dst), (ins GR8 :$src1, i8mem :$src2),
@@ -1872,6 +1930,7 @@ def OR32rr : I<0x09, MRMDestReg, (outs GR32:$dst),
// OR instructions with the destination register in REG and the source register
// in R/M. Included for the disassembler.
+let isCodeGenOnly = 1 in {
def OR8rr_REV : I<0x0A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
"or{b}\t{$src2, $dst|$dst, $src2}", []>;
def OR16rr_REV : I<0x0B, MRMSrcReg, (outs GR16:$dst),
@@ -1880,6 +1939,7 @@ def OR16rr_REV : I<0x0B, MRMSrcReg, (outs GR16:$dst),
def OR32rr_REV : I<0x0B, MRMSrcReg, (outs GR32:$dst),
(ins GR32:$src1, GR32:$src2),
"or{l}\t{$src2, $dst|$dst, $src2}", []>;
+}
def OR8rm : I<0x0A, MRMSrcMem, (outs GR8 :$dst),
(ins GR8 :$src1, i8mem :$src2),
@@ -1988,6 +2048,7 @@ let isCommutable = 1 in { // X = XOR Y, Z --> X = XOR Z, Y
// XOR instructions with the destination register in REG and the source register
// in R/M. Included for the disassembler.
+let isCodeGenOnly = 1 in {
def XOR8rr_REV : I<0x32, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
"xor{b}\t{$src2, $dst|$dst, $src2}", []>;
def XOR16rr_REV : I<0x33, MRMSrcReg, (outs GR16:$dst),
@@ -1996,6 +2057,7 @@ def XOR16rr_REV : I<0x33, MRMSrcReg, (outs GR16:$dst),
def XOR32rr_REV : I<0x33, MRMSrcReg, (outs GR32:$dst),
(ins GR32:$src1, GR32:$src2),
"xor{l}\t{$src2, $dst|$dst, $src2}", []>;
+}
def XOR8rm : I<0x32, MRMSrcMem,
(outs GR8 :$dst), (ins GR8:$src1, i8mem :$src2),
@@ -2793,6 +2855,7 @@ def ADC32rr : I<0x11, MRMDestReg, (outs GR32:$dst),
[(set GR32:$dst, (adde GR32:$src1, GR32:$src2))]>;
}
+let isCodeGenOnly = 1 in {
def ADC8rr_REV : I<0x12, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
"adc{b}\t{$src2, $dst|$dst, $src2}", []>;
def ADC16rr_REV : I<0x13, MRMSrcReg, (outs GR16:$dst),
@@ -2801,6 +2864,7 @@ def ADC16rr_REV : I<0x13, MRMSrcReg, (outs GR16:$dst),
def ADC32rr_REV : I<0x13, MRMSrcReg, (outs GR32:$dst),
(ins GR32:$src1, GR32:$src2),
"adc{l}\t{$src2, $dst|$dst, $src2}", []>;
+}
def ADC8rm : I<0x12, MRMSrcMem , (outs GR8:$dst),
(ins GR8:$src1, i8mem:$src2),
@@ -2888,6 +2952,7 @@ def SUB32rr : I<0x29, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1,GR32:$src2),
[(set GR32:$dst, EFLAGS,
(X86sub_flag GR32:$src1, GR32:$src2))]>;
+let isCodeGenOnly = 1 in {
def SUB8rr_REV : I<0x2A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
"sub{b}\t{$src2, $dst|$dst, $src2}", []>;
def SUB16rr_REV : I<0x2B, MRMSrcReg, (outs GR16:$dst),
@@ -2896,6 +2961,7 @@ def SUB16rr_REV : I<0x2B, MRMSrcReg, (outs GR16:$dst),
def SUB32rr_REV : I<0x2B, MRMSrcReg, (outs GR32:$dst),
(ins GR32:$src1, GR32:$src2),
"sub{l}\t{$src2, $dst|$dst, $src2}", []>;
+}
// Register-Memory Subtraction
def SUB8rm : I<0x2A, MRMSrcMem, (outs GR8 :$dst),
@@ -3039,6 +3105,7 @@ let isTwoAddress = 0 in {
"sbb{l}\t{$src, %eax|%eax, $src}", []>;
}
+let isCodeGenOnly = 1 in {
def SBB8rr_REV : I<0x1A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
"sbb{b}\t{$src2, $dst|$dst, $src2}", []>;
def SBB16rr_REV : I<0x1B, MRMSrcReg, (outs GR16:$dst),
@@ -3047,6 +3114,7 @@ def SBB16rr_REV : I<0x1B, MRMSrcReg, (outs GR16:$dst),
def SBB32rr_REV : I<0x1B, MRMSrcReg, (outs GR32:$dst),
(ins GR32:$src1, GR32:$src2),
"sbb{l}\t{$src2, $dst|$dst, $src2}", []>;
+}
def SBB8rm : I<0x1A, MRMSrcMem, (outs GR8:$dst), (ins GR8:$src1, i8mem:$src2),
"sbb{b}\t{$src2, $dst|$dst, $src2}",
@@ -3864,12 +3932,14 @@ def XADD16rr : I<0xC1, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
def XADD32rr : I<0xC1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
"xadd{l}\t{$src, $dst|$dst, $src}", []>, TB;
+let mayLoad = 1, mayStore = 1 in {
def XADD8rm : I<0xC0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
"xadd{b}\t{$src, $dst|$dst, $src}", []>, TB;
def XADD16rm : I<0xC1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
"xadd{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
def XADD32rm : I<0xC1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"xadd{l}\t{$src, $dst|$dst, $src}", []>, TB;
+}
def CMPXCHG8rr : I<0xB0, MRMDestReg, (outs GR8:$dst), (ins GR8:$src),
"cmpxchg{b}\t{$src, $dst|$dst, $src}", []>, TB;
@@ -3878,12 +3948,14 @@ def CMPXCHG16rr : I<0xB1, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
def CMPXCHG32rr : I<0xB1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
"cmpxchg{l}\t{$src, $dst|$dst, $src}", []>, TB;
+let mayLoad = 1, mayStore = 1 in {
def CMPXCHG8rm : I<0xB0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
"cmpxchg{b}\t{$src, $dst|$dst, $src}", []>, TB;
def CMPXCHG16rm : I<0xB1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
"cmpxchg{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
def CMPXCHG32rm : I<0xB1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"cmpxchg{l}\t{$src, $dst|$dst, $src}", []>, TB;
+}
let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in
def CMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$dst),
@@ -3891,7 +3963,7 @@ def CMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$dst),
// Optimized codegen when the non-memory output is not used.
// FIXME: Use normal add / sub instructions and add lock prefix dynamically.
-let Defs = [EFLAGS] in {
+let Defs = [EFLAGS], mayLoad = 1, mayStore = 1 in {
def LOCK_ADD8mr : I<0x00, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
"lock\n\t"
"add{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
@@ -4453,7 +4525,7 @@ def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>;
// Except for i16 -> i32 since isel expect i16 ops to be promoted to i32.
def : Pat<(i32 (anyext GR16:$src)),
- (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, x86_subreg_16bit)>;
+ (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, sub_16bit)>;
//===----------------------------------------------------------------------===//
@@ -4473,81 +4545,81 @@ def : Pat<(store (add (loadi32 addr:$dst), 128), addr:$dst),
// r & (2^16-1) ==> movz
def : Pat<(and GR32:$src1, 0xffff),
- (MOVZX32rr16 (EXTRACT_SUBREG GR32:$src1, x86_subreg_16bit))>;
+ (MOVZX32rr16 (EXTRACT_SUBREG GR32:$src1, sub_16bit))>;
// r & (2^8-1) ==> movz
def : Pat<(and GR32:$src1, 0xff),
(MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src1,
GR32_ABCD)),
- x86_subreg_8bit))>,
+ sub_8bit))>,
Requires<[In32BitMode]>;
// r & (2^8-1) ==> movz
def : Pat<(and GR16:$src1, 0xff),
(MOVZX16rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src1,
GR16_ABCD)),
- x86_subreg_8bit))>,
+ sub_8bit))>,
Requires<[In32BitMode]>;
// sext_inreg patterns
def : Pat<(sext_inreg GR32:$src, i16),
- (MOVSX32rr16 (EXTRACT_SUBREG GR32:$src, x86_subreg_16bit))>;
+ (MOVSX32rr16 (EXTRACT_SUBREG GR32:$src, sub_16bit))>;
def : Pat<(sext_inreg GR32:$src, i8),
(MOVSX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
GR32_ABCD)),
- x86_subreg_8bit))>,
+ sub_8bit))>,
Requires<[In32BitMode]>;
def : Pat<(sext_inreg GR16:$src, i8),
(MOVSX16rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src,
GR16_ABCD)),
- x86_subreg_8bit))>,
+ sub_8bit))>,
Requires<[In32BitMode]>;
// trunc patterns
def : Pat<(i16 (trunc GR32:$src)),
- (EXTRACT_SUBREG GR32:$src, x86_subreg_16bit)>;
+ (EXTRACT_SUBREG GR32:$src, sub_16bit)>;
def : Pat<(i8 (trunc GR32:$src)),
(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
- x86_subreg_8bit)>,
+ sub_8bit)>,
Requires<[In32BitMode]>;
def : Pat<(i8 (trunc GR16:$src)),
(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
- x86_subreg_8bit)>,
+ sub_8bit)>,
Requires<[In32BitMode]>;
// h-register tricks
def : Pat<(i8 (trunc (srl_su GR16:$src, (i8 8)))),
(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
- x86_subreg_8bit_hi)>,
+ sub_8bit_hi)>,
Requires<[In32BitMode]>;
def : Pat<(i8 (trunc (srl_su GR32:$src, (i8 8)))),
(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
- x86_subreg_8bit_hi)>,
+ sub_8bit_hi)>,
Requires<[In32BitMode]>;
def : Pat<(srl GR16:$src, (i8 8)),
(EXTRACT_SUBREG
(MOVZX32rr8
(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
- x86_subreg_8bit_hi)),
- x86_subreg_16bit)>,
+ sub_8bit_hi)),
+ sub_16bit)>,
Requires<[In32BitMode]>;
def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
(MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src,
GR16_ABCD)),
- x86_subreg_8bit_hi))>,
+ sub_8bit_hi))>,
Requires<[In32BitMode]>;
def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),
(MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src,
GR16_ABCD)),
- x86_subreg_8bit_hi))>,
+ sub_8bit_hi))>,
Requires<[In32BitMode]>;
def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
(MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
GR32_ABCD)),
- x86_subreg_8bit_hi))>,
+ sub_8bit_hi))>,
Requires<[In32BitMode]>;
def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),
(MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
GR32_ABCD)),
- x86_subreg_8bit_hi))>,
+ sub_8bit_hi))>,
Requires<[In32BitMode]>;
// (shl x, 1) ==> (add x, x)
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index 744af50..0952fc8 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -117,9 +117,6 @@ def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src),
"movd\t{$src, $dst|$dst, $src}", []>;
def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs), (ins GR32:$dst, VR64:$src),
"movd\t{$src, $dst|$dst, $src}", []>;
-def MMX_MOVQ64gmr : MMXRI<0x7F, MRMDestMem, (outs),
- (ins i64mem:$dst, VR64:$src),
- "movq\t{$src, $dst|$dst, $src}", []>;
let neverHasSideEffects = 1 in
def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
@@ -133,10 +130,10 @@ let neverHasSideEffects = 1 in
def MMX_MOVD64from64rr : MMXRI<0x7E, MRMDestReg,
(outs GR64:$dst), (ins VR64:$src),
"movd\t{$src, $dst|$dst, $src}", []>;
-def MMX_MOVD64rrv164 : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
- "movd\t{$src, $dst|$dst, $src}",
- [(set VR64:$dst,
- (v1i64 (scalar_to_vector GR64:$src)))]>;
+def MMX_MOVD64rrv164 : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst,
+ (v1i64 (scalar_to_vector GR64:$src)))]>;
let neverHasSideEffects = 1 in
def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 2129580..5580ba7 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -117,17 +117,17 @@ def alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
return cast<LoadSDNode>(N)->getAlignment() >= 16;
}]>;
-def alignedloadfsf32 : PatFrag<(ops node:$ptr),
+def alignedloadfsf32 : PatFrag<(ops node:$ptr),
(f32 (alignedload node:$ptr))>;
-def alignedloadfsf64 : PatFrag<(ops node:$ptr),
+def alignedloadfsf64 : PatFrag<(ops node:$ptr),
(f64 (alignedload node:$ptr))>;
-def alignedloadv4f32 : PatFrag<(ops node:$ptr),
+def alignedloadv4f32 : PatFrag<(ops node:$ptr),
(v4f32 (alignedload node:$ptr))>;
-def alignedloadv2f64 : PatFrag<(ops node:$ptr),
+def alignedloadv2f64 : PatFrag<(ops node:$ptr),
(v2f64 (alignedload node:$ptr))>;
-def alignedloadv4i32 : PatFrag<(ops node:$ptr),
+def alignedloadv4i32 : PatFrag<(ops node:$ptr),
(v4i32 (alignedload node:$ptr))>;
-def alignedloadv2i64 : PatFrag<(ops node:$ptr),
+def alignedloadv2i64 : PatFrag<(ops node:$ptr),
(v2i64 (alignedload node:$ptr))>;
// Like 'load', but uses special alignment checks suitable for use in
@@ -387,11 +387,11 @@ def MOVSSrr : SSI<0x10, MRMSrcReg,
let AddedComplexity = 15 in
def : Pat<(v4f32 (movl VR128:$src1, VR128:$src2)),
(MOVSSrr (v4f32 VR128:$src1),
- (EXTRACT_SUBREG (v4f32 VR128:$src2), x86_subreg_ss))>;
+ (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
// Implicitly promote a 32-bit scalar to a vector.
def : Pat<(v4f32 (scalar_to_vector FR32:$src)),
- (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, x86_subreg_ss)>;
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>;
// Loading from memory automatically zeroing upper bits.
let canFoldAsLoad = 1, isReMaterializable = 1 in
@@ -403,11 +403,11 @@ def MOVSSrm : SSI<0x10, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
// with SUBREG_TO_REG.
let AddedComplexity = 20 in {
def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
- (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), x86_subreg_ss)>;
+ (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
- (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), x86_subreg_ss)>;
+ (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
- (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), x86_subreg_ss)>;
+ (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
}
// Store scalar value to memory.
@@ -419,7 +419,7 @@ def MOVSSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
addr:$dst),
(MOVSSmr addr:$dst,
- (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss))>;
+ (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
// Conversion instructions
def CVTTSS2SIrr : SSI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins FR32:$src),
@@ -449,7 +449,7 @@ def Int_CVTSS2SIrm : SSI<0x2D, MRMSrcMem, (outs GR32:$dst), (ins f32mem:$src),
[(set GR32:$dst, (int_x86_sse_cvtss2si
(load addr:$src)))]>;
-// Match intrinisics which expect MM and XMM operand(s).
+// Match intrinsics which expect MM and XMM operand(s).
def Int_CVTPS2PIrr : PSI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
"cvtps2pi\t{$src, $dst|$dst, $src}",
[(set VR64:$dst, (int_x86_sse_cvtps2pi VR128:$src))]>;
@@ -509,6 +509,17 @@ let mayLoad = 1 in
def CMPSSrm : SSIi8<0xC2, MRMSrcMem,
(outs FR32:$dst), (ins FR32:$src1, f32mem:$src, SSECC:$cc),
"cmp${cc}ss\t{$src, $dst|$dst, $src}", []>;
+
+ // Accept explicit immediate argument form instead of comparison code.
+let isAsmParserOnly = 1 in {
+ def CMPSSrr_alt : SSIi8<0xC2, MRMSrcReg,
+ (outs FR32:$dst), (ins FR32:$src1, FR32:$src, i8imm:$src2),
+ "cmpss\t{$src2, $src, $dst|$dst, $src, $src2}", []>;
+let mayLoad = 1 in
+ def CMPSSrm_alt : SSIi8<0xC2, MRMSrcMem,
+ (outs FR32:$dst), (ins FR32:$src1, f32mem:$src, i8imm:$src2),
+ "cmpss\t{$src2, $src, $dst|$dst, $src, $src2}", []>;
+}
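// For example, with the _alt forms the parser accepts
// "cmpss $7, %xmm1, %xmm0" directly, which is equivalent to
// "cmpordss %xmm1, %xmm0" (predicate 7 = ordered).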
}
let Defs = [EFLAGS] in {
@@ -518,25 +529,25 @@ def UCOMISSrr: PSI<0x2E, MRMSrcReg, (outs), (ins FR32:$src1, FR32:$src2),
def UCOMISSrm: PSI<0x2E, MRMSrcMem, (outs), (ins FR32:$src1, f32mem:$src2),
"ucomiss\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86cmp FR32:$src1, (loadf32 addr:$src2)))]>;
-
+
def COMISSrr: PSI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
"comiss\t{$src2, $src1|$src1, $src2}", []>;
def COMISSrm: PSI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
"comiss\t{$src2, $src1|$src1, $src2}", []>;
-
+
} // Defs = [EFLAGS]
// Aliases to match intrinsics which expect XMM operand(s).
let Constraints = "$src1 = $dst" in {
def Int_CMPSSrr : SSIi8<0xC2, MRMSrcReg,
- (outs VR128:$dst),
+ (outs VR128:$dst),
(ins VR128:$src1, VR128:$src, SSECC:$cc),
"cmp${cc}ss\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse_cmp_ss
+ [(set VR128:$dst, (int_x86_sse_cmp_ss
VR128:$src1,
VR128:$src, imm:$cc))]>;
def Int_CMPSSrm : SSIi8<0xC2, MRMSrcMem,
- (outs VR128:$dst),
+ (outs VR128:$dst),
(ins VR128:$src1, f32mem:$src, SSECC:$cc),
"cmp${cc}ss\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse_cmp_ss VR128:$src1,
@@ -1009,6 +1020,16 @@ let Constraints = "$src1 = $dst" in {
"cmp${cc}ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse_cmp_ps VR128:$src1,
(memop addr:$src), imm:$cc))]>;
+
+ // Accept explicit immediate argument form instead of comparison code.
+let isAsmParserOnly = 1 in {
+ def CMPPSrri_alt : PSIi8<0xC2, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src, i8imm:$src2),
+                    "cmpps\t{$src2, $src, $dst|$dst, $src, $src2}", []>;
+ def CMPPSrmi_alt : PSIi8<0xC2, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f128mem:$src, i8imm:$src2),
+                    "cmpps\t{$src2, $src, $dst|$dst, $src, $src2}", []>;
+}
}
def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
(CMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>;
@@ -1102,7 +1123,8 @@ def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
}
// Load, store, and memory fence
-def SFENCE : PSI<0xAE, MRM7r, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>;
+def SFENCE : I<0xAE, MRM_F8, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>,
+ TB, Requires<[HasSSE1]>;
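// (MRM_F8 hard-codes the ModRM byte to 0xF8, so sfence emits exactly
// 0F AE F8 rather than taking a memory operand like the other 0F AE forms.)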
// MXCSR register
def LDMXCSR : PSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
@@ -1130,7 +1152,7 @@ def : Pat<(v8i16 immAllZerosV), (V_SET0PI)>;
def : Pat<(v16i8 immAllZerosV), (V_SET0PI)>;
def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
- (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss))>;
+ (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
//===---------------------------------------------------------------------===//
// SSE2 Instructions
@@ -1152,11 +1174,11 @@ def MOVSDrr : SDI<0x10, MRMSrcReg,
let AddedComplexity = 15 in
def : Pat<(v2f64 (movl VR128:$src1, VR128:$src2)),
(MOVSDrr (v2f64 VR128:$src1),
- (EXTRACT_SUBREG (v2f64 VR128:$src2), x86_subreg_sd))>;
+ (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
// Implicitly promote a 64-bit scalar to a vector.
def : Pat<(v2f64 (scalar_to_vector FR64:$src)),
- (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, x86_subreg_sd)>;
+ (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>;
// Loading from memory automatically zeroing upper bits.
let canFoldAsLoad = 1, isReMaterializable = 1, AddedComplexity = 20 in
@@ -1168,15 +1190,15 @@ def MOVSDrm : SDI<0x10, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
// with SUBREG_TO_REG.
let AddedComplexity = 20 in {
def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>;
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>;
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>;
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>;
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
def : Pat<(v2f64 (X86vzload addr:$src)),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>;
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
}
// Store scalar value to memory.
@@ -1188,7 +1210,7 @@ def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
addr:$dst),
(MOVSDmr addr:$dst,
- (EXTRACT_SUBREG (v2f64 VR128:$src), x86_subreg_sd))>;
+ (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
// Conversion instructions
def CVTTSD2SIrr : SDI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins FR64:$src),
@@ -1255,7 +1277,7 @@ def Int_CVTSD2SIrm : SDI<0x2D, MRMSrcMem, (outs GR32:$dst), (ins f128mem:$src),
[(set GR32:$dst, (int_x86_sse2_cvtsd2si
(load addr:$src)))]>;
-// Match intrinisics which expect MM and XMM operand(s).
+// Match intrinsics which expect MM and XMM operand(s).
def Int_CVTPD2PIrr : PDI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
"cvtpd2pi\t{$src, $dst|$dst, $src}",
[(set VR64:$dst, (int_x86_sse_cvtpd2pi VR128:$src))]>;
@@ -1297,6 +1319,17 @@ let mayLoad = 1 in
def CMPSDrm : SDIi8<0xC2, MRMSrcMem,
(outs FR64:$dst), (ins FR64:$src1, f64mem:$src, SSECC:$cc),
"cmp${cc}sd\t{$src, $dst|$dst, $src}", []>;
+
+ // Accept explicit immediate argument form instead of comparison code.
+let isAsmParserOnly = 1 in {
+ def CMPSDrr_alt : SDIi8<0xC2, MRMSrcReg,
+ (outs FR64:$dst), (ins FR64:$src1, FR64:$src, i8imm:$src2),
+ "cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}", []>;
+let mayLoad = 1 in
+ def CMPSDrm_alt : SDIi8<0xC2, MRMSrcMem,
+ (outs FR64:$dst), (ins FR64:$src1, f64mem:$src, i8imm:$src2),
+ "cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}", []>;
+}
}
let Defs = [EFLAGS] in {
@@ -1311,13 +1344,13 @@ def UCOMISDrm: PDI<0x2E, MRMSrcMem, (outs), (ins FR64:$src1, f64mem:$src2),
// Aliases to match intrinsics which expect XMM operand(s).
let Constraints = "$src1 = $dst" in {
def Int_CMPSDrr : SDIi8<0xC2, MRMSrcReg,
- (outs VR128:$dst),
+ (outs VR128:$dst),
(ins VR128:$src1, VR128:$src, SSECC:$cc),
"cmp${cc}sd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cmp_sd VR128:$src1,
VR128:$src, imm:$cc))]>;
def Int_CMPSDrm : SDIi8<0xC2, MRMSrcMem,
- (outs VR128:$dst),
+ (outs VR128:$dst),
(ins VR128:$src1, f64mem:$src, SSECC:$cc),
"cmp${cc}sd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cmp_sd VR128:$src1,
@@ -1655,7 +1688,7 @@ def CVTTPS2DQrm : SSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
def Int_CVTTPS2DQrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
+ [(set VR128:$dst,
(int_x86_sse2_cvttps2dq VR128:$src))]>,
XS, Requires<[HasSSE2]>;
def Int_CVTTPS2DQrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
@@ -1890,6 +1923,16 @@ let Constraints = "$src1 = $dst" in {
"cmp${cc}pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cmp_pd VR128:$src1,
(memop addr:$src), imm:$cc))]>;
+
+ // Accept explicit immediate argument form instead of comparison code.
+let isAsmParserOnly = 1 in {
+ def CMPPDrri_alt : PDIi8<0xC2, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src, i8imm:$src2),
+ "cmppd\t{$src2, $src, $dst|$dst, $src, $src2}", []>;
+ def CMPPDrmi_alt : PDIi8<0xC2, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f128mem:$src, i8imm:$src2),
+ "cmppd\t{$src2, $src, $dst|$dst, $src, $src2}", []>;
+}
}
def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
(CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
@@ -1980,24 +2023,24 @@ let Constraints = "$src1 = $dst" in {
multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
bit Commutable = 0> {
- def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
+ def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]> {
let isCommutable = Commutable;
}
- def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
+ def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (IntId VR128:$src1,
- (bitconvert (memopv2i64
+ (bitconvert (memopv2i64
addr:$src2))))]>;
}
multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
string OpcodeStr,
Intrinsic IntId, Intrinsic IntId2> {
- def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
+ def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>;
@@ -2006,7 +2049,7 @@ multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (IntId VR128:$src1,
(bitconvert (memopv2i64 addr:$src2))))]>;
- def ri : PDIi8<opc2, ImmForm, (outs VR128:$dst),
+ def ri : PDIi8<opc2, ImmForm, (outs VR128:$dst),
(ins VR128:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (IntId2 VR128:$src1, (i32 imm:$src2)))]>;
@@ -2015,13 +2058,13 @@ multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
/// PDI_binop_rm - Simple SSE2 binary operator.
multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType OpVT, bit Commutable = 0> {
- def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
+ def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]> {
let isCommutable = Commutable;
}
- def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
+ def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (OpVT (OpNode VR128:$src1,
@@ -2416,6 +2459,10 @@ def LFENCE : I<0xAE, MRM_E8, (outs), (ins),
def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
"mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>;
+// Pause. This "instruction" is encoded as "rep; nop", so even though it
+// was introduced with SSE2, it's backward compatible.
+def PAUSE : I<0x90, RawFrm, (outs), (ins), "pause", []>, REP;
+
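+// (Encoding note: "pause" is F3 90, a REP prefix on NOP; pre-SSE2 CPUs
+// ignore the prefix and execute a plain NOP.)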
//TODO: custom lower this so as to never even generate the noop
def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm),
(i8 0)), (NOOP)>;
@@ -2462,7 +2509,7 @@ def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
(iPTR 0))), addr:$dst)]>;
def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
- (f64 (EXTRACT_SUBREG (v2f64 VR128:$src), x86_subreg_sd))>;
+ (f64 (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
def MOVPDI2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
"movd\t{$src, $dst|$dst, $src}",
@@ -2903,7 +2950,7 @@ defm PMADDUBSW : SS3I_binop_rm_int_8 <0x04, "pmaddubsw",
defm PMULHRSW : SS3I_binop_rm_int_16<0x0B, "pmulhrsw",
int_x86_ssse3_pmul_hr_sw,
int_x86_ssse3_pmul_hr_sw_128, 1>;
-
+
defm PSHUFB : SS3I_binop_rm_int_8 <0x00, "pshufb",
int_x86_ssse3_pshuf_b,
int_x86_ssse3_pshuf_b_128>;
@@ -3042,10 +3089,10 @@ def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
(MOVSSrr (v4f32 (V_SET0PS)), FR32:$src)>;
def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
(MOVSSrr (v4f32 (V_SET0PS)),
- (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss)))>;
+ (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)))>;
def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
(MOVSSrr (v4i32 (V_SET0PI)),
- (EXTRACT_SUBREG (v4i32 VR128:$src), x86_subreg_ss))>;
+ (EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>;
}
// Splat v2f64 / v2i64
@@ -3181,17 +3228,17 @@ let AddedComplexity = 15 in {
// Setting the lowest element in the vector.
def : Pat<(v4i32 (movl VR128:$src1, VR128:$src2)),
(MOVSSrr (v4i32 VR128:$src1),
- (EXTRACT_SUBREG (v4i32 VR128:$src2), x86_subreg_ss))>;
+ (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>;
def : Pat<(v2i64 (movl VR128:$src1, VR128:$src2)),
(MOVSDrr (v2i64 VR128:$src1),
- (EXTRACT_SUBREG (v2i64 VR128:$src2), x86_subreg_sd))>;
+ (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>;
// vector_shuffle v1, v2 <4, 5, 2, 3> using movsd
def : Pat<(v4f32 (movlp VR128:$src1, VR128:$src2)),
- (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, x86_subreg_sd))>,
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>,
Requires<[HasSSE2]>;
def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)),
- (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, x86_subreg_sd))>,
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>,
Requires<[HasSSE2]>;
}
@@ -3464,14 +3511,14 @@ let Constraints = "$src1 = $dst" in {
let Constraints = "$src1 = $dst" in {
multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType OpVT, bit Commutable = 0> {
- def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
+ def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]>,
OpSize {
let isCommutable = Commutable;
}
- def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
+ def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (OpNode VR128:$src1,
@@ -3949,15 +3996,15 @@ let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in {
def PCMPESTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src3, i8imm:$src5),
"#PCMPESTRM128rr PSEUDO!",
- [(set VR128:$dst,
- (int_x86_sse42_pcmpestrm128
+ [(set VR128:$dst,
+ (int_x86_sse42_pcmpestrm128
VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>, OpSize;
def PCMPESTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src3, i8imm:$src5),
"#PCMPESTRM128rm PSEUDO!",
- [(set VR128:$dst, (int_x86_sse42_pcmpestrm128
- VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5))]>,
+ [(set VR128:$dst, (int_x86_sse42_pcmpestrm128
+ VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5))]>,
OpSize;
}
@@ -3972,7 +4019,7 @@ def PCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs),
let Defs = [ECX, EFLAGS] in {
multiclass SS42AI_pcmpistri<Intrinsic IntId128> {
- def rr : SS42AI<0x63, MRMSrcReg, (outs),
+ def rr : SS42AI<0x63, MRMSrcReg, (outs),
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
"pcmpistri\t{$src3, $src2, $src1|$src1, $src2, $src3}",
[(set ECX, (IntId128 VR128:$src1, VR128:$src2, imm:$src3)),
@@ -4003,7 +4050,7 @@ let Uses = [EAX, EDX] in {
def rm : SS42AI<0x61, MRMSrcMem, (outs),
(ins VR128:$src1, i128mem:$src3, i8imm:$src5),
"pcmpestri\t{$src5, $src3, $src1|$src1, $src3, $src5}",
- [(set ECX,
+ [(set ECX,
(IntId128 VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5)),
(implicit EFLAGS)]>, OpSize;
}
@@ -4081,16 +4128,15 @@ def AESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
OpSize;
def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, i32i8imm:$src2),
+ (ins VR128:$src1, i8imm:$src2),
"aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>,
OpSize;
def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
- (ins i128mem:$src1, i32i8imm:$src2),
+ (ins i128mem:$src1, i8imm:$src2),
"aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_aesni_aeskeygenassist (bitconvert (memopv2i64 addr:$src1)),
imm:$src2))]>,
OpSize;
-
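The PAUSE definition added above depends on an encoding quirk worth spelling out: the REP prefix (0xF3) placed before NOP (opcode 0x90) is exactly the spin-wait hint, so processors that predate SSE2 simply execute a NOP. A standalone sketch of the resulting byte sequence (illustrative only, not LLVM's emitter):

#include <cstdint>
#include <cstdio>
#include <vector>

// "pause" assembles to the REP prefix followed by NOP; CPUs without
// SSE2 decode the pair as a plain no-op, which is the backward
// compatibility the comment in the patch relies on.
int main() {
  std::vector<uint8_t> pause = {0xF3, 0x90}; // rep; nop
  for (uint8_t b : pause)
    std::printf("%02x ", b);
  std::printf("\n");
}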
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index a3e04b0..98975ea 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -37,7 +37,6 @@
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
@@ -145,6 +144,19 @@ unsigned X86RegisterInfo::getX86RegNum(unsigned RegNo) {
case X86::XMM7: case X86::XMM15: case X86::MM7:
return 7;
+ case X86::ES:
+ return 0;
+ case X86::CS:
+ return 1;
+ case X86::SS:
+ return 2;
+ case X86::DS:
+ return 3;
+ case X86::FS:
+ return 4;
+ case X86::GS:
+ return 5;
+
default:
assert(isVirtualRegister(RegNo) && "Unknown physical register!");
llvm_unreachable("Register allocator hasn't allocated reg correctly yet!");
@@ -158,8 +170,7 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
unsigned SubIdx) const {
switch (SubIdx) {
default: return 0;
- case 1:
- // 8-bit
+ case X86::sub_8bit:
if (B == &X86::GR8RegClass) {
if (A->getSize() == 2 || A->getSize() == 4 || A->getSize() == 8)
return A;
@@ -191,12 +202,9 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
return &X86::GR16_NOREXRegClass;
else if (A == &X86::GR16_ABCDRegClass)
return &X86::GR16_ABCDRegClass;
- } else if (B == &X86::FR32RegClass) {
- return A;
}
break;
- case 2:
- // 8-bit hi
+ case X86::sub_8bit_hi:
if (B == &X86::GR8_ABCD_HRegClass) {
if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass ||
A == &X86::GR64_NOREXRegClass ||
@@ -209,12 +217,9 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
else if (A == &X86::GR16RegClass || A == &X86::GR16_ABCDRegClass ||
A == &X86::GR16_NOREXRegClass)
return &X86::GR16_ABCDRegClass;
- } else if (B == &X86::FR64RegClass) {
- return A;
}
break;
- case 3:
- // 16-bit
+ case X86::sub_16bit:
if (B == &X86::GR16RegClass) {
if (A->getSize() == 4 || A->getSize() == 8)
return A;
@@ -238,12 +243,9 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
return &X86::GR32_NOREXRegClass;
else if (A == &X86::GR32_ABCDRegClass)
return &X86::GR64_ABCDRegClass;
- } else if (B == &X86::VR128RegClass) {
- return A;
}
break;
- case 4:
- // 32-bit
+ case X86::sub_32bit:
if (B == &X86::GR32RegClass || B == &X86::GR32_NOSPRegClass) {
if (A->getSize() == 8)
return A;
@@ -261,6 +263,18 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
return &X86::GR64_ABCDRegClass;
}
break;
+ case X86::sub_ss:
+ if (B == &X86::FR32RegClass)
+ return A;
+ break;
+ case X86::sub_sd:
+ if (B == &X86::FR64RegClass)
+ return A;
+ break;
+ case X86::sub_xmm:
+ if (B == &X86::VR128RegClass)
+ return A;
+ break;
}
return 0;
}
@@ -518,6 +532,30 @@ X86RegisterInfo::getFrameIndexOffset(const MachineFunction &MF, int FI) const {
return Offset;
}
+static unsigned getSUBriOpcode(unsigned is64Bit, int64_t Imm) {
+ if (is64Bit) {
+ if (isInt<8>(Imm))
+ return X86::SUB64ri8;
+ return X86::SUB64ri32;
+ } else {
+ if (isInt<8>(Imm))
+ return X86::SUB32ri8;
+ return X86::SUB32ri;
+ }
+}
+
+static unsigned getADDriOpcode(unsigned is64Bit, int64_t Imm) {
+ if (is64Bit) {
+ if (isInt<8>(Imm))
+ return X86::ADD64ri8;
+ return X86::ADD64ri32;
+ } else {
+ if (isInt<8>(Imm))
+ return X86::ADD32ri8;
+ return X86::ADD32ri;
+ }
+}
+
void X86RegisterInfo::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
@@ -537,7 +575,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineInstr *New = 0;
if (Old->getOpcode() == getCallFrameSetupOpcode()) {
New = BuildMI(MF, Old->getDebugLoc(),
- TII.get(Is64Bit ? X86::SUB64ri32 : X86::SUB32ri),
+ TII.get(getSUBriOpcode(Is64Bit, Amount)),
StackPtr)
.addReg(StackPtr)
.addImm(Amount);
@@ -549,9 +587,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
Amount -= CalleeAmt;
if (Amount) {
- unsigned Opc = (Amount < 128) ?
- (Is64Bit ? X86::ADD64ri8 : X86::ADD32ri8) :
- (Is64Bit ? X86::ADD64ri32 : X86::ADD32ri);
+ unsigned Opc = getADDriOpcode(Is64Bit, Amount);
New = BuildMI(MF, Old->getDebugLoc(), TII.get(Opc), StackPtr)
.addReg(StackPtr)
.addImm(Amount);
@@ -571,9 +607,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
// something off the stack pointer, add it back. We do this until we have
// more advanced stack pointer tracking ability.
if (uint64_t CalleeAmt = I->getOperand(1).getImm()) {
- unsigned Opc = (CalleeAmt < 128) ?
- (Is64Bit ? X86::SUB64ri8 : X86::SUB32ri8) :
- (Is64Bit ? X86::SUB64ri32 : X86::SUB32ri);
+ unsigned Opc = getSUBriOpcode(Is64Bit, CalleeAmt);
MachineInstr *Old = I;
MachineInstr *New =
BuildMI(MF, Old->getDebugLoc(), TII.get(Opc),
@@ -691,13 +725,9 @@ void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
const TargetInstrInfo &TII) {
bool isSub = NumBytes < 0;
uint64_t Offset = isSub ? -NumBytes : NumBytes;
- unsigned Opc = isSub
- ? ((Offset < 128) ?
- (Is64Bit ? X86::SUB64ri8 : X86::SUB32ri8) :
- (Is64Bit ? X86::SUB64ri32 : X86::SUB32ri))
- : ((Offset < 128) ?
- (Is64Bit ? X86::ADD64ri8 : X86::ADD32ri8) :
- (Is64Bit ? X86::ADD64ri32 : X86::ADD32ri));
+ unsigned Opc = isSub ?
+ getSUBriOpcode(Is64Bit, Offset) :
+ getADDriOpcode(Is64Bit, Offset);
uint64_t Chunk = (1LL << 31) - 1;
DebugLoc DL = MBB.findDebugLoc(MBBI);
@@ -899,7 +929,7 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
if (Is64Bit && !Fn->hasFnAttr(Attribute::NoRedZone) &&
!needsStackRealignment(MF) &&
!MFI->hasVarSizedObjects() && // No dynamic alloca.
- !MFI->hasCalls() && // No calls.
+ !MFI->adjustsStack() && // No calls.
!Subtarget->isTargetWin64()) { // Win64 has no Red Zone
uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
if (HasFP) MinSize += SlotSize;
@@ -917,7 +947,8 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
// size is bigger than the callers.
if (TailCallReturnAddrDelta < 0) {
MachineInstr *MI =
- BuildMI(MBB, MBBI, DL, TII.get(Is64Bit? X86::SUB64ri32 : X86::SUB32ri),
+ BuildMI(MBB, MBBI, DL,
+ TII.get(getSUBriOpcode(Is64Bit, -TailCallReturnAddrDelta)),
StackPtr)
.addReg(StackPtr)
.addImm(-TailCallReturnAddrDelta);
@@ -1307,7 +1338,7 @@ X86RegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves) const {
// Calculate amount of bytes used for return address storing
int stackGrowth = (Is64Bit ? -8 : -4);
- // Initial state of the frame pointer is esp+4.
+ // Initial state of the frame pointer is esp+stackGrowth.
MachineLocation Dst(MachineLocation::VirtualFP);
MachineLocation Src(StackPtr, stackGrowth);
Moves.push_back(MachineMove(0, Dst, Src));
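The getSUBriOpcode/getADDriOpcode helpers introduced above centralize an immediate-width choice that was previously open-coded as "Amount < 128" at each call site. A standalone sketch of the check they delegate to (what llvm::isInt<8> computes), with hypothetical immediates:

#include <cstdint>
#include <iostream>

// An immediate that fits a signed byte selects the shorter ri8 encoding;
// anything wider falls back to the ri32 form.
static bool fitsSignedByte(int64_t Imm) {
  return Imm >= -128 && Imm <= 127;
}

int main() {
  std::cout << fitsSignedByte(64)     // 1 -> e.g. SUB64ri8
            << fitsSignedByte(4096)   // 0 -> e.g. SUB64ri32
            << '\n';
}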
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index ac96c4c..d0b82e2 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -30,16 +30,6 @@ namespace N86 {
};
}
-namespace X86 {
- /// SubregIndex - The index of various sized subregister classes. Note that
- /// these indices must be kept in sync with the class indices in the
- /// X86RegisterInfo.td file.
- enum SubregIndex {
- SUBREG_8BIT = 1, SUBREG_8BIT_HI = 2, SUBREG_16BIT = 3, SUBREG_32BIT = 4,
- SUBREG_SS = 1, SUBREG_SD = 2, SUBREG_XMM = 3
- };
-}
-
/// DWARFFlavour - Flavour of dwarf regnumbers
///
namespace DWARFFlavour {
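The SubregIndex enum deleted above is the motivation for the TableGen-defined indices in the next file: the old integer scheme reused the same values across register families, so an index was only meaningful relative to a register class. A small sketch of the ambiguity, using the constants from the removed enum (duplicate enumerator values are legal C++):

#include <cstdio>

// With plain integers nothing distinguished register families: index 1
// meant "low 8 bits" of a GPR but "scalar single" of an XMM register.
enum OldSubregIndex {
  SUBREG_8BIT = 1, SUBREG_8BIT_HI = 2, SUBREG_16BIT = 3, SUBREG_32BIT = 4,
  SUBREG_SS = 1, SUBREG_SD = 2, SUBREG_XMM = 3
};

int main() {
  std::printf("%d\n", SUBREG_8BIT == SUBREG_SS); // prints 1: they collide
}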
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td
index 49a6ca0..91cfaa9 100644
--- a/lib/Target/X86/X86RegisterInfo.td
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -18,6 +18,17 @@
//
let Namespace = "X86" in {
+ // Subregister indices.
+ def sub_8bit : SubRegIndex;
+ def sub_8bit_hi : SubRegIndex;
+ def sub_16bit : SubRegIndex;
+ def sub_32bit : SubRegIndex;
+
+ def sub_ss : SubRegIndex;
+ def sub_sd : SubRegIndex;
+ def sub_xmm : SubRegIndex;
+
+
// In the register alias definitions below, we define which registers alias
// which others. We only specify which registers the small registers alias,
// because the register file generator is smart enough to figure out that
@@ -57,17 +68,22 @@ let Namespace = "X86" in {
def BH : Register<"bh">, DwarfRegNum<[3, 3, 3]>;
// 16-bit registers
+ let SubRegIndices = [sub_8bit, sub_8bit_hi] in {
def AX : RegisterWithSubRegs<"ax", [AL,AH]>, DwarfRegNum<[0, 0, 0]>;
def DX : RegisterWithSubRegs<"dx", [DL,DH]>, DwarfRegNum<[1, 2, 2]>;
def CX : RegisterWithSubRegs<"cx", [CL,CH]>, DwarfRegNum<[2, 1, 1]>;
def BX : RegisterWithSubRegs<"bx", [BL,BH]>, DwarfRegNum<[3, 3, 3]>;
+ }
+ let SubRegIndices = [sub_8bit] in {
def SI : RegisterWithSubRegs<"si", [SIL]>, DwarfRegNum<[4, 6, 6]>;
def DI : RegisterWithSubRegs<"di", [DIL]>, DwarfRegNum<[5, 7, 7]>;
def BP : RegisterWithSubRegs<"bp", [BPL]>, DwarfRegNum<[6, 4, 5]>;
def SP : RegisterWithSubRegs<"sp", [SPL]>, DwarfRegNum<[7, 5, 4]>;
+ }
def IP : Register<"ip">, DwarfRegNum<[16]>;
// X86-64 only
+ let SubRegIndices = [sub_8bit] in {
def R8W : RegisterWithSubRegs<"r8w", [R8B]>, DwarfRegNum<[8, -2, -2]>;
def R9W : RegisterWithSubRegs<"r9w", [R9B]>, DwarfRegNum<[9, -2, -2]>;
def R10W : RegisterWithSubRegs<"r10w", [R10B]>, DwarfRegNum<[10, -2, -2]>;
@@ -76,8 +92,9 @@ let Namespace = "X86" in {
def R13W : RegisterWithSubRegs<"r13w", [R13B]>, DwarfRegNum<[13, -2, -2]>;
def R14W : RegisterWithSubRegs<"r14w", [R14B]>, DwarfRegNum<[14, -2, -2]>;
def R15W : RegisterWithSubRegs<"r15w", [R15B]>, DwarfRegNum<[15, -2, -2]>;
-
+ }
// 32-bit registers
+ let SubRegIndices = [sub_16bit] in {
def EAX : RegisterWithSubRegs<"eax", [AX]>, DwarfRegNum<[0, 0, 0]>;
def EDX : RegisterWithSubRegs<"edx", [DX]>, DwarfRegNum<[1, 2, 2]>;
def ECX : RegisterWithSubRegs<"ecx", [CX]>, DwarfRegNum<[2, 1, 1]>;
@@ -97,8 +114,10 @@ let Namespace = "X86" in {
def R13D : RegisterWithSubRegs<"r13d", [R13W]>, DwarfRegNum<[13, -2, -2]>;
def R14D : RegisterWithSubRegs<"r14d", [R14W]>, DwarfRegNum<[14, -2, -2]>;
def R15D : RegisterWithSubRegs<"r15d", [R15W]>, DwarfRegNum<[15, -2, -2]>;
+ }
// 64-bit registers, X86-64 only
+ let SubRegIndices = [sub_32bit] in {
def RAX : RegisterWithSubRegs<"rax", [EAX]>, DwarfRegNum<[0, -2, -2]>;
def RDX : RegisterWithSubRegs<"rdx", [EDX]>, DwarfRegNum<[1, -2, -2]>;
def RCX : RegisterWithSubRegs<"rcx", [ECX]>, DwarfRegNum<[2, -2, -2]>;
@@ -117,6 +136,7 @@ let Namespace = "X86" in {
def R14 : RegisterWithSubRegs<"r14", [R14D]>, DwarfRegNum<[14, -2, -2]>;
def R15 : RegisterWithSubRegs<"r15", [R15D]>, DwarfRegNum<[15, -2, -2]>;
def RIP : RegisterWithSubRegs<"rip", [EIP]>, DwarfRegNum<[16, -2, -2]>;
+ }
// MMX Registers. These are actually aliased to ST0 .. ST7
def MM0 : Register<"mm0">, DwarfRegNum<[41, 29, 29]>;
@@ -137,7 +157,9 @@ let Namespace = "X86" in {
def FP5 : Register<"fp5">;
def FP6 : Register<"fp6">;
- // XMM Registers, used by the various SSE instruction set extensions
+ // XMM Registers, used by the various SSE instruction set extensions.
+  // The sub_ss and sub_sd subregs are the same registers viewed through another register class.
+ let CompositeIndices = [(sub_ss), (sub_sd)] in {
def XMM0: Register<"xmm0">, DwarfRegNum<[17, 21, 21]>;
def XMM1: Register<"xmm1">, DwarfRegNum<[18, 22, 22]>;
def XMM2: Register<"xmm2">, DwarfRegNum<[19, 23, 23]>;
@@ -156,8 +178,10 @@ let Namespace = "X86" in {
def XMM13: Register<"xmm13">, DwarfRegNum<[30, -2, -2]>;
def XMM14: Register<"xmm14">, DwarfRegNum<[31, -2, -2]>;
def XMM15: Register<"xmm15">, DwarfRegNum<[32, -2, -2]>;
+ }
// YMM Registers, used by AVX instructions
+ let SubRegIndices = [sub_xmm] in {
def YMM0: RegisterWithSubRegs<"ymm0", [XMM0]>, DwarfRegNum<[17, 21, 21]>;
def YMM1: RegisterWithSubRegs<"ymm1", [XMM1]>, DwarfRegNum<[18, 22, 22]>;
def YMM2: RegisterWithSubRegs<"ymm2", [XMM2]>, DwarfRegNum<[19, 23, 23]>;
@@ -174,6 +198,7 @@ let Namespace = "X86" in {
def YMM13: RegisterWithSubRegs<"ymm13", [XMM13]>, DwarfRegNum<[30, -2, -2]>;
def YMM14: RegisterWithSubRegs<"ymm14", [XMM14]>, DwarfRegNum<[31, -2, -2]>;
def YMM15: RegisterWithSubRegs<"ymm15", [XMM15]>, DwarfRegNum<[32, -2, -2]>;
+ }
// Floating point stack registers
def ST0 : Register<"st(0)">, DwarfRegNum<[33, 12, 11]>;
@@ -207,106 +232,19 @@ let Namespace = "X86" in {
def DR7 : Register<"dr7">;
// Condition registers
- def ECR0 : Register<"ecr0">;
- def ECR1 : Register<"ecr1">;
- def ECR2 : Register<"ecr2">;
- def ECR3 : Register<"ecr3">;
- def ECR4 : Register<"ecr4">;
- def ECR5 : Register<"ecr5">;
- def ECR6 : Register<"ecr6">;
- def ECR7 : Register<"ecr7">;
-
- def RCR0 : Register<"rcr0">;
- def RCR1 : Register<"rcr1">;
- def RCR2 : Register<"rcr2">;
- def RCR3 : Register<"rcr3">;
- def RCR4 : Register<"rcr4">;
- def RCR5 : Register<"rcr5">;
- def RCR6 : Register<"rcr6">;
- def RCR7 : Register<"rcr7">;
- def RCR8 : Register<"rcr8">;
+ def CR0 : Register<"cr0">;
+ def CR1 : Register<"cr1">;
+ def CR2 : Register<"cr2">;
+ def CR3 : Register<"cr3">;
+ def CR4 : Register<"cr4">;
+ def CR5 : Register<"cr5">;
+ def CR6 : Register<"cr6">;
+ def CR7 : Register<"cr7">;
+ def CR8 : Register<"cr8">;
}
//===----------------------------------------------------------------------===//
-// Subregister Set Definitions... now that we have all of the pieces, define the
-// sub registers for each register.
-//
-
-def x86_subreg_8bit : PatLeaf<(i32 1)>;
-def x86_subreg_8bit_hi : PatLeaf<(i32 2)>;
-def x86_subreg_16bit : PatLeaf<(i32 3)>;
-def x86_subreg_32bit : PatLeaf<(i32 4)>;
-
-def x86_subreg_ss : PatLeaf<(i32 1)>;
-def x86_subreg_sd : PatLeaf<(i32 2)>;
-def x86_subreg_xmm : PatLeaf<(i32 3)>;
-
-def : SubRegSet<1, [AX, CX, DX, BX, SP, BP, SI, DI,
- R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W],
- [AL, CL, DL, BL, SPL, BPL, SIL, DIL,
- R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>;
-
-def : SubRegSet<2, [AX, CX, DX, BX],
- [AH, CH, DH, BH]>;
-
-def : SubRegSet<1, [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI,
- R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D],
- [AL, CL, DL, BL, SPL, BPL, SIL, DIL,
- R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>;
-
-def : SubRegSet<2, [EAX, ECX, EDX, EBX],
- [AH, CH, DH, BH]>;
-
-def : SubRegSet<3, [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI,
- R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D],
- [AX, CX, DX, BX, SP, BP, SI, DI,
- R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W]>;
-
-def : SubRegSet<1, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI,
- R8, R9, R10, R11, R12, R13, R14, R15],
- [AL, CL, DL, BL, SPL, BPL, SIL, DIL,
- R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>;
-
-def : SubRegSet<2, [RAX, RCX, RDX, RBX],
- [AH, CH, DH, BH]>;
-
-def : SubRegSet<3, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI,
- R8, R9, R10, R11, R12, R13, R14, R15],
- [AX, CX, DX, BX, SP, BP, SI, DI,
- R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W]>;
-
-def : SubRegSet<4, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI,
- R8, R9, R10, R11, R12, R13, R14, R15],
- [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI,
- R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D]>;
-
-def : SubRegSet<1, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
- YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15],
- [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
- XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>;
-
-def : SubRegSet<2, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
- YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15],
- [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
- XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>;
-
-def : SubRegSet<3, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
- YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15],
- [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
- XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>;
-
-def : SubRegSet<1, [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
- XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15],
- [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
- XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>;
-
-def : SubRegSet<2, [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
- XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15],
- [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
- XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>;
-
-//===----------------------------------------------------------------------===//
// Register Class Definitions... now that we have all of the pieces, define the
// top-level register classes. The order specified in the register list is
// implicitly defined to be the register allocation order.
@@ -370,7 +308,7 @@ def GR8 : RegisterClass<"X86", [i8], 8,
def GR16 : RegisterClass<"X86", [i16], 16,
[AX, CX, DX, SI, DI, BX, BP, SP,
R8W, R9W, R10W, R11W, R14W, R15W, R12W, R13W]> {
- let SubRegClassList = [GR8, GR8];
+ let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi)];
let MethodProtos = [{
iterator allocation_order_begin(const MachineFunction &MF) const;
iterator allocation_order_end(const MachineFunction &MF) const;
@@ -422,7 +360,7 @@ def GR16 : RegisterClass<"X86", [i16], 16,
def GR32 : RegisterClass<"X86", [i32], 32,
[EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP,
R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D]> {
- let SubRegClassList = [GR8, GR8, GR16];
+ let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)];
let MethodProtos = [{
iterator allocation_order_begin(const MachineFunction &MF) const;
iterator allocation_order_end(const MachineFunction &MF) const;
@@ -477,7 +415,9 @@ def GR32 : RegisterClass<"X86", [i32], 32,
def GR64 : RegisterClass<"X86", [i64], 64,
[RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
RBX, R14, R15, R12, R13, RBP, RSP, RIP]> {
- let SubRegClassList = [GR8, GR8, GR16, GR32];
+ let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi),
+ (GR16 sub_16bit),
+ (GR32 sub_32bit)];
let MethodProtos = [{
iterator allocation_order_end(const MachineFunction &MF) const;
}];
@@ -511,14 +451,8 @@ def DEBUG_REG : RegisterClass<"X86", [i32], 32,
}
// Control registers.
-def CONTROL_REG_32 : RegisterClass<"X86", [i32], 32,
- [ECR0, ECR1, ECR2, ECR3, ECR4, ECR5, ECR6,
- ECR7]> {
-}
-
-def CONTROL_REG_64 : RegisterClass<"X86", [i64], 64,
- [RCR0, RCR1, RCR2, RCR3, RCR4, RCR5, RCR6,
- RCR7, RCR8]> {
+def CONTROL_REG : RegisterClass<"X86", [i64], 64,
+ [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7, CR8]> {
}
// GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD, GR32_ABCD, GR64_ABCD - Subclasses of
@@ -532,20 +466,27 @@ def GR8_ABCD_L : RegisterClass<"X86", [i8], 8, [AL, CL, DL, BL]> {
def GR8_ABCD_H : RegisterClass<"X86", [i8], 8, [AH, CH, DH, BH]> {
}
def GR16_ABCD : RegisterClass<"X86", [i16], 16, [AX, CX, DX, BX]> {
- let SubRegClassList = [GR8_ABCD_L, GR8_ABCD_H];
+ let SubRegClasses = [(GR8_ABCD_L sub_8bit), (GR8_ABCD_H sub_8bit_hi)];
}
def GR32_ABCD : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX, EBX]> {
- let SubRegClassList = [GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD];
+ let SubRegClasses = [(GR8_ABCD_L sub_8bit),
+ (GR8_ABCD_H sub_8bit_hi),
+ (GR16_ABCD sub_16bit)];
}
def GR64_ABCD : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RBX]> {
- let SubRegClassList = [GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD, GR32_ABCD];
+ let SubRegClasses = [(GR8_ABCD_L sub_8bit),
+ (GR8_ABCD_H sub_8bit_hi),
+ (GR16_ABCD sub_16bit),
+ (GR32_ABCD sub_32bit)];
}
def GR32_TC : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX]> {
- let SubRegClassList = [GR8, GR8, GR16];
+ let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)];
}
def GR64_TC : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RSI, RDI,
R8, R9, R11]> {
- let SubRegClassList = [GR8, GR8, GR16, GR32_TC];
+ let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi),
+ (GR16 sub_16bit),
+ (GR32_TC sub_32bit)];
}
// GR8_NOREX - GR8 registers which do not require a REX prefix.
@@ -585,7 +526,7 @@ def GR8_NOREX : RegisterClass<"X86", [i8], 8,
// GR16_NOREX - GR16 registers which do not require a REX prefix.
def GR16_NOREX : RegisterClass<"X86", [i16], 16,
[AX, CX, DX, SI, DI, BX, BP, SP]> {
- let SubRegClassList = [GR8_NOREX, GR8_NOREX];
+ let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi)];
let MethodProtos = [{
iterator allocation_order_end(const MachineFunction &MF) const;
}];
@@ -608,7 +549,8 @@ def GR16_NOREX : RegisterClass<"X86", [i16], 16,
// GR32_NOREX - GR32 registers which do not require a REX prefix.
def GR32_NOREX : RegisterClass<"X86", [i32], 32,
[EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP]> {
- let SubRegClassList = [GR8_NOREX, GR8_NOREX, GR16_NOREX];
+ let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi),
+ (GR16_NOREX sub_16bit)];
let MethodProtos = [{
iterator allocation_order_end(const MachineFunction &MF) const;
}];
@@ -631,7 +573,9 @@ def GR32_NOREX : RegisterClass<"X86", [i32], 32,
// GR64_NOREX - GR64 registers which do not require a REX prefix.
def GR64_NOREX : RegisterClass<"X86", [i64], 64,
[RAX, RCX, RDX, RSI, RDI, RBX, RBP, RSP, RIP]> {
- let SubRegClassList = [GR8_NOREX, GR8_NOREX, GR16_NOREX, GR32_NOREX];
+ let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi),
+ (GR16_NOREX sub_16bit),
+ (GR32_NOREX sub_32bit)];
let MethodProtos = [{
iterator allocation_order_end(const MachineFunction &MF) const;
}];
@@ -656,7 +600,7 @@ def GR64_NOREX : RegisterClass<"X86", [i64], 64,
def GR32_NOSP : RegisterClass<"X86", [i32], 32,
[EAX, ECX, EDX, ESI, EDI, EBX, EBP,
R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D]> {
- let SubRegClassList = [GR8, GR8, GR16];
+ let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)];
let MethodProtos = [{
iterator allocation_order_begin(const MachineFunction &MF) const;
iterator allocation_order_end(const MachineFunction &MF) const;
@@ -709,7 +653,9 @@ def GR32_NOSP : RegisterClass<"X86", [i32], 32,
def GR64_NOSP : RegisterClass<"X86", [i64], 64,
[RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
RBX, R14, R15, R12, R13, RBP]> {
- let SubRegClassList = [GR8, GR8, GR16, GR32_NOSP];
+ let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi),
+ (GR16 sub_16bit),
+ (GR32_NOSP sub_32bit)];
let MethodProtos = [{
iterator allocation_order_end(const MachineFunction &MF) const;
}];
@@ -734,7 +680,9 @@ def GR64_NOSP : RegisterClass<"X86", [i64], 64,
// GR64_NOREX_NOSP - GR64_NOREX registers except RSP.
def GR64_NOREX_NOSP : RegisterClass<"X86", [i64], 64,
[RAX, RCX, RDX, RSI, RDI, RBX, RBP]> {
- let SubRegClassList = [GR8_NOREX, GR8_NOREX, GR16_NOREX, GR32_NOREX];
+ let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi),
+ (GR16_NOREX sub_16bit),
+ (GR32_NOREX sub_32bit)];
let MethodProtos = [{
iterator allocation_order_end(const MachineFunction &MF) const;
}];
@@ -758,7 +706,9 @@ def GR64_NOREX_NOSP : RegisterClass<"X86", [i64], 64,
// A class to support the 'A' assembler constraint: EAX then EDX.
def GR32_AD : RegisterClass<"X86", [i32], 32, [EAX, EDX]> {
- let SubRegClassList = [GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD];
+ let SubRegClasses = [(GR8_ABCD_L sub_8bit),
+ (GR8_ABCD_H sub_8bit_hi),
+ (GR16_ABCD sub_16bit)];
}
// Scalar SSE2 floating point registers.
@@ -836,7 +786,8 @@ def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128,
[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11,
XMM12, XMM13, XMM14, XMM15]> {
- let SubRegClassList = [FR32, FR64];
+ let SubRegClasses = [(FR32 sub_ss), (FR64 sub_sd)];
+
let MethodProtos = [{
iterator allocation_order_end(const MachineFunction &MF) const;
}];
@@ -856,7 +807,7 @@ def VR256 : RegisterClass<"X86", [ v8i32, v4i64, v8f32, v4f64],256,
[YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
YMM8, YMM9, YMM10, YMM11,
YMM12, YMM13, YMM14, YMM15]> {
- let SubRegClassList = [FR32, FR64, VR128];
+ let SubRegClasses = [(FR32 sub_ss), (FR64 sub_sd), (VR128 sub_xmm)];
}
// Status flags registers.
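The SubRegIndices introduced in this file name the nesting the register definitions above rely on: rax reaches eax through sub_32bit, eax reaches ax through sub_16bit, and ax splits into al/ah through sub_8bit and sub_8bit_hi. A standalone illustration of those slices (plain C++, not TableGen):

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t rax = 0x1122334455667788ULL;
  uint32_t eax = static_cast<uint32_t>(rax);    // sub_32bit
  uint16_t ax  = static_cast<uint16_t>(eax);    // sub_16bit
  uint8_t  al  = static_cast<uint8_t>(ax);      // sub_8bit
  uint8_t  ah  = static_cast<uint8_t>(ax >> 8); // sub_8bit_hi
  std::printf("%08x %04x %02x %02x\n", eax, ax, al, ah);
}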
diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp
index cd87b82..6297a27 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -12,11 +12,232 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "x86-selectiondag-info"
-#include "X86SelectionDAGInfo.h"
+#include "X86TargetMachine.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;
-X86SelectionDAGInfo::X86SelectionDAGInfo() {
+X86SelectionDAGInfo::X86SelectionDAGInfo(const X86TargetMachine &TM) :
+ TargetSelectionDAGInfo(TM),
+ Subtarget(&TM.getSubtarget<X86Subtarget>()),
+ TLI(*TM.getTargetLowering()) {
}
X86SelectionDAGInfo::~X86SelectionDAGInfo() {
}
+
+SDValue
+X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile,
+ const Value *DstSV,
+ uint64_t DstSVOff) const {
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+
+ // If not DWORD aligned or size is more than the threshold, call the library.
+ // The libc version is likely to be faster for these cases. It can use the
+ // address value and run time information about the CPU.
+ if ((Align & 3) != 0 ||
+ !ConstantSize ||
+ ConstantSize->getZExtValue() >
+ Subtarget->getMaxInlineSizeThreshold()) {
+ SDValue InFlag(0, 0);
+
+ // Check to see if there is a specialized entry-point for memory zeroing.
+ ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src);
+
+ if (const char *bzeroEntry = V &&
+ V->isNullValue() ? Subtarget->getBZeroEntry() : 0) {
+ EVT IntPtr = TLI.getPointerTy();
+ const Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext());
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Node = Dst;
+ Entry.Ty = IntPtrTy;
+ Args.push_back(Entry);
+ Entry.Node = Size;
+ Args.push_back(Entry);
+ std::pair<SDValue,SDValue> CallResult =
+ TLI.LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()),
+ false, false, false, false,
+ 0, CallingConv::C, false, /*isReturnValueUsed=*/false,
+ DAG.getExternalSymbol(bzeroEntry, IntPtr), Args,
+ DAG, dl);
+ return CallResult.second;
+ }
+
+ // Otherwise have the target-independent code call memset.
+ return SDValue();
+ }
+
+ uint64_t SizeVal = ConstantSize->getZExtValue();
+ SDValue InFlag(0, 0);
+ EVT AVT;
+ SDValue Count;
+ ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Src);
+ unsigned BytesLeft = 0;
+ bool TwoRepStos = false;
+ if (ValC) {
+ unsigned ValReg;
+ uint64_t Val = ValC->getZExtValue() & 255;
+
+ // If the value is a constant, then we can potentially use larger sets.
+ switch (Align & 3) {
+ case 2: // WORD aligned
+ AVT = MVT::i16;
+ ValReg = X86::AX;
+ Val = (Val << 8) | Val;
+ break;
+ case 0: // DWORD aligned
+ AVT = MVT::i32;
+ ValReg = X86::EAX;
+ Val = (Val << 8) | Val;
+ Val = (Val << 16) | Val;
+ if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) { // QWORD aligned
+ AVT = MVT::i64;
+ ValReg = X86::RAX;
+ Val = (Val << 32) | Val;
+ }
+ break;
+ default: // Byte aligned
+ AVT = MVT::i8;
+ ValReg = X86::AL;
+ Count = DAG.getIntPtrConstant(SizeVal);
+ break;
+ }
+
+ if (AVT.bitsGT(MVT::i8)) {
+ unsigned UBytes = AVT.getSizeInBits() / 8;
+ Count = DAG.getIntPtrConstant(SizeVal / UBytes);
+ BytesLeft = SizeVal % UBytes;
+ }
+
+ Chain = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, AVT),
+ InFlag);
+ InFlag = Chain.getValue(1);
+ } else {
+ AVT = MVT::i8;
+ Count = DAG.getIntPtrConstant(SizeVal);
+ Chain = DAG.getCopyToReg(Chain, dl, X86::AL, Src, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX :
+ X86::ECX,
+ Count, InFlag);
+ InFlag = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI :
+ X86::EDI,
+ Dst, InFlag);
+ InFlag = Chain.getValue(1);
+
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
+ Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops, array_lengthof(Ops));
+
+ if (TwoRepStos) {
+ InFlag = Chain.getValue(1);
+ Count = Size;
+ EVT CVT = Count.getValueType();
+ SDValue Left = DAG.getNode(ISD::AND, dl, CVT, Count,
+ DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
+ Chain = DAG.getCopyToReg(Chain, dl, (CVT == MVT::i64) ? X86::RCX :
+ X86::ECX,
+ Left, InFlag);
+ InFlag = Chain.getValue(1);
+ Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDValue Ops[] = { Chain, DAG.getValueType(MVT::i8), InFlag };
+ Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops, array_lengthof(Ops));
+ } else if (BytesLeft) {
+ // Handle the last 1 - 7 bytes.
+ unsigned Offset = SizeVal - BytesLeft;
+ EVT AddrVT = Dst.getValueType();
+ EVT SizeVT = Size.getValueType();
+
+ Chain = DAG.getMemset(Chain, dl,
+ DAG.getNode(ISD::ADD, dl, AddrVT, Dst,
+ DAG.getConstant(Offset, AddrVT)),
+ Src,
+ DAG.getConstant(BytesLeft, SizeVT),
+ Align, isVolatile, DstSV, DstSVOff + Offset);
+ }
+
+  // TODO: Use a TokenFactor, as in memcpy, instead of a single chain.
+ return Chain;
+}
+
+SDValue
+X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain, SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile, bool AlwaysInline,
+ const Value *DstSV,
+ uint64_t DstSVOff,
+ const Value *SrcSV,
+ uint64_t SrcSVOff) const {
+  // This requires the copy size to be a constant, preferably
+ // within a subtarget-specific limit.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (!ConstantSize)
+ return SDValue();
+ uint64_t SizeVal = ConstantSize->getZExtValue();
+ if (!AlwaysInline && SizeVal > Subtarget->getMaxInlineSizeThreshold())
+ return SDValue();
+
+ /// If not DWORD aligned, call the library.
+ if ((Align & 3) != 0)
+ return SDValue();
+
+ // DWORD aligned
+ EVT AVT = MVT::i32;
+ if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) // QWORD aligned
+ AVT = MVT::i64;
+
+ unsigned UBytes = AVT.getSizeInBits() / 8;
+ unsigned CountVal = SizeVal / UBytes;
+ SDValue Count = DAG.getIntPtrConstant(CountVal);
+ unsigned BytesLeft = SizeVal % UBytes;
+
+ SDValue InFlag(0, 0);
+ Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX :
+ X86::ECX,
+ Count, InFlag);
+ InFlag = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI :
+ X86::EDI,
+ Dst, InFlag);
+ InFlag = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RSI :
+ X86::ESI,
+ Src, InFlag);
+ InFlag = Chain.getValue(1);
+
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
+ SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops,
+ array_lengthof(Ops));
+
+ SmallVector<SDValue, 4> Results;
+ Results.push_back(RepMovs);
+ if (BytesLeft) {
+ // Handle the last 1 - 7 bytes.
+ unsigned Offset = SizeVal - BytesLeft;
+ EVT DstVT = Dst.getValueType();
+ EVT SrcVT = Src.getValueType();
+ EVT SizeVT = Size.getValueType();
+ Results.push_back(DAG.getMemcpy(Chain, dl,
+ DAG.getNode(ISD::ADD, dl, DstVT, Dst,
+ DAG.getConstant(Offset, DstVT)),
+ DAG.getNode(ISD::ADD, dl, SrcVT, Src,
+ DAG.getConstant(Offset, SrcVT)),
+ DAG.getConstant(BytesLeft, SizeVT),
+ Align, isVolatile, AlwaysInline,
+ DstSV, DstSVOff + Offset,
+ SrcSV, SrcSVOff + Offset));
+ }
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &Results[0], Results.size());
+}
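The memset lowering above smears a constant byte across the widest store unit the alignment permits, so a single rep stos writes two, four, or eight bytes per iteration. A standalone sketch of that replication step (it mirrors the Val shifts in the hunk; not LLVM code):

#include <cstdint>
#include <cstdio>

// Replicate the low byte of Val across UnitBytes bytes (2, 4, or 8).
static uint64_t replicateByte(uint64_t Val, unsigned UnitBytes) {
  Val &= 0xFF;
  Val = (Val << 8) | Val;                       // 16-bit pattern
  if (UnitBytes >= 4) Val = (Val << 16) | Val;  // 32-bit pattern
  if (UnitBytes == 8) Val = (Val << 32) | Val;  // 64-bit pattern
  return Val;
}

int main() {
  std::printf("%016llx\n", (unsigned long long)
              replicateByte(0xAB, 8)); // abababababababab
}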
diff --git a/lib/Target/X86/X86SelectionDAGInfo.h b/lib/Target/X86/X86SelectionDAGInfo.h
index 9834754..4f30f31 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.h
+++ b/lib/Target/X86/X86SelectionDAGInfo.h
@@ -18,10 +18,40 @@
namespace llvm {
+class X86TargetLowering;
+class X86TargetMachine;
+class X86Subtarget;
+
class X86SelectionDAGInfo : public TargetSelectionDAGInfo {
+ /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const X86Subtarget *Subtarget;
+
+ const X86TargetLowering &TLI;
+
public:
- X86SelectionDAGInfo();
+ explicit X86SelectionDAGInfo(const X86TargetMachine &TM);
~X86SelectionDAGInfo();
+
+ virtual
+ SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile,
+ const Value *DstSV,
+ uint64_t DstSVOff) const;
+
+ virtual
+ SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile, bool AlwaysInline,
+ const Value *DstSV,
+ uint64_t DstSVOff,
+ const Value *SrcSV,
+ uint64_t SrcSVOff) const;
};
}
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index f39904e..f2c5058 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -17,17 +17,13 @@
#include "llvm/PassManager.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegistry.h"
-#include "llvm/Support/CommandLine.h"
-
using namespace llvm;
-static cl::opt<bool> DisableSSEDomain("disable-sse-domain",
- cl::init(false), cl::Hidden,
- cl::desc("Disable SSE Domain Fixing"));
-
static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
Triple TheTriple(TT);
switch (TheTriple.getOS()) {
@@ -43,6 +39,18 @@ static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
}
}
+static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
+ MCContext &Ctx, TargetAsmBackend &TAB,
+ raw_ostream &_OS,
+ MCCodeEmitter *_Emitter,
+ bool RelaxAll) {
+ Triple TheTriple(TT);
+ switch (TheTriple.getOS()) {
+ default:
+ return createMachOStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll);
+ }
+}
+
extern "C" void LLVMInitializeX86Target() {
// Register the target.
RegisterTargetMachine<X86_32TargetMachine> X(TheX86_32Target);
@@ -63,6 +71,12 @@ extern "C" void LLVMInitializeX86Target() {
createX86_32AsmBackend);
TargetRegistry::RegisterAsmBackend(TheX86_64Target,
createX86_64AsmBackend);
+
+ // Register the object streamer.
+ TargetRegistry::RegisterObjectStreamer(TheX86_32Target,
+ createMCStreamer);
+ TargetRegistry::RegisterObjectStreamer(TheX86_64Target,
+ createMCStreamer);
}
@@ -88,7 +102,8 @@ X86TargetMachine::X86TargetMachine(const Target &T, const std::string &TT,
Subtarget.getStackAlignment(),
(Subtarget.isTargetWin64() ? -40 :
(Subtarget.is64Bit() ? -8 : -4))),
- InstrInfo(*this), JITInfo(*this), TLInfo(*this), ELFWriterInfo(*this) {
+ InstrInfo(*this), JITInfo(*this), TLInfo(*this), TSInfo(*this),
+ ELFWriterInfo(*this) {
DefRelocModel = getRelocationModel();
// If no relocation model was picked, default as appropriate for the target.
@@ -178,8 +193,7 @@ bool X86TargetMachine::addPostRegAlloc(PassManagerBase &PM,
bool X86TargetMachine::addPreEmitPass(PassManagerBase &PM,
CodeGenOpt::Level OptLevel) {
- if (OptLevel != CodeGenOpt::None && Subtarget.hasSSE2() &&
- !DisableSSEDomain) {
+ if (OptLevel != CodeGenOpt::None && Subtarget.hasSSE2()) {
PM.add(createSSEDomainFixPass());
return true;
}
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index dc4234c..f9fb424 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -23,6 +23,7 @@
#include "X86JITInfo.h"
#include "X86Subtarget.h"
#include "X86ISelLowering.h"
+#include "X86SelectionDAGInfo.h"
namespace llvm {
@@ -35,6 +36,7 @@ class X86TargetMachine : public LLVMTargetMachine {
X86InstrInfo InstrInfo;
X86JITInfo JITInfo;
X86TargetLowering TLInfo;
+ X86SelectionDAGInfo TSInfo;
X86ELFWriterInfo ELFWriterInfo;
Reloc::Model DefRelocModel; // Reloc model before it's overridden.
@@ -54,6 +56,9 @@ public:
virtual const X86TargetLowering *getTargetLowering() const {
return &TLInfo;
}
+ virtual const X86SelectionDAGInfo *getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
virtual const X86RegisterInfo *getRegisterInfo() const {
return &InstrInfo.getRegisterInfo();
}
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index 3990b8b..b230572 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -80,7 +80,7 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
setShiftAmountType(MVT::i32);
setStackPointerRegisterToSaveRestore(XCore::SP);
- setSchedulingPreference(SchedulingForRegPressure);
+ setSchedulingPreference(Sched::RegPressure);
// Use i32 for setcc operations results (slt, sgt, ...).
setBooleanContents(ZeroOrOneBooleanContent);
diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp
index c983112..5260258 100644
--- a/lib/Target/XCore/XCoreInstrInfo.cpp
+++ b/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -361,9 +361,8 @@ bool XCoreInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const {
- DebugLoc DL;
- if (I != MBB.end()) DL = I->getDebugLoc();
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const {
if (DestRC == SrcRC) {
if (DestRC == XCore::GRRegsRegisterClass) {
@@ -395,7 +394,8 @@ void XCoreInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned SrcReg, bool isKill,
int FrameIndex,
- const TargetRegisterClass *RC) const
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const
{
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
@@ -408,7 +408,8 @@ void XCoreInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
void XCoreInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC) const
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const
{
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
@@ -419,7 +420,8 @@ void XCoreInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
bool XCoreInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI) const {
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
if (CSI.empty()) {
return true;
}
@@ -437,7 +439,7 @@ bool XCoreInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MBB.addLiveIn(it->getReg());
storeRegToStackSlot(MBB, MI, it->getReg(), true,
- it->getFrameIdx(), it->getRegClass());
+ it->getFrameIdx(), it->getRegClass(), &RI);
if (emitFrameMoves) {
MCSymbol *SaveLabel = MF->getContext().CreateTempSymbol();
BuildMI(MBB, MI, DL, get(XCore::DBG_LABEL)).addSym(SaveLabel);
@@ -449,7 +451,8 @@ bool XCoreInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
bool XCoreInstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI) const
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const
{
bool AtStart = MI == MBB.begin();
MachineBasicBlock::iterator BeforeI = MI;
@@ -460,7 +463,7 @@ bool XCoreInstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
loadRegFromStackSlot(MBB, MI, it->getReg(),
it->getFrameIdx(),
- it->getRegClass());
+ it->getRegClass(), &RI);
assert(MI != MBB.begin() &&
"loadRegFromStackSlot didn't insert any code!");
// Insert in reverse order. loadRegFromStackSlot can insert multiple
diff --git a/lib/Target/XCore/XCoreInstrInfo.h b/lib/Target/XCore/XCoreInstrInfo.h
index 3e0a765..9035ea9 100644
--- a/lib/Target/XCore/XCoreInstrInfo.h
+++ b/lib/Target/XCore/XCoreInstrInfo.h
@@ -67,25 +67,30 @@ public:
MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const;
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const;
virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned SrcReg, bool isKill, int FrameIndex,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI) const;
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI) const;
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
virtual bool ReverseBranchCondition(
SmallVectorImpl<MachineOperand> &Cond) const;
diff --git a/lib/Target/XCore/XCoreSelectionDAGInfo.cpp b/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
index 6aac237..44aeb60 100644
--- a/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
+++ b/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
@@ -12,10 +12,11 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "xcore-selectiondag-info"
-#include "XCoreSelectionDAGInfo.h"
+#include "XCoreTargetMachine.h"
using namespace llvm;
-XCoreSelectionDAGInfo::XCoreSelectionDAGInfo() {
+XCoreSelectionDAGInfo::XCoreSelectionDAGInfo(const XCoreTargetMachine &TM)
+ : TargetSelectionDAGInfo(TM) {
}
XCoreSelectionDAGInfo::~XCoreSelectionDAGInfo() {
diff --git a/lib/Target/XCore/XCoreSelectionDAGInfo.h b/lib/Target/XCore/XCoreSelectionDAGInfo.h
index fd96716..0386968 100644
--- a/lib/Target/XCore/XCoreSelectionDAGInfo.h
+++ b/lib/Target/XCore/XCoreSelectionDAGInfo.h
@@ -18,9 +18,11 @@
namespace llvm {
+class XCoreTargetMachine;
+
class XCoreSelectionDAGInfo : public TargetSelectionDAGInfo {
public:
- XCoreSelectionDAGInfo();
+ explicit XCoreSelectionDAGInfo(const XCoreTargetMachine &TM);
~XCoreSelectionDAGInfo();
};
diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp
index 267f46a..b0013eb 100644
--- a/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -28,7 +28,8 @@ XCoreTargetMachine::XCoreTargetMachine(const Target &T, const std::string &TT,
"i16:16:32-i32:32:32-i64:32:32-n32"),
InstrInfo(),
FrameInfo(*this),
- TLInfo(*this) {
+ TLInfo(*this),
+ TSInfo(*this) {
}
bool XCoreTargetMachine::addInstSelector(PassManagerBase &PM,
diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h
index 701a6f1..14073ba 100644
--- a/lib/Target/XCore/XCoreTargetMachine.h
+++ b/lib/Target/XCore/XCoreTargetMachine.h
@@ -20,6 +20,7 @@
#include "XCoreSubtarget.h"
#include "XCoreInstrInfo.h"
#include "XCoreISelLowering.h"
+#include "XCoreSelectionDAGInfo.h"
namespace llvm {
@@ -29,6 +30,7 @@ class XCoreTargetMachine : public LLVMTargetMachine {
XCoreInstrInfo InstrInfo;
XCoreFrameInfo FrameInfo;
XCoreTargetLowering TLInfo;
+ XCoreSelectionDAGInfo TSInfo;
public:
XCoreTargetMachine(const Target &T, const std::string &TT,
const std::string &FS);
@@ -40,6 +42,10 @@ public:
return &TLInfo;
}
+ virtual const XCoreSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+
virtual const TargetRegisterInfo *getRegisterInfo() const {
return &InstrInfo.getRegisterInfo();
}