Diffstat (limited to 'lib/Target')
lib/Target/ARM/ARM.td | 24
lib/Target/ARM/ARMAddressingModes.h | 24
lib/Target/ARM/ARMAsmBackend.cpp | 36
lib/Target/ARM/ARMAsmPrinter.cpp | 375
lib/Target/ARM/ARMAsmPrinter.h | 9
lib/Target/ARM/ARMBaseInfo.h | 53
lib/Target/ARM/ARMBaseInstrInfo.cpp | 320
lib/Target/ARM/ARMBaseInstrInfo.h | 40
lib/Target/ARM/ARMBaseRegisterInfo.cpp | 73
lib/Target/ARM/ARMBaseRegisterInfo.h | 11
lib/Target/ARM/ARMCallingConv.td | 3
lib/Target/ARM/ARMCodeEmitter.cpp | 11
lib/Target/ARM/ARMConstantIslandPass.cpp | 39
lib/Target/ARM/ARMExpandPseudoInsts.cpp | 132
lib/Target/ARM/ARMFastISel.cpp | 232
lib/Target/ARM/ARMFrameLowering.cpp | 65
lib/Target/ARM/ARMFrameLowering.h | 6
lib/Target/ARM/ARMHazardRecognizer.cpp | 2
lib/Target/ARM/ARMISelDAGToDAG.cpp | 148
lib/Target/ARM/ARMISelLowering.cpp | 958
lib/Target/ARM/ARMISelLowering.h | 40
lib/Target/ARM/ARMInstrFormats.td | 144
lib/Target/ARM/ARMInstrInfo.td | 1293
lib/Target/ARM/ARMInstrNEON.td | 329
lib/Target/ARM/ARMInstrThumb.td | 60
lib/Target/ARM/ARMInstrThumb2.td | 267
lib/Target/ARM/ARMInstrVFP.td | 70
lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 78
lib/Target/ARM/ARMMCAsmInfo.cpp | 12
lib/Target/ARM/ARMMCCodeEmitter.cpp | 33
lib/Target/ARM/ARMMCExpr.h | 3
lib/Target/ARM/ARMRegisterInfo.cpp | 18
lib/Target/ARM/ARMRegisterInfo.td | 59
lib/Target/ARM/ARMScheduleA9.td | 371
lib/Target/ARM/ARMSelectionDAGInfo.cpp | 2
lib/Target/ARM/ARMSubtarget.cpp | 2
lib/Target/ARM/ARMSubtarget.h | 15
lib/Target/ARM/ARMTargetMachine.cpp | 31
lib/Target/ARM/ARMTargetObjectFile.cpp | 3
lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 391
lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 61
lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp | 825
lib/Target/ARM/Disassembler/ARMDisassemblerCore.h | 76
lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h | 398
lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp | 126
lib/Target/ARM/InstPrinter/ARMInstPrinter.h | 20
lib/Target/ARM/MLxExpansionPass.cpp | 28
lib/Target/ARM/README.txt | 24
lib/Target/ARM/Thumb1FrameLowering.cpp | 34
lib/Target/ARM/Thumb1FrameLowering.h | 2
lib/Target/ARM/Thumb1RegisterInfo.cpp | 99
lib/Target/ARM/Thumb1RegisterInfo.h | 8
lib/Target/ARM/Thumb2InstrInfo.cpp | 19
lib/Target/ARM/Thumb2RegisterInfo.cpp | 29
lib/Target/ARM/Thumb2RegisterInfo.h | 3
lib/Target/ARM/Thumb2SizeReduction.cpp | 261
lib/Target/Alpha/Alpha.td | 2
lib/Target/Alpha/AlphaISelLowering.cpp | 2
lib/Target/Alpha/AlphaInstrInfo.td | 2
lib/Target/Alpha/README.txt | 4
lib/Target/Blackfin/BlackfinISelLowering.cpp | 2
lib/Target/CBackend/CBackend.cpp | 18
lib/Target/CellSPU/SPU64InstrInfo.td | 2
lib/Target/CellSPU/SPUAsmPrinter.cpp | 7
lib/Target/CellSPU/SPUISelDAGToDAG.cpp | 7
lib/Target/CellSPU/SPUISelLowering.cpp | 2
lib/Target/CellSPU/SPUISelLowering.h | 8
lib/Target/CellSPU/SPUInstrFormats.td | 22
lib/Target/CellSPU/SPUInstrInfo.cpp | 64
lib/Target/CellSPU/SPUInstrInfo.td | 70
lib/Target/CellSPU/SPURegisterInfo.h | 8
lib/Target/CppBackend/CPPBackend.cpp | 6
lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp | 129
lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h | 5
lib/Target/MBlaze/MBlaze.td | 27
lib/Target/MBlaze/MBlazeAsmBackend.cpp | 15
lib/Target/MBlaze/MBlazeAsmPrinter.cpp | 3
lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp | 2
lib/Target/MBlaze/MBlazeISelLowering.cpp | 8
lib/Target/MBlaze/MBlazeInstrFPU.td | 43
lib/Target/MBlaze/MBlazeInstrFSL.td | 12
lib/Target/MBlaze/MBlazeInstrFormats.td | 2
lib/Target/MBlaze/MBlazeInstrInfo.cpp | 2
lib/Target/MBlaze/MBlazeInstrInfo.h | 1
lib/Target/MBlaze/MBlazeInstrInfo.td | 150
lib/Target/MBlaze/MBlazeRegisterInfo.cpp | 20
lib/Target/MBlaze/MBlazeRegisterInfo.h | 2
lib/Target/MBlaze/MBlazeRegisterInfo.td | 25
lib/Target/MBlaze/MBlazeSchedule.td | 83
lib/Target/MBlaze/MBlazeSchedule3.td | 236
lib/Target/MBlaze/MBlazeSchedule5.td | 267
lib/Target/MBlaze/MBlazeSubtarget.cpp | 36
lib/Target/MBlaze/MBlazeSubtarget.h | 37
lib/Target/MBlaze/MBlazeTargetMachine.cpp | 18
lib/Target/MBlaze/MBlazeTargetMachine.h | 7
lib/Target/MBlaze/TODO | 7
lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h | 5
lib/Target/MSP430/MSP430AsmPrinter.cpp | 3
lib/Target/MSP430/MSP430ISelLowering.cpp | 2
lib/Target/Mips/CMakeLists.txt | 1
lib/Target/Mips/Mips.h | 3
lib/Target/Mips/Mips.td | 4
lib/Target/Mips/MipsAsmPrinter.cpp | 115
lib/Target/Mips/MipsCallingConv.td | 18
lib/Target/Mips/MipsExpandPseudo.cpp | 117
lib/Target/Mips/MipsFrameLowering.cpp | 99
lib/Target/Mips/MipsFrameLowering.h | 4
lib/Target/Mips/MipsISelDAGToDAG.cpp | 172
lib/Target/Mips/MipsISelLowering.cpp | 715
lib/Target/Mips/MipsISelLowering.h | 36
lib/Target/Mips/MipsInstrFPU.td | 215
lib/Target/Mips/MipsInstrFormats.td | 85
lib/Target/Mips/MipsInstrInfo.cpp | 381
lib/Target/Mips/MipsInstrInfo.h | 68
lib/Target/Mips/MipsInstrInfo.td | 152
lib/Target/Mips/MipsMCAsmInfo.h | 2
lib/Target/Mips/MipsRegisterInfo.cpp | 61
lib/Target/Mips/MipsRegisterInfo.h | 2
lib/Target/Mips/MipsRegisterInfo.td | 36
lib/Target/Mips/MipsSchedule.td | 2
lib/Target/Mips/MipsSubtarget.cpp | 4
lib/Target/Mips/MipsSubtarget.h | 16
lib/Target/Mips/MipsTargetMachine.cpp | 6
lib/Target/Mips/MipsTargetMachine.h | 1
lib/Target/Mips/MipsTargetObjectFile.h | 10
lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp | 2
lib/Target/PTX/PTX.h | 8
lib/Target/PTX/PTX.td | 31
lib/Target/PTX/PTXAsmPrinter.cpp | 210
lib/Target/PTX/PTXFrameLowering.h | 3
lib/Target/PTX/PTXISelDAGToDAG.cpp | 87
lib/Target/PTX/PTXISelLowering.cpp | 114
lib/Target/PTX/PTXISelLowering.h | 9
lib/Target/PTX/PTXInstrInfo.cpp | 278
lib/Target/PTX/PTXInstrInfo.h | 130
lib/Target/PTX/PTXInstrInfo.td | 854
lib/Target/PTX/PTXIntrinsicInstrInfo.td | 84
lib/Target/PTX/PTXMCAsmStreamer.cpp | 22
lib/Target/PTX/PTXMFInfoExtract.cpp | 4
lib/Target/PTX/PTXMachineFunctionInfo.h | 21
lib/Target/PTX/PTXRegisterInfo.td | 395
lib/Target/PTX/PTXSubtarget.cpp | 30
lib/Target/PTX/PTXSubtarget.h | 51
lib/Target/PTX/PTXTargetMachine.cpp | 42
lib/Target/PTX/PTXTargetMachine.h | 26
lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp | 8
lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h | 7
lib/Target/PowerPC/PPCAsmBackend.cpp | 8
lib/Target/PowerPC/PPCAsmPrinter.cpp | 3
lib/Target/PowerPC/PPCISelLowering.cpp | 39
lib/Target/PowerPC/PPCInstr64Bit.td | 2
lib/Target/PowerPC/PPCInstrInfo.td | 2
lib/Target/PowerPC/PPCMCAsmInfo.cpp | 2
lib/Target/PowerPC/PPCSubtarget.cpp | 15
lib/Target/PowerPC/PPCSubtarget.h | 16
lib/Target/PowerPC/PPCTargetMachine.cpp | 10
lib/Target/README.txt | 156
lib/Target/Sparc/SparcISelLowering.cpp | 2
lib/Target/SubtargetFeature.cpp | 8
lib/Target/SystemZ/SystemZISelLowering.cpp | 2
lib/Target/TargetData.cpp | 10
lib/Target/TargetInstrInfo.cpp | 7
lib/Target/TargetLibraryInfo.cpp | 17
lib/Target/TargetLoweringObjectFile.cpp | 21
lib/Target/TargetMachine.cpp | 16
lib/Target/X86/AsmParser/X86AsmParser.cpp | 139
lib/Target/X86/Disassembler/X86Disassembler.cpp | 5
lib/Target/X86/Disassembler/X86DisassemblerDecoder.c | 283
lib/Target/X86/Disassembler/X86DisassemblerDecoder.h | 86
lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h | 29
lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp | 19
lib/Target/X86/InstPrinter/X86ATTInstPrinter.h | 14
lib/Target/X86/InstPrinter/X86InstComments.cpp | 40
lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp | 1
lib/Target/X86/InstPrinter/X86IntelInstPrinter.h | 9
lib/Target/X86/README-X86-64.txt | 2
lib/Target/X86/README.txt | 179
lib/Target/X86/Utils/X86ShuffleDecode.cpp | 66
lib/Target/X86/Utils/X86ShuffleDecode.h | 26
lib/Target/X86/X86.td | 23
lib/Target/X86/X86AsmBackend.cpp | 65
lib/Target/X86/X86AsmPrinter.cpp | 5
lib/Target/X86/X86CallingConv.td | 7
lib/Target/X86/X86CodeEmitter.cpp | 8
lib/Target/X86/X86FastISel.cpp | 507
lib/Target/X86/X86FloatingPoint.cpp | 2
lib/Target/X86/X86FrameLowering.cpp | 161
lib/Target/X86/X86ISelDAGToDAG.cpp | 75
lib/Target/X86/X86ISelLowering.cpp | 264
lib/Target/X86/X86ISelLowering.h | 9
lib/Target/X86/X86Instr3DNow.td | 109
lib/Target/X86/X86InstrArithmetic.td | 2
lib/Target/X86/X86InstrControl.td | 10
lib/Target/X86/X86InstrFormats.td | 66
lib/Target/X86/X86InstrFragmentsSIMD.td | 2
lib/Target/X86/X86InstrInfo.cpp | 41
lib/Target/X86/X86InstrInfo.h | 136
lib/Target/X86/X86InstrInfo.td | 78
lib/Target/X86/X86InstrSSE.td | 861
lib/Target/X86/X86InstrSystem.td | 40
lib/Target/X86/X86MCAsmInfo.cpp | 23
lib/Target/X86/X86MCAsmInfo.h | 8
lib/Target/X86/X86MCCodeEmitter.cpp | 30
lib/Target/X86/X86RegisterInfo.cpp | 51
lib/Target/X86/X86RegisterInfo.h | 6
lib/Target/X86/X86RegisterInfo.td | 20
lib/Target/X86/X86SelectionDAGInfo.cpp | 2
lib/Target/X86/X86Subtarget.cpp | 3
lib/Target/X86/X86Subtarget.h | 19
lib/Target/X86/X86TargetMachine.cpp | 48
lib/Target/X86/X86TargetObjectFile.cpp | 23
lib/Target/X86/X86TargetObjectFile.h | 10
lib/Target/XCore/XCoreISelDAGToDAG.cpp | 186
lib/Target/XCore/XCoreISelLowering.cpp | 4
lib/Target/XCore/XCoreInstrInfo.td | 92
lib/Target/XCore/XCoreRegisterInfo.cpp | 5
lib/Target/XCore/XCoreRegisterInfo.h | 2
217 files changed, 12883 insertions(+), 5263 deletions(-)
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index bf4315f..6af5f85 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -51,6 +51,12 @@ def FeatureVFPOnlySP : SubtargetFeature<"fp-only-sp", "FPOnlySP", "true",
// to just not use them.
def FeatureHasSlowFPVMLx : SubtargetFeature<"slowfpvmlx", "SlowFPVMLx", "true",
"Disable VFP / NEON MAC instructions">;
+
+// Cortex-A8 / A9 Advanced SIMD has multiplier accumulator forwarding.
+def FeatureVMLxForwarding : SubtargetFeature<"vmlx-forwarding",
+ "HasVMLxForwarding", "true",
+ "Has multiplier accumulator forwarding">;
+
// Some processors benefit from using NEON instructions for scalar
// single-precision FP operations.
def FeatureNEONForFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP",
@@ -61,6 +67,14 @@ def FeatureNEONForFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP",
def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true",
"Prefer 32-bit Thumb instrs">;
+/// Some instructions update CPSR partially, which can add a false dependency
+/// for out-of-order implementations, e.g. Cortex-A9, unless each individual
+/// bit is mapped to a separate physical register. Avoid partial CPSR updates
+/// for these processors.
+def FeatureAvoidPartialCPSR : SubtargetFeature<"avoid-partial-cpsr",
+ "AvoidCPSRPartialUpdate", "true",
+ "Avoid CPSR partial update for OOO execution">;
+
// Multiprocessing extension.
def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true",
"Supports Multiprocessing extension">;
@@ -100,11 +114,13 @@ def ProcOthers : SubtargetFeature<"others", "ARMProcFamily", "Others",
def ProcA8 : SubtargetFeature<"a8", "ARMProcFamily", "CortexA8",
"Cortex-A8 ARM processors",
[FeatureSlowFPBrcc, FeatureNEONForFP,
- FeatureHasSlowFPVMLx, FeatureT2XtPk]>;
+ FeatureHasSlowFPVMLx, FeatureVMLxForwarding,
+ FeatureT2XtPk]>;
def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9",
"Cortex-A9 ARM processors",
- [FeatureHasSlowFPVMLx, FeatureT2XtPk,
- FeatureFP16]>;
+ [FeatureVMLxForwarding,
+ FeatureT2XtPk, FeatureFP16,
+ FeatureAvoidPartialCPSR]>;
class ProcNoItin<string Name, list<SubtargetFeature> Features>
: Processor<Name, GenericItineraries, Features>;
@@ -171,6 +187,8 @@ def : Processor<"cortex-a8", CortexA8Itineraries,
[ArchV7A, ProcA8]>;
def : Processor<"cortex-a9", CortexA9Itineraries,
[ArchV7A, ProcA9]>;
+def : Processor<"cortex-a9-mp", CortexA9Itineraries,
+ [ArchV7A, ProcA9, FeatureMP]>;
// V7M Processors.
def : ProcNoItin<"cortex-m3", [ArchV7M]>;
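The two features added above become plain boolean fields on the subtarget, named by their second template argument (HasVMLxForwarding, AvoidCPSRPartialUpdate). A minimal sketch of how the processor definitions map onto those bits; the real parsing is generated by TableGen, and the struct and helper below are illustrative stand-ins, not LLVM API:

    #include <string>

    // Simplified stand-in for the generated subtarget: each SubtargetFeature's
    // second template argument above names a boolean field like these.
    struct SubtargetModel {
      bool HasVMLxForwarding = false;      // FeatureVMLxForwarding
      bool AvoidCPSRPartialUpdate = false; // FeatureAvoidPartialCPSR
      bool HasMPExtension = false;         // FeatureMP
    };

    // Hypothetical helper mirroring the ProcA9 / "cortex-a9-mp" definitions.
    SubtargetModel modelFor(const std::string &CPU) {
      SubtargetModel S;
      if (CPU == "cortex-a9" || CPU == "cortex-a9-mp") {
        S.HasVMLxForwarding = true;
        S.AvoidCPSRPartialUpdate = true;
      }
      if (CPU == "cortex-a9-mp")
        S.HasMPExtension = true;
      return S;
    }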
diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h
index 19fbf05..595708f 100644
--- a/lib/Target/ARM/ARMAddressingModes.h
+++ b/lib/Target/ARM/ARMAddressingModes.h
@@ -408,16 +408,18 @@ namespace ARM_AM {
//
// The first operand is always a Reg. The second operand is a reg if in
// reg/reg form, otherwise it's reg#0. The third field encodes the operation
- // in bit 12, the immediate in bits 0-11, and the shift op in 13-15.
+ // in bit 12, the immediate in bits 0-11, and the shift op in 13-15. The
+ // fourth field, bits 16-17, encodes the index mode.
//
// If this addressing mode is a frame index (before prolog/epilog insertion
// and code rewriting), this operand will have the form: FI#, reg0, <offs>
// with no shift amount for the frame offset.
//
- static inline unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO) {
+ static inline unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO,
+ unsigned IdxMode = 0) {
assert(Imm12 < (1 << 12) && "Imm too large!");
bool isSub = Opc == sub;
- return Imm12 | ((int)isSub << 12) | (SO << 13);
+ return Imm12 | ((int)isSub << 12) | (SO << 13) | (IdxMode << 16) ;
}
static inline unsigned getAM2Offset(unsigned AM2Opc) {
return AM2Opc & ((1 << 12)-1);
@@ -426,7 +428,10 @@ namespace ARM_AM {
return ((AM2Opc >> 12) & 1) ? sub : add;
}
static inline ShiftOpc getAM2ShiftOpc(unsigned AM2Opc) {
- return (ShiftOpc)(AM2Opc >> 13);
+ return (ShiftOpc)((AM2Opc >> 13) & 7);
+ }
+ static inline unsigned getAM2IdxMode(unsigned AM2Opc) {
+ return (AM2Opc >> 16);
}
@@ -441,12 +446,14 @@ namespace ARM_AM {
//
// The first operand is always a Reg. The second operand is a reg if in
// reg/reg form, otherwise it's reg#0. The third field encodes the operation
- // in bit 8, the immediate in bits 0-7.
+ // in bit 8, the immediate in bits 0-7. The fourth field, bits 9-10, encodes
+ // the index mode.
/// getAM3Opc - This function encodes the addrmode3 opc field.
- static inline unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset) {
+ static inline unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset,
+ unsigned IdxMode = 0) {
bool isSub = Opc == sub;
- return ((int)isSub << 8) | Offset;
+ return ((int)isSub << 8) | Offset | (IdxMode << 9);
}
static inline unsigned char getAM3Offset(unsigned AM3Opc) {
return AM3Opc & 0xFF;
@@ -454,6 +461,9 @@ namespace ARM_AM {
static inline AddrOpc getAM3Op(unsigned AM3Opc) {
return ((AM3Opc >> 8) & 1) ? sub : add;
}
+ static inline unsigned getAM3IdxMode(unsigned AM3Opc) {
+ return (AM3Opc >> 9);
+ }
//===--------------------------------------------------------------------===//
// Addressing Mode #4
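With this change the AM2 opc word carries four packed fields: the immediate in bits 0-11, add/sub in bit 12, the shift opcode in bits 13-15, and the index mode in bits 16-17; that is also why getAM2ShiftOpc now masks with 7 instead of taking every bit above bit 13. A self-contained round-trip sketch of the same packing, with plain unsigned values standing in for the AddrOpc/ShiftOpc enums:

    #include <cassert>

    // Pack: imm12 in bits 0-11, add/sub in bit 12, shift opc in bits 13-15,
    // index mode in bits 16-17 (mirrors getAM2Opc above).
    unsigned packAM2(bool isSub, unsigned Imm12, unsigned SO, unsigned IdxMode) {
      assert(Imm12 < (1u << 12) && "Imm too large!");
      return Imm12 | ((unsigned)isSub << 12) | (SO << 13) | (IdxMode << 16);
    }

    int main() {
      unsigned Opc = packAM2(/*isSub=*/true, 0x123, /*SO=*/5, /*IdxMode=*/2);
      assert((Opc & 0xFFF) == 0x123); // getAM2Offset
      assert(((Opc >> 12) & 1) == 1); // getAM2Op: sub
      assert(((Opc >> 13) & 7) == 5); // getAM2ShiftOpc, now masked with 7
      assert((Opc >> 16) == 2);       // getAM2IdxMode
      return 0;
    }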
diff --git a/lib/Target/ARM/ARMAsmBackend.cpp b/lib/Target/ARM/ARMAsmBackend.cpp
index ec23449..f062819 100644
--- a/lib/Target/ARM/ARMAsmBackend.cpp
+++ b/lib/Target/ARM/ARMAsmBackend.cpp
@@ -246,7 +246,7 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
}
uint32_t out = (opc << 21);
- out |= (Value & 0x800) << 14;
+ out |= (Value & 0x800) << 15;
out |= (Value & 0x700) << 4;
out |= (Value & 0x0FF);
@@ -416,21 +416,22 @@ void ELFARMAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data,
// FIXME: This should be in a separate file.
class DarwinARMAsmBackend : public ARMAsmBackend {
public:
- DarwinARMAsmBackend(const Target &T) : ARMAsmBackend(T) { }
-
- void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value) const;
+ const object::mach::CPUSubtypeARM Subtype;
+ DarwinARMAsmBackend(const Target &T, object::mach::CPUSubtypeARM st)
+ : ARMAsmBackend(T), Subtype(st) { }
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
- // FIXME: Subtarget info should be derived. Force v7 for now.
return createMachObjectWriter(new ARMMachObjectWriter(
/*Is64Bit=*/false,
object::mach::CTM_ARM,
- object::mach::CSARM_V7),
+ Subtype),
OS,
/*IsLittleEndian=*/true);
}
+ void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value) const;
+
virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
return false;
}
@@ -499,14 +500,17 @@ void DarwinARMAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data,
TargetAsmBackend *llvm::createARMAsmBackend(const Target &T,
const std::string &TT) {
- switch (Triple(TT).getOS()) {
- case Triple::Darwin:
- return new DarwinARMAsmBackend(T);
- case Triple::MinGW32:
- case Triple::Cygwin:
- case Triple::Win32:
- assert(0 && "Windows not supported on ARM");
- default:
- return new ELFARMAsmBackend(T, Triple(TT).getOS());
+ Triple TheTriple(TT);
+
+ if (TheTriple.isOSDarwin()) {
+ if (TheTriple.getArchName() == "armv6" ||
+ TheTriple.getArchName() == "thumbv6")
+ return new DarwinARMAsmBackend(T, object::mach::CSARM_V6);
+ return new DarwinARMAsmBackend(T, object::mach::CSARM_V7);
}
+
+ if (TheTriple.isOSWindows())
+ assert(0 && "Windows not supported on ARM");
+
+ return new ELFARMAsmBackend(T, Triple(TT).getOS());
}
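The rewritten factory keys the Mach-O CPU subtype off the triple's architecture name instead of hard-coding v7. A sketch of just that dispatch, with a stand-in enum replacing object::mach::CPUSubtypeARM:

    #include <string>

    // Stand-in for object::mach::CPUSubtypeARM.
    enum CPUSubtypeARM { CSARM_V6, CSARM_V7 };

    // armv6/thumbv6 triples get the v6 subtype; everything else on Darwin
    // keeps the previous v7 default.
    CPUSubtypeARM subtypeForArch(const std::string &ArchName) {
      if (ArchName == "armv6" || ArchName == "thumbv6")
        return CSARM_V6;
      return CSARM_V7;
    }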
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index db12b8e..c428e18 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -88,6 +88,11 @@ namespace {
case ARMBuildAttrs::CPU_name:
Streamer.EmitRawText(StringRef("\t.cpu ") + LowercaseString(String));
break;
+ /* GAS requires .fpu to be emitted regardless of EABI attribute */
+ case ARMBuildAttrs::Advanced_SIMD_arch:
+ case ARMBuildAttrs::VFP_arch:
+ Streamer.EmitRawText(StringRef("\t.fpu ") + LowercaseString(String));
+ break;
default: assert(0 && "Unsupported Text attribute in ASM Mode"); break;
}
}
@@ -167,6 +172,117 @@ getDebugValueLocation(const MachineInstr *MI) const {
return Location;
}
+/// getDwarfRegOpSize - get size required to emit given machine location using
+/// dwarf encoding.
+unsigned ARMAsmPrinter::getDwarfRegOpSize(const MachineLocation &MLoc) const {
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ if (RI->getDwarfRegNum(MLoc.getReg(), false) != -1)
+ return AsmPrinter::getDwarfRegOpSize(MLoc);
+ else {
+ unsigned Reg = MLoc.getReg();
+ if (Reg >= ARM::S0 && Reg <= ARM::S31) {
+ assert(ARM::S0 + 31 == ARM::S31 && "Unexpected ARM S register numbering");
+ // S registers are described as bit-pieces of a register
+ // S[2x] = DW_OP_regx(256 + (x>>1)) DW_OP_bit_piece(32, 0)
+ // S[2x+1] = DW_OP_regx(256 + (x>>1)) DW_OP_bit_piece(32, 32)
+
+ unsigned SReg = Reg - ARM::S0;
+ unsigned Rx = 256 + (SReg >> 1);
+ OutStreamer.AddComment("Loc expr size");
+ // DW_OP_regx + ULEB + DW_OP_bit_piece + ULEB + ULEB
+ // 1 + ULEB(Rx) + 1 + 1 + 1
+ return 4 + MCAsmInfo::getULEB128Size(Rx);
+ }
+
+ if (Reg >= ARM::Q0 && Reg <= ARM::Q15) {
+ assert(ARM::Q0 + 15 == ARM::Q15 && "Unexpected ARM Q register numbering");
+ // Q registers Q0-Q15 are described by composing two D registers together.
+ // Qx = DW_OP_regx(256+2x) DW_OP_piece(8) DW_OP_regx(256+2x+1) DW_OP_piece(8)
+
+ unsigned QReg = Reg - ARM::Q0;
+ unsigned D1 = 256 + 2 * QReg;
+ unsigned D2 = D1 + 1;
+
+ OutStreamer.AddComment("Loc expr size");
+ // DW_OP_regx + ULEB + DW_OP_piece + ULEB(8) +
+ // DW_OP_regx + ULEB + DW_OP_piece + ULEB(8);
+ // 6 + ULEB(D1) + ULEB(D2)
+ return 6 + MCAsmInfo::getULEB128Size(D1) + MCAsmInfo::getULEB128Size(D2);
+ }
+ }
+ return 0;
+}
+
+/// EmitDwarfRegOp - Emit dwarf register operation.
+void ARMAsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const {
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+ if (RI->getDwarfRegNum(MLoc.getReg(), false) != -1)
+ AsmPrinter::EmitDwarfRegOp(MLoc);
+ else {
+ unsigned Reg = MLoc.getReg();
+ if (Reg >= ARM::S0 && Reg <= ARM::S31) {
+ assert(ARM::S0 + 31 == ARM::S31 && "Unexpected ARM S register numbering");
+ // S registers are described as bit-pieces of a register
+ // S[2x] = DW_OP_regx(256 + (x>>1)) DW_OP_bit_piece(32, 0)
+ // S[2x+1] = DW_OP_regx(256 + (x>>1)) DW_OP_bit_piece(32, 32)
+
+ unsigned SReg = Reg - ARM::S0;
+ bool odd = SReg & 0x1;
+ unsigned Rx = 256 + (SReg >> 1);
+ OutStreamer.AddComment("Loc expr size");
+ // DW_OP_regx + ULEB + DW_OP_bit_piece + ULEB + ULEB
+ // 1 + ULEB(Rx) + 1 + 1 + 1
+ EmitInt16(4 + MCAsmInfo::getULEB128Size(Rx));
+
+ OutStreamer.AddComment("DW_OP_regx for S register");
+ EmitInt8(dwarf::DW_OP_regx);
+
+ OutStreamer.AddComment(Twine(SReg));
+ EmitULEB128(Rx);
+
+ if (odd) {
+ OutStreamer.AddComment("DW_OP_bit_piece 32 32");
+ EmitInt8(dwarf::DW_OP_bit_piece);
+ EmitULEB128(32);
+ EmitULEB128(32);
+ } else {
+ OutStreamer.AddComment("DW_OP_bit_piece 32 0");
+ EmitInt8(dwarf::DW_OP_bit_piece);
+ EmitULEB128(32);
+ EmitULEB128(0);
+ }
+ } else if (Reg >= ARM::Q0 && Reg <= ARM::Q15) {
+ assert(ARM::Q0 + 15 == ARM::Q15 && "Unexpected ARM Q register numbering");
+ // Q registers Q0-Q15 are described by composing two D registers together.
+ // Qx = DW_OP_regx(256+2x) DW_OP_piece(8) DW_OP_regx(256+2x+1) DW_OP_piece(8)
+
+ unsigned QReg = Reg - ARM::Q0;
+ unsigned D1 = 256 + 2 * QReg;
+ unsigned D2 = D1 + 1;
+
+ OutStreamer.AddComment("Loc expr size");
+ // DW_OP_regx + ULEB + DW_OP_piece + ULEB(8) +
+ // DW_OP_regx + ULEB + DW_OP_piece + ULEB(8);
+ // 6 + ULEB(D1) + ULEB(D2)
+ EmitInt16(6 + MCAsmInfo::getULEB128Size(D1) + MCAsmInfo::getULEB128Size(D2));
+
+ OutStreamer.AddComment("DW_OP_regx for Q register: D1");
+ EmitInt8(dwarf::DW_OP_regx);
+ EmitULEB128(D1);
+ OutStreamer.AddComment("DW_OP_piece 8");
+ EmitInt8(dwarf::DW_OP_piece);
+ EmitULEB128(8);
+
+ OutStreamer.AddComment("DW_OP_regx for Q register: D2");
+ EmitInt8(dwarf::DW_OP_regx);
+ EmitULEB128(D2);
+ OutStreamer.AddComment("DW_OP_piece 8");
+ EmitInt8(dwarf::DW_OP_piece);
+ EmitULEB128(8);
+ }
+ }
+}
+
void ARMAsmPrinter::EmitFunctionEntryLabel() {
if (AFI->isThumbFunction()) {
OutStreamer.EmitAssemblerFlag(MCAF_Code16);
@@ -453,10 +569,13 @@ void ARMAsmPrinter::emitAttributes() {
emitARMAttributeSection();
+ /* GAS expects .fpu to be emitted, regardless of VFP build attribute */
+ bool emitFPU = false;
AttributeEmitter *AttrEmitter;
- if (OutStreamer.hasRawTextSupport())
+ if (OutStreamer.hasRawTextSupport()) {
AttrEmitter = new AsmAttributeEmitter(OutStreamer);
- else {
+ emitFPU = true;
+ } else {
MCObjectStreamer &O = static_cast<MCObjectStreamer&>(OutStreamer);
AttrEmitter = new ObjectAttributeEmitter(O);
}
@@ -490,10 +609,36 @@ void ARMAsmPrinter::emitAttributes() {
ARMBuildAttrs::Allowed);
}
- // FIXME: Emit FPU type
- if (Subtarget->hasVFP2())
+ if (Subtarget->hasNEON() && emitFPU) {
+ /* NEON is not exactly a VFP architecture, but GAS emits one of
+ * neon/vfpv3/vfpv2 for .fpu parameters */
+ AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon");
+ /* If emitted for NEON, omit from VFP below, since you can have both
+ * NEON and VFP in build attributes but only one .fpu */
+ emitFPU = false;
+ }
+
+ /* VFPv3 + .fpu */
+ if (Subtarget->hasVFP3()) {
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch,
+ ARMBuildAttrs::AllowFPv3A);
+ if (emitFPU)
+ AttrEmitter->EmitTextAttribute(ARMBuildAttrs::VFP_arch, "vfpv3");
+
+ /* VFPv2 + .fpu */
+ } else if (Subtarget->hasVFP2()) {
AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch,
ARMBuildAttrs::AllowFPv2);
+ if (emitFPU)
+ AttrEmitter->EmitTextAttribute(ARMBuildAttrs::VFP_arch, "vfpv2");
+ }
+
+ /* TODO: ARMBuildAttrs::Allowed is not completely accurate,
+ * since NEON can have 1 (allowed) or 2 (fused MAC operations) */
+ if (Subtarget->hasNEON()) {
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::Advanced_SIMD_arch,
+ ARMBuildAttrs::Allowed);
+ }
// Signal various FP modes.
if (!UnsafeFPMath) {
@@ -777,10 +922,161 @@ void ARMAsmPrinter::EmitPatchedInstruction(const MachineInstr *MI,
OutStreamer.EmitInstruction(TmpInst);
}
+void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
+ assert(MI->getFlag(MachineInstr::FrameSetup) &&
+ "Only instruction which are involved into frame setup code are allowed");
+
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+ const ARMFunctionInfo &AFI = *MF.getInfo<ARMFunctionInfo>();
+
+ unsigned FramePtr = RegInfo->getFrameRegister(MF);
+ unsigned Opc = MI->getOpcode();
+ unsigned SrcReg, DstReg;
+
+ if (Opc == ARM::tPUSH || Opc == ARM::tLDRpci) {
+ // Two special cases:
+ // 1) tPUSH does not have src/dst regs.
+ // 2) for Thumb1 code we sometimes materialize the constant via constpool
+ // load. Yes, this is pretty fragile, but for now I don't see a better
+ // way... :(
+ SrcReg = DstReg = ARM::SP;
+ } else {
+ SrcReg = MI->getOperand(1).getReg();
+ DstReg = MI->getOperand(0).getReg();
+ }
+
+ // Try to figure out the unwinding opcode from the src / dst regs.
+ if (MI->getDesc().mayStore()) {
+ // Register saves.
+ assert(DstReg == ARM::SP &&
+ "Only stack pointer as a destination reg is supported");
+
+ SmallVector<unsigned, 4> RegList;
+ // Skip src & dst reg, and pred ops.
+ unsigned StartOp = 2 + 2;
+ // Use all the operands.
+ unsigned NumOffset = 0;
+
+ switch (Opc) {
+ default:
+ MI->dump();
+ assert(0 && "Unsupported opcode for unwinding information");
+ case ARM::tPUSH:
+ // Special case here: no src & dst reg, but two extra imp ops.
+ StartOp = 2; NumOffset = 2;
+ case ARM::STMDB_UPD:
+ case ARM::t2STMDB_UPD:
+ case ARM::VSTMDDB_UPD:
+ assert(SrcReg == ARM::SP &&
+ "Only stack pointer as a source reg is supported");
+ for (unsigned i = StartOp, NumOps = MI->getNumOperands() - NumOffset;
+ i != NumOps; ++i)
+ RegList.push_back(MI->getOperand(i).getReg());
+ break;
+ case ARM::STR_PRE:
+ assert(MI->getOperand(2).getReg() == ARM::SP &&
+ "Only stack pointer as a source reg is supported");
+ RegList.push_back(SrcReg);
+ break;
+ }
+ OutStreamer.EmitRegSave(RegList, Opc == ARM::VSTMDDB_UPD);
+ } else {
+ // Changes of stack / frame pointer.
+ if (SrcReg == ARM::SP) {
+ int64_t Offset = 0;
+ switch (Opc) {
+ default:
+ MI->dump();
+ assert(0 && "Unsupported opcode for unwinding information");
+ case ARM::MOVr:
+ case ARM::tMOVgpr2gpr:
+ case ARM::tMOVgpr2tgpr:
+ Offset = 0;
+ break;
+ case ARM::ADDri:
+ Offset = -MI->getOperand(2).getImm();
+ break;
+ case ARM::SUBri:
+ case ARM::t2SUBrSPi:
+ Offset = MI->getOperand(2).getImm();
+ break;
+ case ARM::tSUBspi:
+ Offset = MI->getOperand(2).getImm()*4;
+ break;
+ case ARM::tADDspi:
+ case ARM::tADDrSPi:
+ Offset = -MI->getOperand(2).getImm()*4;
+ break;
+ case ARM::tLDRpci: {
+ // Grab the constpool index and check whether it corresponds to the
+ // original or a cloned constpool entry.
+ unsigned CPI = MI->getOperand(1).getIndex();
+ const MachineConstantPool *MCP = MF.getConstantPool();
+ if (CPI >= MCP->getConstants().size())
+ CPI = AFI.getOriginalCPIdx(CPI);
+ assert(CPI != -1U && "Invalid constpool index");
+
+ // Derive the actual offset.
+ const MachineConstantPoolEntry &CPE = MCP->getConstants()[CPI];
+ assert(!CPE.isMachineConstantPoolEntry() && "Invalid constpool entry");
+ // FIXME: Check for user, it should be "add" instruction!
+ Offset = -cast<ConstantInt>(CPE.Val.ConstVal)->getSExtValue();
+ break;
+ }
+ }
+
+ if (DstReg == FramePtr && FramePtr != ARM::SP)
+ // Set-up of the frame pointer. Positive values correspond to "add"
+ // instruction.
+ OutStreamer.EmitSetFP(FramePtr, ARM::SP, -Offset);
+ else if (DstReg == ARM::SP) {
+ // Change of SP by an offset. Positive values correspond to "sub"
+ // instruction.
+ OutStreamer.EmitPad(Offset);
+ } else {
+ MI->dump();
+ assert(0 && "Unsupported opcode for unwinding information");
+ }
+ } else if (DstReg == ARM::SP) {
+ // FIXME: .movsp goes here
+ MI->dump();
+ assert(0 && "Unsupported opcode for unwinding information");
+ }
+ else {
+ MI->dump();
+ assert(0 && "Unsupported opcode for unwinding information");
+ }
+ }
+}
+
+extern cl::opt<bool> EnableARMEHABI;
+
void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
unsigned Opc = MI->getOpcode();
switch (Opc) {
default: break;
+ case ARM::B: {
+ // B is just a Bcc with an 'always' predicate.
+ MCInst TmpInst;
+ LowerARMMachineInstrToMCInst(MI, TmpInst, *this);
+ TmpInst.setOpcode(ARM::Bcc);
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
+ case ARM::LDMIA_RET: {
+ // LDMIA_RET is just a normal LDMIA_UPD instruction that targets PC and as
+ // such has additional code-gen properties and scheduling information.
+ // To emit it, we just construct as normal and set the opcode to LDMIA_UPD.
+ MCInst TmpInst;
+ LowerARMMachineInstrToMCInst(MI, TmpInst, *this);
+ TmpInst.setOpcode(ARM::LDMIA_UPD);
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
case ARM::t2ADDrSPi:
case ARM::t2ADDrSPi12:
case ARM::t2SUBrSPi:
@@ -850,6 +1146,26 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
OutStreamer.EmitInstruction(TmpInst);
return;
}
+ // Darwin call instructions are just normal call instructions with different
+ // clobber semantics (they clobber R9).
+ case ARM::BLr9:
+ case ARM::BLr9_pred:
+ case ARM::BLXr9:
+ case ARM::BLXr9_pred: {
+ unsigned newOpc;
+ switch (Opc) {
+ default: assert(0);
+ case ARM::BLr9: newOpc = ARM::BL; break;
+ case ARM::BLr9_pred: newOpc = ARM::BL_pred; break;
+ case ARM::BLXr9: newOpc = ARM::BLX; break;
+ case ARM::BLXr9_pred: newOpc = ARM::BLX_pred; break;
+ }
+ MCInst TmpInst;
+ LowerARMMachineInstrToMCInst(MI, TmpInst, *this);
+ TmpInst.setOpcode(newOpc);
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
case ARM::BXr9_CALL:
case ARM::BX_CALL: {
{
@@ -1502,6 +1818,49 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
return;
}
+ // Tail jump branches are really just branch instructions with additional
+ // code-gen attributes. Convert them to the canonical form here.
+ case ARM::TAILJMPd:
+ case ARM::TAILJMPdND: {
+ MCInst TmpInst, TmpInst2;
+ // Lower the instruction as-is to get the operands properly converted.
+ LowerARMMachineInstrToMCInst(MI, TmpInst2, *this);
+ TmpInst.setOpcode(ARM::Bcc);
+ TmpInst.addOperand(TmpInst2.getOperand(0));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.AddComment("TAILCALL");
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
+ case ARM::tTAILJMPd:
+ case ARM::tTAILJMPdND: {
+ MCInst TmpInst, TmpInst2;
+ LowerARMMachineInstrToMCInst(MI, TmpInst2, *this);
+ TmpInst.setOpcode(ARM::tB);
+ TmpInst.addOperand(TmpInst2.getOperand(0));
+ OutStreamer.AddComment("TAILCALL");
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
+ case ARM::TAILJMPrND:
+ case ARM::tTAILJMPrND:
+ case ARM::TAILJMPr:
+ case ARM::tTAILJMPr: {
+ unsigned newOpc = (Opc == ARM::TAILJMPr || Opc == ARM::TAILJMPrND)
+ ? ARM::BX : ARM::tBX;
+ MCInst TmpInst;
+ TmpInst.setOpcode(newOpc);
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.AddComment("TAILCALL");
+ OutStreamer.EmitInstruction(TmpInst);
+ return;
+ }
+
// These are the pseudos created to comply with stricter operand restrictions
// on ARMv5. Lower them now to "normal" instructions, since all the
// restrictions are already satisfied.
@@ -1530,6 +1889,11 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCInst TmpInst;
LowerARMMachineInstrToMCInst(MI, TmpInst, *this);
+
+ // Emit unwinding stuff for frame-related instructions
+ if (EnableARMEHABI && MI->getFlag(MachineInstr::FrameSetup))
+ EmitUnwindingInstruction(MI);
+
OutStreamer.EmitInstruction(TmpInst);
}
@@ -1538,10 +1902,11 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
//===----------------------------------------------------------------------===//
static MCInstPrinter *createARMMCInstPrinter(const Target &T,
+ TargetMachine &TM,
unsigned SyntaxVariant,
const MCAsmInfo &MAI) {
if (SyntaxVariant == 0)
- return new ARMInstPrinter(MAI);
+ return new ARMInstPrinter(TM, MAI);
return 0;
}
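For the S-register case above, the emitted DWARF expression is DW_OP_regx(256 + (n >> 1)) followed by DW_OP_bit_piece(32, n odd ? 32 : 0), and its size works out to 4 + ULEB128Size(Rx) bytes, matching getDwarfRegOpSize. A standalone sketch that materializes those bytes, using the literal opcode values DW_OP_regx = 0x90 and DW_OP_bit_piece = 0x9d from DWARF 3:

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Append a ULEB128-encoded value, as DWARF operands require.
    void emitULEB128(std::vector<uint8_t> &Out, uint64_t V) {
      do {
        uint8_t B = V & 0x7f;
        V >>= 7;
        if (V) B |= 0x80;
        Out.push_back(B);
      } while (V);
    }

    // Location expression for S[n]:
    //   DW_OP_regx (256 + (n >> 1)), DW_OP_bit_piece 32, (n odd ? 32 : 0)
    std::vector<uint8_t> locForSReg(unsigned N) {
      std::vector<uint8_t> Expr;
      Expr.push_back(0x90);                // DW_OP_regx
      emitULEB128(Expr, 256 + (N >> 1));   // enclosing D register
      Expr.push_back(0x9d);                // DW_OP_bit_piece
      emitULEB128(Expr, 32);               // piece size in bits
      emitULEB128(Expr, (N & 1) ? 32 : 0); // bit offset within the D register
      return Expr;
    }

    int main() {
      for (uint8_t B : locForSReg(5))
        std::printf("%02x ", B);           // prints: 90 82 02 9d 20 20
      std::printf("\n");
      return 0;
    }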
diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h
index 5852684..1ee1b70 100644
--- a/lib/Target/ARM/ARMAsmPrinter.h
+++ b/lib/Target/ARM/ARMAsmPrinter.h
@@ -82,11 +82,20 @@ private:
// Generic helper used to emit e.g. ARMv5 mul pseudos
void EmitPatchedInstruction(const MachineInstr *MI, unsigned TargetOpc);
+ void EmitUnwindingInstruction(const MachineInstr *MI);
+
public:
void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
MachineLocation getDebugValueLocation(const MachineInstr *MI) const;
+ /// getDwarfRegOpSize - get size required to emit given machine location
+ /// using dwarf encoding.
+ virtual unsigned getDwarfRegOpSize(const MachineLocation &MLoc) const;
+
+ /// EmitDwarfRegOp - Emit dwarf register operation.
+ virtual void EmitDwarfRegOp(const MachineLocation &MLoc) const;
+
virtual unsigned getISAEncoding() {
// ARM/Darwin adds ISA to the DWARF info for each function.
if (!Subtarget->isTargetDarwin())
diff --git a/lib/Target/ARM/ARMBaseInfo.h b/lib/Target/ARM/ARMBaseInfo.h
index a56cc1a..36edbad 100644
--- a/lib/Target/ARM/ARMBaseInfo.h
+++ b/lib/Target/ARM/ARMBaseInfo.h
@@ -200,6 +200,59 @@ inline static unsigned getARMRegisterNumbering(unsigned Reg) {
}
namespace ARMII {
+
+ /// ARM Index Modes
+ enum IndexMode {
+ IndexModeNone = 0,
+ IndexModePre = 1,
+ IndexModePost = 2,
+ IndexModeUpd = 3
+ };
+
+ /// ARM Addressing Modes
+ enum AddrMode {
+ AddrModeNone = 0,
+ AddrMode1 = 1,
+ AddrMode2 = 2,
+ AddrMode3 = 3,
+ AddrMode4 = 4,
+ AddrMode5 = 5,
+ AddrMode6 = 6,
+ AddrModeT1_1 = 7,
+ AddrModeT1_2 = 8,
+ AddrModeT1_4 = 9,
+ AddrModeT1_s = 10, // i8 * 4 for pc and sp relative data
+ AddrModeT2_i12 = 11,
+ AddrModeT2_i8 = 12,
+ AddrModeT2_so = 13,
+ AddrModeT2_pc = 14, // +/- i12 for pc relative data
+ AddrModeT2_i8s4 = 15, // i8 * 4
+ AddrMode_i12 = 16
+ };
+
+ inline static const char *AddrModeToString(AddrMode addrmode) {
+ switch (addrmode) {
+ default: llvm_unreachable("Unknown memory operation");
+ case AddrModeNone: return "AddrModeNone";
+ case AddrMode1: return "AddrMode1";
+ case AddrMode2: return "AddrMode2";
+ case AddrMode3: return "AddrMode3";
+ case AddrMode4: return "AddrMode4";
+ case AddrMode5: return "AddrMode5";
+ case AddrMode6: return "AddrMode6";
+ case AddrModeT1_1: return "AddrModeT1_1";
+ case AddrModeT1_2: return "AddrModeT1_2";
+ case AddrModeT1_4: return "AddrModeT1_4";
+ case AddrModeT1_s: return "AddrModeT1_s";
+ case AddrModeT2_i12: return "AddrModeT2_i12";
+ case AddrModeT2_i8: return "AddrModeT2_i8";
+ case AddrModeT2_so: return "AddrModeT2_so";
+ case AddrModeT2_pc: return "AddrModeT2_pc";
+ case AddrModeT2_i8s4: return "AddrModeT2_i8s4";
+ case AddrMode_i12: return "AddrMode_i12";
+ }
+ }
+
/// Target Operand Flag enum.
enum TOF {
//===------------------------------------------------------------------===//
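These enums pair with the TSFlags bit fields kept in ARMBaseInstrInfo.h (AddrModeMask = 0x1f, IndexModeShift = 8, IndexModeMask = 3 << 8, per the hunk further down). A sketch of decoding both fields from a raw TSFlags word:

    #include <cstdint>

    // Field layout from ARMBaseInstrInfo.h: addressing mode in bits 0-4,
    // index mode in bits 8-9 of TSFlags.
    const uint64_t AddrModeMask   = 0x1f;
    const unsigned IndexModeShift = 8;
    const uint64_t IndexModeMask  = 3 << IndexModeShift;

    unsigned decodeAddrMode(uint64_t TSFlags) {
      return unsigned(TSFlags & AddrModeMask); // e.g. 2 -> AddrMode2
    }

    unsigned decodeIndexMode(uint64_t TSFlags) {
      // 0 = none, 1 = pre-indexed, 2 = post-indexed, 3 = updating
      return unsigned((TSFlags & IndexModeMask) >> IndexModeShift);
    }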
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 2268e59..44a3976 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -1021,7 +1021,7 @@ reMaterialize(MachineBasicBlock &MBB,
MachineInstrBuilder MIB = BuildMI(MBB, I, Orig->getDebugLoc(), get(Opcode),
DestReg)
.addConstantPoolIndex(CPI).addImm(PCLabelId);
- (*MIB).setMemRefs(Orig->memoperands_begin(), Orig->memoperands_end());
+ MIB->setMemRefs(Orig->memoperands_begin(), Orig->memoperands_end());
break;
}
}
@@ -1080,11 +1080,18 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0,
int CPI1 = MO1.getIndex();
const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0];
const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1];
- ARMConstantPoolValue *ACPV0 =
- static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal);
- ARMConstantPoolValue *ACPV1 =
- static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
- return ACPV0->hasSameValue(ACPV1);
+ bool isARMCP0 = MCPE0.isMachineConstantPoolEntry();
+ bool isARMCP1 = MCPE1.isMachineConstantPoolEntry();
+ if (isARMCP0 && isARMCP1) {
+ ARMConstantPoolValue *ACPV0 =
+ static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal);
+ ARMConstantPoolValue *ACPV1 =
+ static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
+ return ACPV0->hasSameValue(ACPV1);
+ } else if (!isARMCP0 && !isARMCP1) {
+ return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal;
+ }
+ return false;
} else if (Opcode == ARM::PICLDR) {
if (MI1->getOpcode() != Opcode)
return false;
@@ -1194,7 +1201,7 @@ bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
}
/// shouldScheduleLoadsNear - This is a used by the pre-regalloc scheduler to
-/// determine (in conjuction with areLoadsFromSameBasePtr) if two loads should
+/// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
/// be scheduled togther. On some targets if two loads are loading from
/// addresses in the same cache line, it's better if they are scheduled
/// together. This function takes two integers that represent the load offsets
@@ -1263,19 +1270,19 @@ bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
}
bool ARMBaseInstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
- unsigned NumCyles,
+ unsigned NumCycles,
unsigned ExtraPredCycles,
float Probability,
float Confidence) const {
- if (!NumCyles)
+ if (!NumCycles)
return false;
// Attempt to estimate the relative costs of predication versus branching.
- float UnpredCost = Probability * NumCyles;
+ float UnpredCost = Probability * NumCycles;
UnpredCost += 1.0; // The branch itself
UnpredCost += (1.0 - Confidence) * Subtarget.getMispredictionPenalty();
- return (float)(NumCyles + ExtraPredCycles) < UnpredCost;
+ return (float)(NumCycles + ExtraPredCycles) < UnpredCost;
}
bool ARMBaseInstrInfo::
@@ -1328,7 +1335,7 @@ void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI, DebugLoc dl,
unsigned DestReg, unsigned BaseReg, int NumBytes,
ARMCC::CondCodes Pred, unsigned PredReg,
- const ARMBaseInstrInfo &TII) {
+ const ARMBaseInstrInfo &TII, unsigned MIFlags) {
bool isSub = NumBytes < 0;
if (isSub) NumBytes = -NumBytes;
@@ -1346,7 +1353,8 @@ void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri;
BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
.addReg(BaseReg, RegState::Kill).addImm(ThisVal)
- .addImm((unsigned)Pred).addReg(PredReg).addReg(0);
+ .addImm((unsigned)Pred).addReg(PredReg).addReg(0)
+ .setMIFlags(MIFlags);
BaseReg = DestReg;
}
}
@@ -1610,18 +1618,84 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
// Set the "zero" bit in CPSR.
switch (MI->getOpcode()) {
default: break;
+ case ARM::RSBrr:
+ case ARM::RSBri:
+ case ARM::RSCrr:
+ case ARM::RSCri:
+ case ARM::ADDrr:
case ARM::ADDri:
- case ARM::ANDri:
- case ARM::t2ANDri:
+ case ARM::ADCrr:
+ case ARM::ADCri:
+ case ARM::SUBrr:
case ARM::SUBri:
+ case ARM::SBCrr:
+ case ARM::SBCri:
+ case ARM::t2RSBri:
+ case ARM::t2ADDrr:
case ARM::t2ADDri:
+ case ARM::t2ADCrr:
+ case ARM::t2ADCri:
+ case ARM::t2SUBrr:
case ARM::t2SUBri:
+ case ARM::t2SBCrr:
+ case ARM::t2SBCri:
+ case ARM::ANDrr:
+ case ARM::ANDri:
+ case ARM::t2ANDrr:
+ case ARM::t2ANDri:
+ case ARM::ORRrr:
+ case ARM::ORRri:
+ case ARM::t2ORRrr:
+ case ARM::t2ORRri:
+ case ARM::EORrr:
+ case ARM::EORri:
+ case ARM::t2EORrr:
+ case ARM::t2EORri: {
+ // Scan forward for the use of CPSR. If a use's condition code requires
+ // checking the V bit, then this is not safe to do. If we can't find the
+ // CPSR use (i.e. it is used in another block), then it's not safe to
+ // perform the optimization.
+ bool isSafe = false;
+ I = CmpInstr;
+ E = MI->getParent()->end();
+ while (!isSafe && ++I != E) {
+ const MachineInstr &Instr = *I;
+ for (unsigned IO = 0, EO = Instr.getNumOperands();
+ !isSafe && IO != EO; ++IO) {
+ const MachineOperand &MO = Instr.getOperand(IO);
+ if (!MO.isReg() || MO.getReg() != ARM::CPSR)
+ continue;
+ if (MO.isDef()) {
+ isSafe = true;
+ break;
+ }
+ // Condition code is after the operand before CPSR.
+ ARMCC::CondCodes CC = (ARMCC::CondCodes)Instr.getOperand(IO-1).getImm();
+ switch (CC) {
+ default:
+ isSafe = true;
+ break;
+ case ARMCC::VS:
+ case ARMCC::VC:
+ case ARMCC::GE:
+ case ARMCC::LT:
+ case ARMCC::GT:
+ case ARMCC::LE:
+ return false;
+ }
+ }
+ }
+
+ if (!isSafe)
+ return false;
+
// Toggle the optional operand to CPSR.
MI->getOperand(5).setReg(ARM::CPSR);
MI->getOperand(5).setIsDef(true);
CmpInstr->eraseFromParent();
return true;
}
+ }
return false;
}
@@ -1741,9 +1815,7 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
llvm_unreachable("Unexpected multi-uops instruction!");
break;
case ARM::VLDMQIA:
- case ARM::VLDMQDB:
case ARM::VSTMQIA:
- case ARM::VSTMQDB:
return 2;
// The number of uOps for load / store multiple are determined by the number
@@ -1757,19 +1829,15 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
// is not 64-bit aligned, then AGU would take an extra cycle. For VFP / NEON
// load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1.
case ARM::VLDMDIA:
- case ARM::VLDMDDB:
case ARM::VLDMDIA_UPD:
case ARM::VLDMDDB_UPD:
case ARM::VLDMSIA:
- case ARM::VLDMSDB:
case ARM::VLDMSIA_UPD:
case ARM::VLDMSDB_UPD:
case ARM::VSTMDIA:
- case ARM::VSTMDDB:
case ARM::VSTMDIA_UPD:
case ARM::VSTMDDB_UPD:
case ARM::VSTMSIA:
- case ARM::VSTMSDB:
case ARM::VSTMSIA_UPD:
case ARM::VSTMSDB_UPD: {
unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands();
@@ -1859,7 +1927,6 @@ ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
switch (DefTID.getOpcode()) {
default: break;
case ARM::VLDMSIA:
- case ARM::VLDMSDB:
case ARM::VLDMSIA_UPD:
case ARM::VLDMSDB_UPD:
isSLoad = true;
@@ -1935,7 +2002,6 @@ ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
switch (UseTID.getOpcode()) {
default: break;
case ARM::VSTMSIA:
- case ARM::VSTMSDB:
case ARM::VSTMSIA_UPD:
case ARM::VSTMSDB_UPD:
isSStore = true;
@@ -2006,11 +2072,9 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
break;
case ARM::VLDMDIA:
- case ARM::VLDMDDB:
case ARM::VLDMDIA_UPD:
case ARM::VLDMDDB_UPD:
case ARM::VLDMSIA:
- case ARM::VLDMSDB:
case ARM::VLDMSIA_UPD:
case ARM::VLDMSDB_UPD:
DefCycle = getVLDMDefCycle(ItinData, DefTID, DefClass, DefIdx, DefAlign);
@@ -2049,11 +2113,9 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
break;
case ARM::VSTMDIA:
- case ARM::VSTMDDB:
case ARM::VSTMDIA_UPD:
case ARM::VSTMDDB_UPD:
case ARM::VSTMSIA:
- case ARM::VSTMSDB:
case ARM::VSTMSIA_UPD:
case ARM::VSTMSDB_UPD:
UseCycle = getVSTMUseCycle(ItinData, UseTID, UseClass, UseIdx, UseAlign);
@@ -2160,6 +2222,101 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
}
}
+ if (DefAlign < 8 && Subtarget.isCortexA9())
+ switch (DefTID.getOpcode()) {
+ default: break;
+ case ARM::VLD1q8:
+ case ARM::VLD1q16:
+ case ARM::VLD1q32:
+ case ARM::VLD1q64:
+ case ARM::VLD1q8_UPD:
+ case ARM::VLD1q16_UPD:
+ case ARM::VLD1q32_UPD:
+ case ARM::VLD1q64_UPD:
+ case ARM::VLD2d8:
+ case ARM::VLD2d16:
+ case ARM::VLD2d32:
+ case ARM::VLD2q8:
+ case ARM::VLD2q16:
+ case ARM::VLD2q32:
+ case ARM::VLD2d8_UPD:
+ case ARM::VLD2d16_UPD:
+ case ARM::VLD2d32_UPD:
+ case ARM::VLD2q8_UPD:
+ case ARM::VLD2q16_UPD:
+ case ARM::VLD2q32_UPD:
+ case ARM::VLD3d8:
+ case ARM::VLD3d16:
+ case ARM::VLD3d32:
+ case ARM::VLD1d64T:
+ case ARM::VLD3d8_UPD:
+ case ARM::VLD3d16_UPD:
+ case ARM::VLD3d32_UPD:
+ case ARM::VLD1d64T_UPD:
+ case ARM::VLD3q8_UPD:
+ case ARM::VLD3q16_UPD:
+ case ARM::VLD3q32_UPD:
+ case ARM::VLD4d8:
+ case ARM::VLD4d16:
+ case ARM::VLD4d32:
+ case ARM::VLD1d64Q:
+ case ARM::VLD4d8_UPD:
+ case ARM::VLD4d16_UPD:
+ case ARM::VLD4d32_UPD:
+ case ARM::VLD1d64Q_UPD:
+ case ARM::VLD4q8_UPD:
+ case ARM::VLD4q16_UPD:
+ case ARM::VLD4q32_UPD:
+ case ARM::VLD1DUPq8:
+ case ARM::VLD1DUPq16:
+ case ARM::VLD1DUPq32:
+ case ARM::VLD1DUPq8_UPD:
+ case ARM::VLD1DUPq16_UPD:
+ case ARM::VLD1DUPq32_UPD:
+ case ARM::VLD2DUPd8:
+ case ARM::VLD2DUPd16:
+ case ARM::VLD2DUPd32:
+ case ARM::VLD2DUPd8_UPD:
+ case ARM::VLD2DUPd16_UPD:
+ case ARM::VLD2DUPd32_UPD:
+ case ARM::VLD4DUPd8:
+ case ARM::VLD4DUPd16:
+ case ARM::VLD4DUPd32:
+ case ARM::VLD4DUPd8_UPD:
+ case ARM::VLD4DUPd16_UPD:
+ case ARM::VLD4DUPd32_UPD:
+ case ARM::VLD1LNd8:
+ case ARM::VLD1LNd16:
+ case ARM::VLD1LNd32:
+ case ARM::VLD1LNd8_UPD:
+ case ARM::VLD1LNd16_UPD:
+ case ARM::VLD1LNd32_UPD:
+ case ARM::VLD2LNd8:
+ case ARM::VLD2LNd16:
+ case ARM::VLD2LNd32:
+ case ARM::VLD2LNq16:
+ case ARM::VLD2LNq32:
+ case ARM::VLD2LNd8_UPD:
+ case ARM::VLD2LNd16_UPD:
+ case ARM::VLD2LNd32_UPD:
+ case ARM::VLD2LNq16_UPD:
+ case ARM::VLD2LNq32_UPD:
+ case ARM::VLD4LNd8:
+ case ARM::VLD4LNd16:
+ case ARM::VLD4LNd32:
+ case ARM::VLD4LNq16:
+ case ARM::VLD4LNq32:
+ case ARM::VLD4LNd8_UPD:
+ case ARM::VLD4LNd16_UPD:
+ case ARM::VLD4LNd32_UPD:
+ case ARM::VLD4LNq16_UPD:
+ case ARM::VLD4LNq32_UPD:
+ // If the address is not 64-bit aligned, the latencies of these
+ // instructions increase by one.
+ ++Latency;
+ break;
+ }
+
return Latency;
}
@@ -2226,6 +2383,113 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
}
}
+ if (DefAlign < 8 && Subtarget.isCortexA9())
+ switch (DefTID.getOpcode()) {
+ default: break;
+ case ARM::VLD1q8Pseudo:
+ case ARM::VLD1q16Pseudo:
+ case ARM::VLD1q32Pseudo:
+ case ARM::VLD1q64Pseudo:
+ case ARM::VLD1q8Pseudo_UPD:
+ case ARM::VLD1q16Pseudo_UPD:
+ case ARM::VLD1q32Pseudo_UPD:
+ case ARM::VLD1q64Pseudo_UPD:
+ case ARM::VLD2d8Pseudo:
+ case ARM::VLD2d16Pseudo:
+ case ARM::VLD2d32Pseudo:
+ case ARM::VLD2q8Pseudo:
+ case ARM::VLD2q16Pseudo:
+ case ARM::VLD2q32Pseudo:
+ case ARM::VLD2d8Pseudo_UPD:
+ case ARM::VLD2d16Pseudo_UPD:
+ case ARM::VLD2d32Pseudo_UPD:
+ case ARM::VLD2q8Pseudo_UPD:
+ case ARM::VLD2q16Pseudo_UPD:
+ case ARM::VLD2q32Pseudo_UPD:
+ case ARM::VLD3d8Pseudo:
+ case ARM::VLD3d16Pseudo:
+ case ARM::VLD3d32Pseudo:
+ case ARM::VLD1d64TPseudo:
+ case ARM::VLD3d8Pseudo_UPD:
+ case ARM::VLD3d16Pseudo_UPD:
+ case ARM::VLD3d32Pseudo_UPD:
+ case ARM::VLD1d64TPseudo_UPD:
+ case ARM::VLD3q8Pseudo_UPD:
+ case ARM::VLD3q16Pseudo_UPD:
+ case ARM::VLD3q32Pseudo_UPD:
+ case ARM::VLD3q8oddPseudo:
+ case ARM::VLD3q16oddPseudo:
+ case ARM::VLD3q32oddPseudo:
+ case ARM::VLD3q8oddPseudo_UPD:
+ case ARM::VLD3q16oddPseudo_UPD:
+ case ARM::VLD3q32oddPseudo_UPD:
+ case ARM::VLD4d8Pseudo:
+ case ARM::VLD4d16Pseudo:
+ case ARM::VLD4d32Pseudo:
+ case ARM::VLD1d64QPseudo:
+ case ARM::VLD4d8Pseudo_UPD:
+ case ARM::VLD4d16Pseudo_UPD:
+ case ARM::VLD4d32Pseudo_UPD:
+ case ARM::VLD1d64QPseudo_UPD:
+ case ARM::VLD4q8Pseudo_UPD:
+ case ARM::VLD4q16Pseudo_UPD:
+ case ARM::VLD4q32Pseudo_UPD:
+ case ARM::VLD4q8oddPseudo:
+ case ARM::VLD4q16oddPseudo:
+ case ARM::VLD4q32oddPseudo:
+ case ARM::VLD4q8oddPseudo_UPD:
+ case ARM::VLD4q16oddPseudo_UPD:
+ case ARM::VLD4q32oddPseudo_UPD:
+ case ARM::VLD1DUPq8Pseudo:
+ case ARM::VLD1DUPq16Pseudo:
+ case ARM::VLD1DUPq32Pseudo:
+ case ARM::VLD1DUPq8Pseudo_UPD:
+ case ARM::VLD1DUPq16Pseudo_UPD:
+ case ARM::VLD1DUPq32Pseudo_UPD:
+ case ARM::VLD2DUPd8Pseudo:
+ case ARM::VLD2DUPd16Pseudo:
+ case ARM::VLD2DUPd32Pseudo:
+ case ARM::VLD2DUPd8Pseudo_UPD:
+ case ARM::VLD2DUPd16Pseudo_UPD:
+ case ARM::VLD2DUPd32Pseudo_UPD:
+ case ARM::VLD4DUPd8Pseudo:
+ case ARM::VLD4DUPd16Pseudo:
+ case ARM::VLD4DUPd32Pseudo:
+ case ARM::VLD4DUPd8Pseudo_UPD:
+ case ARM::VLD4DUPd16Pseudo_UPD:
+ case ARM::VLD4DUPd32Pseudo_UPD:
+ case ARM::VLD1LNq8Pseudo:
+ case ARM::VLD1LNq16Pseudo:
+ case ARM::VLD1LNq32Pseudo:
+ case ARM::VLD1LNq8Pseudo_UPD:
+ case ARM::VLD1LNq16Pseudo_UPD:
+ case ARM::VLD1LNq32Pseudo_UPD:
+ case ARM::VLD2LNd8Pseudo:
+ case ARM::VLD2LNd16Pseudo:
+ case ARM::VLD2LNd32Pseudo:
+ case ARM::VLD2LNq16Pseudo:
+ case ARM::VLD2LNq32Pseudo:
+ case ARM::VLD2LNd8Pseudo_UPD:
+ case ARM::VLD2LNd16Pseudo_UPD:
+ case ARM::VLD2LNd32Pseudo_UPD:
+ case ARM::VLD2LNq16Pseudo_UPD:
+ case ARM::VLD2LNq32Pseudo_UPD:
+ case ARM::VLD4LNd8Pseudo:
+ case ARM::VLD4LNd16Pseudo:
+ case ARM::VLD4LNd32Pseudo:
+ case ARM::VLD4LNq16Pseudo:
+ case ARM::VLD4LNq32Pseudo:
+ case ARM::VLD4LNd8Pseudo_UPD:
+ case ARM::VLD4LNd16Pseudo_UPD:
+ case ARM::VLD4LNd32Pseudo_UPD:
+ case ARM::VLD4LNq16Pseudo_UPD:
+ case ARM::VLD4LNq32Pseudo_UPD:
+ // If the address is not 64-bit aligned, the latencies of these
+ // instructions increase by one.
+ ++Latency;
+ break;
+ }
+
return Latency;
}
@@ -2264,9 +2528,7 @@ int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
default:
return ItinData->getStageLatency(get(Opcode).getSchedClass());
case ARM::VLDMQIA:
- case ARM::VLDMQDB:
case ARM::VSTMQIA:
- case ARM::VSTMQDB:
return 2;
}
}
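The isProfitableToIfCvt heuristic above weighs the predicated cost against the expected cost of branching: UnpredCost = Probability * NumCycles + 1 (the branch itself) + (1 - Confidence) * MispredictionPenalty. A standalone sketch of the same comparison; the 8-cycle misprediction penalty in the example is an assumed value, not a number from the source:

    #include <cstdio>

    // Same cost comparison as isProfitableToIfCvt above.
    bool profitableToIfCvt(unsigned NumCycles, unsigned ExtraPredCycles,
                           float Probability, float Confidence,
                           unsigned MispredictPenalty) {
      if (!NumCycles)
        return false;
      float UnpredCost = Probability * NumCycles;
      UnpredCost += 1.0f; // the branch itself
      UnpredCost += (1.0f - Confidence) * MispredictPenalty;
      return float(NumCycles + ExtraPredCycles) < UnpredCost;
    }

    int main() {
      // A 4-cycle block taken half the time with low confidence:
      // 4 + 1 = 5 < 0.5*4 + 1 + 0.5*8 = 7, so predication wins.
      std::printf("%d\n", profitableToIfCvt(4, 1, 0.5f, 0.5f, 8));
      return 0;
    }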
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 7e2183d..9a2faf8 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -34,25 +34,7 @@ namespace ARMII {
//===------------------------------------------------------------------===//
// This four-bit field describes the addressing mode used.
-
- AddrModeMask = 0x1f,
- AddrModeNone = 0,
- AddrMode1 = 1,
- AddrMode2 = 2,
- AddrMode3 = 3,
- AddrMode4 = 4,
- AddrMode5 = 5,
- AddrMode6 = 6,
- AddrModeT1_1 = 7,
- AddrModeT1_2 = 8,
- AddrModeT1_4 = 9,
- AddrModeT1_s = 10, // i8 * 4 for pc and sp relative data
- AddrModeT2_i12 = 11,
- AddrModeT2_i8 = 12,
- AddrModeT2_so = 13,
- AddrModeT2_pc = 14, // +/- i12 for pc relative data
- AddrModeT2_i8s4 = 15, // i8 * 4
- AddrMode_i12 = 16,
+ AddrModeMask = 0x1f, // The AddrMode enums are declared in ARMBaseInfo.h
// Size* - Flags to keep track of the size of an instruction.
SizeShift = 5,
@@ -64,11 +46,9 @@ namespace ARMII {
// IndexMode - Unindex, pre-indexed, or post-indexed are valid for load
// and store ops only. Generic "updating" flag is used for ld/st multiple.
+ // The index mode enums are declared in ARMBaseInfo.h
IndexModeShift = 8,
IndexModeMask = 3 << IndexModeShift,
- IndexModePre = 1,
- IndexModePost = 2,
- IndexModeUpd = 3,
//===------------------------------------------------------------------===//
// Instruction encoding formats.
@@ -311,7 +291,7 @@ public:
int64_t &Offset1, int64_t &Offset2)const;
/// shouldScheduleLoadsNear - This is a used by the pre-regalloc scheduler to
- /// determine (in conjuction with areLoadsFromSameBasePtr) if two loads should
+ /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
/// be scheduled togther. On some targets if two loads are loading from
/// addresses in the same cache line, it's better if they are scheduled
/// together. This function takes two integers that represent the load offsets
@@ -327,7 +307,7 @@ public:
const MachineFunction &MF) const;
virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB,
- unsigned NumCyles, unsigned ExtraPredCycles,
+ unsigned NumCycles, unsigned ExtraPredCycles,
float Prob, float Confidence) const;
virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB,
@@ -337,10 +317,10 @@ public:
float Probability, float Confidence) const;
virtual bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
- unsigned NumCyles,
+ unsigned NumCycles,
float Probability,
float Confidence) const {
- return NumCyles == 1;
+ return NumCycles == 1;
}
/// AnalyzeCompare - For a comparison instruction, return the source register
@@ -496,19 +476,19 @@ void emitARMRegPlusImmediate(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI, DebugLoc dl,
unsigned DestReg, unsigned BaseReg, int NumBytes,
ARMCC::CondCodes Pred, unsigned PredReg,
- const ARMBaseInstrInfo &TII);
+ const ARMBaseInstrInfo &TII, unsigned MIFlags = 0);
void emitT2RegPlusImmediate(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI, DebugLoc dl,
unsigned DestReg, unsigned BaseReg, int NumBytes,
ARMCC::CondCodes Pred, unsigned PredReg,
- const ARMBaseInstrInfo &TII);
+ const ARMBaseInstrInfo &TII, unsigned MIFlags = 0);
void emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI,
+ MachineBasicBlock::iterator &MBBI, DebugLoc dl,
unsigned DestReg, unsigned BaseReg,
int NumBytes, const TargetInstrInfo &TII,
const ARMBaseRegisterInfo& MRI,
- DebugLoc dl);
+ unsigned MIFlags = 0);
/// rewriteARMFrameIndex / rewriteT2FrameIndex -
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 67a4b7d..ea1f08a 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -88,7 +88,7 @@ BitVector ARMBaseRegisterInfo::
getReservedRegs(const MachineFunction &MF) const {
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
- // FIXME: avoid re-calculating this everytime.
+ // FIXME: avoid re-calculating this every time.
BitVector Reserved(getNumRegs());
Reserved.set(ARM::SP);
Reserved.set(ARM::PC);
@@ -342,12 +342,51 @@ ARMBaseRegisterInfo::canCombineSubRegIndices(const TargetRegisterClass *RC,
return false;
}
+const TargetRegisterClass*
+ARMBaseRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC)
+ const {
+ const TargetRegisterClass *Super = RC;
+ TargetRegisterClass::sc_iterator I = RC->superclasses_begin();
+ do {
+ switch (Super->getID()) {
+ case ARM::GPRRegClassID:
+ case ARM::SPRRegClassID:
+ case ARM::DPRRegClassID:
+ case ARM::QPRRegClassID:
+ case ARM::QQPRRegClassID:
+ case ARM::QQQQPRRegClassID:
+ return Super;
+ }
+ Super = *I++;
+ } while (Super);
+ return RC;
+}
const TargetRegisterClass *
ARMBaseRegisterInfo::getPointerRegClass(unsigned Kind) const {
return ARM::GPRRegisterClass;
}
+unsigned
+ARMBaseRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ switch (RC->getID()) {
+ default:
+ return 0;
+ case ARM::tGPRRegClassID:
+ return TFI->hasFP(MF) ? 4 : 5;
+ case ARM::GPRRegClassID: {
+ unsigned FP = TFI->hasFP(MF) ? 1 : 0;
+ return 10 - FP - (STI.isR9Reserved() ? 1 : 0);
+ }
+ case ARM::SPRRegClassID: // Currently not used as 'rep' register class.
+ case ARM::DPRRegClassID:
+ return 32 - 10;
+ }
+}
+
/// getAllocationOrder - Returns the register allocation order for a specified
/// register class in the form of a pair of TargetRegisterClass iterators.
std::pair<TargetRegisterClass::iterator,TargetRegisterClass::iterator>
@@ -428,6 +467,10 @@ ARMBaseRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC,
ARM::R0, ARM::R2, ARM::R10,ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8
};
+ // We only support even/odd hints for GPR and rGPR.
+ if (RC != ARM::GPRRegisterClass && RC != ARM::rGPRRegisterClass)
+ return std::make_pair(RC->allocation_order_begin(MF),
+ RC->allocation_order_end(MF));
if (HintType == ARMRI::RegPairEven) {
if (isPhysicalRegister(HintReg) && getRegisterPairEven(HintReg, MF) == 0)
@@ -530,6 +573,29 @@ ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
}
}
+bool
+ARMBaseRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const {
+ // CortexA9 has a Write-after-write hazard for NEON registers.
+ if (!STI.isCortexA9())
+ return false;
+
+ switch (RC->getID()) {
+ case ARM::DPRRegClassID:
+ case ARM::DPR_8RegClassID:
+ case ARM::DPR_VFP2RegClassID:
+ case ARM::QPRRegClassID:
+ case ARM::QPR_8RegClassID:
+ case ARM::QPR_VFP2RegClassID:
+ case ARM::SPRRegClassID:
+ case ARM::SPR_8RegClassID:
+ // Avoid reusing S, D, and Q registers.
+ // Don't increase register pressure for QQ and QQQQ.
+ return true;
+ default:
+ return false;
+ }
+}
+
bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
@@ -806,7 +872,7 @@ emitLoadConstPool(MachineBasicBlock &MBB,
DebugLoc dl,
unsigned DestReg, unsigned SubIdx, int Val,
ARMCC::CondCodes Pred,
- unsigned PredReg) const {
+ unsigned PredReg, unsigned MIFlags) const {
MachineFunction &MF = *MBB.getParent();
MachineConstantPool *ConstantPool = MF.getConstantPool();
const Constant *C =
@@ -816,7 +882,8 @@ emitLoadConstPool(MachineBasicBlock &MBB,
BuildMI(MBB, MBBI, dl, TII.get(ARM::LDRcp))
.addReg(DestReg, getDefRegState(true), SubIdx)
.addConstantPoolIndex(Idx)
- .addImm(0).addImm(Pred).addReg(PredReg);
+ .addImm(0).addImm(Pred).addReg(PredReg)
+ .setMIFlags(MIFlags);
}
bool ARMBaseRegisterInfo::
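The GPR pressure limit above is plain arithmetic: a heuristic base of ten allocatable general-purpose registers, minus one when a frame pointer is needed and one more when R9 is reserved. A minimal standalone sketch of that computation (illustrative helper, not part of the patch):

static unsigned gprPressureLimit(bool HasFP, bool R9Reserved) {
  // Base of 10 allocatable GPRs, as in getRegPressureLimit above; lose
  // one to the frame pointer and one to a reserved R9 (e.g. Darwin).
  return 10u - (HasFP ? 1u : 0u) - (R9Reserved ? 1u : 0u);
}
// gprPressureLimit(true, true) == 8; the SPR/DPR limit is 32 - 10 = 22.
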
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index ba6bd2b..9edf72d 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -128,6 +128,12 @@ public:
const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const;
+ const TargetRegisterClass*
+ getLargestLegalSuperClass(const TargetRegisterClass *RC) const;
+
+ unsigned getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const;
+
std::pair<TargetRegisterClass::iterator,TargetRegisterClass::iterator>
getAllocationOrder(const TargetRegisterClass *RC,
unsigned HintType, unsigned HintReg,
@@ -139,6 +145,8 @@ public:
void UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
MachineFunction &MF) const;
+ virtual bool avoidWriteAfterWrite(const TargetRegisterClass *RC) const;
+
bool hasBasePointer(const MachineFunction &MF) const;
bool canRealignStack(const MachineFunction &MF) const;
@@ -176,7 +184,8 @@ public:
unsigned DestReg, unsigned SubIdx,
int Val,
ARMCC::CondCodes Pred = ARMCC::AL,
- unsigned PredReg = 0) const;
+ unsigned PredReg = 0,
+ unsigned MIFlags = MachineInstr::NoFlags)const;
/// Code Generation virtual methods...
virtual bool isReservedReg(const MachineFunction &MF, unsigned Reg) const;
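The added MIFlags parameter defaults to MachineInstr::NoFlags, so existing callers compile unchanged while new callers can tag the emitted load. A hedged usage sketch, with the surrounding variables assumed to be in scope as in the Thumb1 frame-lowering code:

// Hypothetical caller: mark a prologue constant-pool load as frame setup.
RI.emitLoadConstPool(MBB, MBBI, dl, ARM::R4, /*SubIdx=*/0, NumBytes,
                     ARMCC::AL, /*PredReg=*/0, MachineInstr::FrameSetup);
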
diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td
index 426ba13..d2981c0 100644
--- a/lib/Target/ARM/ARMCallingConv.td
+++ b/lib/Target/ARM/ARMCallingConv.td
@@ -22,6 +22,9 @@ class CCIfAlign<string Align, CCAction A>:
//===----------------------------------------------------------------------===//
def CC_ARM_APCS : CallingConv<[
+ // Handles byval parameters.
+ CCIfByVal<CCPassByVal<4, 4>>,
+
CCIfType<[i8, i16], CCPromoteToType<i32>>,
// Handle all vector types as either f64 or v2f64.
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index 9bbf6a0..fa73716 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -312,6 +312,15 @@ namespace {
unsigned getRegisterListOpValue(const MachineInstr &MI, unsigned Op)
const { return 0; }
+ unsigned getShiftRight8Imm(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getShiftRight16Imm(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getShiftRight32Imm(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+ unsigned getShiftRight64Imm(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
+
/// getMovi32Value - Return binary encoding of operand for movw/movt. If the
/// machine operand requires relocation, record the relocation and return
/// zero.
@@ -969,7 +978,7 @@ unsigned ARMCodeEmitter::getMachineSoImmOpValue(unsigned SoImm) {
unsigned ARMCodeEmitter::getAddrModeSBit(const MachineInstr &MI,
const TargetInstrDesc &TID) const {
- for (unsigned i = MI.getNumOperands(), e = TID.getNumOperands(); i != e; --i){
+ for (unsigned i = MI.getNumOperands(), e = TID.getNumOperands(); i >= e; --i){
const MachineOperand &MO = MI.getOperand(i-1);
if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR)
return 1 << ARMII::S_BitShift;
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index 13d1b33..baf95a3 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -1650,24 +1650,27 @@ bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) {
unsigned BrOffset = GetOffsetOf(Br.MI) + 4 - 2;
unsigned DestOffset = BBOffsets[DestBB->getNumber()];
if (BrOffset < DestOffset && (DestOffset - BrOffset) <= 126) {
- MachineBasicBlock::iterator CmpMI = Br.MI; --CmpMI;
- if (CmpMI->getOpcode() == ARM::tCMPi8) {
- unsigned Reg = CmpMI->getOperand(0).getReg();
- Pred = llvm::getInstrPredicate(CmpMI, PredReg);
- if (Pred == ARMCC::AL &&
- CmpMI->getOperand(1).getImm() == 0 &&
- isARMLowRegister(Reg)) {
- MachineBasicBlock *MBB = Br.MI->getParent();
- MachineInstr *NewBR =
- BuildMI(*MBB, CmpMI, Br.MI->getDebugLoc(), TII->get(NewOpc))
- .addReg(Reg).addMBB(DestBB, Br.MI->getOperand(0).getTargetFlags());
- CmpMI->eraseFromParent();
- Br.MI->eraseFromParent();
- Br.MI = NewBR;
- BBSizes[MBB->getNumber()] -= 2;
- AdjustBBOffsetsAfter(MBB, -2);
- ++NumCBZ;
- MadeChange = true;
+ MachineBasicBlock::iterator CmpMI = Br.MI;
+ if (CmpMI != Br.MI->getParent()->begin()) {
+ --CmpMI;
+ if (CmpMI->getOpcode() == ARM::tCMPi8) {
+ unsigned Reg = CmpMI->getOperand(0).getReg();
+ Pred = llvm::getInstrPredicate(CmpMI, PredReg);
+ if (Pred == ARMCC::AL &&
+ CmpMI->getOperand(1).getImm() == 0 &&
+ isARMLowRegister(Reg)) {
+ MachineBasicBlock *MBB = Br.MI->getParent();
+ MachineInstr *NewBR =
+ BuildMI(*MBB, CmpMI, Br.MI->getDebugLoc(), TII->get(NewOpc))
+ .addReg(Reg).addMBB(DestBB,Br.MI->getOperand(0).getTargetFlags());
+ CmpMI->eraseFromParent();
+ Br.MI->eraseFromParent();
+ Br.MI = NewBR;
+ BBSizes[MBB->getNumber()] -= 2;
+ AdjustBBOffsetsAfter(MBB, -2);
+ ++NumCBZ;
+ MadeChange = true;
+ }
}
}
}
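For context, the transformation this guard protects folds a zero compare into a compare-and-branch; sketched in Thumb assembly:

// Before (a tCMPi8 on a low register feeding a short forward branch):
//   cmp r2, #0
//   beq .LBB0_3        @ offset <= 126 bytes
// After:
//   cbz r2, .LBB0_3
// The new check bails out when the branch is the first instruction in
// its block, where decrementing CmpMI would walk off the front.
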
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index bd753d2..b6b3c75 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -455,6 +455,10 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) {
// Add an implicit def for the super-register.
MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
TransferImpOps(MI, MIB, MIB);
+
+ // Transfer memoperands.
+ MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+
MI.eraseFromParent();
}
@@ -496,10 +500,13 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) {
MIB.addOperand(MI.getOperand(OpIdx++));
MIB.addOperand(MI.getOperand(OpIdx++));
- if (SrcIsKill)
- // Add an implicit kill for the super-reg.
- (*MIB).addRegisterKilled(SrcReg, TRI, true);
+ if (SrcIsKill) // Add an implicit kill for the super-reg.
+ MIB->addRegisterKilled(SrcReg, TRI, true);
TransferImpOps(MI, MIB, MIB);
+
+ // Transfer memoperands.
+ MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+
MI.eraseFromParent();
}
@@ -622,9 +629,8 @@ void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI,
MIB.addOperand(MI.getOperand(OpIdx++));
MIB.addOperand(MI.getOperand(OpIdx++));
- if (SrcIsKill)
- // Add an implicit kill for the super-reg.
- (*MIB).addRegisterKilled(SrcReg, TRI, true);
+ if (SrcIsKill) // Add an implicit kill for the super-reg.
+ MIB->addRegisterKilled(SrcReg, TRI, true);
TransferImpOps(MI, MIB, MIB);
MI.eraseFromParent();
}
@@ -655,8 +661,8 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
unsigned SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal);
LO16 = LO16.addImm(SOImmValV1);
HI16 = HI16.addImm(SOImmValV2);
- (*LO16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- (*HI16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ LO16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ HI16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
LO16.addImm(Pred).addReg(PredReg).addReg(0);
HI16.addImm(Pred).addReg(PredReg).addReg(0);
TransferImpOps(MI, LO16, HI16);
@@ -692,8 +698,8 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16);
}
- (*LO16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- (*HI16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ LO16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ HI16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
LO16.addImm(Pred).addReg(PredReg);
HI16.addImm(Pred).addReg(PredReg);
@@ -708,6 +714,78 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
switch (Opcode) {
default:
return false;
+ case ARM::VMOVScc:
+ case ARM::VMOVDcc: {
+ unsigned newOpc = Opcode == ARM::VMOVScc ? ARM::VMOVS : ARM::VMOVD;
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(newOpc),
+ MI.getOperand(1).getReg())
+ .addReg(MI.getOperand(2).getReg(),
+ getKillRegState(MI.getOperand(2).isKill()))
+ .addImm(MI.getOperand(3).getImm()) // 'pred'
+ .addReg(MI.getOperand(4).getReg());
+
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::MOVCCr: {
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVr),
+ MI.getOperand(1).getReg())
+ .addReg(MI.getOperand(2).getReg(),
+ getKillRegState(MI.getOperand(2).isKill()))
+ .addImm(MI.getOperand(3).getImm()) // 'pred'
+ .addReg(MI.getOperand(4).getReg())
+ .addReg(0); // 's' bit
+
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::MOVCCs: {
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVs),
+ (MI.getOperand(1).getReg()))
+ .addReg(MI.getOperand(2).getReg(),
+ getKillRegState(MI.getOperand(2).isKill()))
+ .addReg(MI.getOperand(3).getReg(),
+ getKillRegState(MI.getOperand(3).isKill()))
+ .addImm(MI.getOperand(4).getImm())
+ .addImm(MI.getOperand(5).getImm()) // 'pred'
+ .addReg(MI.getOperand(6).getReg())
+ .addReg(0); // 's' bit
+
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::MOVCCi16: {
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi16),
+ MI.getOperand(1).getReg())
+ .addImm(MI.getOperand(2).getImm())
+ .addImm(MI.getOperand(3).getImm()) // 'pred'
+ .addReg(MI.getOperand(4).getReg());
+
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::MOVCCi: {
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi),
+ MI.getOperand(1).getReg())
+ .addImm(MI.getOperand(2).getImm())
+ .addImm(MI.getOperand(3).getImm()) // 'pred'
+ .addReg(MI.getOperand(4).getReg())
+ .addReg(0); // 's' bit
+
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::MVNCCi: {
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MVNi),
+ MI.getOperand(1).getReg())
+ .addImm(MI.getOperand(2).getImm())
+ .addImm(MI.getOperand(3).getImm()) // 'pred'
+ .addReg(MI.getOperand(4).getReg())
+ .addReg(0); // 's' bit
+
+ MI.eraseFromParent();
+ return true;
+ }
case ARM::Int_eh_sjlj_dispatchsetup: {
MachineFunction &MF = *MI.getParent()->getParent();
const ARMBaseInstrInfo *AII =
@@ -726,9 +804,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
llvm::emitT2RegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
FramePtr, -NumBytes, ARMCC::AL, 0, *TII);
} else if (AFI->isThumbFunction()) {
- llvm::emitThumbRegPlusImmediate(MBB, MBBI, ARM::R6,
- FramePtr, -NumBytes,
- *TII, RI, MI.getDebugLoc());
+ llvm::emitThumbRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
+ FramePtr, -NumBytes, *TII, RI);
} else {
llvm::emitARMRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
FramePtr, -NumBytes, ARMCC::AL, 0,
@@ -785,7 +862,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
TII->get(ARM::BL))
.addExternalSymbol("__aeabi_read_tp", 0);
- (*MIB).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
TransferImpOps(MI, MIB, MIB);
MI.eraseFromParent();
return true;
@@ -800,7 +877,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
TII->get(NewLdOpc), DstReg)
.addOperand(MI.getOperand(1)));
- (*MIB1).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIB1->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
MachineInstrBuilder MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
TII->get(ARM::tPICADD))
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
@@ -823,7 +900,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
const MachineOperand &MO1 = MI.getOperand(1);
const GlobalValue *GV = MO1.getGlobal();
unsigned TF = MO1.getTargetFlags();
- bool isARM = Opcode != ARM::t2MOV_ga_pcrel;
+ bool isARM = (Opcode != ARM::t2MOV_ga_pcrel && Opcode != ARM::t2MOV_ga_dyn);
bool isPIC = (Opcode != ARM::MOV_ga_dyn && Opcode != ARM::t2MOV_ga_dyn);
unsigned LO16Opc = isARM ? ARM::MOVi16_ga_pcrel : ARM::t2MOVi16_ga_pcrel;
unsigned HI16Opc = isARM ? ARM::MOVTi16_ga_pcrel : ARM::t2MOVTi16_ga_pcrel;
@@ -856,7 +933,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
if (isARM) {
AddDefaultPred(MIB3);
if (Opcode == ARM::MOV_ga_pcrel_ldr)
- (*MIB2).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIB2->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
}
TransferImpOps(MI, MIB1, MIB3);
MI.eraseFromParent();
@@ -896,9 +973,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
return true;
}
- case ARM::VLDMQIA:
- case ARM::VLDMQDB: {
- unsigned NewOpc = (Opcode == ARM::VLDMQIA) ? ARM::VLDMDIA : ARM::VLDMDDB;
+ case ARM::VLDMQIA: {
+ unsigned NewOpc = ARM::VLDMDIA;
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc));
unsigned OpIdx = 0;
@@ -927,9 +1003,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
return true;
}
- case ARM::VSTMQIA:
- case ARM::VSTMQDB: {
- unsigned NewOpc = (Opcode == ARM::VSTMQIA) ? ARM::VSTMDIA : ARM::VSTMDDB;
+ case ARM::VSTMQIA: {
+ unsigned NewOpc = ARM::VSTMDIA;
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc));
unsigned OpIdx = 0;
@@ -950,9 +1025,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
unsigned D1 = TRI->getSubReg(SrcReg, ARM::dsub_1);
MIB.addReg(D0).addReg(D1);
- if (SrcIsKill)
- // Add an implicit kill for the Q register.
- (*MIB).addRegisterKilled(SrcReg, TRI, true);
+ if (SrcIsKill) // Add an implicit kill for the Q register.
+ MIB->addRegisterKilled(SrcReg, TRI, true);
TransferImpOps(MI, MIB, MIB);
MI.eraseFromParent();
@@ -960,14 +1034,16 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
}
case ARM::VDUPfqf:
case ARM::VDUPfdf:{
- unsigned NewOpc = Opcode == ARM::VDUPfqf ? ARM::VDUPLNfq : ARM::VDUPLNfd;
+ unsigned NewOpc = Opcode == ARM::VDUPfqf ? ARM::VDUPLN32q :
+ ARM::VDUPLN32d;
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc));
unsigned OpIdx = 0;
unsigned SrcReg = MI.getOperand(1).getReg();
unsigned Lane = getARMRegisterNumbering(SrcReg) & 1;
unsigned DReg = TRI->getMatchingSuperReg(SrcReg,
- Lane & 1 ? ARM::ssub_1 : ARM::ssub_0, &ARM::DPR_VFP2RegClass);
+ Lane & 1 ? ARM::ssub_1 : ARM::ssub_0,
+ &ARM::DPR_VFP2RegClass);
// The lane is [0,1] for the containing DReg superregister.
// Copy the dst/src register operands.
MIB.addOperand(MI.getOperand(OpIdx++));
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 26f48b3..3baf274 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -14,6 +14,7 @@
//===----------------------------------------------------------------------===//
#include "ARM.h"
+#include "ARMAddressingModes.h"
#include "ARMBaseInstrInfo.h"
#include "ARMCallingConv.h"
#include "ARMRegisterInfo.h"
@@ -26,6 +27,7 @@
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Module.h"
+#include "llvm/Operator.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
@@ -115,6 +117,11 @@ class ARMFastISel : public FastISel {
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill,
unsigned Op1, bool Op1IsKill);
+ virtual unsigned FastEmitInst_rrr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ unsigned Op2, bool Op2IsKill);
virtual unsigned FastEmitInst_ri(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill,
@@ -123,14 +130,18 @@ class ARMFastISel : public FastISel {
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill,
const ConstantFP *FPImm);
- virtual unsigned FastEmitInst_i(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC,
- uint64_t Imm);
virtual unsigned FastEmitInst_rri(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill,
unsigned Op1, bool Op1IsKill,
uint64_t Imm);
+ virtual unsigned FastEmitInst_i(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ uint64_t Imm);
+ virtual unsigned FastEmitInst_ii(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ uint64_t Imm1, uint64_t Imm2);
+
virtual unsigned FastEmitInst_extractsubreg(MVT RetVT,
unsigned Op0, bool Op0IsKill,
uint32_t Idx);
@@ -193,6 +204,7 @@ class ARMFastISel : public FastISel {
// OptionalDef handling routines.
private:
+ bool isARMNEONPred(const MachineInstr *MI);
bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR);
const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
void AddLoadStoreOperands(EVT VT, Address &Addr,
@@ -221,6 +233,21 @@ bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
return true;
}
+bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) {
+ const TargetInstrDesc &TID = MI->getDesc();
+
+ // Thumb2 functions, and instructions outside the NEON domain, are already
+ // handled via isPredicable.
+ if ((TID.TSFlags & ARMII::DomainMask) != ARMII::DomainNEON ||
+ AFI->isThumb2Function())
+ return false;
+
+ for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i)
+ if (TID.OpInfo[i].isPredicate())
+ return true;
+
+ return false;
+}
+
// If the machine is predicable go ahead and add the predicate operands, if
// it needs default CC operands add those.
// TODO: If we want to support thumb1 then we'll need to deal with optional
@@ -230,8 +257,10 @@ const MachineInstrBuilder &
ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
MachineInstr *MI = &*MIB;
- // Do we use a predicate?
- if (TII.isPredicable(MI))
+ // Do we use a predicate? Or...
+ // Are we a NEON instruction in ARM mode with a predicate operand? If so,
+ // we know it isn't predicable, but add the operands anyway.
+ if (TII.isPredicable(MI) || isARMNEONPred(MI))
AddDefaultPred(MIB);
// Do we optionally set a predicate? Preds is size > 0 iff the predicate
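NEON instructions in ARM mode are architecturally unconditional, but their MachineInstr forms still carry the trailing predicate operand pair, which is what AddDefaultPred supplies:

// AddDefaultPred(MIB) appends the fixed always-execute predicate pair:
//   MIB.addImm(ARMCC::AL).addReg(0);  // pred = 14 (AL), no predicate reg
// isPredicable() is false for NEON in ARM mode, so without the
// isARMNEONPred check these operands would be missing and the operand
// count would no longer match the instruction description.
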
@@ -296,6 +325,31 @@ unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
return ResultReg;
}
+unsigned ARMFastISel::FastEmitInst_rrr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ unsigned Op2, bool Op2IsKill) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addReg(Op2, Op2IsKill * RegState::Kill));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addReg(Op2, Op2IsKill * RegState::Kill));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
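A hedged usage sketch for the new three-register emitter; the opcode and operands are illustrative only, not a claim about which selector calls it:

// Hypothetical caller: emit Rd = Rn * Rm + Ra via MLA.
unsigned Rd = FastEmitInst_rrr(ARM::MLA, ARM::GPRRegisterClass,
                               Rn, /*Op0IsKill=*/true,
                               Rm, /*Op1IsKill=*/true,
                               Ra, /*Op2IsKill=*/false);
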
unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill,
@@ -384,6 +438,26 @@ unsigned ARMFastISel::FastEmitInst_i(unsigned MachineInstOpcode,
return ResultReg;
}
+unsigned ARMFastISel::FastEmitInst_ii(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ uint64_t Imm1, uint64_t Imm2) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addImm(Imm1).addImm(Imm2));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addImm(Imm1).addImm(Imm2));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY),
+ ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
unsigned ARMFastISel::FastEmitInst_extractsubreg(MVT RetVT,
unsigned Op0, bool Op0IsKill,
uint32_t Idx) {
@@ -667,24 +741,29 @@ bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
TmpOffset += SL->getElementOffset(Idx);
} else {
uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
- SmallVector<const Value *, 4> Worklist;
- Worklist.push_back(Op);
- do {
- Op = Worklist.pop_back_val();
+ for (;;) {
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
// Constant-offset addressing.
TmpOffset += CI->getSExtValue() * S;
- } else if (isa<AddOperator>(Op) &&
- isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) {
- // An add with a constant operand. Fold the constant.
+ break;
+ }
+ if (isa<AddOperator>(Op) &&
+ (!isa<Instruction>(Op) ||
+ FuncInfo.MBBMap[cast<Instruction>(Op)->getParent()]
+ == FuncInfo.MBB) &&
+ isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) {
+ // An add (in the same block) with a constant operand. Fold the
+ // constant.
ConstantInt *CI =
- cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
+ cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
TmpOffset += CI->getSExtValue() * S;
- // Add the other operand back to the work list.
- Worklist.push_back(cast<AddOperator>(Op)->getOperand(0));
- } else
- goto unsupported_gep;
- } while (!Worklist.empty());
+ // Iterate on the other operand.
+ Op = cast<AddOperator>(Op)->getOperand(0);
+ continue;
+ }
+ // Unsupported
+ goto unsupported_gep;
+ }
}
}
@@ -767,26 +846,9 @@ void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT) {
// Since the offset is too large for the load/store instruction
// get the reg+offset into a register.
if (needsLowering) {
- ARMCC::CondCodes Pred = ARMCC::AL;
- unsigned PredReg = 0;
-
- TargetRegisterClass *RC = isThumb ? ARM::tGPRRegisterClass :
- ARM::GPRRegisterClass;
- unsigned BaseReg = createResultReg(RC);
-
- if (!isThumb)
- emitARMRegPlusImmediate(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- BaseReg, Addr.Base.Reg, Addr.Offset,
- Pred, PredReg,
- static_cast<const ARMBaseInstrInfo&>(TII));
- else {
- assert(AFI->isThumb2Function());
- emitT2RegPlusImmediate(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- BaseReg, Addr.Base.Reg, Addr.Offset, Pred, PredReg,
- static_cast<const ARMBaseInstrInfo&>(TII));
- }
+ Addr.Base.Reg = FastEmit_ri_(MVT::i32, ISD::ADD, Addr.Base.Reg,
+ /*Op0IsKill*/false, Addr.Offset, MVT::i32);
Addr.Offset = 0;
- Addr.Base.Reg = BaseReg;
}
}
@@ -797,7 +859,7 @@ void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr,
if (VT.getSimpleVT().SimpleTy == MVT::f32 ||
VT.getSimpleVT().SimpleTy == MVT::f64)
Addr.Offset /= 4;
-
+
// Frame base works a bit differently. Handle it separately.
if (Addr.BaseType == Address::FrameIndexBase) {
int FI = Addr.Base.FI;
@@ -819,7 +881,7 @@ void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr,
} else {
// Now add the rest of the operands.
MIB.addReg(Addr.Base.Reg);
-
+
// ARM halfword load/stores need an additional operand.
if (!isThumb && VT.getSimpleVT().SimpleTy == MVT::i16) MIB.addReg(0);
@@ -1007,18 +1069,16 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
// behavior.
// TODO: Factor this out.
if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
- if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {
- MVT VT;
- const Type *Ty = CI->getOperand(0)->getType();
- if (!isTypeLegal(Ty, VT))
- return false;
-
+ MVT SourceVT;
+ const Type *Ty = CI->getOperand(0)->getType();
+ if (CI->hasOneUse() && (CI->getParent() == I->getParent())
+ && isTypeLegal(Ty, SourceVT)) {
bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
if (isFloat && !Subtarget->hasVFP2())
return false;
unsigned CmpOpc;
- switch (VT.SimpleTy) {
+ switch (SourceVT.SimpleTy) {
default: return false;
// TODO: Verify compares.
case MVT::f32:
@@ -1033,7 +1093,14 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
}
// Get the compare predicate.
- ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate());
+ // Try to take advantage of fallthrough opportunities.
+ CmpInst::Predicate Predicate = CI->getPredicate();
+ if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
+ std::swap(TBB, FBB);
+ Predicate = CmpInst::getInversePredicate(Predicate);
+ }
+
+ ARMCC::CondCodes ARMPred = getComparePred(Predicate);
// We may not handle every CC for now.
if (ARMPred == ARMCC::AL) return false;
@@ -1061,19 +1128,55 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
FuncInfo.MBB->addSuccessor(TBB);
return true;
}
+ } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
+ MVT SourceVT;
+ if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
+ (isTypeLegal(TI->getOperand(0)->getType(), SourceVT))) {
+ unsigned TstOpc = isThumb ? ARM::t2TSTri : ARM::TSTri;
+ unsigned OpReg = getRegForValue(TI->getOperand(0));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TstOpc))
+ .addReg(OpReg).addImm(1));
+
+ unsigned CCMode = ARMCC::NE;
+ if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
+ std::swap(TBB, FBB);
+ CCMode = ARMCC::EQ;
+ }
+
+ unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
+ .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
+
+ FastEmitBranch(FBB, DL);
+ FuncInfo.MBB->addSuccessor(TBB);
+ return true;
+ }
}
unsigned CmpReg = getRegForValue(BI->getCondition());
if (CmpReg == 0) return false;
- // Re-set the flags just in case.
- unsigned CmpOpc = isThumb ? ARM::t2CMPri : ARM::CMPri;
- AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
- .addReg(CmpReg).addImm(0));
+ // We've been divorced from our compare! Our block was split, and
+ // now our compare lives in a predecessor block. We mustn't
+ // re-compare here, as the children of the compare aren't guaranteed
+ // live across the block boundary (we *could* check for this).
+ // Regardless, the compare has been done in the predecessor block,
+ // and it left a value for us in a virtual register. Ergo, we test
+ // the one-bit value left in the virtual register.
+ unsigned TstOpc = isThumb ? ARM::t2TSTri : ARM::TSTri;
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TstOpc))
+ .addReg(CmpReg).addImm(1));
+
+ unsigned CCMode = ARMCC::NE;
+ if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
+ std::swap(TBB, FBB);
+ CCMode = ARMCC::EQ;
+ }
unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
- .addMBB(TBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+ .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
FastEmitBranch(FBB, DL);
FuncInfo.MBB->addSuccessor(TBB);
return true;
@@ -1636,17 +1739,9 @@ bool ARMFastISel::SelectRet(const Instruction *I) {
unsigned ARMFastISel::ARMSelectCallOp(const GlobalValue *GV) {
- // Depend our opcode for thumb on whether or not we're targeting an
- // externally callable function. For libcalls we'll just pass a NULL GV
- // in here.
- bool isExternal = false;
- if (!GV || GV->hasExternalLinkage()) isExternal = true;
-
// Darwin needs the r9 versions of the opcodes.
bool isDarwin = Subtarget->isTargetDarwin();
- if (isThumb && isExternal) {
- return isDarwin ? ARM::tBLXi_r9 : ARM::tBLXi;
- } else if (isThumb) {
+ if (isThumb) {
return isDarwin ? ARM::tBLr9 : ARM::tBL;
} else {
return isDarwin ? ARM::BLr9 : ARM::BL;
@@ -1671,9 +1766,6 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
else if (!isTypeLegal(RetTy, RetVT))
return false;
- // For now we're using BLX etc on the assumption that we have v5t ops.
- if (!Subtarget->hasV5TOps()) return false;
-
// TODO: For now if we have long calls specified we don't handle the call.
if (EnableARMLongCalls) return false;
@@ -1711,7 +1803,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
return false;
- // Issue the call, BLXr9 for darwin, BLX otherwise. This uses V5 ops.
+ // Issue the call, BLr9 for darwin, BL otherwise.
// TODO: Turn this into the table of arm call ops.
MachineInstrBuilder MIB;
unsigned CallOpc = ARMSelectCallOp(NULL);
@@ -1772,13 +1864,9 @@ bool ARMFastISel::SelectCall(const Instruction *I) {
else if (!isTypeLegal(RetTy, RetVT))
return false;
- // For now we're using BLX etc on the assumption that we have v5t ops.
- // TODO: Maybe?
- if (!Subtarget->hasV5TOps()) return false;
-
// TODO: For now if we have long calls specified we don't handle the call.
if (EnableARMLongCalls) return false;
-
+
// Set up the argument vectors.
SmallVector<Value*, 8> Args;
SmallVector<unsigned, 8> ArgRegs;
@@ -1827,7 +1915,7 @@ bool ARMFastISel::SelectCall(const Instruction *I) {
if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
return false;
- // Issue the call, BLXr9 for darwin, BLX otherwise. This uses V5 ops.
+ // Issue the call, BLr9 for darwin, BL otherwise.
// TODO: Turn this into the table of arm call ops.
MachineInstrBuilder MIB;
unsigned CallOpc = ARMSelectCallOp(GV);
@@ -1842,7 +1930,7 @@ bool ARMFastISel::SelectCall(const Instruction *I) {
MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(CallOpc))
.addGlobalAddress(GV, 0, 0));
-
+
// Add implicit physical register uses to the call.
for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
MIB.addReg(RegArgs[i]);
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index 68c33f0..e2e95d4 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -106,14 +106,13 @@ static void
emitSPUpdate(bool isARM,
MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
DebugLoc dl, const ARMBaseInstrInfo &TII,
- int NumBytes,
- ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
+ int NumBytes, unsigned MIFlags = MachineInstr::NoFlags) {
if (isARM)
emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes,
- Pred, PredReg, TII);
+ ARMCC::AL, 0, TII, MIFlags);
else
emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes,
- Pred, PredReg, TII);
+ ARMCC::AL, 0, TII, MIFlags);
}
void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
@@ -141,11 +140,13 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
// Allocate the vararg register save area. This is not counted in NumBytes.
if (VARegSaveSize)
- emitSPUpdate(isARM, MBB, MBBI, dl, TII, -VARegSaveSize);
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, -VARegSaveSize,
+ MachineInstr::FrameSetup);
if (!AFI->hasStackFrame()) {
if (NumBytes != 0)
- emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes);
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
+ MachineInstr::FrameSetup);
return;
}
@@ -196,7 +197,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri : ARM::t2ADDri;
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, dl, TII.get(ADDriOpc), FramePtr)
- .addFrameIndex(FramePtrSpillFI).addImm(0);
+ .addFrameIndex(FramePtrSpillFI).addImm(0)
+ .setMIFlag(MachineInstr::FrameSetup);
AddDefaultCC(AddDefaultPred(MIB));
}
@@ -226,7 +228,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
NumBytes = DPRCSOffset;
if (NumBytes) {
// Adjust SP after all the callee-save spills.
- emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes);
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
+ MachineInstr::FrameSetup);
if (HasFP && isARM)
// Restore from fp only in ARM mode: e.g. sub sp, r7, #24
// Note it's not safe to do this in Thumb2 mode because it would have
@@ -282,6 +285,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
// of the stack pointer is at this point. Any variable size objects
// will be allocated after this, so we can still use the base pointer
// to reference locals.
+ // FIXME: Clarify FrameSetup flags here.
if (RegInfo->hasBasePointer(MF)) {
if (isARM)
BuildMI(MBB, MBBI, dl,
@@ -396,8 +400,8 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
// Jump to label or value in register.
if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNdiND) {
unsigned TCOpcode = (RetOpcode == ARM::TCRETURNdi)
- ? (STI.isThumb() ? ARM::TAILJMPdt : ARM::TAILJMPd)
- : (STI.isThumb() ? ARM::TAILJMPdNDt : ARM::TAILJMPdND);
+ ? (STI.isThumb() ? ARM::tTAILJMPd : ARM::TAILJMPd)
+ : (STI.isThumb() ? ARM::tTAILJMPdND : ARM::TAILJMPdND);
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode));
if (JumpTarget.isGlobal())
MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
@@ -408,10 +412,12 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
JumpTarget.getTargetFlags());
}
} else if (RetOpcode == ARM::TCRETURNri) {
- BuildMI(MBB, MBBI, dl, TII.get(ARM::TAILJMPr)).
+ BuildMI(MBB, MBBI, dl,
+ TII.get(STI.isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr)).
addReg(JumpTarget.getReg(), RegState::Kill);
} else if (RetOpcode == ARM::TCRETURNriND) {
- BuildMI(MBB, MBBI, dl, TII.get(ARM::TAILJMPrND)).
+ BuildMI(MBB, MBBI, dl,
+ TII.get(STI.isThumb() ? ARM::tTAILJMPrND : ARM::TAILJMPrND)).
addReg(JumpTarget.getReg(), RegState::Kill);
}
@@ -439,8 +445,7 @@ ARMFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
int
ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
- int FI,
- unsigned &FrameReg,
+ int FI, unsigned &FrameReg,
int SPAdj) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const ARMBaseRegisterInfo *RegInfo =
@@ -484,19 +489,23 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
return FPOffset;
} else if (MFI->hasVarSizedObjects()) {
assert(RegInfo->hasBasePointer(MF) && "missing base pointer!");
- // Try to use the frame pointer if we can, else use the base pointer
- // since it's available. This is handy for the emergency spill slot, in
- // particular.
if (AFI->isThumb2Function()) {
+ // Try to use the frame pointer if we can, else use the base pointer
+ // since it's available. This is handy for the emergency spill slot, in
+ // particular.
if (FPOffset >= -255 && FPOffset < 0) {
FrameReg = RegInfo->getFrameRegister(MF);
return FPOffset;
}
- } else
- FrameReg = RegInfo->getBaseRegister();
+ }
} else if (AFI->isThumb2Function()) {
+ // Use add <rd>, sp, #<imm8>
+ // ldr <rd>, [sp, #<imm8>]
+ // if at all possible to save space.
+ if (Offset >= 0 && (Offset & 3) == 0 && Offset <= 1020)
+ return Offset;
// In Thumb2 mode, the negative offset is very limited. Try to avoid
- // out of range references.
+ // out of range references. ldr <rt>,[<rn>, #-<imm8>]
if (FPOffset >= -255 && FPOffset < 0) {
FrameReg = RegInfo->getFrameRegister(MF);
return FPOffset;
@@ -524,7 +533,8 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
const std::vector<CalleeSavedInfo> &CSI,
unsigned StmOpc, unsigned StrOpc,
bool NoGap,
- bool(*Func)(unsigned, bool)) const {
+ bool(*Func)(unsigned, bool),
+ unsigned MIFlags) const {
MachineFunction &MF = *MBB.getParent();
const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
@@ -567,14 +577,14 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
if (Regs.size() > 1 || StrOpc== 0) {
MachineInstrBuilder MIB =
AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP)
- .addReg(ARM::SP));
+ .addReg(ARM::SP).setMIFlags(MIFlags));
for (unsigned i = 0, e = Regs.size(); i < e; ++i)
MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second));
} else if (Regs.size() == 1) {
MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc),
ARM::SP)
.addReg(Regs[0].first, getKillRegState(Regs[0].second))
- .addReg(ARM::SP);
+ .addReg(ARM::SP).setMIFlags(MIFlags);
// ARM mode needs an extra reg0 here due to addrmode2. Will go away once
// that refactoring is complete (eventually).
if (StrOpc == ARM::STR_PRE) {
@@ -676,9 +686,12 @@ bool ARMFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD;
unsigned PushOneOpc = AFI->isThumbFunction() ? ARM::t2STR_PRE : ARM::STR_PRE;
unsigned FltOpc = ARM::VSTMDDB_UPD;
- emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register);
- emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register);
- emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register);
+ emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register,
+ MachineInstr::FrameSetup);
+ emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register,
+ MachineInstr::FrameSetup);
+ emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
+ MachineInstr::FrameSetup);
return true;
}
diff --git a/lib/Target/ARM/ARMFrameLowering.h b/lib/Target/ARM/ARMFrameLowering.h
index 1288b70..61bb8af 100644
--- a/lib/Target/ARM/ARMFrameLowering.h
+++ b/lib/Target/ARM/ARMFrameLowering.h
@@ -51,7 +51,8 @@ public:
bool canSimplifyCallFramePseudos(const MachineFunction &MF) const;
int getFrameIndexReference(const MachineFunction &MF, int FI,
unsigned &FrameReg) const;
- int ResolveFrameIndexReference(const MachineFunction &MF, int FI,
+ int ResolveFrameIndexReference(const MachineFunction &MF,
+ int FI,
unsigned &FrameReg, int SPAdj) const;
int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
@@ -62,7 +63,8 @@ public:
void emitPushInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI, unsigned StmOpc,
unsigned StrOpc, bool NoGap,
- bool(*Func)(unsigned, bool)) const;
+ bool(*Func)(unsigned, bool),
+ unsigned MIFlags = 0) const;
void emitPopInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI, unsigned LdmOpc,
unsigned LdrOpc, bool isVarArg, bool NoGap,
diff --git a/lib/Target/ARM/ARMHazardRecognizer.cpp b/lib/Target/ARM/ARMHazardRecognizer.cpp
index e97ce50..517bba8 100644
--- a/lib/Target/ARM/ARMHazardRecognizer.cpp
+++ b/lib/Target/ARM/ARMHazardRecognizer.cpp
@@ -49,6 +49,8 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
const TargetInstrDesc &LastTID = LastMI->getDesc();
// Skip over one non-VFP / NEON instruction.
if (!LastTID.isBarrier() &&
+ // On A9, AGU and NEON/FPU are muxed.
+ !(STI.isCortexA9() && (LastTID.mayLoad() || LastTID.mayStore())) &&
(LastTID.TSFlags & ARMII::DomainMask) == ARMII::DomainGeneral) {
MachineBasicBlock::iterator I = LastMI;
if (I != LastMI->getParent()->begin()) {
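Why memory operations are excluded here, as a descriptive note (the microarchitectural detail is an assumption, not stated in the patch):

// On A9, the AGU shares issue with the NEON/FPU pipeline, so a load or
// store between two VMLx-class instructions does not hide the hazard
// the way an intervening integer ALU instruction does.
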
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index f0d5a7d..abe5a31 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -45,7 +45,7 @@ DisableShifterOp("disable-shifter-op", cl::Hidden,
static cl::opt<bool>
CheckVMLxHazard("check-vmlx-hazard", cl::Hidden,
cl::desc("Check fp vmla / vmls hazard at isel time"),
- cl::init(false));
+ cl::init(true));
//===--------------------------------------------------------------------===//
/// ARMDAGToDAGISel - ARM specific code to select ARM machine
@@ -91,9 +91,14 @@ public:
bool isShifterOpProfitable(const SDValue &Shift,
ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
bool SelectShifterOperandReg(SDValue N, SDValue &A,
- SDValue &B, SDValue &C);
+ SDValue &B, SDValue &C,
+ bool CheckProfitability = true);
bool SelectShiftShifterOperandReg(SDValue N, SDValue &A,
- SDValue &B, SDValue &C);
+ SDValue &B, SDValue &C) {
+ // Don't apply the profitability check
+ return SelectShifterOperandReg(N, A, B, C, false);
+ }
+
bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
@@ -174,16 +179,6 @@ public:
return ARM_AM::getT2SOImmVal(~Imm) != -1;
}
- inline bool Pred_so_imm(SDNode *inN) const {
- ConstantSDNode *N = cast<ConstantSDNode>(inN);
- return is_so_imm(N->getZExtValue());
- }
-
- inline bool Pred_t2_so_imm(SDNode *inN) const {
- ConstantSDNode *N = cast<ConstantSDNode>(inN);
- return is_t2_so_imm(N->getZExtValue());
- }
-
// Include the pieces autogenerated from the target description.
#include "ARMGenDAGISel.inc"
@@ -373,7 +368,8 @@ bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue N,
SDValue &BaseReg,
SDValue &ShReg,
- SDValue &Opc) {
+ SDValue &Opc,
+ bool CheckProfitability) {
if (DisableShifterOp)
return false;
@@ -390,7 +386,7 @@ bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue N,
ShImmVal = RHS->getZExtValue() & 31;
} else {
ShReg = N.getOperand(1);
- if (!isShifterOpProfitable(N, ShOpcVal, ShImmVal))
+ if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
return false;
}
Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
@@ -398,30 +394,6 @@ bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue N,
return true;
}
-bool ARMDAGToDAGISel::SelectShiftShifterOperandReg(SDValue N,
- SDValue &BaseReg,
- SDValue &ShReg,
- SDValue &Opc) {
- ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N);
-
- // Don't match base register only case. That is matched to a separate
- // lower complexity pattern with explicit register operand.
- if (ShOpcVal == ARM_AM::no_shift) return false;
-
- BaseReg = N.getOperand(0);
- unsigned ShImmVal = 0;
- // Do not check isShifterOpProfitable. This must return true.
- if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
- ShReg = CurDAG->getRegister(0, MVT::i32);
- ShImmVal = RHS->getZExtValue() & 31;
- } else {
- ShReg = N.getOperand(1);
- }
- Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
- MVT::i32);
- return true;
-}
-
bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
SDValue &Base,
SDValue &OffImm) {
@@ -437,7 +409,7 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
OffImm = CurDAG->getTargetConstant(0, MVT::i32);
return true;
}
-
+
if (N.getOpcode() == ARMISD::Wrapper &&
!(Subtarget->useMovt() &&
N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
@@ -1138,7 +1110,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
OffImm = CurDAG->getTargetConstant(0, MVT::i32);
return true;
}
-
+
if (N.getOpcode() == ARMISD::Wrapper &&
!(Subtarget->useMovt() &&
N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
@@ -1183,7 +1155,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
!CurDAG->isBaseWithConstantOffset(N))
return false;
-
+
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
int RHSC = (int)RHS->getSExtValue();
if (N.getOpcode() == ISD::SUB)
@@ -1571,6 +1543,11 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
Ops.data(), Ops.size());
}
+ // Transfer memoperands.
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1);
+
if (NumVecs == 1)
return VLd;
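The three memoperand lines above set the pattern repeated in the VST, lane, and dup selectors below:

// Allocate a one-entry memoperand array from the MachineFunction and
// attach the intrinsic's MachineMemOperand, so the expanded machine
// node keeps precise memory info for alias analysis and the post-RA
// scheduler.
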
@@ -1600,6 +1577,9 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
return NULL;
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+
SDValue Chain = N->getOperand(0);
EVT VT = N->getOperand(Vec0Idx).getValueType();
bool is64BitVector = VT.is64BitVector();
@@ -1672,7 +1652,13 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
Ops.push_back(Pred);
Ops.push_back(Reg0);
Ops.push_back(Chain);
- return CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size());
+ SDNode *VSt =
+ CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size());
+
+ // Transfer memoperands.
+ cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1);
+
+ return VSt;
}
// Otherwise, quad registers are stored with two separate instructions,
@@ -1693,6 +1679,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
MemAddr.getValueType(),
MVT::Other, OpsA, 7);
+ cast<MachineSDNode>(VStA)->setMemRefs(MemOp, MemOp + 1);
Chain = SDValue(VStA, 1);
// Store the odd D registers.
@@ -1709,8 +1696,10 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
Ops.push_back(Pred);
Ops.push_back(Reg0);
Ops.push_back(Chain);
- return CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
- Ops.data(), Ops.size());
+ SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
+ Ops.data(), Ops.size());
+ cast<MachineSDNode>(VStB)->setMemRefs(MemOp, MemOp + 1);
+ return VStB;
}
SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
@@ -1726,6 +1715,9 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
return NULL;
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+
SDValue Chain = N->getOperand(0);
unsigned Lane =
cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
@@ -1812,6 +1804,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
QOpcodes[OpcodeIndex]);
SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys,
Ops.data(), Ops.size());
+ cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1);
if (!IsLoad)
return VLdLn;
@@ -1838,6 +1831,9 @@ SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align))
return NULL;
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+
SDValue Chain = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -1882,12 +1878,13 @@ SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
std::vector<EVT> ResTys;
- ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts));
+ ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,ResTyElts));
if (isUpdating)
ResTys.push_back(MVT::i32);
ResTys.push_back(MVT::Other);
SDNode *VLdDup =
CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size());
+ cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1);
SuperReg = SDValue(VLdDup, 0);
// Extract the subregisters.
@@ -2168,7 +2165,7 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) {
// Emits: (tMOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
// Pattern complexity = 6 cost = 11 size = 0
//
- // Also FCPYScc and FCPYDcc.
+ // Also VMOVScc and VMOVDcc.
SDValue Tmp2 = CurDAG->getTargetConstant(CCVal, MVT::i32);
SDValue Ops[] = { FalseVal, TrueVal, Tmp2, CCR, InFlag };
unsigned Opc = 0;
@@ -2450,34 +2447,6 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
}
case ARMISD::CMOV:
return SelectCMOVOp(N);
- case ARMISD::CNEG: {
- EVT VT = N->getValueType(0);
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
- SDValue N2 = N->getOperand(2);
- SDValue N3 = N->getOperand(3);
- SDValue InFlag = N->getOperand(4);
- assert(N2.getOpcode() == ISD::Constant);
- assert(N3.getOpcode() == ISD::Register);
-
- SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned)
- cast<ConstantSDNode>(N2)->getZExtValue()),
- MVT::i32);
- SDValue Ops[] = { N0, N1, Tmp2, N3, InFlag };
- unsigned Opc = 0;
- switch (VT.getSimpleVT().SimpleTy) {
- default: assert(false && "Illegal conditional move type!");
- break;
- case MVT::f32:
- Opc = ARM::VNEGScc;
- break;
- case MVT::f64:
- Opc = ARM::VNEGDcc;
- break;
- }
- return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 5);
- }
-
case ARMISD::VZIP: {
unsigned Opc = 0;
EVT VT = N->getValueType(0);
@@ -2870,6 +2839,35 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
break;
}
+ case ARMISD::VTBL1: {
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+ SmallVector<SDValue, 6> Ops;
+
+ Ops.push_back(N->getOperand(0));
+ Ops.push_back(N->getOperand(1));
+ Ops.push_back(getAL(CurDAG)); // Predicate
+ Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register
+ return CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops.data(), Ops.size());
+ }
+ case ARMISD::VTBL2: {
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+
+ // Form a REG_SEQUENCE to force register allocation.
+ SDValue V0 = N->getOperand(0);
+ SDValue V1 = N->getOperand(1);
+ SDValue RegSeq = SDValue(PairDRegs(MVT::v16i8, V0, V1), 0);
+
+ SmallVector<SDValue, 6> Ops;
+ Ops.push_back(RegSeq);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(getAL(CurDAG)); // Predicate
+ Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register
+ return CurDAG->getMachineNode(ARM::VTBL2Pseudo, dl, VT,
+ Ops.data(), Ops.size());
+ }
+
case ISD::CONCAT_VECTORS:
return SelectConcatVector(N);
}
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index ab9f9e1..0a31b87 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -72,6 +72,11 @@ ARMInterworking("arm-interworking", cl::Hidden,
cl::desc("Enable / disable ARM interworking (for debugging only)"),
cl::init(true));
+// The APCS parameter registers.
+static const unsigned GPRArgRegs[] = {
+ ARM::R0, ARM::R1, ARM::R2, ARM::R3
+};
+
void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
EVT PromotedBitwiseVT) {
if (VT != PromotedLdStVT) {
@@ -393,6 +398,12 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setLibcallCallingConv(RTLIB::UDIV_I32, CallingConv::ARM_AAPCS);
}
+ // Use divmod iOS compiler-rt calls.
+ if (Subtarget->getTargetTriple().getOS() == Triple::IOS) {
+ setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
+ setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
+ }
+
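For reference, the compiler-rt entry points these libcall names resolve to, with signatures as in compiler-rt (shown as an assumption, not part of the patch):

// int      __divmodsi4 (int a, int b, int *rem);  // returns a/b, *rem = a%b
// unsigned __udivmodsi4(unsigned a, unsigned b, unsigned *rem);
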
if (Subtarget->isThumb1Only())
addRegisterClass(MVT::i32, ARM::tGPRRegisterClass);
else
@@ -461,6 +472,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
setOperationAction(ISD::VSETCC, MVT::v1i64, Expand);
setOperationAction(ISD::VSETCC, MVT::v2i64, Expand);
+ // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
+ // a destination type that is wider than the source.
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
setTargetDAGCombine(ISD::INTRINSIC_VOID);
setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
@@ -502,18 +517,15 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
}
// i64 operation support.
+ setOperationAction(ISD::MUL, MVT::i64, Expand);
+ setOperationAction(ISD::MULHU, MVT::i32, Expand);
if (Subtarget->isThumb1Only()) {
- setOperationAction(ISD::MUL, MVT::i64, Expand);
- setOperationAction(ISD::MULHU, MVT::i32, Expand);
- setOperationAction(ISD::MULHS, MVT::i32, Expand);
setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
- } else {
- setOperationAction(ISD::MUL, MVT::i64, Expand);
- setOperationAction(ISD::MULHU, MVT::i32, Expand);
- if (!Subtarget->hasV6Ops())
- setOperationAction(ISD::MULHS, MVT::i32, Expand);
}
+ if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops())
+ setOperationAction(ISD::MULHS, MVT::i32, Expand);
+
setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
@@ -597,6 +609,18 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i8, Expand);
setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i16, Expand);
setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
// Since the libcalls include locking, fold in the fences
setShouldFoldAtomicFences(true);
}
@@ -716,7 +740,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
// pressure of the register class's representative and all of it's super
// classes' representatives transitively. We have not implemented this because
// of the difficulty prior to coalescing of modeling operand register classes
-// due to the common occurence of cross class copies and subregister insertions
+// due to the common occurrence of cross class copies and subregister insertions
// and extractions.
std::pair<const TargetRegisterClass*, uint8_t>
ARMTargetLowering::findRepresentativeClass(EVT VT) const{
@@ -778,7 +802,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::BCC_i64: return "ARMISD::BCC_i64";
case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
case ARMISD::CMOV: return "ARMISD::CMOV";
- case ARMISD::CNEG: return "ARMISD::CNEG";
case ARMISD::RBIT: return "ARMISD::RBIT";
@@ -853,6 +876,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::VZIP: return "ARMISD::VZIP";
case ARMISD::VUZP: return "ARMISD::VUZP";
case ARMISD::VTRN: return "ARMISD::VTRN";
+ case ARMISD::VTBL1: return "ARMISD::VTBL1";
+ case ARMISD::VTBL2: return "ARMISD::VTBL2";
case ARMISD::VMULLs: return "ARMISD::VMULLs";
case ARMISD::VMULLu: return "ARMISD::VMULLu";
case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
@@ -861,6 +886,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::BFI: return "ARMISD::BFI";
case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
case ARMISD::VBICIMM: return "ARMISD::VBICIMM";
+ case ARMISD::VBSL: return "ARMISD::VBSL";
case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP";
case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP";
case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP";
@@ -946,27 +972,6 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
return Sched::RegPressure;
}
-// FIXME: Move to RegInfo
-unsigned
-ARMTargetLowering::getRegPressureLimit(const TargetRegisterClass *RC,
- MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-
- switch (RC->getID()) {
- default:
- return 0;
- case ARM::tGPRRegClassID:
- return TFI->hasFP(MF) ? 4 : 5;
- case ARM::GPRRegClassID: {
- unsigned FP = TFI->hasFP(MF) ? 1 : 0;
- return 10 - FP - (Subtarget->isR9Reserved() ? 1 : 0);
- }
- case ARM::SPRRegClassID: // Currently not used as 'rep' register class.
- case ARM::DPRRegClassID:
- return 32 - 10;
- }
-}
-
//===----------------------------------------------------------------------===//
// Lowering Code
//===----------------------------------------------------------------------===//
@@ -1130,22 +1135,6 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
return Chain;
}
-/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
-/// by "Src" to address "Dst" of size "Size". Alignment information is
-/// specified by the specific parameter attribute. The copy will be passed as
-/// a byval function parameter.
-/// Sometimes what we are copying is the end of a larger object, the part that
-/// does not fit in registers.
-static SDValue
-CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
- ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
- DebugLoc dl) {
- SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
- return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
- /*isVolatile=*/false, /*AlwaysInline=*/false,
- MachinePointerInfo(0), MachinePointerInfo(0));
-}
-
/// LowerMemOpCallTo - Store the argument to the stack.
SDValue
ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
@@ -1156,9 +1145,6 @@ ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
unsigned LocMemOffset = VA.getLocMemOffset();
SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
- if (Flags.isByVal())
- return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
-
return DAG.getStore(Chain, dl, Arg, PtrOff,
MachinePointerInfo::getStack(LocMemOffset),
false, false, 0);
@@ -1224,6 +1210,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
*DAG.getContext());
+ CCInfo.setCallOrPrologue(Call);
CCInfo.AnalyzeCallOperands(Outs,
CCAssignFnForNode(CallConv, /* Return*/ false,
isVarArg));
@@ -1253,6 +1240,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
CCValAssign &VA = ArgLocs[i];
SDValue Arg = OutVals[realArgIdx];
ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
+ bool isByVal = Flags.isByVal();
// Promote the value if needed.
switch (VA.getLocInfo()) {
@@ -1299,6 +1287,43 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
}
} else if (VA.isRegLoc()) {
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else if (isByVal) {
+ assert(VA.isMemLoc());
+ unsigned offset = 0;
+
+ // True if this byval aggregate will be split between registers
+ // and memory.
+ if (CCInfo.isFirstByValRegValid()) {
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ unsigned i, j;
+ for (i = 0, j = CCInfo.getFirstByValReg(); j < ARM::R4; i++, j++) {
+ SDValue Const = DAG.getConstant(4*i, MVT::i32);
+ SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
+ SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
+ MachinePointerInfo(),
+ false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(j, Load));
+ }
+ offset = ARM::R4 - CCInfo.getFirstByValReg();
+ CCInfo.clearFirstByValReg();
+ }
+
+ unsigned LocMemOffset = VA.getLocMemOffset();
+ SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset);
+ SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
+ StkPtrOff);
+ SDValue SrcOffset = DAG.getIntPtrConstant(4*offset);
+ SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset);
+ SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset,
+ MVT::i32);
+ MemOpChains.push_back(DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,
+ Flags.getByValAlign(),
+ /*isVolatile=*/false,
+ /*AlwaysInline=*/false,
+ MachinePointerInfo(0),
+ MachinePointerInfo(0)));
+
} else if (!IsSibCall) {
assert(VA.isMemLoc());
@@ -1332,7 +1357,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// than necessary, because it means that each store effectively depends
// on every argument instead of just those arguments it would clobber.
- // Do not flag preceeding copytoreg stuff together with the following stuff.
+ // Do not flag preceding copytoreg stuff together with the following stuff.
InFlag = SDValue();
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
@@ -1492,6 +1517,35 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
dl, DAG, InVals);
}
+/// HandleByVal - Every parameter *after* a byval parameter is passed
+/// on the stack. Remember the next parameter register to allocate,
+/// and then confiscate the rest of the parameter registers to ensure
+/// this.
+void
+llvm::ARMTargetLowering::HandleByVal(CCState *State, unsigned &size) const {
+ unsigned reg = State->AllocateReg(GPRArgRegs, 4);
+ assert((State->getCallOrPrologue() == Prologue ||
+ State->getCallOrPrologue() == Call) &&
+ "unhandled ParmContext");
+ if ((!State->isFirstByValRegValid()) &&
+ (ARM::R0 <= reg) && (reg <= ARM::R3)) {
+ State->setFirstByValReg(reg);
+ // At a call site, a byval parameter that is split between
+ // registers and memory needs its size truncated here. In a
+ // function prologue, such byval parameters are reassembled in
+ // memory, and are not truncated.
+ if (State->getCallOrPrologue() == Call) {
+ unsigned excess = 4 * (ARM::R4 - reg);
+ assert(size >= excess && "expected larger existing stack allocation");
+ size -= excess;
+ }
+ }
+ // Confiscate any remaining parameter registers to preclude their
+ // assignment to subsequent parameters.
+ while (State->AllocateReg(GPRArgRegs, 4))
+ ;
+}
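+
+// For illustration (a sketch of the bookkeeping, not normative): with a
+// 16-byte byval argument whose first allocated register is r2, registers
+// r2 and r3 carry the first 8 bytes, a call site trims
+// excess = 4 * (ARM::R4 - r2) = 8 bytes from the stack copy, and the
+// remaining GPRs are confiscated so every later parameter is assigned to
+// the stack.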
+
/// MatchingStackOffset - Return true if the given stack call argument is
/// already available in the same position (relatively) of the caller's
/// incoming argument stack.
@@ -1813,6 +1867,16 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N) const {
return HasRet;
}
+bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
+ if (!EnableARMTailCalls)
+ return false;
+
+ if (!CI->isTailCall())
+ return false;
+
+ return !Subtarget->isThumb1Only();
+}
+
// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
// one of the above mentioned nodes. It has to be wrapped because otherwise
@@ -2096,7 +2160,7 @@ ARMTargetLowering::LowerEH_SJLJ_DISPATCHSETUP(SDValue Op, SelectionDAG &DAG)
const {
DebugLoc dl = Op.getDebugLoc();
return DAG.getNode(ARMISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other,
- Op.getOperand(0), Op.getOperand(1));
+ Op.getOperand(0));
}
SDValue
@@ -2151,6 +2215,13 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
}
return Result;
}
+ case Intrinsic::arm_neon_vmulls:
+ case Intrinsic::arm_neon_vmullu: {
+ unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
+ ? ARMISD::VMULLs : ARMISD::VMULLu;
+ return DAG.getNode(NewOpc, Op.getDebugLoc(), Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ }
}
}
@@ -2257,6 +2328,88 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
}
+void
+ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
+ unsigned &VARegSize, unsigned &VARegSaveSize)
+ const {
+ unsigned NumGPRs;
+ if (CCInfo.isFirstByValRegValid())
+ NumGPRs = ARM::R4 - CCInfo.getFirstByValReg();
+ else {
+ unsigned firstUnalloced = CCInfo.getFirstUnallocated(GPRArgRegs,
+ sizeof(GPRArgRegs) /
+ sizeof(GPRArgRegs[0]));
+ NumGPRs = (firstUnalloced <= 3) ? (4 - firstUnalloced) : 0;
+ }
+
+ unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
+ VARegSize = NumGPRs * 4;
+ VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1);
+}
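+
+// e.g. if r0 is taken and r1-r3 remain (firstUnalloced == 1), NumGPRs is 3
+// and VARegSize = 12; with an 8-byte stack alignment the rounded save area
+// is VARegSaveSize = (12 + 7) & ~7 = 16.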
+
+// The remaining GPRs hold either the beginning of variable-argument
+// data, or the beginning of an aggregate passed by value (usually
+// byval). Either way, we allocate stack slots adjacent to the data
+// provided by our caller, and store the unallocated registers there.
+// If this is a variadic function, the va_list pointer will begin with
+// these values; otherwise, this reassembles a (byval) structure that
+// was split between registers and memory.
+void
+ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
+ DebugLoc dl, SDValue &Chain,
+ unsigned ArgOffset) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned firstRegToSaveIndex;
+ if (CCInfo.isFirstByValRegValid())
+ firstRegToSaveIndex = CCInfo.getFirstByValReg() - ARM::R0;
+ else {
+ firstRegToSaveIndex = CCInfo.getFirstUnallocated(GPRArgRegs,
+ sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0]));
+ }
+
+ unsigned VARegSize, VARegSaveSize;
+ computeRegArea(CCInfo, MF, VARegSize, VARegSaveSize);
+ if (VARegSaveSize) {
+ // If this function is vararg, store any remaining integer argument regs
+ // to their spots on the stack so that they may be loaded by dereferencing
+ // the result of va_next.
+ AFI->setVarArgsRegSaveSize(VARegSaveSize);
+ AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(VARegSaveSize,
+ ArgOffset + VARegSaveSize
+ - VARegSize,
+ false));
+ SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(),
+ getPointerTy());
+
+ SmallVector<SDValue, 4> MemOps;
+ for (; firstRegToSaveIndex < 4; ++firstRegToSaveIndex) {
+ TargetRegisterClass *RC;
+ if (AFI->isThumb1OnlyFunction())
+ RC = ARM::tGPRRegisterClass;
+ else
+ RC = ARM::GPRRegisterClass;
+
+ unsigned VReg = MF.addLiveIn(GPRArgRegs[firstRegToSaveIndex], RC);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
+ SDValue Store =
+ DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo::getFixedStack(AFI->getVarArgsFrameIndex()),
+ false, false, 0);
+ MemOps.push_back(Store);
+ FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
+ DAG.getConstant(4, getPointerTy()));
+ }
+ if (!MemOps.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOps[0], MemOps.size());
+ } else
+ // This will point to the next argument passed via stack.
+ AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset, true));
+}
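+
+// Continuing the example above: with ArgOffset == 0, VARegSize == 12 and
+// VARegSaveSize == 16, the save area is created as a 16-byte fixed object
+// at offset 0 + 16 - 12 = 4; the 4 bytes of difference are the alignment
+// padding accounted for by the offset adjustment.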
+
SDValue
ARMTargetLowering::LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
@@ -2265,7 +2418,6 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals)
const {
-
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -2275,12 +2427,15 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
*DAG.getContext());
+ CCInfo.setCallOrPrologue(Prologue);
CCInfo.AnalyzeFormalArguments(Ins,
CCAssignFnForNode(CallConv, /* Return*/ false,
isVarArg));
SmallVector<SDValue, 16> ArgValues;
+ int lastInsIndex = -1;
+ SDValue ArgValue;
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
@@ -2288,7 +2443,6 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
if (VA.isRegLoc()) {
EVT RegVT = VA.getLocVT();
- SDValue ArgValue;
if (VA.needsCustom()) {
// f64 and vector types are split up into multiple registers or
// combinations of registers and stack slots.
@@ -2364,67 +2518,45 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
assert(VA.isMemLoc());
assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
- unsigned ArgSize = VA.getLocVT().getSizeInBits()/8;
- int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(), true);
+ int index = ArgLocs[i].getValNo();
+
+ // Some Ins[] entries become multiple ArgLocs[] entries.
+ // Process them only once.
+ if (index != lastInsIndex) {
+ ISD::ArgFlagsTy Flags = Ins[index].Flags;
+ // FIXME: For now, all byval parameter objects are marked mutable.
+ // This can be changed with more analysis.
+ // In case of tail call optimization, mark all arguments mutable, since
+ // they could be overwritten by the lowering of arguments of a tail call.
+ if (Flags.isByVal()) {
+ unsigned VARegSize, VARegSaveSize;
+ computeRegArea(CCInfo, MF, VARegSize, VARegSaveSize);
+ VarArgStyleRegisters(CCInfo, DAG, dl, Chain, 0);
+ unsigned Bytes = Flags.getByValSize() - VARegSize;
+ if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
+ int FI = MFI->CreateFixedObject(Bytes,
+ VA.getLocMemOffset(), false);
+ InVals.push_back(DAG.getFrameIndex(FI, getPointerTy()));
+ } else {
+ int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
+ VA.getLocMemOffset(), true);
- // Create load nodes to retrieve arguments from the stack.
- SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
- InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
- MachinePointerInfo::getFixedStack(FI),
- false, false, 0));
+ // Create load nodes to retrieve arguments from the stack.
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, 0));
+ }
+ lastInsIndex = index;
+ }
}
}
// varargs
- if (isVarArg) {
- static const unsigned GPRArgRegs[] = {
- ARM::R0, ARM::R1, ARM::R2, ARM::R3
- };
-
- unsigned NumGPRs = CCInfo.getFirstUnallocated
- (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0]));
-
- unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
- unsigned VARegSize = (4 - NumGPRs) * 4;
- unsigned VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1);
- unsigned ArgOffset = CCInfo.getNextStackOffset();
- if (VARegSaveSize) {
- // If this function is vararg, store any remaining integer argument regs
- // to their spots on the stack so that they may be loaded by deferencing
- // the result of va_next.
- AFI->setVarArgsRegSaveSize(VARegSaveSize);
- AFI->setVarArgsFrameIndex(
- MFI->CreateFixedObject(VARegSaveSize,
- ArgOffset + VARegSaveSize - VARegSize,
- false));
- SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(),
- getPointerTy());
-
- SmallVector<SDValue, 4> MemOps;
- for (; NumGPRs < 4; ++NumGPRs) {
- TargetRegisterClass *RC;
- if (AFI->isThumb1OnlyFunction())
- RC = ARM::tGPRRegisterClass;
- else
- RC = ARM::GPRRegisterClass;
-
- unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC);
- SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
- SDValue Store =
- DAG.getStore(Val.getValue(1), dl, Val, FIN,
- MachinePointerInfo::getFixedStack(AFI->getVarArgsFrameIndex()),
- false, false, 0);
- MemOps.push_back(Store);
- FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
- DAG.getConstant(4, getPointerTy()));
- }
- if (!MemOps.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &MemOps[0], MemOps.size());
- } else
- // This will point to the next argument passed via stack.
- AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset, true));
- }
+ if (isVarArg)
+ VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getNextStackOffset());
return Chain;
}
@@ -2517,6 +2649,27 @@ ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
}
+/// duplicateCmp - Glue values can have only one use, so this function
+/// duplicates a comparison node.
+SDValue
+ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
+ unsigned Opc = Cmp.getOpcode();
+ DebugLoc DL = Cmp.getDebugLoc();
+ if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
+ return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
+
+ assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
+ Cmp = Cmp.getOperand(0);
+ Opc = Cmp.getOpcode();
+ if (Opc == ARMISD::CMPFP)
+ Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
+ else {
+ assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
+ Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0));
+ }
+ return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
+}
+
SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SDValue Cond = Op.getOperand(0);
SDValue SelectTrue = Op.getOperand(1);
@@ -2552,7 +2705,7 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Cond.getValueType();
SDValue ARMcc = Cond.getOperand(2);
SDValue CCR = Cond.getOperand(3);
- SDValue Cmp = Cond.getOperand(4);
+ SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp);
}
}
@@ -2681,8 +2834,8 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
// If one of the operand is zero, it's safe to ignore the NaN case since
// we only care about equality comparisons.
(SeenZero || (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS)))) {
- // If unsafe fp math optimization is enabled and there are no othter uses of
- // the CMP operands, and the condition code is EQ oe NE, we can optimize it
+ // If unsafe fp math optimization is enabled and there are no other uses of
+ // the CMP operands, and the condition code is EQ or NE, we can optimize it
// to an integer comparison.
if (CC == ISD::SETOEQ)
CC = ISD::SETEQ;
@@ -2811,8 +2964,39 @@ static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
}
+static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+
+ EVT OperandVT = Op.getOperand(0).getValueType();
+ assert(OperandVT == MVT::v4i16 && "Invalid type for custom lowering!");
+ if (VT != MVT::v4f32)
+ return DAG.UnrollVectorOp(Op.getNode());
+
+ unsigned CastOpc;
+ unsigned Opc;
+ switch (Op.getOpcode()) {
+ default:
+ llvm_unreachable("Invalid opcode!");
+ case ISD::SINT_TO_FP:
+ CastOpc = ISD::SIGN_EXTEND;
+ Opc = ISD::SINT_TO_FP;
+ break;
+ case ISD::UINT_TO_FP:
+ CastOpc = ISD::ZERO_EXTEND;
+ Opc = ISD::UINT_TO_FP;
+ break;
+ }
+
+ Op = DAG.getNode(CastOpc, dl, MVT::v4i32, Op.getOperand(0));
+ return DAG.getNode(Opc, dl, VT, Op);
+}
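+
+// e.g. the vector path above turns (v4f32 (sint_to_fp (v4i16 x))) into
+// (v4f32 (sint_to_fp (v4i32 (sign_extend x)))), which selects to a
+// vmovl.s16 followed by vcvt.f32.s32 (an illustrative selection, assuming
+// the usual NEON patterns).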
+
static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
+ if (VT.isVector())
+ return LowerVectorINT_TO_FP(Op, DAG);
+
DebugLoc dl = Op.getDebugLoc();
unsigned Opc;
@@ -2860,7 +3044,10 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT,
DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
DAG.getConstant(32, MVT::i32));
- }
+ } else if (VT == MVT::f32)
+ Tmp1 = DAG.getNode(ARMISD::VSHRu, dl, MVT::v1i64,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1),
+ DAG.getConstant(32, MVT::i32));
Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);
@@ -2869,11 +3056,11 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));
-
+
SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
- if (SrcVT == MVT::f32) {
+ if (VT == MVT::f32) {
Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
DAG.getConstant(0, MVT::i32));
@@ -3508,6 +3695,13 @@ static bool isVREVMask(const SmallVectorImpl<int> &M, EVT VT,
return true;
}
+static bool isVTBLMask(const SmallVectorImpl<int> &M, EVT VT) {
+ // We can handle <8 x i8> vector shuffles. If the index in the mask is out of
+ // range, then 0 is placed into the resulting vector. So pretty much any mask
+ // of 8 elements can work here.
+ return VT == MVT::v8i8 && M.size() == 8;
+}
+
static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT,
unsigned &WhichResult) {
unsigned EltSz = VT.getVectorElementType().getSizeInBits();
@@ -3947,6 +4141,7 @@ ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
isVREVMask(M, VT, 32) ||
isVREVMask(M, VT, 16) ||
isVEXTMask(M, VT, ReverseVEXT, Imm) ||
+ isVTBLMask(M, VT) ||
isVTRNMask(M, VT, WhichResult) ||
isVUZPMask(M, VT, WhichResult) ||
isVZIPMask(M, VT, WhichResult) ||
@@ -4024,6 +4219,29 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
}
}
+static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
+ SmallVectorImpl<int> &ShuffleMask,
+ SelectionDAG &DAG) {
+ // Check to see if we can use the VTBL instruction.
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ DebugLoc DL = Op.getDebugLoc();
+
+ SmallVector<SDValue, 8> VTBLMask;
+ for (SmallVectorImpl<int>::iterator
+ I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I)
+ VTBLMask.push_back(DAG.getConstant(*I, MVT::i32));
+
+ if (V2.getNode()->getOpcode() == ISD::UNDEF)
+ return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
+ DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8,
+ &VTBLMask[0], 8));
+
+ return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2,
+ DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8,
+ &VTBLMask[0], 8));
+}
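+
+// For a one-input v8i8 shuffle such as <7,6,5,4,3,2,1,0>, the code above
+// emits VTBL1 with the mask materialized as a BUILD_VECTOR of i32
+// constants, selecting to something like "vtbl.8 d0, {d1}, d2" (a sketch;
+// actual registers are up to the allocator). Out-of-range mask indices
+// write 0, matching VTBL semantics.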
+
static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
@@ -4141,6 +4359,12 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ISD::BITCAST, dl, VT, Val);
}
+ if (VT == MVT::v8i8) {
+ SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG);
+ if (NewOp.getNode())
+ return NewOp;
+ }
+
return SDValue();
}
@@ -4290,6 +4514,28 @@ static SDValue SkipExtension(SDNode *N, SelectionDAG &DAG) {
MVT::getVectorVT(TruncVT, NumElts), Ops.data(), NumElts);
}
+static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
+ unsigned Opcode = N->getOpcode();
+ if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
+ SDNode *N0 = N->getOperand(0).getNode();
+ SDNode *N1 = N->getOperand(1).getNode();
+ return N0->hasOneUse() && N1->hasOneUse() &&
+ isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
+ }
+ return false;
+}
+
+static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
+ unsigned Opcode = N->getOpcode();
+ if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
+ SDNode *N0 = N->getOperand(0).getNode();
+ SDNode *N1 = N->getOperand(1).getNode();
+ return N0->hasOneUse() && N1->hasOneUse() &&
+ isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
+ }
+ return false;
+}
+
static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
// Multiplications are only custom-lowered for 128-bit vectors so that
// VMULL can be detected. Otherwise v2i64 multiplications are not legal.
@@ -4298,29 +4544,73 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
SDNode *N0 = Op.getOperand(0).getNode();
SDNode *N1 = Op.getOperand(1).getNode();
unsigned NewOpc = 0;
- if (isSignExtended(N0, DAG) && isSignExtended(N1, DAG))
+ bool isMLA = false;
+ bool isN0SExt = isSignExtended(N0, DAG);
+ bool isN1SExt = isSignExtended(N1, DAG);
+ if (isN0SExt && isN1SExt)
NewOpc = ARMISD::VMULLs;
- else if (isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG))
- NewOpc = ARMISD::VMULLu;
- else if (VT == MVT::v2i64)
- // Fall through to expand this. It is not legal.
- return SDValue();
- else
- // Other vector multiplications are legal.
- return Op;
+ else {
+ bool isN0ZExt = isZeroExtended(N0, DAG);
+ bool isN1ZExt = isZeroExtended(N1, DAG);
+ if (isN0ZExt && isN1ZExt)
+ NewOpc = ARMISD::VMULLu;
+ else if (isN1SExt || isN1ZExt) {
+ // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
+ // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
+ if (isN1SExt && isAddSubSExt(N0, DAG)) {
+ NewOpc = ARMISD::VMULLs;
+ isMLA = true;
+ } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
+ NewOpc = ARMISD::VMULLu;
+ isMLA = true;
+ } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
+ std::swap(N0, N1);
+ NewOpc = ARMISD::VMULLu;
+ isMLA = true;
+ }
+ }
+
+ if (!NewOpc) {
+ if (VT == MVT::v2i64)
+ // Fall through to expand this. It is not legal.
+ return SDValue();
+ else
+ // Other vector multiplications are legal.
+ return Op;
+ }
+ }
// Legalize to a VMULL instruction.
DebugLoc DL = Op.getDebugLoc();
- SDValue Op0 = SkipExtension(N0, DAG);
+ SDValue Op0;
SDValue Op1 = SkipExtension(N1, DAG);
-
- assert(Op0.getValueType().is64BitVector() &&
- Op1.getValueType().is64BitVector() &&
- "unexpected types for extended operands to VMULL");
- return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
+ if (!isMLA) {
+ Op0 = SkipExtension(N0, DAG);
+ assert(Op0.getValueType().is64BitVector() &&
+ Op1.getValueType().is64BitVector() &&
+ "unexpected types for extended operands to VMULL");
+ return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
+ }
+
+ // Optimize (zext A + zext B) * C to (VMULL A, C) + (VMULL B, C) during
+ // isel lowering to take advantage of no-stall back-to-back vmul + vmla.
+ // vmull q0, d4, d6
+ // vmlal q0, d5, d6
+ // is faster than
+ // vaddl q0, d4, d5
+ // vmovl q1, d6
+ // vmul q0, q0, q1
+ SDValue N00 = SkipExtension(N0->getOperand(0).getNode(), DAG);
+ SDValue N01 = SkipExtension(N0->getOperand(1).getNode(), DAG);
+ EVT Op1VT = Op1.getValueType();
+ return DAG.getNode(N0->getOpcode(), DL, VT,
+ DAG.getNode(NewOpc, DL, VT,
+ DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
+ DAG.getNode(NewOpc, DL, VT,
+ DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
}
-static SDValue
+static SDValue
LowerSDIV_v4i8(SDValue X, SDValue Y, DebugLoc dl, SelectionDAG &DAG) {
// Convert to float
// float4 xf = vcvt_f32_s32(vmovl_s16(a.lo));
@@ -4331,7 +4621,7 @@ LowerSDIV_v4i8(SDValue X, SDValue Y, DebugLoc dl, SelectionDAG &DAG) {
Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y);
// Get reciprocal estimate.
// float4 recip = vrecpeq_f32(yf);
- Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
+ Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), Y);
// Because char has a smaller range than uchar, we can actually get away
// without any newton steps. This requires that we use a weird bias
@@ -4349,7 +4639,7 @@ LowerSDIV_v4i8(SDValue X, SDValue Y, DebugLoc dl, SelectionDAG &DAG) {
return X;
}
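+
+// A note on the refinement steps used below: vrecpeq_f32 returns only a
+// rough estimate r of 1/y, and vrecpsq_f32(y, r) computes 2.0 - y*r, so
+// r * vrecpsq_f32(y, r) is one Newton-Raphson step that roughly doubles
+// the number of correct bits; the v4i16 path uses one step and the udiv
+// path below uses two.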
-static SDValue
+static SDValue
LowerSDIV_v4i16(SDValue N0, SDValue N1, DebugLoc dl, SelectionDAG &DAG) {
SDValue N2;
// Convert to float.
@@ -4359,13 +4649,13 @@ LowerSDIV_v4i16(SDValue N0, SDValue N1, DebugLoc dl, SelectionDAG &DAG) {
N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1);
N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
-
+
// Use reciprocal estimate and one refinement step.
// float4 recip = vrecpeq_f32(yf);
// recip *= vrecpsq_f32(yf, recip);
- N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
+ N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), N1);
- N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
+ N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
N1, N2);
N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
@@ -4395,15 +4685,15 @@ static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) {
SDValue N0 = Op.getOperand(0);
SDValue N1 = Op.getOperand(1);
SDValue N2, N3;
-
+
if (VT == MVT::v8i8) {
N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0);
N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1);
-
+
N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
DAG.getIntPtrConstant(4));
N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
- DAG.getIntPtrConstant(4));
+ DAG.getIntPtrConstant(4));
N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
DAG.getIntPtrConstant(0));
N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
@@ -4414,7 +4704,7 @@ static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) {
N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
N0 = LowerCONCAT_VECTORS(N0, DAG);
-
+
N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0);
return N0;
}
@@ -4430,32 +4720,32 @@ static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) {
SDValue N0 = Op.getOperand(0);
SDValue N1 = Op.getOperand(1);
SDValue N2, N3;
-
+
if (VT == MVT::v8i8) {
N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0);
N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1);
-
+
N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
DAG.getIntPtrConstant(4));
N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
- DAG.getIntPtrConstant(4));
+ DAG.getIntPtrConstant(4));
N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
DAG.getIntPtrConstant(0));
N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
DAG.getIntPtrConstant(0));
-
+
N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16
N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16
-
+
N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
N0 = LowerCONCAT_VECTORS(N0, DAG);
-
- N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8,
+
+ N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8,
DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, MVT::i32),
N0);
return N0;
}
-
+
// v4i16 sdiv ... Convert to float.
// float4 yf = vcvt_f32_s32(vmovl_u16(y));
// float4 xf = vcvt_f32_s32(vmovl_u16(x));
@@ -4468,13 +4758,13 @@ static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) {
// float4 recip = vrecpeq_f32(yf);
// recip *= vrecpsq_f32(yf, recip);
// recip *= vrecpsq_f32(yf, recip);
- N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
+ N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), N1);
- N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
+ N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
N1, N2);
N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
- N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
+ N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
N1, N2);
N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
@@ -4503,7 +4793,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::GlobalAddress:
return Subtarget->isTargetDarwin() ? LowerGlobalAddressDarwin(Op, DAG) :
LowerGlobalAddressELF(Op, DAG);
- case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::BR_CC: return LowerBR_CC(Op, DAG);
@@ -4524,7 +4814,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::EH_SJLJ_DISPATCHSETUP: return LowerEH_SJLJ_DISPATCHSETUP(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
Subtarget);
- case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG);
+ case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG);
case ISD::SHL:
case ISD::SRL:
case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget);
@@ -4754,6 +5044,109 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
return BB;
}
+MachineBasicBlock *
+ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Size,
+ bool signExtend,
+ ARMCC::CondCodes Cond) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction *MF = BB->getParent();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptr = MI->getOperand(1).getReg();
+ unsigned incr = MI->getOperand(2).getReg();
+ unsigned oldval = dest;
+ DebugLoc dl = MI->getDebugLoc();
+
+ bool isThumb2 = Subtarget->isThumb2();
+ unsigned ldrOpc, strOpc, extendOpc;
+ switch (Size) {
+ default: llvm_unreachable("unsupported size for AtomicBinaryMinMax!");
+ case 1:
+ ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
+ strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
+ extendOpc = isThumb2 ? ARM::t2SXTBr : ARM::SXTBr;
+ break;
+ case 2:
+ ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
+ strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
+ extendOpc = isThumb2 ? ARM::t2SXTHr : ARM::SXTHr;
+ break;
+ case 4:
+ ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
+ strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
+ extendOpc = 0;
+ break;
+ }
+
+ MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MF->insert(It, loopMBB);
+ MF->insert(It, exitMBB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ unsigned scratch = RegInfo.createVirtualRegister(ARM::GPRRegisterClass);
+ unsigned scratch2 = RegInfo.createVirtualRegister(ARM::GPRRegisterClass);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loopMBB
+ BB->addSuccessor(loopMBB);
+
+ // loopMBB:
+ // ldrex dest, ptr
+ // (sign extend dest, if required)
+ // cmp dest, incr
+ // cmov.cond scratch2, dest, incr
+ // strex scratch, scratch2, ptr
+ // cmp scratch, #0
+ // bne- loopMBB
+ // fallthrough --> exitMBB
+ BB = loopMBB;
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr));
+
+ // Sign extend the value, if necessary.
+ if (signExtend && extendOpc) {
+ oldval = RegInfo.createVirtualRegister(ARM::GPRRegisterClass);
+ AddDefaultPred(BuildMI(BB, dl, TII->get(extendOpc), oldval).addReg(dest));
+ }
+
+ // Build compare and cmov instructions.
+ AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
+ .addReg(oldval).addReg(incr));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr), scratch2)
+ .addReg(oldval).addReg(incr).addImm(Cond).addReg(ARM::CPSR);
+
+ AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2)
+ .addReg(ptr));
+ AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+ .addReg(scratch).addImm(0));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+
+ BB->addSuccessor(loopMBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+
+ MI->eraseFromParent(); // The instruction is gone now.
+
+ return BB;
+}
+
static
MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
@@ -4763,6 +5156,72 @@ MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
llvm_unreachable("Expecting a BB with two successors!");
}
+// FIXME: This opcode table should obviously be expressed in the target
+// description. We probably just need a "machine opcode" value in the pseudo
+// instruction. But the ideal solution may be to simply remove the "S" version
+// of the opcode altogether.
+struct AddSubFlagsOpcodePair {
+ unsigned PseudoOpc;
+ unsigned MachineOpc;
+};
+
+static AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
+ {ARM::ADCSri, ARM::ADCri},
+ {ARM::ADCSrr, ARM::ADCrr},
+ {ARM::ADCSrs, ARM::ADCrs},
+ {ARM::SBCSri, ARM::SBCri},
+ {ARM::SBCSrr, ARM::SBCrr},
+ {ARM::SBCSrs, ARM::SBCrs},
+ {ARM::RSBSri, ARM::RSBri},
+ {ARM::RSBSrr, ARM::RSBrr},
+ {ARM::RSBSrs, ARM::RSBrs},
+ {ARM::RSCSri, ARM::RSCri},
+ {ARM::RSCSrs, ARM::RSCrs},
+ {ARM::t2ADCSri, ARM::t2ADCri},
+ {ARM::t2ADCSrr, ARM::t2ADCrr},
+ {ARM::t2ADCSrs, ARM::t2ADCrs},
+ {ARM::t2SBCSri, ARM::t2SBCri},
+ {ARM::t2SBCSrr, ARM::t2SBCrr},
+ {ARM::t2SBCSrs, ARM::t2SBCrs},
+ {ARM::t2RSBSri, ARM::t2RSBri},
+ {ARM::t2RSBSrs, ARM::t2RSBrs},
+};
+
+// Convert an Add or Subtract with Carry and Flags to a generic opcode with
+// CPSR<def> operand. e.g. ADCS (...) -> ADC (... CPSR<def>).
+//
+// FIXME: Somewhere we should assert that CPSR<def> is in the correct
+// position to be recognized by the target description as the 'S' bit.
+bool ARMTargetLowering::RemapAddSubWithFlags(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ unsigned OldOpc = MI->getOpcode();
+ unsigned NewOpc = 0;
+
+ // This is only called for instructions that need remapping, so iterating over
+ // the tiny opcode table is not costly.
+ static const int NPairs =
+ sizeof(AddSubFlagsOpcodeMap) / sizeof(AddSubFlagsOpcodePair);
+ for (AddSubFlagsOpcodePair *Pair = &AddSubFlagsOpcodeMap[0],
+ *End = &AddSubFlagsOpcodeMap[NPairs]; Pair != End; ++Pair) {
+ if (OldOpc == Pair->PseudoOpc) {
+ NewOpc = Pair->MachineOpc;
+ break;
+ }
+ }
+ if (!NewOpc)
+ return false;
+
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc));
+ for (unsigned i = 0; i < MI->getNumOperands(); ++i)
+ MIB.addOperand(MI->getOperand(i));
+ AddDefaultPred(MIB);
+ MIB.addReg(ARM::CPSR, RegState::Define); // S bit
+ MI->eraseFromParent();
+ return true;
+}
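+
+// For instance, a pseudo like "ADCSri dst, src, #imm" is rebuilt here as
+// "ADCri dst, src, #imm, pred, CPSR<def>": the original operands are
+// copied, AddDefaultPred appends the predicate, and the trailing CPSR def
+// stands in for the 'S' bit (an illustrative reading of the remapping,
+// not an exact operand listing).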
+
MachineBasicBlock *
ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
@@ -4770,10 +5229,13 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
DebugLoc dl = MI->getDebugLoc();
bool isThumb2 = Subtarget->isThumb2();
switch (MI->getOpcode()) {
- default:
+ default: {
+ if (RemapAddSubWithFlags(MI, BB))
+ return BB;
+
MI->dump();
llvm_unreachable("Unexpected instr type to insert");
-
+ }
case ARM::ATOMIC_LOAD_ADD_I8:
return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
case ARM::ATOMIC_LOAD_ADD_I16:
@@ -4816,6 +5278,34 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case ARM::ATOMIC_LOAD_SUB_I32:
return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
+ case ARM::ATOMIC_LOAD_MIN_I8:
+ return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::LT);
+ case ARM::ATOMIC_LOAD_MIN_I16:
+ return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::LT);
+ case ARM::ATOMIC_LOAD_MIN_I32:
+ return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::LT);
+
+ case ARM::ATOMIC_LOAD_MAX_I8:
+ return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::GT);
+ case ARM::ATOMIC_LOAD_MAX_I16:
+ return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::GT);
+ case ARM::ATOMIC_LOAD_MAX_I32:
+ return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::GT);
+
+ case ARM::ATOMIC_LOAD_UMIN_I8:
+ return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::LO);
+ case ARM::ATOMIC_LOAD_UMIN_I16:
+ return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::LO);
+ case ARM::ATOMIC_LOAD_UMIN_I32:
+ return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::LO);
+
+ case ARM::ATOMIC_LOAD_UMAX_I8:
+ return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::HI);
+ case ARM::ATOMIC_LOAD_UMAX_I16:
+ return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::HI);
+ case ARM::ATOMIC_LOAD_UMAX_I32:
+ return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::HI);
+
case ARM::ATOMIC_SWAP_I8: return EmitAtomicBinary(MI, BB, 1, 0);
case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0);
case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0);
@@ -5034,6 +5524,42 @@ static SDValue PerformSUBCombine(SDNode *N,
return SDValue();
}
+/// PerformVMULCombine
+/// Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the
+/// special multiplier accumulator forwarding.
+/// vmul d3, d0, d2
+/// vmla d3, d1, d2
+/// is faster than
+/// vadd d3, d0, d1
+/// vmul d3, d3, d2
+static SDValue PerformVMULCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ if (!Subtarget->hasVMLxForwarding())
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ unsigned Opcode = N0.getOpcode();
+ if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
+ Opcode != ISD::FADD && Opcode != ISD::FSUB) {
+ Opcode = N1.getOpcode();
+ if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
+ Opcode != ISD::FADD && Opcode != ISD::FSUB)
+ return SDValue();
+ std::swap(N0, N1);
+ }
+
+ EVT VT = N->getValueType(0);
+ DebugLoc DL = N->getDebugLoc();
+ SDValue N00 = N0->getOperand(0);
+ SDValue N01 = N0->getOperand(1);
+ return DAG.getNode(Opcode, DL, VT,
+ DAG.getNode(ISD::MUL, DL, VT, N00, N1),
+ DAG.getNode(ISD::MUL, DL, VT, N01, N1));
+}
+
static SDValue PerformMULCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
@@ -5046,6 +5572,8 @@ static SDValue PerformMULCombine(SDNode *N,
return SDValue();
EVT VT = N->getValueType(0);
+ if (VT.is64BitVector() || VT.is128BitVector())
+ return PerformVMULCombine(N, DCI, Subtarget);
if (VT != MVT::i32)
return SDValue();
@@ -5088,12 +5616,16 @@ static SDValue PerformMULCombine(SDNode *N,
static SDValue PerformANDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
+
// Attempt to use immediate-form VBIC
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
DebugLoc dl = N->getDebugLoc();
EVT VT = N->getValueType(0);
SelectionDAG &DAG = DCI.DAG;
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ return SDValue();
+
APInt SplatBits, SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
@@ -5127,6 +5659,9 @@ static SDValue PerformORCombine(SDNode *N,
EVT VT = N->getValueType(0);
SelectionDAG &DAG = DCI.DAG;
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ return SDValue();
+
APInt SplatBits, SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
@@ -5147,6 +5682,37 @@ static SDValue PerformORCombine(SDNode *N,
}
}
+ SDValue N0 = N->getOperand(0);
+ if (N0.getOpcode() != ISD::AND)
+ return SDValue();
+ SDValue N1 = N->getOperand(1);
+
+ // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant.
+ if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() &&
+ DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
+ APInt SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+
+ BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));
+ APInt SplatBits0;
+ if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
+ HasAnyUndefs) && !HasAnyUndefs) {
+ BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
+ APInt SplatBits1;
+ if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
+ HasAnyUndefs) && !HasAnyUndefs &&
+ SplatBits0 == ~SplatBits1) {
+ // Canonicalize the vector type to make instruction selection simpler.
+ EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
+ SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT,
+ N0->getOperand(1), N0->getOperand(0),
+ N1->getOperand(0));
+ return DAG.getNode(ISD::BITCAST, dl, VT, Result);
+ }
+ }
+ }
+
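+ // e.g. with A == <0x0000ffff, 0x0000ffff>, (B & A) keeps the low
+ // halfword of each lane and (C & ~A) keeps the high halfword, so the OR
+ // is exactly a bitwise select and a single vbsl with A as the mask
+ // suffices (a sketch of the intent, not the emitted code).
+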
// Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
// reasonable.
@@ -5154,19 +5720,16 @@ static SDValue PerformORCombine(SDNode *N,
if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
return SDValue();
- SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
DebugLoc DL = N->getDebugLoc();
// 1) or (and A, mask), val => ARMbfi A, val, mask
// iff (val & mask) == val
//
// 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
// 2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2)
- // && CountPopulation_32(mask) == CountPopulation_32(~mask2)
+ // && mask == ~mask2
// 2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2)
- // && CountPopulation_32(mask) == CountPopulation_32(~mask2)
+ // && ~mask == mask2
// (i.e., copy a bitfield value into another bitfield of the same width)
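+ // For example (2a): Mask == 0xffff00ff and Mask2 == 0x0000ff00 satisfy
+ // Mask == ~Mask2; amt = CountTrailingZeros_32(Mask2) == 8, so bits 8-15
+ // of B are shifted down and BFI-inserted into bits 8-15 of A.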
- if (N0.getOpcode() != ISD::AND)
- return SDValue();
if (VT != MVT::i32)
return SDValue();
@@ -5209,26 +5772,26 @@ static SDValue PerformORCombine(SDNode *N,
return SDValue();
unsigned Mask2 = N11C->getZExtValue();
+ // For the BFI pattern to match as-is, Mask and ~Mask2 (or the reverse)
+ // must be equivalent.
if (ARM::isBitFieldInvertedMask(Mask) &&
- ARM::isBitFieldInvertedMask(~Mask2) &&
- (CountPopulation_32(Mask) == CountPopulation_32(~Mask2))) {
+ (Mask == ~Mask2)) {
// The pack halfword instruction works better for masks that fit it,
// so use that when it's available.
if (Subtarget->hasT2ExtractPack() &&
(Mask == 0xffff || Mask == 0xffff0000))
return SDValue();
// 2a
- unsigned lsb = CountTrailingZeros_32(Mask2);
+ unsigned amt = CountTrailingZeros_32(Mask2);
Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),
- DAG.getConstant(lsb, MVT::i32));
+ DAG.getConstant(amt, MVT::i32));
Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res,
DAG.getConstant(Mask, MVT::i32));
// Do not add new nodes to DAG combiner worklist.
DCI.CombineTo(N, Res, false);
return SDValue();
} else if (ARM::isBitFieldInvertedMask(~Mask) &&
- ARM::isBitFieldInvertedMask(Mask2) &&
- (CountPopulation_32(~Mask) == CountPopulation_32(Mask2))) {
+ (~Mask == Mask2)) {
// The pack halfword instruction works better for masks that fit it,
// so use that when it's available.
if (Subtarget->hasT2ExtractPack() &&
@@ -5239,7 +5802,7 @@ static SDValue PerformORCombine(SDNode *N,
Res = DAG.getNode(ISD::SRL, DL, VT, N00,
DAG.getConstant(lsb, MVT::i32));
Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
- DAG.getConstant(Mask2, MVT::i32));
+ DAG.getConstant(Mask2, MVT::i32));
// Do not add new nodes to DAG combiner worklist.
DCI.CombineTo(N, Res, false);
return SDValue();
@@ -5294,6 +5857,37 @@ static SDValue PerformVMOVRRDCombine(SDNode *N,
SDValue InDouble = N->getOperand(0);
if (InDouble.getOpcode() == ARMISD::VMOVDRR)
return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
+
+ // vmovrrd(load f64) -> (load i32), (load i32)
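+ // i.e. an f64 load from a frame index that only feeds a VMOVRRD is
+ // replaced by two i32 loads at [FI] and [FI + 4], so the value need not
+ // pass through a VFP D register at all.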
+ SDNode *InNode = InDouble.getNode();
+ if (ISD::isNormalLoad(InNode) && InNode->hasOneUse() &&
+ InNode->getValueType(0) == MVT::f64 &&
+ InNode->getOperand(1).getOpcode() == ISD::FrameIndex &&
+ !cast<LoadSDNode>(InNode)->isVolatile()) {
+ // TODO: Should this be done for non-FrameIndex operands?
+ LoadSDNode *LD = cast<LoadSDNode>(InNode);
+
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc DL = LD->getDebugLoc();
+ SDValue BasePtr = LD->getBasePtr();
+ SDValue NewLD1 = DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr,
+ LD->getPointerInfo(), LD->isVolatile(),
+ LD->isNonTemporal(), LD->getAlignment());
+
+ SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
+ DAG.getConstant(4, MVT::i32));
+ SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, NewLD1.getValue(1), OffsetPtr,
+ LD->getPointerInfo(), LD->isVolatile(),
+ LD->isNonTemporal(),
+ std::min(4U, LD->getAlignment() / 2));
+
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));
+ SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2);
+ DCI.RemoveFromWorklist(LD);
+ DAG.DeleteNode(LD);
+ return Result;
+ }
+
return SDValue();
}
@@ -5323,8 +5917,28 @@ static SDValue PerformSTORECombine(SDNode *N,
// Otherwise, the i64 value will be legalized to a pair of i32 values.
StoreSDNode *St = cast<StoreSDNode>(N);
SDValue StVal = St->getValue();
- if (!ISD::isNormalStore(St) || St->isVolatile() ||
- StVal.getValueType() != MVT::i64 ||
+ if (!ISD::isNormalStore(St) || St->isVolatile())
+ return SDValue();
+
+ if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&
+ StVal.getNode()->hasOneUse() && !St->isVolatile()) {
+ SelectionDAG &DAG = DCI.DAG;
+ DebugLoc DL = St->getDebugLoc();
+ SDValue BasePtr = St->getBasePtr();
+ SDValue NewST1 = DAG.getStore(St->getChain(), DL,
+ StVal.getNode()->getOperand(0), BasePtr,
+ St->getPointerInfo(), St->isVolatile(),
+ St->isNonTemporal(), St->getAlignment());
+
+ SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
+ DAG.getConstant(4, MVT::i32));
+ return DAG.getStore(NewST1.getValue(0), DL, StVal.getNode()->getOperand(1),
+ OffsetPtr, St->getPointerInfo(), St->isVolatile(),
+ St->isNonTemporal(),
+ std::min(4U, St->getAlignment() / 2));
+ }
+
+ if (StVal.getValueType() != MVT::i64 ||
StVal.getNode()->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
return SDValue();
@@ -5553,7 +6167,7 @@ static SDValue CombineBaseUpdate(SDNode *N,
EVT VecTy;
if (isLoad)
VecTy = N->getValueType(0);
- else
+ else
VecTy = N->getOperand(AddrOpIdx+1).getValueType();
unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
if (isLaneOp)
@@ -5603,7 +6217,7 @@ static SDValue CombineBaseUpdate(SDNode *N,
DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
break;
- }
+ }
return SDValue();
}
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index dc400c4..a2e6260 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -57,7 +57,6 @@ namespace llvm {
CMPFPw0, // ARM VFP compare against zero instruction, sets FPSCR.
FMSTAT, // ARM fmstat instruction.
CMOV, // ARM conditional move instructions.
- CNEG, // ARM conditional negate instructions.
BCC_i64,
@@ -89,7 +88,7 @@ namespace llvm {
MEMBARRIER_MCR, // Memory barrier (MCR)
PRELOAD, // Preload
-
+
VCEQ, // Vector compare equal.
VCEQZ, // Vector compare equal to zero.
VCGE, // Vector compare greater than or equal.
@@ -154,6 +153,8 @@ namespace llvm {
VZIP, // zip (interleave)
VUZP, // unzip (deinterleave)
VTRN, // transpose
+ VTBL1, // 1-register shuffle with mask
+ VTBL2, // 2-register shuffle with mask
// Vector multiply long:
VMULLs, // ...signed
@@ -172,12 +173,15 @@ namespace llvm {
// Bit-field insert
BFI,
-
+
// Vector OR with immediate
VORRIMM,
// Vector AND with NOT of immediate
VBICIMM,
+ // Vector bitwise select
+ VBSL,
+
// Vector load N-element structure to all lanes:
VLD2DUP = ISD::FIRST_TARGET_MEMORY_OPCODE,
VLD3DUP,
@@ -330,9 +334,6 @@ namespace llvm {
Sched::Preference getSchedulingPreference(SDNode *N) const;
- unsigned getRegPressureLimit(const TargetRegisterClass *RC,
- MachineFunction &MF) const;
-
bool isShuffleMaskLegal(const SmallVectorImpl<int> &M, EVT VT) const;
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
@@ -407,7 +408,7 @@ namespace llvm {
SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
+ SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *ST) const;
SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;
@@ -425,6 +426,13 @@ namespace llvm {
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
+ void VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
+ DebugLoc dl, SDValue &Chain, unsigned ArgOffset)
+ const;
+
+ void computeRegArea(CCState &CCInfo, MachineFunction &MF,
+ unsigned &VARegSize, unsigned &VARegSaveSize) const;
+
virtual SDValue
LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
@@ -435,6 +443,9 @@ namespace llvm {
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
+ /// HandleByVal - Target-specific cleanup for ByVal support.
+ virtual void HandleByVal(CCState *, unsigned &) const;
+
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call
/// optimization should implement this function.
@@ -456,10 +467,13 @@ namespace llvm {
virtual bool isUsedByReturnOnly(SDNode *N) const;
+ virtual bool mayBeEmittedAsTailCall(CallInst *CI) const;
+
SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
SDValue &ARMcc, SelectionDAG &DAG, DebugLoc dl) const;
SDValue getVFPCmp(SDValue LHS, SDValue RHS,
SelectionDAG &DAG, DebugLoc dl) const;
+ SDValue duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const;
SDValue OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const;
@@ -470,16 +484,22 @@ namespace llvm {
MachineBasicBlock *BB,
unsigned Size,
unsigned BinOpcode) const;
+ MachineBasicBlock * EmitAtomicBinaryMinMax(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Size,
+ bool signExtend,
+ ARMCC::CondCodes Cond) const;
+ bool RemapAddSubWithFlags(MachineInstr *MI, MachineBasicBlock *BB) const;
};
-
+
enum NEONModImmType {
VMOVModImm,
VMVNModImm,
OtherModImm
};
-
-
+
+
namespace ARM {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo);
}
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index 359ac45..f5fb98e 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -206,19 +206,30 @@ def setend_op : Operand<i32> {
let PrintMethod = "printSetendOperand";
}
-def cps_opt : Operand<i32> {
- let PrintMethod = "printCPSOptionOperand";
-}
-
def msr_mask : Operand<i32> {
let PrintMethod = "printMSRMaskOperand";
let ParserMatchClass = MSRMaskOperand;
}
-// A8.6.117, A8.6.118. Different instructions are generated for #0 and #-0.
-// The neg_zero operand translates -0 to -1, -1 to -2, ..., etc.
-def neg_zero : Operand<i32> {
- let PrintMethod = "printNegZeroOperand";
+// Shift Right Immediate - A shift right immediate is encoded differently from
+// other shift immediates. The imm6 field is encoded like so:
+//
+// Offset Encoding
+// 8 imm6<5:3> = '001', 8 - <imm> is encoded in imm6<2:0>
+// 16 imm6<5:4> = '01', 16 - <imm> is encoded in imm6<3:0>
+// 32 imm6<5> = '1', 32 - <imm> is encoded in imm6<4:0>
+// 64 64 - <imm> is encoded in imm6<5:0>
+def shr_imm8 : Operand<i32> {
+ let EncoderMethod = "getShiftRight8Imm";
+}
+def shr_imm16 : Operand<i32> {
+ let EncoderMethod = "getShiftRight16Imm";
+}
+def shr_imm32 : Operand<i32> {
+ let EncoderMethod = "getShiftRight32Imm";
+}
+def shr_imm64 : Operand<i32> {
+ let EncoderMethod = "getShiftRight64Imm";
}
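+
+// For example, with the encoding above, a right shift by 3 on 8-bit
+// elements yields imm6 = 0b001101: imm6<5:3> = '001' and
+// imm6<2:0> = 8 - 3 = 0b101.
+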
//===----------------------------------------------------------------------===//
@@ -279,6 +290,7 @@ class PseudoInst<dag oops, dag iops, InstrItinClass itin, list<dag> pattern>
let OutOperandList = oops;
let InOperandList = iops;
let Pattern = pattern;
+ let isCodeGenOnly = 1;
}
// PseudoInst that's ARM-mode only.
@@ -422,11 +434,11 @@ class AIstrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
opc, asm, "", pattern> {
bits<4> Rd;
bits<4> Rt;
- bits<4> Rn;
+ bits<4> addr;
let Inst{27-23} = 0b00011;
let Inst{22-21} = opcod;
let Inst{20} = 0;
- let Inst{19-16} = Rn;
+ let Inst{19-16} = addr;
let Inst{15-12} = Rd;
let Inst{11-4} = 0b11111001;
let Inst{3-0} = Rt;
@@ -513,6 +525,24 @@ class AI2stridx<bit isByte, bit isPre, dag oops, dag iops,
let Inst{19-16} = Rn;
let Inst{11-0} = offset{11-0};
}
+// FIXME: Merge with the above class when addrmode2 gets used for STR, STRB,
+// but for now use this class for STRT and STRBT.
+class AI2stridxT<bit isByte, bit isPre, dag oops, dag iops,
+ IndexMode im, Format f, InstrItinClass itin, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : AI2ldstidx<0, isByte, isPre, oops, iops, im, f, itin, opc, asm, cstr,
+ pattern> {
+ // AM2 store w/ two operands: (GPR, am2offset)
+ // {17-14} Rn
+ // {13} 1 == Rm, 0 == imm12
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<18> addr;
+ let Inst{25} = addr{13};
+ let Inst{23} = addr{12};
+ let Inst{19-16} = addr{17-14};
+ let Inst{11-0} = addr{11-0};
+}
// addrmode3 instructions
class AI3ld<bits<4> op, bit op20, dag oops, dag iops, Format f,
@@ -547,6 +577,34 @@ class AI3ldstidx<bits<4> op, bit op20, bit isLd, bit isPre, dag oops, dag iops,
let Inst{15-12} = Rt; // Rt
let Inst{7-4} = op;
}
+
+// FIXME: Merge with the above class when addrmode2 gets used for LDR, LDRB,
+// but for now use this class for LDRSBT, LDRHT, LDRSHT.
+class AI3ldstidxT<bits<4> op, bit op20, bit isLd, bit isPre, dag oops, dag iops,
+ IndexMode im, Format f, InstrItinClass itin, string opc,
+ string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, im, f, itin,
+ opc, asm, cstr, pattern> {
+ // {13} 1 == imm8, 0 == Rm
+ // {12-9} Rn
+ // {8} isAdd
+ // {7-4} imm7_4/zero
+ // {3-0} imm3_0/Rm
+ bits<14> addr;
+ bits<4> Rt;
+ let Inst{27-25} = 0b000;
+ let Inst{24} = isPre; // P bit
+ let Inst{23} = addr{8}; // U bit
+ let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm
+ let Inst{20} = op20; // L bit
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{15-12} = Rt; // Rt
+ let Inst{11-8} = addr{7-4}; // imm7_4/zero
+ let Inst{7-4} = op;
+ let Inst{3-0} = addr{3-0}; // imm3_0/Rm
+ let AsmMatchConverter = "CvtLdWriteBackRegAddrMode3";
+}
+
class AI3stridx<bits<4> op, bit isByte, bit isPre, dag oops, dag iops,
IndexMode im, Format f, InstrItinClass itin, string opc,
string asm, string cstr, list<dag> pattern>
@@ -619,12 +677,25 @@ class AI3sthpo<dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, string cstr, list<dag> pattern>
: I<oops, iops, AddrMode3, Size4Bytes, IndexModePost, f, itin,
opc, asm, cstr,pattern> {
+ // {13} 1 == imm8, 0 == Rm
+ // {12-9} Rn
+ // {8} isAdd
+ // {7-4} imm7_4/zero
+ // {3-0} imm3_0/Rm
+ bits<14> addr;
+ bits<4> Rt;
+ let Inst{3-0} = addr{3-0}; // imm3_0/Rm
let Inst{4} = 1;
let Inst{5} = 1; // H bit
let Inst{6} = 0; // S bit
let Inst{7} = 1;
+ let Inst{11-8} = addr{7-4}; // imm7_4/zero
+ let Inst{15-12} = Rt; // Rt
+ let Inst{19-16} = addr{12-9}; // Rn
let Inst{20} = 0; // L bit
let Inst{21} = 0; // W bit
+ let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm
+ let Inst{23} = addr{8}; // U bit
let Inst{24} = 0; // P bit
let Inst{27-25} = 0b000;
}
@@ -1670,7 +1741,8 @@ class N2VImm<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
}
// NEON 3 vector register format.
-class N3V<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4,
+
+class N3VCommon<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4,
dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string dt, string asm, string cstr, list<dag> pattern>
: NDataI<oops, iops, f, itin, opc, dt, asm, cstr, pattern> {
@@ -1680,6 +1752,13 @@ class N3V<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4,
let Inst{11-8} = op11_8;
let Inst{6} = op6;
let Inst{4} = op4;
+}
+
+class N3V<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4,
+ dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr, list<dag> pattern>
+ : N3VCommon<op24, op23, op21_20, op11_8, op6, op4,
+ oops, iops, f, itin, opc, dt, asm, cstr, pattern> {
// Instruction operands.
bits<5> Vd;
@@ -1694,6 +1773,47 @@ class N3V<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4,
let Inst{5} = Vm{4};
}
+class N3VLane32<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4,
+ dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr, list<dag> pattern>
+ : N3VCommon<op24, op23, op21_20, op11_8, op6, op4,
+ oops, iops, f, itin, opc, dt, asm, cstr, pattern> {
+
+ // Instruction operands.
+ bits<5> Vd;
+ bits<5> Vn;
+ bits<5> Vm;
+ bit lane;
+
+ let Inst{15-12} = Vd{3-0};
+ let Inst{22} = Vd{4};
+ let Inst{19-16} = Vn{3-0};
+ let Inst{7} = Vn{4};
+ let Inst{3-0} = Vm{3-0};
+ let Inst{5} = lane;
+}
+
+class N3VLane16<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4,
+ dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string dt, string asm, string cstr, list<dag> pattern>
+ : N3VCommon<op24, op23, op21_20, op11_8, op6, op4,
+ oops, iops, f, itin, opc, dt, asm, cstr, pattern> {
+
+ // Instruction operands.
+ bits<5> Vd;
+ bits<5> Vn;
+ bits<5> Vm;
+ bits<2> lane;
+
+ let Inst{15-12} = Vd{3-0};
+ let Inst{22} = Vd{4};
+ let Inst{19-16} = Vn{3-0};
+ let Inst{7} = Vn{4};
+ let Inst{2-0} = Vm{2-0};
+ let Inst{5} = lane{1};
+ let Inst{3} = lane{0};
+}
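+// Illustrative example: for a 16-bit scalar the by-lane index is two bits,
+// split across the encoding; lane 2 (0b10) gives Inst{5} = 1 and Inst{3} = 0.
+// Since only Vm{2-0} is encoded, the scalar must live in D0-D7.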
+
// Same as N3V except it doesn't have a data type suffix.
class N3VX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6,
bit op4,
@@ -1730,6 +1850,8 @@ class NVLaneOp<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
let Inst{11-8} = opcod2;
let Inst{6-5} = opcod3;
let Inst{4} = 1;
+ // A8.6.303, A8.6.328, A8.6.329
+ let Inst{3-0} = 0b0000;
let OutOperandList = oops;
let InOperandList = !con(iops, (ins pred:$p));
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 6e3fe2e..209c1a3 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -58,7 +58,7 @@ def SDT_ARMEH_SJLJ_Setjmp : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisPtrTy<1>,
SDTCisInt<2>]>;
def SDT_ARMEH_SJLJ_Longjmp: SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisInt<1>]>;
-def SDT_ARMEH_SJLJ_DispatchSetup: SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+def SDT_ARMEH_SJLJ_DispatchSetup: SDTypeProfile<0, 0, []>;
def SDT_ARMMEMBARRIER : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
@@ -93,8 +93,6 @@ def ARMretflag : SDNode<"ARMISD::RET_FLAG", SDTNone,
def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov,
[SDNPInGlue]>;
-def ARMcneg : SDNode<"ARMISD::CNEG", SDT_ARMCMov,
- [SDNPInGlue]>;
def ARMbrcond : SDNode<"ARMISD::BRCOND", SDT_ARMBrcond,
[SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
@@ -205,13 +203,13 @@ def so_imm_not_XFORM : SDNodeXForm<imm, [{
}]>;
/// imm1_15 predicate - True if the 32-bit immediate is in the range [1,15].
-def imm1_15 : PatLeaf<(i32 imm), [{
- return (int32_t)N->getZExtValue() >= 1 && (int32_t)N->getZExtValue() < 16;
+def imm1_15 : ImmLeaf<i32, [{
+ return (int32_t)Imm >= 1 && (int32_t)Imm < 16;
}]>;
/// imm16_31 predicate - True if the 32-bit immediate is in the range [16,31].
-def imm16_31 : PatLeaf<(i32 imm), [{
- return (int32_t)N->getZExtValue() >= 16 && (int32_t)N->getZExtValue() < 32;
+def imm16_31 : ImmLeaf<i32, [{
+ return (int32_t)Imm >= 16 && (int32_t)Imm < 32;
}]>;
def so_imm_neg :
@@ -241,8 +239,8 @@ def lo16AllZero : PatLeaf<(i32 imm), [{
/// imm0_65535 predicate - True if the 32-bit immediate is in the range
/// [0,65535].
-def imm0_65535 : PatLeaf<(i32 imm), [{
- return (uint32_t)N->getZExtValue() < 65536;
+def imm0_65535 : ImmLeaf<i32, [{
+ return Imm >= 0 && Imm < 65536;
}]>;
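// Aside (a hedged note, not from this patch): unlike PatLeaf, ImmLeaf hands
// the predicate the immediate value itself (as Imm) rather than an SDNode,
// which presumably also lets the same predicate be reused outside
// SelectionDAG, e.g. by FastISel.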
class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>;
@@ -377,17 +375,23 @@ def neon_vcvt_imm32 : Operand<i32> {
}
// rot_imm: An integer that encodes a rotate amount. Must be 8, 16, or 24.
-def rot_imm : Operand<i32>, PatLeaf<(i32 imm), [{
- int32_t v = (int32_t)N->getZExtValue();
+def rot_imm : Operand<i32>, ImmLeaf<i32, [{
+ int32_t v = (int32_t)Imm;
return v == 8 || v == 16 || v == 24; }]> {
let EncoderMethod = "getRotImmOpValue";
}
+def ShifterAsmOperand : AsmOperandClass {
+ let Name = "Shifter";
+ let SuperClasses = [];
+}
+
// shift_imm: An integer that encodes a shift amount and the type of shift
// (currently either asr or lsl) using the same encoding used for the
// immediates in so_reg operands.
def shift_imm : Operand<i32> {
let PrintMethod = "printShiftImmOperand";
+ let ParserMatchClass = ShifterAsmOperand;
}
// shifter_operand operands: so_reg and so_imm.
@@ -396,19 +400,21 @@ def so_reg : Operand<i32>, // reg reg imm
[shl,srl,sra,rotr]> {
let EncoderMethod = "getSORegOpValue";
let PrintMethod = "printSORegOperand";
- let MIOperandInfo = (ops GPR, GPR, i32imm);
+ let MIOperandInfo = (ops GPR, GPR, shift_imm);
}
def shift_so_reg : Operand<i32>, // reg reg imm
ComplexPattern<i32, 3, "SelectShiftShifterOperandReg",
[shl,srl,sra,rotr]> {
let EncoderMethod = "getSORegOpValue";
let PrintMethod = "printSORegOperand";
- let MIOperandInfo = (ops GPR, GPR, i32imm);
+ let MIOperandInfo = (ops GPR, GPR, shift_imm);
}
// so_imm - Match a 32-bit shifter_operand immediate operand, which is an
// 8-bit immediate rotated by an arbitrary number of bits.
-def so_imm : Operand<i32>, PatLeaf<(imm), [{ return Pred_so_imm(N); }]> {
+def so_imm : Operand<i32>, ImmLeaf<i32, [{
+ return ARM_AM::getSOImmVal(Imm) != -1;
+ }]> {
let EncoderMethod = "getSOImmOpValue";
let PrintMethod = "printSOImmOperand";
}
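// Worked example (illustrative): an so_imm is an 8-bit value rotated right by
// an even amount, so 0xFF000000 (0xFF ror 8) and 0x000003FC (0xFF ror 30)
// both match, while 0x00000101 does not (getSOImmVal returns -1 for it).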
@@ -429,13 +435,13 @@ def arm_i32imm : PatLeaf<(imm), [{
}]>;
/// imm0_31 predicate - True if the 32-bit immediate is in the range [0,31].
-def imm0_31 : Operand<i32>, PatLeaf<(imm), [{
- return (int32_t)N->getZExtValue() < 32;
+def imm0_31 : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 0 && Imm < 32;
}]>;
/// imm0_31_m1 - Matches and prints like imm0_31, but encodes as 'value - 1'.
-def imm0_31_m1 : Operand<i32>, PatLeaf<(imm), [{
- return (int32_t)N->getZExtValue() < 32;
+def imm0_31_m1 : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 0 && Imm < 32;
}]> {
let EncoderMethod = "getImmMinusOneOpValue";
}
@@ -458,19 +464,30 @@ def bf_inv_mask_imm : Operand<i32>,
}
/// lsb_pos_imm - position of the lsb bit, used by BFI4p and t2BFI4p
-def lsb_pos_imm : Operand<i32>, PatLeaf<(imm), [{
- return isInt<5>(N->getSExtValue());
+def lsb_pos_imm : Operand<i32>, ImmLeaf<i32, [{
+ return isInt<5>(Imm);
}]>;
/// width_imm - number of bits to be copied, used by BFI4p and t2BFI4p
-def width_imm : Operand<i32>, PatLeaf<(imm), [{
- return N->getSExtValue() > 0 && N->getSExtValue() <= 32;
+def width_imm : Operand<i32>, ImmLeaf<i32, [{
+ return Imm > 0 && Imm <= 32;
}]> {
let EncoderMethod = "getMsbOpValue";
}
// Define ARM specific addressing modes.
+def MemMode2AsmOperand : AsmOperandClass {
+ let Name = "MemMode2";
+ let SuperClasses = [];
+ let ParserMethod = "tryParseMemMode2Operand";
+}
+
+def MemMode3AsmOperand : AsmOperandClass {
+ let Name = "MemMode3";
+ let SuperClasses = [];
+ let ParserMethod = "tryParseMemMode3Operand";
+}
// addrmode_imm12 := reg +/- imm12
//
@@ -501,6 +518,7 @@ def addrmode2 : Operand<i32>,
ComplexPattern<i32, 3, "SelectAddrMode2", []> {
let EncoderMethod = "getAddrMode2OpValue";
let PrintMethod = "printAddrMode2Operand";
+ let ParserMatchClass = MemMode2AsmOperand;
let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm);
}
@@ -519,6 +537,7 @@ def addrmode3 : Operand<i32>,
ComplexPattern<i32, 3, "SelectAddrMode3", []> {
let EncoderMethod = "getAddrMode3OpValue";
let PrintMethod = "printAddrMode3Operand";
+ let ParserMatchClass = MemMode3AsmOperand;
let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm);
}
@@ -586,6 +605,21 @@ def addrmodepc : Operand<i32>,
let MIOperandInfo = (ops GPR, i32imm);
}
+def MemMode7AsmOperand : AsmOperandClass {
+ let Name = "MemMode7";
+ let SuperClasses = [];
+}
+
+// addrmode7 := reg
+// Used by load/store exclusive instructions. Exists so that assembly parsing
+// and printing come out right; not used for any codegen matching.
+//
+def addrmode7 : Operand<i32> {
+ let PrintMethod = "printAddrMode7Operand";
+ let MIOperandInfo = (ops GPR);
+ let ParserMatchClass = MemMode7AsmOperand;
+}
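+// Example usage (illustrative): "ldrex r0, [r1]" -- the operand carries only
+// the base register, since the exclusive loads/stores take no offset.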
+
def nohash_imm : Operand<i32> {
let PrintMethod = "printNoHashImmediate";
}
@@ -902,52 +936,23 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{19-16} = Rn;
}
}
+}
+
// Carry setting variants
-let isCodeGenOnly = 1, Defs = [CPSR] in {
-multiclass AI1_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode,
- bit Commutable = 0> {
- def Sri : AXI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
- DPFrm, IIC_iALUi, !strconcat(opc, "\t$Rd, $Rn, $imm"),
- [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]>,
- Requires<[IsARM]> {
- bits<4> Rd;
- bits<4> Rn;
- bits<12> imm;
- let Inst{15-12} = Rd;
- let Inst{19-16} = Rn;
- let Inst{11-0} = imm;
- let Inst{20} = 1;
- let Inst{25} = 1;
- }
- def Srr : AXI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
- DPFrm, IIC_iALUr, !strconcat(opc, "\t$Rd, $Rn, $Rm"),
- [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]>,
- Requires<[IsARM]> {
- bits<4> Rd;
- bits<4> Rn;
- bits<4> Rm;
- let Inst{11-4} = 0b00000000;
+// NOTE: CPSR def omitted because it will be handled by the custom inserter.
+let usesCustomInserter = 1 in {
+multiclass AI1_adde_sube_s_irs<PatFrag opnode, bit Commutable = 0> {
+ def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
+ Size4Bytes, IIC_iALUi,
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]>;
+ def rr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ Size4Bytes, IIC_iALUr,
+ [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]> {
let isCommutable = Commutable;
- let Inst{3-0} = Rm;
- let Inst{15-12} = Rd;
- let Inst{19-16} = Rn;
- let Inst{20} = 1;
- let Inst{25} = 0;
- }
- def Srs : AXI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
- DPSoRegFrm, IIC_iALUsr, !strconcat(opc, "\t$Rd, $Rn, $shift"),
- [(set GPR:$Rd, (opnode GPR:$Rn, so_reg:$shift))]>,
- Requires<[IsARM]> {
- bits<4> Rd;
- bits<4> Rn;
- bits<12> shift;
- let Inst{11-0} = shift;
- let Inst{15-12} = Rd;
- let Inst{19-16} = Rn;
- let Inst{20} = 1;
- let Inst{25} = 0;
}
-}
+ def rs : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
+ Size4Bytes, IIC_iALUsr,
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_reg:$shift))]>;
}
}
@@ -972,6 +977,7 @@ multiclass AI_ldr1<bit isByte, string opc, InstrItinClass iii,
[(set GPR:$Rt, (opnode ldst_so_reg:$shift))]> {
bits<4> Rt;
bits<17> shift;
+ let shift{4} = 0; // Inst{4} = 0
let Inst{23} = shift{12}; // U (add = ('U' == 1))
let Inst{19-16} = shift{16-13}; // Rn
let Inst{15-12} = Rt;
@@ -1001,6 +1007,7 @@ multiclass AI_str1<bit isByte, string opc, InstrItinClass iii,
[(opnode GPR:$Rt, ldst_so_reg:$shift)]> {
bits<4> Rt;
bits<17> shift;
+ let shift{4} = 0; // Inst{4} = 0
let Inst{23} = shift{12}; // U (add = ('U' == 1))
let Inst{19-16} = shift{16-13}; // Rn
let Inst{15-12} = Rt;
@@ -1249,7 +1256,7 @@ let neverHasSideEffects = 1, isReMaterializable = 1 in
// The 'adr' mnemonic encodes differently if the label is before or after
// the instruction. The {24-21} opcode bits are set by the fixup, as we don't
// know until then which form of the instruction will be used.
-def ADR : AI1<0, (outs GPR:$Rd), (ins adrlabel:$label),
+def ADR : AI1<{0,?,?,0}, (outs GPR:$Rd), (ins adrlabel:$label),
MiscFrm, IIC_iALUi, "adr", "\t$Rd, #$label", []> {
bits<4> Rd;
bits<12> label;
@@ -1311,6 +1318,9 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
// before calls from potentially appearing dead.
let isCall = 1,
// On non-Darwin platforms R9 is callee-saved.
+ // FIXME: Do we really need a non-predicated version? If so, it should
+ // at least be a pseudo instruction expanding to the predicated version
+ // at MC lowering time.
Defs = [R0, R1, R2, R3, R12, LR,
D0, D1, D2, D3, D4, D5, D6, D7,
D16, D17, D18, D19, D20, D21, D22, D23,
@@ -1340,7 +1350,16 @@ let isCall = 1,
Requires<[IsARM, HasV5T, IsNotDarwin]> {
bits<4> func;
let Inst{31-4} = 0b1110000100101111111111110011;
- let Inst{3-0} = func;
+ let Inst{3-0} = func;
+ }
+
+ def BLX_pred : AI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm,
+ IIC_Br, "blx", "\t$func",
+ [(ARMcall_pred GPR:$func)]>,
+ Requires<[IsARM, HasV5T, IsNotDarwin]> {
+ bits<4> func;
+ let Inst{27-4} = 0b000100101111111111110011;
+ let Inst{3-0} = func;
}
// ARMv4T
@@ -1364,30 +1383,25 @@ let isCall = 1,
D16, D17, D18, D19, D20, D21, D22, D23,
D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR],
Uses = [R7, SP] in {
- def BLr9 : ABXI<0b1011, (outs), (ins bltarget:$func, variable_ops),
- IIC_Br, "bl\t$func",
- [(ARMcall tglobaladdr:$func)]>, Requires<[IsARM, IsDarwin]> {
- let Inst{31-28} = 0b1110;
- bits<24> func;
- let Inst{23-0} = func;
- }
+ def BLr9 : ARMPseudoInst<(outs), (ins bltarget:$func, variable_ops),
+ Size4Bytes, IIC_Br,
+ [(ARMcall tglobaladdr:$func)]>, Requires<[IsARM, IsDarwin]>;
- def BLr9_pred : ABI<0b1011, (outs), (ins bltarget:$func, variable_ops),
- IIC_Br, "bl", "\t$func",
+ def BLr9_pred : ARMPseudoInst<(outs),
+ (ins bltarget:$func, pred:$p, variable_ops),
+ Size4Bytes, IIC_Br,
[(ARMcall_pred tglobaladdr:$func)]>,
- Requires<[IsARM, IsDarwin]> {
- bits<24> func;
- let Inst{23-0} = func;
- }
+ Requires<[IsARM, IsDarwin]>;
// ARMv5T and above
- def BLXr9 : AXI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm,
- IIC_Br, "blx\t$func",
- [(ARMcall GPR:$func)]>, Requires<[IsARM, HasV5T, IsDarwin]> {
- bits<4> func;
- let Inst{31-4} = 0b1110000100101111111111110011;
- let Inst{3-0} = func;
- }
+ def BLXr9 : ARMPseudoInst<(outs), (ins GPR:$func, variable_ops),
+ Size4Bytes, IIC_Br,
+ [(ARMcall GPR:$func)]>, Requires<[IsARM, HasV5T, IsDarwin]>;
+
+ def BLXr9_pred: ARMPseudoInst<(outs), (ins GPR:$func, pred:$p, variable_ops),
+ Size4Bytes, IIC_Br,
+ [(ARMcall_pred GPR:$func)]>,
+ Requires<[IsARM, HasV5T, IsDarwin]>;
// ARMv4T
// Note: Restrict $func to the tGPR regclass to prevent it being in LR.
@@ -1403,11 +1417,7 @@ let isCall = 1,
// Tail calls.
-// FIXME: These should probably be xformed into the non-TC versions of the
-// instructions as part of MC lowering.
-// FIXME: These seem to be used for both Thumb and ARM instruction selection.
-// Thumb should have its own version since the instruction is actually
-// different, even though the mnemonic is the same.
+// FIXME: The Thumb versions of these should live in ARMInstrThumb.td
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
// Darwin versions.
let Defs = [R0, R1, R2, R3, R9, R12,
@@ -1421,21 +1431,21 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
def TCRETURNri : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops),
IIC_Br, []>, Requires<[IsDarwin]>;
- def TAILJMPd : ABXI<0b1010, (outs), (ins brtarget:$dst, variable_ops),
- IIC_Br, "b\t$dst @ TAILCALL",
+ def TAILJMPd : ARMPseudoInst<(outs), (ins brtarget:$dst, variable_ops),
+ Size4Bytes, IIC_Br,
[]>, Requires<[IsARM, IsDarwin]>;
- def TAILJMPdt: ABXI<0b1010, (outs), (ins brtarget:$dst, variable_ops),
- IIC_Br, "b.w\t$dst @ TAILCALL",
+ def tTAILJMPd: tPseudoInst<(outs), (ins brtarget:$dst, variable_ops),
+ Size4Bytes, IIC_Br,
[]>, Requires<[IsThumb, IsDarwin]>;
- def TAILJMPr : AXI<(outs), (ins tcGPR:$dst, variable_ops),
- BrMiscFrm, IIC_Br, "bx\t$dst @ TAILCALL",
- []>, Requires<[IsDarwin]> {
- bits<4> dst;
- let Inst{31-4} = 0b1110000100101111111111110001;
- let Inst{3-0} = dst;
- }
+ def TAILJMPr : ARMPseudoInst<(outs), (ins tcGPR:$dst, variable_ops),
+ Size4Bytes, IIC_Br,
+ []>, Requires<[IsARM, IsDarwin]>;
+
+ def tTAILJMPr : tPseudoInst<(outs), (ins tcGPR:$dst, variable_ops),
+ Size4Bytes, IIC_Br,
+ []>, Requires<[IsThumb, IsDarwin]>;
}
// Non-Darwin versions (the difference is R9).
@@ -1450,34 +1460,31 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
def TCRETURNriND : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops),
IIC_Br, []>, Requires<[IsNotDarwin]>;
- def TAILJMPdND : ABXI<0b1010, (outs), (ins brtarget:$dst, variable_ops),
- IIC_Br, "b\t$dst @ TAILCALL",
+ def TAILJMPdND : ARMPseudoInst<(outs), (ins brtarget:$dst, variable_ops),
+ Size4Bytes, IIC_Br,
[]>, Requires<[IsARM, IsNotDarwin]>;
- def TAILJMPdNDt : ABXI<0b1010, (outs), (ins brtarget:$dst, variable_ops),
- IIC_Br, "b.w\t$dst @ TAILCALL",
+ def tTAILJMPdND : tPseudoInst<(outs), (ins brtarget:$dst, variable_ops),
+ Size4Bytes, IIC_Br,
[]>, Requires<[IsThumb, IsNotDarwin]>;
- def TAILJMPrND : AXI<(outs), (ins tcGPR:$dst, variable_ops),
- BrMiscFrm, IIC_Br, "bx\t$dst @ TAILCALL",
- []>, Requires<[IsNotDarwin]> {
- bits<4> dst;
- let Inst{31-4} = 0b1110000100101111111111110001;
- let Inst{3-0} = dst;
- }
+ def TAILJMPrND : ARMPseudoInst<(outs), (ins tcGPR:$dst, variable_ops),
+ Size4Bytes, IIC_Br,
+ []>, Requires<[IsARM, IsNotDarwin]>;
+ def tTAILJMPrND : tPseudoInst<(outs), (ins tcGPR:$dst, variable_ops),
+ Size4Bytes, IIC_Br,
+ []>, Requires<[IsThumb, IsNotDarwin]>;
}
}
let isBranch = 1, isTerminator = 1 in {
- // B is "predicable" since it can be xformed into a Bcc.
+ // B is "predicable" since it's just a Bcc with an 'always' condition.
let isBarrier = 1 in {
let isPredicable = 1 in
- def B : ABXI<0b1010, (outs), (ins brtarget:$target), IIC_Br,
- "b\t$target", [(br bb:$target)]> {
- bits<24> target;
- let Inst{31-28} = 0b1110;
- let Inst{23-0} = target;
- }
+ // FIXME: We shouldn't need this pseudo at all. Just using Bcc directly
+ // should be sufficient.
+ def B : ARMPseudoInst<(outs), (ins brtarget:$target), Size4Bytes, IIC_Br,
+ [(br bb:$target)]>;
let isNotDuplicable = 1, isIndirectBranch = 1 in {
def BR_JTr : ARMPseudoInst<(outs),
@@ -1509,6 +1516,16 @@ let isBranch = 1, isTerminator = 1 in {
}
}
+// BLX (immediate) -- for disassembly only
+def BLXi : AXI<(outs), (ins br_target:$target), BrMiscFrm, NoItinerary,
+ "blx\t$target", [/* pattern left blank */]>,
+ Requires<[IsARM, HasV5T]> {
+ let Inst{31-25} = 0b1111101;
+ bits<25> target;
+ let Inst{23-0} = target{24-1};
+ let Inst{24} = target{0};
+}
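+// Illustrative encoding note: the branch offset is half-word granular,
+// SignExtend(imm24:H:'0'), so a byte offset of +6 means target = 3, i.e.
+// Inst{24} (the H bit) = 1 and Inst{23-0} = 1.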
+
// Branch and Exchange Jazelle -- for disassembly only
def BXJ : ABI<0b0001, (outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func",
[/* For disassembly only; pattern left blank */]> {
@@ -1533,6 +1550,7 @@ def SVC : ABI<0b1111, (outs), (ins i32imm:$svc), IIC_Br, "svc", "\t$svc",
let Inst{23-0} = svc;
}
}
+def : MnemonicAlias<"swi", "svc">;
// Store Return State is a system instruction -- for disassembly only
let isCodeGenOnly = 1 in { // FIXME: This should not use submode!
@@ -1541,6 +1559,8 @@ def SRSW : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, i32imm:$mode),
[/* For disassembly only; pattern left blank */]> {
let Inst{31-28} = 0b1111;
let Inst{22-20} = 0b110; // W = 1
+ let Inst{19-8} = 0xd05;
+ let Inst{7-5} = 0b000;
}
def SRS : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, i32imm:$mode),
@@ -1548,6 +1568,8 @@ def SRS : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, i32imm:$mode),
[/* For disassembly only; pattern left blank */]> {
let Inst{31-28} = 0b1111;
let Inst{22-20} = 0b100; // W = 0
+ let Inst{19-8} = 0xd05;
+ let Inst{7-5} = 0b000;
}
// Return From Exception is a system instruction -- for disassembly only
@@ -1556,6 +1578,7 @@ def RFEW : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, GPR:$base),
[/* For disassembly only; pattern left blank */]> {
let Inst{31-28} = 0b1111;
let Inst{22-20} = 0b011; // W = 1
+ let Inst{15-0} = 0x0a00;
}
def RFE : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, GPR:$base),
@@ -1563,6 +1586,7 @@ def RFE : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, GPR:$base),
[/* For disassembly only; pattern left blank */]> {
let Inst{31-28} = 0b1111;
let Inst{22-20} = 0b001; // W = 0
+ let Inst{15-0} = 0x0a00;
}
} // isCodeGenOnly = 1
@@ -1610,15 +1634,11 @@ def LDRSB : AI3ld<0b1101, 1, (outs GPR:$Rt), (ins addrmode3:$addr), LdMiscFrm,
IIC_iLoad_bh_r, "ldrsb", "\t$Rt, $addr",
[(set GPR:$Rt, (sextloadi8 addrmode3:$addr))]>;
-let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1,
- isCodeGenOnly = 1 in { // $dst2 doesn't exist in asmstring?
-// FIXME: $dst2 isn't in the asm string as it's implied by $Rd (dst2 = Rd+1)
-// how to represent that such that tblgen is happy and we don't
-// mark this codegen only?
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
// Load doubleword
def LDRD : AI3ld<0b1101, 0, (outs GPR:$Rd, GPR:$dst2),
(ins addrmode3:$addr), LdMiscFrm,
- IIC_iLoad_d_r, "ldrd", "\t$Rd, $addr",
+ IIC_iLoad_d_r, "ldrd", "\t$Rd, $dst2, $addr",
[]>, Requires<[IsARM, HasV5TE]>;
}
@@ -1636,6 +1656,7 @@ multiclass AI2_ldridx<bit isByte, string opc, InstrItinClass itin> {
let Inst{23} = addr{12};
let Inst{19-16} = addr{17-14};
let Inst{11-0} = addr{11-0};
+ let AsmMatchConverter = "CvtLdWriteBackRegAddrMode2";
}
def _POST : AI2ldstidx<1, isByte, 0, (outs GPR:$Rt, GPR:$Rn_wb),
(ins GPR:$Rn, am2offset:$offset),
@@ -1688,40 +1709,80 @@ let mayLoad = 1, neverHasSideEffects = 1 in {
defm LDRH : AI3_ldridx<0b1011, 1, "ldrh", IIC_iLoad_bh_ru>;
defm LDRSH : AI3_ldridx<0b1111, 1, "ldrsh", IIC_iLoad_bh_ru>;
defm LDRSB : AI3_ldridx<0b1101, 1, "ldrsb", IIC_iLoad_bh_ru>;
-let hasExtraDefRegAllocReq = 1, isCodeGenOnly = 1 in
-defm LDRD : AI3_ldridx<0b1101, 0, "ldrd", IIC_iLoad_d_ru>;
+let hasExtraDefRegAllocReq = 1 in {
+def LDRD_PRE : AI3ldstidx<0b1101, 0, 1, 1, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb),
+ (ins addrmode3:$addr), IndexModePre,
+ LdMiscFrm, IIC_iLoad_d_ru,
+ "ldrd", "\t$Rt, $Rt2, $addr!",
+ "$addr.base = $Rn_wb", []> {
+ bits<14> addr;
+ let Inst{23} = addr{8}; // U bit
+ let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm
+ let Inst{19-16} = addr{12-9}; // Rn
+ let Inst{11-8} = addr{7-4}; // imm7_4/zero
+ let Inst{3-0} = addr{3-0}; // imm3_0/Rm
+}
+def LDRD_POST: AI3ldstidx<0b1101, 0, 1, 0, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb),
+ (ins GPR:$Rn, am3offset:$offset), IndexModePost,
+ LdMiscFrm, IIC_iLoad_d_ru,
+ "ldrd", "\t$Rt, $Rt2, [$Rn], $offset",
+ "$Rn = $Rn_wb", []> {
+ bits<10> offset;
+ bits<4> Rn;
+ let Inst{23} = offset{8}; // U bit
+ let Inst{22} = offset{9}; // 1 == imm8, 0 == Rm
+ let Inst{19-16} = Rn;
+ let Inst{11-8} = offset{7-4}; // imm7_4/zero
+ let Inst{3-0} = offset{3-0}; // imm3_0/Rm
+}
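+// Worked example (illustrative): "ldrd r0, r1, [r2], #8" would encode the
+// am3offset operand as offset{9} = 1 (imm8 form), offset{8} = 1 (add),
+// offset{7-4} = 0b0000, offset{3-0} = 0b1000. Note the imm-vs-Rm selector
+// sits at offset{9} here, vs. addr{13} in the fused addrmode3 operand.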
+} // hasExtraDefRegAllocReq = 1
} // mayLoad = 1, neverHasSideEffects = 1
// LDRT, LDRBT, LDRSBT, LDRHT, LDRSHT are for disassembly only.
let mayLoad = 1, neverHasSideEffects = 1 in {
-def LDRT : AI2ldstidx<1, 0, 0, (outs GPR:$dst, GPR:$base_wb),
- (ins GPR:$base, am2offset:$offset), IndexModeNone,
- LdFrm, IIC_iLoad_ru,
- "ldrt", "\t$dst, [$base], $offset", "$base = $base_wb", []> {
+def LDRT : AI2ldstidx<1, 0, 0, (outs GPR:$Rt, GPR:$base_wb),
+ (ins addrmode2:$addr), IndexModePost, LdFrm, IIC_iLoad_ru,
+ "ldrt", "\t$Rt, $addr", "$addr.base = $base_wb", []> {
+ // {17-14} Rn
+ // {13} 1 == Rm, 0 == imm12
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<18> addr;
+ let Inst{25} = addr{13};
+ let Inst{23} = addr{12};
let Inst{21} = 1; // overwrite
-}
-def LDRBT : AI2ldstidx<1, 1, 0, (outs GPR:$dst, GPR:$base_wb),
- (ins GPR:$base, am2offset:$offset), IndexModeNone,
- LdFrm, IIC_iLoad_bh_ru,
- "ldrbt", "\t$dst, [$base], $offset", "$base = $base_wb", []> {
+ let Inst{19-16} = addr{17-14};
+ let Inst{11-0} = addr{11-0};
+ let AsmMatchConverter = "CvtLdWriteBackRegAddrMode2";
+}
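+// Worked example (illustrative): "ldrt r0, [r1], #4" would encode as
+//   addr{17-14} = 0b0001 (Rn = r1), addr{13} = 0 (imm12 form),
+//   addr{12}    = 1      (add),     addr{11-0} = 4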
+def LDRBT : AI2ldstidx<1, 1, 0, (outs GPR:$Rt, GPR:$base_wb),
+ (ins addrmode2:$addr), IndexModePost, LdFrm, IIC_iLoad_bh_ru,
+ "ldrbt", "\t$Rt, $addr", "$addr.base = $base_wb", []> {
+ // {17-14} Rn
+ // {13} 1 == Rm, 0 == imm12
+ // {12} isAdd
+ // {11-0} imm12/Rm
+ bits<18> addr;
+ let Inst{25} = addr{13};
+ let Inst{23} = addr{12};
let Inst{21} = 1; // overwrite
+ let Inst{19-16} = addr{17-14};
+ let Inst{11-0} = addr{11-0};
+ let AsmMatchConverter = "CvtLdWriteBackRegAddrMode2";
}
-def LDRSBT : AI3ldstidx<0b1101, 1, 1, 0, (outs GPR:$dst, GPR:$base_wb),
- (ins GPR:$base, am3offset:$offset), IndexModePost,
- LdMiscFrm, IIC_iLoad_bh_ru,
- "ldrsbt", "\t$dst, [$base], $offset", "$base = $base_wb", []> {
+def LDRSBT : AI3ldstidxT<0b1101, 1, 1, 0, (outs GPR:$Rt, GPR:$base_wb),
+ (ins addrmode3:$addr), IndexModePost, LdMiscFrm, IIC_iLoad_bh_ru,
+ "ldrsbt", "\t$Rt, $addr", "$addr.base = $base_wb", []> {
let Inst{21} = 1; // overwrite
}
-def LDRHT : AI3ldstidx<0b1011, 1, 1, 0, (outs GPR:$dst, GPR:$base_wb),
- (ins GPR:$base, am3offset:$offset), IndexModePost,
- LdMiscFrm, IIC_iLoad_bh_ru,
- "ldrht", "\t$dst, [$base], $offset", "$base = $base_wb", []> {
+def LDRHT : AI3ldstidxT<0b1011, 1, 1, 0, (outs GPR:$Rt, GPR:$base_wb),
+ (ins addrmode3:$addr), IndexModePost, LdMiscFrm, IIC_iLoad_bh_ru,
+ "ldrht", "\t$Rt, $addr", "$addr.base = $base_wb", []> {
let Inst{21} = 1; // overwrite
}
-def LDRSHT : AI3ldstidx<0b1111, 1, 1, 0, (outs GPR:$dst, GPR:$base_wb),
- (ins GPR:$base, am3offset:$offset), IndexModePost,
- LdMiscFrm, IIC_iLoad_bh_ru,
- "ldrsht", "\t$dst, [$base], $offset", "$base = $base_wb", []> {
+def LDRSHT : AI3ldstidxT<0b1111, 1, 1, 0, (outs GPR:$Rt, GPR:$base_wb),
+ (ins addrmode3:$addr), IndexModePost, LdMiscFrm, IIC_iLoad_bh_ru,
+ "ldrsht", "\t$Rt, $addr", "$addr.base = $base_wb", []> {
let Inst{21} = 1; // overwrite
}
}
@@ -1734,55 +1795,61 @@ def STRH : AI3str<0b1011, (outs), (ins GPR:$Rt, addrmode3:$addr), StMiscFrm,
[(truncstorei16 GPR:$Rt, addrmode3:$addr)]>;
// Store doubleword
-let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1,
- isCodeGenOnly = 1 in // $src2 doesn't exist in asm string
-def STRD : AI3str<0b1111, (outs), (ins GPR:$src1, GPR:$src2, addrmode3:$addr),
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in
+def STRD : AI3str<0b1111, (outs), (ins GPR:$Rt, GPR:$src2, addrmode3:$addr),
StMiscFrm, IIC_iStore_d_r,
- "strd", "\t$src1, $addr", []>, Requires<[IsARM, HasV5TE]>;
+ "strd", "\t$Rt, $src2, $addr", []>, Requires<[IsARM, HasV5TE]>;
// Indexed stores
def STR_PRE : AI2stridx<0, 1, (outs GPR:$Rn_wb),
(ins GPR:$Rt, GPR:$Rn, am2offset:$offset),
IndexModePre, StFrm, IIC_iStore_ru,
- "str", "\t$Rt, [$Rn, $offset]!", "$Rn = $Rn_wb",
+ "str", "\t$Rt, [$Rn, $offset]!",
+ "$Rn = $Rn_wb,@earlyclobber $Rn_wb",
[(set GPR:$Rn_wb,
(pre_store GPR:$Rt, GPR:$Rn, am2offset:$offset))]>;
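// The @earlyclobber on $Rn_wb (here and on the writeback stores below) is
// presumably there to keep the allocator from assigning the writeback def to
// $Rt or the offset register; Rn == Rt with writeback is UNPREDICTABLE per
// the ARM ARM.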
def STR_POST : AI2stridx<0, 0, (outs GPR:$Rn_wb),
(ins GPR:$Rt, GPR:$Rn, am2offset:$offset),
IndexModePost, StFrm, IIC_iStore_ru,
- "str", "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb",
+ "str", "\t$Rt, [$Rn], $offset",
+ "$Rn = $Rn_wb,@earlyclobber $Rn_wb",
[(set GPR:$Rn_wb,
(post_store GPR:$Rt, GPR:$Rn, am2offset:$offset))]>;
def STRB_PRE : AI2stridx<1, 1, (outs GPR:$Rn_wb),
(ins GPR:$Rt, GPR:$Rn, am2offset:$offset),
IndexModePre, StFrm, IIC_iStore_bh_ru,
- "strb", "\t$Rt, [$Rn, $offset]!", "$Rn = $Rn_wb",
+ "strb", "\t$Rt, [$Rn, $offset]!",
+ "$Rn = $Rn_wb,@earlyclobber $Rn_wb",
[(set GPR:$Rn_wb, (pre_truncsti8 GPR:$Rt,
GPR:$Rn, am2offset:$offset))]>;
def STRB_POST: AI2stridx<1, 0, (outs GPR:$Rn_wb),
(ins GPR:$Rt, GPR:$Rn, am2offset:$offset),
IndexModePost, StFrm, IIC_iStore_bh_ru,
- "strb", "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb",
+ "strb", "\t$Rt, [$Rn], $offset",
+ "$Rn = $Rn_wb,@earlyclobber $Rn_wb",
[(set GPR:$Rn_wb, (post_truncsti8 GPR:$Rt,
GPR:$Rn, am2offset:$offset))]>;
def STRH_PRE : AI3stridx<0b1011, 0, 1, (outs GPR:$Rn_wb),
(ins GPR:$Rt, GPR:$Rn, am3offset:$offset),
IndexModePre, StMiscFrm, IIC_iStore_ru,
- "strh", "\t$Rt, [$Rn, $offset]!", "$Rn = $Rn_wb",
+ "strh", "\t$Rt, [$Rn, $offset]!",
+ "$Rn = $Rn_wb,@earlyclobber $Rn_wb",
[(set GPR:$Rn_wb,
(pre_truncsti16 GPR:$Rt, GPR:$Rn, am3offset:$offset))]>;
def STRH_POST: AI3stridx<0b1011, 0, 0, (outs GPR:$Rn_wb),
(ins GPR:$Rt, GPR:$Rn, am3offset:$offset),
IndexModePost, StMiscFrm, IIC_iStore_bh_ru,
- "strh", "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb",
+ "strh", "\t$Rt, [$Rn], $offset",
+ "$Rn = $Rn_wb,@earlyclobber $Rn_wb",
[(set GPR:$Rn_wb, (post_truncsti16 GPR:$Rt,
GPR:$Rn, am3offset:$offset))]>;
// For disassembly only
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
def STRD_PRE : AI3stdpr<(outs GPR:$base_wb),
(ins GPR:$src1, GPR:$src2, GPR:$base, am3offset:$offset),
StMiscFrm, IIC_iStore_d_ru,
@@ -1795,31 +1862,32 @@ def STRD_POST: AI3stdpo<(outs GPR:$base_wb),
StMiscFrm, IIC_iStore_d_ru,
"strd", "\t$src1, $src2, [$base], $offset",
"$base = $base_wb", []>;
+} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1
// STRT, STRBT, and STRHT are for disassembly only.
-def STRT : AI2stridx<0, 0, (outs GPR:$Rn_wb),
- (ins GPR:$Rt, GPR:$Rn,am2offset:$offset),
- IndexModeNone, StFrm, IIC_iStore_ru,
- "strt", "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb",
- [/* For disassembly only; pattern left blank */]> {
+def STRT : AI2stridxT<0, 0, (outs GPR:$Rn_wb), (ins GPR:$Rt, addrmode2:$addr),
+ IndexModePost, StFrm, IIC_iStore_ru,
+ "strt", "\t$Rt, $addr", "$addr.base = $Rn_wb",
+ [/* For disassembly only; pattern left blank */]> {
let Inst{21} = 1; // overwrite
+ let AsmMatchConverter = "CvtStWriteBackRegAddrMode2";
}
-def STRBT : AI2stridx<1, 0, (outs GPR:$Rn_wb),
- (ins GPR:$Rt, GPR:$Rn, am2offset:$offset),
- IndexModeNone, StFrm, IIC_iStore_bh_ru,
- "strbt", "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb",
- [/* For disassembly only; pattern left blank */]> {
+def STRBT : AI2stridxT<1, 0, (outs GPR:$Rn_wb), (ins GPR:$Rt, addrmode2:$addr),
+ IndexModePost, StFrm, IIC_iStore_bh_ru,
+ "strbt", "\t$Rt, $addr", "$addr.base = $Rn_wb",
+ [/* For disassembly only; pattern left blank */]> {
let Inst{21} = 1; // overwrite
+ let AsmMatchConverter = "CvtStWriteBackRegAddrMode2";
}
-def STRHT: AI3sthpo<(outs GPR:$base_wb),
- (ins GPR:$src, GPR:$base,am3offset:$offset),
+def STRHT: AI3sthpo<(outs GPR:$base_wb), (ins GPR:$Rt, addrmode3:$addr),
StMiscFrm, IIC_iStore_bh_ru,
- "strht", "\t$src, [$base], $offset", "$base = $base_wb",
+ "strht", "\t$Rt, $addr", "$addr.base = $base_wb",
[/* For disassembly only; pattern left blank */]> {
let Inst{21} = 1; // overwrite
+ let AsmMatchConverter = "CvtStWriteBackRegAddrMode3";
}
//===----------------------------------------------------------------------===//
@@ -1892,7 +1960,7 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f,
let Inst{21} = 1; // Writeback
let Inst{20} = L_bit;
}
-}
+}
let neverHasSideEffects = 1 in {
@@ -1912,16 +1980,10 @@ def : MnemonicAlias<"stm", "stmia">;
// FIXME: Should pc be an implicit operand like PICADD, etc?
let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
hasExtraDefRegAllocReq = 1, isCodeGenOnly = 1 in
-// FIXME: Should be a pseudo-instruction.
-def LDMIA_RET : AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p,
- reglist:$regs, variable_ops),
- IndexModeUpd, LdStMulFrm, IIC_iLoad_mBr,
- "ldmia${p}\t$Rn!, $regs",
- "$Rn = $wb", []> {
- let Inst{24-23} = 0b01; // Increment After
- let Inst{21} = 1; // Writeback
- let Inst{20} = 1; // Load
-}
+def LDMIA_RET : ARMPseudoInst<(outs GPR:$wb), (ins GPR:$Rn, pred:$p,
+ reglist:$regs, variable_ops),
+ Size4Bytes, IIC_iLoad_mBr, []>,
+ RegConstraint<"$Rn = $wb">;
//===----------------------------------------------------------------------===//
// Move Instructions.
@@ -1933,6 +1995,7 @@ def MOVr : AsI1<0b1101, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMOVr,
bits<4> Rd;
bits<4> Rm;
+ let Inst{19-16} = 0b0000;
let Inst{11-4} = 0b00000000;
let Inst{25} = 0;
let Inst{3-0} = Rm;
@@ -1959,6 +2022,7 @@ def MOVs : AsI1<0b1101, (outs GPR:$Rd), (ins shift_so_reg:$src),
bits<4> Rd;
bits<12> src;
let Inst{15-12} = Rd;
+ let Inst{19-16} = 0b0000;
let Inst{11-0} = src;
let Inst{25} = 0;
}
@@ -2145,10 +2209,12 @@ defm SBC : AI1_adde_sube_irs<0b0110, "sbc",
BinOpFrag<(sube_dead_carry node:$LHS, node:$RHS)>>;
// ADC and SUBC with 's' bit set.
-defm ADCS : AI1_adde_sube_s_irs<0b0101, "adcs",
- BinOpFrag<(adde_live_carry node:$LHS, node:$RHS)>, 1>;
-defm SBCS : AI1_adde_sube_s_irs<0b0110, "sbcs",
- BinOpFrag<(sube_live_carry node:$LHS, node:$RHS) >>;
+let usesCustomInserter = 1 in {
+defm ADCS : AI1_adde_sube_s_irs<
+ BinOpFrag<(adde_live_carry node:$LHS, node:$RHS)>, 1>;
+defm SBCS : AI1_adde_sube_s_irs<
+ BinOpFrag<(sube_live_carry node:$LHS, node:$RHS) >>;
+}
def RSBri : AsI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
IIC_iALUi, "rsb", "\t$Rd, $Rn, $imm",
@@ -2190,31 +2256,17 @@ def RSBrs : AsI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
}
// RSB with 's' bit set.
-let isCodeGenOnly = 1, Defs = [CPSR] in {
-def RSBSri : AI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
- IIC_iALUi, "rsbs", "\t$Rd, $Rn, $imm",
- [(set GPR:$Rd, (subc so_imm:$imm, GPR:$Rn))]> {
- bits<4> Rd;
- bits<4> Rn;
- bits<12> imm;
- let Inst{25} = 1;
- let Inst{20} = 1;
- let Inst{15-12} = Rd;
- let Inst{19-16} = Rn;
- let Inst{11-0} = imm;
-}
-def RSBSrs : AI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
- DPSoRegFrm, IIC_iALUsr, "rsbs", "\t$Rd, $Rn, $shift",
- [(set GPR:$Rd, (subc so_reg:$shift, GPR:$Rn))]> {
- bits<4> Rd;
- bits<4> Rn;
- bits<12> shift;
- let Inst{25} = 0;
- let Inst{20} = 1;
- let Inst{11-0} = shift;
- let Inst{15-12} = Rd;
- let Inst{19-16} = Rn;
-}
+// NOTE: CPSR def omitted because it will be handled by the custom inserter.
+let usesCustomInserter = 1 in {
+def RSBSri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
+ Size4Bytes, IIC_iALUi,
+ [(set GPR:$Rd, (subc so_imm:$imm, GPR:$Rn))]>;
+def RSBSrr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ Size4Bytes, IIC_iALUr,
+ [/* For disassembly only; pattern left blank */]>;
+def RSBSrs : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
+ Size4Bytes, IIC_iALUsr,
+ [(set GPR:$Rd, (subc so_reg:$shift, GPR:$Rn))]>;
}
let Uses = [CPSR] in {
@@ -2258,34 +2310,14 @@ def RSCrs : AsI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
}
}
-// FIXME: Allow these to be predicated.
-let isCodeGenOnly = 1, Defs = [CPSR], Uses = [CPSR] in {
-def RSCSri : AXI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
- DPFrm, IIC_iALUi, "rscs\t$Rd, $Rn, $imm",
- [(set GPR:$Rd, (sube_dead_carry so_imm:$imm, GPR:$Rn))]>,
- Requires<[IsARM]> {
- bits<4> Rd;
- bits<4> Rn;
- bits<12> imm;
- let Inst{25} = 1;
- let Inst{20} = 1;
- let Inst{15-12} = Rd;
- let Inst{19-16} = Rn;
- let Inst{11-0} = imm;
-}
-def RSCSrs : AXI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
- DPSoRegFrm, IIC_iALUsr, "rscs\t$Rd, $Rn, $shift",
- [(set GPR:$Rd, (sube_dead_carry so_reg:$shift, GPR:$Rn))]>,
- Requires<[IsARM]> {
- bits<4> Rd;
- bits<4> Rn;
- bits<12> shift;
- let Inst{25} = 0;
- let Inst{20} = 1;
- let Inst{11-0} = shift;
- let Inst{15-12} = Rd;
- let Inst{19-16} = Rn;
-}
+// NOTE: CPSR def omitted because it will be handled by the custom inserter.
+let usesCustomInserter = 1, Uses = [CPSR] in {
+def RSCSri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
+ Size4Bytes, IIC_iALUi,
+ [(set GPR:$Rd, (sube_dead_carry so_imm:$imm, GPR:$Rn))]>;
+def RSCSrs : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
+ Size4Bytes, IIC_iALUsr,
+ [(set GPR:$Rd, (sube_dead_carry so_reg:$shift, GPR:$Rn))]>;
}
// (sub X, imm) gets canonicalized to (add X, -imm). Match this form.
@@ -2300,8 +2332,10 @@ def : ARMPat<(addc GPR:$src, so_imm_neg:$imm),
// The with-carry-in form matches bitwise not instead of the negation.
// Effectively, the inverse interpretation of the carry flag already accounts
// for part of the negation.
-def : ARMPat<(adde GPR:$src, so_imm_not:$imm),
+def : ARMPat<(adde_dead_carry GPR:$src, so_imm_not:$imm),
(SBCri GPR:$src, so_imm_not:$imm)>;
+def : ARMPat<(adde_live_carry GPR:$src, so_imm_not:$imm),
+ (SBCSri GPR:$src, so_imm_not:$imm)>;
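+// Why this is correct (sketch): SBC computes Rn + ~Op2 + C. so_imm_not
+// matches an imm whose complement is encodable and hands SBC that
+// complement, so the result is Rn + ~(~imm) + C = Rn + imm + C -- exactly
+// the adde.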
// Note: These are implemented in C++ code, because they have to generate
// ADD/SUBrs instructions, which use a complex pattern that a xform function
@@ -2617,14 +2651,16 @@ def MULv5: ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm,
def MUL : AsMul1I32<0b0000000, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
IIC_iMUL32, "mul", "\t$Rd, $Rn, $Rm",
[(set GPR:$Rd, (mul GPR:$Rn, GPR:$Rm))]>,
- Requires<[IsARM, HasV6]>;
+ Requires<[IsARM, HasV6]> {
+ let Inst{15-12} = 0b0000;
+}
}
let Constraints = "@earlyclobber $Rd" in
def MLAv5: ARMPseudoInst<(outs GPR:$Rd),
(ins GPR:$Rn, GPR:$Rm, GPR:$Ra, pred:$p, cc_out:$s),
- Size4Bytes, IIC_iMAC32,
- [(set GPR:$Rd, (add (mul GPR:$Rn, GPR:$Rm), GPR:$Ra))]>,
+ Size4Bytes, IIC_iMAC32,
+ [(set GPR:$Rd, (add (mul GPR:$Rn, GPR:$Rm), GPR:$Ra))]>,
Requires<[IsARM, NoV6]> {
bits<4> Ra;
let Inst{15-12} = Ra;
@@ -2657,7 +2693,7 @@ let neverHasSideEffects = 1 in {
let isCommutable = 1 in {
let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in {
def SMULLv5 : ARMPseudoInst<(outs GPR:$RdLo, GPR:$RdHi),
- (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
+ (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
Size4Bytes, IIC_iMUL64, []>,
Requires<[IsARM, NoV6]>;
@@ -2681,15 +2717,15 @@ def UMULL : AsMul1I64<0b0000100, (outs GPR:$RdLo, GPR:$RdHi),
// Multiply + accumulate
let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in {
def SMLALv5 : ARMPseudoInst<(outs GPR:$RdLo, GPR:$RdHi),
- (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
+ (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
Size4Bytes, IIC_iMAC64, []>,
Requires<[IsARM, NoV6]>;
def UMLALv5 : ARMPseudoInst<(outs GPR:$RdLo, GPR:$RdHi),
- (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
+ (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
Size4Bytes, IIC_iMAC64, []>,
Requires<[IsARM, NoV6]>;
def UMAALv5 : ARMPseudoInst<(outs GPR:$RdLo, GPR:$RdHi),
- (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
+ (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
Size4Bytes, IIC_iMAC64, []>,
Requires<[IsARM, NoV6]>;
@@ -2970,17 +3006,25 @@ def REVSH : AMiscA1I<0b01101111, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm),
IIC_iUNAr, "revsh", "\t$Rd, $Rm",
[(set GPR:$Rd,
(sext_inreg
- (or (srl (and GPR:$Rm, 0xFF00), (i32 8)),
+ (or (srl GPR:$Rm, (i32 8)),
(shl GPR:$Rm, (i32 8))), i16))]>,
Requires<[IsARM, HasV6]>;
+def : ARMV6Pat<(sext_inreg (or (srl (and GPR:$Rm, 0xFF00), (i32 8)),
+ (shl GPR:$Rm, (i32 8))), i16),
+ (REVSH GPR:$Rm)>;
+
+// Need the AddedComplexity or else MOVs + REV would be chosen.
+let AddedComplexity = 5 in
+def : ARMV6Pat<(sra (bswap GPR:$Rm), (i32 16)), (REVSH GPR:$Rm)>;
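+// Why this works (sketch): for x = [b3:b2:b1:b0], bswap gives [b0:b1:b2:b3],
+// and an arithmetic shift right by 16 leaves sext([b0:b1]) -- exactly what
+// revsh produces from the low halfword [b1:b0].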
+
def lsl_shift_imm : SDNodeXForm<imm, [{
unsigned Sh = ARM_AM::getSORegOpc(ARM_AM::lsl, N->getZExtValue());
return CurDAG->getTargetConstant(Sh, MVT::i32);
}]>;
-def lsl_amt : PatLeaf<(i32 imm), [{
- return (N->getZExtValue() < 32);
+def lsl_amt : ImmLeaf<i32, [{
+ return Imm > 0 && Imm < 32;
}], lsl_shift_imm>;
def PKHBT : APKHI<0b01101000, 0, (outs GPR:$Rd),
@@ -3002,8 +3046,8 @@ def asr_shift_imm : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(Sh, MVT::i32);
}]>;
-def asr_amt : PatLeaf<(i32 imm), [{
- return (N->getZExtValue() <= 32);
+def asr_amt : ImmLeaf<i32, [{
+ return Imm > 0 && Imm <= 32;
}], asr_shift_imm>;
// Note: Shifts of 1-15 bits will be transformed to srl instead of sra and
@@ -3119,88 +3163,43 @@ def BCCZi64 : PseudoInst<(outs),
// Conditional moves
// FIXME: should be able to write a pattern for ARMcmov, but can't use
// a two-value operand where a dag node expects two operands. :(
-// FIXME: These should all be pseudo-instructions that get expanded to
-// the normal MOV instructions. That would fix the dependency on
-// special casing them in tblgen.
let neverHasSideEffects = 1 in {
-def MOVCCr : AI1<0b1101, (outs GPR:$Rd), (ins GPR:$false, GPR:$Rm), DPFrm,
- IIC_iCMOVr, "mov", "\t$Rd, $Rm",
- [/*(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm, imm:$cc, CCR:$ccr))*/]>,
- RegConstraint<"$false = $Rd">, UnaryDP {
- bits<4> Rd;
- bits<4> Rm;
- let Inst{25} = 0;
- let Inst{20} = 0;
- let Inst{15-12} = Rd;
- let Inst{11-4} = 0b00000000;
- let Inst{3-0} = Rm;
-}
-
-def MOVCCs : AI1<0b1101, (outs GPR:$Rd),
- (ins GPR:$false, so_reg:$shift), DPSoRegFrm, IIC_iCMOVsr,
- "mov", "\t$Rd, $shift",
- [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_reg:$shift, imm:$cc, CCR:$ccr))*/]>,
- RegConstraint<"$false = $Rd">, UnaryDP {
- bits<4> Rd;
- bits<12> shift;
- let Inst{25} = 0;
- let Inst{20} = 0;
- let Inst{19-16} = 0;
- let Inst{15-12} = Rd;
- let Inst{11-0} = shift;
-}
+def MOVCCr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$false, GPR:$Rm, pred:$p),
+ Size4Bytes, IIC_iCMOVr,
+ [/*(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm, imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $Rd">;
+def MOVCCs : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$false, so_reg:$shift, pred:$p),
+ Size4Bytes, IIC_iCMOVsr,
+ [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_reg:$shift, imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $Rd">;
let isMoveImm = 1 in
-def MOVCCi16 : AI1<0b1000, (outs GPR:$Rd), (ins GPR:$false, i32imm_hilo16:$imm),
- DPFrm, IIC_iMOVi,
- "movw", "\t$Rd, $imm",
- []>,
- RegConstraint<"$false = $Rd">, Requires<[IsARM, HasV6T2]>,
- UnaryDP {
- bits<4> Rd;
- bits<16> imm;
- let Inst{25} = 1;
- let Inst{20} = 0;
- let Inst{19-16} = imm{15-12};
- let Inst{15-12} = Rd;
- let Inst{11-0} = imm{11-0};
-}
+def MOVCCi16 : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$false, i32imm_hilo16:$imm, pred:$p),
+ Size4Bytes, IIC_iMOVi,
+ []>,
+ RegConstraint<"$false = $Rd">, Requires<[IsARM, HasV6T2]>;
let isMoveImm = 1 in
-def MOVCCi : AI1<0b1101, (outs GPR:$Rd),
- (ins GPR:$false, so_imm:$imm), DPFrm, IIC_iCMOVi,
- "mov", "\t$Rd, $imm",
+def MOVCCi : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$false, so_imm:$imm, pred:$p),
+ Size4Bytes, IIC_iCMOVi,
[/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm:$imm, imm:$cc, CCR:$ccr))*/]>,
- RegConstraint<"$false = $Rd">, UnaryDP {
- bits<4> Rd;
- bits<12> imm;
- let Inst{25} = 1;
- let Inst{20} = 0;
- let Inst{19-16} = 0b0000;
- let Inst{15-12} = Rd;
- let Inst{11-0} = imm;
-}
+ RegConstraint<"$false = $Rd">;
// Two instruction predicate mov immediate.
let isMoveImm = 1 in
-def MOVCCi32imm : PseudoInst<(outs GPR:$Rd),
- (ins GPR:$false, i32imm:$src, pred:$p),
- IIC_iCMOVix2, []>, RegConstraint<"$false = $Rd">;
+def MOVCCi32imm : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$false, i32imm:$src, pred:$p),
+ Size8Bytes, IIC_iCMOVix2, []>, RegConstraint<"$false = $Rd">;
let isMoveImm = 1 in
-def MVNCCi : AI1<0b1111, (outs GPR:$Rd),
- (ins GPR:$false, so_imm:$imm), DPFrm, IIC_iCMOVi,
- "mvn", "\t$Rd, $imm",
+def MVNCCi : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$false, so_imm:$imm, pred:$p),
+ Size4Bytes, IIC_iCMOVi,
[/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm_not:$imm, imm:$cc, CCR:$ccr))*/]>,
- RegConstraint<"$false = $Rd">, UnaryDP {
- bits<4> Rd;
- bits<12> imm;
- let Inst{25} = 1;
- let Inst{20} = 0;
- let Inst{19-16} = 0b0000;
- let Inst{15-12} = Rd;
- let Inst{11-0} = imm;
-}
+ RegConstraint<"$false = $Rd">;
} // neverHasSideEffects
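// These conditional-move pseudos are presumably expanded back to the real
// predicated MOV/MVN encodings after register allocation (this patch also
// touches ARMExpandPseudoInsts.cpp), which removes the tblgen special-casing
// the deleted FIXME above complained about.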
//===----------------------------------------------------------------------===//
@@ -3221,13 +3220,6 @@ def DMB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
let Inst{31-4} = 0xf57ff05;
let Inst{3-0} = opt;
}
-
-def DMB_MCR : AInoP<(outs), (ins GPR:$zero), MiscFrm, NoItinerary,
- "mcr", "\tp15, 0, $zero, c7, c10, 5",
- [(ARMMemBarrierMCR GPR:$zero)]>,
- Requires<[IsARM, HasV6]> {
- // FIXME: add encoding
-}
}
def DSB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
@@ -3266,6 +3258,18 @@ let usesCustomInserter = 1 in {
def ATOMIC_LOAD_NAND_I8 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
[(set GPR:$dst, (atomic_load_nand_8 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_MIN_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+ [(set GPR:$dst, (atomic_load_min_8 GPR:$ptr, GPR:$val))]>;
+ def ATOMIC_LOAD_MAX_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+ [(set GPR:$dst, (atomic_load_max_8 GPR:$ptr, GPR:$val))]>;
+ def ATOMIC_LOAD_UMIN_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+      [(set GPR:$dst, (atomic_load_umin_8 GPR:$ptr, GPR:$val))]>;
+ def ATOMIC_LOAD_UMAX_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+      [(set GPR:$dst, (atomic_load_umax_8 GPR:$ptr, GPR:$val))]>;
def ATOMIC_LOAD_ADD_I16 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
[(set GPR:$dst, (atomic_load_add_16 GPR:$ptr, GPR:$incr))]>;
@@ -3284,6 +3288,18 @@ let usesCustomInserter = 1 in {
def ATOMIC_LOAD_NAND_I16 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
[(set GPR:$dst, (atomic_load_nand_16 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_MIN_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+ [(set GPR:$dst, (atomic_load_min_16 GPR:$ptr, GPR:$val))]>;
+ def ATOMIC_LOAD_MAX_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+ [(set GPR:$dst, (atomic_load_max_16 GPR:$ptr, GPR:$val))]>;
+ def ATOMIC_LOAD_UMIN_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+      [(set GPR:$dst, (atomic_load_umin_16 GPR:$ptr, GPR:$val))]>;
+ def ATOMIC_LOAD_UMAX_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+      [(set GPR:$dst, (atomic_load_umax_16 GPR:$ptr, GPR:$val))]>;
def ATOMIC_LOAD_ADD_I32 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
[(set GPR:$dst, (atomic_load_add_32 GPR:$ptr, GPR:$incr))]>;
@@ -3302,6 +3318,18 @@ let usesCustomInserter = 1 in {
def ATOMIC_LOAD_NAND_I32 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
[(set GPR:$dst, (atomic_load_nand_32 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_MIN_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+ [(set GPR:$dst, (atomic_load_min_32 GPR:$ptr, GPR:$val))]>;
+ def ATOMIC_LOAD_MAX_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+ [(set GPR:$dst, (atomic_load_max_32 GPR:$ptr, GPR:$val))]>;
+ def ATOMIC_LOAD_UMIN_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+      [(set GPR:$dst, (atomic_load_umin_32 GPR:$ptr, GPR:$val))]>;
+ def ATOMIC_LOAD_UMAX_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
+      [(set GPR:$dst, (atomic_load_umax_32 GPR:$ptr, GPR:$val))]>;
def ATOMIC_SWAP_I8 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,
@@ -3326,39 +3354,26 @@ let usesCustomInserter = 1 in {
}
let mayLoad = 1 in {
-def LDREXB : AIldrex<0b10, (outs GPR:$Rt), (ins GPR:$Rn), NoItinerary,
- "ldrexb", "\t$Rt, [$Rn]",
- []>;
-def LDREXH : AIldrex<0b11, (outs GPR:$Rt), (ins GPR:$Rn), NoItinerary,
- "ldrexh", "\t$Rt, [$Rn]",
- []>;
-def LDREX : AIldrex<0b00, (outs GPR:$Rt), (ins GPR:$Rn), NoItinerary,
- "ldrex", "\t$Rt, [$Rn]",
- []>;
-def LDREXD : AIldrex<0b01, (outs GPR:$Rt, GPR:$Rt2), (ins GPR:$Rn),
- NoItinerary,
- "ldrexd", "\t$Rt, $Rt2, [$Rn]",
- []>;
+def LDREXB : AIldrex<0b10, (outs GPR:$Rt), (ins addrmode7:$addr), NoItinerary,
+ "ldrexb", "\t$Rt, $addr", []>;
+def LDREXH : AIldrex<0b11, (outs GPR:$Rt), (ins addrmode7:$addr), NoItinerary,
+ "ldrexh", "\t$Rt, $addr", []>;
+def LDREX : AIldrex<0b00, (outs GPR:$Rt), (ins addrmode7:$addr), NoItinerary,
+ "ldrex", "\t$Rt, $addr", []>;
+def LDREXD : AIldrex<0b01, (outs GPR:$Rt, GPR:$Rt2), (ins addrmode7:$addr),
+ NoItinerary, "ldrexd", "\t$Rt, $Rt2, $addr", []>;
}
let mayStore = 1, Constraints = "@earlyclobber $Rd" in {
-def STREXB : AIstrex<0b10, (outs GPR:$Rd), (ins GPR:$src, GPR:$Rn),
- NoItinerary,
- "strexb", "\t$Rd, $src, [$Rn]",
- []>;
-def STREXH : AIstrex<0b11, (outs GPR:$Rd), (ins GPR:$Rt, GPR:$Rn),
- NoItinerary,
- "strexh", "\t$Rd, $Rt, [$Rn]",
- []>;
-def STREX : AIstrex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, GPR:$Rn),
- NoItinerary,
- "strex", "\t$Rd, $Rt, [$Rn]",
- []>;
+def STREXB : AIstrex<0b10, (outs GPR:$Rd), (ins GPR:$Rt, addrmode7:$addr),
+ NoItinerary, "strexb", "\t$Rd, $Rt, $addr", []>;
+def STREXH : AIstrex<0b11, (outs GPR:$Rd), (ins GPR:$Rt, addrmode7:$addr),
+ NoItinerary, "strexh", "\t$Rd, $Rt, $addr", []>;
+def STREX : AIstrex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, addrmode7:$addr),
+ NoItinerary, "strex", "\t$Rd, $Rt, $addr", []>;
def STREXD : AIstrex<0b01, (outs GPR:$Rd),
- (ins GPR:$Rt, GPR:$Rt2, GPR:$Rn),
- NoItinerary,
- "strexd", "\t$Rd, $Rt, $Rt2, [$Rn]",
- []>;
+ (ins GPR:$Rt, GPR:$Rt2, addrmode7:$addr),
+ NoItinerary, "strexd", "\t$Rd, $Rt, $Rt2, $addr", []>;
}
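// Typical usage (illustrative) -- an LL/SC read-modify-write loop:
//   1: ldrex  r1, [r0]
//      add    r1, r1, #1
//      strex  r2, r1, [r0]   @ r2 = 0 on success, 1 on failure
//      cmp    r2, #0
//      bne    1b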
// Clear-Exclusive is for disassembly only.
@@ -3377,238 +3392,7 @@ def SWPB : AIswp<1, (outs GPR:$Rt), (ins GPR:$Rt2, GPR:$Rn), "swpb",
}
//===----------------------------------------------------------------------===//
-// TLS Instructions
-//
-
-// __aeabi_read_tp preserves the registers r1-r3.
-// This is a pseudo inst so that we can get the encoding right,
-// complete with fixup for the aeabi_read_tp function.
-let isCall = 1,
- Defs = [R0, R12, LR, CPSR], Uses = [SP] in {
- def TPsoft : PseudoInst<(outs), (ins), IIC_Br,
- [(set R0, ARMthread_pointer)]>;
-}
-
-//===----------------------------------------------------------------------===//
-// SJLJ Exception handling intrinsics
-// eh_sjlj_setjmp() is an instruction sequence to store the return
-// address and save #0 in R0 for the non-longjmp case.
-// Since by its nature we may be coming from some other function to get
-// here, and we're using the stack frame for the containing function to
-// save/restore registers, we can't keep anything live in regs across
-// the eh_sjlj_setjmp(), else it will almost certainly have been tromped upon
-// when we get here from a longjmp(). We force everything out of registers
-// except for our own input by listing the relevant registers in Defs. By
-// doing so, we also cause the prologue/epilogue code to actively preserve
-// all of the callee-saved registers, which is exactly what we want.
-// A constant value is passed in $val, and we use the location as a scratch.
-//
-// These are pseudo-instructions and are lowered to individual MC-insts, so
-// no encoding information is necessary.
-let Defs =
- [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, D0,
- D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15,
- D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30,
- D31 ], hasSideEffects = 1, isBarrier = 1 in {
- def Int_eh_sjlj_setjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$val),
- NoItinerary,
- [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>,
- Requires<[IsARM, HasVFP2]>;
-}
-
-let Defs =
- [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR ],
- hasSideEffects = 1, isBarrier = 1 in {
- def Int_eh_sjlj_setjmp_nofp : PseudoInst<(outs), (ins GPR:$src, GPR:$val),
- NoItinerary,
- [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>,
- Requires<[IsARM, NoVFP]>;
-}
-
-// FIXME: Non-Darwin version(s)
-let isBarrier = 1, hasSideEffects = 1, isTerminator = 1,
- Defs = [ R7, LR, SP ] in {
-def Int_eh_sjlj_longjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$scratch),
- NoItinerary,
- [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>,
- Requires<[IsARM, IsDarwin]>;
-}
-
-// eh.sjlj.dispatchsetup pseudo-instruction.
-// This pseudo is used for ARM, Thumb1 and Thumb2. Any differences are
-// handled when the pseudo is expanded (which happens before any passes
-// that need the instruction size).
-let isBarrier = 1, hasSideEffects = 1 in
-def Int_eh_sjlj_dispatchsetup :
- PseudoInst<(outs), (ins GPR:$src), NoItinerary,
- [(ARMeh_sjlj_dispatchsetup GPR:$src)]>,
- Requires<[IsDarwin]>;
-
-//===----------------------------------------------------------------------===//
-// Non-Instruction Patterns
-//
-
-// Large immediate handling.
-
-// 32-bit immediate using two piece so_imms or movw + movt.
-// This is a single pseudo instruction, the benefit is that it can be remat'd
-// as a single unit instead of having to handle reg inputs.
-// FIXME: Remove this when we can do generalized remat.
-let isReMaterializable = 1, isMoveImm = 1 in
-def MOVi32imm : PseudoInst<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVix2,
- [(set GPR:$dst, (arm_i32imm:$src))]>,
- Requires<[IsARM]>;
-
-// Pseudo instruction that combines movw + movt + add pc (if PIC).
-// It also makes it possible to rematerialize the instructions.
-// FIXME: Remove this when we can do generalized remat and when machine licm
-// can properly hoist the instructions.
-let isReMaterializable = 1 in {
-def MOV_ga_pcrel : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
- IIC_iMOVix2addpc,
- [(set GPR:$dst, (ARMWrapperPIC tglobaladdr:$addr))]>,
- Requires<[IsARM, UseMovt]>;
-
-def MOV_ga_dyn : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
- IIC_iMOVix2,
- [(set GPR:$dst, (ARMWrapperDYN tglobaladdr:$addr))]>,
- Requires<[IsARM, UseMovt]>;
-
-let AddedComplexity = 10 in
-def MOV_ga_pcrel_ldr : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
- IIC_iMOVix2ld,
- [(set GPR:$dst, (load (ARMWrapperPIC tglobaladdr:$addr)))]>,
- Requires<[IsARM, UseMovt]>;
-} // isReMaterializable
-
-// ConstantPool, GlobalAddress, and JumpTable
-def : ARMPat<(ARMWrapper tglobaladdr :$dst), (LEApcrel tglobaladdr :$dst)>,
- Requires<[IsARM, DontUseMovt]>;
-def : ARMPat<(ARMWrapper tconstpool :$dst), (LEApcrel tconstpool :$dst)>;
-def : ARMPat<(ARMWrapper tglobaladdr :$dst), (MOVi32imm tglobaladdr :$dst)>,
- Requires<[IsARM, UseMovt]>;
-def : ARMPat<(ARMWrapperJT tjumptable:$dst, imm:$id),
- (LEApcrelJT tjumptable:$dst, imm:$id)>;
-
-// TODO: add,sub,and, 3-instr forms?
-
-// Tail calls
-def : ARMPat<(ARMtcret tcGPR:$dst),
- (TCRETURNri tcGPR:$dst)>, Requires<[IsDarwin]>;
-
-def : ARMPat<(ARMtcret (i32 tglobaladdr:$dst)),
- (TCRETURNdi texternalsym:$dst)>, Requires<[IsDarwin]>;
-
-def : ARMPat<(ARMtcret (i32 texternalsym:$dst)),
- (TCRETURNdi texternalsym:$dst)>, Requires<[IsDarwin]>;
-
-def : ARMPat<(ARMtcret tcGPR:$dst),
- (TCRETURNriND tcGPR:$dst)>, Requires<[IsNotDarwin]>;
-
-def : ARMPat<(ARMtcret (i32 tglobaladdr:$dst)),
- (TCRETURNdiND texternalsym:$dst)>, Requires<[IsNotDarwin]>;
-
-def : ARMPat<(ARMtcret (i32 texternalsym:$dst)),
- (TCRETURNdiND texternalsym:$dst)>, Requires<[IsNotDarwin]>;
-
-// Direct calls
-def : ARMPat<(ARMcall texternalsym:$func), (BL texternalsym:$func)>,
- Requires<[IsARM, IsNotDarwin]>;
-def : ARMPat<(ARMcall texternalsym:$func), (BLr9 texternalsym:$func)>,
- Requires<[IsARM, IsDarwin]>;
-
-// zextload i1 -> zextload i8
-def : ARMPat<(zextloadi1 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>;
-def : ARMPat<(zextloadi1 ldst_so_reg:$addr), (LDRBrs ldst_so_reg:$addr)>;
-
-// extload -> zextload
-def : ARMPat<(extloadi1 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>;
-def : ARMPat<(extloadi1 ldst_so_reg:$addr), (LDRBrs ldst_so_reg:$addr)>;
-def : ARMPat<(extloadi8 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>;
-def : ARMPat<(extloadi8 ldst_so_reg:$addr), (LDRBrs ldst_so_reg:$addr)>;
-
-def : ARMPat<(extloadi16 addrmode3:$addr), (LDRH addrmode3:$addr)>;
-
-def : ARMPat<(extloadi8 addrmodepc:$addr), (PICLDRB addrmodepc:$addr)>;
-def : ARMPat<(extloadi16 addrmodepc:$addr), (PICLDRH addrmodepc:$addr)>;
-
-// smul* and smla*
-def : ARMV5TEPat<(mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
- (sra (shl GPR:$b, (i32 16)), (i32 16))),
- (SMULBB GPR:$a, GPR:$b)>;
-def : ARMV5TEPat<(mul sext_16_node:$a, sext_16_node:$b),
- (SMULBB GPR:$a, GPR:$b)>;
-def : ARMV5TEPat<(mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
- (sra GPR:$b, (i32 16))),
- (SMULBT GPR:$a, GPR:$b)>;
-def : ARMV5TEPat<(mul sext_16_node:$a, (sra GPR:$b, (i32 16))),
- (SMULBT GPR:$a, GPR:$b)>;
-def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)),
- (sra (shl GPR:$b, (i32 16)), (i32 16))),
- (SMULTB GPR:$a, GPR:$b)>;
-def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)), sext_16_node:$b),
- (SMULTB GPR:$a, GPR:$b)>;
-def : ARMV5TEPat<(sra (mul GPR:$a, (sra (shl GPR:$b, (i32 16)), (i32 16))),
- (i32 16)),
- (SMULWB GPR:$a, GPR:$b)>;
-def : ARMV5TEPat<(sra (mul GPR:$a, sext_16_node:$b), (i32 16)),
- (SMULWB GPR:$a, GPR:$b)>;
-
-def : ARMV5TEPat<(add GPR:$acc,
- (mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
- (sra (shl GPR:$b, (i32 16)), (i32 16)))),
- (SMLABB GPR:$a, GPR:$b, GPR:$acc)>;
-def : ARMV5TEPat<(add GPR:$acc,
- (mul sext_16_node:$a, sext_16_node:$b)),
- (SMLABB GPR:$a, GPR:$b, GPR:$acc)>;
-def : ARMV5TEPat<(add GPR:$acc,
- (mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
- (sra GPR:$b, (i32 16)))),
- (SMLABT GPR:$a, GPR:$b, GPR:$acc)>;
-def : ARMV5TEPat<(add GPR:$acc,
- (mul sext_16_node:$a, (sra GPR:$b, (i32 16)))),
- (SMLABT GPR:$a, GPR:$b, GPR:$acc)>;
-def : ARMV5TEPat<(add GPR:$acc,
- (mul (sra GPR:$a, (i32 16)),
- (sra (shl GPR:$b, (i32 16)), (i32 16)))),
- (SMLATB GPR:$a, GPR:$b, GPR:$acc)>;
-def : ARMV5TEPat<(add GPR:$acc,
- (mul (sra GPR:$a, (i32 16)), sext_16_node:$b)),
- (SMLATB GPR:$a, GPR:$b, GPR:$acc)>;
-def : ARMV5TEPat<(add GPR:$acc,
- (sra (mul GPR:$a, (sra (shl GPR:$b, (i32 16)), (i32 16))),
- (i32 16))),
- (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>;
-def : ARMV5TEPat<(add GPR:$acc,
- (sra (mul GPR:$a, sext_16_node:$b), (i32 16))),
- (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>;
-
-//===----------------------------------------------------------------------===//
-// Thumb Support
-//
-
-include "ARMInstrThumb.td"
-
-//===----------------------------------------------------------------------===//
-// Thumb2 Support
-//
-
-include "ARMInstrThumb2.td"
-
-//===----------------------------------------------------------------------===//
-// Floating Point Support
-//
-
-include "ARMInstrVFP.td"
-
-//===----------------------------------------------------------------------===//
-// Advanced SIMD (NEON) Support
-//
-
-include "ARMInstrNEON.td"
-
-//===----------------------------------------------------------------------===//
-// Coprocessor Instructions. For disassembly only.
+// Coprocessor Instructions.
//
def CDP : ABI<0b1110, (outs), (ins p_imm:$cop, i32imm:$opc1,
@@ -3652,17 +3436,18 @@ def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, i32imm:$opc1,
let Inst{23-20} = opc1;
}
-class ACI<dag oops, dag iops, string opc, string asm>
- : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, BrFrm, NoItinerary,
- opc, asm, "", [/* For disassembly only; pattern left blank */]> {
+class ACI<dag oops, dag iops, string opc, string asm,
+ IndexMode im = IndexModeNone>
+ : InoP<oops, iops, AddrModeNone, Size4Bytes, im, BrFrm, NoItinerary,
+ opc, asm, "", [/* For disassembly only; pattern left blank */]> {
let Inst{27-25} = 0b110;
}
-multiclass LdStCop<bits<4> op31_28, bit load, string opc> {
+multiclass LdStCop<bits<4> op31_28, bit load, dag ops, string opc, string cond> {
def _OFFSET : ACI<(outs),
- (ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr),
- opc, "\tp$cop, cr$CRd, $addr"> {
+ !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops),
+ !strconcat(opc, cond), "\tp$cop, cr$CRd, $addr"> {
let Inst{31-28} = op31_28;
let Inst{24} = 1; // P = 1
let Inst{21} = 0; // W = 0
@@ -3671,8 +3456,8 @@ multiclass LdStCop<bits<4> op31_28, bit load, string opc> {
}
def _PRE : ACI<(outs),
- (ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr),
- opc, "\tp$cop, cr$CRd, $addr!"> {
+ !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops),
+ !strconcat(opc, cond), "\tp$cop, cr$CRd, $addr!", IndexModePre> {
let Inst{31-28} = op31_28;
let Inst{24} = 1; // P = 1
let Inst{21} = 1; // W = 1
@@ -3681,8 +3466,8 @@ multiclass LdStCop<bits<4> op31_28, bit load, string opc> {
}
def _POST : ACI<(outs),
- (ins nohash_imm:$cop, nohash_imm:$CRd, GPR:$base, am2offset:$offset),
- opc, "\tp$cop, cr$CRd, [$base], $offset"> {
+ !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops),
+ !strconcat(opc, cond), "\tp$cop, cr$CRd, $addr", IndexModePost> {
let Inst{31-28} = op31_28;
let Inst{24} = 0; // P = 0
let Inst{21} = 1; // W = 1
@@ -3691,8 +3476,9 @@ multiclass LdStCop<bits<4> op31_28, bit load, string opc> {
}
def _OPTION : ACI<(outs),
- (ins nohash_imm:$cop, nohash_imm:$CRd, GPR:$base, i32imm:$option),
- opc, "\tp$cop, cr$CRd, [$base], $option"> {
+ !con((ins nohash_imm:$cop,nohash_imm:$CRd,GPR:$base, nohash_imm:$option),
+ ops),
+ !strconcat(opc, cond), "\tp$cop, cr$CRd, [$base], \\{$option\\}"> {
let Inst{31-28} = op31_28;
let Inst{24} = 0; // P = 0
let Inst{23} = 1; // U = 1
@@ -3702,8 +3488,8 @@ multiclass LdStCop<bits<4> op31_28, bit load, string opc> {
}
def L_OFFSET : ACI<(outs),
- (ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr),
- !strconcat(opc, "l"), "\tp$cop, cr$CRd, $addr"> {
+ !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops),
+ !strconcat(!strconcat(opc, "l"), cond), "\tp$cop, cr$CRd, $addr"> {
let Inst{31-28} = op31_28;
let Inst{24} = 1; // P = 1
let Inst{21} = 0; // W = 0
@@ -3712,8 +3498,9 @@ multiclass LdStCop<bits<4> op31_28, bit load, string opc> {
}
def L_PRE : ACI<(outs),
- (ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr),
- !strconcat(opc, "l"), "\tp$cop, cr$CRd, $addr!"> {
+ !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops),
+ !strconcat(!strconcat(opc, "l"), cond), "\tp$cop, cr$CRd, $addr!",
+ IndexModePre> {
let Inst{31-28} = op31_28;
let Inst{24} = 1; // P = 1
let Inst{21} = 1; // W = 1
@@ -3722,8 +3509,9 @@ multiclass LdStCop<bits<4> op31_28, bit load, string opc> {
}
def L_POST : ACI<(outs),
- (ins nohash_imm:$cop, nohash_imm:$CRd, GPR:$base, am2offset:$offset),
- !strconcat(opc, "l"), "\tp$cop, cr$CRd, [$base], $offset"> {
+ !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops),
+ !strconcat(!strconcat(opc, "l"), cond), "\tp$cop, cr$CRd, $addr",
+ IndexModePost> {
let Inst{31-28} = op31_28;
let Inst{24} = 0; // P = 0
let Inst{21} = 1; // W = 1
@@ -3732,8 +3520,10 @@ multiclass LdStCop<bits<4> op31_28, bit load, string opc> {
}
def L_OPTION : ACI<(outs),
- (ins nohash_imm:$cop, nohash_imm:$CRd, GPR:$base, nohash_imm:$option),
- !strconcat(opc, "l"), "\tp$cop, cr$CRd, [$base], $option"> {
+ !con((ins nohash_imm:$cop, nohash_imm:$CRd,GPR:$base,nohash_imm:$option),
+ ops),
+ !strconcat(!strconcat(opc, "l"), cond),
+ "\tp$cop, cr$CRd, [$base], \\{$option\\}"> {
let Inst{31-28} = op31_28;
let Inst{24} = 0; // P = 0
let Inst{23} = 1; // U = 1
@@ -3743,19 +3533,18 @@ multiclass LdStCop<bits<4> op31_28, bit load, string opc> {
}
}
-defm LDC : LdStCop<{?,?,?,?}, 1, "ldc">;
-defm LDC2 : LdStCop<0b1111, 1, "ldc2">;
-defm STC : LdStCop<{?,?,?,?}, 0, "stc">;
-defm STC2 : LdStCop<0b1111, 0, "stc2">;
+defm LDC : LdStCop<{?,?,?,?}, 1, (ins pred:$p), "ldc", "${p}">;
+defm LDC2 : LdStCop<0b1111, 1, (ins), "ldc2", "">;
+defm STC : LdStCop<{?,?,?,?}, 0, (ins pred:$p), "stc", "${p}">;
+defm STC2 : LdStCop<0b1111, 0, (ins), "stc2", "">;
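+
+// A sketch (not generated code) of what these defms expand to, given the
+// multiclass above: the predicable LDC appends the pred operand via !con
+// and the "${p}" suffix via !strconcat, so its offset variant is roughly
+//   def LDC_OFFSET : ACI<(outs),
+//       (ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr, pred:$p),
+//       "ldc${p}", "\tp$cop, cr$CRd, $addr">;
+// while LDC2/STC2 pass (ins) and "" and stay unconditional.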
//===----------------------------------------------------------------------===//
// Move between coprocessor and ARM core register -- for disassembly only
//
-class MovRCopro<string opc, bit direction>
- : ABI<0b1110, (outs), (ins p_imm:$cop, i32imm:$opc1,
- GPR:$Rt, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2),
- NoItinerary, opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2",
+class MovRCopro<string opc, bit direction, dag oops, dag iops>
+ : ABI<0b1110, oops, iops, NoItinerary, opc,
+ "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2",
[/* For disassembly only; pattern left blank */]> {
let Inst{20} = direction;
let Inst{4} = 1;
@@ -3775,13 +3564,17 @@ class MovRCopro<string opc, bit direction>
let Inst{19-16} = CRn;
}
-def MCR : MovRCopro<"mcr", 0 /* from ARM core register to coprocessor */>;
-def MRC : MovRCopro<"mrc", 1 /* from coprocessor to ARM core register */>;
+def MCR : MovRCopro<"mcr", 0 /* from ARM core register to coprocessor */,
+ (outs), (ins p_imm:$cop, i32imm:$opc1,
+ GPR:$Rt, c_imm:$CRn, c_imm:$CRm,
+ i32imm:$opc2)>;
+def MRC : MovRCopro<"mrc", 1 /* from coprocessor to ARM core register */,
+ (outs GPR:$Rt), (ins p_imm:$cop, i32imm:$opc1,
+ c_imm:$CRn, c_imm:$CRm, i32imm:$opc2)>;
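+
+// With $Rt modeled as a def, MRC is a genuine register-producing
+// instruction. For example, reading the CP15 Main ID register is written
+//   mrc p15, 0, r0, c0, c0, 0
+// (illustration only; no selection pattern is attached here).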
-class MovRCopro2<string opc, bit direction>
- : ABXI<0b1110, (outs), (ins p_imm:$cop, i32imm:$opc1,
- GPR:$Rt, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2),
- NoItinerary, !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"),
+class MovRCopro2<string opc, bit direction, dag oops, dag iops>
+ : ABXI<0b1110, oops, iops, NoItinerary,
+ !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"),
[/* For disassembly only; pattern left blank */]> {
let Inst{31-28} = 0b1111;
let Inst{20} = direction;
@@ -3802,8 +3595,14 @@ class MovRCopro2<string opc, bit direction>
let Inst{19-16} = CRn;
}
-def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */>;
-def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */>;
+def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */,
+ (outs), (ins p_imm:$cop, i32imm:$opc1,
+ GPR:$Rt, c_imm:$CRn, c_imm:$CRm,
+ i32imm:$opc2)>;
+def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */,
+ (outs GPR:$Rt), (ins p_imm:$cop, i32imm:$opc1,
+ c_imm:$CRn, c_imm:$CRm,
+ i32imm:$opc2)>;
class MovRRCopro<string opc, bit direction>
: ABI<0b1100, (outs), (ins p_imm:$cop, i32imm:$opc1,
@@ -3909,3 +3708,241 @@ def MSRi : ABI<0b0011, (outs), (ins msr_mask:$mask, so_imm:$a), NoItinerary,
let Inst{15-12} = 0b1111;
let Inst{11-0} = a;
}
+
+//===----------------------------------------------------------------------===//
+// TLS Instructions
+//
+
+// __aeabi_read_tp preserves the registers r1-r3.
+// This is a pseudo inst so that we can get the encoding right,
+// complete with fixup for the aeabi_read_tp function.
+let isCall = 1,
+ Defs = [R0, R12, LR, CPSR], Uses = [SP] in {
+ def TPsoft : PseudoInst<(outs), (ins), IIC_Br,
+ [(set R0, ARMthread_pointer)]>;
+}
+
+//===----------------------------------------------------------------------===//
+// SJLJ Exception handling intrinsics
+// eh_sjlj_setjmp() is an instruction sequence to store the return
+// address and save #0 in R0 for the non-longjmp case.
+// Since by its nature we may be coming from some other function to get
+// here, and we're using the stack frame for the containing function to
+// save/restore registers, we can't keep anything live in regs across
+// the eh_sjlj_setjmp(), else it will almost certainly have been tromped upon
+// when we get here from a longjmp(). We force everything out of registers
+// except for our own input by listing the relevant registers in Defs. By
+// doing so, we also cause the prologue/epilogue code to actively preserve
+// all of the callee-saved registers, which is exactly what we want.
+// A constant value is passed in $val, and we use the location as a scratch.
+//
+// These are pseudo-instructions and are lowered to individual MC-insts, so
+// no encoding information is necessary.
+let Defs =
+ [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, D0,
+ D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15,
+ D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30,
+ D31 ], hasSideEffects = 1, isBarrier = 1 in {
+ def Int_eh_sjlj_setjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$val),
+ NoItinerary,
+ [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>,
+ Requires<[IsARM, HasVFP2]>;
+}
+
+let Defs =
+ [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR ],
+ hasSideEffects = 1, isBarrier = 1 in {
+ def Int_eh_sjlj_setjmp_nofp : PseudoInst<(outs), (ins GPR:$src, GPR:$val),
+ NoItinerary,
+ [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>,
+ Requires<[IsARM, NoVFP]>;
+}
+
+// FIXME: Non-Darwin version(s)
+let isBarrier = 1, hasSideEffects = 1, isTerminator = 1,
+ Defs = [ R7, LR, SP ] in {
+def Int_eh_sjlj_longjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$scratch),
+ NoItinerary,
+ [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>,
+ Requires<[IsARM, IsDarwin]>;
+}
+
+// eh.sjlj.dispatchsetup pseudo-instruction.
+// This pseudo is used for ARM, Thumb1 and Thumb2. Any differences are
+// handled when the pseudo is expanded (which happens before any passes
+// that need the instruction size).
+let isBarrier = 1, hasSideEffects = 1 in
+def Int_eh_sjlj_dispatchsetup :
+ PseudoInst<(outs), (ins), NoItinerary,
+ [(ARMeh_sjlj_dispatchsetup)]>,
+ Requires<[IsDarwin]>;
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//
+
+// Large immediate handling.
+
+// 32-bit immediate using two-piece so_imms or movw + movt.
+// This is a single pseudo-instruction; the benefit is that it can be
+// rematerialized as a single unit instead of having to handle register inputs.
+// FIXME: Remove this when we can do generalized remat.
+let isReMaterializable = 1, isMoveImm = 1 in
+def MOVi32imm : PseudoInst<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVix2,
+ [(set GPR:$dst, (arm_i32imm:$src))]>,
+ Requires<[IsARM]>;
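+
+// As an illustration (the actual expansion happens later), an immediate
+// such as 0x12345678 becomes a movw/movt pair on v6t2+ targets:
+//   movw r0, #0x5678
+//   movt r0, #0x1234
+// and a two-piece so_imm sequence (e.g. mov + orr) on older cores.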
+
+// Pseudo instruction that combines movw + movt + add pc (if PIC).
+// It also makes it possible to rematerialize the instructions.
+// FIXME: Remove this when we can do generalized remat and when machine licm
+// can properly hoist the instructions.
+let isReMaterializable = 1 in {
+def MOV_ga_pcrel : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
+ IIC_iMOVix2addpc,
+ [(set GPR:$dst, (ARMWrapperPIC tglobaladdr:$addr))]>,
+ Requires<[IsARM, UseMovt]>;
+
+def MOV_ga_dyn : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
+ IIC_iMOVix2,
+ [(set GPR:$dst, (ARMWrapperDYN tglobaladdr:$addr))]>,
+ Requires<[IsARM, UseMovt]>;
+
+let AddedComplexity = 10 in
+def MOV_ga_pcrel_ldr : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
+ IIC_iMOVix2ld,
+ [(set GPR:$dst, (load (ARMWrapperPIC tglobaladdr:$addr)))]>,
+ Requires<[IsARM, UseMovt]>;
+} // isReMaterializable
+
+// ConstantPool, GlobalAddress, and JumpTable
+def : ARMPat<(ARMWrapper tglobaladdr :$dst), (LEApcrel tglobaladdr :$dst)>,
+ Requires<[IsARM, DontUseMovt]>;
+def : ARMPat<(ARMWrapper tconstpool :$dst), (LEApcrel tconstpool :$dst)>;
+def : ARMPat<(ARMWrapper tglobaladdr :$dst), (MOVi32imm tglobaladdr :$dst)>,
+ Requires<[IsARM, UseMovt]>;
+def : ARMPat<(ARMWrapperJT tjumptable:$dst, imm:$id),
+ (LEApcrelJT tjumptable:$dst, imm:$id)>;
+
+// TODO: add/sub/and 3-instr forms?
+
+// Tail calls
+def : ARMPat<(ARMtcret tcGPR:$dst),
+ (TCRETURNri tcGPR:$dst)>, Requires<[IsDarwin]>;
+
+def : ARMPat<(ARMtcret (i32 tglobaladdr:$dst)),
+ (TCRETURNdi texternalsym:$dst)>, Requires<[IsDarwin]>;
+
+def : ARMPat<(ARMtcret (i32 texternalsym:$dst)),
+ (TCRETURNdi texternalsym:$dst)>, Requires<[IsDarwin]>;
+
+def : ARMPat<(ARMtcret tcGPR:$dst),
+ (TCRETURNriND tcGPR:$dst)>, Requires<[IsNotDarwin]>;
+
+def : ARMPat<(ARMtcret (i32 tglobaladdr:$dst)),
+ (TCRETURNdiND texternalsym:$dst)>, Requires<[IsNotDarwin]>;
+
+def : ARMPat<(ARMtcret (i32 texternalsym:$dst)),
+ (TCRETURNdiND texternalsym:$dst)>, Requires<[IsNotDarwin]>;
+
+// Direct calls
+def : ARMPat<(ARMcall texternalsym:$func), (BL texternalsym:$func)>,
+ Requires<[IsARM, IsNotDarwin]>;
+def : ARMPat<(ARMcall texternalsym:$func), (BLr9 texternalsym:$func)>,
+ Requires<[IsARM, IsDarwin]>;
+
+// zextload i1 -> zextload i8
+def : ARMPat<(zextloadi1 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>;
+def : ARMPat<(zextloadi1 ldst_so_reg:$addr), (LDRBrs ldst_so_reg:$addr)>;
+
+// extload -> zextload
+def : ARMPat<(extloadi1 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>;
+def : ARMPat<(extloadi1 ldst_so_reg:$addr), (LDRBrs ldst_so_reg:$addr)>;
+def : ARMPat<(extloadi8 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>;
+def : ARMPat<(extloadi8 ldst_so_reg:$addr), (LDRBrs ldst_so_reg:$addr)>;
+
+def : ARMPat<(extloadi16 addrmode3:$addr), (LDRH addrmode3:$addr)>;
+
+def : ARMPat<(extloadi8 addrmodepc:$addr), (PICLDRB addrmodepc:$addr)>;
+def : ARMPat<(extloadi16 addrmodepc:$addr), (PICLDRH addrmodepc:$addr)>;
+
+// smul* and smla*
+def : ARMV5TEPat<(mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
+ (sra (shl GPR:$b, (i32 16)), (i32 16))),
+ (SMULBB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul sext_16_node:$a, sext_16_node:$b),
+ (SMULBB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
+ (sra GPR:$b, (i32 16))),
+ (SMULBT GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul sext_16_node:$a, (sra GPR:$b, (i32 16))),
+ (SMULBT GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)),
+ (sra (shl GPR:$b, (i32 16)), (i32 16))),
+ (SMULTB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)), sext_16_node:$b),
+ (SMULTB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(sra (mul GPR:$a, (sra (shl GPR:$b, (i32 16)), (i32 16))),
+ (i32 16)),
+ (SMULWB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(sra (mul GPR:$a, sext_16_node:$b), (i32 16)),
+ (SMULWB GPR:$a, GPR:$b)>;
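+
+// In C terms, the SMULBB patterns above match a product of two values that
+// are known to be sign-extended from 16 bits, e.g. roughly:
+//   int mul16(int a, int b) { return (short)a * (short)b; }
+// The sra/shl pairs and sext_16_node are just two DAG spellings of that
+// same sign extension.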
+
+def : ARMV5TEPat<(add GPR:$acc,
+ (mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
+ (sra (shl GPR:$b, (i32 16)), (i32 16)))),
+ (SMLABB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+ (mul sext_16_node:$a, sext_16_node:$b)),
+ (SMLABB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+ (mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
+ (sra GPR:$b, (i32 16)))),
+ (SMLABT GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+ (mul sext_16_node:$a, (sra GPR:$b, (i32 16)))),
+ (SMLABT GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+ (mul (sra GPR:$a, (i32 16)),
+ (sra (shl GPR:$b, (i32 16)), (i32 16)))),
+ (SMLATB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+ (mul (sra GPR:$a, (i32 16)), sext_16_node:$b)),
+ (SMLATB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+ (sra (mul GPR:$a, (sra (shl GPR:$b, (i32 16)), (i32 16))),
+ (i32 16))),
+ (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(add GPR:$acc,
+ (sra (mul GPR:$a, sext_16_node:$b), (i32 16))),
+ (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>;
+
+
+// Pre-v7 uses MCR for synchronization barriers.
+def : ARMPat<(ARMMemBarrierMCR GPR:$zero), (MCR 15, 0, GPR:$zero, 7, 10, 5)>,
+ Requires<[IsARM, HasV6]>;
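+
+// This is the classic ARMv6 CP15 barrier: the pattern emits
+//   mcr p15, 0, <Rd>, c7, c10, 5
+// i.e. the Data Memory Barrier operation, with <Rd> conventionally zero.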
+
+
+//===----------------------------------------------------------------------===//
+// Thumb Support
+//
+
+include "ARMInstrThumb.td"
+
+//===----------------------------------------------------------------------===//
+// Thumb2 Support
+//
+
+include "ARMInstrThumb2.td"
+
+//===----------------------------------------------------------------------===//
+// Floating Point Support
+//
+
+include "ARMInstrVFP.td"
+
+//===----------------------------------------------------------------------===//
+// Advanced SIMD (NEON) Support
+//
+
+include "ARMInstrNEON.td"
+
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index dc3d63e..e34d69a 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -80,6 +80,12 @@ def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
def NEONvorrImm : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>;
def NEONvbicImm : SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>;
+def NEONvbsl : SDNode<"ARMISD::VBSL",
+ SDTypeProfile<1, 3, [SDTCisVec<0>,
+ SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>]>>;
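+// NEONvbsl carries the select mask as its first operand; the node computes
+// (Op1 & Op2) | (~Op1 & Op3), which the VBSL patterns below rely on.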
+
def NEONvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;
// VDUPLANE can produce a quad-register result from a double-register source,
@@ -146,10 +152,6 @@ def VLDMQIA
: PseudoVFPLdStM<(outs QPR:$dst), (ins GPR:$Rn),
IIC_fpLoad_m, "",
[(set QPR:$dst, (v2f64 (load GPR:$Rn)))]>;
-def VLDMQDB
- : PseudoVFPLdStM<(outs QPR:$dst), (ins GPR:$Rn),
- IIC_fpLoad_m, "",
- [(set QPR:$dst, (v2f64 (load GPR:$Rn)))]>;
// Use VSTM to store a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VSTMD after reg alloc.
@@ -157,10 +159,6 @@ def VSTMQIA
: PseudoVFPLdStM<(outs), (ins QPR:$src, GPR:$Rn),
IIC_fpStore_m, "",
[(store (v2f64 QPR:$src), GPR:$Rn)]>;
-def VSTMQDB
- : PseudoVFPLdStM<(outs), (ins QPR:$src, GPR:$Rn),
- IIC_fpStore_m, "",
- [(store (v2f64 QPR:$src), GPR:$Rn)]>;
// Classes for VLD* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
@@ -1801,7 +1799,7 @@ class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
class N3VDSL<bits<2> op21_20, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType Ty, SDNode ShOp>
- : N3V<0, 1, op21_20, op11_8, 1, 0,
+ : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
(outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
[(set (Ty DPR:$Vd),
@@ -1811,7 +1809,7 @@ class N3VDSL<bits<2> op21_20, bits<4> op11_8,
}
class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
- : N3V<0, 1, op21_20, op11_8, 1, 0,
+ : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
(outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$Vd, $Vn, $Vm[$lane]","",
[(set (Ty DPR:$Vd),
@@ -1841,7 +1839,7 @@ class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
class N3VQSL<bits<2> op21_20, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDNode ShOp>
- : N3V<1, 1, op21_20, op11_8, 1, 0,
+ : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
(outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
[(set (ResTy QPR:$Vd),
@@ -1852,7 +1850,7 @@ class N3VQSL<bits<2> op21_20, bits<4> op11_8,
}
class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDNode ShOp>
- : N3V<1, 1, op21_20, op11_8, 1, 0,
+ : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
(outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$Vd, $Vn, $Vm[$lane]","",
[(set (ResTy QPR:$Vd),
@@ -1874,7 +1872,7 @@ class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
}
class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp>
- : N3V<0, 1, op21_20, op11_8, 1, 0,
+ : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
(outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
[(set (Ty DPR:$Vd),
@@ -1885,7 +1883,7 @@ class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
}
class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp>
- : N3V<0, 1, op21_20, op11_8, 1, 0,
+ : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
(outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
[(set (Ty DPR:$Vd),
@@ -1915,7 +1913,7 @@ class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
- : N3V<1, 1, op21_20, op11_8, 1, 0,
+ : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
(outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
[(set (ResTy QPR:$Vd),
@@ -1927,7 +1925,7 @@ class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
- : N3V<1, 1, op21_20, op11_8, 1, 0,
+ : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
(outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
[(set (ResTy QPR:$Vd),
@@ -1959,7 +1957,7 @@ class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
- : N3V<0, 1, op21_20, op11_8, 1, 0,
+ : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
(outs DPR:$Vd),
(ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
NVMulSLFrm, itin,
@@ -1972,7 +1970,7 @@ class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
ValueType Ty, SDNode MulOp, SDNode ShOp>
- : N3V<0, 1, op21_20, op11_8, 1, 0,
+ : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
(outs DPR:$Vd),
(ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
NVMulSLFrm, itin,
@@ -1994,7 +1992,7 @@ class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
SDPatternOperator MulOp, SDPatternOperator ShOp>
- : N3V<1, 1, op21_20, op11_8, 1, 0,
+ : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
(outs QPR:$Vd),
(ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
NVMulSLFrm, itin,
@@ -2008,7 +2006,7 @@ class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy,
SDNode MulOp, SDNode ShOp>
- : N3V<1, 1, op21_20, op11_8, 1, 0,
+ : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
(outs QPR:$Vd),
(ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
NVMulSLFrm, itin,
@@ -2069,7 +2067,7 @@ class N3VLMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
- : N3V<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
+ : N3VLane32<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
(ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
NVMulSLFrm, itin,
OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
@@ -2081,7 +2079,7 @@ class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8,
class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
- : N3V<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
+ : N3VLane16<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
(ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
NVMulSLFrm, itin,
OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
@@ -2116,7 +2114,7 @@ class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
- : N3V<op24, 1, op21_20, op11_8, 1, 0,
+ : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
(outs QPR:$Vd),
(ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
NVMulSLFrm, itin,
@@ -2129,7 +2127,7 @@ class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
- : N3V<op24, 1, op21_20, op11_8, 1, 0,
+ : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
(outs QPR:$Vd),
(ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
NVMulSLFrm, itin,
@@ -2164,7 +2162,7 @@ class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType TyQ, ValueType TyD, SDNode OpNode>
- : N3V<op24, 1, op21_20, op11_8, 1, 0,
+ : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
(outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
[(set QPR:$Vd,
@@ -2173,7 +2171,7 @@ class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType TyQ, ValueType TyD, SDNode OpNode>
- : N3V<op24, 1, op21_20, op11_8, 1, 0,
+ : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
(outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
[(set QPR:$Vd,
@@ -2219,7 +2217,7 @@ class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
- : N3V<op24, 1, op21_20, op11_8, 1, 0,
+ : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
(outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
[(set (ResTy QPR:$Vd),
@@ -2229,7 +2227,7 @@ class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
- : N3V<op24, 1, op21_20, op11_8, 1, 0,
+ : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
(outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
[(set (ResTy QPR:$Vd),
@@ -2288,17 +2286,17 @@ class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
// Shift by immediate,
// both double- and quad-register.
class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
- Format f, InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType Ty, SDNode OpNode>
+ Format f, InstrItinClass itin, Operand ImmTy,
+ string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
: N2VImm<op24, op23, op11_8, op7, 0, op4,
- (outs DPR:$Vd), (ins DPR:$Vm, i32imm:$SIMM), f, itin,
+ (outs DPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), f, itin,
OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
[(set DPR:$Vd, (Ty (OpNode (Ty DPR:$Vm), (i32 imm:$SIMM))))]>;
class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
- Format f, InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType Ty, SDNode OpNode>
+ Format f, InstrItinClass itin, Operand ImmTy,
+ string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
: N2VImm<op24, op23, op11_8, op7, 1, op4,
- (outs QPR:$Vd), (ins QPR:$Vm, i32imm:$SIMM), f, itin,
+ (outs QPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), f, itin,
OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
[(set QPR:$Vd, (Ty (OpNode (Ty QPR:$Vm), (i32 imm:$SIMM))))]>;
@@ -2315,9 +2313,9 @@ class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
// Narrow shift by immediate.
class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType ResTy, ValueType OpTy, SDNode OpNode>
+ ValueType ResTy, ValueType OpTy, Operand ImmTy, SDNode OpNode>
: N2VImm<op24, op23, op11_8, op7, op6, op4,
- (outs DPR:$Vd), (ins QPR:$Vm, i32imm:$SIMM), N2RegVShRFrm, itin,
+ (outs DPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, itin,
OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
[(set DPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm),
(i32 imm:$SIMM))))]>;
@@ -2325,16 +2323,18 @@ class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
// Shift right by immediate and accumulate,
// both double- and quad-register.
class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
- string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
+ Operand ImmTy, string OpcodeStr, string Dt,
+ ValueType Ty, SDNode ShOp>
: N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
- (ins DPR:$src1, DPR:$Vm, i32imm:$SIMM), N2RegVShRFrm, IIC_VPALiD,
+ (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
[(set DPR:$Vd, (Ty (add DPR:$src1,
(Ty (ShOp DPR:$Vm, (i32 imm:$SIMM))))))]>;
class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
- string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
+ Operand ImmTy, string OpcodeStr, string Dt,
+ ValueType Ty, SDNode ShOp>
: N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
- (ins QPR:$src1, QPR:$Vm, i32imm:$SIMM), N2RegVShRFrm, IIC_VPALiD,
+ (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
[(set QPR:$Vd, (Ty (add QPR:$src1,
(Ty (ShOp QPR:$Vm, (i32 imm:$SIMM))))))]>;
@@ -2342,15 +2342,17 @@ class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
// Shift by immediate and insert,
// both double- and quad-register.
class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
- Format f, string OpcodeStr, string Dt, ValueType Ty,SDNode ShOp>
+ Operand ImmTy, Format f, string OpcodeStr, string Dt,
+ ValueType Ty,SDNode ShOp>
: N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
- (ins DPR:$src1, DPR:$Vm, i32imm:$SIMM), f, IIC_VSHLiD,
+ (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiD,
OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
[(set DPR:$Vd, (Ty (ShOp DPR:$src1, DPR:$Vm, (i32 imm:$SIMM))))]>;
class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
- Format f, string OpcodeStr, string Dt, ValueType Ty,SDNode ShOp>
+ Operand ImmTy, Format f, string OpcodeStr, string Dt,
+ ValueType Ty,SDNode ShOp>
: N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
- (ins QPR:$src1, QPR:$Vm, i32imm:$SIMM), f, IIC_VSHLiQ,
+ (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiQ,
OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
[(set QPR:$Vd, (Ty (ShOp QPR:$src1, QPR:$Vm, (i32 imm:$SIMM))))]>;
@@ -3010,40 +3012,77 @@ multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
// Neon 2-register vector shift by immediate,
// with f of either N2RegVShLFrm or N2RegVShRFrm
// element sizes of 8, 16, 32 and 64 bits:
-multiclass N2VSh_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
- InstrItinClass itin, string OpcodeStr, string Dt,
- SDNode OpNode, Format f> {
+multiclass N2VShL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ SDNode OpNode> {
+ // 64-bit vector types.
+ def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
+ OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+ def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
+ OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
+ // imm6 = xxxxxx
+
+ // 128-bit vector types.
+ def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
+ OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+ def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
+ OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
+ // imm6 = xxxxxx
+}
+multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ SDNode OpNode> {
// 64-bit vector types.
- def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, f, itin,
+ def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
let Inst{21-19} = 0b001; // imm6 = 001xxx
}
- def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, f, itin,
+ def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
let Inst{21-20} = 0b01; // imm6 = 01xxxx
}
- def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, f, itin,
+ def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
let Inst{21} = 0b1; // imm6 = 1xxxxx
}
- def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, f, itin,
+ def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
// imm6 = xxxxxx
// 128-bit vector types.
- def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, f, itin,
+ def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
let Inst{21-19} = 0b001; // imm6 = 001xxx
}
- def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, f, itin,
+ def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
let Inst{21-20} = 0b01; // imm6 = 01xxxx
}
- def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, f, itin,
+ def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
let Inst{21} = 0b1; // imm6 = 1xxxxx
}
- def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, f, itin,
+ def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
// imm6 = xxxxxx
}
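+
+// Splitting the left- and right-shift multiclasses lets the right-shift
+// forms take shr_imm8/16/32/64 operands (valid range [1, size]) while the
+// left-shift forms keep plain i32imm ([0, size-1]). A hypothetical
+// instantiation mirroring VSHRs:
+//   defm VFOOs : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vfoo", "s",
+//                            NEONvshrs>;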
@@ -3053,79 +3092,113 @@ multiclass N2VSh_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
string OpcodeStr, string Dt, SDNode ShOp> {
// 64-bit vector types.
- def v8i8 : N2VDShAdd<op24, op23, op11_8, 0, op4,
+ def v8i8 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
OpcodeStr, !strconcat(Dt, "8"), v8i8, ShOp> {
let Inst{21-19} = 0b001; // imm6 = 001xxx
}
- def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4,
+ def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
OpcodeStr, !strconcat(Dt, "16"), v4i16, ShOp> {
let Inst{21-20} = 0b01; // imm6 = 01xxxx
}
- def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4,
+ def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
OpcodeStr, !strconcat(Dt, "32"), v2i32, ShOp> {
let Inst{21} = 0b1; // imm6 = 1xxxxx
}
- def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4,
+ def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
OpcodeStr, !strconcat(Dt, "64"), v1i64, ShOp>;
// imm6 = xxxxxx
// 128-bit vector types.
- def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4,
+ def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
OpcodeStr, !strconcat(Dt, "8"), v16i8, ShOp> {
let Inst{21-19} = 0b001; // imm6 = 001xxx
}
- def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4,
+ def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
OpcodeStr, !strconcat(Dt, "16"), v8i16, ShOp> {
let Inst{21-20} = 0b01; // imm6 = 01xxxx
}
- def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4,
+ def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
OpcodeStr, !strconcat(Dt, "32"), v4i32, ShOp> {
let Inst{21} = 0b1; // imm6 = 1xxxxx
}
- def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4,
+ def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
OpcodeStr, !strconcat(Dt, "64"), v2i64, ShOp>;
// imm6 = xxxxxx
}
-
// Neon Shift-Insert vector operations,
// with f of either N2RegVShLFrm or N2RegVShRFrm
// element sizes of 8, 16, 32 and 64 bits:
-multiclass N2VShIns_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
- string OpcodeStr, SDNode ShOp,
- Format f> {
+multiclass N2VShInsL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
+ string OpcodeStr> {
+ // 64-bit vector types.
+ def v8i8 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
+ N2RegVShLFrm, OpcodeStr, "8", v8i8, NEONvsli> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
+ N2RegVShLFrm, OpcodeStr, "16", v4i16, NEONvsli> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
+ N2RegVShLFrm, OpcodeStr, "32", v2i32, NEONvsli> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+ def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, i32imm,
+ N2RegVShLFrm, OpcodeStr, "64", v1i64, NEONvsli>;
+ // imm6 = xxxxxx
+
+ // 128-bit vector types.
+ def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
+ N2RegVShLFrm, OpcodeStr, "8", v16i8, NEONvsli> {
+ let Inst{21-19} = 0b001; // imm6 = 001xxx
+ }
+ def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
+ N2RegVShLFrm, OpcodeStr, "16", v8i16, NEONvsli> {
+ let Inst{21-20} = 0b01; // imm6 = 01xxxx
+ }
+ def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
+ N2RegVShLFrm, OpcodeStr, "32", v4i32, NEONvsli> {
+ let Inst{21} = 0b1; // imm6 = 1xxxxx
+ }
+ def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, i32imm,
+ N2RegVShLFrm, OpcodeStr, "64", v2i64, NEONvsli>;
+ // imm6 = xxxxxx
+}
+multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
+ string OpcodeStr> {
// 64-bit vector types.
- def v8i8 : N2VDShIns<op24, op23, op11_8, 0, op4,
- f, OpcodeStr, "8", v8i8, ShOp> {
+ def v8i8 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm8,
+ N2RegVShRFrm, OpcodeStr, "8", v8i8, NEONvsri> {
let Inst{21-19} = 0b001; // imm6 = 001xxx
}
- def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4,
- f, OpcodeStr, "16", v4i16, ShOp> {
+ def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm16,
+ N2RegVShRFrm, OpcodeStr, "16", v4i16, NEONvsri> {
let Inst{21-20} = 0b01; // imm6 = 01xxxx
}
- def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4,
- f, OpcodeStr, "32", v2i32, ShOp> {
+ def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm32,
+ N2RegVShRFrm, OpcodeStr, "32", v2i32, NEONvsri> {
let Inst{21} = 0b1; // imm6 = 1xxxxx
}
- def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4,
- f, OpcodeStr, "64", v1i64, ShOp>;
+ def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, shr_imm64,
+ N2RegVShRFrm, OpcodeStr, "64", v1i64, NEONvsri>;
// imm6 = xxxxxx
// 128-bit vector types.
- def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4,
- f, OpcodeStr, "8", v16i8, ShOp> {
+ def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm8,
+ N2RegVShRFrm, OpcodeStr, "8", v16i8, NEONvsri> {
let Inst{21-19} = 0b001; // imm6 = 001xxx
}
- def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4,
- f, OpcodeStr, "16", v8i16, ShOp> {
+ def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm16,
+ N2RegVShRFrm, OpcodeStr, "16", v8i16, NEONvsri> {
let Inst{21-20} = 0b01; // imm6 = 01xxxx
}
- def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4,
- f, OpcodeStr, "32", v4i32, ShOp> {
+ def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm32,
+ N2RegVShRFrm, OpcodeStr, "32", v4i32, NEONvsri> {
let Inst{21} = 0b1; // imm6 = 1xxxxx
}
- def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4,
- f, OpcodeStr, "64", v2i64, ShOp>;
+ def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, shr_imm64,
+ N2RegVShRFrm, OpcodeStr, "64", v2i64, NEONvsri>;
// imm6 = xxxxxx
}
@@ -3153,15 +3226,18 @@ multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
bit op4, InstrItinClass itin, string OpcodeStr, string Dt,
SDNode OpNode> {
def v8i8 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
- OpcodeStr, !strconcat(Dt, "16"), v8i8, v8i16, OpNode> {
+ OpcodeStr, !strconcat(Dt, "16"),
+ v8i8, v8i16, shr_imm8, OpNode> {
let Inst{21-19} = 0b001; // imm6 = 001xxx
}
def v4i16 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
- OpcodeStr, !strconcat(Dt, "32"), v4i16, v4i32, OpNode> {
+ OpcodeStr, !strconcat(Dt, "32"),
+ v4i16, v4i32, shr_imm16, OpNode> {
let Inst{21-20} = 0b01; // imm6 = 01xxxx
}
def v2i32 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
- OpcodeStr, !strconcat(Dt, "64"), v2i32, v2i64, OpNode> {
+ OpcodeStr, !strconcat(Dt, "64"),
+ v2i32, v2i64, shr_imm32, OpNode> {
let Inst{21} = 0b1; // imm6 = 1xxxxx
}
}
@@ -3697,16 +3773,21 @@ def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
(ins DPR:$src1, DPR:$Vn, DPR:$Vm),
N3RegFrm, IIC_VCNTiD,
"vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
- [(set DPR:$Vd,
- (v2i32 (or (and DPR:$Vn, DPR:$src1),
- (and DPR:$Vm, (vnotd DPR:$src1)))))]>;
+ [(set DPR:$Vd, (v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;
+
+def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd),
+ (and DPR:$Vm, (vnotd DPR:$Vd)))),
+ (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
+
def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
(ins QPR:$src1, QPR:$Vn, QPR:$Vm),
N3RegFrm, IIC_VCNTiQ,
"vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
- [(set QPR:$Vd,
- (v4i32 (or (and QPR:$Vn, QPR:$src1),
- (and QPR:$Vm, (vnotq QPR:$src1)))))]>;
+ [(set QPR:$Vd, (v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;
+
+def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd),
+ (and QPR:$Vm, (vnotq QPR:$Vd)))),
+ (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
// VBIF : Vector Bitwise Insert if False
// like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
@@ -3917,14 +3998,13 @@ defm VSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 0, N3RegVShFrm,
defm VSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm,
IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
"vshl", "u", int_arm_neon_vshiftu>;
+
// VSHL : Vector Shift Left (Immediate)
-defm VSHLi : N2VSh_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl,
- N2RegVShLFrm>;
+defm VSHLi : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl>;
+
// VSHR : Vector Shift Right (Immediate)
-defm VSHRs : N2VSh_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", NEONvshrs,
- N2RegVShRFrm>;
-defm VSHRu : N2VSh_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", NEONvshru,
- N2RegVShRFrm>;
+defm VSHRs : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s",NEONvshrs>;
+defm VSHRu : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u",NEONvshru>;
// VSHLL : Vector Shift Left Long
defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s", NEONvshlls>;
@@ -3957,10 +4037,8 @@ defm VRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm,
IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
"vrshl", "u", int_arm_neon_vrshiftu>;
// VRSHR : Vector Rounding Shift Right
-defm VRSHRs : N2VSh_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", NEONvrshrs,
- N2RegVShRFrm>;
-defm VRSHRu : N2VSh_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", NEONvrshru,
- N2RegVShRFrm>;
+defm VRSHRs : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s",NEONvrshrs>;
+defm VRSHRu : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u",NEONvrshru>;
// VRSHRN : Vector Rounding Shift Right and Narrow
defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
@@ -3974,13 +4052,11 @@ defm VQSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 1, N3RegVShFrm,
IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
"vqshl", "u", int_arm_neon_vqshiftu>;
// VQSHL : Vector Saturating Shift Left (Immediate)
-defm VQSHLsi : N2VSh_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshls,
- N2RegVShLFrm>;
-defm VQSHLui : N2VSh_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshlu,
- N2RegVShLFrm>;
+defm VQSHLsi : N2VShL_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshls>;
+defm VQSHLui : N2VShL_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshlu>;
+
// VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned)
-defm VQSHLsu : N2VSh_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsu,
- N2RegVShLFrm>;
+defm VQSHLsu : N2VShL_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsu>;
// VQSHRN : Vector Saturating Shift Right and Narrow
defm VQSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s",
@@ -4018,9 +4094,10 @@ defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrs>;
defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshru>;
// VSLI : Vector Shift Left and Insert
-defm VSLI : N2VShIns_QHSD<1, 1, 0b0101, 1, "vsli", NEONvsli, N2RegVShLFrm>;
+defm VSLI : N2VShInsL_QHSD<1, 1, 0b0101, 1, "vsli">;
+
// VSRI : Vector Shift Right and Insert
-defm VSRI : N2VShIns_QHSD<1, 1, 0b0100, 1, "vsri", NEONvsri, N2RegVShRFrm>;
+defm VSRI : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">;
// Vector Absolute and Saturating Absolute.
@@ -4362,14 +4439,8 @@ def VDUP8q : VDUPQ<0b11101110, 0b00, "8", v16i8>;
def VDUP16q : VDUPQ<0b11101010, 0b01, "16", v8i16>;
def VDUP32q : VDUPQ<0b11101010, 0b00, "32", v4i32>;
-def VDUPfd : NVDup<0b11101000, 0b1011, 0b00, (outs DPR:$V), (ins GPR:$R),
- IIC_VMOVIS, "vdup", "32", "$V, $R",
- [(set DPR:$V, (v2f32 (NEONvdup
- (f32 (bitconvert GPR:$R)))))]>;
-def VDUPfq : NVDup<0b11101010, 0b1011, 0b00, (outs QPR:$V), (ins GPR:$R),
- IIC_VMOVIS, "vdup", "32", "$V, $R",
- [(set QPR:$V, (v4f32 (NEONvdup
- (f32 (bitconvert GPR:$R)))))]>;
+def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>;
+def : Pat<(v4f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>;
// VDUP : Vector Duplicate Lane (from scalar to all elements)
@@ -4397,9 +4468,6 @@ def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16> {
def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32> {
let Inst{19} = lane{0};
}
-def VDUPLNfd : VDUPLND<{?,1,0,0}, "vdup", "32", v2f32> {
- let Inst{19} = lane{0};
-}
def VDUPLN8q : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8> {
let Inst{19-17} = lane{2-0};
}
@@ -4409,9 +4477,12 @@ def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16> {
def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32> {
let Inst{19} = lane{0};
}
-def VDUPLNfq : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4f32, v2f32> {
- let Inst{19} = lane{0};
-}
+
+def : Pat<(v2f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
+ (VDUPLN32d DPR:$Vm, imm:$lane)>;
+
+def : Pat<(v4f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
+ (VDUPLN32q DPR:$Vm, imm:$lane)>;
def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)),
(v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src,
@@ -4426,7 +4497,7 @@ def : Pat<(v4i32 (NEONvduplane (v4i32 QPR:$src), imm:$lane)),
(DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)),
- (v4f32 (VDUPLNfq (v2f32 (EXTRACT_SUBREG QPR:$src,
+ (v4f32 (VDUPLN32q (v2f32 (EXTRACT_SUBREG QPR:$src,
(DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>;
@@ -4517,12 +4588,12 @@ class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
def VREV64d8 : VREV64D<0b00, "vrev64", "8", v8i8>;
def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>;
def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>;
-def VREV64df : VREV64D<0b10, "vrev64", "32", v2f32>;
+def : Pat<(v2f32 (NEONvrev64 (v2f32 DPR:$Vm))), (VREV64d32 DPR:$Vm)>;
def VREV64q8 : VREV64Q<0b00, "vrev64", "8", v16i8>;
def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>;
def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>;
-def VREV64qf : VREV64Q<0b10, "vrev64", "32", v4f32>;
+def : Pat<(v4f32 (NEONvrev64 (v4f32 QPR:$Vm))), (VREV64q32 QPR:$Vm)>;
// VREV32 : Vector Reverse elements within 32-bit words
@@ -4628,8 +4699,8 @@ def VEXTq32 : VEXTq<"vext", "32", v4i32> {
let Inst{9-8} = 0b00;
}
def VEXTqf : VEXTq<"vext", "32", v4f32> {
- let Inst{11} = index{0};
- let Inst{10-8} = 0b000;
+ let Inst{11-10} = index{1-0};
+ let Inst{9-8} = 0b00;
}
// VTRN : Vector Transpose
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index 826ef46..8c542fe 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -27,22 +27,22 @@ def imm_comp_XFORM : SDNodeXForm<imm, [{
}]>;
/// imm0_7 predicate - True if the 32-bit immediate is in the range [0,7].
-def imm0_7 : PatLeaf<(i32 imm), [{
- return (uint32_t)N->getZExtValue() < 8;
+def imm0_7 : ImmLeaf<i32, [{
+ return Imm >= 0 && Imm < 8;
}]>;
def imm0_7_neg : PatLeaf<(i32 imm), [{
return (uint32_t)-N->getZExtValue() < 8;
}], imm_neg_XFORM>;
-def imm0_255 : PatLeaf<(i32 imm), [{
- return (uint32_t)N->getZExtValue() < 256;
+def imm0_255 : ImmLeaf<i32, [{
+ return Imm >= 0 && Imm < 256;
}]>;
def imm0_255_comp : PatLeaf<(i32 imm), [{
return ~((uint32_t)N->getZExtValue()) < 256;
}]>;
-def imm8_255 : PatLeaf<(i32 imm), [{
- return (uint32_t)N->getZExtValue() >= 8 && (uint32_t)N->getZExtValue() < 256;
+def imm8_255 : ImmLeaf<i32, [{
+ return Imm >= 8 && Imm < 256;
}]>;
def imm8_255_neg : PatLeaf<(i32 imm), [{
unsigned Val = -N->getZExtValue();
@@ -369,6 +369,15 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
let Inst{2-0} = 0b000;
}
+ def tBX_Rm : TI<(outs), (ins pred:$p, GPR:$Rm), IIC_Br, "bx${p}\t$Rm",
+ [/* for disassembly only */]>,
+ T1Special<{1,1,0,?}> {
+ // A6.2.3 & A8.6.25
+ bits<4> Rm;
+ let Inst{6-3} = Rm;
+ let Inst{2-0} = 0b000;
+ }
+
// Alternative return instruction used by vararg functions.
def tBX_RET_vararg : TI<(outs), (ins tGPR:$Rm),
IIC_Br, "bx\t$Rm",
@@ -712,6 +721,19 @@ def tLDRpci : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i,
let Inst{7-0} = addr;
}
+// FIXME: Remove this entry when the above ldr.n workaround is fixed.
+// For disassembly use only.
+def tLDRpciDIS : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i,
+ "ldr", "\t$Rt, $addr",
+ [/* disassembly only */]>,
+ T1Encoding<{0,1,0,0,1,?}> {
+ // A6.2 & A8.6.59
+ bits<3> Rt;
+ bits<8> addr;
+ let Inst{10-8} = Rt;
+ let Inst{7-0} = addr;
+}
+
// A8.6.194 & A8.6.192
defm tSTR : thumb_st_rr_ri_enc<0b000, 0b0110, t_addrmode_rrs4,
t_addrmode_is4, AddrModeT1_4,
@@ -1175,10 +1197,18 @@ def tREVSH : // A8.6.136
"revsh", "\t$Rd, $Rm",
[(set tGPR:$Rd,
(sext_inreg
- (or (srl (and tGPR:$Rm, 0xFF00), (i32 8)),
+ (or (srl tGPR:$Rm, (i32 8)),
(shl tGPR:$Rm, (i32 8))), i16))]>,
Requires<[IsThumb, IsThumb1Only, HasV6]>;
+def : T1Pat<(sext_inreg (or (srl (and tGPR:$Rm, 0xFF00), (i32 8)),
+ (shl tGPR:$Rm, (i32 8))), i16),
+ (tREVSH tGPR:$Rm)>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
+
+def : T1Pat<(sra (bswap tGPR:$Rm), (i32 16)), (tREVSH tGPR:$Rm)>,
+ Requires<[IsThumb, IsThumb1Only, HasV6]>;
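+
+// In C terms, these T1Pats match byte-swap-of-halfword idioms, e.g. roughly:
+//   short swap16(short x) { return (short)(((x >> 8) & 0xFF) | (x << 8)); }
+// which legalization can present either as the or/srl/shl tree or as the
+// (sra (bswap x), 16) form above.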
+
// Rotate right register
def tROR : // A8.6.139
T1sItDPEncode<0b0111, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
@@ -1322,10 +1352,8 @@ def tLEApcrelJT : tPseudoInst<(outs tGPR:$Rd),
// Move between coprocessor and ARM core register -- for disassembly only
//
-class tMovRCopro<string opc, bit direction>
- : T1Cop<(outs), (ins p_imm:$cop, i32imm:$opc1,
- GPR:$Rt, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2),
- !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"),
+class tMovRCopro<string opc, bit direction, dag oops, dag iops>
+ : T1Cop<oops, iops, !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"),
[/* For disassembly only; pattern left blank */]> {
let Inst{27-24} = 0b1110;
let Inst{20} = direction;
@@ -1346,8 +1374,12 @@ class tMovRCopro<string opc, bit direction>
let Inst{19-16} = CRn;
}
-def tMCR : tMovRCopro<"mcr", 0 /* from ARM core register to coprocessor */>;
-def tMRC : tMovRCopro<"mrc", 1 /* from coprocessor to ARM core register */>;
+def tMCR : tMovRCopro<"mcr", 0 /* from ARM core register to coprocessor */,
+ (outs), (ins p_imm:$cop, i32imm:$opc1, GPR:$Rt, c_imm:$CRn,
+ c_imm:$CRm, i32imm:$opc2)>;
+def tMRC : tMovRCopro<"mrc", 1 /* from coprocessor to ARM core register */,
+ (outs GPR:$Rt), (ins p_imm:$cop, i32imm:$opc1, c_imm:$CRn,
+ c_imm:$CRm, i32imm:$opc2)>;
class tMovRRCopro<string opc, bit direction>
: T1Cop<(outs), (ins p_imm:$cop, i32imm:$opc1, GPR:$Rt, GPR:$Rt2, c_imm:$CRm),
@@ -1420,7 +1452,7 @@ def tTPsoft : TIx2<0b11110, 0b11, 1, (outs), (ins), IIC_Br,
// from some other function to get here, and we're using the stack frame for the
// containing function to save/restore registers, we can't keep anything live in
// regs across the eh_sjlj_setjmp(), else it will almost certainly have been
-// tromped upon when we get here from a longjmp(). We force everthing out of
+// tromped upon when we get here from a longjmp(). We force everything out of
// registers except for our own input by listing the relevant registers in
// Defs. By doing so, we also cause the prologue/epilogue code to actively
// preserve all of the callee-saved registers, which is exactly what we want.
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 0e01be5..600a121 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -44,7 +44,9 @@ def t2_so_imm_neg_XFORM : SDNodeXForm<imm, [{
// t2_so_imm - Match a 32-bit immediate operand, which is an
// 8-bit immediate rotated by an arbitrary number of bits, or an 8-bit
// immediate splatted into multiple bytes of the word.
-def t2_so_imm : Operand<i32>, PatLeaf<(imm), [{ return Pred_t2_so_imm(N); }]> {
+def t2_so_imm : Operand<i32>, ImmLeaf<i32, [{
+ return ARM_AM::getT2SOImmVal(Imm) != -1;
+ }]> {
let EncoderMethod = "getT2SOImmOpValue";
}
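+
+// A few values this accepts, as a non-exhaustive sketch of the Thumb2
+// modified-immediate encoding: a plain 8-bit value (0x000000AB), an 8-bit
+// value splatted across halfwords or bytes (0x00AB00AB, 0xABABABAB), or an
+// 8-bit value with its top bit set rotated into place (0x0AB00000).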
@@ -61,49 +63,15 @@ def t2_so_imm_neg : Operand<i32>,
return ARM_AM::getT2SOImmVal(-((uint32_t)N->getZExtValue())) != -1;
}], t2_so_imm_neg_XFORM>;
-// Break t2_so_imm's up into two pieces. This handles immediates with up to 16
-// bits set in them. This uses t2_so_imm2part to match and t2_so_imm2part_[12]
-// to get the first/second pieces.
-def t2_so_imm2part : Operand<i32>,
- PatLeaf<(imm), [{
- return ARM_AM::isT2SOImmTwoPartVal((unsigned)N->getZExtValue());
- }]> {
-}
-
-def t2_so_imm2part_1 : SDNodeXForm<imm, [{
- unsigned V = ARM_AM::getT2SOImmTwoPartFirst((unsigned)N->getZExtValue());
- return CurDAG->getTargetConstant(V, MVT::i32);
-}]>;
-
-def t2_so_imm2part_2 : SDNodeXForm<imm, [{
- unsigned V = ARM_AM::getT2SOImmTwoPartSecond((unsigned)N->getZExtValue());
- return CurDAG->getTargetConstant(V, MVT::i32);
-}]>;
-
-def t2_so_neg_imm2part : Operand<i32>, PatLeaf<(imm), [{
- return ARM_AM::isT2SOImmTwoPartVal(-(int)N->getZExtValue());
- }]> {
-}
-
-def t2_so_neg_imm2part_1 : SDNodeXForm<imm, [{
- unsigned V = ARM_AM::getT2SOImmTwoPartFirst(-(int)N->getZExtValue());
- return CurDAG->getTargetConstant(V, MVT::i32);
-}]>;
-
-def t2_so_neg_imm2part_2 : SDNodeXForm<imm, [{
- unsigned V = ARM_AM::getT2SOImmTwoPartSecond(-(int)N->getZExtValue());
- return CurDAG->getTargetConstant(V, MVT::i32);
-}]>;
-
/// imm1_31 predicate - True if the 32-bit immediate is in the range [1,31].
-def imm1_31 : PatLeaf<(i32 imm), [{
- return (int32_t)N->getZExtValue() >= 1 && (int32_t)N->getZExtValue() < 32;
+def imm1_31 : ImmLeaf<i32, [{
+ return (int32_t)Imm >= 1 && (int32_t)Imm < 32;
}]>;
/// imm0_4095 predicate - True if the 32-bit immediate is in the range [0,4095].
def imm0_4095 : Operand<i32>,
- PatLeaf<(i32 imm), [{
- return (uint32_t)N->getZExtValue() < 4096;
+ ImmLeaf<i32, [{
+ return Imm >= 0 && Imm < 4096;
}]>;
def imm0_4095_neg : PatLeaf<(i32 imm), [{
@@ -118,6 +86,11 @@ def imm0_255_not : PatLeaf<(i32 imm), [{
return (uint32_t)(~N->getZExtValue()) < 255;
}], imm_comp_XFORM>;
+def lo5AllOne : PatLeaf<(i32 imm), [{
+ // Returns true if the low 5 bits are all ones.
+ return (((uint32_t)N->getZExtValue()) & 0x1FUL) == 0x1FUL;
+}]>;
+
// Define Thumb2 specific addressing modes.
// t2addrmode_imm12 := reg + imm12
@@ -129,6 +102,12 @@ def t2addrmode_imm12 : Operand<i32>,
let ParserMatchClass = MemMode5AsmOperand;
}
+// t2ldrlabel := imm12
+def t2ldrlabel : Operand<i32> {
+ let EncoderMethod = "getAddrModeImm12OpValue";
+}
+
+
// ADR instruction labels.
def t2adrlabel : Operand<i32> {
let EncoderMethod = "getT2AdrLabelOpValue";
@@ -173,6 +152,15 @@ def t2addrmode_so_reg : Operand<i32>,
let ParserMatchClass = MemMode5AsmOperand;
}
+// t2addrmode_reg := reg
+// Used by load/store exclusive instructions. Enables correct assembly
+// parsing and printing; not used for any codegen matching.
+//
+def t2addrmode_reg : Operand<i32> {
+ let PrintMethod = "printAddrMode7Operand";
+ let MIOperandInfo = (ops tGPR);
+ let ParserMatchClass = MemMode7AsmOperand;
+}
//===----------------------------------------------------------------------===//
// Multiclass helpers...
@@ -700,49 +688,27 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{24-21} = opcod;
}
}
+}
// Carry setting variants
-let isCodeGenOnly = 1, Defs = [CPSR] in {
-multiclass T2I_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode,
- bit Commutable = 0> {
+// NOTE: CPSR def omitted because it will be handled by the custom inserter.
+let usesCustomInserter = 1 in {
+multiclass T2I_adde_sube_s_irs<PatFrag opnode, bit Commutable = 0> {
// shifted imm
- def ri : T2sTwoRegImm<
- (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), IIC_iALUi,
- opc, "\t$Rd, $Rn, $imm",
- [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_imm:$imm))]>,
- Requires<[IsThumb2]> {
- let Inst{31-27} = 0b11110;
- let Inst{25} = 0;
- let Inst{24-21} = opcod;
- let Inst{20} = 1; // The S bit.
- let Inst{15} = 0;
- }
+ def ri : t2PseudoInst<(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm),
+ Size4Bytes, IIC_iALUi,
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_imm:$imm))]>;
// register
- def rr : T2sThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUr,
- opc, ".w\t$Rd, $Rn, $Rm",
- [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]>,
- Requires<[IsThumb2]> {
+ def rr : t2PseudoInst<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
+ Size4Bytes, IIC_iALUr,
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]> {
let isCommutable = Commutable;
- let Inst{31-27} = 0b11101;
- let Inst{26-25} = 0b01;
- let Inst{24-21} = opcod;
- let Inst{20} = 1; // The S bit.
- let Inst{14-12} = 0b000; // imm3
- let Inst{7-6} = 0b00; // imm2
- let Inst{5-4} = 0b00; // type
}
// shifted register
- def rs : T2sTwoRegShiftedReg<
- (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm),
- IIC_iALUsi, opc, ".w\t$Rd, $Rn, $ShiftedRm",
- [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm))]>,
- Requires<[IsThumb2]> {
- let Inst{31-27} = 0b11101;
- let Inst{26-25} = 0b01;
- let Inst{24-21} = opcod;
- let Inst{20} = 1; // The S bit.
- }
-}
+ def rs : t2PseudoInst<
+ (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm),
+ Size4Bytes, IIC_iALUsi,
+ [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm))]>;
}
}
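To see why carry-consuming variants exist at all: the DAG produces adde/sube
nodes when lowering wide arithmetic, e.g. a 64-bit add on a 32-bit target
splits into an ADDS/ADC pair. A plain C++ model of that decomposition
(illustrative only, not LLVM code):

    #include <cassert>
    #include <cstdint>

    // 64-bit add from 32-bit halves: ADDS produces the carry that ADC consumes.
    static uint64_t add64(uint32_t alo, uint32_t ahi,
                          uint32_t blo, uint32_t bhi) {
      uint32_t lo = alo + blo;          // ADDS
      uint32_t carry = lo < alo;        // carry-out of the low word
      uint32_t hi = ahi + bhi + carry;  // ADC
      return ((uint64_t)hi << 32) | lo;
    }

    int main() {
      assert(add64(0xffffffffu, 0, 1, 0) == 0x100000000ull);
      return 0;
    }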
@@ -864,6 +830,7 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc,
let Inst{15-12} = Rt;
bits<17> addr;
+ let addr{12} = 1; // add = TRUE
let Inst{19-16} = addr{16-13}; // Rn
let Inst{23} = addr{12}; // U
let Inst{11-0} = addr{11-0}; // imm
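Reading the slices above: the 17-bit addr operand packs Rn in bits 16-13, the
U (add) bit in bit 12, defaulted to 1 here, and the immediate in bits 11-0.
A minimal sketch of that packing (helper name hypothetical):

    #include <cstdint>

    // Packs {Rn, U, imm12} the way the bit slices above consume them.
    static uint32_t packT2AddrModeImm12(unsigned Rn, bool Add, unsigned Imm12) {
      return (uint32_t(Rn) << 13) | (uint32_t(Add) << 12) | (Imm12 & 0xFFFu);
    }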
@@ -911,7 +878,7 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc,
}
// FIXME: Is the pci variant actually needed?
- def pci : T2Ipc <(outs GPR:$Rt), (ins i32imm:$addr), iii,
+ def pci : T2Ipc <(outs GPR:$Rt), (ins t2ldrlabel:$addr), iii,
opc, ".w\t$Rt, $addr",
[(set GPR:$Rt, (opnode (ARMWrapper tconstpool:$addr)))]> {
let isReMaterializable = 1;
@@ -944,6 +911,7 @@ multiclass T2I_st<bits<2> opcod, string opc,
let Inst{15-12} = Rt;
bits<17> addr;
+ let addr{12} = 1; // add = TRUE
let Inst{19-16} = addr{16-13}; // Rn
let Inst{23} = addr{12}; // U
let Inst{11-0} = addr{11-0}; // imm
@@ -1398,7 +1366,7 @@ def t2LDRSH_POST : T2Iidxldst<1, 0b01, 1, 0, (outs GPR:$dst, GPR:$Rn),
// for disassembly only.
// Ref: A8.6.57 LDR (immediate, Thumb) Encoding T4
class T2IldT<bit signed, bits<2> type, string opc, InstrItinClass ii>
- : T2Ii8<(outs GPR:$Rt), (ins t2addrmode_imm8:$addr), ii, opc,
+ : T2Ii8<(outs rGPR:$Rt), (ins t2addrmode_imm8:$addr), ii, opc,
"\t$Rt, $addr", []> {
let Inst{31-27} = 0b11111;
let Inst{26-25} = 0b00;
@@ -1440,42 +1408,48 @@ def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs),
def t2STR_PRE : T2Iidxldst<0, 0b10, 0, 1, (outs GPR:$base_wb),
(ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr),
AddrModeT2_i8, IndexModePre, IIC_iStore_iu,
- "str", "\t$Rt, [$Rn, $addr]!", "$Rn = $base_wb",
+ "str", "\t$Rt, [$Rn, $addr]!",
+ "$Rn = $base_wb,@earlyclobber $base_wb",
[(set GPR:$base_wb,
(pre_store GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>;
def t2STR_POST : T2Iidxldst<0, 0b10, 0, 0, (outs GPR:$base_wb),
(ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr),
AddrModeT2_i8, IndexModePost, IIC_iStore_iu,
- "str", "\t$Rt, [$Rn], $addr", "$Rn = $base_wb",
+ "str", "\t$Rt, [$Rn], $addr",
+ "$Rn = $base_wb,@earlyclobber $base_wb",
[(set GPR:$base_wb,
(post_store GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>;
def t2STRH_PRE : T2Iidxldst<0, 0b01, 0, 1, (outs GPR:$base_wb),
(ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr),
AddrModeT2_i8, IndexModePre, IIC_iStore_iu,
- "strh", "\t$Rt, [$Rn, $addr]!", "$Rn = $base_wb",
+ "strh", "\t$Rt, [$Rn, $addr]!",
+ "$Rn = $base_wb,@earlyclobber $base_wb",
[(set GPR:$base_wb,
(pre_truncsti16 GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>;
def t2STRH_POST : T2Iidxldst<0, 0b01, 0, 0, (outs GPR:$base_wb),
(ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr),
AddrModeT2_i8, IndexModePost, IIC_iStore_bh_iu,
- "strh", "\t$Rt, [$Rn], $addr", "$Rn = $base_wb",
+ "strh", "\t$Rt, [$Rn], $addr",
+ "$Rn = $base_wb,@earlyclobber $base_wb",
[(set GPR:$base_wb,
(post_truncsti16 GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>;
def t2STRB_PRE : T2Iidxldst<0, 0b00, 0, 1, (outs GPR:$base_wb),
(ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr),
AddrModeT2_i8, IndexModePre, IIC_iStore_bh_iu,
- "strb", "\t$Rt, [$Rn, $addr]!", "$Rn = $base_wb",
+ "strb", "\t$Rt, [$Rn, $addr]!",
+ "$Rn = $base_wb,@earlyclobber $base_wb",
[(set GPR:$base_wb,
(pre_truncsti8 GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>;
def t2STRB_POST : T2Iidxldst<0, 0b00, 0, 0, (outs GPR:$base_wb),
(ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr),
AddrModeT2_i8, IndexModePost, IIC_iStore_bh_iu,
- "strb", "\t$Rt, [$Rn], $addr", "$Rn = $base_wb",
+ "strb", "\t$Rt, [$Rn], $addr",
+ "$Rn = $base_wb,@earlyclobber $base_wb",
[(set GPR:$base_wb,
(post_truncsti8 GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>;
@@ -1483,7 +1457,7 @@ def t2STRB_POST : T2Iidxldst<0, 0b00, 0, 0, (outs GPR:$base_wb),
// only.
// Ref: A8.6.193 STR (immediate, Thumb) Encoding T4
class T2IstT<bits<2> type, string opc, InstrItinClass ii>
- : T2Ii8<(outs GPR:$Rt), (ins t2addrmode_imm8:$addr), ii, opc,
+ : T2Ii8<(outs rGPR:$Rt), (ins t2addrmode_imm8:$addr), ii, opc,
"\t$Rt, $addr", []> {
let Inst{31-27} = 0b11111;
let Inst{26-25} = 0b00;
@@ -1508,20 +1482,20 @@ def t2STRHT : T2IstT<0b01, "strht", IIC_iStore_bh_i>;
// ldrd / strd pre / post variants
// For disassembly only.
-def t2LDRD_PRE : T2Ii8s4<1, 1, 1, (outs GPR:$Rt, GPR:$Rt2),
+def t2LDRD_PRE : T2Ii8s4<1, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2),
(ins GPR:$base, t2am_imm8s4_offset:$imm), IIC_iLoad_d_ru,
"ldrd", "\t$Rt, $Rt2, [$base, $imm]!", []>;
-def t2LDRD_POST : T2Ii8s4<0, 1, 1, (outs GPR:$Rt, GPR:$Rt2),
+def t2LDRD_POST : T2Ii8s4<0, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2),
(ins GPR:$base, t2am_imm8s4_offset:$imm), IIC_iLoad_d_ru,
"ldrd", "\t$Rt, $Rt2, [$base], $imm", []>;
def t2STRD_PRE : T2Ii8s4<1, 1, 0, (outs),
- (ins GPR:$Rt, GPR:$Rt2, GPR:$base, t2am_imm8s4_offset:$imm),
+ (ins rGPR:$Rt, rGPR:$Rt2, GPR:$base, t2am_imm8s4_offset:$imm),
IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, [$base, $imm]!", []>;
def t2STRD_POST : T2Ii8s4<0, 1, 0, (outs),
- (ins GPR:$Rt, GPR:$Rt2, GPR:$base, t2am_imm8s4_offset:$imm),
+ (ins rGPR:$Rt, rGPR:$Rt2, GPR:$base, t2am_imm8s4_offset:$imm),
IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, [$base], $imm", []>;
// T2Ipl (Preload Data/Instruction) signals the memory system of possible future
@@ -1541,6 +1515,7 @@ multiclass T2Ipl<bits<1> write, bits<1> instr, string opc> {
let Inst{15-12} = 0b1111;
bits<17> addr;
+ let addr{12} = 1; // add = TRUE
let Inst{19-16} = addr{16-13}; // Rn
let Inst{23} = addr{12}; // U
let Inst{11-0} = addr{11-0}; // imm12
@@ -1813,10 +1788,8 @@ defm t2ADC : T2I_adde_sube_irs<0b1010, "adc",
BinOpFrag<(adde_dead_carry node:$LHS, node:$RHS)>, 1>;
defm t2SBC : T2I_adde_sube_irs<0b1011, "sbc",
BinOpFrag<(sube_dead_carry node:$LHS, node:$RHS)>>;
-defm t2ADCS : T2I_adde_sube_s_irs<0b1010, "adc",
- BinOpFrag<(adde_live_carry node:$LHS, node:$RHS)>, 1>;
-defm t2SBCS : T2I_adde_sube_s_irs<0b1011, "sbc",
- BinOpFrag<(sube_live_carry node:$LHS, node:$RHS)>>;
+defm t2ADCS : T2I_adde_sube_s_irs<
+                  BinOpFrag<(adde_live_carry node:$LHS, node:$RHS)>, 1>;
+defm t2SBCS : T2I_adde_sube_s_irs<
+                  BinOpFrag<(sube_live_carry node:$LHS, node:$RHS)>>;
// RSB
defm t2RSB : T2I_rbin_irs <0b1110, "rsb",
@@ -1847,9 +1820,14 @@ def : T2Pat<(addc rGPR:$src, t2_so_imm_neg:$imm),
// Effectively, the inverse interpretation of the carry flag already accounts
// for part of the negation.
let AddedComplexity = 1 in
-def : T2Pat<(adde rGPR:$src, imm0_255_not:$imm),
+def : T2Pat<(adde_dead_carry rGPR:$src, imm0_255_not:$imm),
+ (t2SBCri rGPR:$src, imm0_255_not:$imm)>;
+def : T2Pat<(adde_dead_carry rGPR:$src, t2_so_imm_not:$imm),
+ (t2SBCri rGPR:$src, t2_so_imm_not:$imm)>;
+let AddedComplexity = 1 in
+def : T2Pat<(adde_live_carry rGPR:$src, imm0_255_not:$imm),
(t2SBCSri rGPR:$src, imm0_255_not:$imm)>;
-def : T2Pat<(adde rGPR:$src, t2_so_imm_not:$imm),
+def : T2Pat<(adde_live_carry rGPR:$src, t2_so_imm_not:$imm),
(t2SBCSri rGPR:$src, t2_so_imm_not:$imm)>;
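The identity these patterns rely on, spelled out: in two's complement,
Rn + ~imm + carry equals Rn - imm - (1 - carry), which is exactly what SBC
computes, so an adde with an inverted immediate is an SBC with the original
immediate. A quick self-contained check:

    #include <cassert>
    #include <cstdint>
    #include <initializer_list>

    int main() {
      for (uint32_t a : {0u, 1u, 0x1234u, 0xffffffffu})
        for (uint32_t imm : {0u, 5u, 255u})
          for (uint32_t c : {0u, 1u}) {
            uint32_t adc = a + ~imm + c;        // adde with the inverted imm
            uint32_t sbc = a - imm - (1u - c);  // SBC with the original imm
            assert(adc == sbc);
          }
      return 0;
    }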
// Select Bytes -- for disassembly only
@@ -2052,6 +2030,10 @@ defm t2LSR : T2I_sh_ir<0b01, "lsr", BinOpFrag<(srl node:$LHS, node:$RHS)>>;
defm t2ASR : T2I_sh_ir<0b10, "asr", BinOpFrag<(sra node:$LHS, node:$RHS)>>;
defm t2ROR : T2I_sh_ir<0b11, "ror", BinOpFrag<(rotr node:$LHS, node:$RHS)>>;
+// (rotr x, (and y, 0x...1f)) ==> (ROR x, y)
+def : Pat<(rotr rGPR:$lhs, (and rGPR:$rhs, lo5AllOne)),
+ (t2RORrr rGPR:$lhs, rGPR:$rhs)>;
+
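The fold is sound because a 32-bit rotate only observes the low five bits of
its amount, so ANDing the amount with a lo5AllOne mask cannot change the
result. A small demonstration:

    #include <cassert>
    #include <cstdint>

    // Rotate right; hardware ROR takes the amount modulo 32.
    static uint32_t rotr32(uint32_t x, uint32_t n) {
      n &= 31;
      return n ? (x >> n) | (x << (32 - n)) : x;
    }

    int main() {
      for (uint32_t y = 0; y < 256; ++y)
        assert(rotr32(0xDEADBEEFu, y & 0x1Fu) == rotr32(0xDEADBEEFu, y));
      return 0;
    }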
let Uses = [CPSR] in {
def t2RRX : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
"rrx", "\t$Rd, $Rm",
@@ -2140,10 +2122,12 @@ def t2BFC : T2BitFI<(outs rGPR:$Rd), (ins rGPR:$src, bf_inv_mask_imm:$imm),
IIC_iUNAsi, "bfc", "\t$Rd, $imm",
[(set rGPR:$Rd, (and rGPR:$src, bf_inv_mask_imm:$imm))]> {
let Inst{31-27} = 0b11110;
+ let Inst{26} = 0; // should be 0.
let Inst{25} = 1;
let Inst{24-20} = 0b10110;
let Inst{19-16} = 0b1111; // Rn
let Inst{15} = 0;
+ let Inst{5} = 0; // should be 0.
bits<10> imm;
let msb{4-0} = imm{9-5};
@@ -2176,9 +2160,11 @@ let Constraints = "$src = $Rd" in {
[(set rGPR:$Rd, (ARMbfi rGPR:$src, rGPR:$Rn,
bf_inv_mask_imm:$imm))]> {
let Inst{31-27} = 0b11110;
+ let Inst{26} = 0; // should be 0.
let Inst{25} = 1;
let Inst{24-20} = 0b10110;
let Inst{15} = 0;
+ let Inst{5} = 0; // should be 0.
bits<10> imm;
let msb{4-0} = imm{9-5};
@@ -2193,9 +2179,11 @@ let Constraints = "$src = $Rd" in {
IIC_iBITi, "bfi", "\t$Rd, $Rn, $lsbit, $width",
[]> {
let Inst{31-27} = 0b11110;
+ let Inst{26} = 0; // should be 0.
let Inst{25} = 1;
let Inst{24-20} = 0b10110;
let Inst{15} = 0;
+ let Inst{5} = 0; // should be 0.
bits<5> lsbit;
bits<5> width;
@@ -2607,9 +2595,15 @@ def t2REVSH : T2I_misc<0b01, 0b11, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
"revsh", ".w\t$Rd, $Rm",
[(set rGPR:$Rd,
(sext_inreg
- (or (srl (and rGPR:$Rm, 0xFF00), (i32 8)),
+ (or (srl rGPR:$Rm, (i32 8)),
(shl rGPR:$Rm, (i32 8))), i16))]>;
+def : T2Pat<(sext_inreg (or (srl (and rGPR:$Rm, 0xFF00), (i32 8)),
+ (shl rGPR:$Rm, (i32 8))), i16),
+ (t2REVSH rGPR:$Rm)>;
+
+def : T2Pat<(sra (bswap rGPR:$Rm), (i32 16)), (t2REVSH rGPR:$Rm)>;
+
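Both forms describe the same operation: REVSH byte-swaps the low halfword and
sign-extends it, and (sra (bswap x), 16) yields the same value because bswap
moves the swapped low halfword into the top 16 bits. In plain C++:

    #include <cassert>
    #include <cstdint>
    #include <initializer_list>

    static int32_t revsh(uint32_t x) {
      return (int16_t)(((x & 0xFFu) << 8) | ((x >> 8) & 0xFFu));
    }

    static uint32_t bswap32(uint32_t x) {
      return (x << 24) | ((x & 0xFF00u) << 8) | ((x >> 8) & 0xFF00u) | (x >> 24);
    }

    int main() {
      for (uint32_t x : {0x12345678u, 0xFFFF8000u, 0x00000080u})
        assert(revsh(x) == (int32_t)bswap32(x) >> 16);
      return 0;
    }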
def t2PKHBT : T2ThreeReg<
(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, shift_imm:$sh),
IIC_iBITsi, "pkhbt", "\t$Rd, $Rn, $Rm$sh",
@@ -2843,9 +2837,9 @@ class T2I_ldrex<bits<2> opcod, dag oops, dag iops, AddrMode am, SizeFlagVal sz,
let Inst{5-4} = opcod;
let Inst{3-0} = 0b1111;
- bits<4> Rn;
+ bits<4> addr;
bits<4> Rt;
- let Inst{19-16} = Rn;
+ let Inst{19-16} = addr;
let Inst{15-12} = Rt;
}
class T2I_strex<bits<2> opcod, dag oops, dag iops, AddrMode am, SizeFlagVal sz,
@@ -2859,37 +2853,37 @@ class T2I_strex<bits<2> opcod, dag oops, dag iops, AddrMode am, SizeFlagVal sz,
let Inst{5-4} = opcod;
bits<4> Rd;
- bits<4> Rn;
+ bits<4> addr;
bits<4> Rt;
- let Inst{11-8} = Rd;
- let Inst{19-16} = Rn;
+ let Inst{3-0} = Rd;
+ let Inst{19-16} = addr;
let Inst{15-12} = Rt;
}
let mayLoad = 1 in {
-def t2LDREXB : T2I_ldrex<0b00, (outs rGPR:$Rt), (ins rGPR:$Rn), AddrModeNone,
- Size4Bytes, NoItinerary, "ldrexb", "\t$Rt, [$Rn]",
+def t2LDREXB : T2I_ldrex<0b00, (outs rGPR:$Rt), (ins t2addrmode_reg:$addr), AddrModeNone,
+ Size4Bytes, NoItinerary, "ldrexb", "\t$Rt, $addr",
"", []>;
-def t2LDREXH : T2I_ldrex<0b01, (outs rGPR:$Rt), (ins rGPR:$Rn), AddrModeNone,
- Size4Bytes, NoItinerary, "ldrexh", "\t$Rt, [$Rn]",
+def t2LDREXH : T2I_ldrex<0b01, (outs rGPR:$Rt), (ins t2addrmode_reg:$addr), AddrModeNone,
+ Size4Bytes, NoItinerary, "ldrexh", "\t$Rt, $addr",
"", []>;
-def t2LDREX : Thumb2I<(outs rGPR:$Rt), (ins rGPR:$Rn), AddrModeNone,
+def t2LDREX : Thumb2I<(outs rGPR:$Rt), (ins t2addrmode_reg:$addr), AddrModeNone,
Size4Bytes, NoItinerary,
- "ldrex", "\t$Rt, [$Rn]", "",
+ "ldrex", "\t$Rt, $addr", "",
[]> {
let Inst{31-27} = 0b11101;
let Inst{26-20} = 0b0000101;
let Inst{11-8} = 0b1111;
let Inst{7-0} = 0b00000000; // imm8 = 0
- bits<4> Rn;
bits<4> Rt;
- let Inst{19-16} = Rn;
+ bits<4> addr;
+ let Inst{19-16} = addr;
let Inst{15-12} = Rt;
}
-def t2LDREXD : T2I_ldrex<0b11, (outs rGPR:$Rt, rGPR:$Rt2), (ins rGPR:$Rn),
+def t2LDREXD : T2I_ldrex<0b11, (outs rGPR:$Rt, rGPR:$Rt2), (ins t2addrmode_reg:$addr),
AddrModeNone, Size4Bytes, NoItinerary,
- "ldrexd", "\t$Rt, $Rt2, [$Rn]", "",
+ "ldrexd", "\t$Rt, $Rt2, $addr", "",
[], {?, ?, ?, ?}> {
bits<4> Rt2;
let Inst{11-8} = Rt2;
@@ -2897,31 +2891,31 @@ def t2LDREXD : T2I_ldrex<0b11, (outs rGPR:$Rt, rGPR:$Rt2), (ins rGPR:$Rn),
}
let mayStore = 1, Constraints = "@earlyclobber $Rd" in {
-def t2STREXB : T2I_strex<0b00, (outs rGPR:$Rd), (ins rGPR:$Rt, rGPR:$Rn),
- AddrModeNone, Size4Bytes, NoItinerary,
- "strexb", "\t$Rd, $Rt, [$Rn]", "", []>;
-def t2STREXH : T2I_strex<0b01, (outs rGPR:$Rd), (ins rGPR:$Rt, rGPR:$Rn),
- AddrModeNone, Size4Bytes, NoItinerary,
- "strexh", "\t$Rd, $Rt, [$Rn]", "", []>;
-def t2STREX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt, rGPR:$Rn),
- AddrModeNone, Size4Bytes, NoItinerary,
- "strex", "\t$Rd, $Rt, [$Rn]", "",
- []> {
+def t2STREXB : T2I_strex<0b00, (outs rGPR:$Rd), (ins rGPR:$Rt, t2addrmode_reg:$addr),
+ AddrModeNone, Size4Bytes, NoItinerary,
+ "strexb", "\t$Rd, $Rt, $addr", "", []>;
+def t2STREXH : T2I_strex<0b01, (outs rGPR:$Rd), (ins rGPR:$Rt, t2addrmode_reg:$addr),
+ AddrModeNone, Size4Bytes, NoItinerary,
+ "strexh", "\t$Rd, $Rt, $addr", "", []>;
+def t2STREX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt, t2addrmode_reg:$addr),
+ AddrModeNone, Size4Bytes, NoItinerary,
+ "strex", "\t$Rd, $Rt, $addr", "",
+ []> {
let Inst{31-27} = 0b11101;
let Inst{26-20} = 0b0000100;
let Inst{7-0} = 0b00000000; // imm8 = 0
bits<4> Rd;
- bits<4> Rn;
+ bits<4> addr;
bits<4> Rt;
let Inst{11-8} = Rd;
- let Inst{19-16} = Rn;
+ let Inst{19-16} = addr;
let Inst{15-12} = Rt;
}
def t2STREXD : T2I_strex<0b11, (outs rGPR:$Rd),
- (ins rGPR:$Rt, rGPR:$Rt2, rGPR:$Rn),
+ (ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_reg:$addr),
AddrModeNone, Size4Bytes, NoItinerary,
- "strexd", "\t$Rd, $Rt, $Rt2, [$Rn]", "", [],
+ "strexd", "\t$Rd, $Rt, $Rt2, $addr", "", [],
{?, ?, ?, ?}> {
bits<4> Rt2;
let Inst{11-8} = Rt2;
@@ -2965,7 +2959,7 @@ let isCall = 1,
// here, and we're using the stack frame for the containing function to
// save/restore registers, we can't keep anything live in regs across
// the eh_sjlj_setjmp(), else it will almost certainly have been tromped upon
-// when we get here from a longjmp(). We force everthing out of registers
+// when we get here from a longjmp(). We force everything out of registers
// except for our own input by listing the relevant registers in Defs. By
// doing so, we also cause the prologue/epilogue code to actively preserve
// all of the callee-saved registers, which is exactly what we want.
@@ -3238,19 +3232,20 @@ class T2RFE<bits<12> op31_20, dag oops, dag iops, InstrItinClass itin,
bits<4> Rn;
let Inst{19-16} = Rn;
+ let Inst{15-0} = 0xc000;
}
def t2RFEDBW : T2RFE<0b111010000011,
- (outs), (ins rGPR:$Rn), NoItinerary, "rfedb", "\t$Rn!",
+ (outs), (ins GPR:$Rn), NoItinerary, "rfedb", "\t$Rn!",
[/* For disassembly only; pattern left blank */]>;
def t2RFEDB : T2RFE<0b111010000001,
- (outs), (ins rGPR:$Rn), NoItinerary, "rfeab", "\t$Rn",
+ (outs), (ins GPR:$Rn), NoItinerary, "rfedb", "\t$Rn",
[/* For disassembly only; pattern left blank */]>;
def t2RFEIAW : T2RFE<0b111010011011,
- (outs), (ins rGPR:$Rn), NoItinerary, "rfeia", "\t$Rn!",
+ (outs), (ins GPR:$Rn), NoItinerary, "rfeia", "\t$Rn!",
[/* For disassembly only; pattern left blank */]>;
def t2RFEIA : T2RFE<0b111010011001,
- (outs), (ins rGPR:$Rn), NoItinerary, "rfeia", "\t$Rn",
+ (outs), (ins GPR:$Rn), NoItinerary, "rfeia", "\t$Rn",
[/* For disassembly only; pattern left blank */]>;
//===----------------------------------------------------------------------===//
@@ -3352,10 +3347,8 @@ def t2MSR : T2SpecialReg<0b111100111000 /* op31-20 */, 0b10 /* op15-14 */,
// Move between coprocessor and ARM core register -- for disassembly only
//
-class t2MovRCopro<string opc, bit direction>
- : T2Cop<(outs), (ins p_imm:$cop, i32imm:$opc1,
- GPR:$Rt, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2),
- !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"),
+class t2MovRCopro<string opc, bit direction, dag oops, dag iops>
+ : T2Cop<oops, iops, !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"),
[/* For disassembly only; pattern left blank */]> {
let Inst{27-24} = 0b1110;
let Inst{20} = direction;
@@ -3376,8 +3369,12 @@ class t2MovRCopro<string opc, bit direction>
let Inst{19-16} = CRn;
}
-def t2MCR2 : t2MovRCopro<"mcr2", 0 /* from ARM core register to coprocessor */>;
-def t2MRC2 : t2MovRCopro<"mrc2", 1 /* from coprocessor to ARM core register */>;
+def t2MCR2 : t2MovRCopro<"mcr2", 0 /* from ARM core register to coprocessor */,
+ (outs), (ins p_imm:$cop, i32imm:$opc1, GPR:$Rt, c_imm:$CRn,
+ c_imm:$CRm, i32imm:$opc2)>;
+def t2MRC2 : t2MovRCopro<"mrc2", 1 /* from coprocessor to ARM core register */,
+ (outs GPR:$Rt), (ins p_imm:$cop, i32imm:$opc1, c_imm:$CRn,
+ c_imm:$CRm, i32imm:$opc2)>;
class t2MovRRCopro<string opc, bit direction>
: T2Cop<(outs), (ins p_imm:$cop, i32imm:$opc1, GPR:$Rt, GPR:$Rt2, c_imm:$CRm),
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index 2990283..376bd96 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -101,14 +101,6 @@ multiclass vfp_ldst_mult<string asm, bit L_bit,
let Inst{21} = 1; // Writeback
let Inst{20} = L_bit;
}
- def DDB :
- AXDI4<(outs), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops),
- IndexModeNone, itin,
- !strconcat(asm, "db${p}\t$Rn, $regs"), "", []> {
- let Inst{24-23} = 0b10; // Decrement Before
- let Inst{21} = 0; // No writeback
- let Inst{20} = L_bit;
- }
def DDB_UPD :
AXDI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops),
IndexModeUpd, itin_upd,
@@ -143,18 +135,6 @@ multiclass vfp_ldst_mult<string asm, bit L_bit,
// VFP pipelines.
let D = VFPNeonDomain;
}
- def SDB :
- AXSI4<(outs), (ins GPR:$Rn, pred:$p, spr_reglist:$regs, variable_ops),
- IndexModeNone, itin,
- !strconcat(asm, "db${p}\t$Rn, $regs"), "", []> {
- let Inst{24-23} = 0b10; // Decrement Before
- let Inst{21} = 0; // No writeback
- let Inst{20} = L_bit;
-
- // Some single precision VFP instructions may be executed on both NEON and
- // VFP pipelines.
- let D = VFPNeonDomain;
- }
def SDB_UPD :
AXSI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, spr_reglist:$regs, variable_ops),
IndexModeUpd, itin_upd,
@@ -467,6 +447,10 @@ def VMOVRS : AVConv2I<0b11100001, 0b1010,
let Inst{6-5} = 0b00;
let Inst{3-0} = 0b0000;
+
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
}
def VMOVSR : AVConv4I<0b11100000, 0b1010,
@@ -484,6 +468,10 @@ def VMOVSR : AVConv4I<0b11100000, 0b1010,
let Inst{6-5} = 0b00;
let Inst{3-0} = 0b0000;
+
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
}
let neverHasSideEffects = 1 in {
@@ -503,6 +491,10 @@ def VMOVRRD : AVConv3I<0b11000101, 0b1011,
let Inst{19-16} = Rt2;
let Inst{7-6} = 0b00;
+
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
}
def VMOVRRS : AVConv3I<0b11000101, 0b1010,
@@ -510,6 +502,10 @@ def VMOVRRS : AVConv3I<0b11000101, 0b1010,
IIC_fpMOVDI, "vmov", "\t$wb, $dst2, $src1, $src2",
[/* For disassembly only; pattern left blank */]> {
let Inst{7-6} = 0b00;
+
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
}
} // neverHasSideEffects
@@ -532,6 +528,10 @@ def VMOVDRR : AVConv5I<0b11000100, 0b1011,
let Inst{19-16} = Rt2;
let Inst{7-6} = 0b00;
+
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
}
let neverHasSideEffects = 1 in
@@ -540,6 +540,10 @@ def VMOVSRR : AVConv5I<0b11000100, 0b1010,
IIC_fpMOVID, "vmov", "\t$dst1, $dst2, $src1, $src2",
[/* For disassembly only; pattern left blank */]> {
let Inst{7-6} = 0b00;
+
+ // Some single precision VFP instructions may be executed on both NEON and VFP
+ // pipelines.
+ let D = VFPNeonDomain;
}
// FMRDH: SPR -> GPR
@@ -972,33 +976,15 @@ def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin),
//
let neverHasSideEffects = 1 in {
-def VMOVDcc : ADuI<0b11101, 0b11, 0b0000, 0b01, 0,
- (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
- IIC_fpUNA64, "vmov", ".f64\t$Dd, $Dm",
+def VMOVDcc : ARMPseudoInst<(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm, pred:$p),
+ Size4Bytes, IIC_fpUNA64,
[/*(set DPR:$Dd, (ARMcmov DPR:$Dn, DPR:$Dm, imm:$cc))*/]>,
RegConstraint<"$Dn = $Dd">;
-def VMOVScc : ASuI<0b11101, 0b11, 0b0000, 0b01, 0,
- (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
- IIC_fpUNA32, "vmov", ".f32\t$Sd, $Sm",
+def VMOVScc : ARMPseudoInst<(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm, pred:$p),
+ Size4Bytes, IIC_fpUNA32,
[/*(set SPR:$Sd, (ARMcmov SPR:$Sn, SPR:$Sm, imm:$cc))*/]>,
RegConstraint<"$Sn = $Sd">;
-
-def VNEGDcc : ADuI<0b11101, 0b11, 0b0001, 0b01, 0,
- (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
- IIC_fpUNA64, "vneg", ".f64\t$Dd, $Dm",
- [/*(set DPR:$Dd, (ARMcneg DPR:$Dn, DPR:$Dm, imm:$cc))*/]>,
- RegConstraint<"$Dn = $Dd">;
-
-def VNEGScc : ASuI<0b11101, 0b11, 0b0001, 0b01, 0,
- (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
- IIC_fpUNA32, "vneg", ".f32\t$Sd, $Sm",
- [/*(set SPR:$Sd, (ARMcneg SPR:$Sn, SPR:$Sm, imm:$cc))*/]>,
- RegConstraint<"$Sn = $Sd"> {
- // Some single precision VFP instructions may be executed on both NEON and
- // VFP pipelines on A8.
- let D = VFPNeonA8Domain;
-}
} // neverHasSideEffects
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index d9dc5cd..df89fad 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -79,7 +79,7 @@ namespace {
unsigned Position;
MachineBasicBlock::iterator MBBI;
bool Merged;
- MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
+ MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
MachineBasicBlock::iterator i)
: Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
};
@@ -174,7 +174,7 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
switch (Mode) {
default: llvm_unreachable("Unhandled submode!");
case ARM_AM::ia: return ARM::VLDMSIA;
- case ARM_AM::db: return ARM::VLDMSDB;
+ case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
}
break;
case ARM::VSTRS:
@@ -182,7 +182,7 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
switch (Mode) {
default: llvm_unreachable("Unhandled submode!");
case ARM_AM::ia: return ARM::VSTMSIA;
- case ARM_AM::db: return ARM::VSTMSDB;
+ case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
}
break;
case ARM::VLDRD:
@@ -190,7 +190,7 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
switch (Mode) {
default: llvm_unreachable("Unhandled submode!");
case ARM_AM::ia: return ARM::VLDMDIA;
- case ARM_AM::db: return ARM::VLDMDDB;
+ case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
}
break;
case ARM::VSTRD:
@@ -198,7 +198,7 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
switch (Mode) {
default: llvm_unreachable("Unhandled submode!");
case ARM_AM::ia: return ARM::VSTMDIA;
- case ARM_AM::db: return ARM::VSTMDDB;
+ case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
}
break;
}
@@ -246,13 +246,9 @@ AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
case ARM::t2LDMDB_UPD:
case ARM::t2STMDB:
case ARM::t2STMDB_UPD:
- case ARM::VLDMSDB:
case ARM::VLDMSDB_UPD:
- case ARM::VSTMSDB:
case ARM::VSTMSDB_UPD:
- case ARM::VLDMDDB:
case ARM::VLDMDDB_UPD:
- case ARM::VSTMDDB:
case ARM::VSTMDDB_UPD:
return ARM_AM::db;
@@ -312,6 +308,10 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
// VLDM/VSTM do not support DB mode without also updating the base reg.
Mode = ARM_AM::db;
else if (Offset != 0) {
+ // Check if this is a supported opcode before we insert instructions to
+ // calculate a new base register.
+ if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;
+
// If starting offset isn't zero, insert a MI to materialize a new base.
// But only do so if it is cost effective, i.e. merging more than two
// loads / stores.
@@ -354,6 +354,7 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
Opcode == ARM::VLDRD);
Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
+ if (!Opcode) return false;
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
.addReg(Base, getKillRegState(BaseKill))
.addImm(Pred).addReg(PredReg);
@@ -453,6 +454,25 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
unsigned PRegNum = PMO.isUndef() ? UINT_MAX
: getARMRegisterNumbering(PReg);
unsigned Count = 1;
+ unsigned Limit = ~0U;
+
+ // vldm / vstm limits are 32 registers for the S variants and 16 for the
+ // D variants.
+ switch (Opcode) {
+ default: break;
+ case ARM::VLDRS:
+ case ARM::VSTRS:
+ Limit = 32;
+ break;
+ case ARM::VLDRD:
+ case ARM::VSTRD:
+ Limit = 16;
+ break;
+ }
for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
int NewOffset = MemOps[i].Offset;
@@ -460,13 +480,13 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
unsigned Reg = MO.getReg();
unsigned RegNum = MO.isUndef() ? UINT_MAX
: getARMRegisterNumbering(Reg);
- // Register numbers must be in ascending order. For VFP, the registers
- // must also be consecutive and there is a limit of 16 double-word
- // registers per instruction.
+ // Register numbers must be in ascending order. For VFP / NEON load and
+ // store multiples, the registers must also be consecutive and within the
+ // limit on the number of registers per instruction.
if (Reg != ARM::SP &&
NewOffset == Offset + (int)Size &&
- ((isNotVFP && RegNum > PRegNum)
- || ((Size < 8 || Count < 16) && RegNum == PRegNum+1))) {
+ ((isNotVFP && RegNum > PRegNum) ||
+ ((Count < Limit) && RegNum == PRegNum+1))) {
Offset += Size;
PRegNum = RegNum;
++Count;
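A compact model of the consecutiveness check above (SP, kill/undef, and the
ascending-order rule for core registers omitted): a VFP register can join the
run only if it immediately follows the previous one and the run is still under
the per-instruction limit.

    #include <cassert>

    // Can RegNum extend a run of Count registers ending at PRegNum,
    // given the vldm/vstm limit for this register class?
    static bool canExtendRun(unsigned RegNum, unsigned PRegNum,
                             unsigned Count, unsigned Limit) {
      return Count < Limit && RegNum == PRegNum + 1;
    }

    int main() {
      assert(canExtendRun(5, 4, 3, 32));     // s5 extends ...s3,s4
      assert(!canExtendRun(6, 4, 3, 32));    // gap: not consecutive
      assert(!canExtendRun(17, 16, 16, 16)); // d-register limit reached
      return 0;
    }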
@@ -567,14 +587,10 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
case ARM::t2STMIA:
case ARM::t2STMDB:
case ARM::VLDMSIA:
- case ARM::VLDMSDB:
case ARM::VSTMSIA:
- case ARM::VSTMSDB:
return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
case ARM::VLDMDIA:
- case ARM::VLDMDDB:
case ARM::VSTMDIA:
- case ARM::VSTMDDB:
return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
}
}
@@ -624,7 +640,6 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
}
break;
case ARM::VLDMSIA:
- case ARM::VLDMSDB:
switch (Mode) {
default: llvm_unreachable("Unhandled submode!");
case ARM_AM::ia: return ARM::VLDMSIA_UPD;
@@ -632,7 +647,6 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
}
break;
case ARM::VLDMDIA:
- case ARM::VLDMDDB:
switch (Mode) {
default: llvm_unreachable("Unhandled submode!");
case ARM_AM::ia: return ARM::VLDMDIA_UPD;
@@ -640,7 +654,6 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
}
break;
case ARM::VSTMSIA:
- case ARM::VSTMSDB:
switch (Mode) {
default: llvm_unreachable("Unhandled submode!");
case ARM_AM::ia: return ARM::VSTMSIA_UPD;
@@ -648,7 +661,6 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
}
break;
case ARM::VSTMDIA:
- case ARM::VSTMDDB:
switch (Mode) {
default: llvm_unreachable("Unhandled submode!");
case ARM_AM::ia: return ARM::VSTMDIA_UPD;
@@ -749,7 +761,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
MIB.addOperand(MI->getOperand(OpNum));
// Transfer memoperands.
- (*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
MBB.erase(MBBI);
return true;
@@ -1275,14 +1287,14 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
CurrPred, CurrPredReg, Scratch, MemOps, Merges);
- // Try folding preceeding/trailing base inc/dec into the generated
+ // Try folding preceding/trailing base inc/dec into the generated
// LDM/STM ops.
for (unsigned i = 0, e = Merges.size(); i < e; ++i)
if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
++NumMerges;
NumMerges += Merges.size();
- // Try folding preceeding/trailing base inc/dec into those load/store
+ // Try folding preceding/trailing base inc/dec into those load/store
// that were not merged to form LDM/STM ops.
for (unsigned i = 0; i != NumMemOps; ++i)
if (!MemOps[i].Merged)
@@ -1292,7 +1304,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
// RS may be pointing to an instruction that's deleted.
RS->skipTo(prior(MBBI));
} else if (NumMemOps == 1) {
- // Try folding preceeding/trailing base inc/dec into the single
+ // Try folding preceding/trailing base inc/dec into the single
// load/store.
if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
++NumMerges;
@@ -1322,7 +1334,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
}
/// MergeReturnIntoLDM - If this is a exit BB, try merging the return ops
-/// ("bx lr" and "mov pc, lr") into the preceeding stack restore so it
+/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restore the value of LR into pc.
/// ldmfd sp!, {..., lr}
/// bx lr
@@ -1530,15 +1542,9 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
// Then make sure the immediate offset fits.
int OffImm = getMemoryOpOffset(Op0);
if (isT2) {
- if (OffImm < 0) {
- if (OffImm < -255)
- // Can't fall back to t2LDRi8 / t2STRi8.
- return false;
- } else {
- int Limit = (1 << 8) * Scale;
- if (OffImm >= Limit || (OffImm & (Scale-1)))
- return false;
- }
+ int Limit = (1 << 8) * Scale;
+ if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
+ return false;
Offset = OffImm;
} else {
ARM_AM::AddrOpc AddSub = ARM_AM::add;
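Concretely, assuming Scale is 4 for the double-word case, Limit is 1024 and
the accepted Thumb2 offsets are multiples of 4 strictly inside (-1024, 1024).
A minimal check mirroring the new condition:

    #include <cassert>

    static bool t2DWordOffsetOK(int OffImm, int Scale = 4) {
      int Limit = (1 << 8) * Scale;
      return OffImm < Limit && OffImm > -Limit && (OffImm & (Scale - 1)) == 0;
    }

    int main() {
      assert(t2DWordOffsetOK(1020));
      assert(t2DWordOffsetOK(-1020));
      assert(!t2DWordOffsetOK(1024));   // at the positive bound
      assert(!t2DWordOffsetOK(-1024));  // now rejected symmetrically
      assert(!t2DWordOffsetOK(6));      // not a multiple of Scale
      return 0;
    }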
diff --git a/lib/Target/ARM/ARMMCAsmInfo.cpp b/lib/Target/ARM/ARMMCAsmInfo.cpp
index 53edfca..a3f89e9 100644
--- a/lib/Target/ARM/ARMMCAsmInfo.cpp
+++ b/lib/Target/ARM/ARMMCAsmInfo.cpp
@@ -12,8 +12,16 @@
//===----------------------------------------------------------------------===//
#include "ARMMCAsmInfo.h"
+#include "llvm/Support/CommandLine.h"
+
using namespace llvm;
+cl::opt<bool>
+EnableARMEHABI("arm-enable-ehabi", cl::Hidden,
+ cl::desc("Generate ARM EHABI tables"),
+ cl::init(false));
+
static const char *const arm_asm_table[] = {
"{r0}", "r0",
"{r1}", "r1",
@@ -65,4 +73,8 @@ ARMELFMCAsmInfo::ARMELFMCAsmInfo() {
DwarfRequiresFrameSection = false;
SupportsDebugInformation = true;
+
+ // Exception handling
+ if (EnableARMEHABI)
+ ExceptionsType = ExceptionHandling::ARM;
}
diff --git a/lib/Target/ARM/ARMMCCodeEmitter.cpp b/lib/Target/ARM/ARMMCCodeEmitter.cpp
index 6d7b485..10607b1 100644
--- a/lib/Target/ARM/ARMMCCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMMCCodeEmitter.cpp
@@ -278,6 +278,15 @@ public:
unsigned getAddrMode6OffsetOpValue(const MCInst &MI, unsigned Op,
SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getShiftRight8Imm(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getShiftRight16Imm(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getShiftRight32Imm(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getShiftRight64Imm(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
unsigned NEONThumb2DataIPostEncoder(const MCInst &MI,
unsigned EncodedValue) const;
unsigned NEONThumb2LoadStorePostEncoder(const MCInst &MI,
@@ -1201,6 +1210,30 @@ getAddrMode6OffsetOpValue(const MCInst &MI, unsigned Op,
return MO.getReg();
}
+unsigned ARMMCCodeEmitter::
+getShiftRight8Imm(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return 8 - MI.getOperand(Op).getImm();
+}
+
+unsigned ARMMCCodeEmitter::
+getShiftRight16Imm(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return 16 - MI.getOperand(Op).getImm();
+}
+
+unsigned ARMMCCodeEmitter::
+getShiftRight32Imm(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return 32 - MI.getOperand(Op).getImm();
+}
+
+unsigned ARMMCCodeEmitter::
+getShiftRight64Imm(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ return 64 - MI.getOperand(Op).getImm();
+}
+
void ARMMCCodeEmitter::
EncodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups) const {
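These encoders store a NEON shift-right amount as (element size - shift),
the form the operand field wants; a hypothetical decoder (sketch only, not an
LLVM API) simply inverts the transform:

    #include <cassert>

    // Inverse of the getShiftRight*Imm helpers above.
    static unsigned decodeShiftRightImm(unsigned Encoded, unsigned Size) {
      return Size - Encoded;
    }

    int main() {
      assert(decodeShiftRightImm(8 - 3, 8) == 3);     // round-trips shift 3
      assert(decodeShiftRightImm(64 - 17, 64) == 17);
      return 0;
    }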
diff --git a/lib/Target/ARM/ARMMCExpr.h b/lib/Target/ARM/ARMMCExpr.h
index d42f766..0a2e883 100644
--- a/lib/Target/ARM/ARMMCExpr.h
+++ b/lib/Target/ARM/ARMMCExpr.h
@@ -60,6 +60,9 @@ public:
bool EvaluateAsRelocatableImpl(MCValue &Res,
const MCAsmLayout *Layout) const;
void AddValueSymbols(MCAssembler *) const;
+ const MCSection *FindAssociatedSection() const {
+ return getSubExpr()->FindAssociatedSection();
+ }
static bool classof(const MCExpr *E) {
return E->getKind() == MCExpr::Target;
diff --git a/lib/Target/ARM/ARMRegisterInfo.cpp b/lib/Target/ARM/ARMRegisterInfo.cpp
index ad51bc1..1cba1ba 100644
--- a/lib/Target/ARM/ARMRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMRegisterInfo.cpp
@@ -12,26 +12,8 @@
//===----------------------------------------------------------------------===//
#include "ARM.h"
-#include "ARMAddressingModes.h"
#include "ARMBaseInstrInfo.h"
-#include "ARMInstrInfo.h"
-#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
-#include "ARMSubtarget.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineLocation.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/SmallVector.h"
using namespace llvm;
ARMRegisterInfo::ARMRegisterInfo(const ARMBaseInstrInfo &tii,
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index 22d15b5..54bf82a 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -70,6 +70,8 @@ def R4 : ARMReg< 4, "r4">, DwarfRegNum<[4]>;
def R5 : ARMReg< 5, "r5">, DwarfRegNum<[5]>;
def R6 : ARMReg< 6, "r6">, DwarfRegNum<[6]>;
def R7 : ARMReg< 7, "r7">, DwarfRegNum<[7]>;
+// These require 32-bit instructions.
+let CostPerUse = 1 in {
def R8 : ARMReg< 8, "r8">, DwarfRegNum<[8]>;
def R9 : ARMReg< 9, "r9">, DwarfRegNum<[9]>;
def R10 : ARMReg<10, "r10">, DwarfRegNum<[10]>;
@@ -78,6 +80,7 @@ def R12 : ARMReg<12, "r12">, DwarfRegNum<[12]>;
def SP : ARMReg<13, "sp">, DwarfRegNum<[13]>;
def LR : ARMReg<14, "lr">, DwarfRegNum<[14]>;
def PC : ARMReg<15, "pc">, DwarfRegNum<[15]>;
+}
// Float registers
def S0 : ARMFReg< 0, "s0">; def S1 : ARMFReg< 1, "s1">;
@@ -99,33 +102,41 @@ def S30 : ARMFReg<30, "s30">; def S31 : ARMFReg<31, "s31">;
// Aliases of the F* registers used to hold 64-bit fp values (doubles)
let SubRegIndices = [ssub_0, ssub_1] in {
-def D0 : ARMReg< 0, "d0", [S0, S1]>;
-def D1 : ARMReg< 1, "d1", [S2, S3]>;
-def D2 : ARMReg< 2, "d2", [S4, S5]>;
-def D3 : ARMReg< 3, "d3", [S6, S7]>;
-def D4 : ARMReg< 4, "d4", [S8, S9]>;
-def D5 : ARMReg< 5, "d5", [S10, S11]>;
-def D6 : ARMReg< 6, "d6", [S12, S13]>;
-def D7 : ARMReg< 7, "d7", [S14, S15]>;
-def D8 : ARMReg< 8, "d8", [S16, S17]>;
-def D9 : ARMReg< 9, "d9", [S18, S19]>;
-def D10 : ARMReg<10, "d10", [S20, S21]>;
-def D11 : ARMReg<11, "d11", [S22, S23]>;
-def D12 : ARMReg<12, "d12", [S24, S25]>;
-def D13 : ARMReg<13, "d13", [S26, S27]>;
-def D14 : ARMReg<14, "d14", [S28, S29]>;
-def D15 : ARMReg<15, "d15", [S30, S31]>;
+def D0 : ARMReg< 0, "d0", [S0, S1]>, DwarfRegNum<[256]>;
+def D1 : ARMReg< 1, "d1", [S2, S3]>, DwarfRegNum<[257]>;
+def D2 : ARMReg< 2, "d2", [S4, S5]>, DwarfRegNum<[258]>;
+def D3 : ARMReg< 3, "d3", [S6, S7]>, DwarfRegNum<[259]>;
+def D4 : ARMReg< 4, "d4", [S8, S9]>, DwarfRegNum<[260]>;
+def D5 : ARMReg< 5, "d5", [S10, S11]>, DwarfRegNum<[261]>;
+def D6 : ARMReg< 6, "d6", [S12, S13]>, DwarfRegNum<[262]>;
+def D7 : ARMReg< 7, "d7", [S14, S15]>, DwarfRegNum<[263]>;
+def D8 : ARMReg< 8, "d8", [S16, S17]>, DwarfRegNum<[264]>;
+def D9 : ARMReg< 9, "d9", [S18, S19]>, DwarfRegNum<[265]>;
+def D10 : ARMReg<10, "d10", [S20, S21]>, DwarfRegNum<[266]>;
+def D11 : ARMReg<11, "d11", [S22, S23]>, DwarfRegNum<[267]>;
+def D12 : ARMReg<12, "d12", [S24, S25]>, DwarfRegNum<[268]>;
+def D13 : ARMReg<13, "d13", [S26, S27]>, DwarfRegNum<[269]>;
+def D14 : ARMReg<14, "d14", [S28, S29]>, DwarfRegNum<[270]>;
+def D15 : ARMReg<15, "d15", [S30, S31]>, DwarfRegNum<[271]>;
}
// VFP3 defines 16 additional double registers
-def D16 : ARMFReg<16, "d16">; def D17 : ARMFReg<17, "d17">;
-def D18 : ARMFReg<18, "d18">; def D19 : ARMFReg<19, "d19">;
-def D20 : ARMFReg<20, "d20">; def D21 : ARMFReg<21, "d21">;
-def D22 : ARMFReg<22, "d22">; def D23 : ARMFReg<23, "d23">;
-def D24 : ARMFReg<24, "d24">; def D25 : ARMFReg<25, "d25">;
-def D26 : ARMFReg<26, "d26">; def D27 : ARMFReg<27, "d27">;
-def D28 : ARMFReg<28, "d28">; def D29 : ARMFReg<29, "d29">;
-def D30 : ARMFReg<30, "d30">; def D31 : ARMFReg<31, "d31">;
+def D16 : ARMFReg<16, "d16">, DwarfRegNum<[272]>;
+def D17 : ARMFReg<17, "d17">, DwarfRegNum<[273]>;
+def D18 : ARMFReg<18, "d18">, DwarfRegNum<[274]>;
+def D19 : ARMFReg<19, "d19">, DwarfRegNum<[275]>;
+def D20 : ARMFReg<20, "d20">, DwarfRegNum<[276]>;
+def D21 : ARMFReg<21, "d21">, DwarfRegNum<[277]>;
+def D22 : ARMFReg<22, "d22">, DwarfRegNum<[278]>;
+def D23 : ARMFReg<23, "d23">, DwarfRegNum<[279]>;
+def D24 : ARMFReg<24, "d24">, DwarfRegNum<[280]>;
+def D25 : ARMFReg<25, "d25">, DwarfRegNum<[281]>;
+def D26 : ARMFReg<26, "d26">, DwarfRegNum<[282]>;
+def D27 : ARMFReg<27, "d27">, DwarfRegNum<[283]>;
+def D28 : ARMFReg<28, "d28">, DwarfRegNum<[284]>;
+def D29 : ARMFReg<29, "d29">, DwarfRegNum<[285]>;
+def D30 : ARMFReg<30, "d30">, DwarfRegNum<[286]>;
+def D31 : ARMFReg<31, "d31">, DwarfRegNum<[287]>;
// Advanced SIMD (NEON) defines 16 quad-word aliases
let SubRegIndices = [dsub_0, dsub_1],
diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td
index 82c6735..49fedf6 100644
--- a/lib/Target/ARM/ARMScheduleA9.td
+++ b/lib/Target/ARM/ARMScheduleA9.td
@@ -656,19 +656,19 @@ def CortexA9Itineraries : ProcessorItineraries<
[1, 1, 1]>,
//
// Single-precision to Integer Move
+ //
+ // On A9 move-from-VFP is free to issue with no stall if other VFP
+ // operations are in flight. I assume it still can't dual-issue though.
InstrItinData<IIC_fpMOVSI, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
- InstrStage<1, [A9_MUX0], 0>,
- InstrStage<1, [A9_DRegsVFP], 0, Required>,
- InstrStage<2, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_NPipe]>],
+ InstrStage<1, [A9_MUX0], 0>],
[2, 1]>,
//
// Double-precision to Integer Move
+ //
+ // On A9 move-from-VFP is free to issue with no stall if other VFP
+ // operations are in flight. I assume it still can't dual-issue though.
InstrItinData<IIC_fpMOVDI, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
- InstrStage<1, [A9_MUX0], 0>,
- InstrStage<1, [A9_DRegsVFP], 0, Required>,
- InstrStage<2, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_NPipe]>],
+ InstrStage<1, [A9_MUX0], 0>],
[2, 1, 1]>,
//
// Single-precision FP Load
@@ -691,20 +691,22 @@ def CortexA9Itineraries : ProcessorItineraries<
[2, 1]>,
//
// FP Load Multiple
+ // FIXME: assumes 2 doubles, which require 2 LS cycles.
InstrItinData<IIC_fpLoad_m, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<2, [A9_DRegsN], 0, Reserved>,
InstrStage<1, [A9_NPipe], 0>,
- InstrStage<1, [A9_LSUnit]>], [1, 1, 1, 1]>,
+ InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1]>,
//
// FP Load Multiple + update
+ // FIXME: assumes 2 doubles, which require 2 LS cycles.
InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<2, [A9_DRegsN], 0, Reserved>,
InstrStage<1, [A9_NPipe], 0>,
- InstrStage<1, [A9_LSUnit]>], [2, 1, 1, 1]>,
+ InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1]>,
//
// Single-precision FP Store
InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
@@ -725,205 +727,206 @@ def CortexA9Itineraries : ProcessorItineraries<
[1, 1]>,
//
// FP Store Multiple
+ // FIXME: assumes 2 doubles, which require 2 LS cycles.
InstrItinData<IIC_fpStore_m,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<2, [A9_DRegsN], 0, Reserved>,
InstrStage<1, [A9_NPipe], 0>,
- InstrStage<1, [A9_LSUnit]>], [1, 1, 1, 1]>,
+ InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1]>,
//
// FP Store Multiple + update
+ // FIXME: assumes 2 doubles, which require 2 LS cycles.
InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<2, [A9_DRegsN], 0, Reserved>,
InstrStage<1, [A9_NPipe], 0>,
- InstrStage<1, [A9_LSUnit]>], [2, 1, 1, 1]>,
+ InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1]>,
// NEON
// VLD1
- // FIXME: Conservatively assume insufficent alignment.
InstrItinData<IIC_VLD1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<2, [A9_NPipe], 0>,
- InstrStage<2, [A9_LSUnit]>],
- [2, 1]>,
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1]>,
// VLD1x2
InstrItinData<IIC_VLD1x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<2, [A9_NPipe], 0>,
- InstrStage<2, [A9_LSUnit]>],
- [2, 2, 1]>,
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1, 1]>,
// VLD1x3
InstrItinData<IIC_VLD1x3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<3, [A9_NPipe], 0>,
- InstrStage<3, [A9_LSUnit]>],
- [2, 2, 3, 1]>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 2, 1]>,
// VLD1x4
InstrItinData<IIC_VLD1x4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<3, [A9_NPipe], 0>,
- InstrStage<3, [A9_LSUnit]>],
- [2, 2, 3, 3, 1]>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 2, 2, 1]>,
// VLD1u
InstrItinData<IIC_VLD1u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<2, [A9_NPipe], 0>,
- InstrStage<2, [A9_LSUnit]>],
- [2, 2, 1]>,
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 2, 1]>,
// VLD1x2u
InstrItinData<IIC_VLD1x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<2, [A9_NPipe], 0>,
- InstrStage<2, [A9_LSUnit]>],
- [2, 2, 2, 1]>,
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [1, 1, 2, 1]>,
// VLD1x3u
InstrItinData<IIC_VLD1x3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<3, [A9_NPipe], 0>,
- InstrStage<3, [A9_LSUnit]>],
- [2, 2, 3, 2, 1]>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 2, 2, 1]>,
// VLD1x4u
InstrItinData<IIC_VLD1x4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<3, [A9_NPipe], 0>,
- InstrStage<3, [A9_LSUnit]>],
- [2, 2, 3, 3, 2, 1]>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [1, 1, 2, 2, 2, 1]>,
//
// VLD1ln
InstrItinData<IIC_VLD1ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<3, [A9_NPipe], 0>,
- InstrStage<3, [A9_LSUnit]>],
- [4, 1, 1, 1]>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [3, 1, 1, 1]>,
//
// VLD1lnu
InstrItinData<IIC_VLD1lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<3, [A9_NPipe], 0>,
- InstrStage<3, [A9_LSUnit]>],
- [4, 2, 1, 1, 1, 1]>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [3, 2, 1, 1, 1, 1]>,
//
// VLD1dup
InstrItinData<IIC_VLD1dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<2, [A9_NPipe], 0>,
- InstrStage<2, [A9_LSUnit]>],
- [3, 1]>,
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 1]>,
//
// VLD1dupu
InstrItinData<IIC_VLD1dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<2, [A9_NPipe], 0>,
- InstrStage<2, [A9_LSUnit]>],
- [3, 2, 1, 1]>,
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 2, 1, 1]>,
//
// VLD2
InstrItinData<IIC_VLD2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 7 cycles
- InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<2, [A9_NPipe], 0>,
- InstrStage<2, [A9_LSUnit]>],
- [3, 3, 1]>,
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 2, 1]>,
//
// VLD2x2
InstrItinData<IIC_VLD2x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<3, [A9_NPipe], 0>,
- InstrStage<3, [A9_LSUnit]>],
- [3, 4, 3, 4, 1]>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 3, 2, 3, 1]>,
//
// VLD2ln
InstrItinData<IIC_VLD2ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<3, [A9_NPipe], 0>,
- InstrStage<3, [A9_LSUnit]>],
- [4, 4, 1, 1, 1, 1]>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [3, 3, 1, 1, 1, 1]>,
//
// VLD2u
InstrItinData<IIC_VLD2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 7 cycles
- InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<2, [A9_NPipe], 0>,
- InstrStage<2, [A9_LSUnit]>],
- [3, 3, 2, 1, 1, 1]>,
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 2, 2, 1, 1, 1]>,
//
// VLD2x2u
InstrItinData<IIC_VLD2x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<3, [A9_NPipe], 0>,
- InstrStage<3, [A9_LSUnit]>],
- [3, 4, 3, 4, 2, 1]>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 3, 2, 3, 2, 1]>,
//
// VLD2lnu
InstrItinData<IIC_VLD2lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<3, [A9_NPipe], 0>,
- InstrStage<3, [A9_LSUnit]>],
- [4, 4, 2, 1, 1, 1, 1, 1]>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [3, 3, 2, 1, 1, 1, 1, 1]>,
//
// VLD2dup
InstrItinData<IIC_VLD2dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<2, [A9_NPipe], 0>,
- InstrStage<2, [A9_LSUnit]>],
- [3, 3, 1]>,
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 2, 1]>,
//
// VLD2dupu
InstrItinData<IIC_VLD2dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<2, [A9_NPipe], 0>,
- InstrStage<2, [A9_LSUnit]>],
- [3, 3, 2, 1, 1]>,
+ InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
+ [2, 2, 2, 1, 1]>,
//
// VLD3
InstrItinData<IIC_VLD3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
- InstrStage<4, [A9_NPipe], 0>,
- InstrStage<4, [A9_LSUnit]>],
- [4, 4, 5, 1]>,
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [3, 3, 4, 1]>,
//
// VLD3ln
InstrItinData<IIC_VLD3ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
@@ -938,10 +941,10 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VLD3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
- InstrStage<4, [A9_NPipe], 0>,
- InstrStage<4, [A9_LSUnit]>],
- [4, 4, 5, 2, 1]>,
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [3, 3, 4, 2, 1]>,
//
// VLD3lnu
InstrItinData<IIC_VLD3lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
@@ -974,108 +977,108 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VLD4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
- InstrStage<4, [A9_NPipe], 0>,
- InstrStage<4, [A9_LSUnit]>],
- [4, 4, 5, 5, 1]>,
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [3, 3, 4, 4, 1]>,
//
// VLD4ln
InstrItinData<IIC_VLD4ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
- InstrStage<5, [A9_NPipe], 0>,
- InstrStage<5, [A9_LSUnit]>],
- [5, 5, 6, 6, 1, 1, 1, 1, 2, 2]>,
+ InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<4, [A9_NPipe], 0>,
+ InstrStage<4, [A9_LSUnit]>],
+ [4, 4, 5, 5, 1, 1, 1, 1, 2, 2]>,
//
// VLD4u
InstrItinData<IIC_VLD4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
- InstrStage<4, [A9_NPipe], 0>,
- InstrStage<4, [A9_LSUnit]>],
- [4, 4, 5, 5, 2, 1]>,
+ InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<3, [A9_NPipe], 0>,
+ InstrStage<3, [A9_LSUnit]>],
+ [3, 3, 4, 4, 2, 1]>,
//
// VLD4lnu
InstrItinData<IIC_VLD4lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
- InstrStage<5, [A9_NPipe], 0>,
- InstrStage<5, [A9_LSUnit]>],
- [5, 5, 6, 6, 2, 1, 1, 1, 1, 1, 2, 2]>,
+ InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<4, [A9_NPipe], 0>,
+ InstrStage<4, [A9_LSUnit]>],
+ [4, 4, 5, 5, 2, 1, 1, 1, 1, 1, 2, 2]>,
//
// VLD4dup
InstrItinData<IIC_VLD4dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<3, [A9_NPipe], 0>,
- InstrStage<3, [A9_LSUnit]>],
- [3, 3, 4, 4, 1]>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 2, 3, 3, 1]>,
//
// VLD4dupu
InstrItinData<IIC_VLD4dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<3, [A9_NPipe], 0>,
- InstrStage<3, [A9_LSUnit]>],
- [3, 3, 4, 4, 2, 1, 1]>,
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
+ [2, 2, 3, 3, 2, 1, 1]>,
//
// VST1
InstrItinData<IIC_VST1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<2, [A9_NPipe], 0>,
- InstrStage<2, [A9_LSUnit]>],
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
[1, 1, 1]>,
//
// VST1x2
InstrItinData<IIC_VST1x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<2, [A9_NPipe], 0>,
- InstrStage<2, [A9_LSUnit]>],
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
[1, 1, 1, 1]>,
//
// VST1x3
InstrItinData<IIC_VST1x3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<3, [A9_NPipe], 0>,
- InstrStage<3, [A9_LSUnit]>],
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
[1, 1, 1, 1, 2]>,
//
// VST1x4
InstrItinData<IIC_VST1x4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<3, [A9_NPipe], 0>,
- InstrStage<3, [A9_LSUnit]>],
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
[1, 1, 1, 1, 2, 2]>,
//
// VST1u
InstrItinData<IIC_VST1u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<2, [A9_NPipe], 0>,
- InstrStage<2, [A9_LSUnit]>],
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
[2, 1, 1, 1, 1]>,
//
// VST1x2u
InstrItinData<IIC_VST1x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<2, [A9_NPipe], 0>,
- InstrStage<2, [A9_LSUnit]>],
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
[2, 1, 1, 1, 1, 1]>,
//
// VST1x3u
@@ -1083,44 +1086,44 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<3, [A9_NPipe], 0>,
- InstrStage<3, [A9_LSUnit]>],
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
[2, 1, 1, 1, 1, 1, 2]>,
//
// VST1x4u
InstrItinData<IIC_VST1x4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<3, [A9_NPipe], 0>,
- InstrStage<3, [A9_LSUnit]>],
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
[2, 1, 1, 1, 1, 1, 2, 2]>,
//
// VST1ln
InstrItinData<IIC_VST1ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<2, [A9_NPipe], 0>,
- InstrStage<2, [A9_LSUnit]>],
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
[1, 1, 1]>,
//
// VST1lnu
InstrItinData<IIC_VST1lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<3, [A9_NPipe], 0>,
- InstrStage<3, [A9_LSUnit]>],
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
[2, 1, 1, 1, 1]>,
//
// VST2
InstrItinData<IIC_VST2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<2, [A9_NPipe], 0>,
- InstrStage<2, [A9_LSUnit]>],
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
[1, 1, 1, 1]>,
//
// VST2x2
@@ -1136,9 +1139,9 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VST2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<2, [A9_NPipe], 0>,
- InstrStage<2, [A9_LSUnit]>],
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
[2, 1, 1, 1, 1, 1]>,
//
// VST2x2u
@@ -1154,36 +1157,36 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VST2ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<2, [A9_NPipe], 0>,
- InstrStage<2, [A9_LSUnit]>],
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
[1, 1, 1, 1]>,
//
// VST2lnu
InstrItinData<IIC_VST2lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<3, [A9_NPipe], 0>,
- InstrStage<3, [A9_LSUnit]>],
+ InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<1, [A9_NPipe], 0>,
+ InstrStage<1, [A9_LSUnit]>],
[2, 1, 1, 1, 1, 1]>,
//
// VST3
InstrItinData<IIC_VST3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<3, [A9_NPipe], 0>,
- InstrStage<3, [A9_LSUnit]>],
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
[1, 1, 1, 1, 2]>,
//
// VST3u
InstrItinData<IIC_VST3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<3, [A9_NPipe], 0>,
- InstrStage<3, [A9_LSUnit]>],
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
[2, 1, 1, 1, 1, 1, 2]>,
//
// VST3ln
@@ -1208,36 +1211,36 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VST4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<3, [A9_NPipe], 0>,
- InstrStage<3, [A9_LSUnit]>],
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
[1, 1, 1, 1, 2, 2]>,
//
// VST4u
InstrItinData<IIC_VST4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<3, [A9_NPipe], 0>,
- InstrStage<3, [A9_LSUnit]>],
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
[2, 1, 1, 1, 1, 1, 2, 2]>,
//
// VST4ln
InstrItinData<IIC_VST4ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<3, [A9_NPipe], 0>,
- InstrStage<3, [A9_LSUnit]>],
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
[1, 1, 1, 1, 2, 2]>,
//
// VST4lnu
InstrItinData<IIC_VST4lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
InstrStage<1, [A9_DRegsN], 0, Required>,
- InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<3, [A9_NPipe], 0>,
- InstrStage<3, [A9_LSUnit]>],
+ InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe], 0>,
+ InstrStage<2, [A9_LSUnit]>],
[2, 1, 1, 1, 1, 1, 2, 2]>,
//
diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
index 2b9202b..aa1e398 100644
--- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -35,7 +35,7 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
// This requires 4-byte alignment.
if ((Align & 3) != 0)
return SDValue();
- // This requires the copy size to be a constant, preferrably
+ // This requires the copy size to be a constant, preferably
// within a subtarget-specific limit.
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
if (!ConstantSize)
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index 1465984..c6f266b 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -38,6 +38,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
, ARMFPUType(None)
, UseNEONForSinglePrecisionFP(false)
, SlowFPVMLx(false)
+ , HasVMLxForwarding(false)
, SlowFPBrcc(false)
, IsThumb(isT)
, ThumbMode(Thumb1)
@@ -51,6 +52,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
, HasT2ExtractPack(false)
, HasDataBarrier(false)
, Pref32BitThumb(false)
+ , AvoidCPSRPartialUpdate(false)
, HasMPExtension(false)
, FPOnlySP(false)
, AllowsUnalignedMem(false)
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 76c1c3f..0271c87 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -61,6 +61,10 @@ protected:
/// whether the FP VML[AS] instructions are slow (if so, don't use them).
bool SlowFPVMLx;
+ /// HasVMLxForwarding - If true, NEON has special multiplier accumulator
+ /// forwarding to allow mul + mla being issued back to back.
+ bool HasVMLxForwarding;
+
/// SlowFPBrcc - True if floating point compare + branch is slow.
bool SlowFPBrcc;
@@ -106,6 +110,11 @@ protected:
/// over 16-bit ones.
bool Pref32BitThumb;
+ /// AvoidCPSRPartialUpdate - If true, codegen would avoid using instructions
+ /// that partially update CPSR and add false dependency on the previous
+ /// CPSR setting instruction.
+ bool AvoidCPSRPartialUpdate;
+
/// HasMPExtension - True if the subtarget supports Multiprocessing
/// extension (ARMv7 only).
bool HasMPExtension;
@@ -182,15 +191,19 @@ protected:
bool hasT2ExtractPack() const { return HasT2ExtractPack; }
bool hasDataBarrier() const { return HasDataBarrier; }
bool useFPVMLx() const { return !SlowFPVMLx; }
+ bool hasVMLxForwarding() const { return HasVMLxForwarding; }
bool isFPBrccSlow() const { return SlowFPBrcc; }
bool isFPOnlySP() const { return FPOnlySP; }
bool prefers32BitThumb() const { return Pref32BitThumb; }
+ bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; }
bool hasMPExtension() const { return HasMPExtension; }
bool hasFP16() const { return HasFP16; }
bool hasD16() const { return HasD16; }
- bool isTargetDarwin() const { return TargetTriple.getOS() == Triple::Darwin; }
+ const Triple &getTargetTriple() const { return TargetTriple; }
+
+ bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
bool isTargetELF() const { return !isTargetDarwin(); }
bool isAPCS_ABI() const { return TargetABI == ARM_ABI_APCS; }
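
As a minimal sketch of how the two new hooks are meant to be consumed (assuming this tree's ARMSubtarget.h; shouldKeepMulMlaAdjacent is a hypothetical helper, not part of the patch):

#include "ARMSubtarget.h"

// Hypothetical illustration: a scheduling heuristic that keeps a vmul
// feeding a vmla adjacent on cores with MLx forwarding (e.g. Cortex-A9).
static bool shouldKeepMulMlaAdjacent(const llvm::ARMSubtarget &ST) {
  // Both queries come from the accessors added above.
  return ST.hasVMLxForwarding() && ST.useFPVMLx();
}
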
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 0ee773b..29aa4f7 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -22,16 +22,13 @@
#include "llvm/Target/TargetRegistry.h"
using namespace llvm;
-static cl::opt<bool>ExpandMLx("expand-fp-mlx", cl::init(false), cl::Hidden);
-
static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
Triple TheTriple(TT);
- switch (TheTriple.getOS()) {
- case Triple::Darwin:
+
+ if (TheTriple.isOSDarwin())
return new ARMMCAsmInfoDarwin();
- default:
- return new ARMELFMCAsmInfo();
- }
+
+ return new ARMELFMCAsmInfo();
}
// This is duplicated code. Refactor this.
@@ -41,17 +38,17 @@ static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
MCCodeEmitter *Emitter,
bool RelaxAll,
bool NoExecStack) {
- switch (Triple(TT).getOS()) {
- case Triple::Darwin:
+ Triple TheTriple(TT);
+
+ if (TheTriple.isOSDarwin())
return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll);
- case Triple::MinGW32:
- case Triple::Cygwin:
- case Triple::Win32:
+
+ if (TheTriple.isOSWindows()) {
llvm_unreachable("ARM does not support Windows COFF format");
return NULL;
- default:
- return createELFStreamer(Ctx, TAB, OS, Emitter, RelaxAll, NoExecStack);
}
+
+ return createELFStreamer(Ctx, TAB, OS, Emitter, RelaxAll, NoExecStack);
}
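
The refactor above replaces switches over Triple::OSType with Triple predicate methods; a self-contained sketch of the same dispatch (streamerKindFor is illustrative, not part of the patch):

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"

// Illustrative only: classify the streamer the way createMCStreamer does,
// using Triple predicates instead of an OSType switch.
static const char *streamerKindFor(llvm::StringRef TT) {
  llvm::Triple T(TT);
  if (T.isOSDarwin())
    return "MachO";
  if (T.isOSWindows())
    return "unsupported"; // ARM has no Windows COFF support here.
  return "ELF";
}
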
extern "C" void LLVMInitializeARMTarget() {
@@ -86,8 +83,7 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T,
: LLVMTargetMachine(T, TT),
Subtarget(TT, FS, isThumb),
JITInfo(),
- InstrItins(Subtarget.getInstrItineraryData())
-{
+ InstrItins(Subtarget.getInstrItineraryData()) {
DefRelocModel = getRelocationModel();
}
@@ -149,8 +145,7 @@ bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM,
// FIXME: temporarily disabling load / store optimization pass for Thumb1.
if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only())
PM.add(createARMLoadStoreOptimizationPass(true));
- if (ExpandMLx &&
- OptLevel != CodeGenOpt::None && Subtarget.hasVFP2())
+ if (OptLevel != CodeGenOpt::None && Subtarget.isCortexA9())
PM.add(createMLxExpansionPass());
return true;
diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp
index 7535da5..19defa1 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.cpp
+++ b/lib/Target/ARM/ARMTargetObjectFile.cpp
@@ -36,8 +36,9 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx,
ELF::SHF_WRITE |
ELF::SHF_ALLOC,
SectionKind::getDataRel());
+ LSDASection = NULL;
}
-
+
AttributesSection =
getContext().getELFSection(".ARM.attributes",
ELF::SHT_ARM_ATTRIBUTES,
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 129af20..29ecc18 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -29,15 +29,6 @@
#include "llvm/ADT/Twine.h"
using namespace llvm;
-/// Shift types used for register controlled shifts in ARM memory addressing.
-enum ShiftType {
- Lsl,
- Lsr,
- Asr,
- Ror,
- Rrx
-};
-
namespace {
class ARMOperand;
@@ -55,8 +46,10 @@ class ARMAsmParser : public TargetAsmParser {
int TryParseRegister();
virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
bool TryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool TryParseShiftRegister(SmallVectorImpl<MCParsedAsmOperand*> &);
bool ParseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &);
- bool ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &,
+ ARMII::AddrMode AddrMode);
bool ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &, StringRef Mnemonic);
bool ParsePrefix(ARMMCExpr::VariantKind &RefKind);
const MCExpr *ApplyPrefixToExpr(const MCExpr *E,
@@ -65,13 +58,14 @@ class ARMAsmParser : public TargetAsmParser {
bool ParseMemoryOffsetReg(bool &Negative,
bool &OffsetRegShifted,
- enum ShiftType &ShiftType,
+ enum ARM_AM::ShiftOpc &ShiftType,
const MCExpr *&ShiftAmount,
const MCExpr *&Offset,
bool &OffsetIsReg,
int &OffsetRegNum,
SMLoc &E);
- bool ParseShift(enum ShiftType &St, const MCExpr *&ShiftAmount, SMLoc &E);
+ bool ParseShift(enum ARM_AM::ShiftOpc &St,
+ const MCExpr *&ShiftAmount, SMLoc &E);
bool ParseDirectiveWord(unsigned Size, SMLoc L);
bool ParseDirectiveThumb(SMLoc L);
bool ParseDirectiveThumbFunc(SMLoc L);
@@ -102,10 +96,25 @@ class ARMAsmParser : public TargetAsmParser {
SmallVectorImpl<MCParsedAsmOperand*>&);
OperandMatchResultTy tryParseMSRMaskOperand(
SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy tryParseMemMode2Operand(
+ SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy tryParseMemMode3Operand(
+ SmallVectorImpl<MCParsedAsmOperand*>&);
+
+ // Asm Match Converter Methods
+ bool CvtLdWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool CvtStWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool CvtLdWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool CvtStWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
public:
ARMAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &_TM)
: TargetAsmParser(T), Parser(_Parser), TM(_TM) {
+ MCAsmParserExtension::Initialize(_Parser);
// Initialize the set of available features.
setAvailableFeatures(ComputeAvailableFeatures(
&TM.getSubtarget<ARMSubtarget>()));
@@ -136,6 +145,7 @@ class ARMOperand : public MCParsedAsmOperand {
RegisterList,
DPRRegisterList,
SPRRegisterList,
+ Shifter,
Token
} Kind;
@@ -178,13 +188,14 @@ class ARMOperand : public MCParsedAsmOperand {
/// Combined record for all forms of ARM address expressions.
struct {
+ ARMII::AddrMode AddrMode;
unsigned BaseRegNum;
union {
unsigned RegNum; ///< Offset register num, when OffsetIsReg.
const MCExpr *Value; ///< Offset value, when !OffsetIsReg.
} Offset;
const MCExpr *ShiftAmount; // used when OffsetRegShifted is true
- enum ShiftType ShiftType; // used when OffsetRegShifted is true
+ enum ARM_AM::ShiftOpc ShiftType; // used when OffsetRegShifted is true
unsigned OffsetRegShifted : 1; // only used when OffsetIsReg is true
unsigned Preindexed : 1;
unsigned Postindexed : 1;
@@ -192,6 +203,11 @@ class ARMOperand : public MCParsedAsmOperand {
unsigned Negative : 1; // only used when OffsetIsReg is true
unsigned Writeback : 1;
} Mem;
+
+ struct {
+ ARM_AM::ShiftOpc ShiftTy;
+ unsigned RegNum;
+ } Shift;
};
ARMOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
@@ -234,6 +250,10 @@ public:
break;
case ProcIFlags:
IFlags = o.IFlags;
+ break;
+ case Shifter:
+ Shift = o.Shift;
+ break;
}
}
@@ -290,7 +310,9 @@ public:
/// @name Memory Operand Accessors
/// @{
-
+ ARMII::AddrMode getMemAddrMode() const {
+ return Mem.AddrMode;
+ }
unsigned getMemBaseRegNum() const {
return Mem.BaseRegNum;
}
@@ -310,7 +332,7 @@ public:
assert(Mem.OffsetIsReg && Mem.OffsetRegShifted && "Invalid access!");
return Mem.ShiftAmount;
}
- enum ShiftType getMemShiftType() const {
+ enum ARM_AM::ShiftOpc getMemShiftType() const {
assert(Mem.OffsetIsReg && Mem.OffsetRegShifted && "Invalid access!");
return Mem.ShiftType;
}
@@ -334,6 +356,52 @@ public:
bool isToken() const { return Kind == Token; }
bool isMemBarrierOpt() const { return Kind == MemBarrierOpt; }
bool isMemory() const { return Kind == Memory; }
+ bool isShifter() const { return Kind == Shifter; }
+ bool isMemMode2() const {
+ if (getMemAddrMode() != ARMII::AddrMode2)
+ return false;
+
+ if (getMemOffsetIsReg())
+ return true;
+
+ if (getMemNegative() &&
+ !(getMemPostindexed() || getMemPreindexed()))
+ return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+
+ // The offset must be in the range 0-4095 (imm12).
+ if (Value > 4095 || Value < -4095)
+ return false;
+
+ return true;
+ }
+ bool isMemMode3() const {
+ if (getMemAddrMode() != ARMII::AddrMode3)
+ return false;
+
+ if (getMemOffsetIsReg()) {
+ if (getMemOffsetRegShifted())
+ return false; // No shift with offset reg allowed
+ return true;
+ }
+
+ if (getMemNegative() &&
+ !(getMemPostindexed() || getMemPreindexed()))
+ return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+
+ // The offset magnitude must fit in imm8 (0-255); the sign is carried separately.
+ if (Value > 255 || Value < -255)
+ return false;
+
+ return true;
+ }
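
The two range checks above share one shape: the parser keeps a signed constant, while the encoding stores a magnitude (imm12 or imm8) plus a separate add/sub bit. A standalone sketch of that invariant (assumed semantics, not part of the patch):

#include <cstdint>

// Mode 2 offsets carry a 12-bit magnitude, mode 3 offsets an 8-bit one;
// the sign is encoded separately as ARM_AM::add / ARM_AM::sub.
static bool fitsSignedMagnitude(int64_t V, unsigned Bits) {
  int64_t Limit = (1LL << Bits) - 1; // 4095 for imm12, 255 for imm8
  return V >= -Limit && V <= Limit;
}
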
bool isMemMode5() const {
if (!isMemory() || getMemOffsetIsReg() || getMemWriteback() ||
getMemNegative())
@@ -346,6 +414,23 @@ public:
int64_t Value = CE->getValue();
return ((Value & 0x3) == 0 && Value <= 1020 && Value >= -1020);
}
+ bool isMemMode7() const {
+ if (!isMemory() ||
+ getMemPreindexed() ||
+ getMemPostindexed() ||
+ getMemOffsetIsReg() ||
+ getMemNegative() ||
+ getMemWriteback())
+ return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
+ if (!CE) return false;
+
+ if (CE->getValue())
+ return false;
+
+ return true;
+ }
bool isMemModeRegThumb() const {
if (!isMemory() || !getMemOffsetIsReg() || getMemWriteback())
return false;
@@ -402,6 +487,12 @@ public:
Inst.addOperand(MCOperand::CreateReg(getReg()));
}
+ void addShifterOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(
+ ARM_AM::getSORegOpc(Shift.ShiftTy, 0)));
+ }
+
void addRegListOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const SmallVectorImpl<unsigned> &RegList = getRegList();
@@ -428,6 +519,88 @@ public:
Inst.addOperand(MCOperand::CreateImm(unsigned(getMemBarrierOpt())));
}
+ void addMemMode7Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && isMemMode7() && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum()));
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
+ (void)CE;
+ assert((CE && CE->getValue() == 0) &&
+ "No offset operand support in mode 7");
+ }
+
+ void addMemMode2Operands(MCInst &Inst, unsigned N) const {
+ assert(isMemMode2() && "Invalid mode or number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum()));
+ unsigned IdxMode = (getMemPreindexed() | getMemPostindexed() << 1);
+
+ if (getMemOffsetIsReg()) {
+ Inst.addOperand(MCOperand::CreateReg(getMemOffsetRegNum()));
+
+ ARM_AM::AddrOpc AMOpc = getMemNegative() ? ARM_AM::sub : ARM_AM::add;
+ ARM_AM::ShiftOpc ShOpc = ARM_AM::no_shift;
+ int64_t ShiftAmount = 0;
+
+ if (getMemOffsetRegShifted()) {
+ ShOpc = getMemShiftType();
+ const MCConstantExpr *CE =
+ dyn_cast<MCConstantExpr>(getMemShiftAmount());
+ ShiftAmount = CE->getValue();
+ }
+
+ Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM2Opc(AMOpc, ShiftAmount,
+ ShOpc, IdxMode)));
+ return;
+ }
+
+ // Create an operand placeholder to always yield the same number of operands.
+ Inst.addOperand(MCOperand::CreateReg(0));
+
+ // FIXME: #-0 is encoded differently than #0. Does the parser preserve
+ // the difference?
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
+ assert(CE && "Non-constant mode 2 offset operand!");
+ int64_t Offset = CE->getValue();
+
+ if (Offset >= 0)
+ Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM2Opc(ARM_AM::add,
+ Offset, ARM_AM::no_shift, IdxMode)));
+ else
+ Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM2Opc(ARM_AM::sub,
+ -Offset, ARM_AM::no_shift, IdxMode)));
+ }
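
Folding the parsed signed constant back into the AM2 operand, as the tail of addMemMode2Operands does, can be written as one small helper (a sketch against this tree's ARMAddressingModes.h; encodeAM2ImmOffset is hypothetical):

#include "ARMAddressingModes.h"

// Hypothetical helper mirroring the code above: split a signed offset into
// an add/sub opcode plus a magnitude, then pack it with getAM2Opc.
static unsigned encodeAM2ImmOffset(int64_t Offset, unsigned IdxMode) {
  llvm::ARM_AM::AddrOpc Opc =
      Offset >= 0 ? llvm::ARM_AM::add : llvm::ARM_AM::sub;
  unsigned Magnitude = unsigned(Offset >= 0 ? Offset : -Offset);
  return llvm::ARM_AM::getAM2Opc(Opc, Magnitude, llvm::ARM_AM::no_shift,
                                 IdxMode);
}
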
+
+ void addMemMode3Operands(MCInst &Inst, unsigned N) const {
+ assert(isMemMode3() && "Invalid mode or number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum()));
+ unsigned IdxMode = (getMemPreindexed() | getMemPostindexed() << 1);
+
+ if (getMemOffsetIsReg()) {
+ Inst.addOperand(MCOperand::CreateReg(getMemOffsetRegNum()));
+
+ ARM_AM::AddrOpc AMOpc = getMemNegative() ? ARM_AM::sub : ARM_AM::add;
+ Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM3Opc(AMOpc, 0,
+ IdxMode)));
+ return;
+ }
+
+ // Create an operand placeholder to always yield the same number of operands.
+ Inst.addOperand(MCOperand::CreateReg(0));
+
+ // FIXME: #-0 is encoded differently than #0. Does the parser preserve
+ // the difference?
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
+ assert(CE && "Non-constant mode 3 offset operand!");
+ int64_t Offset = CE->getValue();
+
+ if (Offset >= 0)
+ Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM3Opc(ARM_AM::add,
+ Offset, IdxMode)));
+ else
+ Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM3Opc(ARM_AM::sub,
+ -Offset, IdxMode)));
+ }
+
void addMemMode5Operands(MCInst &Inst, unsigned N) const {
assert(N == 2 && isMemMode5() && "Invalid number of operands!");
@@ -525,6 +698,15 @@ public:
return Op;
}
+ static ARMOperand *CreateShifter(ARM_AM::ShiftOpc ShTy,
+ SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(Shifter);
+ Op->Shift.ShiftTy = ShTy;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
static ARMOperand *
CreateRegList(const SmallVectorImpl<std::pair<unsigned, SMLoc> > &Regs,
SMLoc StartLoc, SMLoc EndLoc) {
@@ -553,9 +735,10 @@ public:
return Op;
}
- static ARMOperand *CreateMem(unsigned BaseRegNum, bool OffsetIsReg,
- const MCExpr *Offset, int OffsetRegNum,
- bool OffsetRegShifted, enum ShiftType ShiftType,
+ static ARMOperand *CreateMem(ARMII::AddrMode AddrMode, unsigned BaseRegNum,
+ bool OffsetIsReg, const MCExpr *Offset,
+ int OffsetRegNum, bool OffsetRegShifted,
+ enum ARM_AM::ShiftOpc ShiftType,
const MCExpr *ShiftAmount, bool Preindexed,
bool Postindexed, bool Negative, bool Writeback,
SMLoc S, SMLoc E) {
@@ -571,6 +754,7 @@ public:
"Cannot have expression offset and register offset!");
ARMOperand *Op = new ARMOperand(Memory);
+ Op->Mem.AddrMode = AddrMode;
Op->Mem.BaseRegNum = BaseRegNum;
Op->Mem.OffsetIsReg = OffsetIsReg;
if (OffsetIsReg)
@@ -642,7 +826,8 @@ void ARMOperand::dump(raw_ostream &OS) const {
break;
case Memory:
OS << "<memory "
- << "base:" << getMemBaseRegNum();
+ << "am:" << ARMII::AddrModeToString(getMemAddrMode())
+ << " base:" << getMemBaseRegNum();
if (getMemOffsetIsReg()) {
OS << " offset:<register " << getMemOffsetRegNum();
if (getMemOffsetRegShifted()) {
@@ -676,6 +861,9 @@ void ARMOperand::dump(raw_ostream &OS) const {
case Register:
OS << "<register " << getReg() << ">";
break;
+ case Shifter:
+ OS << "<shifter " << getShiftOpcStr(Shift.ShiftTy) << ">";
+ break;
case RegisterList:
case DPRRegisterList:
case SPRRegisterList: {
@@ -738,6 +926,42 @@ int ARMAsmParser::TryParseRegister() {
return RegNum;
}
+/// Try to parse a shift-register operand (e.g. "lsl r3"). The token must be
+/// an Identifier when called. If it names a shift type, the token is eaten,
+/// the shifted register and shifter operands are pushed, and false is
+/// returned; otherwise return true.
+///
+bool ARMAsmParser::TryParseShiftRegister(
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+
+ std::string upperCase = Tok.getString().str();
+ std::string lowerCase = LowercaseString(upperCase);
+ ARM_AM::ShiftOpc ShiftTy = StringSwitch<ARM_AM::ShiftOpc>(lowerCase)
+ .Case("lsl", ARM_AM::lsl)
+ .Case("lsr", ARM_AM::lsr)
+ .Case("asr", ARM_AM::asr)
+ .Case("ror", ARM_AM::ror)
+ .Case("rrx", ARM_AM::rrx)
+ .Default(ARM_AM::no_shift);
+
+ if (ShiftTy == ARM_AM::no_shift)
+ return true;
+
+ Parser.Lex(); // Eat shift-type operand;
+ int RegNum = TryParseRegister();
+ if (RegNum == -1)
+ return Error(Parser.getTok().getLoc(), "register expected");
+
+ Operands.push_back(ARMOperand::CreateReg(RegNum, S,
+                                          Parser.getTok().getLoc()));
+ Operands.push_back(ARMOperand::CreateShifter(ShiftTy,
+ S, Parser.getTok().getLoc()));
+
+ return false;
+}
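
The StringSwitch above is the whole mnemonic table; pulled out on its own it would look like this (a sketch, assuming llvm/ADT/StringSwitch.h; shiftOpcFromMnemonic is illustrative):

#include "ARMAddressingModes.h"
#include "llvm/ADT/StringSwitch.h"

// Illustrative mapping from an already-lowercased mnemonic to a shift opcode;
// ARM_AM::no_shift doubles as the "not a shift" sentinel, as above.
static llvm::ARM_AM::ShiftOpc shiftOpcFromMnemonic(llvm::StringRef S) {
  return llvm::StringSwitch<llvm::ARM_AM::ShiftOpc>(S)
      .Case("lsl", llvm::ARM_AM::lsl)
      .Case("lsr", llvm::ARM_AM::lsr)
      .Case("asr", llvm::ARM_AM::asr)
      .Case("ror", llvm::ARM_AM::ror)
      .Case("rrx", llvm::ARM_AM::rrx)
      .Default(llvm::ARM_AM::no_shift);
}
// e.g. shiftOpcFromMnemonic("asr") == ARM_AM::asr
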
+
/// Try to parse a register name. The token must be an Identifier when called.
/// If it's a register, an AsmOperand is created. Another AsmOperand is created
/// if there is a "writeback". 'true' if it's not a register.
@@ -1046,13 +1270,96 @@ tryParseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return MatchOperand_Success;
}
+/// tryParseMemMode2Operand - Try to parse memory addressing mode 2 operand.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+tryParseMemMode2Operand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ assert(Parser.getTok().is(AsmToken::LBrac) && "Token is not a \"[\"");
+
+ if (ParseMemory(Operands, ARMII::AddrMode2))
+ return MatchOperand_NoMatch;
+
+ return MatchOperand_Success;
+}
+
+/// tryParseMemMode3Operand - Try to parse memory addressing mode 3 operand.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+tryParseMemMode3Operand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ assert(Parser.getTok().is(AsmToken::LBrac) && "Token is not a \"[\"");
+
+ if (ParseMemory(Operands, ARMII::AddrMode3))
+ return MatchOperand_NoMatch;
+
+ return MatchOperand_Success;
+}
+
+/// CvtLdWriteBackRegAddrMode2 - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't properly handle tied
+/// operands when they refer to multiple MIOperands inside a single one.
+bool ARMAsmParser::
+CvtLdWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+
+ ((ARMOperand*)Operands[3])->addMemMode2Operands(Inst, 3);
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
+/// CvtStWriteBackRegAddrMode2 - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't properly handle tied
+/// operands when they refer to multiple MIOperands inside a single one.
+bool ARMAsmParser::
+CvtStWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+ ((ARMOperand*)Operands[3])->addMemMode2Operands(Inst, 3);
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
+/// CvtLdWriteBackRegAddrMode3 - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't properly handle tied
+/// operands when they refer to multiple MIOperands inside a single one.
+bool ARMAsmParser::
+CvtLdWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+
+ ((ARMOperand*)Operands[3])->addMemMode3Operands(Inst, 3);
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
+/// CvtStWriteBackRegAddrMode3 - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't properly handle tied
+/// operands when they refer to multiple MIOperands inside a single one.
+bool ARMAsmParser::
+CvtStWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+ ((ARMOperand*)Operands[3])->addMemMode3Operands(Inst, 3);
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
/// Parse an ARM memory expression. Return false on success; otherwise return
/// true (an error may already have been emitted).
///
/// TODO Only preindexing and postindexing addressing are started, unindexed
/// with option, etc are still to do.
bool ARMAsmParser::
-ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ ARMII::AddrMode AddrMode = ARMII::AddrModeNone) {
SMLoc S, E;
assert(Parser.getTok().is(AsmToken::LBrac) &&
"Token is not a Left Bracket");
@@ -1083,7 +1390,7 @@ ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
ARMOperand *WBOp = 0;
int OffsetRegNum = -1;
bool OffsetRegShifted = false;
- enum ShiftType ShiftType = Lsl;
+ enum ARM_AM::ShiftOpc ShiftType = ARM_AM::lsl;
const MCExpr *ShiftAmount = 0;
const MCExpr *Offset = 0;
@@ -1106,10 +1413,17 @@ ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
const AsmToken &ExclaimTok = Parser.getTok();
if (ExclaimTok.is(AsmToken::Exclaim)) {
+ // No addrmode3 instruction uses "!".
+ if (AddrMode == ARMII::AddrMode3)
+ return true;
+
WBOp = ARMOperand::CreateToken(ExclaimTok.getString(),
ExclaimTok.getLoc());
Writeback = true;
Parser.Lex(); // Eat exclaim token
+ } else { // In addressing mode 2, pre-indexed mode always ends with "!".
+ if (AddrMode == ARMII::AddrMode2)
+ Preindexed = false;
}
} else {
// The "[Rn" we have so far was not followed by a comma.
@@ -1143,13 +1457,17 @@ ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
if (!OffsetIsReg) {
if (!Offset)
Offset = MCConstantExpr::Create(0, getContext());
+ } else {
+ if (AddrMode == ARMII::AddrMode3 && OffsetRegShifted) {
+ Error(E, "shift amount not supported");
+ return true;
+ }
}
- Operands.push_back(ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset,
- OffsetRegNum, OffsetRegShifted,
- ShiftType, ShiftAmount, Preindexed,
- Postindexed, Negative, Writeback,
- S, E));
+ Operands.push_back(ARMOperand::CreateMem(AddrMode, BaseRegNum, OffsetIsReg,
+ Offset, OffsetRegNum, OffsetRegShifted,
+ ShiftType, ShiftAmount, Preindexed,
+ Postindexed, Negative, Writeback, S, E));
if (WBOp)
Operands.push_back(WBOp);
@@ -1165,7 +1483,7 @@ ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
/// we return false on success or an error otherwise.
bool ARMAsmParser::ParseMemoryOffsetReg(bool &Negative,
bool &OffsetRegShifted,
- enum ShiftType &ShiftType,
+ enum ARM_AM::ShiftOpc &ShiftType,
const MCExpr *&ShiftAmount,
const MCExpr *&Offset,
bool &OffsetIsReg,
@@ -1226,28 +1544,28 @@ bool ARMAsmParser::ParseMemoryOffsetReg(bool &Negative,
/// ( lsl | lsr | asr | ror ) , # shift_amount
/// rrx
/// and returns true if it parses a shift otherwise it returns false.
-bool ARMAsmParser::ParseShift(ShiftType &St, const MCExpr *&ShiftAmount,
- SMLoc &E) {
+bool ARMAsmParser::ParseShift(ARM_AM::ShiftOpc &St,
+ const MCExpr *&ShiftAmount, SMLoc &E) {
const AsmToken &Tok = Parser.getTok();
if (Tok.isNot(AsmToken::Identifier))
return true;
StringRef ShiftName = Tok.getString();
if (ShiftName == "lsl" || ShiftName == "LSL")
- St = Lsl;
+ St = ARM_AM::lsl;
else if (ShiftName == "lsr" || ShiftName == "LSR")
- St = Lsr;
+ St = ARM_AM::lsr;
else if (ShiftName == "asr" || ShiftName == "ASR")
- St = Asr;
+ St = ARM_AM::asr;
else if (ShiftName == "ror" || ShiftName == "ROR")
- St = Ror;
+ St = ARM_AM::ror;
else if (ShiftName == "rrx" || ShiftName == "RRX")
- St = Rrx;
+ St = ARM_AM::rrx;
else
return true;
Parser.Lex(); // Eat shift type token.
// Rrx stands alone.
- if (St == Rrx)
+ if (St == ARM_AM::rrx)
return false;
// Otherwise, there must be a '#' and a shift amount.
@@ -1286,6 +1604,9 @@ bool ARMAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
case AsmToken::Identifier:
if (!TryParseRegisterWithWriteBack(Operands))
return false;
+ if (!TryParseShiftRegister(Operands))
+ return false;
+
// Fall though for the Identifier case that is not a register or a
// special name.
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 78d73d3..bdce2c4 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -18,6 +18,7 @@
#include "ARMDisassembler.h"
#include "ARMDisassemblerCore.h"
+#include "llvm/ADT/OwningPtr.h"
#include "llvm/MC/EDInstInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Target/TargetRegistry.h"
@@ -94,6 +95,9 @@ static unsigned decodeARMInstruction(uint32_t &insn) {
// As a result, the decoder fails to decode USAT properly.
if (slice(insn, 27, 21) == 0x37 && slice(insn, 5, 4) == 1)
return ARM::USAT;
+ // As a result, the decoder fails to decode UQADD16 properly.
+ if (slice(insn, 27, 20) == 0x66 && slice(insn, 7, 4) == 1)
+ return ARM::UQADD16;
// Ditto for ADDSrs, which is a super-instruction for A8.6.7 & A8.6.8.
// As a result, the decoder fails to decode UMULL properly.
@@ -280,6 +284,24 @@ static unsigned T2Morph2LoadLiteral(unsigned Opcode) {
}
}
+// Helper function for special case handling of PLD (literal) and friends.
+// See A8.6.117 T1 & T2 and friends for why we morphed the opcode
+// before returning it.
+static unsigned T2Morph2PLDLiteral(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ return Opcode; // Return unmorphed opcode.
+
+ case ARM::t2PLDi8: case ARM::t2PLDs:
+ case ARM::t2PLDWi12: case ARM::t2PLDWi8:
+ case ARM::t2PLDWs:
+ return ARM::t2PLDi12;
+
+ case ARM::t2PLIi8: case ARM::t2PLIs:
+ return ARM::t2PLIi12;
+ }
+}
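
A quick illustration of the morphing contract (assuming the tblgen'd ARM opcode enum; checkPLDMorph is a hypothetical test, not part of the patch):

#include <cassert>

// Hypothetical sanity check: PLD/PLI forms whose base is the PC collapse to
// the literal i12 variants; anything else comes back unmorphed.
static void checkPLDMorph() {
  assert(T2Morph2PLDLiteral(ARM::t2PLDi8)  == ARM::t2PLDi12);
  assert(T2Morph2PLDLiteral(ARM::t2PLIs)   == ARM::t2PLIi12);
  assert(T2Morph2PLDLiteral(ARM::t2PLDi12) == ARM::t2PLDi12); // unmorphed
}
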
+
/// decodeThumbSideEffect is a decorator function which can potentially twiddle
/// the instruction or morph the returned opcode under Thumb2.
///
@@ -330,12 +352,27 @@ static unsigned decodeThumbSideEffect(bool IsThumb2, unsigned &insn) {
}
// --------- Transform End Marker ---------
+ unsigned unmorphed = decodeThumbInstruction(insn);
+
// See, for example, A6.3.7 Load word: Table A6-18 Load word.
// See A8.6.57 T3, T4 & A8.6.60 T2 and friends for why we morphed the opcode
// before returning it to our caller.
if (op1 == 3 && slice(op2, 6, 5) == 0 && slice(op2, 0, 0) == 1
- && slice(insn, 19, 16) == 15)
- return T2Morph2LoadLiteral(decodeThumbInstruction(insn));
+ && slice(insn, 19, 16) == 15) {
+ unsigned morphed = T2Morph2LoadLiteral(unmorphed);
+ if (morphed != unmorphed)
+ return morphed;
+ }
+
+ // See, for example, A8.6.117 PLD,PLDW (immediate) T1 & T2, and friends for
+ // why we morphed the opcode before returning it to our caller.
+ if (slice(insn, 31, 25) == 0x7C && slice(insn, 15, 12) == 0xF
+ && slice(insn, 22, 22) == 0 && slice(insn, 20, 20) == 1
+ && slice(insn, 19, 16) == 15) {
+ unsigned morphed = T2Morph2PLDLiteral(unmorphed);
+ if (morphed != unmorphed)
+ return morphed;
+ }
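
The slice(insn, hi, lo) calls above extract an inclusive bit range; a standalone equivalent, assuming the semantics from ARMDisassemblerCore.h:

#include <cstdint>

// bits(V, Hi, Lo) == slice(V, Hi, Lo): take bits Hi..Lo inclusive. The
// 64-bit mask avoids undefined behavior when Hi - Lo + 1 == 32.
static inline uint32_t bits(uint32_t V, unsigned Hi, unsigned Lo) {
  return (V >> Lo) & (uint32_t)(((uint64_t)1 << (Hi - Lo + 1)) - 1);
}
// e.g. bits(insn, 31, 25) == 0x7C selects the Thumb2 PLD-literal space above.
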
// One last check for NEON/VFP instructions.
if ((op1 == 1 || op1 == 3) && slice(op2, 6, 6) == 1)
@@ -375,21 +412,23 @@ bool ARMDisassembler::getInstruction(MCInst &MI,
Size = 4;
DEBUG({
- errs() << "Opcode=" << Opcode << " Name=" << ARMUtils::OpcodeName(Opcode)
+ errs() << "\nOpcode=" << Opcode << " Name=" <<ARMUtils::OpcodeName(Opcode)
<< " Format=" << stringForARMFormat(Format) << '(' << (int)Format
<< ")\n";
showBitVector(errs(), insn);
});
- ARMBasicMCBuilder *Builder = CreateMCBuilder(Opcode, Format);
+ OwningPtr<ARMBasicMCBuilder> Builder(CreateMCBuilder(Opcode, Format));
if (!Builder)
return false;
+ Builder->setupBuilderForSymbolicDisassembly(getLLVMOpInfoCallback(),
+ getDisInfoBlock(), getMCContext(),
+ Address);
+
if (!Builder->Build(MI, insn))
return false;
- delete Builder;
-
return true;
}
@@ -398,7 +437,7 @@ bool ThumbDisassembler::getInstruction(MCInst &MI,
const MemoryObject &Region,
uint64_t Address,
raw_ostream &os) const {
- // The Thumb instruction stream is a sequence of halhwords.
+ // The Thumb instruction stream is a sequence of halfwords.
// This represents the first halfword as well as the machine instruction
// passed to decodeThumbInstruction(). For 16-bit Thumb instruction, the top
@@ -463,17 +502,19 @@ bool ThumbDisassembler::getInstruction(MCInst &MI,
showBitVector(errs(), insn);
});
- ARMBasicMCBuilder *Builder = CreateMCBuilder(Opcode, Format);
+ OwningPtr<ARMBasicMCBuilder> Builder(CreateMCBuilder(Opcode, Format));
if (!Builder)
return false;
Builder->SetSession(const_cast<Session *>(&SO));
+ Builder->setupBuilderForSymbolicDisassembly(getLLVMOpInfoCallback(),
+ getDisInfoBlock(), getMCContext(),
+ Address);
+
if (!Builder->Build(MI, insn))
return false;
- delete Builder;
-
return true;
}
diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
index bac68dd..642829c 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
@@ -17,6 +17,7 @@
#include "ARMDisassemblerCore.h"
#include "ARMAddressingModes.h"
+#include "ARMMCExpr.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -82,10 +83,28 @@ const char *ARMUtils::OpcodeName(unsigned Opcode) {
// FIXME: Auto-gened?
static unsigned
getRegisterEnum(BO B, unsigned RegClassID, unsigned RawRegister) {
- // For this purpose, we can treat rGPR as if it were GPR.
- if (RegClassID == ARM::rGPRRegClassID) RegClassID = ARM::GPRRegClassID;
+ if (RegClassID == ARM::rGPRRegClassID) {
+ // Check for The register numbers 13 and 15 that are not permitted for many
+ // Thumb register specifiers.
+ if (RawRegister == 13 || RawRegister == 15) {
+ B->SetErr(-1);
+ return 0;
+ }
+ // For this purpose, we can treat rGPR as if it were GPR.
+ RegClassID = ARM::GPRRegClassID;
+ }
// See also decodeNEONRd(), decodeNEONRn(), decodeNEONRm().
+ // A7.3 register encoding
+ // Qd -> bit[12] == 0
+ // Qn -> bit[16] == 0
+ // Qm -> bit[0] == 0
+ //
+ // If one of these bits is 1, the instruction is UNDEFINED.
+ if (RegClassID == ARM::QPRRegClassID && slice(RawRegister, 0, 0) == 1) {
+ B->SetErr(-1);
+ return 0;
+ }
unsigned RegNum =
RegClassID == ARM::QPRRegClassID ? RawRegister >> 1 : RawRegister;
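
The Q-register rule above, spelled out as a tiny decoder (assumed semantics; decodeQRegNum is illustrative, not part of the patch):

#include <cstdint>

// Q registers are encoded in D-register numbering with bit 0 clear, so raw
// 4 decodes to Q2 while raw 5 is an UNDEFINED encoding, as checked above.
static bool decodeQRegNum(unsigned RawRegister, unsigned &Q) {
  if (RawRegister & 1)
    return false; // bit[0] set -> UNDEFINED
  Q = RawRegister >> 1;
  return true;
}
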
@@ -497,14 +516,66 @@ static bool DisassemblePseudo(MCInst &MI, unsigned Opcode, uint32_t insn,
return false;
}
+// A8.6.94 MLA
+// if d == 15 || n == 15 || m == 15 || a == 15 then UNPREDICTABLE;
+//
+// A8.6.105 MUL
+// if d == 15 || n == 15 || m == 15 then UNPREDICTABLE;
+//
+// A8.6.246 UMULL
+// if dLo == 15 || dHi == 15 || n == 15 || m == 15 then UNPREDICTABLE;
+// if dHi == dLo then UNPREDICTABLE;
+static bool BadRegsMulFrm(unsigned Opcode, uint32_t insn) {
+ unsigned R19_16 = slice(insn, 19, 16);
+ unsigned R15_12 = slice(insn, 15, 12);
+ unsigned R11_8 = slice(insn, 11, 8);
+ unsigned R3_0 = slice(insn, 3, 0);
+ switch (Opcode) {
+ default:
+ // Did we miss an opcode?
+ DEBUG(errs() << "BadRegsMulFrm: unexpected opcode!");
+ return false;
+ case ARM::MLA: case ARM::MLS: case ARM::SMLABB: case ARM::SMLABT:
+ case ARM::SMLATB: case ARM::SMLATT: case ARM::SMLAWB: case ARM::SMLAWT:
+ case ARM::SMMLA: case ARM::SMMLAR: case ARM::SMMLS: case ARM::SMMLSR:
+ case ARM::USADA8:
+ if (R19_16 == 15 || R15_12 == 15 || R11_8 == 15 || R3_0 == 15)
+ return true;
+ return false;
+ case ARM::MUL: case ARM::SMMUL: case ARM::SMMULR:
+ case ARM::SMULBB: case ARM::SMULBT: case ARM::SMULTB: case ARM::SMULTT:
+ case ARM::SMULWB: case ARM::SMULWT: case ARM::SMUAD: case ARM::SMUADX:
+ // A8.6.167 SMLAD & A8.6.172 SMLSD
+ case ARM::SMLAD: case ARM::SMLADX: case ARM::SMLSD: case ARM::SMLSDX:
+ case ARM::USAD8:
+ if (R19_16 == 15 || R11_8 == 15 || R3_0 == 15)
+ return true;
+ return false;
+ case ARM::SMLAL: case ARM::SMULL: case ARM::UMAAL: case ARM::UMLAL:
+ case ARM::UMULL:
+ case ARM::SMLALBB: case ARM::SMLALBT: case ARM::SMLALTB: case ARM::SMLALTT:
+ case ARM::SMLALD: case ARM::SMLALDX: case ARM::SMLSLD: case ARM::SMLSLDX:
+ if (R19_16 == 15 || R15_12 == 15 || R11_8 == 15 || R3_0 == 15)
+ return true;
+ if (R19_16 == R15_12)
+ return true;
+ return false;
+ }
+}
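
A worked example of the UMULL rule (hypothetical encoding; checkUMULLRegs is illustrative, not part of the patch):

#include <cassert>
#include <cstdint>

// Hypothetical check: an encoding with RdHi{19-16} == RdLo{15-12} trips the
// "if dHi == dLo then UNPREDICTABLE" clause above.
static void checkUMULLRegs() {
  uint32_t insn = (2u << 16) | (2u << 12) | (3u << 8) | 1u; // dHi == dLo == r2
  assert(BadRegsMulFrm(ARM::UMULL, insn) && "dHi == dLo must be rejected");
}
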
+
// Multiply Instructions.
-// MLA, MLS, SMLABB, SMLABT, SMLATB, SMLATT, SMLAWB, SMLAWT, SMMLA, SMMLS:
+// MLA, MLS, SMLABB, SMLABT, SMLATB, SMLATT, SMLAWB, SMLAWT, SMMLA, SMMLAR,
+// SMMLS, SMMLSR, SMLAD, SMLADX, SMLSD, SMLSDX, and USADA8 (for convenience):
// Rd{19-16} Rn{3-0} Rm{11-8} Ra{15-12}
+// But note that register checking for {SMLAD, SMLADX, SMLSD, SMLSDX} is
+// only for {d, n, m}.
//
-// MUL, SMMUL, SMULBB, SMULBT, SMULTB, SMULTT, SMULWB, SMULWT:
+// MUL, SMMUL, SMMULR, SMULBB, SMULBT, SMULTB, SMULTT, SMULWB, SMULWT, SMUAD,
+// SMUADX, and USAD8 (for convenience):
// Rd{19-16} Rn{3-0} Rm{11-8}
//
-// SMLAL, SMULL, UMAAL, UMLAL, UMULL, SMLALBB, SMLALBT, SMLALTB, SMLALTT:
+// SMLAL, SMULL, UMAAL, UMLAL, UMULL, SMLALBB, SMLALBT, SMLALTB, SMLALTT,
+// SMLALD, SMLALDX, SMLSLD, SMLSLDX:
// RdLo{15-12} RdHi{19-16} Rn{3-0} Rm{11-8}
//
// The mapping of the multiply registers to the "regular" ARM registers, where
@@ -531,6 +602,10 @@ static bool DisassembleMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
&& OpInfo[2].RegClass == ARM::GPRRegClassID
&& "Expect three register operands");
+ // Sanity check for the register encodings.
+ if (BadRegsMulFrm(Opcode, insn))
+ return false;
+
// Instructions with two destination registers have RdLo{15-12} first.
if (NumDefs == 2) {
assert(NumOps >= 4 && OpInfo[3].RegClass == ARM::GPRRegClassID &&
@@ -618,18 +693,38 @@ static inline unsigned GetCopOpc(uint32_t insn) {
static bool DisassembleCoprocessor(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
- assert(NumOps >= 5 && "Num of operands >= 5 for coprocessor instr");
+ assert(NumOps >= 4 && "Num of operands >= 4 for coprocessor instr");
unsigned &OpIdx = NumOpsAdded;
+ // A8.6.92
+ // if coproc == '101x' then SEE "Advanced SIMD and VFP"
+ // But since the special instructions have more explicit encoding bits
+ // specified, if coproc == 10 or 11, we should reject it as invalid.
+ unsigned coproc = GetCoprocessor(insn);
+ if ((Opcode == ARM::MCR || Opcode == ARM::MCRR ||
+ Opcode == ARM::MRC || Opcode == ARM::MRRC) &&
+ (coproc == 10 || coproc == 11)) {
+ DEBUG(errs() << "Encoding error: coproc == 10 or 11 for MCR[R]/MR[R]C\n");
+ return false;
+ }
+
bool OneCopOpc = (Opcode == ARM::MCRR || Opcode == ARM::MCRR2 ||
Opcode == ARM::MRRC || Opcode == ARM::MRRC2);
+
// CDP/CDP2 has no GPR operand; the opc1 operand is also wider (Inst{23-20}).
bool NoGPR = (Opcode == ARM::CDP || Opcode == ARM::CDP2);
bool LdStCop = LdStCopOpcode(Opcode);
+ bool RtOut = (Opcode == ARM::MRC || Opcode == ARM::MRC2);
OpIdx = 0;
- MI.addOperand(MCOperand::CreateImm(GetCoprocessor(insn)));
+ if (RtOut) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+ ++OpIdx;
+ }
+ MI.addOperand(MCOperand::CreateImm(coproc));
+ ++OpIdx;
if (LdStCop) {
// Unindex if P:W = 0b00 --> _OPTION variant
@@ -639,26 +734,34 @@ static bool DisassembleCoprocessor(MCInst &MI, unsigned Opcode, uint32_t insn,
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRn(insn))));
+ OpIdx += 2;
if (PW) {
MI.addOperand(MCOperand::CreateReg(0));
ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub;
+ const TargetInstrDesc &TID = ARMInsts[Opcode];
+ unsigned IndexMode =
+ (TID.TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift;
unsigned Offset = ARM_AM::getAM2Opc(AddrOpcode, slice(insn, 7, 0) << 2,
- ARM_AM::no_shift);
+ ARM_AM::no_shift, IndexMode);
MI.addOperand(MCOperand::CreateImm(Offset));
- OpIdx = 5;
+ OpIdx += 2;
} else {
MI.addOperand(MCOperand::CreateImm(slice(insn, 7, 0)));
- OpIdx = 4;
+ ++OpIdx;
}
} else {
MI.addOperand(MCOperand::CreateImm(OneCopOpc ? GetCopOpc(insn)
: GetCopOpc1(insn, NoGPR)));
+ ++OpIdx;
- MI.addOperand(NoGPR ? MCOperand::CreateImm(decodeRd(insn))
- : MCOperand::CreateReg(
- getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRd(insn))));
+ if (!RtOut) {
+ MI.addOperand(NoGPR ? MCOperand::CreateImm(decodeRd(insn))
+ : MCOperand::CreateReg(
+ getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+ ++OpIdx;
+ }
MI.addOperand(OneCopOpc ? MCOperand::CreateReg(
getRegisterEnum(B, ARM::GPRRegClassID,
@@ -667,7 +770,7 @@ static bool DisassembleCoprocessor(MCInst &MI, unsigned Opcode, uint32_t insn,
MI.addOperand(MCOperand::CreateImm(decodeRm(insn)));
- OpIdx = 5;
+ OpIdx += 2;
if (!OneCopOpc) {
MI.addOperand(MCOperand::CreateImm(GetCopOpc2(insn)));
@@ -679,8 +782,8 @@ static bool DisassembleCoprocessor(MCInst &MI, unsigned Opcode, uint32_t insn,
}
// Branch Instructions.
-// BLr9: SignExtend(Imm24:'00', 32)
-// Bcc, BLr9_pred: SignExtend(Imm24:'00', 32) Pred0 Pred1
+// BL: SignExtend(Imm24:'00', 32)
+// Bcc, BL_pred: SignExtend(Imm24:'00', 32) Pred0 Pred1
// SMC: ZeroExtend(imm4, 32)
// SVC: ZeroExtend(Imm24, 32)
//
@@ -735,6 +838,11 @@ static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
// MSRi take a mask, followed by one so_imm operand. The mask contains the
// R Bit in bit 4, and the special register fields in bits 3-0.
if (Opcode == ARM::MSRi) {
+ // A5.2.11 MSR (immediate), and hints & B6.1.6 MSR (immediate)
+ // The hints instructions have more specific encodings, so if mask == 0,
+ // we should reject this as an invalid instruction.
+ if (slice(insn, 19, 16) == 0)
+ return false;
MI.addOperand(MCOperand::CreateImm(slice(insn, 22, 22) << 4 /* R Bit */ |
slice(insn, 19, 16) /* Special Reg */ ));
// SOImm is 4-bit rotate amount in bits 11-8 with 8-bit imm in bits 7-0.
@@ -760,11 +868,11 @@ static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
return true;
}
- assert((Opcode == ARM::Bcc || Opcode == ARM::BLr9 || Opcode == ARM::BLr9_pred
+ assert((Opcode == ARM::Bcc || Opcode == ARM::BL || Opcode == ARM::BL_pred
|| Opcode == ARM::SMC || Opcode == ARM::SVC) &&
"Unexpected Opcode");
- assert(NumOps >= 1 && OpInfo[0].RegClass < 0 && "Reg operand expected");
+ assert(NumOps >= 1 && OpInfo[0].RegClass < 0 && "Imm operand expected");
int Imm32 = 0;
if (Opcode == ARM::SMC) {
@@ -778,12 +886,6 @@ static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned Imm26 = slice(insn, 23, 0) << 2;
//Imm32 = signextend<signed int, 26>(Imm26);
Imm32 = SignExtend32<26>(Imm26);
-
- // When executing an ARM instruction, PC reads as the address of the current
- // instruction plus 8. The assembler subtracts 8 from the difference
- // between the branch instruction and the target address, disassembler has
- // to add 8 to compensate.
- Imm32 += 8;
}
MI.addOperand(MCOperand::CreateImm(Imm32));
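
With the +8 PC bias dropped above, the decode is a pure imm24-to-word-offset sign extension; a self-contained check using llvm::SignExtend32 (checkBranchImm is illustrative):

#include "llvm/Support/MathExtras.h"
#include <cassert>

// An all-ones imm24 field shifts to 0x3FFFFFC, which sign-extends from 26
// bits to -4: a backward branch of one word.
static void checkBranchImm() {
  unsigned Imm26 = 0xFFFFFFu << 2;
  int Imm32 = llvm::SignExtend32<26>(Imm26);
  assert(Imm32 == -4);
}
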
@@ -793,7 +895,7 @@ static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
}
// Misc. Branch Instructions.
-// BLXr9, BXr9
+// BLX, BLXi, BX
// BX, BX_RET
static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
@@ -809,8 +911,9 @@ static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
if (Opcode == ARM::BX_RET || Opcode == ARM::MOVPCLR)
return true;
- // BLXr9 and BX take one GPR reg.
- if (Opcode == ARM::BLXr9 || Opcode == ARM::BX) {
+ // BLX and BX take one GPR reg.
+ if (Opcode == ARM::BLX || Opcode == ARM::BLX_pred ||
+ Opcode == ARM::BX) {
assert(NumOps >= 1 && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
"Reg operand expected");
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
@@ -819,6 +922,17 @@ static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
return true;
}
+ // BLXi takes imm32 (the PC offset).
+ if (Opcode == ARM::BLXi) {
+ assert(NumOps >= 1 && OpInfo[0].RegClass < 0 && "Imm operand expected");
+ // SignExtend(imm24:H:'0', 32) where imm24 = Inst{23-0} and H = Inst{24}.
+ unsigned Imm26 = slice(insn, 23, 0) << 2 | slice(insn, 24, 24) << 1;
+ int Imm32 = SignExtend32<26>(Imm26);
+ MI.addOperand(MCOperand::CreateImm(Imm32));
+ OpIdx = 1;
+ return true;
+ }
+
return false;
}
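
The BLXi immediate above is imm24:H:'0', giving half-word resolution because the target is Thumb; a standalone decode of the same field layout (decodeBLXiImm is illustrative, not part of the patch):

#include "llvm/Support/MathExtras.h"
#include <cstdint>

// Mirror of the code above: imm26 = imm24:H:'0', then sign-extend from 26
// bits. The extra H bit is what distinguishes BLXi from the B/BL decode.
static int32_t decodeBLXiImm(uint32_t insn) {
  uint32_t Imm26 = ((insn & 0xFFFFFFu) << 2) | (((insn >> 24) & 1u) << 1);
  return llvm::SignExtend32<26>(Imm26);
}
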
@@ -837,6 +951,24 @@ static inline bool getBFCInvMask(uint32_t insn, uint32_t &mask) {
return true;
}
+// Standard data-processing instructions allow PC as a register specifier,
+// but we should reject other DPFrm instructions with PC as registers.
+static bool BadRegsDPFrm(unsigned Opcode, uint32_t insn) {
+ switch (Opcode) {
+ default:
+ // Unlisted opcodes must not use PC in any register position; if that check
+ // passes, control falls through to the cases below, which allow PC.
+ if (decodeRd(insn) == 15 || decodeRn(insn) == 15 || decodeRm(insn) == 15) {
+ DEBUG(errs() << "DPFrm with bad reg specifier(s)\n");
+ return true;
+ }
+ case ARM::ADCrr: case ARM::ADDSrr: case ARM::ADDrr: case ARM::ANDrr:
+ case ARM::BICrr: case ARM::CMNzrr: case ARM::CMPrr: case ARM::EORrr:
+ case ARM::ORRrr: case ARM::RSBrr: case ARM::RSCrr: case ARM::SBCrr:
+ case ARM::SUBSrr: case ARM::SUBrr: case ARM::TEQrr: case ARM::TSTrr:
+ return false;
+ }
+}
+
// A major complication is the fact that some of the saturating add/subtract
// operations have Rd Rm Rn, instead of the "normal" Rd Rn Rm.
// They are QADD, QDADD, QDSUB, and QSUB.
@@ -864,6 +996,10 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
// Special-case handling of BFC/BFI/SBFX/UBFX.
if (Opcode == ARM::BFC || Opcode == ARM::BFI) {
+ // A8.6.17 BFC & A8.6.18 BFI
+ // Sanity check Rd.
+ if (decodeRd(insn) == 15)
+ return false;
MI.addOperand(MCOperand::CreateReg(0));
if (Opcode == ARM::BFI) {
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
@@ -879,6 +1015,9 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
return true;
}
if (Opcode == ARM::SBFX || Opcode == ARM::UBFX) {
+ // Sanity check Rd and Rm.
+ if (decodeRd(insn) == 15 || decodeRm(insn) == 15)
+ return false;
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRm(insn))));
MI.addOperand(MCOperand::CreateImm(slice(insn, 11, 7)));
@@ -915,15 +1054,21 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
// Assert disabled because saturating operations, e.g., A8.6.127 QASX, are
// routed here as well.
// assert(getIBit(insn) == 0 && "I_Bit != '0' reg/reg form");
+ if (BadRegsDPFrm(Opcode, insn))
+ return false;
MI.addOperand(MCOperand::CreateReg(
getRegisterEnum(B, ARM::GPRRegClassID,
RmRn? decodeRn(insn) : decodeRm(insn))));
++OpIdx;
} else if (Opcode == ARM::MOVi16 || Opcode == ARM::MOVTi16) {
+ // These two instructions don't allow d as 15.
+ if (decodeRd(insn) == 15)
+ return false;
// We have an imm16 = imm4:imm12 (imm4=Inst{19:16}, imm12 = Inst{11:0}).
assert(getIBit(insn) == 1 && "I_Bit != '1' reg/imm form");
unsigned Imm16 = slice(insn, 19, 16) << 12 | slice(insn, 11, 0);
- MI.addOperand(MCOperand::CreateImm(Imm16));
+ if (!B->tryAddingSymbolicOperand(Imm16, 4, MI))
+ MI.addOperand(MCOperand::CreateImm(Imm16));
++OpIdx;
} else {
// We have a reg/imm form.
@@ -992,6 +1137,21 @@ static bool DisassembleDPSoRegFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRm(insn))));
if (Rs) {
+ // If Inst{7} != 0, we should reject this insn as an invalid encoding.
+ if (slice(insn, 7, 7))
+ return false;
+
+ // A8.6.3 ADC (register-shifted register)
+ // if d == 15 || n == 15 || m == 15 || s == 15 then UNPREDICTABLE;
+ //
+ // This also accounts for shift instructions (register) where, fortunately,
+ // Inst{19-16} = 0b0000.
+ // A8.6.89 LSL (register)
+ // if d == 15 || n == 15 || m == 15 then UNPREDICTABLE;
+ if (decodeRd(insn) == 15 || decodeRn(insn) == 15 ||
+ decodeRm(insn) == 15 || decodeRs(insn) == 15)
+ return false;
+
// Register-controlled shifts: [Rm, Rs, shift].
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRs(insn))));
@@ -1015,6 +1175,71 @@ static bool DisassembleDPSoRegFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
return true;
}
+static bool BadRegsLdStFrm(unsigned Opcode, uint32_t insn, bool Store,
+                           bool WBack, bool Imm) {
+ const StringRef Name = ARMInsts[Opcode].Name;
+ unsigned Rt = decodeRd(insn);
+ unsigned Rn = decodeRn(insn);
+ unsigned Rm = decodeRm(insn);
+ unsigned P = getPBit(insn);
+ unsigned W = getWBit(insn);
+
+ if (Store) {
+ // Only STR (immediate, register) allows PC as the source.
+ if (Name.startswith("STRB") && Rt == 15) {
+ DEBUG(errs() << "if t == 15 then UNPREDICTABLE\n");
+ return true;
+ }
+ if (WBack && (Rn == 15 || Rn == Rt)) {
+ DEBUG(errs() << "if wback && (n == 15 || n == t) then UNPREDICTABLE\n");
+ return true;
+ }
+ if (!Imm && Rm == 15) {
+ DEBUG(errs() << "if m == 15 then UNPREDICTABLE\n");
+ return true;
+ }
+ } else {
+ // Only LDR (immediate, register) allows PC as the destination.
+ if (Name.startswith("LDRB") && Rt == 15) {
+ DEBUG(errs() << "if t == 15 then UNPREDICTABLE\n");
+ return true;
+ }
+ if (Imm) {
+ // Immediate
+ if (Rn == 15) {
+ // The literal form must be in offset mode; it's an encoding error
+ // otherwise.
+ if (!(P == 1 && W == 0)) {
+ DEBUG(errs() << "Ld literal form with !(P == 1 && W == 0)\n");
+ return true;
+ }
+ // LDRB (literal) does not allow PC as the destination.
+ if (Opcode != ARM::LDRi12 && Rt == 15) {
+ DEBUG(errs() << "if t == 15 then UNPREDICTABLE\n");
+ return true;
+ }
+ } else {
+ // Write back while Rn == Rt does not make sense.
+ if (WBack && (Rn == Rt)) {
+ DEBUG(errs() << "if wback && n == t then UNPREDICTABLE\n");
+ return true;
+ }
+ }
+ } else {
+ // Register
+ if (Rm == 15) {
+ DEBUG(errs() << "if m == 15 then UNPREDICTABLE\n");
+ return true;
+ }
+ if (WBack && (Rn == 15 || Rn == Rt)) {
+ DEBUG(errs() << "if wback && (n == 15 || n == t) then UNPREDICTABLE\n");
+ return true;
+ }
+ }
+ }
+ return false;
+}
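
One concrete instance of the literal-form rule (hypothetical field values; checkLiteralLoadForm is illustrative, not part of the patch):

#include <cassert>
#include <cstdint>

// Hypothetical check: an LDRi12 with Rn{19-16} == 15 (PC) but P == 0 is a
// post-indexed literal load, which the function above rejects.
static void checkLiteralLoadForm() {
  uint32_t insn = 15u << 16; // Rn = PC, P{24} = 0, W{21} = 0
  assert(BadRegsLdStFrm(ARM::LDRi12, insn, /*Store=*/false,
                        /*WBack=*/true, /*Imm=*/true));
}
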
+
static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, bool isStore, BO B) {
@@ -1077,19 +1302,41 @@ static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
if (OpIdx + 1 >= NumOps)
return false;
- assert((OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) &&
- (OpInfo[OpIdx+1].RegClass < 0) &&
- "Expect 1 reg operand followed by 1 imm operand");
+ if (BadRegsLdStFrm(Opcode, insn, isStore, isPrePost, getIBit(insn)==0))
+ return false;
ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub;
+ unsigned IndexMode =
+ (TID.TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift;
if (getIBit(insn) == 0) {
- MI.addOperand(MCOperand::CreateReg(0));
+ // For pre- and post-indexed case, add a reg0 operand (Addressing Mode #2).
+ // Otherwise, skip the reg operand since for addrmode_imm12, Rn has already
+ // been populated.
+ if (isPrePost) {
+ MI.addOperand(MCOperand::CreateReg(0));
+ OpIdx += 1;
+ }
- // Disassemble the 12-bit immediate offset.
unsigned Imm12 = slice(insn, 11, 0);
- unsigned Offset = ARM_AM::getAM2Opc(AddrOpcode, Imm12, ARM_AM::no_shift);
- MI.addOperand(MCOperand::CreateImm(Offset));
+ if (Opcode == ARM::LDRBi12 || Opcode == ARM::LDRi12 ||
+ Opcode == ARM::STRBi12 || Opcode == ARM::STRi12) {
+ // Disassemble the 12-bit immediate offset, which is the second operand in
+ // $addrmode_imm12 => (ops GPR:$base, i32imm:$offsimm).
+ int Offset = AddrOpcode == ARM_AM::add ? 1 * Imm12 : -1 * Imm12;
+ MI.addOperand(MCOperand::CreateImm(Offset));
+ } else {
+ // Disassemble the 12-bit immediate offset, which is the second operand in
+ // $am2offset => (ops GPR, i32imm).
+ unsigned Offset = ARM_AM::getAM2Opc(AddrOpcode, Imm12, ARM_AM::no_shift,
+ IndexMode);
+ MI.addOperand(MCOperand::CreateImm(Offset));
+ }
+ OpIdx += 1;
} else {
+ // If Inst{25} = 1 and Inst{4} != 0, we should reject this as invalid.
+ if (slice(insn,4,4) == 1)
+ return false;
+
// Disassemble the offset reg (Rm), shift type, and immediate shift length.
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRm(insn))));
@@ -1101,9 +1348,9 @@ static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
// A8.4.1. Possible rrx or shift amount of 32...
getImmShiftSE(ShOp, ShImm);
MI.addOperand(MCOperand::CreateImm(
- ARM_AM::getAM2Opc(AddrOpcode, ShImm, ShOp)));
+ ARM_AM::getAM2Opc(AddrOpcode, ShImm, ShOp, IndexMode)));
+ OpIdx += 2;
}
- OpIdx += 2;
return true;
}
@@ -1125,7 +1372,7 @@ static bool HasDualReg(unsigned Opcode) {
case ARM::LDRD: case ARM::LDRD_PRE: case ARM::LDRD_POST:
case ARM::STRD: case ARM::STRD_PRE: case ARM::STRD_POST:
return true;
- }
+ }
}
static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
@@ -1153,8 +1400,6 @@ static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
++OpIdx;
}
- bool DualReg = HasDualReg(Opcode);
-
// Disassemble the dst/src operand.
if (OpIdx >= NumOps)
return false;
@@ -1165,8 +1410,8 @@ static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
decodeRd(insn))));
++OpIdx;
- // Fill in LDRD and STRD's second operand.
- if (DualReg) {
+ // Fill in LDRD and STRD's second operand Rt operand.
+ if (HasDualReg(Opcode)) {
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRd(insn) + 1)));
++OpIdx;
@@ -1188,7 +1433,7 @@ static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
"Reg operand expected");
assert((!isPrePost || (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1))
- && "Index mode or tied_to operand expected");
+ && "Offset mode or tied_to operand expected");
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRn(insn))));
++OpIdx;
@@ -1204,19 +1449,22 @@ static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
"Expect 1 reg operand followed by 1 imm operand");
ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub;
+ unsigned IndexMode =
+ (TID.TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift;
if (getAM3IBit(insn) == 1) {
MI.addOperand(MCOperand::CreateReg(0));
// Disassemble the 8-bit immediate offset.
unsigned Imm4H = (insn >> ARMII::ImmHiShift) & 0xF;
unsigned Imm4L = insn & 0xF;
- unsigned Offset = ARM_AM::getAM3Opc(AddrOpcode, (Imm4H << 4) | Imm4L);
+ unsigned Offset = ARM_AM::getAM3Opc(AddrOpcode, (Imm4H << 4) | Imm4L,
+ IndexMode);
MI.addOperand(MCOperand::CreateImm(Offset));
} else {
// Disassemble the offset reg (Rm).
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRm(insn))));
- unsigned Offset = ARM_AM::getAM3Opc(AddrOpcode, 0);
+ unsigned Offset = ARM_AM::getAM3Opc(AddrOpcode, 0, IndexMode);
MI.addOperand(MCOperand::CreateImm(Offset));
}
OpIdx += 2;
@@ -1236,13 +1484,13 @@ static bool DisassembleStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
}
// The algorithm for disassembly of LdStMulFrm is different from others because
-// it explicitly populates the two predicate operands after operand 0 (the base)
-// and operand 1 (the AM4 mode imm). After operand 3, we need to populate the
-// reglist with each affected register encoded as an MCOperand.
+// it explicitly populates the two predicate operands after the base register.
+// After that, we need to populate the reglist with each affected register
+// encoded as an MCOperand.
static bool DisassembleLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
- assert(NumOps >= 5 && "LdStMulFrm expects NumOps >= 5");
+ assert(NumOps >= 4 && "LdStMulFrm expects NumOps >= 4");
NumOpsAdded = 0;
unsigned Base = getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn));
@@ -1260,8 +1508,10 @@ static bool DisassembleLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
MI.addOperand(MCOperand::CreateReg(Base));
// Handling the two predicate operands before the reglist.
- int64_t CondVal = insn >> ARMII::CondShift;
- MI.addOperand(MCOperand::CreateImm(CondVal == 0xF ? 0xE : CondVal));
+ int64_t CondVal = getCondField(insn);
+ if (CondVal == 0xF)
+ return false;
+ MI.addOperand(MCOperand::CreateImm(CondVal));
MI.addOperand(MCOperand::CreateReg(ARM::CPSR));
NumOpsAdded += 3;
@@ -1352,6 +1602,12 @@ static bool DisassembleArithMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass == ARM::GPRRegClassID;
+ // Sanity check the registers, which should not be 15.
+ if (decodeRd(insn) == 15 || decodeRm(insn) == 15)
+ return false;
+ if (ThreeReg && decodeRn(insn) == 15)
+ return false;
+
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRd(insn))));
++OpIdx;
@@ -1376,7 +1632,7 @@ static bool DisassembleArithMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
ARM_AM::ShiftOpc Opc = ARM_AM::no_shift;
if (Opcode == ARM::PKHBT)
Opc = ARM_AM::lsl;
- else if (Opcode == ARM::PKHBT)
+ else if (Opcode == ARM::PKHTB)
Opc = ARM_AM::asr;
getImmShiftSE(Opc, ShiftAmt);
MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(Opc, ShiftAmt)));
@@ -1391,6 +1647,11 @@ static bool DisassembleArithMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
static bool DisassembleSatFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+ // A8.6.183 SSAT
+ // if d == 15 || n == 15 then UNPREDICTABLE;
+ if (decodeRd(insn) == 15 || decodeRm(insn) == 15)
+ return false;
+
const TargetInstrDesc &TID = ARMInsts[Opcode];
NumOpsAdded = TID.getNumOperands() - 2; // ignore predicate operands
@@ -1429,6 +1690,11 @@ static bool DisassembleSatFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
static bool DisassembleExtFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+ // A8.6.220 SXTAB
+ // if d == 15 || m == 15 then UNPREDICTABLE;
+ if (decodeRd(insn) == 15 || decodeRm(insn) == 15)
+ return false;
+
const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
unsigned &OpIdx = NumOpsAdded;
@@ -1611,7 +1877,7 @@ static bool DisassembleVFPBinaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
// A8.6.295 vcvt (floating-point <-> integer)
// Int to FP: VSITOD, VSITOS, VUITOD, VUITOS
// FP to Int: VTOSI[Z|R]D, VTOSI[Z|R]S, VTOUI[Z|R]D, VTOUI[Z|R]S
-//
+//
// A8.6.297 vcvt (floating-point and fixed-point)
// Dd|Sd Dd|Sd(TIED_TO) #fbits(= 16|32 - UInt(imm4:i))
static bool DisassembleVFPConv1Frm(MCInst &MI, unsigned Opcode, uint32_t insn,
@@ -1800,15 +2066,14 @@ static bool DisassembleVFPLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
}
// VFP Load/Store Multiple Instructions.
-// This is similar to the algorithm for LDM/STM in that operand 0 (the base) and
-// operand 1 (the AM4 mode imm) is followed by two predicate operands. It is
-// followed by a reglist of either DPR(s) or SPR(s).
+// We have an optional write back reg, the base, and two predicate operands.
+// It is then followed by a reglist of either DPR(s) or SPR(s).
//
// VLDMD[_UPD], VLDMS[_UPD], VSTMD[_UPD], VSTMS[_UPD]
static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
- assert(NumOps >= 5 && "VFPLdStMulFrm expects NumOps >= 5");
+ assert(NumOps >= 4 && "VFPLdStMulFrm expects NumOps >= 4");
unsigned &OpIdx = NumOpsAdded;
@@ -1827,25 +2092,18 @@ static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
MI.addOperand(MCOperand::CreateReg(Base));
- // Next comes the AM4 Opcode.
- ARM_AM::AMSubMode SubMode = getAMSubModeForBits(getPUBits(insn));
- // Must be either "ia" or "db" submode.
- if (SubMode != ARM_AM::ia && SubMode != ARM_AM::db) {
- DEBUG(errs() << "Illegal addressing mode 4 sub-mode!\n");
- return false;
- }
- MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM4ModeImm(SubMode)));
-
// Handling the two predicate operands before the reglist.
- int64_t CondVal = insn >> ARMII::CondShift;
- MI.addOperand(MCOperand::CreateImm(CondVal == 0xF ? 0xE : CondVal));
+ int64_t CondVal = getCondField(insn);
+ if (CondVal == 0xF)
+ return false;
+ MI.addOperand(MCOperand::CreateImm(CondVal));
MI.addOperand(MCOperand::CreateReg(ARM::CPSR));
- OpIdx += 4;
+ OpIdx += 3;
- bool isSPVFP = (Opcode == ARM::VLDMSIA || Opcode == ARM::VLDMSDB ||
+ bool isSPVFP = (Opcode == ARM::VLDMSIA ||
Opcode == ARM::VLDMSIA_UPD || Opcode == ARM::VLDMSDB_UPD ||
- Opcode == ARM::VSTMSIA || Opcode == ARM::VSTMSDB ||
+ Opcode == ARM::VSTMSIA ||
Opcode == ARM::VSTMSIA_UPD || Opcode == ARM::VSTMSDB_UPD);
unsigned RegClassID = isSPVFP ? ARM::SPRRegClassID : ARM::DPRRegClassID;
@@ -1855,6 +2113,11 @@ static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
// Fill the variadic part of reglist.
unsigned char Imm8 = insn & 0xFF;
unsigned Regs = isSPVFP ? Imm8 : Imm8/2;
+
+ // Apply some sanity checks before proceeding.
+ if (Regs == 0 || (RegD + Regs) > 32 || (!isSPVFP && Regs > 16))
+ return false;
+
for (unsigned i = 0; i < Regs; ++i) {
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID,
RegD + i)));
@@ -2136,7 +2399,7 @@ static unsigned decodeN3VImm(uint32_t insn) {
// Correctly set VLD*/VST*'s TIED_TO GPR, as the asm printer needs it.
static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, bool Store, bool DblSpaced,
- BO B) {
+ unsigned alignment, BO B) {
const TargetInstrDesc &TID = ARMInsts[Opcode];
const TargetOperandInfo *OpInfo = TID.OpInfo;
@@ -2180,9 +2443,10 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
assert((OpIdx+1) < NumOps && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
OpInfo[OpIdx + 1].RegClass < 0 && "Addrmode #6 Operands expected");
+ // addrmode6 := (ops GPR:$addr, i32imm)
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
Rn)));
- MI.addOperand(MCOperand::CreateImm(0)); // Alignment ignored?
+ MI.addOperand(MCOperand::CreateImm(alignment)); // Alignment
OpIdx += 2;
if (WB) {
@@ -2230,9 +2494,10 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
assert((OpIdx+1) < NumOps && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
OpInfo[OpIdx + 1].RegClass < 0 && "Addrmode #6 Operands expected");
+ // addrmode6 := (ops GPR:$addr, i32imm)
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
Rn)));
- MI.addOperand(MCOperand::CreateImm(0)); // Alignment ignored?
+ MI.addOperand(MCOperand::CreateImm(alignment)); // Alignment
OpIdx += 2;
if (WB) {
@@ -2263,6 +2528,92 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
return true;
}
+// A8.6.308, A8.6.311, A8.6.314, A8.6.317.
+static bool Align4OneLaneInst(unsigned elem, unsigned size,
+ unsigned index_align, unsigned &alignment) {
+ unsigned bits = 0;
+ switch (elem) {
+ default:
+ return false;
+ case 1:
+ // A8.6.308
+ if (size == 0)
+ return slice(index_align, 0, 0) == 0;
+ else if (size == 1) {
+ bits = slice(index_align, 1, 0);
+ if (bits != 0 && bits != 1)
+ return false;
+ if (bits == 1)
+ alignment = 16;
+ return true;
+ } else if (size == 2) {
+ bits = slice(index_align, 2, 0);
+ if (bits != 0 && bits != 3)
+ return false;
+ if (bits == 3)
+ alignment = 32;
+ return true;
+ }
+ return true;
+ case 2:
+ // A8.6.311
+ if (size == 0) {
+ if (slice(index_align, 0, 0) == 1)
+ alignment = 16;
+ return true;
+ } else if (size == 1) {
+ if (slice(index_align, 0, 0) == 1)
+ alignment = 32;
+ return true;
+ } else if (size == 2) {
+ if (slice(index_align, 1, 1) != 0)
+ return false;
+ if (slice(index_align, 0, 0) == 1)
+ alignment = 64;
+ return true;
+ }
+ return true;
+ case 3:
+ // A8.6.314
+ if (size == 0) {
+ if (slice(index_align, 0, 0) != 0)
+ return false;
+ return true;
+ } else if (size == 1) {
+ if (slice(index_align, 0, 0) != 0)
+ return false;
+ return true;
+ } else if (size == 2) {
+ if (slice(index_align, 1, 0) != 0)
+ return false;
+ return true;
+ }
+ return true;
+ case 4:
+ // A8.6.317
+ if (size == 0) {
+ if (slice(index_align, 0, 0) == 1)
+ alignment = 32;
+ return true;
+ } else if (size == 1) {
+ if (slice(index_align, 0, 0) == 1)
+ alignment = 64;
+ return true;
+ } else if (size == 2) {
+ bits = slice(index_align, 1, 0);
+ if (bits == 3)
+ return false;
+ if (bits == 1)
+ alignment = 64;
+ else if (bits == 2)
+ alignment = 128;
+ return true;
+ }
+ return true;
+ }
+}
+
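+// Worked example (illustrative): for a VLD1 to-one-lane encoding with
+// size == 1 (16-bit elements) and index_align == 0b0001, the helper above
+// reports alignment = 16 bits and returns true; DisassembleNLdSt() below
+// then passes alignment/8 = 2 bytes to DisassembleNLdSt0() as the
+// addrmode6 alignment operand.
+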
// A7.7
// If L (Inst{21}) == 0, store instructions.
// Find out about double-spaced-ness of the Opcode and pass it on to
@@ -2272,11 +2623,33 @@ static bool DisassembleNLdSt(MCInst &MI, unsigned Opcode, uint32_t insn,
const StringRef Name = ARMInsts[Opcode].Name;
bool DblSpaced = false;
+ // 0 represents standard alignment, i.e., unaligned data access.
+ unsigned alignment = 0;
+
+ unsigned elem = 0; // legal values: {1, 2, 3, 4}
+ if (Name.startswith("VST1") || Name.startswith("VLD1"))
+ elem = 1;
+
+ if (Name.startswith("VST2") || Name.startswith("VLD2"))
+ elem = 2;
+
+ if (Name.startswith("VST3") || Name.startswith("VLD3"))
+ elem = 3;
+
+ if (Name.startswith("VST4") || Name.startswith("VLD4"))
+ elem = 4;
if (Name.find("LN") != std::string::npos) {
// To one lane instructions.
// See, for example, 8.6.317 VLD4 (single 4-element structure to one lane).
+ // Utility function takes number of elements, size, and index_align.
+ if (!Align4OneLaneInst(elem,
+ slice(insn, 11, 10),
+ slice(insn, 7, 4),
+ alignment))
+ return false;
+
// <size> == 16 && Inst{5} == 1 --> DblSpaced = true
if (Name.endswith("16") || Name.endswith("16_UPD"))
DblSpaced = slice(insn, 5, 5) == 1;
@@ -2284,30 +2657,102 @@ static bool DisassembleNLdSt(MCInst &MI, unsigned Opcode, uint32_t insn,
// <size> == 32 && Inst{6} == 1 --> DblSpaced = true
if (Name.endswith("32") || Name.endswith("32_UPD"))
DblSpaced = slice(insn, 6, 6) == 1;
-
+ } else if (Name.find("DUP") != std::string::npos) {
+ // Single element (or structure) to all lanes.
+ // Inst{9-8} encodes the number of element(s) in the structure, with:
+ // 0b00 (VLD1DUP) (for this, the a bit makes sense only for data sizes 16 and 32)
+ // 0b01 (VLD2DUP)
+ // 0b10 (VLD3DUP) (for this, the a bit must be encoded as 0)
+ // 0b11 (VLD4DUP)
+ //
+ // Inst{7-6} encodes the data size, with:
+ // 0b00 => 8, 0b01 => 16, 0b10 => 32
+ //
+ // Inst{4} (the a bit) encodes the align action (0: standard alignment)
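+ //
+ // For example (illustrative): a VLD2DUP encoding with Inst{7-6} = 0b01
+ // (16-bit data) and a = 1 yields alignment = 2 * 16 = 32 bits below.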
+ unsigned elem = slice(insn, 9, 8) + 1;
+ unsigned a = slice(insn, 4, 4);
+ if (elem != 3) {
+ // 0b11 is not a valid encoding for Inst{7-6}.
+ if (slice(insn, 7, 6) == 3)
+ return false;
+ unsigned data_size = 8 << slice(insn, 7, 6);
+ // For VLD1DUP, the a bit makes sense only for data sizes of 16 and 32.
+ if (a && data_size == 8)
+ return false;
+
+ // Now we can calculate the alignment!
+ if (a)
+ alignment = elem * data_size;
+ } else {
+ if (a) {
+ // A8.6.315 VLD3 (single 3-element structure to all lanes)
+ // The a bit must be encoded as 0.
+ return false;
+ }
+ }
} else {
// Multiple n-element structures with type encoded as Inst{11-8}.
// See, for example, A8.6.316 VLD4 (multiple 4-element structures).
- // n == 2 && type == 0b1001 -> DblSpaced = true
- if (Name.startswith("VST2") || Name.startswith("VLD2"))
- DblSpaced = slice(insn, 11, 8) == 9;
-
- // n == 3 && type == 0b0101 -> DblSpaced = true
- if (Name.startswith("VST3") || Name.startswith("VLD3"))
- DblSpaced = slice(insn, 11, 8) == 5;
-
- // n == 4 && type == 0b0001 -> DblSpaced = true
- if (Name.startswith("VST4") || Name.startswith("VLD4"))
- DblSpaced = slice(insn, 11, 8) == 1;
-
+ // Inst{5-4} encodes alignment.
+ unsigned align = slice(insn, 5, 4);
+ switch (align) {
+ default:
+ break;
+ case 1:
+ alignment = 64; break;
+ case 2:
+ alignment = 128; break;
+ case 3:
+ alignment = 256; break;
+ }
+
+ unsigned type = slice(insn, 11, 8);
+ // Reject UNDEFINED instructions based on type and align.
+ // Plus set DblSpaced flag where appropriate.
+ switch (elem) {
+ default:
+ break;
+ case 1:
+ // n == 1
+ // A8.6.307 & A8.6.391
+ if ((type == 7 && slice(align, 1, 1) == 1) ||
+ (type == 10 && align == 3) ||
+ (type == 6 && slice(align, 1, 1) == 1))
+ return false;
+ break;
+ case 2:
+ // n == 2 && type == 0b1001 -> DblSpaced = true
+ // A8.6.310 & A8.6.393
+ if ((type == 8 || type == 9) && align == 3)
+ return false;
+ DblSpaced = (type == 9);
+ break;
+ case 3:
+ // n == 3 && type == 0b0101 -> DblSpaced = true
+ // A8.6.313 & A8.6.395
+ if (slice(insn, 7, 6) == 3 || slice(align, 1, 1) == 1)
+ return false;
+ DblSpaced = (type == 5);
+ break;
+ case 4:
+ // n == 4 && type == 0b0001 -> DblSpaced = true
+ // A8.6.316 & A8.6.397
+ if (slice(insn, 7, 6) == 3)
+ return false;
+ DblSpaced = (type == 1);
+ break;
+ }
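+ // Illustrative: a VLD2 multiple-structures encoding with type = 0b1001
+ // and align = 0b01 is accepted above with DblSpaced = true and
+ // alignment = 64 bits (8 bytes after the alignment/8 conversion below).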
}
return DisassembleNLdSt0(MI, Opcode, insn, NumOps, NumOpsAdded,
- slice(insn, 21, 21) == 0, DblSpaced, B);
+ slice(insn, 21, 21) == 0, DblSpaced, alignment/8, B);
}
// VMOV (immediate)
// Qd/Dd imm
+// VBIC (immediate)
+// VORR (immediate)
+// Qd/Dd imm src(=Qd/Dd)
static bool DisassembleN1RegModImmFrm(MCInst &MI, unsigned Opcode,
uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
@@ -2334,12 +2779,20 @@ static bool DisassembleN1RegModImmFrm(MCInst &MI, unsigned Opcode,
case ARM::VMOVv8i16:
case ARM::VMVNv4i16:
case ARM::VMVNv8i16:
+ case ARM::VBICiv4i16:
+ case ARM::VBICiv8i16:
+ case ARM::VORRiv4i16:
+ case ARM::VORRiv8i16:
esize = ESize16;
break;
case ARM::VMOVv2i32:
case ARM::VMOVv4i32:
case ARM::VMVNv2i32:
case ARM::VMVNv4i32:
+ case ARM::VBICiv2i32:
+ case ARM::VBICiv4i32:
+ case ARM::VORRiv2i32:
+ case ARM::VORRiv4i32:
esize = ESize32;
break;
case ARM::VMOVv1i64:
@@ -2347,7 +2800,7 @@ static bool DisassembleN1RegModImmFrm(MCInst &MI, unsigned Opcode,
esize = ESize64;
break;
default:
- assert(0 && "Unreachable code!");
+ assert(0 && "Unexpected opcode!");
return false;
}
@@ -2356,6 +2809,16 @@ static bool DisassembleN1RegModImmFrm(MCInst &MI, unsigned Opcode,
MI.addOperand(MCOperand::CreateImm(decodeN1VImm(insn, esize)));
NumOpsAdded = 2;
+
+ // VBIC/VORRiv*i* variants have an extra $src = $Vd to be filled in.
+ if (NumOps >= 3 &&
+ (OpInfo[2].RegClass == ARM::DPRRegClassID ||
+ OpInfo[2].RegClass == ARM::QPRRegClassID)) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[0].RegClass,
+ decodeNEONRd(insn))));
+ NumOpsAdded += 1;
+ }
+
return true;
}
@@ -2376,7 +2839,7 @@ enum N2VFlag {
//
// Vector Move Long:
// Qd Dm
-//
+//
// Vector Move Narrow:
// Dd Qm
//
@@ -2518,7 +2981,7 @@ static bool DisassembleNVectorShift(MCInst &MI, unsigned Opcode, uint32_t insn,
assert(OpInfo[OpIdx].RegClass < 0 && "Imm operand expected");
// Add the imm operand.
-
+
// VSHLL has maximum shift count as the imm, inferred from its size.
unsigned Imm;
switch (Opcode) {
@@ -2631,7 +3094,7 @@ static bool DisassembleNVdVnVmOptImm(MCInst &MI, unsigned Opcode, uint32_t insn,
// N3RegFrm.
if (Opcode == ARM::VMOVDneon || Opcode == ARM::VMOVQ)
return true;
-
+
// Dm = Inst{5:3-0} => NEON Rm
// or
// Dm is restricted to D0-D7 if size is 16, D0-D15 otherwise
@@ -2770,7 +3233,7 @@ static bool DisassembleNGetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
ElemSize esize =
Opcode == ARM::VGETLNi32 ? ESize32
: ((Opcode == ARM::VGETLNs16 || Opcode == ARM::VGETLNu16) ? ESize16
- : ESize32);
+ : ESize8);
// Rt = Inst{15-12} => ARM Rd
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
@@ -2852,17 +3315,6 @@ static bool DisassembleNDupFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
return true;
}
-// A8.6.41 DMB
-// A8.6.42 DSB
-// A8.6.49 ISB
-static inline bool MemBarrierInstr(uint32_t insn) {
- unsigned op7_4 = slice(insn, 7, 4);
- if (slice(insn, 31, 8) == 0xf57ff0 && (op7_4 >= 4 && op7_4 <= 6))
- return true;
-
- return false;
-}
-
static inline bool PreLoadOpcode(unsigned Opcode) {
switch(Opcode) {
case ARM::PLDi12: case ARM::PLDrs:
@@ -2878,8 +3330,8 @@ static bool DisassemblePreLoadFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
// Preload Data/Instruction requires either 2 or 3 operands.
- // PLDi, PLDWi, PLIi: addrmode_imm12
- // PLDr[a|m], PLDWr[a|m], PLIr[a|m]: ldst_so_reg
+ // PLDi12, PLDWi12, PLIi12: addrmode_imm12
+ // PLDrs, PLDWrs, PLIrs: ldst_so_reg
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRn(insn))));
@@ -2888,10 +3340,19 @@ static bool DisassemblePreLoadFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
|| Opcode == ARM::PLIi12) {
unsigned Imm12 = slice(insn, 11, 0);
bool Negative = getUBit(insn) == 0;
+
+ // A8.6.118 PLD (literal): a PLDWi12 with Rn == PC is morphed to PLDi12.
+ if (Opcode == ARM::PLDWi12 && slice(insn, 19, 16) == 0xF) {
+ DEBUG(errs() << "Rn == '1111': PLDWi12 morphed to PLDi12\n");
+ MI.setOpcode(ARM::PLDi12);
+ }
+
// -0 is represented specially. All other values are as normal.
+ int Offset = Negative ? -1 * Imm12 : Imm12;
if (Imm12 == 0 && Negative)
- Imm12 = INT32_MIN;
- MI.addOperand(MCOperand::CreateImm(Imm12));
+ Offset = INT32_MIN;
+
+ MI.addOperand(MCOperand::CreateImm(Offset));
NumOpsAdded = 2;
} else {
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
@@ -2917,14 +3378,20 @@ static bool DisassemblePreLoadFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
static bool DisassembleMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
- if (MemBarrierInstr(insn)) {
- // DMBsy, DSBsy, and ISBsy instructions have zero operand and are taken care
- // of within the generic ARMBasicMCBuilder::BuildIt() method.
- //
+ if (Opcode == ARM::DMB || Opcode == ARM::DSB) {
// Inst{3-0} encodes the memory barrier option for the variants.
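+ // For reference (A8.6.41/42): SY = 0b1111, ISH = 0b1011, NSH = 0b0111,
+ // OSH = 0b0011, and each *ST variant clears bit 0 (e.g. ISHST = 0b1010),
+ // so "dmb ish" round-trips as opt = 0b1011.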
- MI.addOperand(MCOperand::CreateImm(slice(insn, 3, 0)));
- NumOpsAdded = 1;
- return true;
+ unsigned opt = slice(insn, 3, 0);
+ switch (opt) {
+ case ARM_MB::SY: case ARM_MB::ST:
+ case ARM_MB::ISH: case ARM_MB::ISHST:
+ case ARM_MB::NSH: case ARM_MB::NSHST:
+ case ARM_MB::OSH: case ARM_MB::OSHST:
+ MI.addOperand(MCOperand::CreateImm(opt));
+ NumOpsAdded = 1;
+ return true;
+ default:
+ return false;
+ }
}
switch (Opcode) {
@@ -2936,6 +3403,11 @@ static bool DisassembleMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
case ARM::WFI:
case ARM::SEV:
return true;
+ case ARM::SWP:
+ case ARM::SWPB:
+ // SWP, SWPB: Rd Rm Rn
+ // Delegate to DisassembleLdStExFrm()....
+ return DisassembleLdStExFrm(MI, Opcode, insn, NumOps, NumOpsAdded, B);
default:
break;
}
@@ -2950,20 +3422,32 @@ static bool DisassembleMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
// opcodes which match the same real instruction. This is needed since there's
// no current handling of optional arguments. Fix here when a better handling
// of optional arguments is implemented.
- if (Opcode == ARM::CPS3p) {
+ if (Opcode == ARM::CPS3p) { // M = 1
+ // Reject the invalid imod values by returning false:
+ // 1. imod=0b01 is a reserved encoding.
+ // 2. imod=0b00: the AsmPrinter cannot handle it, and
+ // (imod=0b00,M=1,iflags!=0) is an invalid combination anyway.
+ if (slice(insn, 19, 18) == 0 || slice(insn, 19, 18) == 1)
+ return false;
MI.addOperand(MCOperand::CreateImm(slice(insn, 19, 18))); // imod
MI.addOperand(MCOperand::CreateImm(slice(insn, 8, 6))); // iflags
MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0))); // mode
NumOpsAdded = 3;
return true;
}
- if (Opcode == ARM::CPS2p) {
+ if (Opcode == ARM::CPS2p) { // mode = 0, M = 0
+ // Let's reject these impossible imod values by returning false:
+ // 1. (imod=0b00,M=0)
+ // 2. (imod=0b01)
+ if (slice(insn, 19, 18) == 0 || slice(insn, 19, 18) == 1)
+ return false;
MI.addOperand(MCOperand::CreateImm(slice(insn, 19, 18))); // imod
MI.addOperand(MCOperand::CreateImm(slice(insn, 8, 6))); // iflags
NumOpsAdded = 2;
return true;
}
- if (Opcode == ARM::CPS1p) {
+ if (Opcode == ARM::CPS1p) { // imod = 0, iflags = 0, M = 1
MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0))); // mode
NumOpsAdded = 1;
return true;
@@ -3142,7 +3626,7 @@ bool ARMBasicMCBuilder::DoPredicateOperands(MCInst& MI, unsigned Opcode,
return false;
}
-
+
/// TryPredicateAndSBitModifier - TryPredicateAndSBitModifier tries to process
/// the possible Predicate and SBitModifier, to build the remaining MCOperand
/// constituents.
@@ -3154,6 +3638,7 @@ bool ARMBasicMCBuilder::TryPredicateAndSBitModifier(MCInst& MI, unsigned Opcode,
const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
const std::string &Name = ARMInsts[Opcode].Name;
unsigned Idx = MI.getNumOperands();
+ uint64_t TSFlags = ARMInsts[Opcode].TSFlags;
// First, we check whether this instr specifies the PredicateOperand through
// a pair of TargetOperandInfos with isPredicate() property.
@@ -3173,14 +3658,23 @@ bool ARMBasicMCBuilder::TryPredicateAndSBitModifier(MCInst& MI, unsigned Opcode,
// like ARM.
//
// A8.6.16 B
- if (Name == "t2Bcc")
- MI.addOperand(MCOperand::CreateImm(CondCode(slice(insn, 25, 22))));
- else if (Name == "tBcc")
- MI.addOperand(MCOperand::CreateImm(CondCode(slice(insn, 11, 8))));
- else
+ // Check for undefined encodings.
+ unsigned cond;
+ if (Name == "t2Bcc") {
+ if ((cond = slice(insn, 25, 22)) >= 14)
+ return false;
+ MI.addOperand(MCOperand::CreateImm(CondCode(cond)));
+ } else if (Name == "tBcc") {
+ if ((cond = slice(insn, 11, 8)) == 14)
+ return false;
+ MI.addOperand(MCOperand::CreateImm(CondCode(cond)));
+ } else
MI.addOperand(MCOperand::CreateImm(ARMCC::AL));
} else {
// ARM instructions get their condition field from Inst{31-28}.
+ // We should reject Inst{31-28} = 0b1111 as invalid encoding.
+ if (!isNEONDomain(TSFlags) && getCondField(insn) == 0xF)
+ return false;
MI.addOperand(MCOperand::CreateImm(CondCode(getCondField(insn))));
}
}
@@ -3243,3 +3737,84 @@ ARMBasicMCBuilder *llvm::CreateMCBuilder(unsigned Opcode, ARMFormat Format) {
return new ARMBasicMCBuilder(Opcode, Format,
ARMInsts[Opcode].getNumOperands());
}
+
+/// tryAddingSymbolicOperand - tryAddingSymbolicOperand tries to add a symbolic
+/// operand in place of the immediate Value in the MCInst. The immediate
+/// Value has had any PC adjustment made by the caller. If the getOpInfo()
+/// function was set as part of the setupBuilderForSymbolicDisassembly() call
+/// then that function is called to get any symbolic information at the
+/// builder's Address for this instruction. If that returns non-zero then the
+/// symbolic information it returns is used to create an MCExpr and that is
+/// added as an operand to the MCInst. This function returns true if it adds
+/// an operand to the MCInst and false otherwise.
+bool ARMBasicMCBuilder::tryAddingSymbolicOperand(uint64_t Value,
+ uint64_t InstSize,
+ MCInst &MI) {
+ if (!GetOpInfo)
+ return false;
+
+ struct LLVMOpInfo1 SymbolicOp;
+ SymbolicOp.Value = Value;
+ if (!GetOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp))
+ return false;
+
+ const MCExpr *Add = NULL;
+ if (SymbolicOp.AddSymbol.Present) {
+ if (SymbolicOp.AddSymbol.Name) {
+ StringRef Name(SymbolicOp.AddSymbol.Name);
+ MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name);
+ Add = MCSymbolRefExpr::Create(Sym, *Ctx);
+ } else {
+ Add = MCConstantExpr::Create(SymbolicOp.AddSymbol.Value, *Ctx);
+ }
+ }
+
+ const MCExpr *Sub = NULL;
+ if (SymbolicOp.SubtractSymbol.Present) {
+ if (SymbolicOp.SubtractSymbol.Name) {
+ StringRef Name(SymbolicOp.SubtractSymbol.Name);
+ MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name);
+ Sub = MCSymbolRefExpr::Create(Sym, *Ctx);
+ } else {
+ Sub = MCConstantExpr::Create(SymbolicOp.SubtractSymbol.Value, *Ctx);
+ }
+ }
+
+ const MCExpr *Off = NULL;
+ if (SymbolicOp.Value != 0)
+ Off = MCConstantExpr::Create(SymbolicOp.Value, *Ctx);
+
+ const MCExpr *Expr;
+ if (Sub) {
+ const MCExpr *LHS;
+ if (Add)
+ LHS = MCBinaryExpr::CreateSub(Add, Sub, *Ctx);
+ else
+ LHS = MCUnaryExpr::CreateMinus(Sub, *Ctx);
+ if (Off != 0)
+ Expr = MCBinaryExpr::CreateAdd(LHS, Off, *Ctx);
+ else
+ Expr = LHS;
+ } else if (Add) {
+ if (Off != 0)
+ Expr = MCBinaryExpr::CreateAdd(Add, Off, *Ctx);
+ else
+ Expr = Add;
+ } else {
+ if (Off != 0)
+ Expr = Off;
+ else
+ Expr = MCConstantExpr::Create(0, *Ctx);
+ }
+
+ if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_ARM_HI16)
+ MI.addOperand(MCOperand::CreateExpr(ARMMCExpr::CreateUpper16(Expr, *Ctx)));
+ else if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_ARM_LO16)
+ MI.addOperand(MCOperand::CreateExpr(ARMMCExpr::CreateLower16(Expr, *Ctx)));
+ else if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_None)
+ MI.addOperand(MCOperand::CreateExpr(Expr));
+ else
+ assert("bad SymbolicOp.VariantKind");
+
+ return true;
+}
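+
+// Illustrative example: if GetOpInfo() reports AddSymbol "foo",
+// SubtractSymbol "bar", and a residual Value of 4, the code above forms
+// the MCExpr (foo - bar) + 4. With only Value = 4 and VariantKind
+// LLVMDisassembler_VariantKind_ARM_HI16, it instead wraps the constant
+// via ARMMCExpr::CreateUpper16(), i.e. an :upper16:(4) operand.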
diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h
index 9c30d33..a7ba141 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h
+++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h
@@ -22,12 +22,17 @@
#define ARMDISASSEMBLERCORE_H
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm-c/Disassembler.h"
#include "ARMBaseInstrInfo.h"
#include "ARMRegisterInfo.h"
#include "ARMDisassembler.h"
namespace llvm {
+class MCContext;
class ARMUtils {
public:
@@ -134,6 +139,31 @@ static inline void setSlice(unsigned &Bits, unsigned From, unsigned To,
Bits |= (Val & Mask) << To;
}
+// Return an integer result equal to the number of bits of x that are ones.
+static inline uint32_t BitCount(uint64_t x) {
+ // c accumulates the total bits set in x.
+ uint32_t c;
+ for (c = 0; x; ++c)
+ x &= x - 1; // Clear the least significant bit set.
+ return c;
+}
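+// Illustrative use: BitCount(0xD) == 3. Each loop iteration clears the
+// lowest set bit (Kernighan's method), so it runs once per set bit.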
+
+static inline bool BitIsSet(const uint64_t value, const uint64_t bit) {
+ return (value & (1ull << bit)) != 0;
+}
+
+static inline bool BitIsClear(const uint64_t value, const uint64_t bit) {
+ return (value & (1ull << bit)) == 0;
+}
+
/// Various utilities for checking the target specific flags.
/// A unary data processing instruction doesn't have an Rn operand.
@@ -141,6 +171,12 @@ static inline bool isUnaryDP(uint64_t TSFlags) {
return (TSFlags & ARMII::UnaryDP);
}
+/// A NEON Domain instruction has cond field (Inst{31-28}) as 0b1111.
+static inline bool isNEONDomain(uint64_t TSFlags) {
+ return (TSFlags & ARMII::DomainNEON) ||
+ (TSFlags & ARMII::DomainNEONA8);
+}
+
/// This four-bit field describes the addressing mode used.
/// See also ARMBaseInstrInfo.h.
static inline unsigned getAddrMode(uint64_t TSFlags) {
@@ -196,7 +232,7 @@ private:
public:
ARMBasicMCBuilder(ARMBasicMCBuilder &B)
: Opcode(B.Opcode), Format(B.Format), NumOps(B.NumOps), Disasm(B.Disasm),
- SP(B.SP) {
+ SP(B.SP), GetOpInfo(0), DisInfo(0), Ctx(0), Address(0) {
Err = 0;
}
@@ -255,6 +291,44 @@ private:
assert(SP);
return slice(SP->ITState, 7, 4);
}
+
+private:
+ //
+ // Hooks for symbolic disassembly via the public 'C' interface.
+ //
+ // The function to get the symbolic information for operands.
+ LLVMOpInfoCallback GetOpInfo;
+ // The pointer to the block of symbolic information for above call back.
+ void *DisInfo;
+ // The assembly context for creating symbols and MCExprs in place of
+ // immediate operands when there is symbolic information.
+ MCContext *Ctx;
+ // The address of the instruction being disassembled.
+ uint64_t Address;
+
+public:
+ void setupBuilderForSymbolicDisassembly(LLVMOpInfoCallback getOpInfo,
+ void *disInfo, MCContext *ctx,
+ uint64_t address) {
+ GetOpInfo = getOpInfo;
+ DisInfo = disInfo;
+ Ctx = ctx;
+ Address = address;
+ }
+
+ uint64_t getBuilderAddress() const { return Address; }
+
+ /// tryAddingSymbolicOperand - tryAddingSymbolicOperand tries to add a symbolic
+ /// operand in place of the immediate Value in the MCInst. The immediate
+ /// Value has had any PC adjustment made by the caller. If the getOpInfo()
+ /// function was set as part of the setupBuilderForSymbolicDisassembly() call
+ /// then that function is called to get any symbolic information at the
+ /// builder's Address for this instruction. If that returns non-zero then the
+ /// symbolic information it returns is used to create an MCExpr and that is
+ /// added as an operand to the MCInst. This function returns true if it adds
+ /// an operand to the MCInst and false otherwise.
+ bool tryAddingSymbolicOperand(uint64_t Value, uint64_t InstSize, MCInst &MI);
+
};
} // namespace llvm
diff --git a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
index 23372e0..8d39982 100644
--- a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
+++ b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
@@ -108,6 +108,8 @@ static inline bool IsGPR(unsigned RegClass) {
// Utilities for 32-bit Thumb instructions.
+static inline bool BadReg(uint32_t n) { return n == 13 || n == 15; }
+
// Extract imm4: Inst{19-16}.
static inline unsigned getImm4(uint32_t insn) {
return slice(insn, 19, 16);
@@ -398,9 +400,17 @@ static bool DisassembleThumb1General(MCInst &MI, unsigned Opcode, uint32_t insn,
assert(OpInfo[OpIdx].RegClass < 0 &&
!OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()
&& "Pure imm operand expected");
- MI.addOperand(MCOperand::CreateImm(UseRt ? getT1Imm8(insn)
- : (Imm3 ? getT1Imm3(insn)
- : getT1Imm5(insn))));
+ unsigned Imm = 0;
+ if (UseRt)
+ Imm = getT1Imm8(insn);
+ else if (Imm3)
+ Imm = getT1Imm3(insn);
+ else {
+ Imm = getT1Imm5(insn);
+ ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 12, 11));
+ getImmShiftSE(ShOp, Imm);
+ }
+ MI.addOperand(MCOperand::CreateImm(Imm));
}
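+ // (Illustrative: a Thumb1 "lsrs r0, r1, #32" encodes imm5 = 0; the
+ // getImmShiftSE() call above rewrites that 0 into a shift amount of 32,
+ // mirroring the A8.4.1 decoding of immediate shifts.)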
++OpIdx;
@@ -469,6 +479,7 @@ static bool DisassembleThumb1DP(MCInst &MI, unsigned Opcode, uint32_t insn,
// tBX_RET: 0 operand
// tBX_RET_vararg: Rm
// tBLXr_r9: Rm
+// tBRIND: Rm
static bool DisassembleThumb1Special(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
@@ -476,11 +487,17 @@ static bool DisassembleThumb1Special(MCInst &MI, unsigned Opcode, uint32_t insn,
if (NumOps == 0)
return true;
- // BX/BLX has 1 reg operand: Rm.
- if (NumOps == 1) {
+ // BX/BLX/tBRIND (indirect branch, i.e., mov pc, Rm) has 1 reg operand: Rm.
+ if (Opcode==ARM::tBLXr_r9 || Opcode==ARM::tBX_Rm || Opcode==ARM::tBRIND) {
+ if (Opcode != ARM::tBRIND) {
+ // Handling the two predicate operands before the reg operand.
+ if (!B->DoPredicateOperands(MI, Opcode, insn, NumOps))
+ return false;
+ NumOpsAdded += 2;
+ }
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
getT1Rm(insn))));
- NumOpsAdded = 1;
+ NumOpsAdded += 1;
return true;
}
@@ -598,7 +615,7 @@ static bool DisassembleThumb2Ldpci(MCInst &MI, unsigned Opcode,
// A6.2.4 Load/store single data item
//
-// Load/Store Register (reg|imm): tRd tRn imm5 tRm
+// Load/Store Register (reg|imm): tRd tRn imm5|tRm
// Load Register Signed Byte|Halfword: tRd tRn tRm
static bool DisassembleThumb1LdSt(unsigned opA, MCInst &MI, unsigned Opcode,
uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
@@ -607,11 +624,6 @@ static bool DisassembleThumb1LdSt(unsigned opA, MCInst &MI, unsigned Opcode,
const TargetOperandInfo *OpInfo = TID.OpInfo;
unsigned &OpIdx = NumOpsAdded;
- // Table A6-5 16-bit Thumb Load/store instructions
- // opA = 0b0101 for STR/LDR (register) and friends.
- // Otherwise, we have STR/LDR (immediate) and friends.
- bool Imm5 = (opA != 5);
-
assert(NumOps >= 2
&& OpInfo[0].RegClass == ARM::tGPRRegClassID
&& OpInfo[1].RegClass == ARM::tGPRRegClassID
@@ -624,28 +636,28 @@ static bool DisassembleThumb1LdSt(unsigned opA, MCInst &MI, unsigned Opcode,
getT1tRn(insn))));
OpIdx = 2;
- // We have either { imm5, tRm } or { tRm } remaining.
- // Process the imm5 first. Note that STR/LDR (register) should skip the imm5
- // offset operand for t_addrmode_s[1|2|4].
+ // We have either { imm5 } or { tRm } remaining.
+ // Note that STR/LDR (register) should skip the imm5 offset operand for
+ // t_addrmode_s[1|2|4].
assert(OpIdx < NumOps && "More operands expected");
if (OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() &&
!OpInfo[OpIdx].isOptionalDef()) {
-
- MI.addOperand(MCOperand::CreateImm(Imm5 ? getT1Imm5(insn) : 0));
+ // Table A6-5 16-bit Thumb Load/store instructions
+ // opA = 0b0101 for STR/LDR (register) and friends.
+ // Otherwise, we have STR/LDR (immediate) and friends.
+ assert(opA != 5 && "Immediate operand expected for this opcode");
+ MI.addOperand(MCOperand::CreateImm(getT1Imm5(insn)));
+ ++OpIdx;
+ } else {
+ // The next reg operand is tRm, the offset.
+ assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID
+ && "Thumb reg operand expected");
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
+ getT1tRm(insn))));
++OpIdx;
}
-
- // The next reg operand is tRm, the offset.
- assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID
- && "Thumb reg operand expected");
- MI.addOperand(MCOperand::CreateReg(
- Imm5 ? 0
- : getRegisterEnum(B, ARM::tGPRRegClassID,
- getT1tRm(insn))));
- ++OpIdx;
-
return true;
}
@@ -895,6 +907,10 @@ static bool DisassembleThumb1LdStMul(bool Ld, MCInst &MI, unsigned Opcode,
}
unsigned RegListBits = slice(insn, 7, 0);
+ if (BitCount(RegListBits) < 1) {
+ DEBUG(errs() << "if BitCount(registers) < 1 then UNPREDICTABLE\n");
+ return false;
+ }
// Fill the variadic part of reglist.
for (unsigned i = 0; i < 8; ++i)
@@ -945,6 +961,11 @@ static bool DisassembleThumb1CondBr(MCInst &MI, unsigned Opcode, uint32_t insn,
: (int)Imm8));
// Predicate operands by ARMBasicMCBuilder::TryPredicateAndSBitModifier().
+ // But note that for tBcc, if cond = '1110' then UNDEFINED.
+ if (Opcode == ARM::tBcc && slice(insn, 11, 8) == 14) {
+ DEBUG(errs() << "if cond = '1110' then UNDEFINED\n");
+ return false;
+ }
NumOpsAdded = 1;
return true;
@@ -965,11 +986,7 @@ static bool DisassembleThumb1Br(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned Imm11 = getT1Imm11(insn);
- // When executing a Thumb instruction, PC reads as the address of the current
- // instruction plus 4. The assembler subtracts 4 from the difference between
- // the branch instruction and the target address, disassembler has to add 4 to
- // to compensate.
- MI.addOperand(MCOperand::CreateImm(SignExtend32<12>(Imm11 << 1) + 4));
+ MI.addOperand(MCOperand::CreateImm(SignExtend32<12>(Imm11 << 1)));
NumOpsAdded = 1;
@@ -1129,8 +1146,12 @@ static bool DisassembleThumb2SRS(MCInst &MI, unsigned Opcode, uint32_t insn,
// t2RFE[IA|DB]W/t2RFE[IA|DB]: Rn
static bool DisassembleThumb2RFE(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRn(insn))));
+ unsigned Rn = decodeRn(insn);
+ if (Rn == 15) {
+ DEBUG(errs() << "if n == 15 then UNPREDICTABLE\n");
+ return false;
+ }
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B,ARM::GPRRegClassID,Rn)));
NumOpsAdded = 1;
return true;
}
@@ -1149,7 +1170,7 @@ static bool DisassembleThumb2LdStMul(MCInst &MI, unsigned Opcode, uint32_t insn,
Opcode == ARM::t2STMIA || Opcode == ARM::t2STMIA_UPD ||
Opcode == ARM::t2STMDB || Opcode == ARM::t2STMDB_UPD)
&& "Unexpected opcode");
- assert(NumOps >= 5 && "Thumb2 LdStMul expects NumOps >= 5");
+ assert(NumOps >= 4 && "Thumb2 LdStMul expects NumOps >= 4");
NumOpsAdded = 0;
@@ -1203,45 +1224,79 @@ static bool DisassembleThumb2LdStEx(MCInst &MI, unsigned Opcode, uint32_t insn,
OpIdx = 0;
assert(NumOps >= 2
- && OpInfo[0].RegClass == ARM::GPRRegClassID
- && OpInfo[1].RegClass == ARM::GPRRegClassID
+ && OpInfo[0].RegClass > 0
+ && OpInfo[1].RegClass > 0
&& "Expect >=2 operands and first two as reg operands");
bool isStore = (ARM::t2STREX <= Opcode && Opcode <= ARM::t2STREXH);
bool isSW = (Opcode == ARM::t2LDREX || Opcode == ARM::t2STREX);
bool isDW = (Opcode == ARM::t2LDREXD || Opcode == ARM::t2STREXD);
+ unsigned Rt = decodeRd(insn);
+ unsigned Rt2 = decodeRs(insn); // But note that this is Rd for t2STREX.
+ unsigned Rd = decodeRm(insn);
+ unsigned Rn = decodeRn(insn);
+
+ // Some sanity checking first.
+ if (isStore) {
+ // if d == n || d == t then UNPREDICTABLE
+ // if d == n || d == t || d == t2 then UNPREDICTABLE
+ if (isDW) {
+ if (Rd == Rn || Rd == Rt || Rd == Rt2) {
+ DEBUG(errs() << "if d == n || d == t || d == t2 then UNPREDICTABLE\n");
+ return false;
+ }
+ } else {
+ if (isSW) {
+ if (Rt2 == Rn || Rt2 == Rt) {
+ DEBUG(errs() << "if d == n || d == t then UNPREDICTABLE\n");
+ return false;
+ }
+ } else {
+ if (Rd == Rn || Rd == Rt) {
+ DEBUG(errs() << "if d == n || d == t then UNPREDICTABLE\n");
+ return false;
+ }
+ }
+ }
+ } else {
+ // Load
+ // A8.6.71 LDREXD
+ // if t == t2 then UNPREDICTABLE
+ if (isDW && Rt == Rt2) {
+ DEBUG(errs() << "if t == t2 then UNPREDICTABLE\n");
+ return false;
+ }
+ }
+
// Add the destination operand for store.
if (isStore) {
MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, ARM::GPRRegClassID,
- isSW ? decodeRs(insn) : decodeRm(insn))));
+ getRegisterEnum(B, OpInfo[OpIdx].RegClass,
+ isSW ? Rt2 : Rd)));
++OpIdx;
}
// Source operand for store and destination operand for load.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRd(insn))));
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
+ Rt)));
++OpIdx;
// Thumb2 doubleword complication: with an extra source/destination operand.
if (isDW) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRs(insn))));
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B,OpInfo[OpIdx].RegClass,
+ Rt2)));
++OpIdx;
}
// Finally add the pointer operand.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRn(insn))));
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
+ Rn)));
++OpIdx;
return true;
}
-// LLVM, as of Jan-05-2010, does not output <Rt2>, i.e., Rs, in the asm.
-// Whereas the ARM Arch. Manual does not require that t2 = t+1 like in ARM ISA.
-//
// t2LDRDi8: Rd Rs Rn imm8s4 (offset mode)
// t2LDRDpci: Rd Rs imm8s4 (Not decoded, prefer the generic t2LDRDi8 version)
// t2STRDi8: Rd Rs Rn imm8s4 (offset mode)
@@ -1255,18 +1310,50 @@ static bool DisassembleThumb2LdStDual(MCInst &MI, unsigned Opcode,
if (!OpInfo) return false;
assert(NumOps >= 4
- && OpInfo[0].RegClass == ARM::GPRRegClassID
- && OpInfo[1].RegClass == ARM::GPRRegClassID
- && OpInfo[2].RegClass == ARM::GPRRegClassID
+ && OpInfo[0].RegClass > 0
+ && OpInfo[0].RegClass == OpInfo[1].RegClass
+ && OpInfo[2].RegClass > 0
&& OpInfo[3].RegClass < 0
&& "Expect >= 4 operands and first 3 as reg operands");
+ // Thumb allows specifying Rt and Rt2, unlike ARM (which has Rt2 == Rt+1).
+ unsigned Rt = decodeRd(insn);
+ unsigned Rt2 = decodeRs(insn);
+ unsigned Rn = decodeRn(insn);
+
+ // Some sanity checking first.
+
+ // A8.6.67 LDRD (literal) has its W bit as (0).
+ if (Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2LDRD_PRE ||
+ Opcode == ARM::t2LDRD_POST) {
+ if (Rn == 15 && slice(insn, 21, 21) != 0)
+ return false;
+ } else {
+ // For Dual Store, PC cannot be used as the base register.
+ if (Rn == 15) {
+ DEBUG(errs() << "if n == 15 then UNPREDICTABLE\n");
+ return false;
+ }
+ }
+ if (Rt == Rt2) {
+ DEBUG(errs() << "if t == t2 then UNPREDICTABLE\n");
+ return false;
+ }
+ if (Opcode != ARM::t2LDRDi8 && Opcode != ARM::t2STRDi8) {
+ if (Rn == Rt || Rn == Rt2) {
+ DEBUG(errs() << "if wback && (n == t || n == t2) then UNPREDICTABLE\n");
+ return false;
+ }
+ }
+
// Add the <Rt> <Rt2> operands.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ unsigned RegClassPair = OpInfo[0].RegClass;
+ unsigned RegClassBase = OpInfo[2].RegClass;
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassPair,
decodeRd(insn))));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassPair,
decodeRs(insn))));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassBase,
decodeRn(insn))));
// Finally add (+/-)imm8*4, depending on the U bit.
@@ -1394,9 +1481,12 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn,
if (OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate()
&& !OpInfo[OpIdx].isOptionalDef()) {
- if (Thumb2ShiftOpcode(Opcode))
- MI.addOperand(MCOperand::CreateImm(getShiftAmtBits(insn)));
- else {
+ if (Thumb2ShiftOpcode(Opcode)) {
+ unsigned Imm = getShiftAmtBits(insn);
+ ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 5, 4));
+ getImmShiftSE(ShOp, Imm);
+ MI.addOperand(MCOperand::CreateImm(Imm));
+ } else {
// Build the constant shift specifier operand.
unsigned bits2 = getShiftTypeBits(insn);
unsigned imm5 = getShiftAmtBits(insn);
@@ -1421,7 +1511,8 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn,
static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode,
uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
- const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ const TargetInstrDesc &TID = ARMInsts[Opcode];
+ const TargetOperandInfo *OpInfo = TID.OpInfo;
unsigned &OpIdx = NumOpsAdded;
OpIdx = 0;
@@ -1448,8 +1539,15 @@ static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode,
DEBUG(errs()<<"Thumb2 encoding error: d==15 for DPModImm 2-reg instr.\n");
return false;
}
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RnRegClassID,
- decodeRn(insn))));
+ int Idx;
+ if ((Idx = TID.getOperandConstraint(OpIdx, TOI::TIED_TO)) != -1) {
+ // The reg operand is tied to the first reg operand.
+ MI.addOperand(MI.getOperand(Idx));
+ } else {
+ // Add second reg operand.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RnRegClassID,
+ decodeRn(insn))));
+ }
++OpIdx;
}
@@ -1518,7 +1616,7 @@ static bool DisassembleThumb2Sat(MCInst &MI, unsigned Opcode, uint32_t insn,
// o t2ADDri12, t2SUBri12: Rs Rn imm12
// o t2LEApcrel (ADR): Rs imm12
// o t2BFC (BFC): Rs Ro(TIED_TO) bf_inv_mask_imm
-// o t2BFI (BFI) (Currently not defined in LLVM as of Jan-07-2010)
+// o t2BFI (BFI): Rs Ro(TIED_TO) Rn bf_inv_mask_imm
// o t2MOVi16: Rs imm16
// o t2MOVTi16: Rs imm16
// o t2SBFX (SBFX): Rs Rn lsb width
@@ -1579,9 +1677,10 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode,
if (Opcode == ARM::t2ADDri12 || Opcode == ARM::t2SUBri12
|| Opcode == ARM::t2LEApcrel)
MI.addOperand(MCOperand::CreateImm(getIImm3Imm8(insn)));
- else if (Opcode == ARM::t2MOVi16 || Opcode == ARM::t2MOVTi16)
- MI.addOperand(MCOperand::CreateImm(getImm16(insn)));
- else if (Opcode == ARM::t2BFC || Opcode == ARM::t2BFI) {
+ else if (Opcode == ARM::t2MOVi16 || Opcode == ARM::t2MOVTi16) {
+ if (!B->tryAddingSymbolicOperand(getImm16(insn), 4, MI))
+ MI.addOperand(MCOperand::CreateImm(getImm16(insn)));
+ } else if (Opcode == ARM::t2BFC || Opcode == ARM::t2BFI) {
uint32_t mask = 0;
if (getBitfieldInvMask(insn, mask))
MI.addOperand(MCOperand::CreateImm(mask));
@@ -1625,8 +1724,7 @@ static inline bool t2MiscCtrlInstr(uint32_t insn) {
// A8.6.26
// t2BXJ -> Rn
//
-// Miscellaneous control: t2DMBsy (and its t2DMB variants),
-// t2DSBsy (and its t2DSB varianst), t2ISBsy, t2CLREX
+// Miscellaneous control:
// -> no operand (except pred-imm pred-ccr for CLREX, memory barrier variants)
//
// Hint: t2NOP, t2YIELD, t2WFE, t2WFI, t2SEV
@@ -1643,6 +1741,22 @@ static bool DisassembleThumb2BrMiscCtrl(MCInst &MI, unsigned Opcode,
if (NumOps == 0)
return true;
+ if (Opcode == ARM::t2DMB || Opcode == ARM::t2DSB) {
+ // Inst{3-0} encodes the memory barrier option for the variants.
+ unsigned opt = slice(insn, 3, 0);
+ switch (opt) {
+ case ARM_MB::SY: case ARM_MB::ST:
+ case ARM_MB::ISH: case ARM_MB::ISHST:
+ case ARM_MB::NSH: case ARM_MB::NSHST:
+ case ARM_MB::OSH: case ARM_MB::OSHST:
+ MI.addOperand(MCOperand::CreateImm(opt));
+ NumOpsAdded = 1;
+ return true;
+ default:
+ return false;
+ }
+ }
+
if (t2MiscCtrlInstr(insn))
return true;
@@ -1719,6 +1833,17 @@ static bool DisassembleThumb2BrMiscCtrl(MCInst &MI, unsigned Opcode,
return true;
}
+ // Some instructions have predicate operands before the immediate.
+ if (Opcode == ARM::tBLXi_r9 || Opcode == ARM::tBLr9) {
+ // Handling the two predicate operands before the imm operand.
+ if (B->DoPredicateOperands(MI, Opcode, insn, NumOps))
+ NumOpsAdded += 2;
+ else {
+ DEBUG(errs() << "Expected predicate operands not found.\n");
+ return false;
+ }
+ }
+
// Add the imm operand.
int Offset = 0;
@@ -1739,13 +1864,12 @@ static bool DisassembleThumb2BrMiscCtrl(MCInst &MI, unsigned Opcode,
Offset = decodeImm32_BLX(insn);
break;
}
- // When executing a Thumb instruction, PC reads as the address of the current
- // instruction plus 4. The assembler subtracts 4 from the difference between
- // the branch instruction and the target address, disassembler has to add 4 to
- // to compensate.
- MI.addOperand(MCOperand::CreateImm(Offset + 4));
- NumOpsAdded = 1;
+ if (!B->tryAddingSymbolicOperand(Offset + B->getBuilderAddress() + 4, 4, MI))
+ MI.addOperand(MCOperand::CreateImm(Offset));
+
+ // This is an increment as some predicate operands may have been added first.
+ NumOpsAdded += 1;
return true;
}
@@ -1787,7 +1911,7 @@ static bool DisassembleThumb2PreLoad(MCInst &MI, unsigned Opcode, uint32_t insn,
decodeRn(insn))));
++OpIdx;
- if (OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) {
+ if (OpInfo[OpIdx].RegClass == ARM::rGPRRegClassID) {
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRm(insn))));
} else {
@@ -1795,17 +1919,17 @@ static bool DisassembleThumb2PreLoad(MCInst &MI, unsigned Opcode, uint32_t insn,
&& !OpInfo[OpIdx].isOptionalDef()
&& "Pure imm operand expected");
int Offset = 0;
- if (slice(insn, 19, 16) == 0xFF) {
- bool Negative = slice(insn, 23, 23) == 0;
- unsigned Imm12 = getImm12(insn);
- Offset = Negative ? -1 - Imm12 : 1 * Imm12;
- } else if (Opcode == ARM::t2PLDi8 || Opcode == ARM::t2PLDWi8 ||
- Opcode == ARM::t2PLIi8) {
+ if (Opcode == ARM::t2PLDi8 || Opcode == ARM::t2PLDWi8 ||
+ Opcode == ARM::t2PLIi8) {
// A8.6.117 Encoding T2: add = FALSE
unsigned Imm8 = getImm8(insn);
- Offset = -1 - Imm8;
- } else // The i12 forms. See, for example, A8.6.117 Encoding T1.
+ Offset = -1 * Imm8;
+ } else {
+ // The i12 forms. See, for example, A8.6.117 Encoding T1.
+ // Note that currently t2PLDi12 also handles the previously named t2PLDpci
+ // opcode; that's why we use decodeImm12(insn), which returns +/- imm12.
Offset = decodeImm12(insn);
+ }
MI.addOperand(MCOperand::CreateImm(Offset));
}
++OpIdx;
@@ -1820,6 +1944,87 @@ static bool DisassembleThumb2PreLoad(MCInst &MI, unsigned Opcode, uint32_t insn,
return true;
}
+static bool BadRegsThumb2LdSt(unsigned Opcode, uint32_t insn, bool Load,
+ unsigned R0, unsigned R1, unsigned R2, bool UseRm, bool WB) {
+
+ // Inst{22-21} encodes the data item transferred for load/store.
+ // For single word, it is encoded as ob10.
+ bool Word = (slice(insn, 22, 21) == 2);
+ bool Half = (slice(insn, 22, 21) == 1);
+ bool Byte = (slice(insn, 22, 21) == 0);
+
+ if (UseRm && BadReg(R2)) {
+ DEBUG(errs() << "if BadReg(m) then UNPREDICTABLE\n");
+ return true;
+ }
+
+ if (Load) {
+ if (!Word && R0 == 13) {
+ DEBUG(errs() << "if t == 13 then UNPREDICTABLE\n");
+ return true;
+ }
+ if (Byte) {
+ if (WB && R0 == 15 && slice(insn, 10, 8) == 3) {
+ // A8.6.78 LDRSB (immediate) Encoding T2 (errata markup 8.0)
+ DEBUG(errs() << "if t == 15 && PUW == '011' then UNPREDICTABLE\n");
+ return true;
+ }
+ }
+ // A6.3.8 Load halfword, memory hints
+ if (Half) {
+ if (WB) {
+ if (R0 == R1) {
+ // A8.6.82 LDRSH (immediate) Encoding T2
+ DEBUG(errs() << "if WB && n == t then UNPREDICTABLE\n");
+ return true;
+ }
+ if (R0 == 15 && slice(insn, 10, 8) == 3) {
+ // A8.6.82 LDRSH (immediate) Encoding T2 (errata markup 8.0)
+ DEBUG(errs() << "if t == 15 && PUW == '011' then UNPREDICTABLE\n");
+ return true;
+ }
+ } else {
+ if (Opcode == ARM::t2LDRHi8 || Opcode == ARM::t2LDRSHi8) {
+ if (R0 == 15 && slice(insn, 10, 8) == 4) {
+ // A8.6.82 LDRSH (immediate) Encoding T2
+ DEBUG(errs() << "if Rt == '1111' and PUW == '100' then SEE"
+ << " \"Unallocated memory hints\"\n");
+ return true;
+ }
+ } else {
+ if (R0 == 15) {
+ // A8.6.82 LDRSH (immediate) Encoding T1
+ DEBUG(errs() << "if Rt == '1111' then SEE"
+ << " \"Unallocated memory hints\"\n");
+ return true;
+ }
+ }
+ }
+ }
+ } else {
+ if (WB && R0 == R1) {
+ DEBUG(errs() << "if wback && n == t then UNPREDICTABLE\n");
+ return true;
+ }
+ if ((WB && R0 == 15) || (!WB && R1 == 15)) {
+ DEBUG(errs() << "if Rn == '1111' then UNDEFINED\n");
+ return true;
+ }
+ if (Word) {
+ if ((WB && R1 == 15) || (!WB && R0 == 15)) {
+ DEBUG(errs() << "if t == 15 then UNPREDICTABLE\n");
+ return true;
+ }
+ } else {
+ if ((WB && BadReg(R1)) || (!WB && BadReg(R0))) {
+ DEBUG(errs() << "if BadReg(t) then UNPREDICTABLE\n");
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
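+// Illustrative: a halfword load whose Rt decodes to 13 trips the
+// "!Word && R0 == 13" check above and is rejected as UNPREDICTABLE,
+// matching the A6.3.8 rules quoted in the DEBUG output.
+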
// A6.3.10 Store single data item
// A6.3.9 Load byte, memory hints
// A6.3.8 Load halfword, memory hints
@@ -1865,16 +2070,16 @@ static bool DisassembleThumb2LdSt(bool Load, MCInst &MI, unsigned Opcode,
OpIdx = 0;
assert(NumOps >= 3 &&
- OpInfo[0].RegClass == ARM::GPRRegClassID &&
- OpInfo[1].RegClass == ARM::GPRRegClassID &&
+ OpInfo[0].RegClass > 0 &&
+ OpInfo[1].RegClass > 0 &&
"Expect >= 3 operands and first two as reg operands");
- bool ThreeReg = (OpInfo[2].RegClass == ARM::GPRRegClassID);
+ bool ThreeReg = (OpInfo[2].RegClass > 0);
bool TIED_TO = ThreeReg && TID.getOperandConstraint(2, TOI::TIED_TO) != -1;
bool Imm12 = !ThreeReg && slice(insn, 23, 23) == 1; // ARMInstrThumb2.td
// Build the register operands, followed by the immediate.
- unsigned R0, R1, R2 = 0;
+ unsigned R0 = 0, R1 = 0, R2 = 0;
unsigned Rd = decodeRd(insn);
int Imm = 0;
@@ -1905,19 +2110,24 @@ static bool DisassembleThumb2LdSt(bool Load, MCInst &MI, unsigned Opcode,
Imm = decodeImm8(insn);
}
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
R0)));
++OpIdx;
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
R1)));
++OpIdx;
if (ThreeReg) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ // This could be an offset register or a TIED_TO register.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B,OpInfo[OpIdx].RegClass,
R2)));
++OpIdx;
}
+ if (BadRegsThumb2LdSt(Opcode, insn, Load, R0, R1, R2, ThreeReg && !TIED_TO,
+ TIED_TO))
+ return false;
+
assert(OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate()
&& !OpInfo[OpIdx].isOptionalDef()
&& "Pure imm operand expected");
@@ -1947,25 +2157,25 @@ static bool DisassembleThumb2DPReg(MCInst &MI, unsigned Opcode, uint32_t insn,
OpIdx = 0;
assert(NumOps >= 2 &&
- OpInfo[0].RegClass == ARM::rGPRRegClassID &&
- OpInfo[1].RegClass == ARM::rGPRRegClassID &&
+ OpInfo[0].RegClass > 0 &&
+ OpInfo[1].RegClass > 0 &&
"Expect >= 2 operands and first two as reg operands");
// Build the register operands, followed by the optional rotation amount.
- bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass == ARM::rGPRRegClassID;
+ bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass > 0;
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
decodeRs(insn))));
++OpIdx;
if (ThreeReg) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B,OpInfo[OpIdx].RegClass,
decodeRn(insn))));
++OpIdx;
}
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
decodeRm(insn))));
++OpIdx;
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index 1499da0..fc2aa75 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -29,6 +29,9 @@ StringRef ARMInstPrinter::getOpcodeName(unsigned Opcode) const {
return getInstructionName(Opcode);
}
+StringRef ARMInstPrinter::getRegName(unsigned RegNo) const {
+ return getRegisterName(RegNo);
+}
void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
unsigned Opcode = MI->getOpcode();
@@ -133,9 +136,10 @@ static void printSOImm(raw_ostream &O, int64_t V, raw_ostream *CommentStream,
unsigned Rot = ARM_AM::getSOImmValRot(V);
// Print low-level immediate formation info, per
- // A5.1.3: "Data-processing operands - Immediate".
+ // A5.2.3: Data-processing (immediate), and
+ // A5.2.4: Modified immediate constants in ARM instructions
if (Rot) {
- O << "#" << Imm << ", " << Rot;
+ O << "#" << Imm << ", #" << Rot;
// Pretty printed version.
if (CommentStream)
*CommentStream << (int)ARM_AM::rotr32(Imm, Rot) << "\n";
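+ // (Illustrative: "#16, #20" denotes 16 rotated right by 20 bits, i.e.
+ // the pretty-printed comment above would show 65536.)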
@@ -178,18 +182,16 @@ void ARMInstPrinter::printSORegOperand(const MCInst *MI, unsigned OpNum,
}
}
+//===--------------------------------------------------------------------===//
+// Addressing Mode #2
+//===--------------------------------------------------------------------===//
-void ARMInstPrinter::printAddrMode2Operand(const MCInst *MI, unsigned Op,
- raw_ostream &O) {
+void ARMInstPrinter::printAM2PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(Op);
const MCOperand &MO2 = MI->getOperand(Op+1);
const MCOperand &MO3 = MI->getOperand(Op+2);
- if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
- printOperand(MI, Op, O);
- return;
- }
-
O << "[" << getRegisterName(MO1.getReg());
if (!MO2.getReg()) {
@@ -212,6 +214,50 @@ void ARMInstPrinter::printAddrMode2Operand(const MCInst *MI, unsigned Op,
O << "]";
}
+void ARMInstPrinter::printAM2PostIndexOp(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(Op);
+ const MCOperand &MO2 = MI->getOperand(Op+1);
+ const MCOperand &MO3 = MI->getOperand(Op+2);
+
+ O << "[" << getRegisterName(MO1.getReg()) << "], ";
+
+ if (!MO2.getReg()) {
+ unsigned ImmOffs = ARM_AM::getAM2Offset(MO3.getImm());
+ O << '#'
+ << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm()))
+ << ImmOffs;
+ return;
+ }
+
+ O << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm()))
+ << getRegisterName(MO2.getReg());
+
+ if (unsigned ShImm = ARM_AM::getAM2Offset(MO3.getImm()))
+ O << ", "
+ << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO3.getImm()))
+ << " #" << ShImm;
+}
+
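+// Illustrative output (assuming typical operands): an immediate offset
+// prints as "[r1], #-4", while a register offset with a shift prints as
+// "[r1], r2, lsl #2".
+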
+void ARMInstPrinter::printAddrMode2Operand(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(Op);
+
+ if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, Op, O);
+ return;
+ }
+
+ const MCOperand &MO3 = MI->getOperand(Op+2);
+ unsigned IdxMode = ARM_AM::getAM2IdxMode(MO3.getImm());
+
+ if (IdxMode == ARMII::IndexModePost) {
+ printAM2PostIndexOp(MI, Op, O);
+ return;
+ }
+ printAM2PreOrOffsetIndexOp(MI, Op, O);
+}
+
void ARMInstPrinter::printAddrMode2OffsetOperand(const MCInst *MI,
unsigned OpNum,
raw_ostream &O) {
@@ -235,11 +281,35 @@ void ARMInstPrinter::printAddrMode2OffsetOperand(const MCInst *MI,
<< " #" << ShImm;
}
-void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- const MCOperand &MO1 = MI->getOperand(OpNum);
- const MCOperand &MO2 = MI->getOperand(OpNum+1);
- const MCOperand &MO3 = MI->getOperand(OpNum+2);
+//===--------------------------------------------------------------------===//
+// Addressing Mode #3
+//===--------------------------------------------------------------------===//
+
+void ARMInstPrinter::printAM3PostIndexOp(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(Op);
+ const MCOperand &MO2 = MI->getOperand(Op+1);
+ const MCOperand &MO3 = MI->getOperand(Op+2);
+
+ O << "[" << getRegisterName(MO1.getReg()) << "], ";
+
+ if (MO2.getReg()) {
+ O << (char)ARM_AM::getAM3Op(MO3.getImm())
+ << getRegisterName(MO2.getReg());
+ return;
+ }
+
+ unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm());
+ O << '#'
+ << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm()))
+ << ImmOffs;
+}
+
+void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(Op);
+ const MCOperand &MO2 = MI->getOperand(Op+1);
+ const MCOperand &MO3 = MI->getOperand(Op+2);
O << '[' << getRegisterName(MO1.getReg());
@@ -256,6 +326,18 @@ void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned OpNum,
O << ']';
}
+void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ const MCOperand &MO3 = MI->getOperand(Op+2);
+ unsigned IdxMode = ARM_AM::getAM3IdxMode(MO3.getImm());
+
+ if (IdxMode == ARMII::IndexModePost) {
+ printAM3PostIndexOp(MI, Op, O);
+ return;
+ }
+ printAM3PreOrOffsetIndexOp(MI, Op, O);
+}
+
void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI,
unsigned OpNum,
raw_ostream &O) {
@@ -314,6 +396,12 @@ void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum,
O << "]";
}
+void ARMInstPrinter::printAddrMode7Operand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum);
+ O << "[" << getRegisterName(MO1.getReg()) << "]";
+}
+
void ARMInstPrinter::printAddrMode6OffsetOperand(const MCInst *MI,
unsigned OpNum,
raw_ostream &O) {
@@ -414,16 +502,6 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum,
}
}
-void ARMInstPrinter::printNegZeroOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- const MCOperand &Op = MI->getOperand(OpNum);
- O << '#';
- if (Op.getImm() < 0)
- O << '-' << (-Op.getImm() - 1);
- else
- O << Op.getImm();
-}
-
void ARMInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm();
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
index 679d313..b3ac03a 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
@@ -17,14 +17,18 @@
#include "llvm/MC/MCInstPrinter.h"
namespace llvm {
- class MCOperand;
+
+class MCOperand;
+class TargetMachine;
class ARMInstPrinter : public MCInstPrinter {
public:
- ARMInstPrinter(const MCAsmInfo &MAI) : MCInstPrinter(MAI) {}
+ ARMInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI)
+ : MCInstPrinter(MAI) {}
virtual void printInst(const MCInst *MI, raw_ostream &O);
virtual StringRef getOpcodeName(unsigned Opcode) const;
+ virtual StringRef getRegName(unsigned RegNo) const;
static const char *getInstructionName(unsigned Opcode);
@@ -38,15 +42,25 @@ public:
void printSOImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printSORegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+
void printAddrMode2Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAM2PostIndexOp(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAM2PreOrOffsetIndexOp(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
void printAddrMode2OffsetOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
+
void printAddrMode3Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAM3PostIndexOp(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
void printAddrMode3OffsetOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
+
void printLdStmModeOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printAddrMode5Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printAddrMode6Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAddrMode7Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printAddrMode6OffsetOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
@@ -87,9 +101,7 @@ public:
void printSetendOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printCPSIMod(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printCPSIFlag(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printCPSOptionOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printMSRMaskOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printNegZeroOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printPredicateOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printMandatoryPredicateOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp
index 9a27e2f..f6d0242 100644
--- a/lib/Target/ARM/MLxExpansionPass.cpp
+++ b/lib/Target/ARM/MLxExpansionPass.cpp
@@ -15,11 +15,13 @@
#define DEBUG_TYPE "mlx-expansion"
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
+#include "ARMSubtarget.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -49,15 +51,17 @@ namespace {
const TargetRegisterInfo *TRI;
MachineRegisterInfo *MRI;
+ bool isA9;
unsigned MIIdx;
MachineInstr* LastMIs[4];
+ SmallPtrSet<MachineInstr*, 4> IgnoreStall;
void clearStack();
void pushStack(MachineInstr *MI);
MachineInstr *getAccDefMI(MachineInstr *MI) const;
unsigned getDefReg(MachineInstr *MI) const;
bool hasRAWHazard(unsigned Reg, MachineInstr *MI) const;
- bool FindMLxHazard(MachineInstr *MI) const;
+ bool FindMLxHazard(MachineInstr *MI);
void ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI,
unsigned MulOpc, unsigned AddSubOpc,
bool NegAcc, bool HasLane);
@@ -146,7 +150,7 @@ bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const {
}
-bool MLxExpansion::FindMLxHazard(MachineInstr *MI) const {
+bool MLxExpansion::FindMLxHazard(MachineInstr *MI) {
if (NumExpand >= ExpandLimit)
return false;
@@ -154,7 +158,7 @@ bool MLxExpansion::FindMLxHazard(MachineInstr *MI) const {
return true;
MachineInstr *DefMI = getAccDefMI(MI);
- if (TII->isFpMLxInstruction(DefMI->getOpcode()))
+ if (TII->isFpMLxInstruction(DefMI->getOpcode())) {
// r0 = vmla
// r3 = vmla r0, r1, r2
// takes 16 - 17 cycles
@@ -163,24 +167,33 @@ bool MLxExpansion::FindMLxHazard(MachineInstr *MI) const {
// r4 = vmul r1, r2
// r3 = vadd r0, r4
// takes about 14 - 15 cycles even with vmul stalling for 4 cycles.
+ IgnoreStall.insert(DefMI);
return true;
+ }
+
+ if (IgnoreStall.count(MI))
+ return false;
// If a VMLA.F is followed by a VADD.F or VMUL.F with no RAW hazard, the
// VADD.F or VMUL.F will stall 4 cycles before issue. The 4-cycle stall
// preserves the in-order retirement of the instructions.
// Look at the next few instructions; if *most* of them can cause hazards,
// the scheduler can't *fix* this, so we'd better break up the VMLA.
+ unsigned Limit1 = isA9 ? 1 : 4;
+ unsigned Limit2 = isA9 ? 1 : 4;
for (unsigned i = 1; i <= 4; ++i) {
int Idx = ((int)MIIdx - i + 4) % 4;
MachineInstr *NextMI = LastMIs[Idx];
if (!NextMI)
continue;
- if (TII->canCauseFpMLxStall(NextMI->getOpcode()))
- return true;
+ if (TII->canCauseFpMLxStall(NextMI->getOpcode())) {
+ if (i <= Limit1)
+ return true;
+ }
// Look for VMLx RAW hazard.
- if (hasRAWHazard(getDefReg(MI), NextMI))
+ if (i <= Limit2 && hasRAWHazard(getDefReg(MI), NextMI))
return true;
}
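The ((int)MIIdx - i + 4) % 4 expression walks the four-entry LastMIs window
backwards from the most recently pushed instruction, wrapping around. A toy
model of that indexing (names are illustrative, not the pass's actual state):

    #include <cstdio>

    int main() {
      const char *LastMIs[4] = {"mi0", "mi1", "mi2", "mi3"};
      unsigned MIIdx = 2; // slot that would receive the *next* push
      for (unsigned i = 1; i <= 4; ++i) {
        int Idx = ((int)MIIdx - (int)i + 4) % 4; // one step back, two back, ...
        std::printf("lookback %u -> slot %d (%s)\n", i, Idx, LastMIs[Idx]);
      }
      return 0;
    }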
@@ -248,6 +261,7 @@ bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) {
bool Changed = false;
clearStack();
+ IgnoreStall.clear();
unsigned Skip = 0;
MachineBasicBlock::reverse_iterator MII = MBB.rbegin(), E = MBB.rend();
@@ -299,6 +313,8 @@ bool MLxExpansion::runOnMachineFunction(MachineFunction &Fn) {
TII = static_cast<const ARMBaseInstrInfo*>(Fn.getTarget().getInstrInfo());
TRI = Fn.getTarget().getRegisterInfo();
MRI = &Fn.getRegInfo();
+ const ARMSubtarget *STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
+ isA9 = STI->isCortexA9();
bool Modified = false;
for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt
index 9fc3fb9..8ba9a27 100644
--- a/lib/Target/ARM/README.txt
+++ b/lib/Target/ARM/README.txt
@@ -657,3 +657,27 @@ Note that both "tst" and "moveq" are redundant.
//===---------------------------------------------------------------------===//
+When loading immediate constants with movt/movw, if there are multiple
+constants needed with the same low 16 bits, and those values are not live at
+the same time, it would be possible to use a single movw instruction, followed
+by multiple movt instructions to rewrite the high bits to different values.
+For example:
+
+  volatile store i32 -1, i32* inttoptr (i32 1342210076 to i32*), align 4, !tbaa !0
+  volatile store i32 -1, i32* inttoptr (i32 1342341148 to i32*), align 4, !tbaa !0
+
+is compiled and optimized to:
+
+ movw r0, #32796
+ mov.w r1, #-1
+ movt r0, #20480
+ str r1, [r0]
+ movw r0, #32796 @ <= this MOVW is not needed, value is there already
+ movt r0, #20482
+ str r1, [r0]
+
+//===---------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp
index 233e165..dee3d27 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -34,13 +34,14 @@ bool Thumb1FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const
return !MF.getFrameInfo()->hasVarSizedObjects();
}
-static void emitSPUpdate(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI,
- const TargetInstrInfo &TII, DebugLoc dl,
- const Thumb1RegisterInfo &MRI,
- int NumBytes) {
- emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, ARM::SP, NumBytes, TII,
- MRI, dl);
+static void
+emitSPUpdate(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ const TargetInstrInfo &TII, DebugLoc dl,
+ const Thumb1RegisterInfo &MRI,
+ int NumBytes, unsigned MIFlags = MachineInstr::NoFlags) {
+ emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, TII,
+ MRI, MIFlags);
}
void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
@@ -70,11 +71,13 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
int FramePtrSpillFI = 0;
if (VARegSaveSize)
- emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -VARegSaveSize);
+ emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -VARegSaveSize,
+ MachineInstr::FrameSetup);
if (!AFI->hasStackFrame()) {
if (NumBytes != 0)
- emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes);
+ emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes,
+ MachineInstr::FrameSetup);
return;
}
@@ -131,7 +134,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
// Adjust FP so it point to the stack slot that contains the previous FP.
if (hasFP(MF)) {
BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
- .addFrameIndex(FramePtrSpillFI).addImm(0);
+ .addFrameIndex(FramePtrSpillFI).addImm(0)
+ .setMIFlags(MachineInstr::FrameSetup);
if (NumBytes > 7)
// If offset is > 7 then sp cannot be adjusted in a single instruction,
// try restoring from fp instead.
@@ -140,7 +144,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
if (NumBytes)
// Insert it after all the callee-save spills.
- emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes);
+ emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes,
+ MachineInstr::FrameSetup);
if (STI.isTargetELF() && hasFP(MF))
MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
@@ -156,7 +161,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
// to reference locals.
if (RegInfo->hasBasePointer(MF))
BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), BasePtr).addReg(ARM::SP);
-
+
// If the frame has variable sized objects then the epilogue must restore
// the sp from fp. We can assume there's an FP here since hasFP already
// checks for hasVarSizedObjects.
@@ -232,8 +237,8 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
if (NumBytes) {
assert(MF.getRegInfo().isPhysRegUsed(ARM::R4) &&
"No scratch register to restore SP from FP!");
- emitThumbRegPlusImmediate(MBB, MBBI, ARM::R4, FramePtr, -NumBytes,
- TII, *RegInfo, dl);
+ emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
+ TII, *RegInfo);
BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP)
.addReg(ARM::R4);
} else
@@ -307,6 +312,7 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MIB.addReg(Reg, getKillRegState(isKill));
}
+ MIB.setMIFlags(MachineInstr::FrameSetup);
return true;
}
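The setMIFlags(MachineInstr::FrameSetup) calls threaded through this patch tag
prologue instructions with a per-instruction flag bitmask so later passes can
tell frame-setup code apart from ordinary code. A toy model of that plumbing
(the struct and names are illustrative, not the MachineInstr API):

    #include <cstdio>

    enum MIFlag { NoFlags = 0, FrameSetup = 1 << 0 };

    struct Insn {
      unsigned Flags;
      void setFlags(unsigned F) { Flags = F; }
      bool isFrameSetup() const { return (Flags & FrameSetup) != 0; }
    };

    int main() {
      Insn Push = { NoFlags };
      Push.setFlags(FrameSetup); // marked while emitting the prologue
      Insn Add = { NoFlags };    // ordinary instruction
      std::printf("push: %d, add: %d\n", Push.isFrameSetup(), Add.isFrameSetup());
      return 0;
    }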
diff --git a/lib/Target/ARM/Thumb1FrameLowering.h b/lib/Target/ARM/Thumb1FrameLowering.h
index c592e12..bcfc516 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.h
+++ b/lib/Target/ARM/Thumb1FrameLowering.h
@@ -12,7 +12,7 @@
//===----------------------------------------------------------------------===//
#ifndef __THUMB_FRAMEINFO_H_
-#define __THUMM_FRAMEINFO_H_
+#define __THUMB_FRAMEINFO_H_
#include "ARM.h"
#include "ARMFrameLowering.h"
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp
index f62a13e..33cefb6 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -31,8 +31,6 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -48,15 +46,29 @@ Thumb1RegisterInfo::Thumb1RegisterInfo(const ARMBaseInstrInfo &tii,
: ARMBaseRegisterInfo(tii, sti) {
}
+const TargetRegisterClass*
+Thumb1RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC)
+ const {
+ if (RC == ARM::tGPRRegisterClass || RC->hasSuperClass(ARM::tGPRRegisterClass))
+ return ARM::tGPRRegisterClass;
+ return ARMBaseRegisterInfo::getLargestLegalSuperClass(RC);
+}
+
+const TargetRegisterClass *
+Thumb1RegisterInfo::getPointerRegClass(unsigned Kind) const {
+ return ARM::tGPRRegisterClass;
+}
+
/// emitLoadConstPool - Emits a load from constpool to materialize the
/// specified immediate.
-void Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI,
- DebugLoc dl,
- unsigned DestReg, unsigned SubIdx,
- int Val,
- ARMCC::CondCodes Pred,
- unsigned PredReg) const {
+void
+Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl,
+ unsigned DestReg, unsigned SubIdx,
+ int Val,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ unsigned MIFlags) const {
MachineFunction &MF = *MBB.getParent();
MachineConstantPool *ConstantPool = MF.getConstantPool();
const Constant *C = ConstantInt::get(
@@ -64,8 +76,9 @@ void Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
BuildMI(MBB, MBBI, dl, TII.get(ARM::tLDRpci))
- .addReg(DestReg, getDefRegState(true), SubIdx)
- .addConstantPoolIndex(Idx).addImm(Pred).addReg(PredReg);
+ .addReg(DestReg, getDefRegState(true), SubIdx)
+ .addConstantPoolIndex(Idx).addImm(Pred).addReg(PredReg)
+ .setMIFlags(MIFlags);
}
@@ -76,11 +89,12 @@ void Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
static
void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl,
unsigned DestReg, unsigned BaseReg,
int NumBytes, bool CanChangeCC,
const TargetInstrInfo &TII,
const ARMBaseRegisterInfo& MRI,
- DebugLoc dl) {
+ unsigned MIFlags = MachineInstr::NoFlags) {
MachineFunction &MF = *MBB.getParent();
bool isHigh = !isARMLowRegister(DestReg) ||
(BaseReg != 0 && !isARMLowRegister(BaseReg));
@@ -101,14 +115,15 @@ void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB,
if (NumBytes <= 255 && NumBytes >= 0)
AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg))
- .addImm(NumBytes);
+ .addImm(NumBytes).setMIFlags(MIFlags);
else if (NumBytes < 0 && NumBytes >= -255) {
AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg))
- .addImm(NumBytes);
+ .addImm(NumBytes).setMIFlags(MIFlags);
AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tRSB), LdReg))
- .addReg(LdReg, RegState::Kill);
+ .addReg(LdReg, RegState::Kill).setMIFlags(MIFlags);
} else
- MRI.emitLoadConstPool(MBB, MBBI, dl, LdReg, 0, NumBytes);
+ MRI.emitLoadConstPool(MBB, MBBI, dl, LdReg, 0, NumBytes,
+ ARMCC::AL, 0, MIFlags);
// Emit add / sub.
int Opc = (isSub) ? ARM::tSUBrr : (isHigh ? ARM::tADDhirr : ARM::tADDrr);
@@ -151,10 +166,11 @@ static unsigned calcNumMI(int Opc, int ExtraOpc, unsigned Bytes,
/// a destreg = basereg + immediate in Thumb code.
void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl,
unsigned DestReg, unsigned BaseReg,
int NumBytes, const TargetInstrInfo &TII,
const ARMBaseRegisterInfo& MRI,
- DebugLoc dl) {
+ unsigned MIFlags) {
bool isSub = NumBytes < 0;
unsigned Bytes = (unsigned)NumBytes;
if (isSub) Bytes = -NumBytes;
@@ -211,8 +227,9 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
if (NumMIs > Threshold) {
// This will expand into too many instructions. Load the immediate from a
// constpool entry.
- emitThumbRegPlusImmInReg(MBB, MBBI, DestReg, BaseReg, NumBytes, true, TII,
- MRI, dl);
+ emitThumbRegPlusImmInReg(MBB, MBBI, dl,
+ DestReg, BaseReg, NumBytes, true,
+ TII, MRI, MIFlags);
return;
}
@@ -224,11 +241,12 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
Bytes -= ThisVal;
const TargetInstrDesc &TID = TII.get(isSub ? ARM::tSUBi3 : ARM::tADDi3);
const MachineInstrBuilder MIB =
- AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TID, DestReg));
+ AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TID, DestReg).setMIFlags(MIFlags));
AddDefaultPred(MIB.addReg(BaseReg, RegState::Kill).addImm(ThisVal));
} else {
BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg)
- .addReg(BaseReg, RegState::Kill);
+ .addReg(BaseReg, RegState::Kill)
+ .setMIFlags(MIFlags);
}
BaseReg = DestReg;
}
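Because Thumb1 add/sub immediates are narrow, the loop above materializes a
large adjustment as a chain of maximal chunks. A hedged sketch of the chunking
idea (the 508-byte limit assumes tADDspi's 7-bit immediate scaled by 4):

    #include <cstdio>

    int main() {
      int NumBytes = 1032;   // total SP adjustment wanted
      const int Chunk = 508; // assumed per-instruction maximum (imm7 * 4)
      while (NumBytes > 0) {
        int ThisVal = NumBytes > Chunk ? Chunk : NumBytes;
        std::printf("add sp, sp, #%d\n", ThisVal);
        NumBytes -= ThisVal;
      }
      return 0;
    }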
@@ -243,9 +261,10 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg);
if (NeedCC)
MIB = AddDefaultT1CC(MIB);
- MIB .addReg(DestReg).addImm(ThisVal);
+ MIB.addReg(DestReg).addImm(ThisVal);
if (NeedPred)
MIB = AddDefaultPred(MIB);
+ MIB.setMIFlags(MIFlags);
}
else {
bool isKill = BaseReg != ARM::SP;
@@ -255,8 +274,9 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
MIB.addReg(BaseReg, getKillRegState(isKill)).addImm(ThisVal);
if (NeedPred)
MIB = AddDefaultPred(MIB);
- BaseReg = DestReg;
+ MIB.setMIFlags(MIFlags);
+ BaseReg = DestReg;
if (Opc == ARM::tADDrSPi) {
// r4 = add sp, imm
// r4 = add r4, imm
@@ -274,7 +294,8 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
const TargetInstrDesc &TID = TII.get(ExtraOpc);
AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TID, DestReg))
.addReg(DestReg, RegState::Kill)
- .addImm(((unsigned)NumBytes) & 3));
+ .addImm(((unsigned)NumBytes) & 3)
+ .setMIFlags(MIFlags));
}
}
@@ -283,8 +304,8 @@ static void emitSPUpdate(MachineBasicBlock &MBB,
const TargetInstrInfo &TII, DebugLoc dl,
const Thumb1RegisterInfo &MRI,
int NumBytes) {
- emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, ARM::SP, NumBytes, TII,
- MRI, dl);
+ emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, TII,
+ MRI);
}
void Thumb1RegisterInfo::
@@ -337,7 +358,7 @@ static void emitThumbConstant(MachineBasicBlock &MBB,
DestReg))
.addImm(ThisVal));
if (Imm > 0)
- emitThumbRegPlusImmediate(MBB, MBBI, DestReg, DestReg, Imm, TII, MRI, dl);
+ emitThumbRegPlusImmediate(MBB, MBBI, dl, DestReg, DestReg, Imm, TII, MRI);
if (isSub) {
const TargetInstrDesc &TID = TII.get(ARM::tRSB);
AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TID, DestReg))
@@ -430,8 +451,8 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
// MI would expand into a large number of instructions. Don't try to
// simplify the immediate.
if (NumMIs > 2) {
- emitThumbRegPlusImmediate(MBB, II, DestReg, FrameReg, Offset, TII,
- *this, dl);
+ emitThumbRegPlusImmediate(MBB, II, dl, DestReg, FrameReg, Offset, TII,
+ *this);
MBB.erase(II);
return true;
}
@@ -450,8 +471,8 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
}
Offset = (Offset - Mask * Scale);
MachineBasicBlock::iterator NII = llvm::next(II);
- emitThumbRegPlusImmediate(MBB, NII, DestReg, DestReg, Offset, TII,
- *this, dl);
+ emitThumbRegPlusImmediate(MBB, NII, dl, DestReg, DestReg, Offset, TII,
+ *this);
} else {
// Translate r0 = add sp, -imm to
// r0 = -imm (this is then translated into a series of instructons)
@@ -645,15 +666,15 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
bool UseRR = false;
if (Opcode == ARM::tRestore) {
if (FrameReg == ARM::SP)
- emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg,
- Offset, false, TII, *this, dl);
+ emitThumbRegPlusImmInReg(MBB, II, dl, TmpReg, FrameReg,
+ Offset, false, TII, *this);
else {
emitLoadConstPool(MBB, II, dl, TmpReg, 0, Offset);
UseRR = true;
}
} else {
- emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, Offset, TII,
- *this, dl);
+ emitThumbRegPlusImmediate(MBB, II, dl, TmpReg, FrameReg, Offset, TII,
+ *this);
}
MI.setDesc(TII.get(UseRR ? ARM::tLDRr : ARM::tLDRi));
@@ -668,15 +689,15 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
if (Opcode == ARM::tSpill) {
if (FrameReg == ARM::SP)
- emitThumbRegPlusImmInReg(MBB, II, VReg, FrameReg,
- Offset, false, TII, *this, dl);
+ emitThumbRegPlusImmInReg(MBB, II, dl, VReg, FrameReg,
+ Offset, false, TII, *this);
else {
emitLoadConstPool(MBB, II, dl, VReg, 0, Offset);
UseRR = true;
}
} else
- emitThumbRegPlusImmediate(MBB, II, VReg, FrameReg, Offset, TII,
- *this, dl);
+ emitThumbRegPlusImmediate(MBB, II, dl, VReg, FrameReg, Offset, TII,
+ *this);
MI.setDesc(TII.get(UseRR ? ARM::tSTRr : ARM::tSTRi));
MI.getOperand(i).ChangeToRegister(VReg, false, false, true);
if (UseRR)
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h
index 8a87cc5..9060e59 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.h
+++ b/lib/Target/ARM/Thumb1RegisterInfo.h
@@ -28,6 +28,11 @@ struct Thumb1RegisterInfo : public ARMBaseRegisterInfo {
public:
Thumb1RegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &STI);
+ const TargetRegisterClass*
+ getLargestLegalSuperClass(const TargetRegisterClass *RC) const;
+
+ const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const;
+
/// emitLoadConstPool - Emits a load from constpool to materialize the
/// specified immediate.
void emitLoadConstPool(MachineBasicBlock &MBB,
@@ -35,7 +40,8 @@ public:
DebugLoc dl,
unsigned DestReg, unsigned SubIdx, int Val,
ARMCC::CondCodes Pred = ARMCC::AL,
- unsigned PredReg = 0) const;
+ unsigned PredReg = 0,
+ unsigned MIFlags = MachineInstr::NoFlags) const;
/// Code Generation virtual methods...
void eliminateCallFramePseudoInstr(MachineFunction &MF,
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index 9b1073b..d169dbb 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -184,7 +184,7 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI, DebugLoc dl,
unsigned DestReg, unsigned BaseReg, int NumBytes,
ARMCC::CondCodes Pred, unsigned PredReg,
- const ARMBaseInstrInfo &TII) {
+ const ARMBaseInstrInfo &TII, unsigned MIFlags) {
bool isSub = NumBytes < 0;
if (isSub) NumBytes = -NumBytes;
@@ -198,14 +198,14 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
// Use a movw to materialize the 16-bit constant.
BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), DestReg)
.addImm(NumBytes)
- .addImm((unsigned)Pred).addReg(PredReg);
+ .addImm((unsigned)Pred).addReg(PredReg).setMIFlags(MIFlags);
Fits = true;
} else if ((NumBytes & 0xffff) == 0) {
// Use a movt to materialize the 32-bit constant.
BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVTi16), DestReg)
.addReg(DestReg)
.addImm(NumBytes >> 16)
- .addImm((unsigned)Pred).addReg(PredReg);
+ .addImm((unsigned)Pred).addReg(PredReg).setMIFlags(MIFlags);
Fits = true;
}
@@ -214,12 +214,14 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), DestReg)
.addReg(BaseReg, RegState::Kill)
.addReg(DestReg, RegState::Kill)
- .addImm((unsigned)Pred).addReg(PredReg).addReg(0);
+ .addImm((unsigned)Pred).addReg(PredReg).addReg(0)
+ .setMIFlags(MIFlags);
} else {
BuildMI(MBB, MBBI, dl, TII.get(ARM::t2ADDrr), DestReg)
.addReg(DestReg, RegState::Kill)
.addReg(BaseReg, RegState::Kill)
- .addImm((unsigned)Pred).addReg(PredReg).addReg(0);
+ .addImm((unsigned)Pred).addReg(PredReg).addReg(0)
+ .setMIFlags(MIFlags);
}
return;
}
@@ -230,7 +232,8 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
unsigned Opc = 0;
if (DestReg == ARM::SP && BaseReg != ARM::SP) {
// mov sp, rn. Note t2MOVr cannot be used.
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr),DestReg).addReg(BaseReg);
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr),DestReg)
+ .addReg(BaseReg).setMIFlags(MIFlags);
BaseReg = ARM::SP;
continue;
}
@@ -243,7 +246,7 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
// FIXME: Fix Thumb1 immediate encoding.
BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
- .addReg(BaseReg).addImm(ThisVal/4);
+ .addReg(BaseReg).addImm(ThisVal/4).setMIFlags(MIFlags);
NumBytes = 0;
continue;
}
@@ -283,7 +286,7 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
MachineInstrBuilder MIB =
AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
.addReg(BaseReg, RegState::Kill)
- .addImm(ThisVal));
+ .addImm(ThisVal)).setMIFlags(MIFlags);
if (HasCCOut)
AddDefaultCC(MIB);
diff --git a/lib/Target/ARM/Thumb2RegisterInfo.cpp b/lib/Target/ARM/Thumb2RegisterInfo.cpp
index 099b8f7..355c3bf 100644
--- a/lib/Target/ARM/Thumb2RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb2RegisterInfo.cpp
@@ -13,26 +13,15 @@
//===----------------------------------------------------------------------===//
#include "ARM.h"
-#include "ARMAddressingModes.h"
-#include "ARMBaseInstrInfo.h"
-#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "Thumb2InstrInfo.h"
#include "Thumb2RegisterInfo.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
-#include "llvm/LLVMContext.h"
#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineLocation.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
Thumb2RegisterInfo::Thumb2RegisterInfo(const ARMBaseInstrInfo &tii,
@@ -42,13 +31,14 @@ Thumb2RegisterInfo::Thumb2RegisterInfo(const ARMBaseInstrInfo &tii,
/// emitLoadConstPool - Emits a load from constpool to materialize the
/// specified immediate.
-void Thumb2RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI,
- DebugLoc dl,
- unsigned DestReg, unsigned SubIdx,
- int Val,
- ARMCC::CondCodes Pred,
- unsigned PredReg) const {
+void
+Thumb2RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl,
+ unsigned DestReg, unsigned SubIdx,
+ int Val,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ unsigned MIFlags) const {
MachineFunction &MF = *MBB.getParent();
MachineConstantPool *ConstantPool = MF.getConstantPool();
const Constant *C = ConstantInt::get(
@@ -57,5 +47,6 @@ void Thumb2RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
BuildMI(MBB, MBBI, dl, TII.get(ARM::t2LDRpci))
.addReg(DestReg, getDefRegState(true), SubIdx)
- .addConstantPoolIndex(Idx).addImm((int64_t)ARMCC::AL).addReg(0);
+ .addConstantPoolIndex(Idx).addImm((int64_t)ARMCC::AL).addReg(0)
+ .setMIFlags(MIFlags);
}
diff --git a/lib/Target/ARM/Thumb2RegisterInfo.h b/lib/Target/ARM/Thumb2RegisterInfo.h
index b3cf2e5..824378a 100644
--- a/lib/Target/ARM/Thumb2RegisterInfo.h
+++ b/lib/Target/ARM/Thumb2RegisterInfo.h
@@ -35,7 +35,8 @@ public:
DebugLoc dl,
unsigned DestReg, unsigned SubIdx, int Val,
ARMCC::CondCodes Pred = ARMCC::AL,
- unsigned PredReg = 0) const;
+ unsigned PredReg = 0,
+ unsigned MIFlags = MachineInstr::NoFlags) const;
};
}
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index cc8f61c..ce2e966 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -12,6 +12,7 @@
#include "ARMAddressingModes.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMBaseInstrInfo.h"
+#include "ARMSubtarget.h"
#include "Thumb2InstrInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -49,82 +50,86 @@ namespace {
// 1 - No cc field.
// 2 - Always set CPSR.
unsigned PredCC2 : 2;
+ unsigned PartFlag : 1; // 16-bit instruction does partial flag update
unsigned Special : 1; // Needs to be dealt with specially
};
static const ReduceEntry ReduceTable[] = {
- // Wide, Narrow1, Narrow2, imm1,imm2, lo1, lo2, P/C, S
- { ARM::t2ADCrr, 0, ARM::tADC, 0, 0, 0, 1, 0,0, 0 },
- { ARM::t2ADDri, ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 0,0, 0 },
- { ARM::t2ADDrr, ARM::tADDrr, ARM::tADDhirr, 0, 0, 1, 0, 0,1, 0 },
+ // Wide, Narrow1, Narrow2, imm1,imm2, lo1, lo2, P/C, PF, S
+ { ARM::t2ADCrr, 0, ARM::tADC, 0, 0, 0, 1, 0,0, 0,0 },
+ { ARM::t2ADDri, ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 0,0, 0,0 },
+ { ARM::t2ADDrr, ARM::tADDrr, ARM::tADDhirr, 0, 0, 1, 0, 0,1, 0,0 },
// Note: immediate scale is 4.
- { ARM::t2ADDrSPi,ARM::tADDrSPi,0, 8, 0, 1, 0, 1,0, 1 },
- { ARM::t2ADDSri,ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 2,2, 1 },
- { ARM::t2ADDSrr,ARM::tADDrr, 0, 0, 0, 1, 0, 2,0, 1 },
- { ARM::t2ANDrr, 0, ARM::tAND, 0, 0, 0, 1, 0,0, 0 },
- { ARM::t2ASRri, ARM::tASRri, 0, 5, 0, 1, 0, 0,0, 0 },
- { ARM::t2ASRrr, 0, ARM::tASRrr, 0, 0, 0, 1, 0,0, 0 },
- { ARM::t2BICrr, 0, ARM::tBIC, 0, 0, 0, 1, 0,0, 0 },
+ { ARM::t2ADDrSPi,ARM::tADDrSPi,0, 8, 0, 1, 0, 1,0, 0,1 },
+ { ARM::t2ADDSri,ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 2,2, 0,1 },
+ { ARM::t2ADDSrr,ARM::tADDrr, 0, 0, 0, 1, 0, 2,0, 0,1 },
+ { ARM::t2ANDrr, 0, ARM::tAND, 0, 0, 0, 1, 0,0, 1,0 },
+ { ARM::t2ASRri, ARM::tASRri, 0, 5, 0, 1, 0, 0,0, 1,0 },
+ { ARM::t2ASRrr, 0, ARM::tASRrr, 0, 0, 0, 1, 0,0, 1,0 },
+ { ARM::t2BICrr, 0, ARM::tBIC, 0, 0, 0, 1, 0,0, 1,0 },
//FIXME: Disable CMN, as CCodes are backwards from compare expectations
- //{ ARM::t2CMNrr, ARM::tCMN, 0, 0, 0, 1, 0, 2,0, 0 },
- { ARM::t2CMPri, ARM::tCMPi8, 0, 8, 0, 1, 0, 2,0, 0 },
- { ARM::t2CMPrr, ARM::tCMPhir, 0, 0, 0, 0, 0, 2,0, 1 },
- { ARM::t2EORrr, 0, ARM::tEOR, 0, 0, 0, 1, 0,0, 0 },
+ //{ ARM::t2CMNrr, ARM::tCMN, 0, 0, 0, 1, 0, 2,0, 0,0 },
+ { ARM::t2CMPri, ARM::tCMPi8, 0, 8, 0, 1, 0, 2,0, 0,0 },
+ { ARM::t2CMPrr, ARM::tCMPhir, 0, 0, 0, 0, 0, 2,0, 0,1 },
+ { ARM::t2EORrr, 0, ARM::tEOR, 0, 0, 0, 1, 0,0, 1,0 },
// FIXME: adr.n immediate offset must be multiple of 4.
- //{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0, 0, 0, 1, 0, 1,0, 0 },
- { ARM::t2LSLri, ARM::tLSLri, 0, 5, 0, 1, 0, 0,0, 0 },
- { ARM::t2LSLrr, 0, ARM::tLSLrr, 0, 0, 0, 1, 0,0, 0 },
- { ARM::t2LSRri, ARM::tLSRri, 0, 5, 0, 1, 0, 0,0, 0 },
- { ARM::t2LSRrr, 0, ARM::tLSRrr, 0, 0, 0, 1, 0,0, 0 },
- { ARM::t2MOVi, ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0 },
- { ARM::t2MOVi16,ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 1 },
+ //{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0, 0, 0, 1, 0, 1,0, 0,0 },
+ { ARM::t2LSLri, ARM::tLSLri, 0, 5, 0, 1, 0, 0,0, 1,0 },
+ { ARM::t2LSLrr, 0, ARM::tLSLrr, 0, 0, 0, 1, 0,0, 1,0 },
+ { ARM::t2LSRri, ARM::tLSRri, 0, 5, 0, 1, 0, 0,0, 1,0 },
+ { ARM::t2LSRrr, 0, ARM::tLSRrr, 0, 0, 0, 1, 0,0, 1,0 },
+ // FIXME: tMOVi8 and tMVN also partially update CPSR, but they are less
+ // likely to cause an issue in a loop. As a size / performance workaround,
+ // they are not marked as such.
+ { ARM::t2MOVi, ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0,0 },
+ { ARM::t2MOVi16,ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0,1 },
// FIXME: Do we need the 16-bit 'S' variant?
- { ARM::t2MOVr,ARM::tMOVgpr2gpr,0, 0, 0, 0, 0, 1,0, 0 },
- { ARM::t2MOVCCr,0, ARM::tMOVCCr, 0, 0, 0, 0, 0,1, 0 },
- { ARM::t2MOVCCi,0, ARM::tMOVCCi, 0, 8, 0, 1, 0,1, 0 },
- { ARM::t2MUL, 0, ARM::tMUL, 0, 0, 0, 1, 0,0, 0 },
- { ARM::t2MVNr, ARM::tMVN, 0, 0, 0, 1, 0, 0,0, 0 },
- { ARM::t2ORRrr, 0, ARM::tORR, 0, 0, 0, 1, 0,0, 0 },
- { ARM::t2REV, ARM::tREV, 0, 0, 0, 1, 0, 1,0, 0 },
- { ARM::t2REV16, ARM::tREV16, 0, 0, 0, 1, 0, 1,0, 0 },
- { ARM::t2REVSH, ARM::tREVSH, 0, 0, 0, 1, 0, 1,0, 0 },
- { ARM::t2RORrr, 0, ARM::tROR, 0, 0, 0, 1, 0,0, 0 },
- { ARM::t2RSBri, ARM::tRSB, 0, 0, 0, 1, 0, 0,0, 1 },
- { ARM::t2RSBSri,ARM::tRSB, 0, 0, 0, 1, 0, 2,0, 1 },
- { ARM::t2SBCrr, 0, ARM::tSBC, 0, 0, 0, 1, 0,0, 0 },
- { ARM::t2SUBri, ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 0,0, 0 },
- { ARM::t2SUBrr, ARM::tSUBrr, 0, 0, 0, 1, 0, 0,0, 0 },
- { ARM::t2SUBSri,ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 2,2, 0 },
- { ARM::t2SUBSrr,ARM::tSUBrr, 0, 0, 0, 1, 0, 2,0, 0 },
- { ARM::t2SXTBr, ARM::tSXTB, 0, 0, 0, 1, 0, 1,0, 0 },
- { ARM::t2SXTHr, ARM::tSXTH, 0, 0, 0, 1, 0, 1,0, 0 },
- { ARM::t2TSTrr, ARM::tTST, 0, 0, 0, 1, 0, 2,0, 0 },
- { ARM::t2UXTBr, ARM::tUXTB, 0, 0, 0, 1, 0, 1,0, 0 },
- { ARM::t2UXTHr, ARM::tUXTH, 0, 0, 0, 1, 0, 1,0, 0 },
+ { ARM::t2MOVr,ARM::tMOVgpr2gpr,0, 0, 0, 0, 0, 1,0, 0,0 },
+ { ARM::t2MOVCCr,0, ARM::tMOVCCr, 0, 0, 0, 0, 0,1, 0,0 },
+ { ARM::t2MOVCCi,0, ARM::tMOVCCi, 0, 8, 0, 1, 0,1, 0,0 },
+ { ARM::t2MUL, 0, ARM::tMUL, 0, 0, 0, 1, 0,0, 1,0 },
+ { ARM::t2MVNr, ARM::tMVN, 0, 0, 0, 1, 0, 0,0, 0,0 },
+ { ARM::t2ORRrr, 0, ARM::tORR, 0, 0, 0, 1, 0,0, 1,0 },
+ { ARM::t2REV, ARM::tREV, 0, 0, 0, 1, 0, 1,0, 0,0 },
+ { ARM::t2REV16, ARM::tREV16, 0, 0, 0, 1, 0, 1,0, 0,0 },
+ { ARM::t2REVSH, ARM::tREVSH, 0, 0, 0, 1, 0, 1,0, 0,0 },
+ { ARM::t2RORrr, 0, ARM::tROR, 0, 0, 0, 1, 0,0, 1,0 },
+ { ARM::t2RSBri, ARM::tRSB, 0, 0, 0, 1, 0, 0,0, 0,1 },
+ { ARM::t2RSBSri,ARM::tRSB, 0, 0, 0, 1, 0, 2,0, 0,1 },
+ { ARM::t2SBCrr, 0, ARM::tSBC, 0, 0, 0, 1, 0,0, 0,0 },
+ { ARM::t2SUBri, ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 0,0, 0,0 },
+ { ARM::t2SUBrr, ARM::tSUBrr, 0, 0, 0, 1, 0, 0,0, 0,0 },
+ { ARM::t2SUBSri,ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 2,2, 0,0 },
+ { ARM::t2SUBSrr,ARM::tSUBrr, 0, 0, 0, 1, 0, 2,0, 0,0 },
+ { ARM::t2SXTBr, ARM::tSXTB, 0, 0, 0, 1, 0, 1,0, 0,0 },
+ { ARM::t2SXTHr, ARM::tSXTH, 0, 0, 0, 1, 0, 1,0, 0,0 },
+ { ARM::t2TSTrr, ARM::tTST, 0, 0, 0, 1, 0, 2,0, 0,0 },
+ { ARM::t2UXTBr, ARM::tUXTB, 0, 0, 0, 1, 0, 1,0, 0,0 },
+ { ARM::t2UXTHr, ARM::tUXTH, 0, 0, 0, 1, 0, 1,0, 0,0 },
// FIXME: Clean this up after splitting each Thumb load / store opcode
// into multiple ones.
- { ARM::t2LDRi12,ARM::tLDRi, ARM::tLDRspi, 5, 8, 1, 0, 0,0, 1 },
- { ARM::t2LDRs, ARM::tLDRr, 0, 0, 0, 1, 0, 0,0, 1 },
- { ARM::t2LDRBi12,ARM::tLDRBi, 0, 5, 0, 1, 0, 0,0, 1 },
- { ARM::t2LDRBs, ARM::tLDRBr, 0, 0, 0, 1, 0, 0,0, 1 },
- { ARM::t2LDRHi12,ARM::tLDRHi, 0, 5, 0, 1, 0, 0,0, 1 },
- { ARM::t2LDRHs, ARM::tLDRHr, 0, 0, 0, 1, 0, 0,0, 1 },
- { ARM::t2LDRSBs,ARM::tLDRSB, 0, 0, 0, 1, 0, 0,0, 1 },
- { ARM::t2LDRSHs,ARM::tLDRSH, 0, 0, 0, 1, 0, 0,0, 1 },
- { ARM::t2STRi12,ARM::tSTRi, ARM::tSTRspi, 5, 8, 1, 0, 0,0, 1 },
- { ARM::t2STRs, ARM::tSTRr, 0, 0, 0, 1, 0, 0,0, 1 },
- { ARM::t2STRBi12,ARM::tSTRBi, 0, 5, 0, 1, 0, 0,0, 1 },
- { ARM::t2STRBs, ARM::tSTRBr, 0, 0, 0, 1, 0, 0,0, 1 },
- { ARM::t2STRHi12,ARM::tSTRHi, 0, 5, 0, 1, 0, 0,0, 1 },
- { ARM::t2STRHs, ARM::tSTRHr, 0, 0, 0, 1, 0, 0,0, 1 },
-
- { ARM::t2LDMIA, ARM::tLDMIA, 0, 0, 0, 1, 1, 1,1, 1 },
- { ARM::t2LDMIA_RET,0, ARM::tPOP_RET, 0, 0, 1, 1, 1,1, 1 },
- { ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,0, 0, 1, 1, 1,1, 1 },
+ { ARM::t2LDRi12,ARM::tLDRi, ARM::tLDRspi, 5, 8, 1, 0, 0,0, 0,1 },
+ { ARM::t2LDRs, ARM::tLDRr, 0, 0, 0, 1, 0, 0,0, 0,1 },
+ { ARM::t2LDRBi12,ARM::tLDRBi, 0, 5, 0, 1, 0, 0,0, 0,1 },
+ { ARM::t2LDRBs, ARM::tLDRBr, 0, 0, 0, 1, 0, 0,0, 0,1 },
+ { ARM::t2LDRHi12,ARM::tLDRHi, 0, 5, 0, 1, 0, 0,0, 0,1 },
+ { ARM::t2LDRHs, ARM::tLDRHr, 0, 0, 0, 1, 0, 0,0, 0,1 },
+ { ARM::t2LDRSBs,ARM::tLDRSB, 0, 0, 0, 1, 0, 0,0, 0,1 },
+ { ARM::t2LDRSHs,ARM::tLDRSH, 0, 0, 0, 1, 0, 0,0, 0,1 },
+ { ARM::t2STRi12,ARM::tSTRi, ARM::tSTRspi, 5, 8, 1, 0, 0,0, 0,1 },
+ { ARM::t2STRs, ARM::tSTRr, 0, 0, 0, 1, 0, 0,0, 0,1 },
+ { ARM::t2STRBi12,ARM::tSTRBi, 0, 5, 0, 1, 0, 0,0, 0,1 },
+ { ARM::t2STRBs, ARM::tSTRBr, 0, 0, 0, 1, 0, 0,0, 0,1 },
+ { ARM::t2STRHi12,ARM::tSTRHi, 0, 5, 0, 1, 0, 0,0, 0,1 },
+ { ARM::t2STRHs, ARM::tSTRHr, 0, 0, 0, 1, 0, 0,0, 0,1 },
+
+ { ARM::t2LDMIA, ARM::tLDMIA, 0, 0, 0, 1, 1, 1,1, 0,1 },
+ { ARM::t2LDMIA_RET,0, ARM::tPOP_RET, 0, 0, 1, 1, 1,1, 0,1 },
+ { ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,0, 0, 1, 1, 1,1, 0,1 },
// ARM::t2STM (with no basereg writeback) has no Thumb1 equivalent
- { ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0, 0, 0, 1, 1, 1,1, 1 },
- { ARM::t2STMDB_UPD, 0, ARM::tPUSH, 0, 0, 1, 1, 1,1, 1 },
+ { ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0, 0, 0, 1, 1, 1,1, 0,1 },
+ { ARM::t2STMDB_UPD, 0, ARM::tPUSH, 0, 0, 1, 1, 1,1, 0,1 },
};
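ReduceTable is consulted through ReduceOpcodeMap, a one-time index from wide
opcode to table row built when the pass is constructed. A minimal sketch of
that lookup pattern (the opcodes are made-up numbers, not real ARM:: values):

    #include <cstdio>
    #include <map>

    struct Entry { unsigned WideOpc, NarrowOpc1, NarrowOpc2; };

    int main() {
      static const Entry Table[] = { {100, 10, 0}, {200, 20, 21} };
      std::map<unsigned, unsigned> OpcMap; // wide opcode -> table index
      for (unsigned i = 0, e = sizeof(Table) / sizeof(Table[0]); i != e; ++i)
        OpcMap[Table[i].WideOpc] = i;

      unsigned Opc = 200;
      std::map<unsigned, unsigned>::const_iterator I = OpcMap.find(Opc);
      if (I != OpcMap.end())
        std::printf("wide %u -> row %u (narrow1 %u)\n",
                    Opc, I->second, Table[I->second].NarrowOpc1);
      return 0;
    }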
class Thumb2SizeReduce : public MachineFunctionPass {
@@ -133,6 +138,7 @@ namespace {
Thumb2SizeReduce();
const Thumb2InstrInfo *TII;
+ const ARMSubtarget *STI;
virtual bool runOnMachineFunction(MachineFunction &MF);
@@ -144,6 +150,8 @@ namespace {
/// ReduceOpcodeMap - Maps wide opcode to index of entry in ReduceTable.
DenseMap<unsigned, unsigned> ReduceOpcodeMap;
+ bool canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use);
+
bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
bool is2Addr, ARMCC::CondCodes Pred,
bool LiveCPSR, bool &HasCC, bool &CCDead);
@@ -152,19 +160,20 @@ namespace {
const ReduceEntry &Entry);
bool ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
- const ReduceEntry &Entry, bool LiveCPSR);
+ const ReduceEntry &Entry, bool LiveCPSR,
+ MachineInstr *CPSRDef);
/// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address
/// instruction.
bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
const ReduceEntry &Entry,
- bool LiveCPSR);
+ bool LiveCPSR, MachineInstr *CPSRDef);
/// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit
/// non-two-address instruction.
bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
const ReduceEntry &Entry,
- bool LiveCPSR);
+ bool LiveCPSR, MachineInstr *CPSRDef);
/// ReduceMBB - Reduce width of instructions in the specified basic block.
bool ReduceMBB(MachineBasicBlock &MBB);
@@ -187,6 +196,52 @@ static bool HasImplicitCPSRDef(const TargetInstrDesc &TID) {
return false;
}
+/// canAddPseudoFlagDep - For A9 (and other out-of-order) implementations,
+/// the 16-bit 's' instructions only partially update CPSR. Abort the
+/// transformation to avoid adding a false dependency on the last CPSR-setting
+/// instruction, which hurts the out-of-order execution engine's ability to do
+/// register renaming.
+/// This function checks whether there is a read-after-write dependency between
+/// the last instruction that defines the CPSR and the current instruction. If
+/// there is, then there is no harm done, since the instruction cannot be
+/// retired before the CPSR-setting instruction anyway.
+/// Note, we are not doing full dependency analysis here for the sake of
+/// compile time. We're not looking for cases like:
+/// r0 = muls ...
+/// r1 = add.w r0, ...
+/// ...
+///    = mul.w r1
+/// In this case it would have been ok to narrow the mul.w to muls, since there
+/// is an indirect RAW dependency between the muls and the mul.w.
+bool
+Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use) {
+ if (!Def || !STI->avoidCPSRPartialUpdate())
+ return false;
+
+ SmallSet<unsigned, 2> Defs;
+ for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = Def->getOperand(i);
+ if (!MO.isReg() || MO.isUndef() || MO.isUse())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0 || Reg == ARM::CPSR)
+ continue;
+ Defs.insert(Reg);
+ }
+
+ for (unsigned i = 0, e = Use->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = Use->getOperand(i);
+ if (!MO.isReg() || MO.isUndef() || MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (Defs.count(Reg))
+ return false;
+ }
+
+  // No read-after-write dependency. The narrowing would add a false dependency.
+ return true;
+}
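Stripped of the MachineInstr plumbing, the check gathers the defs of the last
CPSR-setting instruction and allows the narrowing only when the candidate
reads one of them. A self-contained sketch under that reading (the types are
illustrative stand-ins for machine operands):

    #include <cstdio>
    #include <set>
    #include <vector>

    struct Operand { unsigned Reg; bool IsDef; };
    typedef std::vector<Operand> Insn;

    // True when narrowing would introduce a *false* CPSR dependency, i.e.
    // there is no real read-after-write between Def and Use.
    static bool wouldAddFalseDep(const Insn &Def, const Insn &Use) {
      std::set<unsigned> Defs;
      for (size_t i = 0; i != Def.size(); ++i)
        if (Def[i].IsDef && Def[i].Reg != 0)
          Defs.insert(Def[i].Reg);
      for (size_t i = 0; i != Use.size(); ++i)
        if (!Use[i].IsDef && Defs.count(Use[i].Reg))
          return false; // real RAW dependence; narrowing is harmless
      return true;
    }

    int main() {
      Insn Muls = { {1, true} };  // defines r1 (and, implicitly, the flags)
      Insn AddW = { {1, false} }; // reads r1: a real RAW dependence
      std::printf("%s\n", wouldAddFalseDep(Muls, AddW) ? "abort" : "narrow");
      return 0;
    }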
+
bool
Thumb2SizeReduce::VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
bool is2Addr, ARMCC::CondCodes Pred,
@@ -410,7 +465,10 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
MIB.addOperand(MI->getOperand(OpNum));
// Transfer memoperands.
- (*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+
+ // Transfer MI flags.
+ MIB.setMIFlags(MI->getFlags());
DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);
@@ -422,7 +480,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
bool
Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
const ReduceEntry &Entry,
- bool LiveCPSR) {
+ bool LiveCPSR, MachineInstr *CPSRDef) {
if (Entry.LowRegs1 && !VerifyLowRegs(MI))
return false;
@@ -440,12 +498,12 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
switch (Opc) {
default: break;
case ARM::t2ADDSri: {
- if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR))
+ if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef))
return true;
// fallthrough
}
case ARM::t2ADDSrr:
- return ReduceToNarrow(MBB, MI, Entry, LiveCPSR);
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
}
}
break;
@@ -453,13 +511,13 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
case ARM::t2RSBri:
case ARM::t2RSBSri:
if (MI->getOperand(2).getImm() == 0)
- return ReduceToNarrow(MBB, MI, Entry, LiveCPSR);
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
break;
case ARM::t2MOVi16:
// Can convert only 'pure' immediate operands, not immediates obtained as
// globals' addresses.
if (MI->getOperand(1).isImm())
- return ReduceToNarrow(MBB, MI, Entry, LiveCPSR);
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
break;
case ARM::t2CMPrr: {
// Try to reduce to the lo-reg only version first. Why there are two
@@ -468,17 +526,17 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
// are prioritized, but the table assumes a unique entry for each
// source insn opcode. So for now, we hack a local entry record to use.
static const ReduceEntry NarrowEntry =
- { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 1 };
- if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR))
+ { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 0,1 };
+ if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, CPSRDef))
return true;
- return ReduceToNarrow(MBB, MI, Entry, LiveCPSR);
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
}
case ARM::t2ADDrSPi: {
static const ReduceEntry NarrowEntry =
- { ARM::t2ADDrSPi,ARM::tADDspi, 0, 7, 0, 1, 0, 1, 0, 1 };
+ { ARM::t2ADDrSPi,ARM::tADDspi, 0, 7, 0, 1, 0, 1, 0, 0,1 };
if (MI->getOperand(0).getReg() == ARM::SP)
- return ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR);
- return ReduceToNarrow(MBB, MI, Entry, LiveCPSR);
+ return ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, CPSRDef);
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
}
}
return false;
@@ -487,7 +545,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
bool
Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
const ReduceEntry &Entry,
- bool LiveCPSR) {
+ bool LiveCPSR, MachineInstr *CPSRDef) {
if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr))
return false;
@@ -542,6 +600,12 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
if (!VerifyPredAndCC(MI, Entry, true, Pred, LiveCPSR, HasCC, CCDead))
return false;
+  // Avoid adding a false dependency on the partial flag update performed by
+  // some 16-bit instructions which have the 's' bit set.
+ if (Entry.PartFlag && NewTID.hasOptionalDef() && HasCC &&
+ canAddPseudoFlagDep(CPSRDef, MI))
+ return false;
+
// Add the 16-bit instruction.
DebugLoc dl = MI->getDebugLoc();
MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewTID);
@@ -563,6 +627,9 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
MIB.addOperand(MI->getOperand(i));
}
+ // Transfer MI flags.
+ MIB.setMIFlags(MI->getFlags());
+
DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);
MBB.erase(MI);
@@ -573,7 +640,7 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
bool
Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
const ReduceEntry &Entry,
- bool LiveCPSR) {
+ bool LiveCPSR, MachineInstr *CPSRDef) {
if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit))
return false;
@@ -626,6 +693,12 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
if (!VerifyPredAndCC(MI, Entry, false, Pred, LiveCPSR, HasCC, CCDead))
return false;
+  // Avoid adding a false dependency on the partial flag update performed by
+  // some 16-bit instructions which have the 's' bit set.
+ if (Entry.PartFlag && NewTID.hasOptionalDef() && HasCC &&
+ canAddPseudoFlagDep(CPSRDef, MI))
+ return false;
+
// Add the 16-bit instruction.
DebugLoc dl = MI->getDebugLoc();
MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewTID);
@@ -663,6 +736,9 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
if (!TID.isPredicable() && NewTID.isPredicable())
AddDefaultPred(MIB);
+ // Transfer MI flags.
+ MIB.setMIFlags(MI->getFlags());
+
DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);
MBB.erase(MI);
@@ -670,7 +746,7 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
return true;
}
-static bool UpdateCPSRDef(MachineInstr &MI, bool LiveCPSR) {
+static bool UpdateCPSRDef(MachineInstr &MI, bool LiveCPSR, bool &DefCPSR) {
bool HasDef = false;
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI.getOperand(i);
@@ -678,6 +754,8 @@ static bool UpdateCPSRDef(MachineInstr &MI, bool LiveCPSR) {
continue;
if (MO.getReg() != ARM::CPSR)
continue;
+
+ DefCPSR = true;
if (!MO.isDead())
HasDef = true;
}
@@ -707,6 +785,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
// Yes, CPSR could be livein.
bool LiveCPSR = MBB.isLiveIn(ARM::CPSR);
+ MachineInstr *CPSRDef = 0;
MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
MachineBasicBlock::iterator NextMII;
@@ -722,7 +801,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
const ReduceEntry &Entry = ReduceTable[OPI->second];
// Ignore "special" cases for now.
if (Entry.Special) {
- if (ReduceSpecial(MBB, MI, Entry, LiveCPSR)) {
+ if (ReduceSpecial(MBB, MI, Entry, LiveCPSR, CPSRDef)) {
Modified = true;
MachineBasicBlock::iterator I = prior(NextMII);
MI = &*I;
@@ -731,7 +810,8 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
}
// Try to transform to a 16-bit two-address instruction.
- if (Entry.NarrowOpc2 && ReduceTo2Addr(MBB, MI, Entry, LiveCPSR)) {
+ if (Entry.NarrowOpc2 &&
+ ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef)) {
Modified = true;
MachineBasicBlock::iterator I = prior(NextMII);
MI = &*I;
@@ -739,7 +819,8 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
}
// Try to transform to a 16-bit non-two-address instruction.
- if (Entry.NarrowOpc1 && ReduceToNarrow(MBB, MI, Entry, LiveCPSR)) {
+ if (Entry.NarrowOpc1 &&
+ ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef)) {
Modified = true;
MachineBasicBlock::iterator I = prior(NextMII);
MI = &*I;
@@ -747,7 +828,14 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
}
ProcessNext:
- LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR);
+ bool DefCPSR = false;
+ LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR);
+ if (MI->getDesc().isCall())
+ // Calls don't really set CPSR.
+ CPSRDef = 0;
+ else if (DefCPSR)
+ // This is the last CPSR defining instruction.
+ CPSRDef = MI;
}
return Modified;
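The bookkeeping after ProcessNext reduces to: remember the last flag-setting
instruction, and forget it across calls. A toy walk-through of that rule
(instruction names are illustrative):

    #include <cstdio>
    #include <vector>

    struct Insn { const char *Name; bool DefsCPSR; bool IsCall; };

    int main() {
      std::vector<Insn> Block = {
        {"adds", true, false}, {"bl", false, true},
        {"subs", true, false}, {"mul.w", false, false},
      };
      const Insn *CPSRDef = 0;
      for (size_t i = 0; i != Block.size(); ++i) {
        const Insn &MI = Block[i];
        if (MI.IsCall)
          CPSRDef = 0;   // calls don't really set CPSR
        else if (MI.DefsCPSR)
          CPSRDef = &MI; // the last CPSR-defining instruction seen so far
        std::printf("%-6s CPSRDef=%s\n", MI.Name,
                    CPSRDef ? CPSRDef->Name : "none");
      }
      return 0;
    }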
@@ -756,6 +844,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
const TargetMachine &TM = MF.getTarget();
TII = static_cast<const Thumb2InstrInfo*>(TM.getInstrInfo());
+ STI = &TM.getSubtarget<ARMSubtarget>();
bool Modified = false;
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
diff --git a/lib/Target/Alpha/Alpha.td b/lib/Target/Alpha/Alpha.td
index 4508eda..ae79c2e 100644
--- a/lib/Target/Alpha/Alpha.td
+++ b/lib/Target/Alpha/Alpha.td
@@ -21,7 +21,7 @@ include "llvm/Target/Target.td"
//===----------------------------------------------------------------------===//
def FeatureCIX : SubtargetFeature<"cix", "HasCT", "true",
- "Enable CIX extentions">;
+ "Enable CIX extensions">;
//===----------------------------------------------------------------------===//
// Register File Description
diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp
index c4f43ab..ee404f0 100644
--- a/lib/Target/Alpha/AlphaISelLowering.cpp
+++ b/lib/Target/Alpha/AlphaISelLowering.cpp
@@ -296,7 +296,7 @@ AlphaTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Build a sequence of copy-to-reg nodes chained together with token chain and
// flag operands which copy the outgoing args into registers. The InFlag is
- // necessary since all emited instructions must be stuck together.
+ // necessary since all emitted instructions must be stuck together.
SDValue InFlag;
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
diff --git a/lib/Target/Alpha/AlphaInstrInfo.td b/lib/Target/Alpha/AlphaInstrInfo.td
index 099d715..b201712 100644
--- a/lib/Target/Alpha/AlphaInstrInfo.td
+++ b/lib/Target/Alpha/AlphaInstrInfo.td
@@ -1030,7 +1030,7 @@ def : Pat<(brcond (setune F8RC:$RA, immFPZ), bb:$DISP),
//WMB Mfc 18.4400 Write memory barrier
//MF_FPCR F-P 17.025 Move from FPCR
//MT_FPCR F-P 17.024 Move to FPCR
-//There are in the Multimedia extentions, so let's not use them yet
+//These are in the Multimedia extensions, so let's not use them yet
//def MAXSB8 : OForm<0x1C, 0x3E, "MAXSB8 $RA,$RB,$RC">; //Vector signed byte maximum
//def MAXSW4 : OForm< 0x1C, 0x3F, "MAXSW4 $RA,$RB,$RC">; //Vector signed word maximum
//def MAXUB8 : OForm<0x1C, 0x3C, "MAXUB8 $RA,$RB,$RC">; //Vector unsigned byte maximum
diff --git a/lib/Target/Alpha/README.txt b/lib/Target/Alpha/README.txt
index 9ae1517..cc170e3 100644
--- a/lib/Target/Alpha/README.txt
+++ b/lib/Target/Alpha/README.txt
@@ -33,9 +33,9 @@ add crazy vector instructions (MVI):
(MIN|MAX)(U|S)(B8|W4) min and max, signed and unsigned, byte and word
PKWB, UNPKBW pack/unpack word to byte
PKLB UNPKBL pack/unpack long to byte
-PERR pixel error (sum accross bytes of bytewise abs(i8v8 a - i8v8 b))
+PERR pixel error (sum across bytes of bytewise abs(i8v8 a - i8v8 b))
-cmpbytes bytewise cmpeq of i8v8 a and i8v8 b (not part of MVI extentions)
+cmpbytes bytewise cmpeq of i8v8 a and i8v8 b (not part of MVI extensions)
this has some good examples for other operations that can be synthesised well
from these rather meager vector ops (such as saturating add).
diff --git a/lib/Target/Blackfin/BlackfinISelLowering.cpp b/lib/Target/Blackfin/BlackfinISelLowering.cpp
index 7c80eec..1e1f8c9 100644
--- a/lib/Target/Blackfin/BlackfinISelLowering.cpp
+++ b/lib/Target/Blackfin/BlackfinISelLowering.cpp
@@ -345,7 +345,7 @@ BlackfinTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Build a sequence of copy-to-reg nodes chained together with token
// chain and flag operands which copy the outgoing args into registers.
- // The InFlag in necessary since all emited instructions must be
+  // The InFlag is necessary since all emitted instructions must be
// stuck together.
SDValue InFlag;
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp
index 6c555a3..358d1b3 100644
--- a/lib/Target/CBackend/CBackend.cpp
+++ b/lib/Target/CBackend/CBackend.cpp
@@ -2440,24 +2440,6 @@ void CWriter::visitReturnInst(ReturnInst &I) {
return;
}
- if (I.getNumOperands() > 1) {
- Out << " {\n";
- Out << " ";
- printType(Out, I.getParent()->getParent()->getReturnType());
- Out << " llvm_cbe_mrv_temp = {\n";
- for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
- Out << " ";
- writeOperand(I.getOperand(i));
- if (i != e - 1)
- Out << ",";
- Out << "\n";
- }
- Out << " };\n";
- Out << " return llvm_cbe_mrv_temp;\n";
- Out << " }\n";
- return;
- }
-
Out << " return";
if (I.getNumOperands()) {
Out << ' ';
diff --git a/lib/Target/CellSPU/SPU64InstrInfo.td b/lib/Target/CellSPU/SPU64InstrInfo.td
index 5ef5716..f340edf 100644
--- a/lib/Target/CellSPU/SPU64InstrInfo.td
+++ b/lib/Target/CellSPU/SPU64InstrInfo.td
@@ -24,7 +24,7 @@
// 5. The code sequences for r64 and v2i64 are probably overly conservative,
// compared to the code that gcc produces.
//
-// M00$E B!tes Kan be Pretty N@sTi!!!!! (appologies to Monty!)
+// M00$E B!tes Kan be Pretty N@sTi!!!!! (apologies to Monty!)
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// selb instruction definition for i64. Note that the selection mask is
diff --git a/lib/Target/CellSPU/SPUAsmPrinter.cpp b/lib/Target/CellSPU/SPUAsmPrinter.cpp
index 4040461..fd96694 100644
--- a/lib/Target/CellSPU/SPUAsmPrinter.cpp
+++ b/lib/Target/CellSPU/SPUAsmPrinter.cpp
@@ -182,6 +182,10 @@ namespace {
printOp(MI->getOperand(OpNo), O);
}
+ void printHBROperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
+ printOp(MI->getOperand(OpNo), O);
+ }
+
void printPCRelativeOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
// Used to generate a ".-<target>", but it turns out that the assembler
// really wants the target.
@@ -279,6 +283,9 @@ void SPUAsmPrinter::printOp(const MachineOperand &MO, raw_ostream &O) {
}
O << *Mang->getSymbol(MO.getGlobal());
return;
+ case MachineOperand::MO_MCSymbol:
+ O << *(MO.getMCSymbol());
+ return;
default:
O << "<unknown operand type: " << MO.getType() << ">";
return;
diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
index d226156..9351ffd 100644
--- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
+++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
@@ -321,12 +321,17 @@ SPUDAGToDAGISel::SelectAFormAddr(SDNode *Op, SDValue N, SDValue &Base,
// These match the addr256k operand type:
EVT OffsVT = MVT::i16;
SDValue Zero = CurDAG->getTargetConstant(0, OffsVT);
+ int64_t val;
switch (N.getOpcode()) {
case ISD::Constant:
+    val = cast<ConstantSDNode>(N.getNode())->getSExtValue();
+    Base = CurDAG->getTargetConstant(val, MVT::i32);
+    Index = Zero;
+    return true;
case ISD::ConstantPool:
case ISD::GlobalAddress:
- report_fatal_error("SPU SelectAFormAddr: Constant/Pool/Global not lowered.");
+ report_fatal_error("SPU SelectAFormAddr: Pool/Global not lowered.");
/*NOTREACHED*/
case ISD::TargetConstant:
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
index 743a4d7..8668da3 100644
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -705,7 +705,7 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
offset
));
- // Shift the low similarily
+ // Shift the low similarly
// TODO: add SPUISD::SHL_BYTES
low = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, low, offset );
diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h
index dd48d7b..cf883e2 100644
--- a/lib/Target/CellSPU/SPUISelLowering.h
+++ b/lib/Target/CellSPU/SPUISelLowering.h
@@ -183,14 +183,6 @@ namespace llvm {
virtual bool isLegalAddressingMode(const AddrMode &AM,
const Type *Ty) const;
-
- /// After allocating this many registers, the allocator should feel
- /// register pressure. The value is a somewhat random guess, based on the
- /// number of non callee saved registers in the C calling convention.
- virtual unsigned getRegPressureLimit( const TargetRegisterClass *RC,
- MachineFunction &MF) const{
- return 50;
- }
};
}
diff --git a/lib/Target/CellSPU/SPUInstrFormats.td b/lib/Target/CellSPU/SPUInstrFormats.td
index 21bc275..bdbe255 100644
--- a/lib/Target/CellSPU/SPUInstrFormats.td
+++ b/lib/Target/CellSPU/SPUInstrFormats.td
@@ -296,3 +296,25 @@ class Pseudo<dag OOL, dag IOL, string asmstr, list<dag> pattern>
let Pattern = pattern;
let Inst{31-0} = 0;
}
+
+//===----------------------------------------------------------------------===//
+// Branch hint formats
+//===----------------------------------------------------------------------===//
+// For hbrr and hbra
+class HBI16Form<bits<7> opcode, dag IOL, string asmstr>
+ : Instruction {
+ field bits<32> Inst;
+  bits<16> i16;
+  bits<9> RO;
+
+ let Namespace = "SPU";
+ let InOperandList = IOL;
+ let OutOperandList = (outs); //no output
+ let AsmString = asmstr;
+ let Itinerary = BranchHints;
+
+ let Inst{0-6} = opcode;
+ let Inst{7-8} = RO{8-7};
+ let Inst{9-24} = i16;
+ let Inst{25-31} = RO{6-0};
+}
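A quick sanity check of the packing above, written out in plain C++ (assuming the SPU convention that Inst bit 0 is the most significant bit; the helper name is illustrative, since TableGen generates the real encoder):

    #include <cstdint>

    // Mirrors the `let Inst{...}` assignments of HBI16Form.
    uint32_t encodeHBI16(uint32_t opcode, uint32_t RO, uint32_t i16) {
      uint32_t Inst = 0;
      Inst |= (opcode & 0x7F) << 25;   // Inst{0-6}   = opcode
      Inst |= ((RO >> 7) & 0x3) << 23; // Inst{7-8}   = RO{8-7}
      Inst |= (i16 & 0xFFFF) << 7;     // Inst{9-24}  = i16
      Inst |= (RO & 0x7F);             // Inst{25-31} = RO{6-0}
      return Inst;
    }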
diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp
index f9e6c72..080434d 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.cpp
+++ b/lib/Target/CellSPU/SPUInstrInfo.cpp
@@ -21,6 +21,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/MC/MCContext.h"
using namespace llvm;
@@ -281,9 +282,20 @@ SPUInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
return true;
}
+// Search MBB for branch hint labels and branch hint ops and remove them.
+static void removeHBR(MachineBasicBlock &MBB) {
+  for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ) {
+    if (I->getOpcode() == SPU::HBRA ||
+        I->getOpcode() == SPU::HBR_LABEL)
+      I = MBB.erase(I); // erase() returns the iterator after the removed op
+    else
+      ++I;              // advance only when nothing was erased
+  }
+}
+
unsigned
SPUInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator I = MBB.end();
+ removeHBR(MBB);
if (I == MBB.begin())
return 0;
--I;
@@ -314,6 +326,23 @@ SPUInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
return 2;
}
+/** Find the optimal position for a hint branch instruction in a basic block.
+ * This should take into account:
+ *  - the branch hint delays
+ *  - congestion of the memory bus
+ *  - dual-issue scheduling (i.e. avoid insertion of nops)
+ * The current implementation is rather simplistic.
+ */
+static MachineBasicBlock::iterator findHBRPosition(MachineBasicBlock &MBB)
+{
+ MachineBasicBlock::iterator J = MBB.end();
+  for (int i = 0; i < 8; i++) {
+    if (J == MBB.begin()) return J;
+    J--;
+  }
+ return J;
+}
+
unsigned
SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
@@ -324,32 +353,61 @@ SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
assert((Cond.size() == 2 || Cond.size() == 0) &&
"SPU branch conditions have two components!");
+ MachineInstrBuilder MIB;
+  // TODO: make this a more accurate algorithm.
+  bool haveHBR = MBB.size() > 8;
+
+ removeHBR(MBB);
+ MCSymbol *branchLabel = MBB.getParent()->getContext().CreateTempSymbol();
+ // Add a label just before the branch
+ if (haveHBR)
+ MIB = BuildMI(&MBB, DL, get(SPU::HBR_LABEL)).addSym(branchLabel);
+
// One-way branch.
if (FBB == 0) {
if (Cond.empty()) {
// Unconditional branch
- MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(SPU::BR));
+ MIB = BuildMI(&MBB, DL, get(SPU::BR));
MIB.addMBB(TBB);
DEBUG(errs() << "Inserted one-way uncond branch: ");
DEBUG((*MIB).dump());
+
+      // Basic blocks have just one branch, so it is safe to add the hint at
+      // its end.
+      if (haveHBR) {
+        MIB = BuildMI(MBB, findHBRPosition(MBB), DL, get(SPU::HBRA));
+ MIB.addSym(branchLabel);
+ MIB.addMBB(TBB);
+ }
} else {
// Conditional branch
- MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(Cond[0].getImm()));
+ MIB = BuildMI(&MBB, DL, get(Cond[0].getImm()));
MIB.addReg(Cond[1].getReg()).addMBB(TBB);
+ if (haveHBR) {
+ MIB = BuildMI(MBB, findHBRPosition(MBB), DL, get(SPU::HBRA));
+ MIB.addSym(branchLabel);
+ MIB.addMBB(TBB);
+ }
+
DEBUG(errs() << "Inserted one-way cond branch: ");
DEBUG((*MIB).dump());
}
return 1;
} else {
- MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(Cond[0].getImm()));
+ MIB = BuildMI(&MBB, DL, get(Cond[0].getImm()));
MachineInstrBuilder MIB2 = BuildMI(&MBB, DL, get(SPU::BR));
// Two-way Conditional Branch.
MIB.addReg(Cond[1].getReg()).addMBB(TBB);
MIB2.addMBB(FBB);
+ if (haveHBR) {
+      MIB = BuildMI(MBB, findHBRPosition(MBB), DL, get(SPU::HBRA));
+ MIB.addSym(branchLabel);
+ MIB.addMBB(FBB);
+ }
+
DEBUG(errs() << "Inserted conditional branch: ");
DEBUG((*MIB).dump());
DEBUG(errs() << "part 2: ");
diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td
index 25f6fd0..e103c9b 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.td
+++ b/lib/Target/CellSPU/SPUInstrInfo.td
@@ -28,6 +28,8 @@ let hasCtrlDep = 1, Defs = [R1], Uses = [R1] in {
def ADJCALLSTACKUP : Pseudo<(outs), (ins u16imm_i32:$amt),
"${:comment} ADJCALLSTACKUP",
[(callseq_end timm:$amt)]>;
+ def HBR_LABEL : Pseudo<(outs), (ins hbrtarget:$targ),
+ "$targ:\t${:comment}branch hint target",[ ]>;
}
//===----------------------------------------------------------------------===//
@@ -2013,9 +2015,9 @@ class SHLHInst<dag OOL, dag IOL, list<dag> pattern>:
RotShiftVec, pattern>;
class SHLHVecInst<ValueType vectype>:
- SHLHInst<(outs VECREG:$rT), (ins VECREG:$rA, R16C:$rB),
+ SHLHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
[(set (vectype VECREG:$rT),
- (SPUvec_shl (vectype VECREG:$rA), R16C:$rB))]>;
+ (SPUvec_shl (vectype VECREG:$rA), (vectype VECREG:$rB)))]>;
multiclass ShiftLeftHalfword
{
@@ -2063,9 +2065,9 @@ class SHLInst<dag OOL, dag IOL, list<dag> pattern>:
multiclass ShiftLeftWord
{
def v4i32:
- SHLInst<(outs VECREG:$rT), (ins VECREG:$rA, R16C:$rB),
+ SHLInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
[(set (v4i32 VECREG:$rT),
- (SPUvec_shl (v4i32 VECREG:$rA), R16C:$rB))]>;
+ (SPUvec_shl (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
def r32:
SHLInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
[(set R32C:$rT, (shl R32C:$rA, R32C:$rB))]>;
@@ -2511,19 +2513,11 @@ class ROTHMInst<dag OOL, dag IOL, list<dag> pattern>:
RotShiftVec, pattern>;
def ROTHMv8i16:
- ROTHMInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ ROTHMInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
[/* see patterns below - $rB must be negated */]>;
-def : Pat<(SPUvec_srl (v8i16 VECREG:$rA), R32C:$rB),
- (ROTHMv8i16 VECREG:$rA, (SFIr32 R32C:$rB, 0))>;
-
-def : Pat<(SPUvec_srl (v8i16 VECREG:$rA), R16C:$rB),
- (ROTHMv8i16 VECREG:$rA,
- (SFIr32 (XSHWr16 R16C:$rB), 0))>;
-
-def : Pat<(SPUvec_srl (v8i16 VECREG:$rA), R8C:$rB),
- (ROTHMv8i16 VECREG:$rA,
- (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB) ), 0))>;
+def : Pat<(SPUvec_srl (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)),
+ (ROTHMv8i16 VECREG:$rA, (SFHIvec VECREG:$rB, 0))>;
// ROTHM r16 form: Rotate 16-bit quantity to right, zero fill at the left
// Note: This instruction doesn't match a pattern because rB must be negated
@@ -2584,19 +2578,11 @@ class ROTMInst<dag OOL, dag IOL, list<dag> pattern>:
RotShiftVec, pattern>;
def ROTMv4i32:
- ROTMInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ ROTMInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
[/* see patterns below - $rB must be negated */]>;
-def : Pat<(SPUvec_srl (v4i32 VECREG:$rA), R32C:$rB),
- (ROTMv4i32 VECREG:$rA, (SFIr32 R32C:$rB, 0))>;
-
-def : Pat<(SPUvec_srl (v4i32 VECREG:$rA), R16C:$rB),
- (ROTMv4i32 VECREG:$rA,
- (SFIr32 (XSHWr16 R16C:$rB), 0))>;
-
-def : Pat<(SPUvec_srl (v4i32 VECREG:$rA), R8C:$rB),
- (ROTMv4i32 VECREG:$rA,
- (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>;
+def : Pat<(SPUvec_srl (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)),
+ (ROTMv4i32 VECREG:$rA, (SFIvec VECREG:$rB, 0))>;
def ROTMr32:
ROTMInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
@@ -2802,20 +2788,12 @@ defm ROTQMBII: RotateMaskQuadByBitsImm;
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
def ROTMAHv8i16:
- RRForm<0b01111010000, (outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ RRForm<0b01111010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
"rotmah\t$rT, $rA, $rB", RotShiftVec,
[/* see patterns below - $rB must be negated */]>;
-def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), R32C:$rB),
- (ROTMAHv8i16 VECREG:$rA, (SFIr32 R32C:$rB, 0))>;
-
-def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), R16C:$rB),
- (ROTMAHv8i16 VECREG:$rA,
- (SFIr32 (XSHWr16 R16C:$rB), 0))>;
-
-def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), R8C:$rB),
- (ROTMAHv8i16 VECREG:$rA,
- (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>;
+def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)),
+ (ROTMAHv8i16 VECREG:$rA, (SFHIvec VECREG:$rB, 0))>;
def ROTMAHr16:
RRForm<0b01111010000, (outs R16C:$rT), (ins R16C:$rA, R32C:$rB),
@@ -2857,20 +2835,12 @@ def : Pat<(sra R16C:$rA, (i8 imm:$val)),
(ROTMAHIr16 R16C:$rA, (TO_IMM32 uimm7:$val))>;
def ROTMAv4i32:
- RRForm<0b01011010000, (outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ RRForm<0b01011010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
"rotma\t$rT, $rA, $rB", RotShiftVec,
[/* see patterns below - $rB must be negated */]>;
-def : Pat<(SPUvec_sra (v4i32 VECREG:$rA), R32C:$rB),
- (ROTMAv4i32 VECREG:$rA, (SFIr32 R32C:$rB, 0))>;
-
-def : Pat<(SPUvec_sra (v4i32 VECREG:$rA), R16C:$rB),
- (ROTMAv4i32 VECREG:$rA,
- (SFIr32 (XSHWr16 R16C:$rB), 0))>;
-
-def : Pat<(SPUvec_sra (v4i32 VECREG:$rA), R8C:$rB),
- (ROTMAv4i32 VECREG:$rA,
- (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>;
+def : Pat<(SPUvec_sra (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)),
+ (ROTMAv4i32 VECREG:$rA, (SFIvec (v4i32 VECREG:$rB), 0))>;
def ROTMAr32:
RRForm<0b01011010000, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
@@ -4208,8 +4178,8 @@ def : Pat<(fabs (v4f32 VECREG:$rA)),
//===----------------------------------------------------------------------===//
// Hint for branch instructions:
//===----------------------------------------------------------------------===//
-
-/* def HBR : SPUInstr<(outs), (ins), "hbr\t" */
+def HBRA :
+ HBI16Form<0b0001001,(ins hbrtarget:$brinst, brtarget:$btarg), "hbra\t$brinst, $btarg">;
//===----------------------------------------------------------------------===//
// Execution, Load NOP (execute NOPs belong in even pipeline, load NOPs belong
diff --git a/lib/Target/CellSPU/SPURegisterInfo.h b/lib/Target/CellSPU/SPURegisterInfo.h
index 641da04..1708c59 100644
--- a/lib/Target/CellSPU/SPURegisterInfo.h
+++ b/lib/Target/CellSPU/SPURegisterInfo.h
@@ -46,6 +46,14 @@ namespace llvm {
virtual const TargetRegisterClass *
getPointerRegClass(unsigned Kind = 0) const;
+ /// After allocating this many registers, the allocator should feel
+ /// register pressure. The value is a somewhat random guess, based on the
+ /// number of non callee saved registers in the C calling convention.
+ virtual unsigned getRegPressureLimit( const TargetRegisterClass *RC,
+ MachineFunction &MF) const{
+ return 50;
+ }
+
//! Return the array of callee-saved registers
virtual const unsigned* getCalleeSavedRegs(const MachineFunction *MF) const;
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index 71d6049..797cfd5 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -1348,12 +1348,10 @@ void CppWriter::printInstruction(const Instruction *I,
const PHINode* phi = cast<PHINode>(I);
Out << "PHINode* " << iName << " = PHINode::Create("
- << getCppName(phi->getType()) << ", \"";
+ << getCppName(phi->getType()) << ", "
+ << phi->getNumIncomingValues() << ", \"";
printEscapedString(phi->getName());
Out << "\", " << bbname << ");";
- nl(Out) << iName << "->reserveOperandSpace("
- << phi->getNumIncomingValues()
- << ");";
nl(Out);
for (unsigned i = 0; i < phi->getNumOperands(); i+=2) {
Out << iName << "->addIncoming("
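For reference, the shape of the code this printer now emits (a sketch against the contemporary C++ API; Ty, NumPreds, Val, Pred, and BB stand in for the values actually printed):

    // The reservation hint moved from reserveOperandSpace() into the second
    // argument of PHINode::Create.
    PHINode *Phi = PHINode::Create(Ty, /*NumReservedValues=*/NumPreds,
                                   "iv", BB);
    Phi->addIncoming(Val, Pred); // incoming pairs are appended as before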
diff --git a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
index 3379ac2..060a87b 100644
--- a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
+++ b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
@@ -57,18 +57,26 @@ static unsigned mblazeBinary2Opcode[] = {
};
static unsigned getRD(uint32_t insn) {
+ if (!MBlazeRegisterInfo::isRegister((insn>>21)&0x1F))
+ return UNSUPPORTED;
return MBlazeRegisterInfo::getRegisterFromNumbering((insn>>21)&0x1F);
}
static unsigned getRA(uint32_t insn) {
+  if (!MBlazeRegisterInfo::isRegister((insn>>16)&0x1F))
+ return UNSUPPORTED;
return MBlazeRegisterInfo::getRegisterFromNumbering((insn>>16)&0x1F);
}
static unsigned getRB(uint32_t insn) {
+  if (!MBlazeRegisterInfo::isRegister((insn>>11)&0x1F))
+ return UNSUPPORTED;
return MBlazeRegisterInfo::getRegisterFromNumbering((insn>>11)&0x1F);
}
static int64_t getRS(uint32_t insn) {
+ if (!MBlazeRegisterInfo::isSpecialRegister(insn&0x3FFF))
+ return UNSUPPORTED;
return MBlazeRegisterInfo::getSpecialRegisterFromNumbering(insn&0x3FFF);
}
@@ -489,13 +497,14 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr,
raw_ostream &vStream) const {
// The machine instruction.
uint32_t insn;
+ uint64_t read;
uint8_t bytes[4];
- // We always consume 4 bytes of data
- size = 4;
+ // By default we consume 1 byte on failure
+ size = 1;
// We want to read exactly 4 bytes of data.
- if (region.readBytes(address, 4, (uint8_t*)bytes, NULL) == -1)
+ if (region.readBytes(address, 4, (uint8_t*)bytes, &read) == -1 || read < 4)
return false;
// Encoded as a big-endian 32-bit word in the stream.
@@ -509,44 +518,63 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr,
instr.setOpcode(opcode);
+ unsigned RD = getRD(insn);
+ unsigned RA = getRA(insn);
+ unsigned RB = getRB(insn);
+ unsigned RS = getRS(insn);
+
uint64_t tsFlags = MBlazeInsts[opcode].TSFlags;
switch ((tsFlags & MBlazeII::FormMask)) {
- default: llvm_unreachable("unknown instruction encoding");
+ default:
+ return false;
case MBlazeII::FRRRR:
- instr.addOperand(MCOperand::CreateReg(getRD(insn)));
- instr.addOperand(MCOperand::CreateReg(getRB(insn)));
- instr.addOperand(MCOperand::CreateReg(getRA(insn)));
+ if (RD == UNSUPPORTED || RA == UNSUPPORTED || RB == UNSUPPORTED)
+ return false;
+ instr.addOperand(MCOperand::CreateReg(RD));
+ instr.addOperand(MCOperand::CreateReg(RB));
+ instr.addOperand(MCOperand::CreateReg(RA));
break;
case MBlazeII::FRRR:
- instr.addOperand(MCOperand::CreateReg(getRD(insn)));
- instr.addOperand(MCOperand::CreateReg(getRA(insn)));
- instr.addOperand(MCOperand::CreateReg(getRB(insn)));
+ if (RD == UNSUPPORTED || RA == UNSUPPORTED || RB == UNSUPPORTED)
+ return false;
+ instr.addOperand(MCOperand::CreateReg(RD));
+ instr.addOperand(MCOperand::CreateReg(RA));
+ instr.addOperand(MCOperand::CreateReg(RB));
break;
case MBlazeII::FRI:
switch (opcode) {
- default: llvm_unreachable("unknown instruction encoding");
+ default:
+ return false;
case MBlaze::MFS:
- instr.addOperand(MCOperand::CreateReg(getRD(insn)));
+ if (RD == UNSUPPORTED)
+ return false;
+ instr.addOperand(MCOperand::CreateReg(RD));
instr.addOperand(MCOperand::CreateImm(insn&0x3FFF));
break;
case MBlaze::MTS:
+ if (RA == UNSUPPORTED)
+ return false;
instr.addOperand(MCOperand::CreateImm(insn&0x3FFF));
- instr.addOperand(MCOperand::CreateReg(getRA(insn)));
+ instr.addOperand(MCOperand::CreateReg(RA));
break;
case MBlaze::MSRSET:
case MBlaze::MSRCLR:
- instr.addOperand(MCOperand::CreateReg(getRD(insn)));
+ if (RD == UNSUPPORTED)
+ return false;
+ instr.addOperand(MCOperand::CreateReg(RD));
instr.addOperand(MCOperand::CreateImm(insn&0x7FFF));
break;
}
break;
case MBlazeII::FRRI:
- instr.addOperand(MCOperand::CreateReg(getRD(insn)));
- instr.addOperand(MCOperand::CreateReg(getRA(insn)));
+ if (RD == UNSUPPORTED || RA == UNSUPPORTED)
+ return false;
+ instr.addOperand(MCOperand::CreateReg(RD));
+ instr.addOperand(MCOperand::CreateReg(RA));
switch (opcode) {
default:
instr.addOperand(MCOperand::CreateImm(getIMM(insn)));
@@ -560,27 +588,37 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr,
break;
case MBlazeII::FCRR:
- instr.addOperand(MCOperand::CreateReg(getRA(insn)));
- instr.addOperand(MCOperand::CreateReg(getRB(insn)));
+ if (RA == UNSUPPORTED || RB == UNSUPPORTED)
+ return false;
+ instr.addOperand(MCOperand::CreateReg(RA));
+ instr.addOperand(MCOperand::CreateReg(RB));
break;
case MBlazeII::FCRI:
- instr.addOperand(MCOperand::CreateReg(getRA(insn)));
+ if (RA == UNSUPPORTED)
+ return false;
+ instr.addOperand(MCOperand::CreateReg(RA));
instr.addOperand(MCOperand::CreateImm(getIMM(insn)));
break;
case MBlazeII::FRCR:
- instr.addOperand(MCOperand::CreateReg(getRD(insn)));
- instr.addOperand(MCOperand::CreateReg(getRB(insn)));
+ if (RD == UNSUPPORTED || RB == UNSUPPORTED)
+ return false;
+ instr.addOperand(MCOperand::CreateReg(RD));
+ instr.addOperand(MCOperand::CreateReg(RB));
break;
case MBlazeII::FRCI:
- instr.addOperand(MCOperand::CreateReg(getRD(insn)));
+ if (RD == UNSUPPORTED)
+ return false;
+ instr.addOperand(MCOperand::CreateReg(RD));
instr.addOperand(MCOperand::CreateImm(getIMM(insn)));
break;
case MBlazeII::FCCR:
- instr.addOperand(MCOperand::CreateReg(getRB(insn)));
+ if (RB == UNSUPPORTED)
+ return false;
+ instr.addOperand(MCOperand::CreateReg(RB));
break;
case MBlazeII::FCCI:
@@ -588,33 +626,45 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr,
break;
case MBlazeII::FRRCI:
- instr.addOperand(MCOperand::CreateReg(getRD(insn)));
- instr.addOperand(MCOperand::CreateReg(getRA(insn)));
+ if (RD == UNSUPPORTED || RA == UNSUPPORTED)
+ return false;
+ instr.addOperand(MCOperand::CreateReg(RD));
+ instr.addOperand(MCOperand::CreateReg(RA));
instr.addOperand(MCOperand::CreateImm(getSHT(insn)));
break;
case MBlazeII::FRRC:
- instr.addOperand(MCOperand::CreateReg(getRD(insn)));
- instr.addOperand(MCOperand::CreateReg(getRA(insn)));
+ if (RD == UNSUPPORTED || RA == UNSUPPORTED)
+ return false;
+ instr.addOperand(MCOperand::CreateReg(RD));
+ instr.addOperand(MCOperand::CreateReg(RA));
break;
case MBlazeII::FRCX:
- instr.addOperand(MCOperand::CreateReg(getRD(insn)));
+ if (RD == UNSUPPORTED)
+ return false;
+ instr.addOperand(MCOperand::CreateReg(RD));
instr.addOperand(MCOperand::CreateImm(getFSL(insn)));
break;
case MBlazeII::FRCS:
- instr.addOperand(MCOperand::CreateReg(getRD(insn)));
- instr.addOperand(MCOperand::CreateReg(getRS(insn)));
+ if (RD == UNSUPPORTED || RS == UNSUPPORTED)
+ return false;
+ instr.addOperand(MCOperand::CreateReg(RD));
+ instr.addOperand(MCOperand::CreateReg(RS));
break;
case MBlazeII::FCRCS:
- instr.addOperand(MCOperand::CreateReg(getRS(insn)));
- instr.addOperand(MCOperand::CreateReg(getRA(insn)));
+ if (RS == UNSUPPORTED || RA == UNSUPPORTED)
+ return false;
+ instr.addOperand(MCOperand::CreateReg(RS));
+ instr.addOperand(MCOperand::CreateReg(RA));
break;
case MBlazeII::FCRCX:
- instr.addOperand(MCOperand::CreateReg(getRA(insn)));
+ if (RA == UNSUPPORTED)
+ return false;
+ instr.addOperand(MCOperand::CreateReg(RA));
instr.addOperand(MCOperand::CreateImm(getFSL(insn)));
break;
@@ -623,16 +673,23 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr,
break;
case MBlazeII::FCR:
- instr.addOperand(MCOperand::CreateReg(getRB(insn)));
+ if (RB == UNSUPPORTED)
+ return false;
+ instr.addOperand(MCOperand::CreateReg(RB));
break;
case MBlazeII::FRIR:
- instr.addOperand(MCOperand::CreateReg(getRD(insn)));
+ if (RD == UNSUPPORTED || RA == UNSUPPORTED)
+ return false;
+ instr.addOperand(MCOperand::CreateReg(RD));
instr.addOperand(MCOperand::CreateImm(getIMM(insn)));
- instr.addOperand(MCOperand::CreateReg(getRA(insn)));
+ instr.addOperand(MCOperand::CreateReg(RA));
break;
}
+ // We always consume 4 bytes of data on success
+ size = 4;
+
return true;
}
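The size contract this change establishes, modeled as a standalone snippet (illustrative names only, not the real MC disassembler interface):

    #include <cstddef>
    #include <cstdint>

    // Report 1 consumed byte on any failure, so the caller can resynchronize
    // byte-by-byte, and 4 on success, one full big-endian instruction word.
    bool decodeWord(const uint8_t *buf, size_t len, uint32_t &insn,
                    uint64_t &size) {
      size = 1;                  // default: consume one byte on failure
      if (len < 4)
        return false;            // short read, cannot form an instruction
      insn = (uint32_t(buf[0]) << 24) | (uint32_t(buf[1]) << 16) |
             (uint32_t(buf[2]) << 8)  |  uint32_t(buf[3]);
      size = 4;                  // success: a whole 32-bit word consumed
      return true;
    }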
diff --git a/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h b/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h
index bebc6c8..13c4b49 100644
--- a/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h
+++ b/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h
@@ -18,11 +18,12 @@
namespace llvm {
class MCOperand;
+ class TargetMachine;
class MBlazeInstPrinter : public MCInstPrinter {
public:
- MBlazeInstPrinter(const MCAsmInfo &MAI) : MCInstPrinter(MAI) {
- }
+ MBlazeInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI)
+ : MCInstPrinter(MAI) {}
virtual void printInst(const MCInst *MI, raw_ostream &O);
diff --git a/lib/Target/MBlaze/MBlaze.td b/lib/Target/MBlaze/MBlaze.td
index 1fa1e4d..1245658 100644
--- a/lib/Target/MBlaze/MBlaze.td
+++ b/lib/Target/MBlaze/MBlaze.td
@@ -31,49 +31,28 @@ def MBlazeInstrInfo : InstrInfo;
// Microblaze Subtarget features //
//===----------------------------------------------------------------------===//
-def FeaturePipe3 : SubtargetFeature<"pipe3", "HasPipe3", "true",
- "Implements 3-stage pipeline">;
def FeatureBarrel : SubtargetFeature<"barrel", "HasBarrel", "true",
"Implements barrel shifter">;
def FeatureDiv : SubtargetFeature<"div", "HasDiv", "true",
"Implements hardware divider">;
def FeatureMul : SubtargetFeature<"mul", "HasMul", "true",
"Implements hardware multiplier">;
-def FeatureFSL : SubtargetFeature<"fsl", "HasFSL", "true",
- "Implements FSL instructions">;
-def FeatureEFSL : SubtargetFeature<"efsl", "HasEFSL", "true",
- "Implements extended FSL instructions">;
-def FeatureMSRSet : SubtargetFeature<"msrset", "HasMSRSet", "true",
- "Implements MSR register set and clear">;
-def FeatureException : SubtargetFeature<"exception", "HasException", "true",
- "Implements hardware exception support">;
def FeaturePatCmp : SubtargetFeature<"patcmp", "HasPatCmp", "true",
"Implements pattern compare instruction">;
def FeatureFPU : SubtargetFeature<"fpu", "HasFPU", "true",
"Implements floating point unit">;
-def FeatureESR : SubtargetFeature<"esr", "HasESR", "true",
- "Implements ESR and EAR registers">;
-def FeaturePVR : SubtargetFeature<"pvr", "HasPVR", "true",
- "Implements processor version register">;
def FeatureMul64 : SubtargetFeature<"mul64", "HasMul64", "true",
"Implements multiplier with 64-bit result">;
def FeatureSqrt : SubtargetFeature<"sqrt", "HasSqrt", "true",
"Implements sqrt and floating point convert">;
-def FeatureMMU : SubtargetFeature<"mmu", "HasMMU", "true",
- "Implements memory management unit">;
//===----------------------------------------------------------------------===//
// MBlaze processors supported.
//===----------------------------------------------------------------------===//
-class Proc<string Name, list<SubtargetFeature> Features>
- : Processor<Name, MBlazeGenericItineraries, Features>;
-
-def : Proc<"v400", []>;
-def : Proc<"v500", []>;
-def : Proc<"v600", []>;
-def : Proc<"v700", []>;
-def : Proc<"v710", []>;
+def : Processor<"mblaze", MBlazeGenericItineraries, []>;
+def : Processor<"mblaze3", MBlazePipe3Itineraries, []>;
+def : Processor<"mblaze5", MBlazePipe5Itineraries, []>;
//===----------------------------------------------------------------------===//
// Instruction Descriptions
diff --git a/lib/Target/MBlaze/MBlazeAsmBackend.cpp b/lib/Target/MBlaze/MBlazeAsmBackend.cpp
index a4b21af..08f14c3 100644
--- a/lib/Target/MBlaze/MBlazeAsmBackend.cpp
+++ b/lib/Target/MBlaze/MBlazeAsmBackend.cpp
@@ -150,14 +150,13 @@ void ELFMBlazeAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data,
TargetAsmBackend *llvm::createMBlazeAsmBackend(const Target &T,
const std::string &TT) {
- switch (Triple(TT).getOS()) {
- case Triple::Darwin:
+ Triple TheTriple(TT);
+
+ if (TheTriple.isOSDarwin())
assert(0 && "Mac not supported on MBlaze");
- case Triple::MinGW32:
- case Triple::Cygwin:
- case Triple::Win32:
+
+ if (TheTriple.isOSWindows())
assert(0 && "Windows not supported on MBlaze");
- default:
- return new ELFMBlazeAsmBackend(T, Triple(TT).getOS());
- }
+
+ return new ELFMBlazeAsmBackend(T, TheTriple.getOS());
}
diff --git a/lib/Target/MBlaze/MBlazeAsmPrinter.cpp b/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
index 0016df5..0f0f60e 100644
--- a/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
+++ b/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
@@ -319,10 +319,11 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
}
static MCInstPrinter *createMBlazeMCInstPrinter(const Target &T,
+ TargetMachine &TM,
unsigned SyntaxVariant,
const MCAsmInfo &MAI) {
if (SyntaxVariant == 0)
- return new MBlazeInstPrinter(MAI);
+ return new MBlazeInstPrinter(TM, MAI);
return 0;
}
diff --git a/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp b/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
index 4399ee2..973e968 100644
--- a/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
+++ b/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
@@ -77,7 +77,7 @@ static bool hasImmInstruction(MachineBasicBlock::iterator &candidate) {
// We must assume that unknown immediate values require more than
// 16-bits to represent.
- if (mop.isGlobal() || mop.isSymbol())
+ if (mop.isGlobal() || mop.isSymbol() || mop.isJTI() || mop.isCPI())
return true;
// FIXME: we could probably check to see if the FP value happens
diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp
index f39826b..21a5988 100644
--- a/lib/Target/MBlaze/MBlazeISelLowering.cpp
+++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp
@@ -274,7 +274,7 @@ MBlazeTargetLowering::EmitCustomShift(MachineInstr *MI,
F->insert(It, loop);
F->insert(It, finish);
- // Update machine-CFG edges by transfering adding all successors and
+  // Update machine-CFG edges by transferring all successors and
// remaining instructions from the current block to the new block which
// will contain the Phi node for the select.
finish->splice(finish->begin(), MBB,
@@ -456,7 +456,7 @@ MBlazeTargetLowering::EmitCustomAtomic(MachineInstr *MI,
F->insert(It, start);
F->insert(It, exit);
- // Update machine-CFG edges by transfering adding all successors and
+  // Update machine-CFG edges by transferring all successors and
// remaining instructions from the current block to the new block which
// will contain the Phi node for the select.
exit->splice(exit->begin(), MBB, llvm::next(MachineBasicBlock::iterator(MI)),
@@ -778,7 +778,7 @@ LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
// Build a sequence of copy-to-reg nodes chained together with token
// chain and flag operands which copy the outgoing args into registers.
- // The InFlag in necessary since all emited instructions must be
+  // The InFlag is necessary since all emitted instructions must be
// stuck together.
SDValue InFlag;
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
@@ -1103,7 +1103,7 @@ MBlazeTargetLowering::getSingleConstraintMatchWeight(
switch (*constraint) {
default:
weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
- break;
+ break;
case 'd':
case 'y':
if (type->isIntegerTy())
diff --git a/lib/Target/MBlaze/MBlazeInstrFPU.td b/lib/Target/MBlaze/MBlazeInstrFPU.td
index 094de5c..4acdcfd 100644
--- a/lib/Target/MBlaze/MBlazeInstrFPU.td
+++ b/lib/Target/MBlaze/MBlazeInstrFPU.td
@@ -21,22 +21,22 @@
class LoadFM<bits<6> op, string instr_asm, PatFrag OpNode> :
TA<op, 0x000, (outs GPR:$dst), (ins memrr:$addr),
!strconcat(instr_asm, " $dst, $addr"),
- [(set (f32 GPR:$dst), (OpNode xaddr:$addr))], IILoad>;
+ [(set (f32 GPR:$dst), (OpNode xaddr:$addr))], IIC_MEMl>;
class LoadFMI<bits<6> op, string instr_asm, PatFrag OpNode> :
TB<op, (outs GPR:$dst), (ins memri:$addr),
!strconcat(instr_asm, " $dst, $addr"),
- [(set (f32 GPR:$dst), (OpNode iaddr:$addr))], IILoad>;
+ [(set (f32 GPR:$dst), (OpNode iaddr:$addr))], IIC_MEMl>;
class StoreFM<bits<6> op, string instr_asm, PatFrag OpNode> :
TA<op, 0x000, (outs), (ins GPR:$dst, memrr:$addr),
!strconcat(instr_asm, " $dst, $addr"),
- [(OpNode (f32 GPR:$dst), xaddr:$addr)], IIStore>;
+ [(OpNode (f32 GPR:$dst), xaddr:$addr)], IIC_MEMs>;
class StoreFMI<bits<6> op, string instr_asm, PatFrag OpNode> :
TB<op, (outs), (ins GPR:$dst, memrr:$addr),
!strconcat(instr_asm, " $dst, $addr"),
- [(OpNode (f32 GPR:$dst), iaddr:$addr)], IIStore>;
+ [(OpNode (f32 GPR:$dst), iaddr:$addr)], IIC_MEMs>;
class ArithF<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode,
InstrItinClass itin> :
@@ -56,15 +56,10 @@ class ArithFR<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode,
!strconcat(instr_asm, " $dst, $c, $b"),
[(set GPR:$dst, (OpNode GPR:$b, GPR:$c))], itin>;
-class LogicF<bits<6> op, string instr_asm> :
- TB<op, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
- !strconcat(instr_asm, " $dst, $b, $c"),
- [], IIAlu>;
-
class LogicFI<bits<6> op, string instr_asm> :
TB<op, (outs GPR:$dst), (ins GPR:$b, fimm:$c),
!strconcat(instr_asm, " $dst, $b, $c"),
- [], IIAlu>;
+ [], IIC_ALU>;
let rb=0 in {
class ArithF2<bits<6> op, bits<11> flags, string instr_asm,
@@ -95,10 +90,10 @@ let rb=0 in {
//===----------------------------------------------------------------------===//
let Predicates=[HasFPU] in {
def FORI : LogicFI<0x28, "ori ">;
- def FADD : ArithF<0x16, 0x000, "fadd ", fadd, IIAlu>;
- def FRSUB : ArithFR<0x16, 0x080, "frsub ", fsub, IIAlu>;
- def FMUL : ArithF<0x16, 0x100, "fmul ", fmul, IIAlu>;
- def FDIV : ArithF<0x16, 0x180, "fdiv ", fdiv, IIAlu>;
+ def FADD : ArithF<0x16, 0x000, "fadd ", fadd, IIC_FPU>;
+ def FRSUB : ArithFR<0x16, 0x080, "frsub ", fsub, IIC_FPU>;
+ def FMUL : ArithF<0x16, 0x100, "fmul ", fmul, IIC_FPU>;
+ def FDIV : ArithF<0x16, 0x180, "fdiv ", fdiv, IIC_FPUd>;
}
let Predicates=[HasFPU], isCodeGenOnly=1 in {
@@ -110,19 +105,19 @@ let Predicates=[HasFPU], isCodeGenOnly=1 in {
}
let Predicates=[HasFPU,HasSqrt] in {
- def FLT : ArithIF<0x16, 0x280, "flt ", IIAlu>;
- def FINT : ArithFI<0x16, 0x300, "fint ", IIAlu>;
- def FSQRT : ArithF2<0x16, 0x380, "fsqrt ", IIAlu>;
+ def FLT : ArithIF<0x16, 0x280, "flt ", IIC_FPUf>;
+ def FINT : ArithFI<0x16, 0x300, "fint ", IIC_FPUi>;
+ def FSQRT : ArithF2<0x16, 0x380, "fsqrt ", IIC_FPUs>;
}
let isAsCheapAsAMove = 1 in {
- def FCMP_UN : CmpFN<0x16, 0x200, "fcmp.un", IIAlu>;
- def FCMP_LT : CmpFN<0x16, 0x210, "fcmp.lt", IIAlu>;
- def FCMP_EQ : CmpFN<0x16, 0x220, "fcmp.eq", IIAlu>;
- def FCMP_LE : CmpFN<0x16, 0x230, "fcmp.le", IIAlu>;
- def FCMP_GT : CmpFN<0x16, 0x240, "fcmp.gt", IIAlu>;
- def FCMP_NE : CmpFN<0x16, 0x250, "fcmp.ne", IIAlu>;
- def FCMP_GE : CmpFN<0x16, 0x260, "fcmp.ge", IIAlu>;
+ def FCMP_UN : CmpFN<0x16, 0x200, "fcmp.un", IIC_FPUc>;
+ def FCMP_LT : CmpFN<0x16, 0x210, "fcmp.lt", IIC_FPUc>;
+ def FCMP_EQ : CmpFN<0x16, 0x220, "fcmp.eq", IIC_FPUc>;
+ def FCMP_LE : CmpFN<0x16, 0x230, "fcmp.le", IIC_FPUc>;
+ def FCMP_GT : CmpFN<0x16, 0x240, "fcmp.gt", IIC_FPUc>;
+ def FCMP_NE : CmpFN<0x16, 0x250, "fcmp.ne", IIC_FPUc>;
+ def FCMP_GE : CmpFN<0x16, 0x260, "fcmp.ge", IIC_FPUc>;
}
diff --git a/lib/Target/MBlaze/MBlazeInstrFSL.td b/lib/Target/MBlaze/MBlazeInstrFSL.td
index 3209845..3082a7e 100644
--- a/lib/Target/MBlaze/MBlazeInstrFSL.td
+++ b/lib/Target/MBlaze/MBlazeInstrFSL.td
@@ -13,7 +13,7 @@
class FSLGet<bits<6> op, bits<5> flags, string instr_asm, Intrinsic OpNode> :
MBlazeInst<op, FRCX, (outs GPR:$dst), (ins fslimm:$b),
!strconcat(instr_asm, " $dst, $b"),
- [(set GPR:$dst, (OpNode immZExt4:$b))],IIAlu>
+ [(set GPR:$dst, (OpNode immZExt4:$b))],IIC_FSLg>
{
bits<5> rd;
bits<4> fslno;
@@ -29,7 +29,7 @@ class FSLGet<bits<6> op, bits<5> flags, string instr_asm, Intrinsic OpNode> :
class FSLGetD<bits<6> op, bits<5> flags, string instr_asm, Intrinsic OpNode> :
MBlazeInst<op, FRCR, (outs GPR:$dst), (ins GPR:$b),
!strconcat(instr_asm, " $dst, $b"),
- [(set GPR:$dst, (OpNode GPR:$b))], IIAlu>
+ [(set GPR:$dst, (OpNode GPR:$b))], IIC_FSLg>
{
bits<5> rd;
bits<5> rb;
@@ -45,7 +45,7 @@ class FSLGetD<bits<6> op, bits<5> flags, string instr_asm, Intrinsic OpNode> :
class FSLPut<bits<6> op, bits<4> flags, string instr_asm, Intrinsic OpNode> :
MBlazeInst<op, FCRCX, (outs), (ins GPR:$v, fslimm:$b),
!strconcat(instr_asm, " $v, $b"),
- [(OpNode GPR:$v, immZExt4:$b)], IIAlu>
+ [(OpNode GPR:$v, immZExt4:$b)], IIC_FSLp>
{
bits<5> ra;
bits<4> fslno;
@@ -61,7 +61,7 @@ class FSLPut<bits<6> op, bits<4> flags, string instr_asm, Intrinsic OpNode> :
class FSLPutD<bits<6> op, bits<4> flags, string instr_asm, Intrinsic OpNode> :
MBlazeInst<op, FCRR, (outs), (ins GPR:$v, GPR:$b),
!strconcat(instr_asm, " $v, $b"),
- [(OpNode GPR:$v, GPR:$b)], IIAlu>
+ [(OpNode GPR:$v, GPR:$b)], IIC_FSLp>
{
bits<5> ra;
bits<5> rb;
@@ -77,7 +77,7 @@ class FSLPutD<bits<6> op, bits<4> flags, string instr_asm, Intrinsic OpNode> :
class FSLPutT<bits<6> op, bits<4> flags, string instr_asm, Intrinsic OpNode> :
MBlazeInst<op, FCX, (outs), (ins fslimm:$b),
!strconcat(instr_asm, " $b"),
- [(OpNode immZExt4:$b)], IIAlu>
+ [(OpNode immZExt4:$b)], IIC_FSLp>
{
bits<4> fslno;
@@ -92,7 +92,7 @@ class FSLPutT<bits<6> op, bits<4> flags, string instr_asm, Intrinsic OpNode> :
class FSLPutTD<bits<6> op, bits<4> flags, string instr_asm, Intrinsic OpNode> :
MBlazeInst<op, FCR, (outs), (ins GPR:$b),
!strconcat(instr_asm, " $b"),
- [(OpNode GPR:$b)], IIAlu>
+ [(OpNode GPR:$b)], IIC_FSLp>
{
bits<5> rb;
diff --git a/lib/Target/MBlaze/MBlazeInstrFormats.td b/lib/Target/MBlaze/MBlazeInstrFormats.td
index d62574d..54f605f 100644
--- a/lib/Target/MBlaze/MBlazeInstrFormats.td
+++ b/lib/Target/MBlaze/MBlazeInstrFormats.td
@@ -81,7 +81,7 @@ class MBlazeInst<bits<6> op, Format form, dag outs, dag ins, string asmstr,
// Pseudo instruction class
//===----------------------------------------------------------------------===//
class MBlazePseudo<dag outs, dag ins, string asmstr, list<dag> pattern>:
- MBlazeInst<0x0, FPseudo, outs, ins, asmstr, pattern, IIPseudo>;
+ MBlazeInst<0x0, FPseudo, outs, ins, asmstr, pattern, IIC_Pseudo>;
//===----------------------------------------------------------------------===//
// Type A instruction class in MBlaze : <|opcode|rd|ra|rb|flags|>
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.cpp b/lib/Target/MBlaze/MBlazeInstrInfo.cpp
index b353dcd..794ebed 100644
--- a/lib/Target/MBlaze/MBlazeInstrInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.cpp
@@ -17,6 +17,8 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "MBlazeGenInstrInfo.inc"
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.h b/lib/Target/MBlaze/MBlazeInstrInfo.h
index b7300c1..b717da8 100644
--- a/lib/Target/MBlaze/MBlazeInstrInfo.h
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.h
@@ -261,7 +261,6 @@ public:
virtual bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond)
const;
-
virtual void copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.td b/lib/Target/MBlaze/MBlazeInstrInfo.td
index 7b8f70a..896e8ea 100644
--- a/lib/Target/MBlaze/MBlazeInstrInfo.td
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.td
@@ -47,22 +47,22 @@ def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_MBCallSeqEnd,
//===----------------------------------------------------------------------===//
// MBlaze Instruction Predicate Definitions.
//===----------------------------------------------------------------------===//
-def HasPipe3 : Predicate<"Subtarget.hasPipe3()">;
+// def HasPipe3 : Predicate<"Subtarget.hasPipe3()">;
def HasBarrel : Predicate<"Subtarget.hasBarrel()">;
-def NoBarrel : Predicate<"!Subtarget.hasBarrel()">;
+// def NoBarrel : Predicate<"!Subtarget.hasBarrel()">;
def HasDiv : Predicate<"Subtarget.hasDiv()">;
def HasMul : Predicate<"Subtarget.hasMul()">;
-def HasFSL : Predicate<"Subtarget.hasFSL()">;
-def HasEFSL : Predicate<"Subtarget.hasEFSL()">;
-def HasMSRSet : Predicate<"Subtarget.hasMSRSet()">;
-def HasException : Predicate<"Subtarget.hasException()">;
+// def HasFSL : Predicate<"Subtarget.hasFSL()">;
+// def HasEFSL : Predicate<"Subtarget.hasEFSL()">;
+// def HasMSRSet : Predicate<"Subtarget.hasMSRSet()">;
+// def HasException : Predicate<"Subtarget.hasException()">;
def HasPatCmp : Predicate<"Subtarget.hasPatCmp()">;
def HasFPU : Predicate<"Subtarget.hasFPU()">;
-def HasESR : Predicate<"Subtarget.hasESR()">;
-def HasPVR : Predicate<"Subtarget.hasPVR()">;
+// def HasESR : Predicate<"Subtarget.hasESR()">;
+// def HasPVR : Predicate<"Subtarget.hasPVR()">;
def HasMul64 : Predicate<"Subtarget.hasMul64()">;
def HasSqrt : Predicate<"Subtarget.hasSqrt()">;
-def HasMMU : Predicate<"Subtarget.hasMMU()">;
+// def HasMMU : Predicate<"Subtarget.hasMMU()">;
//===----------------------------------------------------------------------===//
// MBlaze Operand, Complex Patterns and Transformations Definitions.
@@ -170,18 +170,18 @@ class ArithI<bits<6> op, string instr_asm, SDNode OpNode,
Operand Od, PatLeaf imm_type> :
TB<op, (outs GPR:$dst), (ins GPR:$b, Od:$c),
!strconcat(instr_asm, " $dst, $b, $c"),
- [(set GPR:$dst, (OpNode GPR:$b, imm_type:$c))], IIAlu>;
+ [(set GPR:$dst, (OpNode GPR:$b, imm_type:$c))], IIC_ALU>;
class ArithI32<bits<6> op, string instr_asm,Operand Od, PatLeaf imm_type> :
TB<op, (outs GPR:$dst), (ins GPR:$b, Od:$c),
!strconcat(instr_asm, " $dst, $b, $c"),
- [], IIAlu>;
+ [], IIC_ALU>;
class ShiftI<bits<6> op, bits<2> flags, string instr_asm, SDNode OpNode,
Operand Od, PatLeaf imm_type> :
SHT<op, flags, (outs GPR:$dst), (ins GPR:$b, Od:$c),
!strconcat(instr_asm, " $dst, $b, $c"),
- [(set GPR:$dst, (OpNode GPR:$b, imm_type:$c))], IIAlu>;
+ [(set GPR:$dst, (OpNode GPR:$b, imm_type:$c))], IIC_SHT>;
class ArithR<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode,
InstrItinClass itin> :
@@ -193,7 +193,7 @@ class ArithRI<bits<6> op, string instr_asm, SDNode OpNode,
Operand Od, PatLeaf imm_type> :
TBR<op, (outs GPR:$dst), (ins Od:$b, GPR:$c),
!strconcat(instr_asm, " $dst, $c, $b"),
- [(set GPR:$dst, (OpNode imm_type:$b, GPR:$c))], IIAlu>;
+ [(set GPR:$dst, (OpNode imm_type:$b, GPR:$c))], IIC_ALU>;
class ArithN<bits<6> op, bits<11> flags, string instr_asm,
InstrItinClass itin> :
@@ -204,7 +204,7 @@ class ArithN<bits<6> op, bits<11> flags, string instr_asm,
class ArithNI<bits<6> op, string instr_asm,Operand Od, PatLeaf imm_type> :
TB<op, (outs GPR:$dst), (ins GPR:$b, Od:$c),
!strconcat(instr_asm, " $dst, $b, $c"),
- [], IIAlu>;
+ [], IIC_ALU>;
class ArithRN<bits<6> op, bits<11> flags, string instr_asm,
InstrItinClass itin> :
@@ -215,7 +215,7 @@ class ArithRN<bits<6> op, bits<11> flags, string instr_asm,
class ArithRNI<bits<6> op, string instr_asm,Operand Od, PatLeaf imm_type> :
TBR<op, (outs GPR:$dst), (ins Od:$c, GPR:$b),
!strconcat(instr_asm, " $dst, $b, $c"),
- [], IIAlu>;
+ [], IIC_ALU>;
//===----------------------------------------------------------------------===//
// Misc Arithmetic Instructions
@@ -224,23 +224,23 @@ class ArithRNI<bits<6> op, string instr_asm,Operand Od, PatLeaf imm_type> :
class Logic<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode> :
TA<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
!strconcat(instr_asm, " $dst, $b, $c"),
- [(set GPR:$dst, (OpNode GPR:$b, GPR:$c))], IIAlu>;
+ [(set GPR:$dst, (OpNode GPR:$b, GPR:$c))], IIC_ALU>;
class LogicI<bits<6> op, string instr_asm, SDNode OpNode> :
TB<op, (outs GPR:$dst), (ins GPR:$b, uimm16:$c),
!strconcat(instr_asm, " $dst, $b, $c"),
[(set GPR:$dst, (OpNode GPR:$b, immZExt16:$c))],
- IIAlu>;
+ IIC_ALU>;
class LogicI32<bits<6> op, string instr_asm> :
TB<op, (outs GPR:$dst), (ins GPR:$b, uimm16:$c),
!strconcat(instr_asm, " $dst, $b, $c"),
- [], IIAlu>;
+ [], IIC_ALU>;
class PatCmp<bits<6> op, bits<11> flags, string instr_asm> :
TA<op, flags, (outs GPR:$dst), (ins GPR:$b, GPR:$c),
!strconcat(instr_asm, " $dst, $b, $c"),
- [], IIAlu>;
+ [], IIC_ALU>;
//===----------------------------------------------------------------------===//
// Memory Access Instructions
@@ -248,22 +248,22 @@ class PatCmp<bits<6> op, bits<11> flags, string instr_asm> :
class LoadM<bits<6> op, bits<11> flags, string instr_asm> :
TA<op, flags, (outs GPR:$dst), (ins memrr:$addr),
!strconcat(instr_asm, " $dst, $addr"),
- [], IILoad>;
+ [], IIC_MEMl>;
class LoadMI<bits<6> op, string instr_asm, PatFrag OpNode> :
TB<op, (outs GPR:$dst), (ins memri:$addr),
!strconcat(instr_asm, " $dst, $addr"),
- [(set (i32 GPR:$dst), (OpNode iaddr:$addr))], IILoad>;
+ [(set (i32 GPR:$dst), (OpNode iaddr:$addr))], IIC_MEMl>;
class StoreM<bits<6> op, bits<11> flags, string instr_asm> :
TA<op, flags, (outs), (ins GPR:$dst, memrr:$addr),
!strconcat(instr_asm, " $dst, $addr"),
- [], IIStore>;
+ [], IIC_MEMs>;
class StoreMI<bits<6> op, string instr_asm, PatFrag OpNode> :
TB<op, (outs), (ins GPR:$dst, memri:$addr),
!strconcat(instr_asm, " $dst, $addr"),
- [(OpNode (i32 GPR:$dst), iaddr:$addr)], IIStore>;
+ [(OpNode (i32 GPR:$dst), iaddr:$addr)], IIC_MEMs>;
//===----------------------------------------------------------------------===//
// Branch Instructions
@@ -271,7 +271,7 @@ class StoreMI<bits<6> op, string instr_asm, PatFrag OpNode> :
class Branch<bits<6> op, bits<5> br, bits<11> flags, string instr_asm> :
TA<op, flags, (outs), (ins GPR:$target),
!strconcat(instr_asm, " $target"),
- [], IIBranch> {
+ [], IIC_BR> {
let rd = 0x0;
let ra = br;
let Form = FCCR;
@@ -280,7 +280,7 @@ class Branch<bits<6> op, bits<5> br, bits<11> flags, string instr_asm> :
class BranchI<bits<6> op, bits<5> br, string instr_asm> :
TB<op, (outs), (ins brtarget:$target),
!strconcat(instr_asm, " $target"),
- [], IIBranch> {
+ [], IIC_BR> {
let rd = 0;
let ra = br;
let Form = FCCI;
@@ -292,7 +292,7 @@ class BranchI<bits<6> op, bits<5> br, string instr_asm> :
class BranchL<bits<6> op, bits<5> br, bits<11> flags, string instr_asm> :
TA<op, flags, (outs), (ins GPR:$link, GPR:$target, variable_ops),
!strconcat(instr_asm, " $link, $target"),
- [], IIBranch> {
+ [], IIC_BRl> {
let ra = br;
let Form = FRCR;
}
@@ -300,7 +300,7 @@ class BranchL<bits<6> op, bits<5> br, bits<11> flags, string instr_asm> :
class BranchLI<bits<6> op, bits<5> br, string instr_asm> :
TB<op, (outs), (ins GPR:$link, calltarget:$target, variable_ops),
!strconcat(instr_asm, " $link, $target"),
- [], IIBranch> {
+ [], IIC_BRl> {
let ra = br;
let Form = FRCI;
}
@@ -312,7 +312,7 @@ class BranchC<bits<6> op, bits<5> br, bits<11> flags, string instr_asm> :
TA<op, flags, (outs),
(ins GPR:$a, GPR:$b),
!strconcat(instr_asm, " $a, $b"),
- [], IIBranch> {
+ [], IIC_BRc> {
let rd = br;
let Form = FCRR;
}
@@ -320,7 +320,7 @@ class BranchC<bits<6> op, bits<5> br, bits<11> flags, string instr_asm> :
class BranchCI<bits<6> op, bits<5> br, string instr_asm> :
TB<op, (outs), (ins GPR:$a, brtarget:$offset),
!strconcat(instr_asm, " $a, $offset"),
- [], IIBranch> {
+ [], IIC_BRc> {
let rd = br;
let Form = FCRI;
}
@@ -330,71 +330,74 @@ class BranchCI<bits<6> op, bits<5> br, string instr_asm> :
//===----------------------------------------------------------------------===//
let isCommutable = 1, isAsCheapAsAMove = 1 in {
- def ADDK : Arith<0x04, 0x000, "addk ", add, IIAlu>;
+ def ADDK : Arith<0x04, 0x000, "addk ", add, IIC_ALU>;
def AND : Logic<0x21, 0x000, "and ", and>;
def OR : Logic<0x20, 0x000, "or ", or>;
def XOR : Logic<0x22, 0x000, "xor ", xor>;
- def PCMPBF : PatCmp<0x20, 0x400, "pcmpbf ">;
- def PCMPEQ : PatCmp<0x22, 0x400, "pcmpeq ">;
- def PCMPNE : PatCmp<0x23, 0x400, "pcmpne ">;
+
+ let Predicates=[HasPatCmp] in {
+ def PCMPBF : PatCmp<0x20, 0x400, "pcmpbf ">;
+ def PCMPEQ : PatCmp<0x22, 0x400, "pcmpeq ">;
+ def PCMPNE : PatCmp<0x23, 0x400, "pcmpne ">;
+ }
let Defs = [CARRY] in {
- def ADD : Arith<0x00, 0x000, "add ", addc, IIAlu>;
+ def ADD : Arith<0x00, 0x000, "add ", addc, IIC_ALU>;
let Uses = [CARRY] in {
- def ADDC : Arith<0x02, 0x000, "addc ", adde, IIAlu>;
+ def ADDC : Arith<0x02, 0x000, "addc ", adde, IIC_ALU>;
}
}
let Uses = [CARRY] in {
- def ADDKC : ArithN<0x06, 0x000, "addkc ", IIAlu>;
+ def ADDKC : ArithN<0x06, 0x000, "addkc ", IIC_ALU>;
}
}
let isAsCheapAsAMove = 1 in {
- def ANDN : ArithN<0x23, 0x000, "andn ", IIAlu>;
- def CMP : ArithN<0x05, 0x001, "cmp ", IIAlu>;
- def CMPU : ArithN<0x05, 0x003, "cmpu ", IIAlu>;
- def RSUBK : ArithR<0x05, 0x000, "rsubk ", sub, IIAlu>;
+ def ANDN : ArithN<0x23, 0x000, "andn ", IIC_ALU>;
+ def CMP : ArithN<0x05, 0x001, "cmp ", IIC_ALU>;
+ def CMPU : ArithN<0x05, 0x003, "cmpu ", IIC_ALU>;
+ def RSUBK : ArithR<0x05, 0x000, "rsubk ", sub, IIC_ALU>;
let Defs = [CARRY] in {
- def RSUB : ArithR<0x01, 0x000, "rsub ", subc, IIAlu>;
+ def RSUB : ArithR<0x01, 0x000, "rsub ", subc, IIC_ALU>;
let Uses = [CARRY] in {
- def RSUBC : ArithR<0x03, 0x000, "rsubc ", sube, IIAlu>;
+ def RSUBC : ArithR<0x03, 0x000, "rsubc ", sube, IIC_ALU>;
}
}
let Uses = [CARRY] in {
- def RSUBKC : ArithRN<0x07, 0x000, "rsubkc ", IIAlu>;
+ def RSUBKC : ArithRN<0x07, 0x000, "rsubkc ", IIC_ALU>;
}
}
let isCommutable = 1, Predicates=[HasMul] in {
- def MUL : Arith<0x10, 0x000, "mul ", mul, IIAlu>;
+ def MUL : Arith<0x10, 0x000, "mul ", mul, IIC_ALUm>;
}
let isCommutable = 1, Predicates=[HasMul,HasMul64] in {
- def MULH : Arith<0x10, 0x001, "mulh ", mulhs, IIAlu>;
- def MULHU : Arith<0x10, 0x003, "mulhu ", mulhu, IIAlu>;
+ def MULH : Arith<0x10, 0x001, "mulh ", mulhs, IIC_ALUm>;
+ def MULHU : Arith<0x10, 0x003, "mulhu ", mulhu, IIC_ALUm>;
}
let Predicates=[HasMul,HasMul64] in {
- def MULHSU : ArithN<0x10, 0x002, "mulhsu ", IIAlu>;
+ def MULHSU : ArithN<0x10, 0x002, "mulhsu ", IIC_ALUm>;
}
let Predicates=[HasBarrel] in {
- def BSRL : Arith<0x11, 0x000, "bsrl ", srl, IIAlu>;
- def BSRA : Arith<0x11, 0x200, "bsra ", sra, IIAlu>;
- def BSLL : Arith<0x11, 0x400, "bsll ", shl, IIAlu>;
+ def BSRL : Arith<0x11, 0x000, "bsrl ", srl, IIC_SHT>;
+ def BSRA : Arith<0x11, 0x200, "bsra ", sra, IIC_SHT>;
+ def BSLL : Arith<0x11, 0x400, "bsll ", shl, IIC_SHT>;
def BSRLI : ShiftI<0x19, 0x0, "bsrli ", srl, uimm5, immZExt5>;
def BSRAI : ShiftI<0x19, 0x1, "bsrai ", sra, uimm5, immZExt5>;
def BSLLI : ShiftI<0x19, 0x2, "bslli ", shl, uimm5, immZExt5>;
}
let Predicates=[HasDiv] in {
- def IDIV : ArithR<0x12, 0x000, "idiv ", sdiv, IIAlu>;
- def IDIVU : ArithR<0x12, 0x002, "idivu ", udiv, IIAlu>;
+ def IDIV : ArithR<0x12, 0x000, "idiv ", sdiv, IIC_ALUd>;
+ def IDIVU : ArithR<0x12, 0x002, "idivu ", udiv, IIC_ALUd>;
}
//===----------------------------------------------------------------------===//
@@ -552,7 +555,7 @@ let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1,
def RTSD : TB<0x2D, (outs), (ins GPR:$target, simm16:$imm),
"rtsd $target, $imm",
[],
- IIBranch>;
+ IIC_BR>;
}
let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1,
@@ -560,7 +563,7 @@ let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1,
def RTID : TB<0x2D, (outs), (ins GPR:$target, simm16:$imm),
"rtid $target, $imm",
[],
- IIBranch>;
+ IIC_BR>;
}
let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1,
@@ -568,7 +571,7 @@ let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1,
def RTBD : TB<0x2D, (outs), (ins GPR:$target, simm16:$imm),
"rtbd $target, $imm",
[],
- IIBranch>;
+ IIC_BR>;
}
let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1,
@@ -576,7 +579,7 @@ let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1,
def RTED : TB<0x2D, (outs), (ins GPR:$target, simm16:$imm),
"rted $target, $imm",
[],
- IIBranch>;
+ IIC_BR>;
}
//===----------------------------------------------------------------------===//
@@ -584,7 +587,7 @@ let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1,
//===----------------------------------------------------------------------===//
let neverHasSideEffects = 1 in {
- def NOP : MBlazeInst< 0x20, FC, (outs), (ins), "nop ", [], IIAlu>;
+ def NOP : MBlazeInst< 0x20, FC, (outs), (ins), "nop ", [], IIC_ALU>;
}
let usesCustomInserter = 1 in {
@@ -611,17 +614,17 @@ let usesCustomInserter = 1 in {
let rb = 0 in {
def SEXT16 : TA<0x24, 0x061, (outs GPR:$dst), (ins GPR:$src),
- "sext16 $dst, $src", [], IIAlu>;
+ "sext16 $dst, $src", [], IIC_ALU>;
def SEXT8 : TA<0x24, 0x060, (outs GPR:$dst), (ins GPR:$src),
- "sext8 $dst, $src", [], IIAlu>;
+ "sext8 $dst, $src", [], IIC_ALU>;
let Defs = [CARRY] in {
def SRL : TA<0x24, 0x041, (outs GPR:$dst), (ins GPR:$src),
- "srl $dst, $src", [], IIAlu>;
+ "srl $dst, $src", [], IIC_ALU>;
def SRA : TA<0x24, 0x001, (outs GPR:$dst), (ins GPR:$src),
- "sra $dst, $src", [], IIAlu>;
+ "sra $dst, $src", [], IIC_ALU>;
let Uses = [CARRY] in {
def SRC : TA<0x24, 0x021, (outs GPR:$dst), (ins GPR:$src),
- "src $dst, $src", [], IIAlu>;
+ "src $dst, $src", [], IIC_ALU>;
}
}
}
@@ -637,36 +640,36 @@ let isCodeGenOnly=1 in {
//===----------------------------------------------------------------------===//
let Form=FRCS in {
def MFS : SPC<0x25, 0x2, (outs GPR:$dst), (ins SPR:$src),
- "mfs $dst, $src", [], IIAlu>;
+ "mfs $dst, $src", [], IIC_ALU>;
}
let Form=FCRCS in {
def MTS : SPC<0x25, 0x3, (outs SPR:$dst), (ins GPR:$src),
- "mts $dst, $src", [], IIAlu>;
+ "mts $dst, $src", [], IIC_ALU>;
}
def MSRSET : MSR<0x25, 0x20, (outs GPR:$dst), (ins uimm15:$set),
- "msrset $dst, $set", [], IIAlu>;
+ "msrset $dst, $set", [], IIC_ALU>;
def MSRCLR : MSR<0x25, 0x22, (outs GPR:$dst), (ins uimm15:$clr),
- "msrclr $dst, $clr", [], IIAlu>;
+ "msrclr $dst, $clr", [], IIC_ALU>;
let rd=0x0, Form=FCRR in {
def WDC : TA<0x24, 0x64, (outs), (ins GPR:$a, GPR:$b),
- "wdc $a, $b", [], IIAlu>;
+ "wdc $a, $b", [], IIC_WDC>;
def WDCF : TA<0x24, 0x74, (outs), (ins GPR:$a, GPR:$b),
- "wdc.flush $a, $b", [], IIAlu>;
+ "wdc.flush $a, $b", [], IIC_WDC>;
def WDCC : TA<0x24, 0x66, (outs), (ins GPR:$a, GPR:$b),
- "wdc.clear $a, $b", [], IIAlu>;
+ "wdc.clear $a, $b", [], IIC_WDC>;
def WIC : TA<0x24, 0x68, (outs), (ins GPR:$a, GPR:$b),
- "wic $a, $b", [], IIAlu>;
+ "wic $a, $b", [], IIC_WDC>;
}
def BRK : BranchL<0x26, 0x0C, 0x000, "brk ">;
def BRKI : BranchLI<0x2E, 0x0C, "brki ">;
def IMM : MBlazeInst<0x2C, FCCI, (outs), (ins simm16:$imm),
- "imm $imm", [], IIAlu>;
+ "imm $imm", [], IIC_ALU>;
//===----------------------------------------------------------------------===//
// Pseudo instructions for atomic operations
@@ -848,11 +851,6 @@ def : Pat<(MBWrapper tconstpool:$in), (ORI (i32 R0), tconstpool:$in)>;
// Misc instructions
def : Pat<(and (i32 GPR:$lh), (not (i32 GPR:$rh))),(ANDN GPR:$lh, GPR:$rh)>;
-// Arithmetic with immediates
-def : Pat<(add (i32 GPR:$in), imm:$imm),(ADDIK GPR:$in, imm:$imm)>;
-def : Pat<(or (i32 GPR:$in), imm:$imm),(ORI GPR:$in, imm:$imm)>;
-def : Pat<(xor (i32 GPR:$in), imm:$imm),(XORI GPR:$in, imm:$imm)>;
-
// Convert any extend loads into zero extend loads
def : Pat<(extloadi8 iaddr:$src), (i32 (LBUI iaddr:$src))>;
def : Pat<(extloadi16 iaddr:$src), (i32 (LHUI iaddr:$src))>;
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
index fa9140d..ed8511d 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
@@ -181,6 +181,26 @@ unsigned MBlazeRegisterInfo::getSpecialRegisterFromNumbering(unsigned Reg) {
return 0; // Not reached
}
+bool MBlazeRegisterInfo::isRegister(unsigned Reg) {
+ return Reg <= 31;
+}
+
+bool MBlazeRegisterInfo::isSpecialRegister(unsigned Reg) {
+ switch (Reg) {
+ case 0x0000 : case 0x0001 : case 0x0003 : case 0x0005 :
+ case 0x0007 : case 0x000B : case 0x000D : case 0x1000 :
+ case 0x1001 : case 0x1002 : case 0x1003 : case 0x1004 :
+ case 0x2000 : case 0x2001 : case 0x2002 : case 0x2003 :
+ case 0x2004 : case 0x2005 : case 0x2006 : case 0x2007 :
+ case 0x2008 : case 0x2009 : case 0x200A : case 0x200B :
+ return true;
+
+ default:
+ return false;
+ }
+ return false; // Not reached
+}
+
unsigned MBlazeRegisterInfo::getPICCallReg() {
return MBlaze::R20;
}
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.h b/lib/Target/MBlaze/MBlazeRegisterInfo.h
index 839536d..69ec5aa 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.h
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.h
@@ -45,6 +45,8 @@ struct MBlazeRegisterInfo : public MBlazeGenRegisterInfo {
static unsigned getRegisterNumbering(unsigned RegEnum);
static unsigned getRegisterFromNumbering(unsigned RegEnum);
static unsigned getSpecialRegisterFromNumbering(unsigned RegEnum);
+ static bool isRegister(unsigned RegEnum);
+ static bool isSpecialRegister(unsigned RegEnum);
/// Get PIC indirect call register
static unsigned getPICCallReg();
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.td b/lib/Target/MBlaze/MBlazeRegisterInfo.td
index fbefb22..1a695a7 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.td
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.td
@@ -85,18 +85,19 @@ let Namespace = "MBlaze" in {
def RTLBX : MBlazeSPRReg<0x1002, "rtlbx">, DwarfRegNum<[41]>;
def RTLBLO : MBlazeSPRReg<0x1003, "rtlblo">, DwarfRegNum<[42]>;
def RTLBHI : MBlazeSPRReg<0x1004, "rtlbhi">, DwarfRegNum<[43]>;
- def RPVR0 : MBlazeSPRReg<0x2000, "rpvr0">, DwarfRegNum<[44]>;
- def RPVR1 : MBlazeSPRReg<0x2001, "rpvr1">, DwarfRegNum<[45]>;
- def RPVR2 : MBlazeSPRReg<0x2002, "rpvr2">, DwarfRegNum<[46]>;
- def RPVR3 : MBlazeSPRReg<0x2003, "rpvr3">, DwarfRegNum<[47]>;
- def RPVR4 : MBlazeSPRReg<0x2004, "rpvr4">, DwarfRegNum<[48]>;
- def RPVR5 : MBlazeSPRReg<0x2005, "rpvr5">, DwarfRegNum<[49]>;
- def RPVR6 : MBlazeSPRReg<0x2006, "rpvr6">, DwarfRegNum<[50]>;
- def RPVR7 : MBlazeSPRReg<0x2007, "rpvr7">, DwarfRegNum<[51]>;
- def RPVR8 : MBlazeSPRReg<0x2008, "rpvr8">, DwarfRegNum<[52]>;
- def RPVR9 : MBlazeSPRReg<0x2009, "rpvr9">, DwarfRegNum<[53]>;
- def RPVR10 : MBlazeSPRReg<0x200A, "rpvr10">, DwarfRegNum<[54]>;
- def RPVR11 : MBlazeSPRReg<0x200B, "rpvr11">, DwarfRegNum<[55]>;
+ def RTLBSX : MBlazeSPRReg<0x1004, "rtlbsx">, DwarfRegNum<[44]>;
+ def RPVR0 : MBlazeSPRReg<0x2000, "rpvr0">, DwarfRegNum<[45]>;
+ def RPVR1 : MBlazeSPRReg<0x2001, "rpvr1">, DwarfRegNum<[46]>;
+ def RPVR2 : MBlazeSPRReg<0x2002, "rpvr2">, DwarfRegNum<[47]>;
+ def RPVR3 : MBlazeSPRReg<0x2003, "rpvr3">, DwarfRegNum<[48]>;
+ def RPVR4 : MBlazeSPRReg<0x2004, "rpvr4">, DwarfRegNum<[49]>;
+ def RPVR5 : MBlazeSPRReg<0x2005, "rpvr5">, DwarfRegNum<[50]>;
+ def RPVR6 : MBlazeSPRReg<0x2006, "rpvr6">, DwarfRegNum<[51]>;
+ def RPVR7 : MBlazeSPRReg<0x2007, "rpvr7">, DwarfRegNum<[52]>;
+ def RPVR8 : MBlazeSPRReg<0x2008, "rpvr8">, DwarfRegNum<[53]>;
+ def RPVR9 : MBlazeSPRReg<0x2009, "rpvr9">, DwarfRegNum<[54]>;
+ def RPVR10 : MBlazeSPRReg<0x200A, "rpvr10">, DwarfRegNum<[55]>;
+ def RPVR11 : MBlazeSPRReg<0x200B, "rpvr11">, DwarfRegNum<[56]>;
// The carry bit. In the Microblaze this is really bit 29 of the
// MSR register but this is the only bit of that register that we
diff --git a/lib/Target/MBlaze/MBlazeSchedule.td b/lib/Target/MBlaze/MBlazeSchedule.td
index ac4d98c..4662f25 100644
--- a/lib/Target/MBlaze/MBlazeSchedule.td
+++ b/lib/Target/MBlaze/MBlazeSchedule.td
@@ -8,57 +8,48 @@
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
-// Functional units across MBlaze chips sets. Based on GCC/MBlaze backend files.
+// MBlaze functional units.
//===----------------------------------------------------------------------===//
-def ALU : FuncUnit;
-def IMULDIV : FuncUnit;
+def IF : FuncUnit;
+def ID : FuncUnit;
+def EX : FuncUnit;
+def MA : FuncUnit;
+def WB : FuncUnit;
//===----------------------------------------------------------------------===//
// Instruction Itinerary classes used for MBlaze
//===----------------------------------------------------------------------===//
-def IIAlu : InstrItinClass;
-def IILoad : InstrItinClass;
-def IIStore : InstrItinClass;
-def IIXfer : InstrItinClass;
-def IIBranch : InstrItinClass;
-def IIHiLo : InstrItinClass;
-def IIImul : InstrItinClass;
-def IIIdiv : InstrItinClass;
-def IIFcvt : InstrItinClass;
-def IIFmove : InstrItinClass;
-def IIFcmp : InstrItinClass;
-def IIFadd : InstrItinClass;
-def IIFmulSingle : InstrItinClass;
-def IIFmulDouble : InstrItinClass;
-def IIFdivSingle : InstrItinClass;
-def IIFdivDouble : InstrItinClass;
-def IIFsqrtSingle : InstrItinClass;
-def IIFsqrtDouble : InstrItinClass;
-def IIFrecipFsqrtStep : InstrItinClass;
-def IIPseudo : InstrItinClass;
+def IIC_ALU : InstrItinClass;
+def IIC_ALUm : InstrItinClass;
+def IIC_ALUd : InstrItinClass;
+def IIC_SHT : InstrItinClass;
+def IIC_FSLg : InstrItinClass;
+def IIC_FSLp : InstrItinClass;
+def IIC_MEMs : InstrItinClass;
+def IIC_MEMl : InstrItinClass;
+def IIC_FPU : InstrItinClass;
+def IIC_FPUd : InstrItinClass;
+def IIC_FPUf : InstrItinClass;
+def IIC_FPUi : InstrItinClass;
+def IIC_FPUs : InstrItinClass;
+def IIC_FPUc : InstrItinClass;
+def IIC_BR : InstrItinClass;
+def IIC_BRc : InstrItinClass;
+def IIC_BRl : InstrItinClass;
+def IIC_WDC : InstrItinClass;
+def IIC_Pseudo : InstrItinClass;
//===----------------------------------------------------------------------===//
-// MBlaze Generic instruction itineraries.
+// MBlaze generic instruction itineraries.
//===----------------------------------------------------------------------===//
-def MBlazeGenericItineraries : ProcessorItineraries<
- [ALU, IMULDIV], [], [
- InstrItinData<IIAlu , [InstrStage<1, [ALU]>]>,
- InstrItinData<IILoad , [InstrStage<3, [ALU]>]>,
- InstrItinData<IIStore , [InstrStage<1, [ALU]>]>,
- InstrItinData<IIXfer , [InstrStage<2, [ALU]>]>,
- InstrItinData<IIBranch , [InstrStage<1, [ALU]>]>,
- InstrItinData<IIHiLo , [InstrStage<1, [IMULDIV]>]>,
- InstrItinData<IIImul , [InstrStage<17, [IMULDIV]>]>,
- InstrItinData<IIIdiv , [InstrStage<38, [IMULDIV]>]>,
- InstrItinData<IIFcvt , [InstrStage<1, [ALU]>]>,
- InstrItinData<IIFmove , [InstrStage<2, [ALU]>]>,
- InstrItinData<IIFcmp , [InstrStage<3, [ALU]>]>,
- InstrItinData<IIFadd , [InstrStage<4, [ALU]>]>,
- InstrItinData<IIFmulSingle , [InstrStage<7, [ALU]>]>,
- InstrItinData<IIFmulDouble , [InstrStage<8, [ALU]>]>,
- InstrItinData<IIFdivSingle , [InstrStage<23, [ALU]>]>,
- InstrItinData<IIFdivDouble , [InstrStage<36, [ALU]>]>,
- InstrItinData<IIFsqrtSingle , [InstrStage<54, [ALU]>]>,
- InstrItinData<IIFsqrtDouble , [InstrStage<12, [ALU]>]>,
- InstrItinData<IIFrecipFsqrtStep , [InstrStage<5, [ALU]>]>
-]>;
+def MBlazeGenericItineraries : ProcessorItineraries<[], [], []>;
+
+//===----------------------------------------------------------------------===//
+// MBlaze instruction itineraries for three stage pipeline.
+//===----------------------------------------------------------------------===//
+include "MBlazeSchedule3.td"
+
+//===----------------------------------------------------------------------===//
+// MBlaze instruction itineraries for five stage pipeline.
+//===----------------------------------------------------------------------===//
+include "MBlazeSchedule5.td"
diff --git a/lib/Target/MBlaze/MBlazeSchedule3.td b/lib/Target/MBlaze/MBlazeSchedule3.td
new file mode 100644
index 0000000..ccbf99d
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeSchedule3.td
@@ -0,0 +1,236 @@
+//===- MBlazeSchedule3.td - MBlaze Scheduling Definitions --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MBlaze instruction itineraries for the three stage pipeline.
+//===----------------------------------------------------------------------===//
+def MBlazePipe3Itineraries : ProcessorItineraries<
+ [IF,ID,EX], [], [
+
+ // ALU instruction with one destination register and either two register
+ // source operands or one register source operand and one immediate operand.
+ // The instruction takes one cycle to execute in each of the stages. The
+ // two source operands are read during the decode stage and the result is
+ // ready after the execute stage.
+ InstrItinData< IIC_ALU,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]>], // one cycle in execute stage
+ [ 2 // result ready after two cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // ALU multiply instruction with one destination register and either two
+ // register source operands or one register source operand and one immediate
+ // operand. The instruction takes one cycle to execute in each of the
+ // pipeline stages except the execute stage, which takes three cycles. The
+ // two source operands are read during the decode stage and the result is
+ // ready after the execute stage.
+ InstrItinData< IIC_ALUm,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<3,[EX]>], // three cycles in execute stage
+ [ 4 // result ready after four cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // ALU divide instruction with one destination register and two register
+ // source operands. The instruction takes one cycle to execute in each of the
+ // pipeline stages except the execute stage, which takes 34 cycles. The two
+ // source operands are read during the decode stage and the result is ready
+ // after the execute stage.
+ InstrItinData< IIC_ALUd,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<34,[EX]>], // 34 cycles in execute stage
+ [ 35 // result ready after 35 cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Shift instruction with one destination register and either two register
+ // source operands or one register source operand and one immediate operand.
+ // The instruction takes one cycle to execute in each of the pipeline stages
+ // except the execute stage, which takes two cycles. The two source operands
+ // are read during the decode stage and the result is ready after the execute
+ // stage.
+ InstrItinData< IIC_SHT,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<2,[EX]>], // two cycles in execute stage
+ [ 3 // result ready after three cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Branch instruction with one source operand register. The instruction takes
+ // one cycle to execute in each of the pipeline stages. The source operand is
+ // read during the decode stage.
+ InstrItinData< IIC_BR,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]>], // one cycle in execute stage
+ [ 1 ]>, // first operand read after one cycle
+
+ // Conditional branch instruction with two source operand registers. The
+ // instruction takes one cycle to execute in each of the pipeline stages. The
+ // two source operands are read during the decode stage.
+ InstrItinData< IIC_BRc,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]>], // one cycle in execute stage
+ [ 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Branch and link instruction with one destination register and one source
+ // operand register. The instruction takes one cycle to execute in each of
+ // the pipeline stages. The source operand is read during the decode stage
+ // and the destination register is ready after the execute stage.
+ InstrItinData< IIC_BRl,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]>], // one cycle in execute stage
+ [ 2 // result ready after two cycles
+ , 1 ]>, // first operand read after one cycle
+
+ // Cache control instruction with two source operand registers. The
+ // instruction takes one cycle to execute in each of the pipeline stages
+ // except the execute stage, which takes two cycles. The source
+ // operands are read during the decode stage.
+ InstrItinData< IIC_WDC,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<2,[EX]>], // two cycles in execute stage
+ [ 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Floating point instruction with one destination register and two source
+ // operand registers. The instruction takes one cycle to execute in each of
+ // the pipeline stages except the execute stage, which takes six cycles. The
+ // source operands are read during the decode stage and the results are ready
+ // after the execute stage.
+ InstrItinData< IIC_FPU,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<6,[EX]>], // six cycles in execute stage
+ [ 7 // result ready after seven cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Floating point divide instruction with one destination register and two
+ // source operand registers. The instruction takes one cycle to execute in
+ // each of the pipeline stages except the execute stage, which takes 30
+ // cycles. The source operands are read during the decode stage and the
+ // results are ready after the execute stage.
+ InstrItinData< IIC_FPUd,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<30,[EX]>], // 30 cycles in execute stage
+ [ 31 // result ready after 31 cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Convert floating point to integer instruction with one destination
+ // register and one source operand register. The instruction takes one cycle
+ // to execute in each of the pipeline stages except the execute stage,
+ // which takes seven cycles. The source operands are read during the decode
+ // stage and the results are ready after the execute stage.
+ InstrItinData< IIC_FPUi,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<7,[EX]>], // seven cycles in execute stage
+ [ 8 // result ready after eight cycles
+ , 1 ]>, // first operand read after one cycle
+
+ // Convert integer to floating point instruction with one destination
+ // register and one source operand register. The instruction takes one cycle
+ // to execute in each of the pipeline stages except the execute stage,
+ // which takes six cycles. The source operands are read during the decode
+ // stage and the results are ready after the execute stage.
+ InstrItinData< IIC_FPUf,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<6,[EX]>], // six cycles in execute stage
+ [ 7 // result ready after seven cycles
+ , 1 ]>, // first operand read after one cycle
+
+ // Floating point square root instruction with one destination register and
+ // one source operand register. The instruction takes one cycle to execute in
+ // each of the pipeline stages except the execute stage, which takes 29
+ // cycles. The source operands are read during the decode stage and the
+ // results are ready after the execute stage.
+ InstrItinData< IIC_FPUs,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<29,[EX]>], // 29 cycles in execute stage
+ [ 30 // result ready after 30 cycles
+ , 1 ]>, // first operand read after one cycle
+
+ // Floating point comparison instruction with one destination register and
+ // two source operand registers. The instruction takes one cycle to execute
+ // in each of the pipeline stages except the execute stage, which takes three
+ // cycles. The source operands are read during the decode stage and the
+ // results are ready after the execute stage.
+ InstrItinData< IIC_FPUc,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<3,[EX]>], // three cycles in execute stage
+ [ 4 // result ready after four cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // FSL get instruction with one register or immediate source operand and one
+ // destination register. The instruction takes one cycle to execute in each
+ // of the pipeline stages except the execute stage, which takes two cycles.
+ // The one source operand is read during the decode stage and the result is
+ // ready after the execute stage.
+ InstrItinData< IIC_FSLg,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<2,[EX]>], // two cycles in execute stage
+ [ 3 // result ready after three cycles
+ , 1 ]>, // first operand read after one cycle
+
+ // FSL put instruction with either two register source operands or one
+ // register source operand and one immediate operand. There is no result
+ // produced by the instruction. The instruction takes one cycle to execute in
+ // each of the pipeline stages except the execute stage, which takes two
+ // cycles. The two source operands are read during the decode stage.
+ InstrItinData< IIC_FSLp,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<2,[EX]>], // two cycles in execute stage
+ [ 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Memory store instruction with either three register source operands or two
+ // register source operands and one immediate operand. There is no result
+ // produced by the instruction. The instruction takes one cycle to execute in
+ // each of the pipeline stages except the execute stage, which takes two
+ // cycles. All of the source operands are read during the decode stage.
+ InstrItinData< IIC_MEMs,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<2,[EX]>], // two cycles in execute stage
+ [ 1 // first operand read after one cycle
+ , 1 // second operand read after one cycle
+ , 1 ]>, // third operand read after one cycle
+
+ // Memory load instruction with one destination register and either two
+ // register source operands or one register source operand and one immediate
+ // operand. The instruction takes one cycle to execute in each of the
+ // pipeline stages except the execute stage, which takes two cycles. All of
+ // the source operands are read during the decode stage and the result is
+ // ready after the execute stage.
+ InstrItinData< IIC_MEMl,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<2,[EX]>], // two cycles in execute stage
+ [ 3 // result ready after three cycles
+ , 1 // first operand read after one cycle
+ , 1 ]> // second operand read after one cycle
+]>;
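
The latency and operand-cycle numbers defined above are consumed through LLVM's InstrItineraryData interface. A minimal C++ sketch of such a query, assuming the TableGen-generated class index for IIC_ALUd is passed in (the parameter name is illustrative, not from the generated file):

    // Sketch: reading the three-stage table back through the scheduler's
    // itinerary interface. IIC_ALUd_Idx stands for the TableGen-generated
    // class index; the name is illustrative only.
    #include "llvm/Target/TargetInstrItineraries.h"

    unsigned divideLatency(const llvm::InstrItineraryData &Itins,
                           unsigned IIC_ALUd_Idx) {
      // Sum of stage cycles: 1 (IF) + 1 (ID) + 34 (EX) = 36 for IIC_ALUd.
      unsigned StageLatency = Itins.getStageLatency(IIC_ALUd_Idx);
      // Cycle at which operand 0 (the result) is ready: 35 per the entry above.
      int DefCycle = Itins.getOperandCycle(IIC_ALUd_Idx, 0);
      return DefCycle >= 0 ? (unsigned)DefCycle : StageLatency;
    }
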
diff --git a/lib/Target/MBlaze/MBlazeSchedule5.td b/lib/Target/MBlaze/MBlazeSchedule5.td
new file mode 100644
index 0000000..fa88766
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeSchedule5.td
@@ -0,0 +1,267 @@
+//===- MBlazeSchedule5.td - MBlaze Scheduling Definitions --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MBlaze instruction itineraries for the five stage pipeline.
+//===----------------------------------------------------------------------===//
+def MBlazePipe5Itineraries : ProcessorItineraries<
+ [IF,ID,EX,MA,WB], [], [
+
+ // ALU instruction with one destination register and either two register
+ // source operands or one register source operand and one immediate operand.
+ // The instruction takes one cycle to execute in each of the stages. The
+ // two source operands are read during the decode stage and the result is
+ // ready after the execute stage.
+ InstrItinData< IIC_ALU,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<1,[MA]> // one cycle in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 2 // result ready after two cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // ALU multiply instruction with one destination register and either two
+ // register source operands or one register source operand and one immediate
+ // operand. The instruction takes one cycle to execute in each of the
+ // pipeline stages. The two source operands are read during the decode stage
+ // and the result is ready after the execute stage.
+ InstrItinData< IIC_ALUm,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<1,[MA]> // one cycle in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 2 // result ready after two cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // ALU divide instruction with one destination register and two register
+ // source operands. The instruction takes one cycle to execute in each of the
+ // pipeline stages except the memory access stage, which takes 31 cycles. The
+ // two source operands are read during the decode stage and the result is
+ // ready after the memory access stage.
+ InstrItinData< IIC_ALUd,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<31,[MA]> // 31 cycles in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 33 // result ready after 33 cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Shift instruction with one destination register and either two register
+ // source operands or one register source operand and one immediate operand.
+ // The instruction takes one cycle to execute in each of the pipeline stages.
+ // The two source operands are read during the decode stage and the result is
+ // ready after the memory access stage.
+ InstrItinData< IIC_SHT,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<1,[MA]> // one cycle in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 3 // result ready after three cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Branch instruction with one source operand register. The instruction takes
+ // one cycle to execute in each of the pipeline stages. The source operand is
+ // read during the decode stage.
+ InstrItinData< IIC_BR,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<1,[MA]> // one cycle in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 1 ]>, // first operand read after one cycle
+
+ // Conditional branch instruction with two source operand registers. The
+ // instruction takes one cycle to execute in each of the pipeline stages. The
+ // two source operands are read during the decode stage.
+ InstrItinData< IIC_BRc,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<1,[MA]> // one cycle in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Branch and link instruction with one destination register and one source
+ // operand register. The instruction takes one cycle to execute in each of
+ // the pipeline stages. The source operand is read during the decode stage
+ // and the destination register is ready after the writeback stage.
+ InstrItinData< IIC_BRl,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<1,[MA]> // one cycle in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 4 // result ready after four cycles
+ , 1 ]>, // first operand read after one cycle
+
+ // Cache control instruction with two source operand registers. The
+ // instruction takes one cycle to execute in each of the pipeline stages
+ // except the memory access stage, which takes two cycles. The source
+ // operands are read during the decode stage.
+ InstrItinData< IIC_WDC,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<2,[MA]> // two cycles in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Floating point instruction with one destination register and two source
+ // operand registers. The instruction takes one cycle to execute in each of
+ // the pipeline stages except the memory access stage, which takes two
+ // cycles. The source operands are read during the decode stage and the
+ // results are ready after the writeback stage.
+ InstrItinData< IIC_FPU,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<2,[MA]> // two cycles in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 5 // result ready after five cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Floating point divide instruction with one destination register and two
+ // source operand registers. The instruction takes one cycle to execute in
+ // each of the pipeline stages except the memory access stage, which takes 26
+ // cycles. The source operands are read during the decode stage and the
+ // results are ready after the writeback stage.
+ InstrItinData< IIC_FPUd,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<26,[MA]> // 26 cycles in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 29 // result ready after 29 cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Convert floating point to integer instruction with one destination
+ // register and one source operand register. The instruction takes one cycle
+ // to execute in each of the pipeline stages except the memory access stage,
+ // which takes three cycles. The source operands are read during the decode
+ // stage and the results are ready after the writeback stage.
+ InstrItinData< IIC_FPUi,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<3,[MA]> // three cycles in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 6 // result ready after six cycles
+ , 1 ]>, // first operand read after one cycle
+
+ // Convert integer to floating point instruction with one destination
+ // register and one source operand register. The instruction takes one cycle
+ // to execute in each of the pipeline stages except the memory access stage,
+ // which takes two cycles. The source operands are read during the decode
+ // stage and the results are ready after the writeback stage.
+ InstrItinData< IIC_FPUf,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<2,[MA]> // two cycles in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 5 // result ready after five cycles
+ , 1 ]>, // first operand read after one cycle
+
+ // Floating point square root instruction with one destination register and
+ // one source operand register. The instruction takes one cycle to execute in
+ // each of the pipeline stages except the memory access stage, which takes 25
+ // cycles. The source operands are read during the decode stage and the
+ // results are ready after the writeback stage.
+ InstrItinData< IIC_FPUs,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<25,[MA]> // 25 cycles in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 28 // result ready after 28 cycles
+ , 1 ]>, // first operand read after one cycle
+
+ // Floating point comparison instruction with one destination register and
+ // two source operand registers. The instruction takes one cycle to execute
+ // in each of the pipeline stages. The source operands are read during the
+ // decode stage and the results are ready after the execute stage.
+ InstrItinData< IIC_FPUc,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<1,[MA]> // one cycle in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 2 // result ready after two cycles
+ , 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // FSL get instruction with one register or immediate source operand and one
+ // destination register. The instruction takes one cycle to execute in each
+ // of the pipeline stages. The one source operand is read during the decode
+ // stage and the result is ready after the execute stage.
+ InstrItinData< IIC_FSLg,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<1,[MA]> // one cycle in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 2 // result ready after two cycles
+ , 1 ]>, // first operand read after one cycle
+
+ // FSL put instruction with either two register source operands or one
+ // register source operand and one immediate operand. There is no result
+ // produced by the instruction. The instruction takes one cycle to execute in
+ // each of the pipeline stages. The two source operands are read during the
+ // decode stage.
+ InstrItinData< IIC_FSLp,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<1,[MA]> // one cycle in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 1 // first operand read after one cycle
+ , 1 ]>, // second operand read after one cycle
+
+ // Memory store instruction with either three register source operands or two
+ // register source operands and one immediate operand. There is no result
+ // produced by the instruction. The instruction takes one cycle to execute in
+ // each of the pipeline stages. All of the source operands are read during
+ // the decode stage.
+ InstrItinData< IIC_MEMs,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<1,[MA]> // one cycle in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 1 // first operand read after one cycle
+ , 1 // second operand read after one cycle
+ , 1 ]>, // third operand read after one cycle
+
+ // Memory load instruction with one destination register and either two
+ // register source operands or one register source operand and one immediate
+ // operand. The instruction takes one cycle to execute in each of the
+ // pipeline stages. All of the source operands are read during the decode
+ // stage and the result is ready after the writeback stage.
+ InstrItinData< IIC_MEMl,
+ [ InstrStage<1,[IF]> // one cycle in fetch stage
+ , InstrStage<1,[ID]> // one cycle in decode stage
+ , InstrStage<1,[EX]> // one cycle in execute stage
+ , InstrStage<1,[MA]> // one cycle in memory access stage
+ , InstrStage<1,[WB]>], // one cycle in write back stage
+ [ 4 // result ready after four cycles
+ , 1 // first operand read after one cycle
+ , 1 ]> // second operand read after one cycle
+]>;
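
The same operand-cycle lists determine def-use distances between instruction classes. A hedged sketch of that computation (class indices are assumed TableGen-generated values, passed here as plain parameters):

    // Sketch: def-use latency from the operand cycles above. For example,
    // an IIC_MEMl result (cycle 4) feeding an IIC_ALU source (cycle 1)
    // yields a three-cycle load-use distance on the five-stage pipeline.
    #include "llvm/Target/TargetInstrItineraries.h"

    int defUseLatency(const llvm::InstrItineraryData &Itins,
                      unsigned DefClass, unsigned DefResultIdx,
                      unsigned UseClass, unsigned UseOperandIdx) {
      int DefCycle = Itins.getOperandCycle(DefClass, DefResultIdx);
      int UseCycle = Itins.getOperandCycle(UseClass, UseOperandIdx);
      if (DefCycle < 0 || UseCycle < 0)
        return -1; // itinerary has no data for one of the operands
      int Latency = DefCycle - UseCycle;
      return Latency > 0 ? Latency : 0;
    }
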
diff --git a/lib/Target/MBlaze/MBlazeSubtarget.cpp b/lib/Target/MBlaze/MBlazeSubtarget.cpp
index 3440521..a80744a 100644
--- a/lib/Target/MBlaze/MBlazeSubtarget.cpp
+++ b/lib/Target/MBlaze/MBlazeSubtarget.cpp
@@ -13,19 +13,39 @@
#include "MBlazeSubtarget.h"
#include "MBlaze.h"
+#include "MBlazeRegisterInfo.h"
#include "MBlazeGenSubtarget.inc"
#include "llvm/Support/CommandLine.h"
using namespace llvm;
MBlazeSubtarget::MBlazeSubtarget(const std::string &TT, const std::string &FS):
- HasPipe3(false), HasBarrel(false), HasDiv(false), HasMul(false),
- HasFSL(false), HasEFSL(false), HasMSRSet(false), HasException(false),
- HasPatCmp(false), HasFPU(false), HasESR(false), HasPVR(false),
- HasMul64(false), HasSqrt(false), HasMMU(false)
+ HasBarrel(false), HasDiv(false), HasMul(false), HasPatCmp(false),
+ HasFPU(false), HasMul64(false), HasSqrt(false)
{
- std::string CPU = "v400";
- MBlazeArchVersion = V400;
-
// Parse features string.
- ParseSubtargetFeatures(FS, CPU);
+ std::string CPU = "mblaze";
+ CPU = ParseSubtargetFeatures(FS, CPU);
+
+ // Only use instruction scheduling if the selected CPU has an instruction
+ // itinerary (the default CPU is the only one that doesn't).
+ HasItin = CPU != "mblaze";
+ DEBUG(dbgs() << "CPU " << CPU << "(" << HasItin << ")\n");
+
+ // Compute the issue width of the MBlaze itineraries
+ computeIssueWidth();
+}
+
+void MBlazeSubtarget::computeIssueWidth() {
+ InstrItins.IssueWidth = 1;
+}
+
+bool MBlazeSubtarget::
+enablePostRAScheduler(CodeGenOpt::Level OptLevel,
+ TargetSubtarget::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const {
+ Mode = TargetSubtarget::ANTIDEP_CRITICAL;
+ CriticalPathRCs.clear();
+ CriticalPathRCs.push_back(&MBlaze::GPRRegClass);
+ return HasItin && OptLevel >= CodeGenOpt::Default;
}
+
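For context, the generic post-RA scheduler queries this hook before scheduling. A simplified C++ sketch of that caller side, modeled on the TargetSubtarget interface of this era and not copied from PostRASchedulerList.cpp:

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/Target/TargetMachine.h"   // CodeGenOpt::Level
    #include "llvm/Target/TargetSubtarget.h"

    using namespace llvm;

    // Returns true when the subtarget wants post-RA scheduling; mirrors
    // the query the generic post-RA scheduler makes (simplified sketch).
    bool shouldSchedulePostRA(const TargetSubtarget &ST,
                              CodeGenOpt::Level OptLevel) {
      TargetSubtarget::AntiDepBreakMode Mode = TargetSubtarget::ANTIDEP_NONE;
      SmallVector<TargetRegisterClass*, 2> CriticalPathRCs;
      // For MBlaze this is true only with a CPU-specific itinerary and
      // OptLevel >= CodeGenOpt::Default; Mode comes back ANTIDEP_CRITICAL
      // with the GPR class as the critical path.
      return ST.enablePostRAScheduler(OptLevel, Mode, CriticalPathRCs);
    }
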
diff --git a/lib/Target/MBlaze/MBlazeSubtarget.h b/lib/Target/MBlaze/MBlazeSubtarget.h
index bebb3f7..2255b28 100644
--- a/lib/Target/MBlaze/MBlazeSubtarget.h
+++ b/lib/Target/MBlaze/MBlazeSubtarget.h
@@ -24,29 +24,14 @@ namespace llvm {
class MBlazeSubtarget : public TargetSubtarget {
protected:
-
- enum MBlazeArchEnum {
- V400, V500, V600, V700, V710
- };
-
- // MBlaze architecture version
- MBlazeArchEnum MBlazeArchVersion;
-
- bool HasPipe3;
bool HasBarrel;
bool HasDiv;
bool HasMul;
- bool HasFSL;
- bool HasEFSL;
- bool HasMSRSet;
- bool HasException;
bool HasPatCmp;
bool HasFPU;
- bool HasESR;
- bool HasPVR;
bool HasMul64;
bool HasSqrt;
- bool HasMMU;
+ bool HasItin;
InstrItineraryData InstrItins;
@@ -61,18 +46,26 @@ public:
std::string ParseSubtargetFeatures(const std::string &FS,
const std::string &CPU);
+ /// Compute the maximum number of issues per cycle for the
+ /// MBlaze scheduling itineraries.
+ void computeIssueWidth();
+
+ /// enablePostRAScheduler - True at the default optimization level or
+ /// above, provided the selected CPU has an itinerary.
+ bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
+ TargetSubtarget::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const;
+
+ /// getInstrItineraryData - Return the instruction itineraries based on subtarget.
+ const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
+
+ bool hasItin() const { return HasItin; }
+ bool hasPCMP() const { return HasPatCmp; }
bool hasFPU() const { return HasFPU; }
bool hasSqrt() const { return HasSqrt; }
bool hasMul() const { return HasMul; }
bool hasMul64() const { return HasMul64; }
bool hasDiv() const { return HasDiv; }
bool hasBarrel() const { return HasBarrel; }
-
- bool isV400() const { return MBlazeArchVersion == V400; }
- bool isV500() const { return MBlazeArchVersion == V500; }
- bool isV600() const { return MBlazeArchVersion == V600; }
- bool isV700() const { return MBlazeArchVersion == V700; }
- bool isV710() const { return MBlazeArchVersion == V710; }
};
} // End llvm namespace
diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
index cd949e1..df34a83 100644
--- a/lib/Target/MBlaze/MBlazeTargetMachine.cpp
+++ b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
@@ -36,19 +36,18 @@ static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
bool RelaxAll,
bool NoExecStack) {
Triple TheTriple(TT);
- switch (TheTriple.getOS()) {
- case Triple::Darwin:
+
+ if (TheTriple.isOSDarwin()) {
llvm_unreachable("MBlaze does not support Darwin MACH-O format");
return NULL;
- case Triple::MinGW32:
- case Triple::Cygwin:
- case Triple::Win32:
+ }
+
+ if (TheTriple.isOSWindows()) {
llvm_unreachable("MBlaze does not support Windows COFF format");
return NULL;
- default:
- return createELFStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll,
- NoExecStack);
}
+
+ return createELFStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll, NoExecStack);
}
@@ -87,7 +86,8 @@ MBlazeTargetMachine(const Target &T, const std::string &TT,
DataLayout("E-p:32:32:32-i8:8:8-i16:16:16"),
InstrInfo(*this),
FrameLowering(Subtarget),
- TLInfo(*this), TSInfo(*this), ELFWriterInfo(*this) {
+ TLInfo(*this), TSInfo(*this), ELFWriterInfo(*this),
+ InstrItins(Subtarget.getInstrItineraryData()) {
if (getRelocationModel() == Reloc::Default) {
setRelocationModel(Reloc::Static);
}
diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.h b/lib/Target/MBlaze/MBlazeTargetMachine.h
index 45ad078..48ce37a 100644
--- a/lib/Target/MBlaze/MBlazeTargetMachine.h
+++ b/lib/Target/MBlaze/MBlazeTargetMachine.h
@@ -38,13 +38,18 @@ namespace llvm {
MBlazeSelectionDAGInfo TSInfo;
MBlazeIntrinsicInfo IntrinsicInfo;
MBlazeELFWriterInfo ELFWriterInfo;
+ InstrItineraryData InstrItins;
+
public:
MBlazeTargetMachine(const Target &T, const std::string &TT,
- const std::string &FS);
+ const std::string &FS);
virtual const MBlazeInstrInfo *getInstrInfo() const
{ return &InstrInfo; }
+ virtual const InstrItineraryData *getInstrItineraryData() const
+ { return &InstrItins; }
+
virtual const TargetFrameLowering *getFrameLowering() const
{ return &FrameLowering; }
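
With the itineraries now exposed on the target machine, client passes can cheaply test whether scheduling data exists. A small sketch of the assumed usage pattern (the helper name is illustrative):

    #include "llvm/Target/TargetInstrItineraries.h"
    #include "llvm/Target/TargetMachine.h"

    bool hasUsableItineraries(const llvm::TargetMachine &TM) {
      const llvm::InstrItineraryData *Itins = TM.getInstrItineraryData();
      // The default "mblaze" CPU installs an empty table
      // (MBlazeGenericItineraries above), so isEmpty() is the cheap test.
      return Itins && !Itins->isEmpty();
    }
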
diff --git a/lib/Target/MBlaze/TODO b/lib/Target/MBlaze/TODO
index 2e613eb..317d7c0 100644
--- a/lib/Target/MBlaze/TODO
+++ b/lib/Target/MBlaze/TODO
@@ -9,8 +9,6 @@
needs to be examined more closely:
- The stack layout needs to be examined to make sure it meets
the standard, especially in regards to var arg functions.
- - The processor itineraries are copied from a different backend
- and need to be updated to model the MicroBlaze correctly.
- Look at the MBlazeGenFastISel.inc stuff and make use of it
if appropriate.
@@ -18,9 +16,6 @@
There are a few things that need to be looked at:
- There are some instructions that are not generated by the backend
and have not been tested as far as the parser is concerned.
- - The assembly parser does not use any MicroBlaze specific directives.
+ - The assembly parser does not use many MicroBlaze specific directives.
I should investigate if there are MicroBlaze specific directives and,
if there are, add them.
- - The instruction MFS and MTS use special names for some of the
- special registers that can be accessed. These special register
- names should be parsed by the assembly parser.
diff --git a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
index f0e1ce2..63860dc 100644
--- a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
+++ b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
@@ -18,11 +18,12 @@
namespace llvm {
class MCOperand;
+ class TargetMachine;
class MSP430InstPrinter : public MCInstPrinter {
public:
- MSP430InstPrinter(const MCAsmInfo &MAI) : MCInstPrinter(MAI) {
- }
+ MSP430InstPrinter(TargetMachine &TM, const MCAsmInfo &MAI)
+ : MCInstPrinter(MAI) {}
virtual void printInst(const MCInst *MI, raw_ostream &O);
diff --git a/lib/Target/MSP430/MSP430AsmPrinter.cpp b/lib/Target/MSP430/MSP430AsmPrinter.cpp
index a1a7f44..5264d68 100644
--- a/lib/Target/MSP430/MSP430AsmPrinter.cpp
+++ b/lib/Target/MSP430/MSP430AsmPrinter.cpp
@@ -164,10 +164,11 @@ void MSP430AsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
static MCInstPrinter *createMSP430MCInstPrinter(const Target &T,
+ TargetMachine &TM,
unsigned SyntaxVariant,
const MCAsmInfo &MAI) {
if (SyntaxVariant == 0)
- return new MSP430InstPrinter(MAI);
+ return new MSP430InstPrinter(TM, MAI);
return 0;
}
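
The extra TargetMachine parameter only matters because the factory is registered with TargetRegistry, which fixes the constructor signature. Roughly how the registration looks (abbreviated; this reuses identifiers from the surrounding file and is not quoted verbatim from it):

    // Assumed registration shape: the factory signature now carries
    // TargetMachine so the printer can be constructed with it.
    extern "C" void LLVMInitializeMSP430AsmPrinter() {
      RegisterAsmPrinter<MSP430AsmPrinter> X(TheMSP430Target);
      TargetRegistry::RegisterMCInstPrinter(TheMSP430Target,
                                            createMSP430MCInstPrinter);
    }
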
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp
index a95d59c..006785b 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -515,7 +515,7 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
// Build a sequence of copy-to-reg nodes chained together with token chain and
// flag operands which copy the outgoing args into registers. The InFlag is
- // necessary since all emited instructions must be stuck together.
+ // necessary since all emitted instructions must be stuck together.
SDValue InFlag;
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt
index 26df1a0..8939b0a 100644
--- a/lib/Target/Mips/CMakeLists.txt
+++ b/lib/Target/Mips/CMakeLists.txt
@@ -13,6 +13,7 @@ tablegen(MipsGenSubtarget.inc -gen-subtarget)
add_llvm_target(MipsCodeGen
MipsAsmPrinter.cpp
MipsDelaySlotFiller.cpp
+ MipsExpandPseudo.cpp
MipsInstrInfo.cpp
MipsISelDAGToDAG.cpp
MipsISelLowering.cpp
diff --git a/lib/Target/Mips/Mips.h b/lib/Target/Mips/Mips.h
index a9ab050..05b4c5a 100644
--- a/lib/Target/Mips/Mips.h
+++ b/lib/Target/Mips/Mips.h
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains the entry points for global functions defined in
+// This file contains the entry points for global functions defined in
// the LLVM Mips back-end.
//
//===----------------------------------------------------------------------===//
@@ -25,6 +25,7 @@ namespace llvm {
FunctionPass *createMipsISelDag(MipsTargetMachine &TM);
FunctionPass *createMipsDelaySlotFillerPass(MipsTargetMachine &TM);
+ FunctionPass *createMipsExpandPseudoPass(MipsTargetMachine &TM);
extern Target TheMipsTarget;
extern Target TheMipselTarget;
diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td
index 3e6437b..b79016d 100644
--- a/lib/Target/Mips/Mips.td
+++ b/lib/Target/Mips/Mips.td
@@ -59,7 +59,7 @@ def FeatureMips1 : SubtargetFeature<"mips1", "MipsArchVersion", "Mips1",
def FeatureMips2 : SubtargetFeature<"mips2", "MipsArchVersion", "Mips2",
"Mips2 ISA Support">;
def FeatureMips32 : SubtargetFeature<"mips32", "MipsArchVersion", "Mips32",
- "Mips32 ISA Support",
+ "Mips32 ISA Support",
[FeatureCondMov, FeatureBitCount]>;
def FeatureMips32r2 : SubtargetFeature<"mips32r2", "MipsArchVersion",
"Mips32r2", "Mips32r2 ISA Support",
@@ -81,7 +81,7 @@ def : Proc<"r6000", [FeatureMips2]>;
def : Proc<"4ke", [FeatureMips32r2]>;
-// Allegrex is a 32bit subset of r4000, both for interger and fp registers,
+// Allegrex is a 32bit subset of r4000, both for integer and fp registers,
// but much more similar to Mips2 than Mips3. It also contains some of
// Mips32/Mips32r2 instructions and a custom vector fpu processor.
def : Proc<"allegrex", [FeatureMips2, FeatureSingleFloat, FeatureEABI,
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
index bd28a9b..502f744 100644
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -30,7 +30,7 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegistry.h"
@@ -53,14 +53,14 @@ namespace {
return "Mips Assembly Printer";
}
- bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant, const char *ExtraCode,
raw_ostream &O);
void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
void printUnsignedImm(const MachineInstr *MI, int opNum, raw_ostream &O);
- void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
+ void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
const char *Modifier = 0);
- void printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
+ void printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
const char *Modifier = 0);
void printSavedRegsBitmask(raw_ostream &O);
void printHex32(unsigned int Value, raw_ostream &O);
@@ -77,7 +77,8 @@ namespace {
}
virtual void EmitFunctionBodyStart();
virtual void EmitFunctionBodyEnd();
- virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const;
+ virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock*
+ MBB) const;
static const char *getRegisterName(unsigned RegNo);
virtual void EmitFunctionEntryLabel();
@@ -94,12 +95,12 @@ namespace {
// -- Frame directive "frame Stackpointer, Stacksize, RARegister"
// Describe the stack frame.
//
-// -- Mask directives "(f)mask bitmask, offset"
+// -- Mask directives "(f)mask bitmask, offset"
// Tells the assembler which registers are saved and where.
-// bitmask - contain a little endian bitset indicating which registers are
-// saved on function prologue (e.g. with a 0x80000000 mask, the
+// bitmask - contain a little endian bitset indicating which registers are
+// saved on function prologue (e.g. with a 0x80000000 mask, the
// assembler knows the register 31 (RA) is saved at prologue).
-// offset - the position before stack pointer subtraction indicating where
+// offset - the position before stack pointer subtraction indicating where
// the first saved register on prologue is located (e.g. the -8 offset
// in the example below).
//
// Consider the following function prologue:
@@ -110,9 +111,9 @@ namespace {
// sw $ra, 40($sp)
// sw $fp, 36($sp)
//
-// With a 0xc0000000 mask, the assembler knows the register 31 (RA) and
-// 30 (FP) are saved at prologue. As the save order on prologue is from
-// left to right, RA is saved first. A -8 offset means that after the
+// With a 0xc0000000 mask, the assembler knows the register 31 (RA) and
+// 30 (FP) are saved at prologue. As the save order on prologue is from
+// left to right, RA is saved first. A -8 offset means that after the
// stack pointer subtraction, the first register in the mask (RA) will be
// saved at address 48-8=40.
//
@@ -122,7 +123,7 @@ namespace {
// Mask directives
//===----------------------------------------------------------------------===//
-// Create a bitmask with all callee saved registers for CPU or Floating Point
+// Create a bitmask with all callee saved registers for CPU or Floating Point
// registers. For CPU registers consider RA, GP and FP for saving if necessary.
void MipsAsmPrinter::printSavedRegsBitmask(raw_ostream &O) {
const TargetFrameLowering *TFI = TM.getFrameLowering();
@@ -168,7 +169,7 @@ void MipsAsmPrinter::printSavedRegsBitmask(raw_ostream &O) {
// Print a 32 bit hex number with all numbers.
void MipsAsmPrinter::printHex32(unsigned Value, raw_ostream &O) {
O << "0x";
- for (int i = 7; i >= 0; i--)
+ for (int i = 7; i >= 0; i--)
O << utohexstr((Value & (0xF << (i*4))) >> (i*4));
}
@@ -191,9 +192,9 @@ void MipsAsmPrinter::emitFrameDirective() {
}
/// Emit Set directives.
-const char *MipsAsmPrinter::getCurrentABIString() const {
+const char *MipsAsmPrinter::getCurrentABIString() const {
switch (Subtarget->getTargetABI()) {
- case MipsSubtarget::O32: return "abi32";
+ case MipsSubtarget::O32: return "abi32";
case MipsSubtarget::O64: return "abiO64";
case MipsSubtarget::N32: return "abiN32";
case MipsSubtarget::N64: return "abi64";
@@ -203,7 +204,7 @@ const char *MipsAsmPrinter::getCurrentABIString() const {
llvm_unreachable("Unknown Mips ABI");
return NULL;
-}
+}
void MipsAsmPrinter::EmitFunctionEntryLabel() {
OutStreamer.EmitRawText("\t.ent\t" + Twine(CurrentFnSym->getName()));
@@ -214,7 +215,7 @@ void MipsAsmPrinter::EmitFunctionEntryLabel() {
/// the first basic block in the function.
void MipsAsmPrinter::EmitFunctionBodyStart() {
emitFrameDirective();
-
+
SmallString<128> Str;
raw_svector_ostream OS(Str);
printSavedRegsBitmask(OS);
@@ -226,7 +227,7 @@ void MipsAsmPrinter::EmitFunctionBodyStart() {
void MipsAsmPrinter::EmitFunctionBodyEnd() {
// There are instructions for these macros, but they must
// always be at the function end, and we can't emit them and
- // break with BB logic.
+ // break with BB logic.
OutStreamer.EmitRawText(StringRef("\t.set\tmacro"));
OutStreamer.EmitRawText(StringRef("\t.set\treorder"));
OutStreamer.EmitRawText("\t.end\t" + Twine(CurrentFnSym->getName()));
@@ -236,8 +237,8 @@ void MipsAsmPrinter::EmitFunctionBodyEnd() {
/// isBlockOnlyReachableByFallthough - Return true if the basic block has
/// exactly one predecessor and the control transfer mechanism between
/// the predecessor and this block is a fall-through.
-bool MipsAsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB)
- const {
+bool MipsAsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock*
+ MBB) const {
// The predecessor has to be immediately before this block.
const MachineBasicBlock *Pred = *MBB->pred_begin();
@@ -246,16 +247,41 @@ bool MipsAsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock *
if (const BasicBlock *bb = Pred->getBasicBlock())
if (isa<SwitchInst>(bb->getTerminator()))
return false;
+
+ // If this is a landing pad, it isn't a fall through. If it has no preds,
+ // then nothing falls through to it.
+ if (MBB->isLandingPad() || MBB->pred_empty())
+ return false;
+
+ // If there isn't exactly one predecessor, it can't be a fall through.
+ MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PI2 = PI;
+ ++PI2;
+
+ if (PI2 != MBB->pred_end())
+ return false;
+
+ // The predecessor has to be immediately before this block.
+ if (!Pred->isLayoutSuccessor(MBB))
+ return false;
+
+ // If the block is completely empty, then it definitely does fall through.
+ if (Pred->empty())
+ return true;
- return AsmPrinter::isBlockOnlyReachableByFallthrough(MBB);
+ // Otherwise, check the last instruction.
+ // Check if the last terminator is an unconditional branch.
+ MachineBasicBlock::const_iterator I = Pred->end();
+ while (I != Pred->begin() && !(--I)->getDesc().isTerminator()) ;
+
+ return !I->getDesc().isBarrier();
}
// Print out an operand for an inline asm expression.
-bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant,const char *ExtraCode,
raw_ostream &O) {
// Does this asm operand have a single letter operand modifier?
- if (ExtraCode && ExtraCode[0])
+ if (ExtraCode && ExtraCode[0])
return true; // Unknown modifier.
printOperand(MI, OpNo, O);
@@ -273,22 +299,9 @@ void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
switch(MO.getTargetFlags()) {
case MipsII::MO_GPREL: O << "%gp_rel("; break;
case MipsII::MO_GOT_CALL: O << "%call16("; break;
- case MipsII::MO_GOT: {
- const MachineOperand &LastMO = MI->getOperand(opNum-1);
- bool LastMOIsGP = LastMO.getType() == MachineOperand::MO_Register
- && LastMO.getReg() == Mips::GP;
- if (MI->getOpcode() == Mips::LW || LastMOIsGP)
- O << "%got(";
- else
- O << "%lo(";
- break;
- }
- case MipsII::MO_ABS_HILO:
- if (MI->getOpcode() == Mips::LUi)
- O << "%hi(";
- else
- O << "%lo(";
- break;
+ case MipsII::MO_GOT: O << "%got("; break;
+ case MipsII::MO_ABS_HI: O << "%hi("; break;
+ case MipsII::MO_ABS_LO: O << "%lo("; break;
}
switch (MO.getType()) {
@@ -308,6 +321,12 @@ void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
O << *Mang->getSymbol(MO.getGlobal());
break;
+ case MachineOperand::MO_BlockAddress: {
+ MCSymbol* BA = GetBlockAddressSymbol(MO.getBlockAddress());
+ O << BA->getName();
+ break;
+ }
+
case MachineOperand::MO_ExternalSymbol:
O << *GetExternalSymbolSymbol(MO.getSymbolName());
break;
@@ -323,7 +342,7 @@ void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
if (MO.getOffset())
O << "+" << MO.getOffset();
break;
-
+
default:
llvm_unreachable("<unknown operand type>");
}
@@ -336,7 +355,7 @@ void MipsAsmPrinter::printUnsignedImm(const MachineInstr *MI, int opNum,
const MachineOperand &MO = MI->getOperand(opNum);
if (MO.isImm())
O << (unsigned short int)MO.getImm();
- else
+ else
printOperand(MI, opNum, O);
}
@@ -352,8 +371,8 @@ printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
return;
}
- // Load/Store memory operands -- imm($reg)
- // If PIC target the target is loaded as the
+ // Load/Store memory operands -- imm($reg)
+ // If PIC target the target is loaded as the
// pattern lw $25,%call16($28)
printOperand(MI, opNum, O);
O << "(";
@@ -365,12 +384,12 @@ void MipsAsmPrinter::
printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
const char *Modifier) {
const MachineOperand& MO = MI->getOperand(opNum);
- O << Mips::MipsFCCToString((Mips::CondCode)MO.getImm());
+ O << Mips::MipsFCCToString((Mips::CondCode)MO.getImm());
}
void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) {
// FIXME: Use SwitchSection.
-
+
// Tell the assembler which ABI we are using
OutStreamer.EmitRawText("\t.section .mdebug." + Twine(getCurrentABIString()));
@@ -383,11 +402,11 @@ void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) {
}
// return to previous section
- OutStreamer.EmitRawText(StringRef("\t.previous"));
+ OutStreamer.EmitRawText(StringRef("\t.previous"));
}
// Force static initialization.
-extern "C" void LLVMInitializeMipsAsmPrinter() {
+extern "C" void LLVMInitializeMipsAsmPrinter() {
RegisterAsmPrinter<MipsAsmPrinter> X(TheMipsTarget);
RegisterAsmPrinter<MipsAsmPrinter> Y(TheMipselTarget);
}
diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td
index 8f313ef..57aeb1d 100644
--- a/lib/Target/Mips/MipsCallingConv.td
+++ b/lib/Target/Mips/MipsCallingConv.td
@@ -1,23 +1,23 @@
//===- MipsCallingConv.td - Calling Conventions for Mips ---*- tablegen -*-===//
-//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
// This describes the calling conventions for Mips architecture.
//===----------------------------------------------------------------------===//
/// CCIfSubtarget - Match if the current subtarget has a feature F.
-class CCIfSubtarget<string F, CCAction A>:
+class CCIfSubtarget<string F, CCAction A>:
CCIf<!strconcat("State.getTarget().getSubtarget<MipsSubtarget>().", F), A>;
//===----------------------------------------------------------------------===//
// Mips O32 Calling Convention
//===----------------------------------------------------------------------===//
-// Only the return rules are defined here for O32. The rules for argument
+// Only the return rules are defined here for O32. The rules for argument
// passing are defined in MipsISelLowering.cpp.
def RetCC_MipsO32 : CallingConv<[
// i32 are returned in registers V0, V1
@@ -41,15 +41,15 @@ def CC_MipsEABI : CallingConv<[
// Integer arguments are passed in integer registers.
CCIfType<[i32], CCAssignToReg<[A0, A1, A2, A3, T0, T1, T2, T3]>>,
- // Single fp arguments are passed in pairs within 32-bit mode
- CCIfType<[f32], CCIfSubtarget<"isSingleFloat()",
+ // Single fp arguments are passed in pairs within 32-bit mode
+ CCIfType<[f32], CCIfSubtarget<"isSingleFloat()",
CCAssignToReg<[F12, F13, F14, F15, F16, F17, F18, F19]>>>,
- CCIfType<[f32], CCIfSubtarget<"isNotSingleFloat()",
+ CCIfType<[f32], CCIfSubtarget<"isNotSingleFloat()",
CCAssignToReg<[F12, F14, F16, F18]>>>,
- // The first 4 doubl fp arguments are passed in single fp registers.
- CCIfType<[f64], CCIfSubtarget<"isNotSingleFloat()",
+ // The first 4 double fp arguments are passed in single fp registers.
+ CCIfType<[f64], CCIfSubtarget<"isNotSingleFloat()",
CCAssignToReg<[D6, D7, D8, D9]>>>,
// Integer values get stored in stack slots that are 4 bytes in
diff --git a/lib/Target/Mips/MipsExpandPseudo.cpp b/lib/Target/Mips/MipsExpandPseudo.cpp
new file mode 100644
index 0000000..4423f51
--- /dev/null
+++ b/lib/Target/Mips/MipsExpandPseudo.cpp
@@ -0,0 +1,117 @@
+//===-- MipsExpandPseudo.cpp - Expand pseudo instructions ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass expands pseudo instructions into target instructions after register
+// allocation but before post-RA scheduling.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-expand-pseudo"
+
+#include "Mips.h"
+#include "MipsTargetMachine.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/Statistic.h"
+
+using namespace llvm;
+
+namespace {
+ struct MipsExpandPseudo : public MachineFunctionPass {
+
+ TargetMachine &TM;
+ const TargetInstrInfo *TII;
+
+ static char ID;
+ MipsExpandPseudo(TargetMachine &tm)
+ : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()) { }
+
+ virtual const char *getPassName() const {
+ return "Mips PseudoInstrs Expansion";
+ }
+
+ bool runOnMachineFunction(MachineFunction &F);
+ bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
+
+ private:
+ void ExpandBuildPairF64(MachineBasicBlock&, MachineBasicBlock::iterator);
+ void ExpandExtractElementF64(MachineBasicBlock&,
+ MachineBasicBlock::iterator);
+ };
+ char MipsExpandPseudo::ID = 0;
+} // end of anonymous namespace
+
+bool MipsExpandPseudo::runOnMachineFunction(MachineFunction& F) {
+ bool Changed = false;
+
+ for (MachineFunction::iterator I = F.begin(); I != F.end(); ++I)
+ Changed |= runOnMachineBasicBlock(*I);
+
+ return Changed;
+}
+
+bool MipsExpandPseudo::runOnMachineBasicBlock(MachineBasicBlock& MBB) {
+
+ bool Changed = false;
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end();) {
+ const TargetInstrDesc& Tid = I->getDesc();
+
+ switch(Tid.getOpcode()) {
+ default:
+ ++I;
+ continue;
+ case Mips::BuildPairF64:
+ ExpandBuildPairF64(MBB, I);
+ break;
+ case Mips::ExtractElementF64:
+ ExpandExtractElementF64(MBB, I);
+ break;
+ }
+
+ // delete original instr
+ MBB.erase(I++);
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+void MipsExpandPseudo::ExpandBuildPairF64(MachineBasicBlock& MBB,
+ MachineBasicBlock::iterator I) {
+ unsigned DstReg = I->getOperand(0).getReg();
+ unsigned LoReg = I->getOperand(1).getReg(), HiReg = I->getOperand(2).getReg();
+ const TargetInstrDesc& Mtc1Tdd = TII->get(Mips::MTC1);
+ DebugLoc dl = I->getDebugLoc();
+ const unsigned* SubReg =
+ TM.getRegisterInfo()->getSubRegisters(DstReg);
+
+ // mtc1 Lo, $fp
+ // mtc1 Hi, $fp + 1
+ BuildMI(MBB, I, dl, Mtc1Tdd, *SubReg).addReg(LoReg);
+ BuildMI(MBB, I, dl, Mtc1Tdd, *(SubReg + 1)).addReg(HiReg);
+}
+
+void MipsExpandPseudo::ExpandExtractElementF64(MachineBasicBlock& MBB,
+ MachineBasicBlock::iterator I) {
+ unsigned DstReg = I->getOperand(0).getReg();
+ unsigned SrcReg = I->getOperand(1).getReg();
+ unsigned N = I->getOperand(2).getImm();
+ const TargetInstrDesc& Mfc1Tdd = TII->get(Mips::MFC1);
+ DebugLoc dl = I->getDebugLoc();
+ const unsigned* SubReg = TM.getRegisterInfo()->getSubRegisters(SrcReg);
+
+ BuildMI(MBB, I, dl, Mfc1Tdd, DstReg).addReg(*(SubReg + N));
+}
+
+/// createMipsExpandPseudoPass - Returns a pass that expands pseudo
+/// instructions into real instructions.
+FunctionPass *llvm::createMipsExpandPseudoPass(MipsTargetMachine &tm) {
+ return new MipsExpandPseudo(tm);
+}
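
The hunks shown here do not include the pass's registration, so the following is an assumed wiring in MipsTargetMachine, matching the file header's "after register allocation but before post-RA scheduling":

    // Assumed hook (not shown in this commit's hunks): schedule the
    // expansion pass to run right after register allocation.
    bool MipsTargetMachine::addPostRegAlloc(PassManagerBase &PM,
                                            CodeGenOpt::Level OptLevel) {
      PM.add(createMipsExpandPseudoPass(*this));
      return true;
    }
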
diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp
index 87a097a..21e3314 100644
--- a/lib/Target/Mips/MipsFrameLowering.cpp
+++ b/lib/Target/Mips/MipsFrameLowering.cpp
@@ -203,6 +203,46 @@ void MipsFrameLowering::adjustMipsStackFrame(MachineFunction &MF) const {
MipsFI->setFPUTopSavedRegOff(TopFPUSavedRegOff-StackOffset);
}
+
+// expand pair of register and immediate if the immediate doesn't fit in the
+// 16-bit offset field.
+// e.g.
+// if OrigImm = 0x10000, OrigReg = $sp:
+// generate the following sequence of instrs:
+// lui $at, hi(0x10000)
+// addu $at, $sp, $at
+//
+// (NewReg, NewImm) = ($at, lo(0x10000))
+// return true
+static bool expandRegLargeImmPair(unsigned OrigReg, int OrigImm,
+ unsigned& NewReg, int& NewImm,
+ MachineBasicBlock& MBB,
+ MachineBasicBlock::iterator I) {
+ // OrigImm fits in the 16-bit field
+ if (OrigImm < 0x8000 && OrigImm >= -0x8000) {
+ NewReg = OrigReg;
+ NewImm = OrigImm;
+ return false;
+ }
+
+ MachineFunction* MF = MBB.getParent();
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+ DebugLoc DL = I->getDebugLoc();
+ int ImmLo = OrigImm & 0xffff;
+ int ImmHi = (((unsigned)OrigImm & 0xffff0000) >> 16) +
+ ((OrigImm & 0x8000) != 0);
+
+  // FIXME: change this when mips goes MC.
+ BuildMI(MBB, I, DL, TII->get(Mips::NOAT));
+ BuildMI(MBB, I, DL, TII->get(Mips::LUi), Mips::AT).addImm(ImmHi);
+ BuildMI(MBB, I, DL, TII->get(Mips::ADDu), Mips::AT).addReg(OrigReg)
+ .addReg(Mips::AT);
+ NewReg = Mips::AT;
+ NewImm = ImmLo;
+
+ return true;
+}
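
The ImmHi computation compensates for the sign extension that the consuming instruction applies to its 16-bit immediate: whenever bit 15 of the offset is set, the high half must be bumped by one. A self-contained sketch of the equivalent arithmetic (illustrative only, not the patch's code):

    #include <cassert>
    #include <cstdint>

    // Split a 32-bit offset into the halves used by the lui/addu sequence.
    static void splitHiLo(int32_t imm, int32_t &hi, int32_t &lo) {
      lo = static_cast<int16_t>(imm & 0xffff);  // sign-extended, as the hardware sees it
      hi = (static_cast<uint32_t>(imm) >> 16) + ((imm & 0x8000) != 0);
    }

    int main() {
      int32_t hi, lo;
      splitHiLo(0x18000, hi, lo);
      assert(hi == 2 && lo == -0x8000);  // (2 << 16) + (-0x8000) == 0x18000
      return 0;
    }
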
+
void MipsFrameLowering::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock &MBB = MF.front();
MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -214,6 +254,9 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock::iterator MBBI = MBB.begin();
DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
bool isPIC = (MF.getTarget().getRelocationModel() == Reloc::PIC_);
+ unsigned NewReg = 0;
+ int NewImm = 0;
+ bool ATUsed;
// Get the right frame order for Mips.
adjustMipsStackFrame(MF);
@@ -236,22 +279,40 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const {
BuildMI(MBB, MBBI, dl, TII.get(Mips::NOMACRO));
// Adjust stack : addi sp, sp, (-imm)
+ ATUsed = expandRegLargeImmPair(Mips::SP, -StackSize, NewReg, NewImm, MBB,
+ MBBI);
BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDiu), Mips::SP)
- .addReg(Mips::SP).addImm(-StackSize);
+ .addReg(NewReg).addImm(NewImm);
+
+  // FIXME: change this when mips goes MC.
+ if (ATUsed)
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::ATMACRO));
- // Save the return address only if the function isnt a leaf one.
+ // Save the return address only if the function isn't a leaf one.
// sw $ra, stack_loc($sp)
if (MFI->adjustsStack()) {
+ ATUsed = expandRegLargeImmPair(Mips::SP, RAOffset, NewReg, NewImm, MBB,
+ MBBI);
BuildMI(MBB, MBBI, dl, TII.get(Mips::SW))
- .addReg(Mips::RA).addImm(RAOffset).addReg(Mips::SP);
+ .addReg(Mips::RA).addImm(NewImm).addReg(NewReg);
+
+    // FIXME: change this when mips goes MC.
+ if (ATUsed)
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::ATMACRO));
}
// if framepointer enabled, save it and set it
// to point to the stack pointer
if (hasFP(MF)) {
// sw $fp,stack_loc($sp)
+ ATUsed = expandRegLargeImmPair(Mips::SP, FPOffset, NewReg, NewImm, MBB,
+ MBBI);
BuildMI(MBB, MBBI, dl, TII.get(Mips::SW))
- .addReg(Mips::FP).addImm(FPOffset).addReg(Mips::SP);
+ .addReg(Mips::FP).addImm(NewImm).addReg(NewReg);
+
+    // FIXME: change this when mips goes MC.
+ if (ATUsed)
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::ATMACRO));
// move $fp, $sp
BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDu), Mips::FP)
@@ -280,6 +341,10 @@ void MipsFrameLowering::emitEpilogue(MachineFunction &MF,
int FPOffset = MipsFI->getFPStackOffset();
int RAOffset = MipsFI->getRAStackOffset();
+ unsigned NewReg = 0;
+ int NewImm = 0;
+ bool ATUsed = false;
+
// if framepointer enabled, restore it and restore the
// stack pointer
if (hasFP(MF)) {
@@ -288,21 +353,39 @@ void MipsFrameLowering::emitEpilogue(MachineFunction &MF,
.addReg(Mips::FP).addReg(Mips::ZERO);
// lw $fp,stack_loc($sp)
+ ATUsed = expandRegLargeImmPair(Mips::SP, FPOffset, NewReg, NewImm, MBB,
+ MBBI);
BuildMI(MBB, MBBI, dl, TII.get(Mips::LW), Mips::FP)
- .addImm(FPOffset).addReg(Mips::SP);
+ .addImm(NewImm).addReg(NewReg);
+
+    // FIXME: change this when mips goes MC.
+ if (ATUsed)
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::ATMACRO));
}
- // Restore the return address only if the function isnt a leaf one.
+ // Restore the return address only if the function isn't a leaf one.
// lw $ra, stack_loc($sp)
if (MFI->adjustsStack()) {
+ ATUsed = expandRegLargeImmPair(Mips::SP, RAOffset, NewReg, NewImm, MBB,
+ MBBI);
BuildMI(MBB, MBBI, dl, TII.get(Mips::LW), Mips::RA)
- .addImm(RAOffset).addReg(Mips::SP);
+ .addImm(NewImm).addReg(NewReg);
+
+    // FIXME: change this when mips goes MC.
+ if (ATUsed)
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::ATMACRO));
}
// adjust stack : insert addi sp, sp, (imm)
if (NumBytes) {
+ ATUsed = expandRegLargeImmPair(Mips::SP, NumBytes, NewReg, NewImm, MBB,
+ MBBI);
BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDiu), Mips::SP)
- .addReg(Mips::SP).addImm(NumBytes);
+ .addReg(NewReg).addImm(NewImm);
+
+    // FIXME: change this when mips goes MC.
+ if (ATUsed)
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::ATMACRO));
}
}
diff --git a/lib/Target/Mips/MipsFrameLowering.h b/lib/Target/Mips/MipsFrameLowering.h
index a8426c1..34647df 100644
--- a/lib/Target/Mips/MipsFrameLowering.h
+++ b/lib/Target/Mips/MipsFrameLowering.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef ALPHA_FRAMEINFO_H
-#define ALPHA_FRAMEINFO_H
+#ifndef MIPS_FRAMEINFO_H
+#define MIPS_FRAMEINFO_H
#include "Mips.h"
#include "MipsSubtarget.h"
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index 755e04d..0382964 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -52,19 +52,19 @@ class MipsDAGToDAGISel : public SelectionDAGISel {
/// Subtarget - Keep a pointer to the MipsSubtarget around so that we can
/// make the right decision when generating code for different targets.
const MipsSubtarget &Subtarget;
-
+
public:
explicit MipsDAGToDAGISel(MipsTargetMachine &tm) :
SelectionDAGISel(tm),
TM(tm), Subtarget(tm.getSubtarget<MipsSubtarget>()) {}
-
+
// Pass Name
virtual const char *getPassName() const {
return "MIPS DAG->DAG Pattern Instruction Selection";
- }
-
+ }
-private:
+
+private:
// Include the pieces autogenerated from the target description.
#include "MipsGenDAGISel.inc"
@@ -116,12 +116,14 @@ SelectAddr(SDValue Addr, SDValue &Offset, SDValue &Base) {
Offset = CurDAG->getTargetConstant(0, MVT::i32);
return true;
}
-
+
    // On PIC code, load the GA (global address) through the GOT.
if (TM.getRelocationModel() == Reloc::PIC_) {
- if ((Addr.getOpcode() == ISD::TargetGlobalAddress) ||
- (Addr.getOpcode() == ISD::TargetConstantPool) ||
- (Addr.getOpcode() == ISD::TargetJumpTable)){
+ if ((Addr.getOpcode() == ISD::TargetGlobalAddress) ||
+ (Addr.getOpcode() == ISD::TargetConstantPool) ||
+ (Addr.getOpcode() == ISD::TargetJumpTable) ||
+ (Addr.getOpcode() == ISD::TargetBlockAddress) ||
+ (Addr.getOpcode() == ISD::TargetExternalSymbol)) {
Base = CurDAG->getRegister(Mips::GP, MVT::i32);
Offset = Addr;
return true;
@@ -130,8 +132,8 @@ SelectAddr(SDValue Addr, SDValue &Offset, SDValue &Base) {
if ((Addr.getOpcode() == ISD::TargetExternalSymbol ||
Addr.getOpcode() == ISD::TargetGlobalAddress))
return false;
- }
-
+ }
+
// Operand is a result from an ADD.
if (Addr.getOpcode() == ISD::ADD) {
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
@@ -158,10 +160,10 @@ SelectAddr(SDValue Addr, SDValue &Offset, SDValue &Base) {
// Generate:
// lui $2, %hi($CPI1_0)
// lwc1 $f0, %lo($CPI1_0)($2)
- if ((Addr.getOperand(0).getOpcode() == MipsISD::Hi ||
+ if ((Addr.getOperand(0).getOpcode() == MipsISD::Hi ||
Addr.getOperand(0).getOpcode() == ISD::LOAD) &&
Addr.getOperand(1).getOpcode() == MipsISD::Lo) {
- SDValue LoVal = Addr.getOperand(1);
+ SDValue LoVal = Addr.getOperand(1);
if (dyn_cast<ConstantPoolSDNode>(LoVal.getOperand(0))) {
Base = Addr.getOperand(0);
Offset = LoVal.getOperand(0);
@@ -176,7 +178,7 @@ SelectAddr(SDValue Addr, SDValue &Offset, SDValue &Base) {
}
SDNode *MipsDAGToDAGISel::SelectLoadFp64(SDNode *N) {
- MVT::SimpleValueType NVT =
+ MVT::SimpleValueType NVT =
N->getValueType(0).getSimpleVT().SimpleTy;
if (!Subtarget.isMips1() || NVT != MVT::f64)
@@ -199,14 +201,14 @@ SDNode *MipsDAGToDAGISel::SelectLoadFp64(SDNode *N) {
MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
DebugLoc dl = N->getDebugLoc();
- // The second load should start after for 4 bytes.
+  // The second load should start 4 bytes after the first.
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Offset0))
Offset1 = CurDAG->getTargetConstant(C->getSExtValue()+4, MVT::i32);
else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Offset0))
- Offset1 = CurDAG->getTargetConstantPool(CP->getConstVal(),
- MVT::i32,
- CP->getAlignment(),
- CP->getOffset()+4,
+ Offset1 = CurDAG->getTargetConstantPool(CP->getConstVal(),
+ MVT::i32,
+ CP->getAlignment(),
+ CP->getOffset()+4,
CP->getTargetFlags());
else
return NULL;
@@ -220,16 +222,16 @@ SDNode *MipsDAGToDAGISel::SelectLoadFp64(SDNode *N) {
// Generate:
// lwc $f0, X($3)
// lwc $f1, X+4($3)
- SDNode *LD0 = CurDAG->getMachineNode(Mips::LWC1, dl, MVT::f32,
+ SDNode *LD0 = CurDAG->getMachineNode(Mips::LWC1, dl, MVT::f32,
MVT::Other, Offset0, Base, Chain);
SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
dl, NVT), 0);
- SDValue I0 = CurDAG->getTargetInsertSubreg(Mips::sub_fpeven, dl,
+ SDValue I0 = CurDAG->getTargetInsertSubreg(Mips::sub_fpeven, dl,
MVT::f64, Undef, SDValue(LD0, 0));
SDNode *LD1 = CurDAG->getMachineNode(Mips::LWC1, dl, MVT::f32,
MVT::Other, Offset1, Base, SDValue(LD0, 1));
- SDValue I1 = CurDAG->getTargetInsertSubreg(Mips::sub_fpodd, dl,
+ SDValue I1 = CurDAG->getTargetInsertSubreg(Mips::sub_fpodd, dl,
MVT::f64, I0, SDValue(LD1, 0));
ReplaceUses(SDValue(N, 0), I1);
@@ -241,7 +243,7 @@ SDNode *MipsDAGToDAGISel::SelectLoadFp64(SDNode *N) {
SDNode *MipsDAGToDAGISel::SelectStoreFp64(SDNode *N) {
- if (!Subtarget.isMips1() ||
+ if (!Subtarget.isMips1() ||
N->getOperand(1).getValueType() != MVT::f64)
return NULL;
@@ -265,12 +267,12 @@ SDNode *MipsDAGToDAGISel::SelectStoreFp64(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
// Get the even and odd part from the f64 register
- SDValue FPOdd = CurDAG->getTargetExtractSubreg(Mips::sub_fpodd,
+ SDValue FPOdd = CurDAG->getTargetExtractSubreg(Mips::sub_fpodd,
dl, MVT::f32, N1);
SDValue FPEven = CurDAG->getTargetExtractSubreg(Mips::sub_fpeven,
dl, MVT::f32, N1);
- // The second store should start after for 4 bytes.
+  // The second store should start 4 bytes after the first.
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Offset0))
Offset1 = CurDAG->getTargetConstant(C->getSExtValue()+4, MVT::i32);
else
@@ -315,26 +317,26 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
}
///
- // Instruction Selection not handled by the auto-generated
+ // Instruction Selection not handled by the auto-generated
// tablegen selection should be handled here.
- ///
+ ///
switch(Opcode) {
default: break;
- case ISD::SUBE:
+ case ISD::SUBE:
case ISD::ADDE: {
SDValue InFlag = Node->getOperand(2), CmpLHS;
unsigned Opc = InFlag.getOpcode(); (void)Opc;
- assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) ||
- (Opc == ISD::SUBC || Opc == ISD::SUBE)) &&
+ assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) ||
+ (Opc == ISD::SUBC || Opc == ISD::SUBE)) &&
"(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn");
unsigned MOp;
if (Opcode == ISD::ADDE) {
CmpLHS = InFlag.getValue(0);
MOp = Mips::ADDu;
- } else {
+ } else {
CmpLHS = InFlag.getOperand(0);
MOp = Mips::SUBu;
}
@@ -346,7 +348,7 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
EVT VT = LHS.getValueType();
SDNode *Carry = CurDAG->getMachineNode(Mips::SLTu, dl, VT, Ops, 2);
- SDNode *AddCarry = CurDAG->getMachineNode(Mips::ADDu, dl, VT,
+ SDNode *AddCarry = CurDAG->getMachineNode(Mips::ADDu, dl, VT,
SDValue(Carry,0), RHS);
return CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue,
@@ -356,36 +358,34 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
/// Mul/Div with two results
case ISD::SDIVREM:
case ISD::UDIVREM:
+ break;
case ISD::SMUL_LOHI:
case ISD::UMUL_LOHI: {
SDValue Op1 = Node->getOperand(0);
SDValue Op2 = Node->getOperand(1);
unsigned Op;
- if (Opcode == ISD::UMUL_LOHI || Opcode == ISD::SMUL_LOHI)
- Op = (Opcode == ISD::UMUL_LOHI ? Mips::MULTu : Mips::MULT);
- else
- Op = (Opcode == ISD::UDIVREM ? Mips::DIVu : Mips::DIV);
+ Op = (Opcode == ISD::UMUL_LOHI ? Mips::MULTu : Mips::MULT);
- SDNode *MulDiv = CurDAG->getMachineNode(Op, dl, MVT::Glue, Op1, Op2);
+ SDNode *Mul = CurDAG->getMachineNode(Op, dl, MVT::Glue, Op1, Op2);
- SDValue InFlag = SDValue(MulDiv, 0);
- SDNode *Lo = CurDAG->getMachineNode(Mips::MFLO, dl, MVT::i32,
+ SDValue InFlag = SDValue(Mul, 0);
+ SDNode *Lo = CurDAG->getMachineNode(Mips::MFLO, dl, MVT::i32,
MVT::Glue, InFlag);
InFlag = SDValue(Lo,1);
SDNode *Hi = CurDAG->getMachineNode(Mips::MFHI, dl, MVT::i32, InFlag);
- if (!SDValue(Node, 0).use_empty())
+ if (!SDValue(Node, 0).use_empty())
ReplaceUses(SDValue(Node, 0), SDValue(Lo,0));
- if (!SDValue(Node, 1).use_empty())
+ if (!SDValue(Node, 1).use_empty())
ReplaceUses(SDValue(Node, 1), SDValue(Hi,0));
return NULL;
}
/// Special Muls
- case ISD::MUL:
+ case ISD::MUL:
if (Subtarget.isMips32())
break;
case ISD::MULHS:
@@ -394,7 +394,7 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
SDValue MulOp2 = Node->getOperand(1);
unsigned MulOp = (Opcode == ISD::MULHU ? Mips::MULTu : Mips::MULT);
- SDNode *MulNode = CurDAG->getMachineNode(MulOp, dl,
+ SDNode *MulNode = CurDAG->getMachineNode(MulOp, dl,
MVT::Glue, MulOp1, MulOp2);
SDValue InFlag = SDValue(MulNode, 0);
@@ -408,24 +408,9 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
/// Div/Rem operations
case ISD::SREM:
case ISD::UREM:
- case ISD::SDIV:
- case ISD::UDIV: {
- SDValue Op1 = Node->getOperand(0);
- SDValue Op2 = Node->getOperand(1);
-
- unsigned Op, MOp;
- if (Opcode == ISD::SDIV || Opcode == ISD::UDIV) {
- Op = (Opcode == ISD::SDIV ? Mips::DIV : Mips::DIVu);
- MOp = Mips::MFLO;
- } else {
- Op = (Opcode == ISD::SREM ? Mips::DIV : Mips::DIVu);
- MOp = Mips::MFHI;
- }
- SDNode *Node = CurDAG->getMachineNode(Op, dl, MVT::Glue, Op1, Op2);
-
- SDValue InFlag = SDValue(Node, 0);
- return CurDAG->getMachineNode(MOp, dl, MVT::i32, InFlag);
- }
+ case ISD::SDIV:
+ case ISD::UDIV:
+ break;
// Get target GOT address.
case ISD::GLOBAL_OFFSET_TABLE:
@@ -433,15 +418,15 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
case ISD::ConstantFP: {
ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Node);
- if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) {
- SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) {
+ SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
Mips::ZERO, MVT::i32);
SDValue Undef = SDValue(
CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::f64), 0);
SDNode *MTC = CurDAG->getMachineNode(Mips::MTC1, dl, MVT::f32, Zero);
- SDValue I0 = CurDAG->getTargetInsertSubreg(Mips::sub_fpeven, dl,
+ SDValue I0 = CurDAG->getTargetInsertSubreg(Mips::sub_fpeven, dl,
MVT::f64, Undef, SDValue(MTC, 0));
- SDValue I1 = CurDAG->getTargetInsertSubreg(Mips::sub_fpodd, dl,
+ SDValue I1 = CurDAG->getTargetInsertSubreg(Mips::sub_fpodd, dl,
MVT::f64, I0, SDValue(MTC, 0));
ReplaceUses(SDValue(Node, 0), I1);
return I1.getNode();
@@ -460,61 +445,6 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
return ResNode;
// Other cases are autogenerated.
break;
-
- /// Handle direct and indirect calls when using PIC. On PIC, when
- /// GOT is smaller than about 64k (small code) the GA target is
- /// loaded with only one instruction. Otherwise GA's target must
- /// be loaded with 3 instructions.
- case MipsISD::JmpLink: {
- if (TM.getRelocationModel() == Reloc::PIC_) {
- unsigned LastOpNum = Node->getNumOperands()-1;
-
- SDValue Chain = Node->getOperand(0);
- SDValue Callee = Node->getOperand(1);
- SDValue InFlag;
-
- // Skip the incomming flag if present
- if (Node->getOperand(LastOpNum).getValueType() == MVT::Glue)
- LastOpNum--;
-
- if ( (isa<GlobalAddressSDNode>(Callee)) ||
- (isa<ExternalSymbolSDNode>(Callee)) )
- {
- /// Direct call for global addresses and external symbols
- SDValue GPReg = CurDAG->getRegister(Mips::GP, MVT::i32);
-
- // Use load to get GOT target
- SDValue Ops[] = { Callee, GPReg, Chain };
- SDValue Load = SDValue(CurDAG->getMachineNode(Mips::LW, dl, MVT::i32,
- MVT::Other, Ops, 3), 0);
- Chain = Load.getValue(1);
-
- // Call target must be on T9
- Chain = CurDAG->getCopyToReg(Chain, dl, Mips::T9, Load, InFlag);
- } else
- /// Indirect call
- Chain = CurDAG->getCopyToReg(Chain, dl, Mips::T9, Callee, InFlag);
-
- // Map the JmpLink operands to JALR
- SDVTList NodeTys = CurDAG->getVTList(MVT::Other, MVT::Glue);
- SmallVector<SDValue, 8> Ops;
- Ops.push_back(CurDAG->getRegister(Mips::T9, MVT::i32));
-
- for (unsigned i = 2, e = LastOpNum+1; i != e; ++i)
- Ops.push_back(Node->getOperand(i));
- Ops.push_back(Chain);
- Ops.push_back(Chain.getValue(1));
-
- // Emit Jump and Link Register
- SDNode *ResNode = CurDAG->getMachineNode(Mips::JALR, dl, NodeTys,
- &Ops[0], Ops.size());
-
- // Replace Chain and InFlag
- ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0));
- ReplaceUses(SDValue(Node, 1), SDValue(ResNode, 1));
- return ResNode;
- }
- }
}
// Select the default instruction
@@ -529,7 +459,7 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
return ResNode;
}
-/// createMipsISelDag - This pass converts a legalized DAG into a
+/// createMipsISelDag - This pass converts a legalized DAG into a
/// MIPS-specific DAG, ready for instruction scheduling.
FunctionPass *llvm::createMipsISelDag(MipsTargetMachine &TM) {
return new MipsDAGToDAGISel(TM);
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index 1d7a1c0..1f1220f 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -41,15 +41,19 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
case MipsISD::Lo : return "MipsISD::Lo";
case MipsISD::GPRel : return "MipsISD::GPRel";
case MipsISD::Ret : return "MipsISD::Ret";
- case MipsISD::SelectCC : return "MipsISD::SelectCC";
- case MipsISD::FPSelectCC : return "MipsISD::FPSelectCC";
case MipsISD::FPBrcond : return "MipsISD::FPBrcond";
case MipsISD::FPCmp : return "MipsISD::FPCmp";
+ case MipsISD::CMovFP_T : return "MipsISD::CMovFP_T";
+ case MipsISD::CMovFP_F : return "MipsISD::CMovFP_F";
case MipsISD::FPRound : return "MipsISD::FPRound";
case MipsISD::MAdd : return "MipsISD::MAdd";
case MipsISD::MAddu : return "MipsISD::MAddu";
case MipsISD::MSub : return "MipsISD::MSub";
case MipsISD::MSubu : return "MipsISD::MSubu";
+ case MipsISD::DivRem : return "MipsISD::DivRem";
+ case MipsISD::DivRemU : return "MipsISD::DivRemU";
+ case MipsISD::BuildPairF64: return "MipsISD::BuildPairF64";
+ case MipsISD::ExtractElementF64: return "MipsISD::ExtractElementF64";
default : return NULL;
}
}
@@ -89,25 +93,22 @@ MipsTargetLowering(MipsTargetMachine &TM)
// Mips Custom Operations
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
setOperationAction(ISD::JumpTable, MVT::i32, Custom);
setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
setOperationAction(ISD::SELECT, MVT::f32, Custom);
setOperationAction(ISD::SELECT, MVT::f64, Custom);
setOperationAction(ISD::SELECT, MVT::i32, Custom);
- setOperationAction(ISD::SETCC, MVT::f32, Custom);
- setOperationAction(ISD::SETCC, MVT::f64, Custom);
setOperationAction(ISD::BRCOND, MVT::Other, Custom);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
setOperationAction(ISD::VASTART, MVT::Other, Custom);
-
- // We custom lower AND/OR to handle the case where the DAG contain 'ands/ors'
- // with operands comming from setcc fp comparions. This is necessary since
- // the result from these setcc are in a flag registers (FCR31).
- setOperationAction(ISD::AND, MVT::i32, Custom);
- setOperationAction(ISD::OR, MVT::i32, Custom);
+ setOperationAction(ISD::SDIV, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIV, MVT::i32, Expand);
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
// Operations not directly supported by Mips.
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
@@ -129,7 +130,9 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
setOperationAction(ISD::FSIN, MVT::f32, Expand);
+ setOperationAction(ISD::FSIN, MVT::f64, Expand);
setOperationAction(ISD::FCOS, MVT::f32, Expand);
+ setOperationAction(ISD::FCOS, MVT::f64, Expand);
setOperationAction(ISD::FPOWI, MVT::f32, Expand);
setOperationAction(ISD::FPOW, MVT::f32, Expand);
setOperationAction(ISD::FLOG, MVT::f32, Expand);
@@ -139,6 +142,10 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
+ setOperationAction(ISD::VAARG, MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY, MVT::Other, Expand);
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
+
// Use the default for now
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
@@ -160,6 +167,9 @@ MipsTargetLowering(MipsTargetMachine &TM)
setTargetDAGCombine(ISD::ADDE);
setTargetDAGCombine(ISD::SUBE);
+ setTargetDAGCombine(ISD::SDIVREM);
+ setTargetDAGCombine(ISD::UDIVREM);
+ setTargetDAGCombine(ISD::SETCC);
setStackPointerRegisterToSaveRestore(Mips::SP);
computeRegisterProperties();
@@ -181,7 +191,7 @@ unsigned MipsTargetLowering::getFunctionAlignment(const Function *) const {
// multHi/Lo: product of multiplication
// Lo0: initial value of Lo register
// Hi0: initial value of Hi register
-// Return true if mattern matching was successful.
+// Return true if pattern matching was successful.
static bool SelectMadd(SDNode* ADDENode, SelectionDAG* CurDAG) {
// ADDENode's second operand must be a flag output of an ADDC node in order
// for the matching to be successful.
@@ -255,7 +265,7 @@ static bool SelectMadd(SDNode* ADDENode, SelectionDAG* CurDAG) {
// multHi/Lo: product of multiplication
// Lo0: initial value of Lo register
// Hi0: initial value of Hi register
-// Return true if mattern matching was successful.
+// Return true if pattern matching was successful.
static bool SelectMsub(SDNode* SUBENode, SelectionDAG* CurDAG) {
// SUBENode's second operand must be a flag output of an SUBC node in order
// for the matching to be successful.
@@ -346,6 +356,130 @@ static SDValue PerformSUBECombine(SDNode *N, SelectionDAG& DAG,
return SDValue();
}
+static SDValue PerformDivRemCombine(SDNode *N, SelectionDAG& DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const MipsSubtarget* Subtarget) {
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ unsigned opc = N->getOpcode() == ISD::SDIVREM ? MipsISD::DivRem :
+ MipsISD::DivRemU;
+ DebugLoc dl = N->getDebugLoc();
+
+ SDValue DivRem = DAG.getNode(opc, dl, MVT::Glue,
+ N->getOperand(0), N->getOperand(1));
+ SDValue InChain = DAG.getEntryNode();
+ SDValue InGlue = DivRem;
+
+ // insert MFLO
+ if (N->hasAnyUseOfValue(0)) {
+ SDValue CopyFromLo = DAG.getCopyFromReg(InChain, dl, Mips::LO, MVT::i32,
+ InGlue);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), CopyFromLo);
+ InChain = CopyFromLo.getValue(1);
+ InGlue = CopyFromLo.getValue(2);
+ }
+
+ // insert MFHI
+ if (N->hasAnyUseOfValue(1)) {
+ SDValue CopyFromHi = DAG.getCopyFromReg(InChain, dl,
+ Mips::HI, MVT::i32, InGlue);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), CopyFromHi);
+ }
+
+ return SDValue();
+}
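
A MIPS div(u) writes the quotient to the LO register and the remainder to HI, so a single DivRem node can feed both results; the combine taps LO and HI only for the values that are actually used. A host-side illustration (plain C++, not LLVM code):

    #include <cstdio>

    int main() {
      int a = 17, b = 5;
      int lo = a / b;  // quotient: what a following mflo would read
      int hi = a % b;  // remainder: what a following mfhi would read
      std::printf("lo=%d hi=%d\n", lo, hi);  // lo=3 hi=2
      return 0;
    }
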
+
+static Mips::CondCode FPCondCCodeToFCC(ISD::CondCode CC) {
+ switch (CC) {
+ default: llvm_unreachable("Unknown fp condition code!");
+ case ISD::SETEQ:
+ case ISD::SETOEQ: return Mips::FCOND_OEQ;
+ case ISD::SETUNE: return Mips::FCOND_UNE;
+ case ISD::SETLT:
+ case ISD::SETOLT: return Mips::FCOND_OLT;
+ case ISD::SETGT:
+ case ISD::SETOGT: return Mips::FCOND_OGT;
+ case ISD::SETLE:
+ case ISD::SETOLE: return Mips::FCOND_OLE;
+ case ISD::SETGE:
+ case ISD::SETOGE: return Mips::FCOND_OGE;
+ case ISD::SETULT: return Mips::FCOND_ULT;
+ case ISD::SETULE: return Mips::FCOND_ULE;
+ case ISD::SETUGT: return Mips::FCOND_UGT;
+ case ISD::SETUGE: return Mips::FCOND_UGE;
+ case ISD::SETUO: return Mips::FCOND_UN;
+ case ISD::SETO: return Mips::FCOND_OR;
+ case ISD::SETNE:
+ case ISD::SETONE: return Mips::FCOND_ONE;
+ case ISD::SETUEQ: return Mips::FCOND_UEQ;
+ }
+}
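
The O/U prefixes distinguish ordered from unordered predicates: a comparison is unordered whenever either operand is NaN. A quick host-side illustration (independent of the patch):

    #include <cmath>
    #include <cstdio>

    int main() {
      double nan = std::nan("");
      std::printf("1.0 == NaN -> %d\n", 1.0 == nan);  // ordered-equal (OEQ): 0
      std::printf("1.0 != NaN -> %d\n", 1.0 != nan);  // unordered-not-equal (UNE): 1
      return 0;
    }
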
+
+// Returns true if the condition code has to be inverted.
+static bool InvertFPCondCode(Mips::CondCode CC) {
+ if (CC >= Mips::FCOND_F && CC <= Mips::FCOND_NGT)
+ return false;
+
+ if (CC >= Mips::FCOND_T && CC <= Mips::FCOND_GT)
+ return true;
+
+ assert(false && "Illegal Condition Code");
+ return false;
+}
+
+// Creates and returns an FPCmp node from a setcc node.
+// Returns Op if setcc is not a floating point comparison.
+static SDValue CreateFPCmp(SelectionDAG& DAG, const SDValue& Op) {
+ // must be a SETCC node
+ if (Op.getOpcode() != ISD::SETCC)
+ return Op;
+
+ SDValue LHS = Op.getOperand(0);
+
+ if (!LHS.getValueType().isFloatingPoint())
+ return Op;
+
+ SDValue RHS = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Assume the 3rd operand is a CondCodeSDNode. Add code to check the type of
+ // node if necessary.
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+
+ return DAG.getNode(MipsISD::FPCmp, dl, MVT::Glue, LHS, RHS,
+ DAG.getConstant(FPCondCCodeToFCC(CC), MVT::i32));
+}
+
+// Creates and returns a CMovFP_T/F node.
+static SDValue CreateCMovFP(SelectionDAG& DAG, SDValue Cond, SDValue True,
+ SDValue False, DebugLoc DL) {
+ bool invert = InvertFPCondCode((Mips::CondCode)
+ cast<ConstantSDNode>(Cond.getOperand(2))
+ ->getSExtValue());
+
+ return DAG.getNode((invert ? MipsISD::CMovFP_F : MipsISD::CMovFP_T), DL,
+ True.getValueType(), True, False, Cond);
+}
+
+static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG& DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const MipsSubtarget* Subtarget) {
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ SDValue Cond = CreateFPCmp(DAG, SDValue(N, 0));
+
+ if (Cond.getOpcode() != MipsISD::FPCmp)
+ return SDValue();
+
+ SDValue True = DAG.getConstant(1, MVT::i32);
+ SDValue False = DAG.getConstant(0, MVT::i32);
+
+ return CreateCMovFP(DAG, Cond, True, False, N->getDebugLoc());
+}
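
A rough C-level picture of what the combine arranges (names are illustrative, not the patch's code): the FP compare sets a flag in the condition register, and the conditional move then materializes the i32 0/1 the original setcc promised:

    #include <cstdio>

    static int setccViaCMovFP(double a, double b) {
      bool flag = (a < b);  // MipsISD::FPCmp with condition code OLT
      int result = 0;       // the False constant
      if (flag)
        result = 1;         // CMovFP_T selects the True constant
      return result;
    }

    int main() {
      std::printf("%d %d\n", setccViaCMovFP(1.0, 2.0), setccViaCMovFP(2.0, 1.0));  // 1 0
      return 0;
    }
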
+
SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
const {
SelectionDAG &DAG = DCI.DAG;
@@ -357,6 +491,11 @@ SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
return PerformADDECombine(N, DAG, DCI, Subtarget);
case ISD::SUBE:
return PerformSUBECombine(N, DAG, DCI, Subtarget);
+ case ISD::SDIVREM:
+ case ISD::UDIVREM:
+ return PerformDivRemCombine(N, DAG, DCI, Subtarget);
+ case ISD::SETCC:
+ return PerformSETCCCombine(N, DAG, DCI, Subtarget);
}
return SDValue();
@@ -367,17 +506,15 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const
{
switch (Op.getOpcode())
{
- case ISD::AND: return LowerANDOR(Op, DAG);
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
case ISD::JumpTable: return LowerJumpTable(Op, DAG);
- case ISD::OR: return LowerANDOR(Op, DAG);
case ISD::SELECT: return LowerSELECT(Op, DAG);
- case ISD::SETCC: return LowerSETCC(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG);
}
return SDValue();
@@ -410,122 +547,110 @@ static Mips::FPBranchCode GetFPBranchCodeFromCond(Mips::CondCode CC) {
return Mips::BRANCH_INVALID;
}
-static unsigned FPBranchCodeToOpc(Mips::FPBranchCode BC) {
- switch(BC) {
- default:
- llvm_unreachable("Unknown branch code");
- case Mips::BRANCH_T : return Mips::BC1T;
- case Mips::BRANCH_F : return Mips::BC1F;
- case Mips::BRANCH_TL : return Mips::BC1TL;
- case Mips::BRANCH_FL : return Mips::BC1FL;
- }
-}
-
-static Mips::CondCode FPCondCCodeToFCC(ISD::CondCode CC) {
- switch (CC) {
- default: llvm_unreachable("Unknown fp condition code!");
- case ISD::SETEQ:
- case ISD::SETOEQ: return Mips::FCOND_EQ;
- case ISD::SETUNE: return Mips::FCOND_OGL;
- case ISD::SETLT:
- case ISD::SETOLT: return Mips::FCOND_OLT;
- case ISD::SETGT:
- case ISD::SETOGT: return Mips::FCOND_OGT;
- case ISD::SETLE:
- case ISD::SETOLE: return Mips::FCOND_OLE;
- case ISD::SETGE:
- case ISD::SETOGE: return Mips::FCOND_OGE;
- case ISD::SETULT: return Mips::FCOND_ULT;
- case ISD::SETULE: return Mips::FCOND_ULE;
- case ISD::SETUGT: return Mips::FCOND_UGT;
- case ISD::SETUGE: return Mips::FCOND_UGE;
- case ISD::SETUO: return Mips::FCOND_UN;
- case ISD::SETO: return Mips::FCOND_OR;
- case ISD::SETNE:
- case ISD::SETONE: return Mips::FCOND_NEQ;
- case ISD::SETUEQ: return Mips::FCOND_UEQ;
- }
-}
-
MachineBasicBlock *
MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
+  // There is no need to expand CMov instructions if the target has
+  // conditional moves.
+ if (Subtarget->hasCondMov())
+ return BB;
+
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
bool isFPCmp = false;
DebugLoc dl = MI->getDebugLoc();
+ unsigned Opc;
switch (MI->getOpcode()) {
default: assert(false && "Unexpected instr type to insert");
- case Mips::Select_FCC:
- case Mips::Select_FCC_S32:
- case Mips::Select_FCC_D32:
- isFPCmp = true; // FALL THROUGH
- case Mips::Select_CC:
- case Mips::Select_CC_S32:
- case Mips::Select_CC_D32: {
- // To "insert" a SELECT_CC instruction, we actually have to insert the
- // diamond control-flow pattern. The incoming instruction knows the
- // destination vreg to set, the condition code register to branch on, the
- // true/false values to select between, and a branch opcode to use.
- const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator It = BB;
- ++It;
-
- // thisMBB:
- // ...
- // TrueVal = ...
- // setcc r1, r2, r3
- // bNE r1, r0, copy1MBB
- // fallthrough --> copy0MBB
- MachineBasicBlock *thisMBB = BB;
- MachineFunction *F = BB->getParent();
- MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
- F->insert(It, copy0MBB);
- F->insert(It, sinkMBB);
-
- // Transfer the remainder of BB and its successor edges to sinkMBB.
- sinkMBB->splice(sinkMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
- sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
-
- // Next, add the true and fallthrough blocks as its successors.
- BB->addSuccessor(copy0MBB);
- BB->addSuccessor(sinkMBB);
-
- // Emit the right instruction according to the type of the operands compared
- if (isFPCmp) {
- // Find the condiction code present in the setcc operation.
- Mips::CondCode CC = (Mips::CondCode)MI->getOperand(4).getImm();
- // Get the branch opcode from the branch code.
- unsigned Opc = FPBranchCodeToOpc(GetFPBranchCodeFromCond(CC));
- BuildMI(BB, dl, TII->get(Opc)).addMBB(sinkMBB);
- } else
- BuildMI(BB, dl, TII->get(Mips::BNE)).addReg(MI->getOperand(1).getReg())
- .addReg(Mips::ZERO).addMBB(sinkMBB);
-
- // copy0MBB:
- // %FalseValue = ...
- // # fallthrough to sinkMBB
- BB = copy0MBB;
-
- // Update machine-CFG edges
- BB->addSuccessor(sinkMBB);
-
- // sinkMBB:
- // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ]
- // ...
- BB = sinkMBB;
+ case Mips::MOVT:
+ case Mips::MOVT_S:
+ case Mips::MOVT_D:
+ isFPCmp = true;
+ Opc = Mips::BC1F;
+ break;
+ case Mips::MOVF:
+ case Mips::MOVF_S:
+ case Mips::MOVF_D:
+ isFPCmp = true;
+ Opc = Mips::BC1T;
+ break;
+ case Mips::MOVZ_I:
+ case Mips::MOVZ_S:
+ case Mips::MOVZ_D:
+ Opc = Mips::BNE;
+ break;
+ case Mips::MOVN_I:
+ case Mips::MOVN_S:
+ case Mips::MOVN_D:
+ Opc = Mips::BEQ;
+ break;
+ }
+
+ // To "insert" a SELECT_CC instruction, we actually have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // destination vreg to set, the condition code register to branch on, the
+ // true/false values to select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // setcc r1, r2, r3
+  //  bNE r1, r0, sinkMBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ // Emit the right instruction according to the type of the operands compared
+ if (isFPCmp)
+ BuildMI(BB, dl, TII->get(Opc)).addMBB(sinkMBB);
+ else
+ BuildMI(BB, dl, TII->get(Opc)).addReg(MI->getOperand(2).getReg())
+ .addReg(Mips::ZERO).addMBB(sinkMBB);
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ]
+ // ...
+ BB = sinkMBB;
+
+ if (isFPCmp)
BuildMI(*BB, BB->begin(), dl,
TII->get(Mips::PHI), MI->getOperand(0).getReg())
.addReg(MI->getOperand(2).getReg()).addMBB(thisMBB)
- .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB);
+ .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB);
+ else
+ BuildMI(*BB, BB->begin(), dl,
+ TII->get(Mips::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(3).getReg()).addMBB(thisMBB)
+ .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB);
- MI->eraseFromParent(); // The pseudo instruction is gone now.
- return BB;
- }
- }
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
}
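
A rough C-level picture of the diamond built above for a MOVZ-style pseudo when the subtarget lacks conditional moves (operand order is illustrative only):

    #include <cstdio>

    static int expandedSelect(int cond, int trueVal, int falseVal) {
      int result = trueVal;  // thisMBB: define result with the true value
      if (cond == 0)         // BNE cond, $zero, sinkMBB skips the next block
        result = falseVal;   // copy0MBB: redefine with the false value
      return result;         // sinkMBB: PHI of the two reaching definitions
    }

    int main() {
      std::printf("%d %d\n", expandedSelect(1, 10, 20), expandedSelect(0, 10, 20));  // 10 20
      return 0;
    }
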
//===----------------------------------------------------------------------===//
@@ -590,27 +715,6 @@ LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const
}
SDValue MipsTargetLowering::
-LowerANDOR(SDValue Op, SelectionDAG &DAG) const
-{
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
- DebugLoc dl = Op.getDebugLoc();
-
- if (LHS.getOpcode() != MipsISD::FPCmp || RHS.getOpcode() != MipsISD::FPCmp)
- return Op;
-
- SDValue True = DAG.getConstant(1, MVT::i32);
- SDValue False = DAG.getConstant(0, MVT::i32);
-
- SDValue LSEL = DAG.getNode(MipsISD::FPSelectCC, dl, True.getValueType(),
- LHS, True, False, LHS.getOperand(2));
- SDValue RSEL = DAG.getNode(MipsISD::FPSelectCC, dl, True.getValueType(),
- RHS, True, False, RHS.getOperand(2));
-
- return DAG.getNode(Op.getOpcode(), dl, MVT::i32, LSEL, RSEL);
-}
-
-SDValue MipsTargetLowering::
LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
{
// The first operand is the chain, the second is the condition, the third is
@@ -619,58 +723,32 @@ LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
SDValue Dest = Op.getOperand(2);
DebugLoc dl = Op.getDebugLoc();
- if (Op.getOperand(1).getOpcode() != MipsISD::FPCmp)
+ SDValue CondRes = CreateFPCmp(DAG, Op.getOperand(1));
+
+ // Return if flag is not set by a floating point comparison.
+ if (CondRes.getOpcode() != MipsISD::FPCmp)
return Op;
- SDValue CondRes = Op.getOperand(1);
SDValue CCNode = CondRes.getOperand(2);
Mips::CondCode CC =
(Mips::CondCode)cast<ConstantSDNode>(CCNode)->getZExtValue();
SDValue BrCode = DAG.getConstant(GetFPBranchCodeFromCond(CC), MVT::i32);
return DAG.getNode(MipsISD::FPBrcond, dl, Op.getValueType(), Chain, BrCode,
- Dest, CondRes);
-}
-
-SDValue MipsTargetLowering::
-LowerSETCC(SDValue Op, SelectionDAG &DAG) const
-{
- // The operands to this are the left and right operands to compare (ops #0,
- // and #1) and the condition code to compare them with (op #2) as a
- // CondCodeSDNode.
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
- DebugLoc dl = Op.getDebugLoc();
-
- ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
-
- return DAG.getNode(MipsISD::FPCmp, dl, Op.getValueType(), LHS, RHS,
- DAG.getConstant(FPCondCCodeToFCC(CC), MVT::i32));
+ Dest, CondRes);
}
SDValue MipsTargetLowering::
LowerSELECT(SDValue Op, SelectionDAG &DAG) const
{
- SDValue Cond = Op.getOperand(0);
- SDValue True = Op.getOperand(1);
- SDValue False = Op.getOperand(2);
- DebugLoc dl = Op.getDebugLoc();
+ SDValue Cond = CreateFPCmp(DAG, Op.getOperand(0));
- // if the incomming condition comes from a integer compare, the select
- // operation must be SelectCC or a conditional move if the subtarget
- // supports it.
- if (Cond.getOpcode() != MipsISD::FPCmp) {
- if (Subtarget->hasCondMov() && !True.getValueType().isFloatingPoint())
- return Op;
- return DAG.getNode(MipsISD::SelectCC, dl, True.getValueType(),
- Cond, True, False);
- }
+ // Return if flag is not set by a floating point comparison.
+ if (Cond.getOpcode() != MipsISD::FPCmp)
+ return Op;
- // if the incomming condition comes from fpcmp, the select
- // operation must use FPSelectCC.
- SDValue CCNode = Cond.getOperand(2);
- return DAG.getNode(MipsISD::FPSelectCC, dl, True.getValueType(),
- Cond, True, False, CCNode);
+ return CreateCMovFP(DAG, Cond, Op.getOperand(1), Op.getOperand(2),
+ Op.getDebugLoc());
}
SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op,
@@ -693,12 +771,13 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op,
return DAG.getNode(ISD::ADD, dl, MVT::i32, GOT, GPRelNode);
}
// %hi/%lo relocation
- SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
- MipsII::MO_ABS_HILO);
- SDValue HiPart = DAG.getNode(MipsISD::Hi, dl, VTs, &GA, 1);
- SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, GA);
+ SDValue GAHi = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
+ MipsII::MO_ABS_HI);
+ SDValue GALo = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
+ MipsII::MO_ABS_LO);
+ SDValue HiPart = DAG.getNode(MipsISD::Hi, dl, VTs, &GAHi, 1);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, GALo);
return DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo);
-
} else {
SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
MipsII::MO_GOT);
@@ -707,9 +786,12 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op,
false, false, 0);
    // For functions, and for global targets that are not internally linked,
    // only a load from the GOT/GP is necessary for PIC to work.
- if (!GV->hasLocalLinkage() || isa<Function>(GV))
+ if (!GV->hasInternalLinkage() &&
+ (!GV->hasLocalLinkage() || isa<Function>(GV)))
return ResNode;
- SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, GA);
+ SDValue GALo = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
+ MipsII::MO_ABS_LO);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, GALo);
return DAG.getNode(ISD::ADD, dl, MVT::i32, ResNode, Lo);
}
@@ -717,6 +799,34 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op,
return SDValue(0,0);
}
+SDValue MipsTargetLowering::LowerBlockAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+ // FIXME there isn't actually debug info here
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_) {
+ // %hi/%lo relocation
+ SDValue BAHi = DAG.getBlockAddress(BA, MVT::i32, true,
+ MipsII::MO_ABS_HI);
+ SDValue BALo = DAG.getBlockAddress(BA, MVT::i32, true,
+ MipsII::MO_ABS_LO);
+ SDValue Hi = DAG.getNode(MipsISD::Hi, dl, MVT::i32, BAHi);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, BALo);
+ return DAG.getNode(ISD::ADD, dl, MVT::i32, Hi, Lo);
+ }
+
+ SDValue BAGOTOffset = DAG.getBlockAddress(BA, MVT::i32, true,
+ MipsII::MO_GOT);
+ SDValue BALOOffset = DAG.getBlockAddress(BA, MVT::i32, true,
+ MipsII::MO_ABS_LO);
+ SDValue Load = DAG.getLoad(MVT::i32, dl,
+ DAG.getEntryNode(), BAGOTOffset,
+ MachinePointerInfo(), false, false, 0);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, BALOOffset);
+ return DAG.getNode(ISD::ADD, dl, MVT::i32, Load, Lo);
+}
+
SDValue MipsTargetLowering::
LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
{
@@ -732,7 +842,7 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG) const
// FIXME there isn't actually debug info here
DebugLoc dl = Op.getDebugLoc();
bool IsPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_;
- unsigned char OpFlag = IsPIC ? MipsII::MO_GOT : MipsII::MO_ABS_HILO;
+ unsigned char OpFlag = IsPIC ? MipsII::MO_GOT : MipsII::MO_ABS_HI;
EVT PtrVT = Op.getValueType();
JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
@@ -747,7 +857,9 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG) const
MachinePointerInfo(),
false, false, 0);
- SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, JTI);
+ SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
+ MipsII::MO_ABS_LO);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, JTILo);
ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo);
return ResNode;
@@ -764,7 +876,7 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
// gp_rel relocation
// FIXME: we should reference the constant pool using small data sections,
- // but the asm printer currently doens't support this feature without
+ // but the asm printer currently doesn't support this feature without
// hacking it. This feature should come soon so we can uncomment the
// stuff below.
//if (IsInSmallSection(C->getType())) {
@@ -773,18 +885,22 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
// ResNode = DAG.getNode(ISD::ADD, MVT::i32, GOT, GPRelNode);
if (getTargetMachine().getRelocationModel() != Reloc::PIC_) {
- SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
- N->getOffset(), MipsII::MO_ABS_HILO);
- SDValue HiPart = DAG.getNode(MipsISD::Hi, dl, MVT::i32, CP);
- SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CP);
+ SDValue CPHi = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
+ N->getOffset(), MipsII::MO_ABS_HI);
+ SDValue CPLo = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
+ N->getOffset(), MipsII::MO_ABS_LO);
+ SDValue HiPart = DAG.getNode(MipsISD::Hi, dl, MVT::i32, CPHi);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CPLo);
ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo);
} else {
SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
- N->getOffset(), MipsII::MO_GOT);
+ N->getOffset(), MipsII::MO_GOT);
SDValue Load = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(),
CP, MachinePointerInfo::getConstantPool(),
false, false, 0);
- SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CP);
+ SDValue CPLo = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
+ N->getOffset(), MipsII::MO_ABS_LO);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CPLo);
ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, Load, Lo);
}
@@ -937,46 +1053,28 @@ static bool CC_MipsO32_VarArgs(unsigned ValNo, MVT ValVT,
LocInfo = CCValAssign::AExt;
}
- if (ValVT == MVT::i32 || ValVT == MVT::f32) {
- if (unsigned Reg = State.AllocateReg(IntRegs, IntRegsSize)) {
- State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, MVT::i32, LocInfo));
- return false;
- }
- unsigned Off = State.AllocateStack(4, 4);
- State.addLoc(CCValAssign::getMem(ValNo, ValVT, Off, LocVT, LocInfo));
- return false;
- }
-
- unsigned UnallocIntReg = State.getFirstUnallocated(IntRegs, IntRegsSize);
- if (ValVT == MVT::f64) {
- if (IntRegs[UnallocIntReg] == (unsigned (Mips::A1))) {
- // A1 can't be used anymore, because 64 bit arguments
- // must be aligned when copied back to the caller stack
- State.AllocateReg(IntRegs, IntRegsSize);
- UnallocIntReg++;
- }
-
- if (IntRegs[UnallocIntReg] == (unsigned (Mips::A0)) ||
- IntRegs[UnallocIntReg] == (unsigned (Mips::A2))) {
- unsigned Reg = State.AllocateReg(IntRegs, IntRegsSize);
- State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, MVT::i32, LocInfo));
- // Shadow the next register so it can be used
- // later to get the other 32bit part.
- State.AllocateReg(IntRegs, IntRegsSize);
- return false;
- }
+ unsigned Reg;
- // Register is shadowed to preserve alignment, and the
- // argument goes to a stack location.
- if (UnallocIntReg != IntRegsSize)
- State.AllocateReg(IntRegs, IntRegsSize);
+ if (ValVT == MVT::i32 || ValVT == MVT::f32) {
+ Reg = State.AllocateReg(IntRegs, IntRegsSize);
+ LocVT = MVT::i32;
+ } else if (ValVT == MVT::f64) {
+ Reg = State.AllocateReg(IntRegs, IntRegsSize);
+ if (Reg == Mips::A1 || Reg == Mips::A3)
+ Reg = State.AllocateReg(IntRegs, IntRegsSize);
+ State.AllocateReg(IntRegs, IntRegsSize);
+ LocVT = MVT::i32;
+ } else
+ llvm_unreachable("Cannot handle this ValVT.");
- unsigned Off = State.AllocateStack(8, 8);
- State.addLoc(CCValAssign::getMem(ValNo, ValVT, Off, LocVT, LocInfo));
- return false;
- }
+ if (!Reg) {
+ unsigned SizeInBytes = ValVT.getSizeInBits() >> 3;
+ unsigned Offset = State.AllocateStack(SizeInBytes, SizeInBytes);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ } else
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
- return true; // CC didn't match
+ return false; // CC must always match
}
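
A toy allocator mirroring the O32 rule just encoded (illustrative only): i32/f32 take the next integer register, while f64 must start on an even register (A0 or A2) and shadows its partner:

    #include <cstdio>

    int main() {
      const char *regs[4] = {"A0", "A1", "A2", "A3"};
      int next = 0;
      // Passing (i32, f64): the i32 takes A0; the f64 skips A1, taking A2/A3.
      std::printf("i32 -> %s\n", regs[next++]);
      if (next % 2 != 0)
        ++next;  // realign to an even register, as the A1/A3 check above does
      std::printf("f64 -> %s/%s\n", regs[next], regs[next + 1]);
      return 0;
    }
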
//===----------------------------------------------------------------------===//
@@ -1043,11 +1141,12 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i32)
Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
if (VA.getValVT() == MVT::f64 && VA.getLocVT() == MVT::i32) {
- Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
- SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Arg,
- DAG.getConstant(0, getPointerTy()));
- SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Arg,
- DAG.getConstant(1, getPointerTy()));
+ SDValue Lo = DAG.getNode(MipsISD::ExtractElementF64, dl, MVT::i32,
+ Arg, DAG.getConstant(0, MVT::i32));
+ SDValue Hi = DAG.getNode(MipsISD::ExtractElementF64, dl, MVT::i32,
+ Arg, DAG.getConstant(1, MVT::i32));
+ if (!Subtarget->isLittle())
+ std::swap(Lo, Hi);
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
RegsToPass.push_back(std::make_pair(VA.getLocReg()+1, Hi));
continue;
@@ -1100,7 +1199,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Build a sequence of copy-to-reg nodes chained together with token
// chain and flag operands which copy the outgoing args into registers.
- // The InFlag in necessary since all emited instructions must be
+  // The InFlag is necessary since all emitted instructions must be
// stuck together.
SDValue InFlag;
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
@@ -1113,12 +1212,52 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
// node so that legalize doesn't hack it.
unsigned char OpFlag = IsPIC ? MipsII::MO_GOT_CALL : MipsII::MO_NO_FLAG;
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
- Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
- getPointerTy(), 0, OpFlag);
- else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
+ bool LoadSymAddr = false;
+ SDValue CalleeLo;
+
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ if (IsPIC && G->getGlobal()->hasInternalLinkage()) {
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
+                                            getPointerTy(), 0, MipsII::MO_GOT);
+ CalleeLo = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy(),
+ 0, MipsII::MO_ABS_LO);
+ } else {
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
+ getPointerTy(), 0, OpFlag);
+ }
+
+ LoadSymAddr = true;
+ }
+ else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
Callee = DAG.getTargetExternalSymbol(S->getSymbol(),
getPointerTy(), OpFlag);
+ LoadSymAddr = true;
+ }
+
+ // Create nodes that load address of callee and copy it to T9
+ if (IsPIC) {
+ if (LoadSymAddr) {
+ // Load callee address
+ SDValue LoadValue = DAG.getLoad(MVT::i32, dl, Chain, Callee,
+ MachinePointerInfo::getGOT(),
+ false, false, 0);
+
+ // Use GOT+LO if callee has internal linkage.
+ if (CalleeLo.getNode()) {
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CalleeLo);
+ Callee = DAG.getNode(ISD::ADD, dl, MVT::i32, LoadValue, Lo);
+ } else
+ Callee = LoadValue;
+
+ // Use chain output from LoadValue
+ Chain = LoadValue.getValue(1);
+ }
+
+ // copy to T9
+ Chain = DAG.getCopyToReg(Chain, dl, Mips::T9, Callee, SDValue(0, 0));
+ InFlag = Chain.getValue(1);
+ Callee = DAG.getRegister(Mips::T9, MVT::i32);
+ }
// MipsJmpLink = #chain, #target_address, #opt_in_flags...
// = Chain, Callee, Reg#1, Reg#2, ...
@@ -1143,7 +1282,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Create a stack location to hold GP when PIC is used. This stack
// location is used on function prologue to save GP and also after all
- // emited CALL's to restore GP.
+  // emitted CALLs to restore GP.
if (IsPIC) {
// Function can have an arbitrary number of calls, so
// hold the LastArgStackLoc with the biggest offset.
@@ -1218,18 +1357,18 @@ MipsTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
/// and generate load operations for arguments places on the stack.
SDValue
MipsTargetLowering::LowerFormalArguments(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg>
- &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals)
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
- unsigned StackReg = MF.getTarget().getRegisterInfo()->getFrameRegister(MF);
MipsFI->setVarArgsFrameIndex(0);
  // Used with varargs to accumulate store chains.
@@ -1249,9 +1388,9 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
else
CCInfo.AnalyzeFormalArguments(Ins, CC_Mips);
- SDValue StackPtr;
-
unsigned FirstStackArgLoc = (Subtarget->isABI_EABI() ? 0 : 16);
+ unsigned LastStackArgEndOffset = 0;
+ EVT LastRegArgValVT;
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
@@ -1260,6 +1399,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
if (VA.isRegLoc()) {
EVT RegVT = VA.getLocVT();
ArgRegEnd = VA.getLocReg();
+ LastRegArgValVT = VA.getValVT();
TargetRegisterClass *RC = 0;
if (RegVT == MVT::i32)
@@ -1300,8 +1440,10 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
unsigned Reg2 = AddLiveIn(DAG.getMachineFunction(),
VA.getLocReg()+1, RC);
SDValue ArgValue2 = DAG.getCopyFromReg(Chain, dl, Reg2, RegVT);
- SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, ArgValue2, ArgValue);
- ArgValue = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Pair);
+ if (!Subtarget->isLittle())
+ std::swap(ArgValue, ArgValue2);
+ ArgValue = DAG.getNode(MipsISD::BuildPairF64, dl, MVT::f64,
+ ArgValue, ArgValue2);
}
}
@@ -1321,10 +1463,10 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
// used instead of a direct negative address (which is recorded to
// be used on emitPrologue) to avoid mis-calc of the first stack
// offset on PEI::calculateFrameObjectOffsets.
- // Arguments are always 32-bit.
- unsigned ArgSize = VA.getLocVT().getSizeInBits()/8;
+ unsigned ArgSize = VA.getValVT().getSizeInBits()/8;
+ LastStackArgEndOffset = FirstStackArgLoc + VA.getLocMemOffset() + ArgSize;
int FI = MFI->CreateFixedObject(ArgSize, 0, true);
- MipsFI->recordLoadArgsFI(FI, -(ArgSize+
+ MipsFI->recordLoadArgsFI(FI, -(4 +
(FirstStackArgLoc + VA.getLocMemOffset())));
// Create load nodes to retrieve arguments from the stack
@@ -1351,29 +1493,52 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
// To meet ABI, when VARARGS are passed on registers, the registers
// must have their values written to the caller stack frame. If the last
// argument was placed in the stack, there's no need to save any register.
- if ((isVarArg) && (Subtarget->isABI_O32() && ArgRegEnd)) {
- if (StackPtr.getNode() == 0)
- StackPtr = DAG.getRegister(StackReg, getPointerTy());
-
- // The last register argument that must be saved is Mips::A3
- TargetRegisterClass *RC = Mips::CPURegsRegisterClass;
- unsigned StackLoc = ArgLocs.size()-1;
-
- for (++ArgRegEnd; ArgRegEnd <= Mips::A3; ++ArgRegEnd, ++StackLoc) {
- unsigned Reg = AddLiveIn(DAG.getMachineFunction(), ArgRegEnd, RC);
- SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, MVT::i32);
-
- int FI = MFI->CreateFixedObject(4, 0, true);
- MipsFI->recordStoreVarArgsFI(FI, -(4+(StackLoc*4)));
- SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy());
- OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff,
- MachinePointerInfo(),
- false, false, 0));
-
- // Record the frame index of the first variable argument
- // which is a value necessary to VASTART.
- if (!MipsFI->getVarArgsFrameIndex())
+ if (isVarArg && Subtarget->isABI_O32()) {
+ if (ArgRegEnd) {
+ // Last named formal argument is passed in register.
+
+ // The last register argument that must be saved is Mips::A3
+ TargetRegisterClass *RC = Mips::CPURegsRegisterClass;
+ if (LastRegArgValVT == MVT::f64)
+ ArgRegEnd++;
+
+ if (ArgRegEnd < Mips::A3) {
+ // Both the last named formal argument and the first variable
+ // argument are passed in registers.
+ for (++ArgRegEnd; ArgRegEnd <= Mips::A3; ++ArgRegEnd) {
+ unsigned Reg = AddLiveIn(DAG.getMachineFunction(), ArgRegEnd, RC);
+ SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, MVT::i32);
+
+ int FI = MFI->CreateFixedObject(4, 0, true);
+ MipsFI->recordStoreVarArgsFI(FI, -(4+(ArgRegEnd-Mips::A0)*4));
+ SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy());
+ OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0));
+
+ // Record the frame index of the first variable argument
+ // which is a value necessary to VASTART.
+ if (!MipsFI->getVarArgsFrameIndex()) {
+ MFI->setObjectAlignment(FI, 4);
+ MipsFI->setVarArgsFrameIndex(FI);
+ }
+ }
+ } else {
+ // Last named formal argument is in register Mips::A3, and the first
+ // variable argument is on stack. Record the frame index of the first
+ // variable argument.
+ int FI = MFI->CreateFixedObject(4, 0, true);
+ MFI->setObjectAlignment(FI, 4);
+ MipsFI->recordStoreVarArgsFI(FI, -20);
MipsFI->setVarArgsFrameIndex(FI);
+ }
+ } else {
+ // Last named formal argument and all the variable arguments are passed
+ // on stack. Record the frame index of the first variable argument.
+ int FI = MFI->CreateFixedObject(4, 0, true);
+ MFI->setObjectAlignment(FI, 4);
+ MipsFI->recordStoreVarArgsFI(FI, -(4+LastStackArgEndOffset));
+ MipsFI->setVarArgsFrameIndex(FI);
}
}
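
At the source level, these spills are what let va_arg walk the register-passed and stack-passed arguments as one contiguous area. A standard, self-contained example (independent of the patch):

    #include <cstdarg>
    #include <cstdio>

    static int sum(int n, ...) {  // under O32, n arrives in A0, the rest in A1..A3/stack
      va_list ap;
      va_start(ap, n);
      int s = 0;
      for (int i = 0; i < n; ++i)
        s += va_arg(ap, int);     // reads the A1-A3 spill slots, then the stack
      va_end(ap);
      return s;
    }

    int main() {
      std::printf("%d\n", sum(3, 1, 2, 3));  // prints 6
      return 0;
    }
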
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index 9d6b9f3..e4d0c3d 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -31,45 +31,50 @@ namespace llvm {
// Get the Higher 16 bits from a 32-bit immediate
// No relation with Mips Hi register
- Hi,
+ Hi,
// Get the Lower 16 bits from a 32-bit immediate
// No relation with Mips Lo register
- Lo,
+ Lo,
// Handle gp_rel (small data/bss sections) relocation.
GPRel,
- // Select CC Pseudo Instruction
- SelectCC,
-
- // Floating Point Select CC Pseudo Instruction
- FPSelectCC,
-
// Floating Point Branch Conditional
FPBrcond,
// Floating Point Compare
FPCmp,
+ // Floating Point Conditional Moves
+ CMovFP_T,
+ CMovFP_F,
+
// Floating Point Rounding
FPRound,
- // Return
+ // Return
Ret,
// MAdd/Sub nodes
MAdd,
MAddu,
MSub,
- MSubu
+ MSubu,
+
+ // DivRem(u)
+ DivRem,
+ DivRemU,
+
+ BuildPairF64,
+ ExtractElementF64
};
}
//===--------------------------------------------------------------------===//
// TargetLowering Implementation
//===--------------------------------------------------------------------===//
-
+
class MipsTargetLowering : public TargetLowering {
public:
explicit MipsTargetLowering(MipsTargetMachine &TM);
@@ -77,7 +82,7 @@ namespace llvm {
/// LowerOperation - Provide custom lowering hooks for some operations.
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
- /// getTargetNodeName - This method returns the name of a target specific
+ /// getTargetNodeName - This method returns the name of a target specific
// DAG node.
virtual const char *getTargetNodeName(unsigned Opcode) const;
@@ -87,7 +92,7 @@ namespace llvm {
/// getFunctionAlignment - Return the Log2 alignment of this function.
virtual unsigned getFunctionAlignment(const Function *F) const;
- virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
private:
// Subtarget Info
const MipsSubtarget *Subtarget;
@@ -101,16 +106,15 @@ namespace llvm {
SmallVectorImpl<SDValue> &InVals) const;
// Lower Operand specifics
- SDValue LowerANDOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
virtual SDValue
@@ -149,7 +153,7 @@ namespace llvm {
ConstraintWeight getSingleConstraintMatchWeight(
AsmOperandInfo &info, const char *constraint) const;
- std::pair<unsigned, const TargetRegisterClass*>
+ std::pair<unsigned, const TargetRegisterClass*>
getRegForInlineAsmConstraint(const std::string &Constraint,
EVT VT) const;
diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td
index 977e0df..a86c5c7 100644
--- a/lib/Target/Mips/MipsInstrFPU.td
+++ b/lib/Target/Mips/MipsInstrFPU.td
@@ -24,19 +24,28 @@
//===----------------------------------------------------------------------===//
// Floating Point Compare and Branch
-def SDT_MipsFPBrcond : SDTypeProfile<0, 3, [SDTCisSameAs<0, 2>, SDTCisInt<0>,
- SDTCisVT<1, OtherVT>]>;
-def SDT_MipsFPCmp : SDTypeProfile<1, 3, [SDTCisVT<0, i32>,
- SDTCisSameAs<1, 2>, SDTCisFP<1>,
- SDTCisInt<3>]>;
-def SDT_MipsFPSelectCC : SDTypeProfile<1, 4, [SDTCisInt<1>, SDTCisInt<4>,
- SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>]>;
-
+def SDT_MipsFPBrcond : SDTypeProfile<0, 2, [SDTCisInt<0>,
+ SDTCisVT<1, OtherVT>]>;
+def SDT_MipsFPCmp : SDTypeProfile<0, 3, [SDTCisSameAs<0, 1>, SDTCisFP<1>,
+ SDTCisInt<2>]>;
+def SDT_MipsCMovFP : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<1, 2>]>;
+def SDT_MipsBuildPairF64 : SDTypeProfile<1, 2, [SDTCisVT<0, f64>,
+ SDTCisVT<1, i32>,
+ SDTCisSameAs<1, 2>]>;
+def SDT_MipsExtractElementF64 : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
+ SDTCisVT<1, f64>,
+ SDTCisVT<2, i32>]>;
+
+def MipsFPCmp : SDNode<"MipsISD::FPCmp", SDT_MipsFPCmp, [SDNPOutGlue]>;
+def MipsCMovFP_T : SDNode<"MipsISD::CMovFP_T", SDT_MipsCMovFP, [SDNPInGlue]>;
+def MipsCMovFP_F : SDNode<"MipsISD::CMovFP_F", SDT_MipsCMovFP, [SDNPInGlue]>;
def MipsFPRound : SDNode<"MipsISD::FPRound", SDTFPRoundOp, [SDNPOptInGlue]>;
-def MipsFPBrcond : SDNode<"MipsISD::FPBrcond", SDT_MipsFPBrcond,
- [SDNPHasChain]>;
-def MipsFPCmp : SDNode<"MipsISD::FPCmp", SDT_MipsFPCmp>;
-def MipsFPSelectCC : SDNode<"MipsISD::FPSelectCC", SDT_MipsFPSelectCC>;
+def MipsFPBrcond : SDNode<"MipsISD::FPBrcond", SDT_MipsFPBrcond,
+ [SDNPHasChain, SDNPOptInGlue]>;
+def MipsBuildPairF64 : SDNode<"MipsISD::BuildPairF64", SDT_MipsBuildPairF64>;
+def MipsExtractElementF64 : SDNode<"MipsISD::ExtractElementF64",
+ SDT_MipsExtractElementF64>;
// Operand for printing out a condition code.
let PrintMethod = "printFCCOperand" in
@@ -54,7 +63,7 @@ def IsNotMipsI : Predicate<"!Subtarget.isMips1()">;
//===----------------------------------------------------------------------===//
// Instruction Class Templates
//
-// A set of multiclasses is used to address the register usage.
+// A set of multiclasses is used to address the register usage.
//
// S32 - single precision in 16 32bit even fp registers
// single precision in 32 32bit fp registers in SingleOnly mode
@@ -65,7 +74,7 @@ def IsNotMipsI : Predicate<"!Subtarget.isMips1()">;
// Only S32 and D32 are supported right now.
//===----------------------------------------------------------------------===//
-multiclass FFR1_1<bits<6> funct, string asmstr>
+multiclass FFR1_1<bits<6> funct, string asmstr>
{
def _S32 : FFR<0x11, funct, 0x0, (outs FGR32:$fd), (ins FGR32:$fs),
!strconcat(asmstr, ".s $fd, $fs"), []>;
@@ -74,31 +83,31 @@ multiclass FFR1_1<bits<6> funct, string asmstr>
!strconcat(asmstr, ".d $fd, $fs"), []>, Requires<[In32BitMode]>;
}
-multiclass FFR1_2<bits<6> funct, string asmstr, SDNode FOp>
+multiclass FFR1_2<bits<6> funct, string asmstr, SDNode FOp>
{
def _S32 : FFR<0x11, funct, 0x0, (outs FGR32:$fd), (ins FGR32:$fs),
- !strconcat(asmstr, ".s $fd, $fs"),
+ !strconcat(asmstr, ".s $fd, $fs"),
[(set FGR32:$fd, (FOp FGR32:$fs))]>;
def _D32 : FFR<0x11, funct, 0x1, (outs AFGR64:$fd), (ins AFGR64:$fs),
- !strconcat(asmstr, ".d $fd, $fs"),
+ !strconcat(asmstr, ".d $fd, $fs"),
[(set AFGR64:$fd, (FOp AFGR64:$fs))]>, Requires<[In32BitMode]>;
}
-class FFR1_3<bits<6> funct, bits<5> fmt, RegisterClass RcSrc,
- RegisterClass RcDst, string asmstr>:
- FFR<0x11, funct, fmt, (outs RcSrc:$fd), (ins RcDst:$fs),
- !strconcat(asmstr, " $fd, $fs"), []>;
+class FFR1_3<bits<6> funct, bits<5> fmt, RegisterClass RcSrc,
+ RegisterClass RcDst, string asmstr>:
+ FFR<0x11, funct, fmt, (outs RcSrc:$fd), (ins RcDst:$fs),
+ !strconcat(asmstr, " $fd, $fs"), []>;
multiclass FFR1_4<bits<6> funct, string asmstr, SDNode FOp> {
- def _S32 : FFR<0x11, funct, 0x0, (outs FGR32:$fd),
- (ins FGR32:$fs, FGR32:$ft),
+ def _S32 : FFR<0x11, funct, 0x0, (outs FGR32:$fd),
+ (ins FGR32:$fs, FGR32:$ft),
!strconcat(asmstr, ".s $fd, $fs, $ft"),
[(set FGR32:$fd, (FOp FGR32:$fs, FGR32:$ft))]>;
- def _D32 : FFR<0x11, funct, 0x1, (outs AFGR64:$fd),
- (ins AFGR64:$fs, AFGR64:$ft),
+ def _D32 : FFR<0x11, funct, 0x1, (outs AFGR64:$fd),
+ (ins AFGR64:$fs, AFGR64:$ft),
!strconcat(asmstr, ".d $fd, $fs, $ft"),
[(set AFGR64:$fd, (FOp AFGR64:$fs, AFGR64:$ft))]>,
Requires<[In32BitMode]>;
@@ -115,8 +124,8 @@ let ft = 0 in {
defm TRUNC_W : FFR1_1<0b001101, "trunc.w">;
defm CVTW : FFR1_1<0b100100, "cvt.w">;
- defm FABS : FFR1_2<0b000101, "abs", fabs>;
- defm FNEG : FFR1_2<0b000111, "neg", fneg>;
+ defm FABS : FFR1_2<0b000101, "abs", fabs>;
+ defm FNEG : FFR1_2<0b000111, "neg", fneg>;
defm FSQRT : FFR1_2<0b000100, "sqrt", fsqrt>;
/// Convert to Single Precision
@@ -140,23 +149,23 @@ let ft = 0 in {
def TRUNC_LD : FFR1_3<0b001001, 0x1, AFGR64, AFGR64, "trunc.l">;
/// Convert to long signed integer
- def CVTL_S : FFR1_3<0b100101, 0x0, FGR32, FGR32, "cvt.l">;
- def CVTL_D : FFR1_3<0b100101, 0x1, AFGR64, AFGR64, "cvt.l">;
-
- /// Convert to Double Precison
- def CVTD_S32 : FFR1_3<0b100001, 0x0, AFGR64, FGR32, "cvt.d.s">;
- def CVTD_W32 : FFR1_3<0b100001, 0x2, AFGR64, FGR32, "cvt.d.w">;
- def CVTD_L32 : FFR1_3<0b100001, 0x3, AFGR64, AFGR64, "cvt.d.l">;
-
+ def CVTL_S : FFR1_3<0b100101, 0x0, FGR32, FGR32, "cvt.l">;
+ def CVTL_D : FFR1_3<0b100101, 0x1, AFGR64, AFGR64, "cvt.l">;
+
+ /// Convert to Double Precision
+ def CVTD_S32 : FFR1_3<0b100001, 0x0, AFGR64, FGR32, "cvt.d.s">;
+ def CVTD_W32 : FFR1_3<0b100001, 0x2, AFGR64, FGR32, "cvt.d.w">;
+ def CVTD_L32 : FFR1_3<0b100001, 0x3, AFGR64, AFGR64, "cvt.d.l">;
+
/// Convert to Single Precision
def CVTS_D32 : FFR1_3<0b100000, 0x1, FGR32, AFGR64, "cvt.s.d">;
- def CVTS_L32 : FFR1_3<0b100000, 0x3, FGR32, AFGR64, "cvt.s.l">;
+ def CVTS_L32 : FFR1_3<0b100000, 0x3, FGR32, AFGR64, "cvt.s.l">;
}
}
// The odd-numbered registers are only referenced when doing loads,
// stores, and moves between floating-point and integer registers.
-// When defining instructions, we reference all 32-bit registers,
+// When defining instructions, we reference all 32-bit registers,
// regardless of register aliasing.
let fd = 0 in {
/// Move Control Registers From/To CPU Registers
@@ -165,7 +174,7 @@ let fd = 0 in {
def CTC1 : FFR<0x11, 0x0, 0x6, (outs CCR:$rt), (ins CPURegs:$fs),
"ctc1 $fs, $rt", []>;
-
+
def MFC1 : FFR<0x11, 0x00, 0x00, (outs CPURegs:$rt), (ins FGR32:$fs),
"mfc1 $rt, $fs", []>;
@@ -180,18 +189,18 @@ def FMOV_D32 : FFR<0x11, 0b000110, 0x1, (outs AFGR64:$fd), (ins AFGR64:$fs),
/// Floating Point Memory Instructions
let Predicates = [IsNotSingleFloat, IsNotMipsI] in {
- def LDC1 : FFI<0b110101, (outs AFGR64:$ft), (ins mem:$addr),
+ def LDC1 : FFI<0b110101, (outs AFGR64:$ft), (ins mem:$addr),
"ldc1 $ft, $addr", [(set AFGR64:$ft, (load addr:$addr))]>;
- def SDC1 : FFI<0b111101, (outs), (ins AFGR64:$ft, mem:$addr),
+ def SDC1 : FFI<0b111101, (outs), (ins AFGR64:$ft, mem:$addr),
"sdc1 $ft, $addr", [(store AFGR64:$ft, addr:$addr)]>;
}
-// LWC1 and SWC1 can always be emited with odd registers.
+// LWC1 and SWC1 can always be emitted with odd registers.
def LWC1 : FFI<0b110001, (outs FGR32:$ft), (ins mem:$addr), "lwc1 $ft, $addr",
- [(set FGR32:$ft, (load addr:$addr))]>;
+ [(set FGR32:$ft, (load addr:$addr))]>;
def SWC1 : FFI<0b111001, (outs), (ins FGR32:$ft, mem:$addr), "swc1 $ft, $addr",
- [(store FGR32:$ft, addr:$addr)]>;
+ [(store FGR32:$ft, addr:$addr)]>;
/// Floating-point Arithmetic
defm FADD : FFR1_4<0x10, "add", fadd>;
@@ -202,7 +211,7 @@ defm FSUB : FFR1_4<0x01, "sub", fsub>;
//===----------------------------------------------------------------------===//
// Floating Point Branch Codes
//===----------------------------------------------------------------------===//
-// Mips branch codes. These correspond to condcode in MipsInstrInfo.h.
+// Mips branch codes. These correspond to condcode in MipsInstrInfo.h.
// They must be kept in sync.
def MIPS_BRANCH_F : PatLeaf<(i32 0)>;
def MIPS_BRANCH_T : PatLeaf<(i32 1)>;
@@ -210,11 +219,11 @@ def MIPS_BRANCH_FL : PatLeaf<(i32 2)>;
def MIPS_BRANCH_TL : PatLeaf<(i32 3)>;
/// Floating Point Branch of False/True (Likely)
-let isBranch=1, isTerminator=1, hasDelaySlot=1, base=0x8, Uses=[FCR31] in {
- class FBRANCH<PatLeaf op, string asmstr> : FFI<0x11, (outs),
+let isBranch=1, isTerminator=1, hasDelaySlot=1, base=0x8, Uses=[FCR31] in
+ class FBRANCH<PatLeaf op, string asmstr> : FFI<0x11, (outs),
(ins brtarget:$dst), !strconcat(asmstr, " $dst"),
- [(MipsFPBrcond op, bb:$dst, FCR31)]>;
-}
+ [(MipsFPBrcond op, bb:$dst)]>;
+
def BC1F : FBRANCH<MIPS_BRANCH_F, "bc1f">;
def BC1T : FBRANCH<MIPS_BRANCH_T, "bc1t">;
def BC1FL : FBRANCH<MIPS_BRANCH_FL, "bc1fl">;
@@ -223,11 +232,11 @@ def BC1TL : FBRANCH<MIPS_BRANCH_TL, "bc1tl">;
//===----------------------------------------------------------------------===//
// Floating Point Flag Conditions
//===----------------------------------------------------------------------===//
-// Mips condition codes. They must correspond to condcode in MipsInstrInfo.h.
+// Mips condition codes. They must correspond to condcode in MipsInstrInfo.h.
// They must be kept in sync.
def MIPS_FCOND_F : PatLeaf<(i32 0)>;
def MIPS_FCOND_UN : PatLeaf<(i32 1)>;
-def MIPS_FCOND_EQ : PatLeaf<(i32 2)>;
+def MIPS_FCOND_OEQ : PatLeaf<(i32 2)>;
def MIPS_FCOND_UEQ : PatLeaf<(i32 3)>;
def MIPS_FCOND_OLT : PatLeaf<(i32 4)>;
def MIPS_FCOND_ULT : PatLeaf<(i32 5)>;
@@ -245,44 +254,90 @@ def MIPS_FCOND_NGT : PatLeaf<(i32 15)>;
/// Floating Point Compare
let hasDelaySlot = 1, Defs=[FCR31] in {
def FCMP_S32 : FCC<0x0, (outs), (ins FGR32:$fs, FGR32:$ft, condcode:$cc),
- "c.$cc.s $fs, $ft",
- [(set FCR31, (MipsFPCmp FGR32:$fs, FGR32:$ft, imm:$cc))]>;
-
+ "c.$cc.s $fs, $ft",
+ [(MipsFPCmp FGR32:$fs, FGR32:$ft, imm:$cc)]>;
+
def FCMP_D32 : FCC<0x1, (outs), (ins AFGR64:$fs, AFGR64:$ft, condcode:$cc),
- "c.$cc.d $fs, $ft",
- [(set FCR31, (MipsFPCmp AFGR64:$fs, AFGR64:$ft, imm:$cc))]>,
- Requires<[In32BitMode]>;
+ "c.$cc.d $fs, $ft",
+ [(MipsFPCmp AFGR64:$fs, AFGR64:$ft, imm:$cc)]>,
+ Requires<[In32BitMode]>;
}
-//===----------------------------------------------------------------------===//
-// Floating Point Pseudo-Instructions
-//===----------------------------------------------------------------------===//
-// For some explanation, see Select_CC at MipsInstrInfo.td. We also embedd a
-// condiciton code to enable easy handling by the Custom Inserter.
-let usesCustomInserter = 1, Uses=[FCR31] in {
- class PseudoFPSelCC<RegisterClass RC, string asmstr> :
- MipsPseudo<(outs RC:$dst),
- (ins CPURegs:$CmpRes, RC:$T, RC:$F, condcode:$cc), asmstr,
- [(set RC:$dst, (MipsFPSelectCC CPURegs:$CmpRes, RC:$T, RC:$F,
- imm:$cc))]>;
+// Conditional moves:
+// These instructions are expanded in
+// MipsISelLowering::EmitInstrWithCustomInserter if the target does not have
+// conditional move instructions.
+// flag:int, data:float
+let usesCustomInserter = 1, Constraints = "$F = $dst" in
+class CondMovIntFP<RegisterClass RC, bits<5> fmt, bits<6> func,
+ string instr_asm> :
+ FFR<0x11, func, fmt, (outs RC:$dst), (ins RC:$T, CPURegs:$cond, RC:$F),
+ !strconcat(instr_asm, "\t$dst, $T, $cond"), []>;
+
+def MOVZ_S : CondMovIntFP<FGR32, 16, 18, "movz.s">;
+def MOVN_S : CondMovIntFP<FGR32, 16, 19, "movn.s">;
+
+let Predicates = [In32BitMode] in {
+ def MOVZ_D : CondMovIntFP<AFGR64, 17, 18, "movz.d">;
+ def MOVN_D : CondMovIntFP<AFGR64, 17, 19, "movn.d">;
}
-// The values to be selected are fp but the condition test is with integers.
-def Select_CC_S32 : PseudoSelCC<FGR32, "# MipsSelect_CC_S32_f32">;
-def Select_CC_D32 : PseudoSelCC<AFGR64, "# MipsSelect_CC_D32_f32">,
- Requires<[In32BitMode]>;
+defm : MovzPats<FGR32, MOVZ_S>;
+defm : MovnPats<FGR32, MOVN_S>;
+
+let Predicates = [In32BitMode] in {
+ defm : MovzPats<AFGR64, MOVZ_D>;
+ defm : MovnPats<AFGR64, MOVN_D>;
+}
-// The values to be selected are int but the condition test is done with fp.
-def Select_FCC : PseudoFPSelCC<CPURegs, "# MipsSelect_FCC">;
+let usesCustomInserter = 1, Uses = [FCR31], Constraints = "$F = $dst" in {
+// flag:float, data:int
+class CondMovFPInt<SDNode cmov, bits<1> tf, string instr_asm> :
+ FCMOV<tf, (outs CPURegs:$dst), (ins CPURegs:$T, CPURegs:$F),
+ !strconcat(instr_asm, "\t$dst, $T, $$fcc0"),
+ [(set CPURegs:$dst, (cmov CPURegs:$T, CPURegs:$F))]>;
+
+// flag:float, data:float
+class CondMovFPFP<RegisterClass RC, SDNode cmov, bits<5> fmt, bits<1> tf,
+ string instr_asm> :
+ FFCMOV<fmt, tf, (outs RC:$dst), (ins RC:$T, RC:$F),
+ !strconcat(instr_asm, "\t$dst, $T, $$fcc0"),
+ [(set RC:$dst, (cmov RC:$T, RC:$F))]>;
+}
-// The values to be selected and the condition test is done with fp.
-def Select_FCC_S32 : PseudoFPSelCC<FGR32, "# MipsSelect_FCC_S32_f32">;
-def Select_FCC_D32 : PseudoFPSelCC<AFGR64, "# MipsSelect_FCC_D32_f32">,
- Requires<[In32BitMode]>;
+def MOVT : CondMovFPInt<MipsCMovFP_T, 1, "movt">;
+def MOVF : CondMovFPInt<MipsCMovFP_F, 0, "movf">;
+def MOVT_S : CondMovFPFP<FGR32, MipsCMovFP_T, 16, 1, "movt.s">;
+def MOVF_S : CondMovFPFP<FGR32, MipsCMovFP_F, 16, 0, "movf.s">;
-def MOVCCRToCCR : MipsPseudo<(outs CCR:$dst), (ins CCR:$src),
- "# MOVCCRToCCR", []>;
+let Predicates = [In32BitMode] in {
+ def MOVT_D : CondMovFPFP<AFGR64, MipsCMovFP_T, 17, 1, "movt.d">;
+ def MOVF_D : CondMovFPFP<AFGR64, MipsCMovFP_F, 17, 0, "movf.d">;
+}
+
+//===----------------------------------------------------------------------===//
+// Floating Point Pseudo-Instructions
+//===----------------------------------------------------------------------===//
+def MOVCCRToCCR : MipsPseudo<(outs CCR:$dst), (ins CCR:$src),
+ "# MOVCCRToCCR", []>;
+
+// This pseudo instr gets expanded into 2 mtc1 instrs after register
+// allocation.
+def BuildPairF64 :
+ MipsPseudo<(outs AFGR64:$dst),
+ (ins CPURegs:$lo, CPURegs:$hi), "",
+ [(set AFGR64:$dst, (MipsBuildPairF64 CPURegs:$lo, CPURegs:$hi))]>;
+
+// This pseudo instr gets expanded into 2 mfc1 instrs after register
+// allocation.
+// If n is 0, the lower part of src is extracted;
+// if n is 1, the higher part of src is extracted.
+def ExtractElementF64 :
+ MipsPseudo<(outs CPURegs:$dst),
+ (ins AFGR64:$src, i32imm:$n), "",
+ [(set CPURegs:$dst,
+ (MipsExtractElementF64 AFGR64:$src, imm:$n))]>;
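
A hedged C++ sketch of the post-register-allocation expansion the comments above describe (helper name and call site assumed): BuildPairF64 $D, $lo, $hi becomes two mtc1 instructions targeting the even and odd halves of the 64-bit register pair.

static void ExpandBuildPairF64(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator I,
                               const TargetInstrInfo *TII,
                               const TargetRegisterInfo *TRI) {
  unsigned DstReg = I->getOperand(0).getReg();
  unsigned LoReg  = I->getOperand(1).getReg();
  unsigned HiReg  = I->getOperand(2).getReg();
  const unsigned *SubReg = TRI->getSubRegisters(DstReg);
  DebugLoc dl = I->getDebugLoc();

  // mtc1 $lo, $even_half ; mtc1 $hi, $odd_half
  BuildMI(MBB, I, dl, TII->get(Mips::MTC1), SubReg[0]).addReg(LoReg);
  BuildMI(MBB, I, dl, TII->get(Mips::MTC1), SubReg[1]).addReg(HiReg);
  MBB.erase(I);
}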
//===----------------------------------------------------------------------===//
// Floating Point Patterns
@@ -306,7 +361,7 @@ def : Pat<(i32 (fp_to_sint FGR32:$src)), (MFC1 (TRUNC_W_S32 FGR32:$src))>;
def : Pat<(i32 (bitconvert FGR32:$src)), (MFC1 FGR32:$src)>;
def : Pat<(f32 (bitconvert CPURegs:$src)), (MTC1 CPURegs:$src)>;
-let Predicates = [In32BitMode] in {
+let Predicates = [In32BitMode] in {
def : Pat<(f32 (fround AFGR64:$src)), (CVTS_D32 AFGR64:$src)>;
def : Pat<(f64 (fextend FGR32:$src)), (CVTD_S32 FGR32:$src)>;
}
diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td
index 98ae2fa..9dfcdfb 100644
--- a/lib/Target/Mips/MipsInstrFormats.td
+++ b/lib/Target/Mips/MipsInstrFormats.td
@@ -22,8 +22,8 @@
//===----------------------------------------------------------------------===//
// Generic Mips Format
-class MipsInst<dag outs, dag ins, string asmstr, list<dag> pattern,
- InstrItinClass itin>: Instruction
+class MipsInst<dag outs, dag ins, string asmstr, list<dag> pattern,
+ InstrItinClass itin>: Instruction
{
field bits<32> Inst;
@@ -32,8 +32,8 @@ class MipsInst<dag outs, dag ins, string asmstr, list<dag> pattern,
bits<6> opcode;
// Top 6 bits are the 'opcode' field
- let Inst{31-26} = opcode;
-
+ let Inst{31-26} = opcode;
+
dag OutOperandList = outs;
dag InOperandList = ins;
@@ -52,7 +52,7 @@ class MipsPseudo<dag outs, dag ins, string asmstr, list<dag> pattern>:
class FR<bits<6> op, bits<6> _funct, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
- MipsInst<outs, ins, asmstr, pattern, itin>
+ MipsInst<outs, ins, asmstr, pattern, itin>
{
bits<5> rd;
bits<5> rs;
@@ -64,7 +64,7 @@ class FR<bits<6> op, bits<6> _funct, dag outs, dag ins, string asmstr,
let funct = _funct;
let Inst{25-21} = rs;
- let Inst{20-16} = rt;
+ let Inst{20-16} = rt;
let Inst{15-11} = rd;
let Inst{10-6} = shamt;
let Inst{5-0} = funct;
@@ -75,7 +75,7 @@ class FR<bits<6> op, bits<6> _funct, dag outs, dag ins, string asmstr,
//===----------------------------------------------------------------------===//
class FI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
- InstrItinClass itin>: MipsInst<outs, ins, asmstr, pattern, itin>
+ InstrItinClass itin>: MipsInst<outs, ins, asmstr, pattern, itin>
{
bits<5> rt;
bits<5> rs;
@@ -84,7 +84,7 @@ class FI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
let opcode = op;
let Inst{25-21} = rs;
- let Inst{20-16} = rt;
+ let Inst{20-16} = rt;
let Inst{15-0} = imm16;
}
@@ -93,12 +93,12 @@ class FI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
//===----------------------------------------------------------------------===//
class FJ<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
- InstrItinClass itin>: MipsInst<outs, ins, asmstr, pattern, itin>
+ InstrItinClass itin>: MipsInst<outs, ins, asmstr, pattern, itin>
{
bits<26> addr;
let opcode = op;
-
+
let Inst{25-0} = addr;
}
@@ -119,9 +119,9 @@ class FJ<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
// Format FR instruction class in Mips : <|opcode|fmt|ft|fs|fd|funct|>
//===----------------------------------------------------------------------===//
-class FFR<bits<6> op, bits<6> _funct, bits<5> _fmt, dag outs, dag ins,
- string asmstr, list<dag> pattern> :
- MipsInst<outs, ins, asmstr, pattern, NoItinerary>
+class FFR<bits<6> op, bits<6> _funct, bits<5> _fmt, dag outs, dag ins,
+ string asmstr, list<dag> pattern> :
+ MipsInst<outs, ins, asmstr, pattern, NoItinerary>
{
bits<5> fd;
bits<5> fs;
@@ -134,7 +134,7 @@ class FFR<bits<6> op, bits<6> _funct, bits<5> _fmt, dag outs, dag ins,
let fmt = _fmt;
let Inst{25-21} = fmt;
- let Inst{20-16} = ft;
+ let Inst{20-16} = ft;
let Inst{15-11} = fs;
let Inst{10-6} = fd;
let Inst{5-0} = funct;
@@ -144,8 +144,8 @@ class FFR<bits<6> op, bits<6> _funct, bits<5> _fmt, dag outs, dag ins,
// Format FI instruction class in Mips : <|opcode|base|ft|immediate|>
//===----------------------------------------------------------------------===//
-class FFI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern>:
- MipsInst<outs, ins, asmstr, pattern, NoItinerary>
+class FFI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern>:
+ MipsInst<outs, ins, asmstr, pattern, NoItinerary>
{
bits<5> ft;
bits<5> base;
@@ -154,7 +154,7 @@ class FFI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern>:
let opcode = op;
let Inst{25-21} = base;
- let Inst{20-16} = ft;
+ let Inst{20-16} = ft;
let Inst{15-0} = imm16;
}
@@ -162,8 +162,8 @@ class FFI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern>:
// Compare instruction class in Mips : <|010001|fmt|ft|fs|0000011|condcode|>
//===----------------------------------------------------------------------===//
-class FCC<bits<5> _fmt, dag outs, dag ins, string asmstr, list<dag> pattern> :
- MipsInst<outs, ins, asmstr, pattern, NoItinerary>
+class FCC<bits<5> _fmt, dag outs, dag ins, string asmstr, list<dag> pattern> :
+ MipsInst<outs, ins, asmstr, pattern, NoItinerary>
{
bits<5> fs;
bits<5> ft;
@@ -174,9 +174,54 @@ class FCC<bits<5> _fmt, dag outs, dag ins, string asmstr, list<dag> pattern> :
let fmt = _fmt;
let Inst{25-21} = fmt;
- let Inst{20-16} = ft;
+ let Inst{20-16} = ft;
let Inst{15-11} = fs;
let Inst{10-6} = 0;
let Inst{5-4} = 0b11;
let Inst{3-0} = cc;
}
+
+
+class FCMOV<bits<1> _tf, dag outs, dag ins, string asmstr,
+ list<dag> pattern> :
+ MipsInst<outs, ins, asmstr, pattern, NoItinerary>
+{
+ bits<5> rd;
+ bits<5> rs;
+ bits<3> N;
+ bits<1> tf;
+
+ let opcode = 0;
+ let tf = _tf;
+
+ let Inst{25-21} = rs;
+ let Inst{20-18} = N;
+ let Inst{17} = 0;
+ let Inst{16} = tf;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = 0;
+ let Inst{5-0} = 1;
+}
+
+class FFCMOV<bits<5> _fmt, bits<1> _tf, dag outs, dag ins, string asmstr,
+ list<dag> pattern> :
+ MipsInst<outs, ins, asmstr, pattern, NoItinerary>
+{
+ bits<5> fd;
+ bits<5> fs;
+ bits<3> N;
+ bits<5> fmt;
+ bits<1> tf;
+
+ let opcode = 17;
+ let fmt = _fmt;
+ let tf = _tf;
+
+ let Inst{25-21} = fmt;
+ let Inst{20-18} = N;
+ let Inst{17} = 0;
+ let Inst{16} = tf;
+ let Inst{15-11} = fs;
+ let Inst{10-6} = fd;
+ let Inst{5-0} = 17;
+}
\ No newline at end of file
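
As a quick check of the FCMOV layout defined above, the fields can be packed by hand; this standalone snippet (illustrative register numbers, not part of the patch) assembles "movt $1, $2, $fcc0" exactly as the Inst{...} assignments specify:

#include <cstdint>
#include <cstdio>

int main() {
  uint32_t opcode = 0, rs = 2, N = 0 /*fcc0*/, tf = 1, rd = 1;
  uint32_t Inst = (opcode << 26) | (rs << 21) | (N << 18) | (0u << 17) |
                  (tf << 16) | (rd << 11) | (0u << 6) | 1u /*funct*/;
  printf("movt $1, $2, $fcc0 -> 0x%08x\n", Inst);  // prints 0x00410801
  return 0;
}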
diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp
index aaf307b..be044fa 100644
--- a/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/lib/Target/Mips/MipsInstrInfo.cpp
@@ -36,7 +36,7 @@ static bool isZeroImm(const MachineOperand &op) {
/// not, return 0. This predicate must return 0 if the instruction has
/// any side effects other than loading from the stack slot.
unsigned MipsInstrInfo::
-isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const
+isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const
{
if ((MI->getOpcode() == Mips::LW) || (MI->getOpcode() == Mips::LWC1) ||
(MI->getOpcode() == Mips::LDC1)) {
@@ -57,7 +57,7 @@ isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const
/// not, return 0. This predicate must return 0 if the instruction has
/// any side effects other than storing to the stack slot.
unsigned MipsInstrInfo::
-isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const
+isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const
{
if ((MI->getOpcode() == Mips::SW) || (MI->getOpcode() == Mips::SWC1) ||
(MI->getOpcode() == Mips::SDC1)) {
@@ -74,7 +74,7 @@ isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const
/// insertNoop - If a data hazard condition is found, insert the target nop
/// instruction.
void MipsInstrInfo::
-insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const
+insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const
{
DebugLoc DL;
BuildMI(MBB, MI, DL, get(Mips::NOP));
@@ -136,7 +136,7 @@ copyPhysReg(MachineBasicBlock &MBB,
.addReg(SrcReg, getKillRegState(KillSrc));
return;
}
-
+
if (Mips::AFGR64RegClass.contains(DestReg, SrcReg)) {
BuildMI(MBB, I, DL, get(Mips::FMOV_D32), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
@@ -153,13 +153,13 @@ copyPhysReg(MachineBasicBlock &MBB,
void MipsInstrInfo::
storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- unsigned SrcReg, bool isKill, int FI,
+ unsigned SrcReg, bool isKill, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
- if (RC == Mips::CPURegsRegisterClass)
+ if (RC == Mips::CPURegsRegisterClass)
BuildMI(MBB, I, DL, get(Mips::SW)).addReg(SrcReg, getKillRegState(isKill))
.addImm(0).addFrameIndex(FI);
else if (RC == Mips::FGR32RegisterClass)
@@ -171,7 +171,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
.addReg(SrcReg, getKillRegState(isKill))
.addImm(0).addFrameIndex(FI);
} else {
- const TargetRegisterInfo *TRI =
+ const TargetRegisterInfo *TRI =
MBB.getParent()->getTarget().getRegisterInfo();
const unsigned *SubSet = TRI->getSubRegisters(SrcReg);
BuildMI(MBB, I, DL, get(Mips::SWC1))
@@ -189,12 +189,12 @@ void MipsInstrInfo::
loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned DestReg, int FI,
const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const
+ const TargetRegisterInfo *TRI) const
{
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
- if (RC == Mips::CPURegsRegisterClass)
+ if (RC == Mips::CPURegsRegisterClass)
BuildMI(MBB, I, DL, get(Mips::LW), DestReg).addImm(0).addFrameIndex(FI);
else if (RC == Mips::FGR32RegisterClass)
BuildMI(MBB, I, DL, get(Mips::LWC1), DestReg).addImm(0).addFrameIndex(FI);
@@ -202,7 +202,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
if (!TM.getSubtarget<MipsSubtarget>().isMips1()) {
BuildMI(MBB, I, DL, get(Mips::LDC1), DestReg).addImm(0).addFrameIndex(FI);
} else {
- const TargetRegisterInfo *TRI =
+ const TargetRegisterInfo *TRI =
MBB.getParent()->getTarget().getRegisterInfo();
const unsigned *SubSet = TRI->getSubRegisters(DestReg);
BuildMI(MBB, I, DL, get(Mips::LWC1), SubSet[0])
@@ -218,281 +218,202 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
// Branch Analysis
//===----------------------------------------------------------------------===//
-/// GetCondFromBranchOpc - Return the Mips CC that matches
-/// the correspondent Branch instruction opcode.
-static Mips::CondCode GetCondFromBranchOpc(unsigned BrOpc)
-{
- switch (BrOpc) {
- default: return Mips::COND_INVALID;
- case Mips::BEQ : return Mips::COND_E;
- case Mips::BNE : return Mips::COND_NE;
- case Mips::BGTZ : return Mips::COND_GZ;
- case Mips::BGEZ : return Mips::COND_GEZ;
- case Mips::BLTZ : return Mips::COND_LZ;
- case Mips::BLEZ : return Mips::COND_LEZ;
-
- // We dont do fp branch analysis yet!
- case Mips::BC1T :
- case Mips::BC1F : return Mips::COND_INVALID;
- }
+static unsigned GetAnalyzableBrOpc(unsigned Opc) {
+ return (Opc == Mips::BEQ || Opc == Mips::BNE || Opc == Mips::BGTZ ||
+ Opc == Mips::BGEZ || Opc == Mips::BLTZ || Opc == Mips::BLEZ ||
+ Opc == Mips::BC1T || Opc == Mips::BC1F || Opc == Mips::J) ? Opc : 0;
}
-/// GetCondBranchFromCond - Return the Branch instruction
-/// opcode that matches the cc.
-unsigned Mips::GetCondBranchFromCond(Mips::CondCode CC)
+/// GetOppositeBranchOpc - Return the inverse of the specified
+/// opcode, e.g. turning BEQ to BNE.
+unsigned Mips::GetOppositeBranchOpc(unsigned Opc)
{
- switch (CC) {
- default: llvm_unreachable("Illegal condition code!");
- case Mips::COND_E : return Mips::BEQ;
- case Mips::COND_NE : return Mips::BNE;
- case Mips::COND_GZ : return Mips::BGTZ;
- case Mips::COND_GEZ : return Mips::BGEZ;
- case Mips::COND_LZ : return Mips::BLTZ;
- case Mips::COND_LEZ : return Mips::BLEZ;
-
- case Mips::FCOND_F:
- case Mips::FCOND_UN:
- case Mips::FCOND_EQ:
- case Mips::FCOND_UEQ:
- case Mips::FCOND_OLT:
- case Mips::FCOND_ULT:
- case Mips::FCOND_OLE:
- case Mips::FCOND_ULE:
- case Mips::FCOND_SF:
- case Mips::FCOND_NGLE:
- case Mips::FCOND_SEQ:
- case Mips::FCOND_NGL:
- case Mips::FCOND_LT:
- case Mips::FCOND_NGE:
- case Mips::FCOND_LE:
- case Mips::FCOND_NGT: return Mips::BC1T;
-
- case Mips::FCOND_T:
- case Mips::FCOND_OR:
- case Mips::FCOND_NEQ:
- case Mips::FCOND_OGL:
- case Mips::FCOND_UGE:
- case Mips::FCOND_OGE:
- case Mips::FCOND_UGT:
- case Mips::FCOND_OGT:
- case Mips::FCOND_ST:
- case Mips::FCOND_GLE:
- case Mips::FCOND_SNE:
- case Mips::FCOND_GL:
- case Mips::FCOND_NLT:
- case Mips::FCOND_GE:
- case Mips::FCOND_NLE:
- case Mips::FCOND_GT: return Mips::BC1F;
+ switch (Opc) {
+ default: llvm_unreachable("Illegal opcode!");
+ case Mips::BEQ : return Mips::BNE;
+ case Mips::BNE : return Mips::BEQ;
+ case Mips::BGTZ : return Mips::BLEZ;
+ case Mips::BGEZ : return Mips::BLTZ;
+ case Mips::BLTZ : return Mips::BGEZ;
+ case Mips::BLEZ : return Mips::BGTZ;
+ case Mips::BC1T : return Mips::BC1F;
+ case Mips::BC1F : return Mips::BC1T;
}
}
-/// GetOppositeBranchCondition - Return the inverse of the specified
-/// condition, e.g. turning COND_E to COND_NE.
-Mips::CondCode Mips::GetOppositeBranchCondition(Mips::CondCode CC)
-{
- switch (CC) {
- default: llvm_unreachable("Illegal condition code!");
- case Mips::COND_E : return Mips::COND_NE;
- case Mips::COND_NE : return Mips::COND_E;
- case Mips::COND_GZ : return Mips::COND_LEZ;
- case Mips::COND_GEZ : return Mips::COND_LZ;
- case Mips::COND_LZ : return Mips::COND_GEZ;
- case Mips::COND_LEZ : return Mips::COND_GZ;
- case Mips::FCOND_F : return Mips::FCOND_T;
- case Mips::FCOND_UN : return Mips::FCOND_OR;
- case Mips::FCOND_EQ : return Mips::FCOND_NEQ;
- case Mips::FCOND_UEQ: return Mips::FCOND_OGL;
- case Mips::FCOND_OLT: return Mips::FCOND_UGE;
- case Mips::FCOND_ULT: return Mips::FCOND_OGE;
- case Mips::FCOND_OLE: return Mips::FCOND_UGT;
- case Mips::FCOND_ULE: return Mips::FCOND_OGT;
- case Mips::FCOND_SF: return Mips::FCOND_ST;
- case Mips::FCOND_NGLE:return Mips::FCOND_GLE;
- case Mips::FCOND_SEQ: return Mips::FCOND_SNE;
- case Mips::FCOND_NGL: return Mips::FCOND_GL;
- case Mips::FCOND_LT: return Mips::FCOND_NLT;
- case Mips::FCOND_NGE: return Mips::FCOND_GE;
- case Mips::FCOND_LE: return Mips::FCOND_NLE;
- case Mips::FCOND_NGT: return Mips::FCOND_GT;
- }
+static void AnalyzeCondBr(const MachineInstr* Inst, unsigned Opc,
+ MachineBasicBlock *&BB,
+ SmallVectorImpl<MachineOperand>& Cond) {
+ assert(GetAnalyzableBrOpc(Opc) && "Not an analyzable branch");
+ int NumOp = Inst->getNumExplicitOperands();
+
+ // For both int and fp branches, the last explicit operand is the
+ // MBB.
+ BB = Inst->getOperand(NumOp-1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(Opc));
+
+ for (int i=0; i<NumOp-1; i++)
+ Cond.push_back(Inst->getOperand(i));
}
-bool MipsInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+bool MipsInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify) const
+ bool AllowModify) const
{
- // If the block has no terminators, it just falls into the block after it.
- MachineBasicBlock::iterator I = MBB.end();
- if (I == MBB.begin())
+ MachineBasicBlock::reverse_iterator I = MBB.rbegin(), REnd = MBB.rend();
+
+ // Skip all the debug instructions.
+ while (I != REnd && I->isDebugValue())
+ ++I;
+
+ if (I == REnd || !isUnpredicatedTerminator(&*I)) {
+ // If this block ends with no branches (it just falls through to its succ)
+ // just return false, leaving TBB/FBB null.
+ TBB = FBB = NULL;
return false;
- --I;
- while (I->isDebugValue()) {
- if (I == MBB.begin())
- return false;
- --I;
}
- if (!isUnpredicatedTerminator(I))
- return false;
-
- // Get the last instruction in the block.
- MachineInstr *LastInst = I;
-
- // If there is only one terminator instruction, process it.
+
+ MachineInstr *LastInst = &*I;
unsigned LastOpc = LastInst->getOpcode();
- if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
- if (!LastInst->getDesc().isBranch())
+
+ // Not an analyzable branch (must be an indirect jump).
+ if (!GetAnalyzableBrOpc(LastOpc))
+ return true;
+
+ // Get the second to last instruction in the block.
+ unsigned SecondLastOpc = 0;
+ MachineInstr *SecondLastInst = NULL;
+
+ if (++I != REnd) {
+ SecondLastInst = &*I;
+ SecondLastOpc = GetAnalyzableBrOpc(SecondLastInst->getOpcode());
+
+ // Not an analyzable branch (must be an indirect jump).
+ if (isUnpredicatedTerminator(SecondLastInst) && !SecondLastOpc)
return true;
+ }
+ // If there is only one terminator instruction, process it.
+ if (!SecondLastOpc) {
// Unconditional branch
if (LastOpc == Mips::J) {
TBB = LastInst->getOperand(0).getMBB();
return false;
}
- Mips::CondCode BranchCode = GetCondFromBranchOpc(LastInst->getOpcode());
- if (BranchCode == Mips::COND_INVALID)
- return true; // Can't handle indirect branch.
-
// Conditional branch
- // Block ends with fall-through condbranch.
- if (LastOpc != Mips::COND_INVALID) {
- int LastNumOp = LastInst->getNumOperands();
-
- TBB = LastInst->getOperand(LastNumOp-1).getMBB();
- Cond.push_back(MachineOperand::CreateImm(BranchCode));
-
- for (int i=0; i<LastNumOp-1; i++) {
- Cond.push_back(LastInst->getOperand(i));
- }
-
- return false;
- }
+ AnalyzeCondBr(LastInst, LastOpc, TBB, Cond);
+ return false;
}
-
- // Get the instruction before it if it is a terminator.
- MachineInstr *SecondLastInst = I;
-
+
+ // If we reached here, there are two branches.
// If there are three terminators, we don't know what sort of block this is.
- if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
+ if (++I != REnd && isUnpredicatedTerminator(&*I))
return true;
- // If the block ends with Mips::J and a Mips::BNE/Mips::BEQ, handle it.
- unsigned SecondLastOpc = SecondLastInst->getOpcode();
- Mips::CondCode BranchCode = GetCondFromBranchOpc(SecondLastOpc);
+ // If the second-to-last instruction is an unconditional branch,
+ // analyze it and remove the last instruction.
+ if (SecondLastOpc == Mips::J) {
+ // Return if the last instruction cannot be removed.
+ if (!AllowModify)
+ return true;
- if (BranchCode != Mips::COND_INVALID && LastOpc == Mips::J) {
- int SecondNumOp = SecondLastInst->getNumOperands();
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ LastInst->eraseFromParent();
+ return false;
+ }
- TBB = SecondLastInst->getOperand(SecondNumOp-1).getMBB();
- Cond.push_back(MachineOperand::CreateImm(BranchCode));
+ // Conditional branch followed by an unconditional branch.
+ // The last one must be unconditional.
+ if (LastOpc != Mips::J)
+ return true;
- for (int i=0; i<SecondNumOp-1; i++) {
- Cond.push_back(SecondLastInst->getOperand(i));
- }
+ AnalyzeCondBr(SecondLastInst, SecondLastOpc, TBB, Cond);
+ FBB = LastInst->getOperand(0).getMBB();
- FBB = LastInst->getOperand(0).getMBB();
- return false;
- }
+ return false;
+}
- // If the block ends with two unconditional branches, handle it. The last
- // one is not executed, so remove it.
- if ((SecondLastOpc == Mips::J) && (LastOpc == Mips::J)) {
- TBB = SecondLastInst->getOperand(0).getMBB();
- I = LastInst;
- if (AllowModify)
- I->eraseFromParent();
- return false;
- }
+void MipsInstrInfo::BuildCondBr(MachineBasicBlock &MBB,
+ MachineBasicBlock *TBB, DebugLoc DL,
+ const SmallVectorImpl<MachineOperand>& Cond)
+ const {
+ unsigned Opc = Cond[0].getImm();
+ const TargetInstrDesc &TID = get(Opc);
+ MachineInstrBuilder MIB = BuildMI(&MBB, DL, TID);
- // Otherwise, can't handle this.
- return true;
+ for (unsigned i = 1; i < Cond.size(); ++i)
+ MIB.addReg(Cond[i].getReg());
+
+ MIB.addMBB(TBB);
}
unsigned MipsInstrInfo::
-InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
const SmallVectorImpl<MachineOperand> &Cond,
DebugLoc DL) const {
// Shouldn't be a fall through.
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
- assert((Cond.size() == 3 || Cond.size() == 2 || Cond.size() == 0) &&
- "Mips branch conditions can have two|three components!");
- if (FBB == 0) { // One way branch.
- if (Cond.empty()) {
- // Unconditional branch?
- BuildMI(&MBB, DL, get(Mips::J)).addMBB(TBB);
- } else {
- // Conditional branch.
- unsigned Opc = GetCondBranchFromCond((Mips::CondCode)Cond[0].getImm());
- const TargetInstrDesc &TID = get(Opc);
-
- if (TID.getNumOperands() == 3)
- BuildMI(&MBB, DL, TID).addReg(Cond[1].getReg())
- .addReg(Cond[2].getReg())
- .addMBB(TBB);
- else
- BuildMI(&MBB, DL, TID).addReg(Cond[1].getReg())
- .addMBB(TBB);
-
- }
- return 1;
- }
-
- // Two-way Conditional branch.
- unsigned Opc = GetCondBranchFromCond((Mips::CondCode)Cond[0].getImm());
- const TargetInstrDesc &TID = get(Opc);
+ // # of condition operands:
+ // Unconditional branches: 0
+ // Floating point branches: 1 (opc)
+ // Int BranchZero: 2 (opc, reg)
+ // Int Branch: 3 (opc, reg0, reg1)
+ assert((Cond.size() <= 3) &&
+ "# of Mips branch conditions must be <= 3!");
- if (TID.getNumOperands() == 3)
- BuildMI(&MBB, DL, TID).addReg(Cond[1].getReg()).addReg(Cond[2].getReg())
- .addMBB(TBB);
- else
- BuildMI(&MBB, DL, TID).addReg(Cond[1].getReg()).addMBB(TBB);
+ // Two-way Conditional branch.
+ if (FBB) {
+ BuildCondBr(MBB, TBB, DL, Cond);
+ BuildMI(&MBB, DL, get(Mips::J)).addMBB(FBB);
+ return 2;
+ }
- BuildMI(&MBB, DL, get(Mips::J)).addMBB(FBB);
- return 2;
+ // One way branch.
+ // Unconditional branch.
+ if (Cond.empty())
+ BuildMI(&MBB, DL, get(Mips::J)).addMBB(TBB);
+ else // Conditional branch.
+ BuildCondBr(MBB, TBB, DL, Cond);
+ return 1;
}
unsigned MipsInstrInfo::
-RemoveBranch(MachineBasicBlock &MBB) const
+RemoveBranch(MachineBasicBlock &MBB) const
{
- MachineBasicBlock::iterator I = MBB.end();
- if (I == MBB.begin()) return 0;
- --I;
- while (I->isDebugValue()) {
- if (I == MBB.begin())
- return 0;
- --I;
- }
- if (I->getOpcode() != Mips::J &&
- GetCondFromBranchOpc(I->getOpcode()) == Mips::COND_INVALID)
- return 0;
-
- // Remove the branch.
- I->eraseFromParent();
-
- I = MBB.end();
-
- if (I == MBB.begin()) return 1;
- --I;
- if (GetCondFromBranchOpc(I->getOpcode()) == Mips::COND_INVALID)
- return 1;
-
- // Remove the branch.
- I->eraseFromParent();
- return 2;
+ MachineBasicBlock::reverse_iterator I = MBB.rbegin(), REnd = MBB.rend();
+ MachineBasicBlock::reverse_iterator FirstBr;
+ unsigned removed;
+
+ // Skip all the debug instructions.
+ while (I != REnd && I->isDebugValue())
+ ++I;
+
+ FirstBr = I;
+
+ // Up to 2 branches are removed.
+ // Note that indirect branches are not removed.
+ for(removed = 0; I != REnd && removed < 2; ++I, ++removed)
+ if (!GetAnalyzableBrOpc(I->getOpcode()))
+ break;
+
+ MBB.erase(I.base(), FirstBr.base());
+
+ return removed;
}
-/// ReverseBranchCondition - Return the inverse opcode of the 
+/// ReverseBranchCondition - Return the inverse opcode of the
+/// ReverseBranchCondition - Return the inverse opcode of the
/// specified Branch instruction.
bool MipsInstrInfo::
-ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const
{
- assert( (Cond.size() == 3 || Cond.size() == 2) &&
+ assert( (Cond.size() && Cond.size() <= 3) &&
"Invalid Mips branch condition!");
- Cond[0].setImm(GetOppositeBranchCondition((Mips::CondCode)Cond[0].getImm()));
+ Cond[0].setImm(Mips::GetOppositeBranchOpc(Cond[0].getImm()));
return false;
}
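
For context, a hedged sketch of how a generic codegen pass (BranchFolding, for instance) drives the reworked hooks; TII, MBB, NextMBB and DL are assumed to be in scope. Note that Cond[0] now carries the branch opcode itself rather than a Mips::CondCode:

MachineBasicBlock *TBB = 0, *FBB = 0;
SmallVector<MachineOperand, 4> Cond;
if (!TII->AnalyzeBranch(MBB, TBB, FBB, Cond, /*AllowModify=*/ true) &&
    !Cond.empty() && !TII->ReverseBranchCondition(Cond)) {
  // Cond[0] holds the branch opcode; the remaining entries are registers.
  TII->RemoveBranch(MBB);
  TII->InsertBranch(MBB, FBB ? FBB : NextMBB, 0, Cond, DL);
}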
diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h
index 52a3d39..5fdbf1f 100644
--- a/lib/Target/Mips/MipsInstrInfo.h
+++ b/lib/Target/Mips/MipsInstrInfo.h
@@ -37,7 +37,7 @@ namespace Mips {
// To be used with float branch True
FCOND_F,
FCOND_UN,
- FCOND_EQ,
+ FCOND_OEQ,
FCOND_UEQ,
FCOND_OLT,
FCOND_ULT,
@@ -57,8 +57,8 @@ namespace Mips {
// above ones, but are used with a branch False;
FCOND_T,
FCOND_OR,
- FCOND_NEQ,
- FCOND_OGL,
+ FCOND_UNE,
+ FCOND_ONE,
FCOND_UGE,
FCOND_OGE,
FCOND_UGT,
@@ -70,27 +70,15 @@ namespace Mips {
FCOND_NLT,
FCOND_GE,
FCOND_NLE,
- FCOND_GT,
-
- // Only integer conditions
- COND_E,
- COND_GZ,
- COND_GEZ,
- COND_LZ,
- COND_LEZ,
- COND_NE,
- COND_INVALID
+ FCOND_GT
};
-
- // Turn condition code into conditional branch opcode.
- unsigned GetCondBranchFromCond(CondCode CC);
- /// GetOppositeBranchCondition - Return the inverse of the specified cond,
- /// e.g. turning COND_E to COND_NE.
- CondCode GetOppositeBranchCondition(Mips::CondCode CC);
+ /// GetOppositeBranchOpc - Return the inverse of the specified
+ /// opcode, e.g. turning BEQ to BNE.
+ unsigned GetOppositeBranchOpc(unsigned Opc);
/// MipsFCCToString - Map each FP condition code to its string
- inline static const char *MipsFCCToString(Mips::CondCode CC)
+ inline static const char *MipsFCCToString(Mips::CondCode CC)
{
switch (CC) {
default: llvm_unreachable("Unknown condition code");
@@ -98,10 +86,10 @@ namespace Mips {
case FCOND_T: return "f";
case FCOND_UN:
case FCOND_OR: return "un";
- case FCOND_EQ:
- case FCOND_NEQ: return "eq";
+ case FCOND_OEQ:
+ case FCOND_UNE: return "eq";
case FCOND_UEQ:
- case FCOND_OGL: return "ueq";
+ case FCOND_ONE: return "ueq";
case FCOND_OLT:
case FCOND_UGE: return "olt";
case FCOND_ULT:
@@ -121,11 +109,11 @@ namespace Mips {
case FCOND_LT:
case FCOND_NLT: return "lt";
case FCOND_NGE:
- case FCOND_GE: return "ge";
+ case FCOND_GE: return "nge";
case FCOND_LE:
- case FCOND_NLE: return "nle";
+ case FCOND_NLE: return "le";
case FCOND_NGT:
- case FCOND_GT: return "gt";
+ case FCOND_GT: return "ngt";
}
}
}
@@ -138,27 +126,27 @@ namespace MipsII {
enum TOF {
//===------------------------------------------------------------------===//
// Mips Specific MachineOperand flags.
-
+
MO_NO_FLAG,
/// MO_GOT - Represents the offset into the global offset table at which
/// the address the relocation entry symbol resides during execution.
MO_GOT,
- /// MO_GOT_CALL - Represents the offset into the global offset table at
- /// which the address of a call site relocation entry symbol resides
+ /// MO_GOT_CALL - Represents the offset into the global offset table at
+ /// which the address of a call site relocation entry symbol resides
/// during execution. This is different from the above since this flag
/// can only be present in call instructions.
MO_GOT_CALL,
- /// MO_GPREL - Represents the offset from the current gp value to be used
+ /// MO_GPREL - Represents the offset from the current gp value to be used
/// for the relocatable object file being produced.
MO_GPREL,
- /// MO_ABS_HILO - Represents the hi or low part of an absolute symbol
- /// address.
- MO_ABS_HILO
-
+ /// MO_ABS_HI/LO - Represents the hi or low part of an absolute symbol
+ /// address.
+ MO_ABS_HI,
+ MO_ABS_LO
};
}
@@ -181,7 +169,7 @@ public:
/// any side effects other than loading from the stack slot.
virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const;
-
+
/// isStoreToStackSlot - If the specified machine instruction is a direct
/// store to a stack slot, return the virtual or physical register number of
/// the source reg along with the FrameIndex of the loaded stack slot. If
@@ -189,13 +177,19 @@ public:
/// any side effects other than storing to the stack slot.
virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
int &FrameIndex) const;
-
+
/// Branch Analysis
virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const;
virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
+private:
+ void BuildCondBr(MachineBasicBlock &MBB, MachineBasicBlock *TBB, DebugLoc DL,
+ const SmallVectorImpl<MachineOperand>& Cond) const;
+
+public:
virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
const SmallVectorImpl<MachineOperand> &Cond,
@@ -220,7 +214,7 @@ public:
bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
/// Insert nop instruction when hazard condition is found
- virtual void insertNoop(MachineBasicBlock &MBB,
+ virtual void insertNoop(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const;
/// getGlobalBaseReg - Return a virtual register initialized with the
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index b70266a..19b9c35 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -19,18 +19,19 @@ include "MipsInstrFormats.td"
def SDT_MipsRet : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDT_MipsJmpLink : SDTypeProfile<0, 1, [SDTCisVT<0, iPTR>]>;
-def SDT_MipsSelectCC : SDTypeProfile<1, 3, [SDTCisSameAs<0, 2>,
- SDTCisSameAs<2, 3>, SDTCisInt<1>]>;
def SDT_MipsCMov : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>,
- SDTCisSameAs<1, 2>, SDTCisSameAs<3, 4>,
- SDTCisInt<4>]>;
+ SDTCisSameAs<1, 2>,
+ SDTCisSameAs<3, 4>,
+ SDTCisInt<4>]>;
def SDT_MipsCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>;
def SDT_MipsCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
-def SDT_MipsMAddMSub : SDTypeProfile<0, 4,
+def SDT_MipsMAddMSub : SDTypeProfile<0, 4,
[SDTCisVT<0, i32>, SDTCisSameAs<0, 1>,
- SDTCisSameAs<1, 2>,
+ SDTCisSameAs<1, 2>,
SDTCisSameAs<2, 3>]>;
-
+def SDT_MipsDivRem : SDTypeProfile<0, 2,
+ [SDTCisVT<0, i32>,
+ SDTCisSameAs<0, 1>]>;
// Call
def MipsJmpLink : SDNode<"MipsISD::JmpLink",SDT_MipsJmpLink,
@@ -54,9 +55,6 @@ def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_MipsCallSeqStart,
def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_MipsCallSeqEnd,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
-// Select Condition Code
-def MipsSelectCC : SDNode<"MipsISD::SelectCC", SDT_MipsSelectCC>;
-
// MAdd*/MSub* nodes
def MipsMAdd : SDNode<"MipsISD::MAdd", SDT_MipsMAddMSub,
[SDNPOptInGlue, SDNPOutGlue]>;
@@ -67,6 +65,12 @@ def MipsMSub : SDNode<"MipsISD::MSub", SDT_MipsMAddMSub,
def MipsMSubu : SDNode<"MipsISD::MSubu", SDT_MipsMAddMSub,
[SDNPOptInGlue, SDNPOutGlue]>;
+// DivRem(u) nodes
+def MipsDivRem : SDNode<"MipsISD::DivRem", SDT_MipsDivRem,
+ [SDNPOutGlue]>;
+def MipsDivRemU : SDNode<"MipsISD::DivRemU", SDT_MipsDivRem,
+ [SDNPOutGlue]>;
+
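
A hedged sketch of the DAG combine these glued nodes are built for (function name assumed): an ISD::SDIVREM/UDIVREM is rewritten as a MipsISD::DivRem(U) that defines HI/LO, and each used result is read back through a glued CopyFromReg, so a single div instruction feeds both mflo and mfhi.

static SDValue PerformDivRemCombine(SDNode *N, SelectionDAG &DAG) {
  EVT Ty = N->getValueType(0);
  unsigned Opc = (N->getOpcode() == ISD::SDIVREM) ? MipsISD::DivRem
                                                  : MipsISD::DivRemU;
  DebugLoc dl = N->getDebugLoc();

  SDValue DivRem = DAG.getNode(Opc, dl, MVT::Glue,
                               N->getOperand(0), N->getOperand(1));
  SDValue InChain = DAG.getEntryNode();
  SDValue InGlue = DivRem;

  // The quotient lives in LO; read it back with mflo if it is used.
  if (N->hasAnyUseOfValue(0)) {
    SDValue CopyFromLo = DAG.getCopyFromReg(InChain, dl, Mips::LO, Ty,
                                            InGlue);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), CopyFromLo);
    InChain = CopyFromLo.getValue(1);
    InGlue = CopyFromLo.getValue(2);
  }

  // The remainder lives in HI; read it back with mfhi if it is used.
  if (N->hasAnyUseOfValue(1))
    DAG.ReplaceAllUsesOfValueWith(
        SDValue(N, 1), DAG.getCopyFromReg(InChain, dl, Mips::HI, Ty, InGlue));

  return SDValue();
}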
//===----------------------------------------------------------------------===//
// Mips Instruction Predicate Definitions.
//===----------------------------------------------------------------------===//
@@ -165,7 +169,7 @@ class ArithOverflowI<bits<6> op, string instr_asm, SDNode OpNode,
let rd = 0, shamt = 0, Defs = [HI, LO], Uses = [HI, LO] in
class MArithR<bits<6> func, string instr_asm, SDNode op> :
FR<0x1c, func, (outs), (ins CPURegs:$rs, CPURegs:$rt),
- !strconcat(instr_asm, "\t$rs, $rt"),
+ !strconcat(instr_asm, "\t$rs, $rt"),
[(op CPURegs:$rs, CPURegs:$rt, LO, HI)], IIImul>;
// Logical
@@ -185,7 +189,7 @@ class LogicNOR<bits<6> op, bits<6> func, string instr_asm>:
[(set CPURegs:$dst, (not (or CPURegs:$b, CPURegs:$c)))], IIAlu>;
// Shifts
-class LogicR_shift_rotate_imm<bits<6> func, bits<5> _rs, string instr_asm,
+class LogicR_shift_rotate_imm<bits<6> func, bits<5> _rs, string instr_asm,
SDNode OpNode>:
FR<0x00, func, (outs CPURegs:$dst), (ins CPURegs:$b, shamt:$c),
!strconcat(instr_asm, "\t$dst, $b, $c"),
@@ -193,7 +197,7 @@ class LogicR_shift_rotate_imm<bits<6> func, bits<5> _rs, string instr_asm,
let rs = _rs;
}
-class LogicR_shift_rotate_reg<bits<6> func, bits<5> _shamt, string instr_asm,
+class LogicR_shift_rotate_reg<bits<6> func, bits<5> _shamt, string instr_asm,
SDNode OpNode>:
FR<0x00, func, (outs CPURegs:$dst), (ins CPURegs:$c, CPURegs:$b),
!strconcat(instr_asm, "\t$dst, $b, $c"),
@@ -283,9 +287,16 @@ let isCall=1, hasDelaySlot=1,
}
// Mul, Div
-class MulDiv<bits<6> func, string instr_asm, InstrItinClass itin>:
- FR<0x00, func, (outs), (ins CPURegs:$a, CPURegs:$b),
- !strconcat(instr_asm, "\t$a, $b"), [], itin>;
+let Defs = [HI, LO] in {
+ class Mul<bits<6> func, string instr_asm, InstrItinClass itin>:
+ FR<0x00, func, (outs), (ins CPURegs:$a, CPURegs:$b),
+ !strconcat(instr_asm, "\t$a, $b"), [], itin>;
+
+ class Div<SDNode op, bits<6> func, string instr_asm, InstrItinClass itin>:
+ FR<0x00, func, (outs), (ins CPURegs:$a, CPURegs:$b),
+ !strconcat(instr_asm, "\t$$zero, $a, $b"),
+ [(op CPURegs:$a, CPURegs:$b)], itin>;
+}
// Move from Hi/Lo
class MoveFromLOHI<bits<6> func, string instr_asm>:
@@ -348,6 +359,11 @@ def REORDER : MipsPseudo<(outs), (ins), ".set\treorder", []>;
def NOMACRO : MipsPseudo<(outs), (ins), ".set\tnomacro", []>;
def NOREORDER : MipsPseudo<(outs), (ins), ".set\tnoreorder", []>;
+// These macros are inserted to prevent GAS from complaining
+// when using the AT register.
+def NOAT : MipsPseudo<(outs), (ins), ".set\tnoat", []>;
+def ATMACRO : MipsPseudo<(outs), (ins), ".set\tat", []>;
+
// When handling PIC code the assembler needs .cpload and .cprestore
// directives. If the real instructions corresponding to these directives
// are used, we have the same behavior, but also get a bunch of warnings
@@ -355,18 +371,6 @@ def NOREORDER : MipsPseudo<(outs), (ins), ".set\tnoreorder", []>;
def CPLOAD : MipsPseudo<(outs), (ins CPURegs:$picreg), ".cpload\t$picreg", []>;
def CPRESTORE : MipsPseudo<(outs), (ins uimm16:$loc), ".cprestore\t$loc\n", []>;
-// The supported Mips ISAs dont have any instruction close to the SELECT_CC
-// operation. The solution is to create a Mips pseudo SELECT_CC instruction
-// (MipsSelectCC), use LowerSELECT_CC to generate this instruction and finally
-// replace it for real supported nodes into EmitInstrWithCustomInserter
-let usesCustomInserter = 1 in {
- class PseudoSelCC<RegisterClass RC, string asmstr>:
- MipsPseudo<(outs RC:$dst), (ins CPURegs:$CmpRes, RC:$T, RC:$F), asmstr,
- [(set RC:$dst, (MipsSelectCC CPURegs:$CmpRes, RC:$T, RC:$F))]>;
-}
-
-def Select_CC : PseudoSelCC<CPURegs, "# MipsSelect_CC_i32">;
-
//===----------------------------------------------------------------------===//
// Instruction definition
//===----------------------------------------------------------------------===//
@@ -447,12 +451,10 @@ let isReturn=1, isTerminator=1, hasDelaySlot=1,
"jr\t$target", [(MipsRet CPURegs:$target)], IIBranch>;
/// Multiply and Divide Instructions.
-let Defs = [HI, LO] in {
- def MULT : MulDiv<0x18, "mult", IIImul>;
- def MULTu : MulDiv<0x19, "multu", IIImul>;
- def DIV : MulDiv<0x1a, "div", IIIdiv>;
- def DIVu : MulDiv<0x1b, "divu", IIIdiv>;
-}
+def MULT : Mul<0x18, "mult", IIImul>;
+def MULTu : Mul<0x19, "multu", IIImul>;
+def SDIV : Div<MipsDivRem, 0x1a, "div", IIIdiv>;
+def UDIV : Div<MipsDivRemU, 0x1b, "divu", IIIdiv>;
let Defs = [HI] in
def MTHI : MoveToLOHI<0x11, "mthi">;
@@ -489,10 +491,19 @@ let Predicates = [HasSwap] in {
def MIPS_CMOV_ZERO : PatLeaf<(i32 0)>;
def MIPS_CMOV_NZERO : PatLeaf<(i32 1)>;
-let Predicates = [HasCondMov], Constraints = "$F = $dst" in {
- def MOVN : CondMov<0x0a, "movn", MIPS_CMOV_NZERO>;
- def MOVZ : CondMov<0x0b, "movz", MIPS_CMOV_ZERO>;
-}
+// Conditional moves:
+// These instructions are expanded in
+// MipsISelLowering::EmitInstrWithCustomInserter if the target does not have
+// conditional move instructions.
+// flag:int, data:int
+let usesCustomInserter = 1, shamt = 0, Constraints = "$F = $dst" in
+ class CondMovIntInt<bits<6> funct, string instr_asm> :
+ FR<0, funct, (outs CPURegs:$dst),
+ (ins CPURegs:$T, CPURegs:$cond, CPURegs:$F),
+ !strconcat(instr_asm, "\t$dst, $T, $cond"), [], NoItinerary>;
+
+def MOVZ_I : CondMovIntInt<0x0a, "movz">;
+def MOVN_I : CondMovIntInt<0x0b, "movn">;
/// No operation
let addr=0 in
@@ -533,7 +544,7 @@ def : Pat<(subc CPURegs:$lhs, CPURegs:$rhs),
(SUBu CPURegs:$lhs, CPURegs:$rhs)>;
def : Pat<(addc CPURegs:$lhs, CPURegs:$rhs),
(ADDu CPURegs:$lhs, CPURegs:$rhs)>;
-def : Pat<(addc CPURegs:$src, imm:$imm),
+def : Pat<(addc CPURegs:$src, immSExt16:$imm),
(ADDiu CPURegs:$src, imm:$imm)>;
// Call
@@ -546,8 +557,11 @@ def : Pat<(MipsJmpLink (i32 texternalsym:$dst)),
// hi/lo relocs
def : Pat<(MipsHi tglobaladdr:$in), (LUi tglobaladdr:$in)>;
+def : Pat<(MipsHi tblockaddress:$in), (LUi tblockaddress:$in)>;
def : Pat<(add CPURegs:$hi, (MipsLo tglobaladdr:$lo)),
(ADDiu CPURegs:$hi, tglobaladdr:$lo)>;
+def : Pat<(add CPURegs:$hi, (MipsLo tblockaddress:$lo)),
+ (ADDiu CPURegs:$hi, tblockaddress:$lo)>;
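
These tblockaddress patterns pair with the new LowerBlockAddress hook declared in MipsISelLowering.h; a hedged sketch of its non-PIC path (shape assumed), splitting the address into %hi/%lo halves that the patterns above select into lui/addiu:

SDValue MipsTargetLowering::
LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
  DebugLoc dl = Op.getDebugLoc();

  SDValue BAHi = DAG.getBlockAddress(BA, MVT::i32, true, MipsII::MO_ABS_HI);
  SDValue BALo = DAG.getBlockAddress(BA, MVT::i32, true, MipsII::MO_ABS_LO);
  SDValue Hi = DAG.getNode(MipsISD::Hi, dl, MVT::i32, BAHi);
  SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, BALo);
  return DAG.getNode(ISD::ADD, dl, MVT::i32, Hi, Lo);
}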
def : Pat<(MipsHi tjumptable:$in), (LUi tjumptable:$in)>;
def : Pat<(add CPURegs:$hi, (MipsLo tjumptable:$lo)),
@@ -599,33 +613,43 @@ def : Pat<(brcond CPURegs:$cond, bb:$dst),
(BNE CPURegs:$cond, ZERO, bb:$dst)>;
// select patterns
-def : Pat<(select (setge CPURegs:$lhs, CPURegs:$rhs), CPURegs:$T, CPURegs:$F),
- (MOVZ CPURegs:$F, CPURegs:$T, (SLT CPURegs:$lhs, CPURegs:$rhs))>;
-def : Pat<(select (setuge CPURegs:$lhs, CPURegs:$rhs), CPURegs:$T, CPURegs:$F),
- (MOVZ CPURegs:$F, CPURegs:$T, (SLTu CPURegs:$lhs, CPURegs:$rhs))>;
-def : Pat<(select (setge CPURegs:$lhs, immSExt16:$rhs), CPURegs:$T, CPURegs:$F),
- (MOVZ CPURegs:$F, CPURegs:$T, (SLTi CPURegs:$lhs, immSExt16:$rhs))>;
-def : Pat<(select (setuge CPURegs:$lh, immSExt16:$rh), CPURegs:$T, CPURegs:$F),
- (MOVZ CPURegs:$F, CPURegs:$T, (SLTiu CPURegs:$lh, immSExt16:$rh))>;
-
-def : Pat<(select (setle CPURegs:$lhs, CPURegs:$rhs), CPURegs:$T, CPURegs:$F),
- (MOVZ CPURegs:$F, CPURegs:$T, (SLT CPURegs:$rhs, CPURegs:$lhs))>;
-def : Pat<(select (setule CPURegs:$lhs, CPURegs:$rhs), CPURegs:$T, CPURegs:$F),
- (MOVZ CPURegs:$F, CPURegs:$T, (SLTu CPURegs:$rhs, CPURegs:$lhs))>;
-
-def : Pat<(select (seteq CPURegs:$lhs, CPURegs:$rhs), CPURegs:$T, CPURegs:$F),
- (MOVZ CPURegs:$F, CPURegs:$T, (XOR CPURegs:$lhs, CPURegs:$rhs))>;
-def : Pat<(select (setne CPURegs:$lhs, CPURegs:$rhs), CPURegs:$T, CPURegs:$F),
- (MOVN CPURegs:$F, CPURegs:$T, (XOR CPURegs:$lhs, CPURegs:$rhs))>;
-
-def : Pat<(select CPURegs:$cond, CPURegs:$T, CPURegs:$F),
- (MOVN CPURegs:$F, CPURegs:$T, CPURegs:$cond)>;
+multiclass MovzPats<RegisterClass RC, Instruction MOVZInst> {
+ def : Pat<(select (setge CPURegs:$lhs, CPURegs:$rhs), RC:$T, RC:$F),
+ (MOVZInst RC:$T, (SLT CPURegs:$lhs, CPURegs:$rhs), RC:$F)>;
+ def : Pat<(select (setuge CPURegs:$lhs, CPURegs:$rhs), RC:$T, RC:$F),
+ (MOVZInst RC:$T, (SLTu CPURegs:$lhs, CPURegs:$rhs), RC:$F)>;
+ def : Pat<(select (setge CPURegs:$lhs, immSExt16:$rhs), RC:$T, RC:$F),
+ (MOVZInst RC:$T, (SLTi CPURegs:$lhs, immSExt16:$rhs), RC:$F)>;
+ def : Pat<(select (setuge CPURegs:$lh, immSExt16:$rh), RC:$T, RC:$F),
+ (MOVZInst RC:$T, (SLTiu CPURegs:$lh, immSExt16:$rh), RC:$F)>;
+ def : Pat<(select (setle CPURegs:$lhs, CPURegs:$rhs), RC:$T, RC:$F),
+ (MOVZInst RC:$T, (SLT CPURegs:$rhs, CPURegs:$lhs), RC:$F)>;
+ def : Pat<(select (setule CPURegs:$lhs, CPURegs:$rhs), RC:$T, RC:$F),
+ (MOVZInst RC:$T, (SLTu CPURegs:$rhs, CPURegs:$lhs), RC:$F)>;
+ def : Pat<(select (seteq CPURegs:$lhs, CPURegs:$rhs), RC:$T, RC:$F),
+ (MOVZInst RC:$T, (XOR CPURegs:$lhs, CPURegs:$rhs), RC:$F)>;
+ def : Pat<(select (seteq CPURegs:$lhs, 0), RC:$T, RC:$F),
+ (MOVZInst RC:$T, CPURegs:$lhs, RC:$F)>;
+}
+
+multiclass MovnPats<RegisterClass RC, Instruction MOVNInst> {
+ def : Pat<(select (setne CPURegs:$lhs, CPURegs:$rhs), RC:$T, RC:$F),
+ (MOVNInst RC:$T, (XOR CPURegs:$lhs, CPURegs:$rhs), RC:$F)>;
+ def : Pat<(select CPURegs:$cond, RC:$T, RC:$F),
+ (MOVNInst RC:$T, CPURegs:$cond, RC:$F)>;
+ def : Pat<(select (setne CPURegs:$lhs, 0), RC:$T, RC:$F),
+ (MOVNInst RC:$T, CPURegs:$lhs, RC:$F)>;
+}
+
+defm : MovzPats<CPURegs, MOVZ_I>;
+defm : MovnPats<CPURegs, MOVN_I>;
// select patterns with got access
-def : Pat<(select (setne CPURegs:$lhs, CPURegs:$rhs),
- (i32 tglobaladdr:$T), CPURegs:$F),
- (MOVN CPURegs:$F, (ADDiu GP, tglobaladdr:$T),
- (XOR CPURegs:$lhs, CPURegs:$rhs))>;
+let AddedComplexity = 10 in
+ def : Pat<(select (setne CPURegs:$lhs, CPURegs:$rhs),
+ (i32 tglobaladdr:$T), CPURegs:$F),
+ (MOVN_I CPURegs:$F, (ADDiu GP, tglobaladdr:$T),
+ (XOR CPURegs:$lhs, CPURegs:$rhs))>;
// setcc patterns
def : Pat<(seteq CPURegs:$lhs, CPURegs:$rhs),
diff --git a/lib/Target/Mips/MipsMCAsmInfo.h b/lib/Target/Mips/MipsMCAsmInfo.h
index 15a867e..41b7192 100644
--- a/lib/Target/Mips/MipsMCAsmInfo.h
+++ b/lib/Target/Mips/MipsMCAsmInfo.h
@@ -19,7 +19,7 @@
namespace llvm {
class Target;
-
+
class MipsMCAsmInfo : public MCAsmInfo {
public:
explicit MipsMCAsmInfo(const Target &T, StringRef TT);
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
index 3719e58..c09b129 100644
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -38,7 +38,7 @@
using namespace llvm;
-MipsRegisterInfo::MipsRegisterInfo(const MipsSubtarget &ST,
+MipsRegisterInfo::MipsRegisterInfo(const MipsSubtarget &ST,
const TargetInstrInfo &tii)
: MipsGenRegisterInfo(Mips::ADJCALLSTACKDOWN, Mips::ADJCALLSTACKUP),
Subtarget(ST), TII(tii) {}
@@ -46,7 +46,7 @@ MipsRegisterInfo::MipsRegisterInfo(const MipsSubtarget &ST,
/// getRegisterNumbering - Given the enum value for some register, e.g.
/// Mips::RA, return the number that it corresponds to (e.g. 31).
unsigned MipsRegisterInfo::
-getRegisterNumbering(unsigned RegEnum)
+getRegisterNumbering(unsigned RegEnum)
{
switch (RegEnum) {
case Mips::ZERO : case Mips::F0 : case Mips::D0 : return 0;
@@ -82,30 +82,30 @@ getRegisterNumbering(unsigned RegEnum)
case Mips::FP : case Mips::F30: case Mips::D15: return 30;
case Mips::RA : case Mips::F31: return 31;
default: llvm_unreachable("Unknown register number!");
- }
+ }
return 0; // Not reached
}
unsigned MipsRegisterInfo::getPICCallReg() { return Mips::T9; }
//===----------------------------------------------------------------------===//
-// Callee Saved Registers methods
+// Callee Saved Registers methods
//===----------------------------------------------------------------------===//
/// Mips Callee Saved Registers
const unsigned* MipsRegisterInfo::
-getCalleeSavedRegs(const MachineFunction *MF) const
+getCalleeSavedRegs(const MachineFunction *MF) const
{
// Mips callee-save register range is $16-$23, $f20-$f30
static const unsigned SingleFloatOnlyCalleeSavedRegs[] = {
- Mips::S0, Mips::S1, Mips::S2, Mips::S3,
+ Mips::S0, Mips::S1, Mips::S2, Mips::S3,
Mips::S4, Mips::S5, Mips::S6, Mips::S7,
- Mips::F20, Mips::F21, Mips::F22, Mips::F23, Mips::F24, Mips::F25,
+ Mips::F20, Mips::F21, Mips::F22, Mips::F23, Mips::F24, Mips::F25,
Mips::F26, Mips::F27, Mips::F28, Mips::F29, Mips::F30, 0
};
static const unsigned BitMode32CalleeSavedRegs[] = {
- Mips::S0, Mips::S1, Mips::S2, Mips::S3,
+ Mips::S0, Mips::S1, Mips::S2, Mips::S3,
Mips::S4, Mips::S5, Mips::S6, Mips::S7,
Mips::F20, Mips::F22, Mips::F24, Mips::F26, Mips::F28, Mips::F30, 0
};
@@ -132,11 +132,11 @@ getReservedRegs(const MachineFunction &MF) const {
if (!Subtarget.isSingleFloat())
for (unsigned FReg=(Mips::F0)+1; FReg < Mips::F30; FReg+=2)
Reserved.set(FReg);
-
+
return Reserved;
}
-// This function eliminate ADJCALLSTACKDOWN,
+// This function eliminates ADJCALLSTACKDOWN and
// ADJCALLSTACKUP pseudo instructions
void MipsRegisterInfo::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
@@ -157,7 +157,7 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
unsigned i = 0;
while (!MI.getOperand(i).isFI()) {
++i;
- assert(i < MI.getNumOperands() &&
+ assert(i < MI.getNumOperands() &&
"Instr doesn't have FrameIndex operand!");
}
@@ -179,8 +179,43 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n");
- MI.getOperand(i-1).ChangeToImmediate(Offset);
- MI.getOperand(i).ChangeToRegister(getFrameRegister(MF), false);
+ unsigned NewReg = 0;
+ int NewImm = 0;
+ MachineBasicBlock &MBB = *MI.getParent();
+ bool ATUsed;
+ unsigned OrigReg = getFrameRegister(MF);
+ int OrigImm = Offset;
+
+ // OrigImm fits in the 16-bit field
+ if (OrigImm < 0x8000 && OrigImm >= -0x8000) {
+ NewReg = OrigReg;
+ NewImm = OrigImm;
+ ATUsed = false;
+ }
+ else {
+ const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
+ DebugLoc DL = II->getDebugLoc();
+ int ImmLo = OrigImm & 0xffff;
+ int ImmHi = (((unsigned)OrigImm & 0xffff0000) >> 16) +
+ ((OrigImm & 0x8000) != 0);
+
+ // FIXME: change this when mips goes MC.
+ BuildMI(MBB, II, DL, TII->get(Mips::NOAT));
+ BuildMI(MBB, II, DL, TII->get(Mips::LUi), Mips::AT).addImm(ImmHi);
+ BuildMI(MBB, II, DL, TII->get(Mips::ADDu), Mips::AT).addReg(OrigReg)
+ .addReg(Mips::AT);
+ NewReg = Mips::AT;
+ NewImm = ImmLo;
+
+ ATUsed = true;
+ }
+
+ // FIXME: change this when mips goes MC.
+ if (ATUsed)
+ BuildMI(MBB, ++II, MI.getDebugLoc(), TII.get(Mips::ATMACRO));
+
+ MI.getOperand(i).ChangeToRegister(NewReg, false);
+ MI.getOperand(i-1).ChangeToImmediate(NewImm);
}
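The hi/lo split above accounts for sign-extension of the low 16 bits: when bit 15 of the offset is set, the (negative) low half must be balanced by a +1 carry into the high half. A standalone sketch of the arithmetic:

  #include <cassert>
  #include <cstdint>

  // Split a 32-bit offset the way eliminateFrameIndex now does.
  static void splitOffset(int32_t Offset, int &Hi, int &Lo) {
    Lo = Offset & 0xffff;                  // low 16 bits, sign-extended on use
    Hi = ((uint32_t)Offset >> 16) +        // high 16 bits ...
         ((Offset & 0x8000) != 0);         // ... plus a carry when Lo is negative
  }

  int main() {
    int Hi, Lo;
    splitOffset(0x12348000, Hi, Lo);
    // LUi AT, Hi; ADDu AT, FrameReg, AT; then the memory op adds (int16_t)Lo.
    assert((Hi << 16) + (int16_t)Lo == 0x12348000);
    return 0;
  }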
void MipsRegisterInfo::
diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h
index a7f4bf9..767359f 100644
--- a/lib/Target/Mips/MipsRegisterInfo.h
+++ b/lib/Target/Mips/MipsRegisterInfo.h
@@ -26,7 +26,7 @@ class Type;
struct MipsRegisterInfo : public MipsGenRegisterInfo {
const MipsSubtarget &Subtarget;
const TargetInstrInfo &TII;
-
+
MipsRegisterInfo(const MipsSubtarget &Subtarget, const TargetInstrInfo &tii);
/// getRegisterNumbering - Given the enum value for some register, e.g.
diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td
index 60efe31..9f9cae7 100644
--- a/lib/Target/Mips/MipsRegisterInfo.td
+++ b/lib/Target/Mips/MipsRegisterInfo.td
@@ -17,7 +17,7 @@ class MipsReg<string n> : Register<n> {
let Namespace = "Mips";
}
-class MipsRegWithSubRegs<string n, list<Register> subregs>
+class MipsRegWithSubRegs<string n, list<Register> subregs>
: RegisterWithSubRegs<n, subregs> {
field bits<5> Num;
let Namespace = "Mips";
@@ -83,7 +83,7 @@ let Namespace = "Mips" in {
def SP : MipsGPRReg< 29, "SP">, DwarfRegNum<[29]>;
def FP : MipsGPRReg< 30, "FP">, DwarfRegNum<[30]>;
def RA : MipsGPRReg< 31, "RA">, DwarfRegNum<[31]>;
-
+
/// Mips Single point precision FPU Registers
def F0 : FPR< 0, "F0">, DwarfRegNum<[32]>;
def F1 : FPR< 1, "F1">, DwarfRegNum<[33]>;
@@ -117,7 +117,7 @@ let Namespace = "Mips" in {
def F29 : FPR<29, "F29">, DwarfRegNum<[61]>;
def F30 : FPR<30, "F30">, DwarfRegNum<[62]>;
def F31 : FPR<31, "F31">, DwarfRegNum<[63]>;
-
+
/// Mips Double point precision FPU Registers (aliased
/// with the single precision to hold 64 bit values)
def D0 : AFPR< 0, "F0", [F0, F1]>, DwarfRegNum<[32]>;
@@ -149,11 +149,11 @@ let Namespace = "Mips" in {
// Register Classes
//===----------------------------------------------------------------------===//
-def CPURegs : RegisterClass<"Mips", [i32], 32,
+def CPURegs : RegisterClass<"Mips", [i32], 32,
// Return Values and Arguments
[V0, V1, A0, A1, A2, A3,
// Not preserved across procedure calls
- T0, T1, T2, T3, T4, T5, T6, T7, T8, T9,
+ T0, T1, T2, T3, T4, T5, T6, T7, T8, T9,
// Callee save
S0, S1, S2, S3, S4, S5, S6, S7,
// Reserved
@@ -173,16 +173,16 @@ def CPURegs : RegisterClass<"Mips", [i32], 32,
// 64bit fp:
// * FGR64 - 32 64-bit registers
-// * AFGR64 - 16 32-bit even registers (32-bit FP Mode)
+// * AFGR64 - 16 32-bit even registers (32-bit FP Mode)
//
// 32bit fp:
// * FGR32 - 16 32-bit even registers
// * FGR32 - 32 32-bit registers (single float only mode)
-def FGR32 : RegisterClass<"Mips", [f32], 32,
+def FGR32 : RegisterClass<"Mips", [f32], 32,
// Return Values and Arguments
[F0, F1, F2, F3, F12, F13, F14, F15,
// Not preserved across procedure calls
- F4, F5, F6, F7, F8, F9, F10, F11, F16, F17, F18, F19,
+ F4, F5, F6, F7, F8, F9, F10, F11, F16, F17, F18, F19,
// Callee save
F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30,
// Reserved
@@ -195,17 +195,17 @@ def FGR32 : RegisterClass<"Mips", [f32], 32,
let MethodBodies = [{
static const unsigned MIPS_FGR32[] = {
- Mips::F0, Mips::F1, Mips::F2, Mips::F3, Mips::F12, Mips::F13,
- Mips::F14, Mips::F15, Mips::F4, Mips::F5, Mips::F6, Mips::F7,
- Mips::F8, Mips::F9, Mips::F10, Mips::F11, Mips::F16, Mips::F17,
- Mips::F18, Mips::F19, Mips::F20, Mips::F21, Mips::F22, Mips::F23,
- Mips::F24, Mips::F25, Mips::F26, Mips::F27, Mips::F28, Mips::F29,
+ Mips::F0, Mips::F1, Mips::F2, Mips::F3, Mips::F12, Mips::F13,
+ Mips::F14, Mips::F15, Mips::F4, Mips::F5, Mips::F6, Mips::F7,
+ Mips::F8, Mips::F9, Mips::F10, Mips::F11, Mips::F16, Mips::F17,
+ Mips::F18, Mips::F19, Mips::F20, Mips::F21, Mips::F22, Mips::F23,
+ Mips::F24, Mips::F25, Mips::F26, Mips::F27, Mips::F28, Mips::F29,
Mips::F30
};
static const unsigned MIPS_SVR4_FGR32[] = {
- Mips::F0, Mips::F2, Mips::F12, Mips::F14, Mips::F4,
- Mips::F6, Mips::F8, Mips::F10, Mips::F16, Mips::F18,
+ Mips::F0, Mips::F2, Mips::F12, Mips::F14, Mips::F4,
+ Mips::F6, Mips::F8, Mips::F10, Mips::F16, Mips::F18,
Mips::F20, Mips::F22, Mips::F24, Mips::F26, Mips::F28, Mips::F30,
};
@@ -217,7 +217,7 @@ def FGR32 : RegisterClass<"Mips", [f32], 32,
if (Subtarget.isSingleFloat())
return MIPS_FGR32;
else
- return MIPS_SVR4_FGR32;
+ return MIPS_SVR4_FGR32;
}
FGR32Class::iterator
@@ -233,11 +233,11 @@ def FGR32 : RegisterClass<"Mips", [f32], 32,
}];
}
-def AFGR64 : RegisterClass<"Mips", [f64], 64,
+def AFGR64 : RegisterClass<"Mips", [f64], 64,
// Return Values and Arguments
[D0, D1, D6, D7,
// Not preserved across procedure calls
- D2, D3, D4, D5, D8, D9,
+ D2, D3, D4, D5, D8, D9,
// Callee save
D10, D11, D12, D13, D14,
// Reserved
diff --git a/lib/Target/Mips/MipsSchedule.td b/lib/Target/Mips/MipsSchedule.td
index 49ca5d1..00be8ee 100644
--- a/lib/Target/Mips/MipsSchedule.td
+++ b/lib/Target/Mips/MipsSchedule.td
@@ -14,7 +14,7 @@ def ALU : FuncUnit;
def IMULDIV : FuncUnit;
//===----------------------------------------------------------------------===//
-// Instruction Itinerary classes used for Mips
+// Instruction Itinerary classes used for Mips
//===----------------------------------------------------------------------===//
def IIAlu : InstrItinClass;
def IILoad : InstrItinClass;
diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp
index db114da..70747f5d 100644
--- a/lib/Target/Mips/MipsSubtarget.cpp
+++ b/lib/Target/Mips/MipsSubtarget.cpp
@@ -17,7 +17,7 @@
using namespace llvm;
MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &FS,
- bool little) :
+ bool little) :
MipsArchVersion(Mips1), MipsABI(O32), IsLittle(little), IsSingleFloat(false),
IsFP64bit(false), IsGP64bit(false), HasVFPU(false), IsLinux(true),
HasSEInReg(false), HasCondMov(false), HasMulDivAdd(false), HasMinMax(false),
@@ -33,7 +33,7 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &FS,
if (TT.find("linux") == std::string::npos)
IsLinux = false;
- // When only the target triple is specified and is
+ // When only the target triple is specified and is
// an allegrex target, set the features. We also match
// big and little endian allegrex cores (don't really
// know if a big one exists)
diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h
index e4f4b33..096bbed 100644
--- a/lib/Target/Mips/MipsSubtarget.h
+++ b/lib/Target/Mips/MipsSubtarget.h
@@ -26,7 +26,7 @@ class MipsSubtarget : public TargetSubtarget {
public:
enum MipsABIEnum {
O32, O64, N32, N64, EABI
- };
+ };
protected:
@@ -34,10 +34,10 @@ protected:
Mips1, Mips2, Mips3, Mips4, Mips32, Mips32r2
};
- // Mips architecture version
+ // Mips architecture version
MipsArchEnum MipsArchVersion;
- // Mips supported ABIs
+ // Mips supported ABIs
MipsABIEnum MipsABI;
// IsLittle - The target is Little Endian
@@ -61,14 +61,14 @@ protected:
bool IsLinux;
/// Features related to the presence of specific instructions.
-
+
// HasSEInReg - SEB and SEH (signext in register) instructions.
bool HasSEInReg;
// HasCondMov - Conditional mov (MOVZ, MOVN) instructions.
bool HasCondMov;
- // HasMulDivAdd - Multiply add and sub (MADD, MADDu, MSUB, MSUBu)
+ // HasMulDivAdd - Multiply add and sub (MADD, MADDu, MSUB, MSUBu)
// instructions.
bool HasMulDivAdd;
@@ -93,14 +93,14 @@ public:
/// This constructor initializes the data members to match that
/// of the specified triple.
MipsSubtarget(const std::string &TT, const std::string &FS, bool little);
-
- /// ParseSubtargetFeatures - Parses features string setting specified
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
std::string ParseSubtargetFeatures(const std::string &FS,
const std::string &CPU);
bool isMips1() const { return MipsArchVersion == Mips1; }
- bool isMips32() const { return MipsArchVersion >= Mips32; }
+ bool isMips32() const { return MipsArchVersion >= Mips32; }
bool isMips32r2() const { return MipsArchVersion == Mips32r2; }
bool isLittle() const { return IsLittle; }
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index 7a2dd1f..53190b4 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -75,3 +75,9 @@ addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel)
PM.add(createMipsDelaySlotFillerPass(*this));
return true;
}
+
+bool MipsTargetMachine::
+addPostRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel) {
+ PM.add(createMipsExpandPseudoPass(*this));
+ return true;
+}
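The new hook presumes a pass factory declared alongside the other Mips pass creators, presumably in Mips.h; the declaration itself is not part of this diff. A sketch, with the signature inferred from the call site above:

  namespace llvm {
    class FunctionPass;
    class MipsTargetMachine;
    // Factory for the post-RA pseudo-expansion pass added above.
    FunctionPass *createMipsExpandPseudoPass(MipsTargetMachine &TM);
  }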
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h
index 43ab798..badb652 100644
--- a/lib/Target/Mips/MipsTargetMachine.h
+++ b/lib/Target/Mips/MipsTargetMachine.h
@@ -63,6 +63,7 @@ namespace llvm {
CodeGenOpt::Level OptLevel);
virtual bool addPreEmitPass(PassManagerBase &PM,
CodeGenOpt::Level OptLevel);
+ virtual bool addPostRegAlloc(PassManagerBase &, CodeGenOpt::Level);
};
/// MipselTargetMachine - Mipsel target machine.
diff --git a/lib/Target/Mips/MipsTargetObjectFile.h b/lib/Target/Mips/MipsTargetObjectFile.h
index 237b160..c394a9d 100644
--- a/lib/Target/Mips/MipsTargetObjectFile.h
+++ b/lib/Target/Mips/MipsTargetObjectFile.h
@@ -18,22 +18,22 @@ namespace llvm {
const MCSection *SmallDataSection;
const MCSection *SmallBSSSection;
public:
-
+
void Initialize(MCContext &Ctx, const TargetMachine &TM);
-
+
/// IsGlobalInSmallSection - Return true if this global address should be
/// placed into small data/bss section.
bool IsGlobalInSmallSection(const GlobalValue *GV,
const TargetMachine &TM, SectionKind Kind)const;
bool IsGlobalInSmallSection(const GlobalValue *GV,
- const TargetMachine &TM) const;
-
+ const TargetMachine &TM) const;
+
const MCSection *SelectSectionForGlobal(const GlobalValue *GV,
SectionKind Kind,
Mangler *Mang,
const TargetMachine &TM) const;
-
+
// TODO: Classify globals as mips wishes.
};
} // end namespace llvm
diff --git a/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp b/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp
index cc3d61e..a8d6fe9 100644
--- a/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp
+++ b/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp
@@ -14,7 +14,7 @@ using namespace llvm;
Target llvm::TheMipsTarget, llvm::TheMipselTarget;
-extern "C" void LLVMInitializeMipsTargetInfo() {
+extern "C" void LLVMInitializeMipsTargetInfo() {
RegisterTarget<Triple::mips> X(TheMipsTarget, "mips", "Mips");
RegisterTarget<Triple::mipsel> Y(TheMipselTarget, "mipsel", "Mipsel");
diff --git a/lib/Target/PTX/PTX.h b/lib/Target/PTX/PTX.h
index 19385ba..ec2be92 100644
--- a/lib/Target/PTX/PTX.h
+++ b/lib/Target/PTX/PTX.h
@@ -29,6 +29,11 @@ namespace llvm {
PARAMETER = 3,
SHARED = 4
};
+
+ enum Predicate {
+ PRED_NORMAL = 0,
+ PRED_NEGATE = 1
+ };
} // namespace PTX
FunctionPass *createPTXISelDag(PTXTargetMachine &TM,
@@ -37,7 +42,8 @@ namespace llvm {
FunctionPass *createPTXMFInfoExtract(PTXTargetMachine &TM,
CodeGenOpt::Level OptLevel);
- extern Target ThePTXTarget;
+ extern Target ThePTX32Target;
+ extern Target ThePTX64Target;
} // namespace llvm;
// Defines symbolic names for PTX registers.
diff --git a/lib/Target/PTX/PTX.td b/lib/Target/PTX/PTX.td
index 8b1a1b1..ae8326e 100644
--- a/lib/Target/PTX/PTX.td
+++ b/lib/Target/PTX/PTX.td
@@ -19,8 +19,35 @@ include "llvm/Target/Target.td"
// Subtarget Features.
//===----------------------------------------------------------------------===//
-def FeatureSM20 : SubtargetFeature<"sm20", "is_sm20", "true",
- "Enable sm_20 target architecture">;
+//===- Architectural Features ---------------------------------------------===//
+
+def FeatureDouble : SubtargetFeature<"double", "SupportsDouble", "true",
+ "Do not demote .f64 to .f32">;
+
+//===- PTX Version --------------------------------------------------------===//
+
+def FeaturePTX20 : SubtargetFeature<"ptx20", "PTXVersion", "PTX_VERSION_2_0",
+ "Use PTX Language Version 2.0",
+ []>;
+
+def FeaturePTX21 : SubtargetFeature<"ptx21", "PTXVersion", "PTX_VERSION_2_1",
+ "Use PTX Language Version 2.1",
+ [FeaturePTX20]>;
+
+def FeaturePTX22 : SubtargetFeature<"ptx22", "PTXVersion", "PTX_VERSION_2_2",
+ "Use PTX Language Version 2.2",
+ [FeaturePTX21]>;
+
+//===- PTX Shader Model ---------------------------------------------------===//
+
+def FeatureSM10 : SubtargetFeature<"sm10", "PTXShaderModel", "PTX_SM_1_0",
+ "Enable Shader Model 1.0 compliance">;
+def FeatureSM13 : SubtargetFeature<"sm13", "PTXShaderModel", "PTX_SM_1_3",
+ "Enable Shader Model 1.3 compliance",
+ [FeatureSM10, FeatureDouble]>;
+def FeatureSM20 : SubtargetFeature<"sm20", "PTXShaderModel", "PTX_SM_2_0",
+ "Enable Shader Model 2.0 compliance",
+ [FeatureSM13]>;
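The feature chains imply monotonically ordered enum values on the subtarget. A hypothetical sketch of how the PTXSubtarget accessors referenced elsewhere in this patch (supportsDouble, supportsSM13/SM20, supportsPTX21/PTX22) could be backed by these fields; the real PTXSubtarget.h is not shown here:

  // Hypothetical backing for the SubtargetFeature definitions above.
  class PTXSubtarget {
    enum PTXVersionEnum { PTX_VERSION_2_0, PTX_VERSION_2_1, PTX_VERSION_2_2 };
    enum PTXShaderModelEnum { PTX_SM_1_0, PTX_SM_1_3, PTX_SM_2_0 };
    PTXVersionEnum PTXVersion;          // set via ptx20/ptx21/ptx22
    PTXShaderModelEnum PTXShaderModel;  // set via sm10/sm13/sm20
    bool SupportsDouble;                // set via "double" (pulled in by sm13)
  public:
    bool supportsDouble() const { return SupportsDouble; }
    bool supportsSM13()   const { return PTXShaderModel >= PTX_SM_1_3; }
    bool supportsSM20()   const { return PTXShaderModel >= PTX_SM_2_0; }
    bool supportsPTX21()  const { return PTXVersion >= PTX_VERSION_2_1; }
    bool supportsPTX22()  const { return PTXVersion >= PTX_VERSION_2_2; }
  };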
//===----------------------------------------------------------------------===//
// PTX supported processors.
diff --git a/lib/Target/PTX/PTXAsmPrinter.cpp b/lib/Target/PTX/PTXAsmPrinter.cpp
index a605997..29c4781 100644
--- a/lib/Target/PTX/PTXAsmPrinter.cpp
+++ b/lib/Target/PTX/PTXAsmPrinter.cpp
@@ -24,6 +24,7 @@
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/Mangler.h"
@@ -37,13 +38,6 @@
using namespace llvm;
-static cl::opt<std::string>
-OptPTXVersion("ptx-version", cl::desc("Set PTX version"), cl::init("1.4"));
-
-static cl::opt<std::string>
-OptPTXTarget("ptx-target", cl::desc("Set GPU target (comma-separated list)"),
- cl::init("sm_10"));
-
namespace {
class PTXAsmPrinter : public AsmPrinter {
public:
@@ -68,6 +62,7 @@ public:
const char *Modifier = 0);
void printParamOperand(const MachineInstr *MI, int opNum, raw_ostream &OS,
const char *Modifier = 0);
+ void printPredicateOperand(const MachineInstr *MI, raw_ostream &O);
// autogen'd.
void printInstruction(const MachineInstr *MI, raw_ostream &OS);
@@ -82,27 +77,20 @@ private:
static const char PARAM_PREFIX[] = "__param_";
static const char *getRegisterTypeName(unsigned RegNo) {
-#define TEST_REGCLS(cls, clsstr) \
+#define TEST_REGCLS(cls, clsstr) \
if (PTX::cls ## RegisterClass->contains(RegNo)) return # clsstr;
- TEST_REGCLS(RRegs32, s32);
TEST_REGCLS(Preds, pred);
+ TEST_REGCLS(RRegu16, u16);
+ TEST_REGCLS(RRegu32, u32);
+ TEST_REGCLS(RRegu64, u64);
+ TEST_REGCLS(RRegf32, f32);
+ TEST_REGCLS(RRegf64, f64);
#undef TEST_REGCLS
llvm_unreachable("Not in any register class!");
return NULL;
}
-static const char *getInstructionTypeName(const MachineInstr *MI) {
- for (int i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
- if (MO.getType() == MachineOperand::MO_Register)
- return getRegisterTypeName(MO.getReg());
- }
-
- llvm_unreachable("No reg operand found in instruction!");
- return NULL;
-}
-
static const char *getStateSpaceName(unsigned addressSpace) {
switch (addressSpace) {
default: llvm_unreachable("Unknown state space");
@@ -115,6 +103,28 @@ static const char *getStateSpaceName(unsigned addressSpace) {
return NULL;
}
+static const char *getTypeName(const Type* type) {
+ while (true) {
+ switch (type->getTypeID()) {
+ default: llvm_unreachable("Unknown type");
+ case Type::FloatTyID: return ".f32";
+ case Type::DoubleTyID: return ".f64";
+ case Type::IntegerTyID:
+ switch (type->getPrimitiveSizeInBits()) {
+ default: llvm_unreachable("Unknown integer bit-width");
+ case 16: return ".u16";
+ case 32: return ".u32";
+ case 64: return ".u64";
+ }
+ case Type::ArrayTyID:
+ case Type::PointerTyID:
+ type = dyn_cast<const SequentialType>(type)->getElementType();
+ break;
+ }
+ }
+ return NULL;
+}
+
bool PTXAsmPrinter::doFinalization(Module &M) {
// XXX Temporarily remove global variables so that doFinalization() will not
// emit them again (global variables are emitted at beginning).
@@ -146,8 +156,12 @@ bool PTXAsmPrinter::doFinalization(Module &M) {
void PTXAsmPrinter::EmitStartOfAsmFile(Module &M)
{
- OutStreamer.EmitRawText(Twine("\t.version " + OptPTXVersion));
- OutStreamer.EmitRawText(Twine("\t.target " + OptPTXTarget));
+ const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>();
+
+ OutStreamer.EmitRawText(Twine("\t.version " + ST.getPTXVersionString()));
+ OutStreamer.EmitRawText(Twine("\t.target " + ST.getTargetString() +
+ (ST.supportsDouble() ? ""
+ : ", map_f64_to_f32")));
OutStreamer.AddBlankLine();
// declare global variables
@@ -186,17 +200,16 @@ void PTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
std::string str;
str.reserve(64);
- // Write instruction to str
raw_string_ostream OS(str);
+
+ // Emit predicate
+ printPredicateOperand(MI, OS);
+
+ // Write instruction to str
printInstruction(MI, OS);
OS << ';';
OS.flush();
- // Replace "%type" if found
- size_t pos;
- if ((pos = str.find("%type")) != std::string::npos)
- str.replace(pos, /*strlen("%type")==*/5, getInstructionTypeName(MI));
-
StringRef strref = StringRef(str);
OutStreamer.EmitRawText(strref);
}
@@ -213,11 +226,36 @@ void PTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
OS << *Mang->getSymbol(MO.getGlobal());
break;
case MachineOperand::MO_Immediate:
- OS << (int) MO.getImm();
+ OS << (long) MO.getImm();
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ OS << *MO.getMBB()->getSymbol();
break;
case MachineOperand::MO_Register:
OS << getRegisterName(MO.getReg());
break;
+ case MachineOperand::MO_FPImmediate:
+ APInt constFP = MO.getFPImm()->getValueAPF().bitcastToAPInt();
+ bool isFloat = MO.getFPImm()->getType()->getTypeID() == Type::FloatTyID;
+ // Emit 0F for 32-bit floats and 0D for 64-bit doubles.
+ if (isFloat) {
+ OS << "0F";
+ }
+ else {
+ OS << "0D";
+ }
+ // Emit the encoded floating-point value.
+ if (constFP.getZExtValue() > 0) {
+ OS << constFP.toString(16, false);
+ }
+ else {
+ OS << "00000000";
+ // If we have a double-precision zero, pad to 8 bytes.
+ if (!isFloat) {
+ OS << "00000000";
+ }
+ }
+ break;
}
}
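The zero special case exists because APInt::toString() prints no leading zeros, so a zero bit pattern would otherwise come out too short. A self-contained sketch of the same 0F encoding (helper name is illustrative):

  #include <cstdint>
  #include <cstdio>
  #include <cstring>

  // Print a float the way the MO_FPImmediate case above does: "0F" followed
  // by the raw IEEE-754 bit pattern in hex, e.g. 1.0f -> 0F3F800000.
  static void printPTXFloat(float F) {
    uint32_t Bits;
    std::memcpy(&Bits, &F, sizeof(Bits));  // bitcast, like bitcastToAPInt()
    std::printf("0F%08X\n", Bits);         // %08X keeps the leading zeros
  }

  int main() {
    printPTXFloat(1.0f);  // 0F3F800000
    printPTXFloat(0.0f);  // 0F00000000
    return 0;
  }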
@@ -265,13 +303,77 @@ void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) {
decl += " ";
}
- // TODO: add types
- decl += ".s32 ";
- decl += gvsym->getName();
+ if (PointerType::classof(gv->getType())) {
+ const PointerType* pointerTy = dyn_cast<const PointerType>(gv->getType());
+ const Type* elementTy = pointerTy->getElementType();
+
+ decl += ".b8 ";
+ decl += gvsym->getName();
+ decl += "[";
+
+ if (elementTy->isArrayTy())
+ {
+ assert(elementTy->isArrayTy() && "Only pointers to arrays are supported");
- if (ArrayType::classof(gv->getType()) || PointerType::classof(gv->getType()))
- decl += "[]";
+ const ArrayType* arrayTy = dyn_cast<const ArrayType>(elementTy);
+ elementTy = arrayTy->getElementType();
+
+ unsigned numElements = arrayTy->getNumElements();
+
+ while (elementTy->isArrayTy()) {
+
+ arrayTy = dyn_cast<const ArrayType>(elementTy);
+ elementTy = arrayTy->getElementType();
+
+ numElements *= arrayTy->getNumElements();
+ }
+
+ // FIXME: isPrimitiveType() == false for i16?
+ assert(elementTy->isSingleValueType() &&
+ "Non-primitive types are not handled");
+
+ // Compute the size of the array, in bytes.
+ uint64_t arraySize = (elementTy->getPrimitiveSizeInBits() >> 3)
+ * numElements;
+
+ decl += utostr(arraySize);
+ }
+
+ decl += "]";
+
+ // handle string constants (assume ConstantArray means string)
+
+ if (gv->hasInitializer())
+ {
+ Constant *C = gv->getInitializer();
+ if (const ConstantArray *CA = dyn_cast<ConstantArray>(C))
+ {
+ decl += " = {";
+
+ for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i)
+ {
+ if (i > 0) decl += ",";
+
+ decl += "0x" + utohexstr(cast<ConstantInt>(CA->getOperand(i))->getZExtValue());
+ }
+
+ decl += "}";
+ }
+ }
+ }
+ else {
+ // Note: this is currently the fall-through case and most likely generates
+ // incorrect code.
+ decl += getTypeName(gv->getType());
+ decl += " ";
+
+ decl += gvsym->getName();
+
+ if (ArrayType::classof(gv->getType()) ||
+ PointerType::classof(gv->getType()))
+ decl += "[]";
+ }
decl += ";";
@@ -313,16 +415,24 @@ void PTXAsmPrinter::EmitFunctionDeclaration() {
if (!MFI->argRegEmpty()) {
decl += " (";
if (isKernel) {
- for (int i = 0, e = MFI->getNumArg(); i != e; ++i) {
- if (i != 0)
+ unsigned cnt = 0;
+ for(PTXMachineFunctionInfo::reg_iterator
+ i = MFI->argRegBegin(), e = MFI->argRegEnd(), b = i;
+ i != e; ++i) {
+ reg = *i;
+ assert(reg != PTX::NoRegister && "Not a valid register!");
+ if (i != b)
decl += ", ";
- decl += ".param .s32 "; // TODO: add types
+ decl += ".param .";
+ decl += getRegisterTypeName(reg);
+ decl += " ";
decl += PARAM_PREFIX;
- decl += utostr(i + 1);
+ decl += utostr(++cnt);
}
} else {
for (PTXMachineFunctionInfo::reg_iterator
- i = MFI->argRegBegin(), e = MFI->argRegEnd(), b = i; i != e; ++i) {
+ i = MFI->argRegBegin(), e = MFI->argRegEnd(), b = i;
+ i != e; ++i) {
reg = *i;
assert(reg != PTX::NoRegister && "Not a valid register!");
if (i != b)
@@ -339,9 +449,29 @@ void PTXAsmPrinter::EmitFunctionDeclaration() {
OutStreamer.EmitRawText(Twine(decl));
}
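With the register-type lookup in place, a kernel taking (i32, float) arguments would get a parameter list like " (.param .u32 __param_1, .param .f32 __param_2)", assuming those types land in RRegu32/RRegf32 as registered in PTXISelLowering. A sketch mirroring the kernel branch above:

  #include <iostream>
  #include <string>

  int main() {
    const char *types[] = { "u32", "f32" };  // from getRegisterTypeName()
    std::string decl = " (";
    for (unsigned cnt = 0; cnt != 2; ++cnt) {
      if (cnt) decl += ", ";
      decl += ".param .";
      decl += types[cnt];
      decl += " __param_";                   // PARAM_PREFIX
      decl += static_cast<char>('1' + cnt);  // 1-based parameter index
    }
    decl += ")";
    std::cout << decl << "\n";  // " (.param .u32 __param_1, .param .f32 __param_2)"
    return 0;
  }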
+void PTXAsmPrinter::
+printPredicateOperand(const MachineInstr *MI, raw_ostream &O) {
+ int i = MI->findFirstPredOperandIdx();
+ if (i == -1)
+ llvm_unreachable("missing predicate operand");
+
+ unsigned reg = MI->getOperand(i).getReg();
+ int predOp = MI->getOperand(i+1).getImm();
+
+ DEBUG(dbgs() << "predicate: (" << reg << ", " << predOp << ")\n");
+
+ if (reg != PTX::NoRegister) {
+ O << '@';
+ if (predOp == PTX::PRED_NEGATE)
+ O << '!';
+ O << getRegisterName(reg);
+ }
+}
+
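The prefix printed here follows PTX's guard-predicate syntax. Illustration of the three states, with predicate registers assumed to print as %p<N>:

  //   reg == PTX::NoRegister       ->  "      mov.u32 %r1, %r2;"
  //   (reg, PTX::PRED_NORMAL)      ->  "@%p1  mov.u32 %r1, %r2;"
  //   (reg, PTX::PRED_NEGATE)      ->  "@!%p1 mov.u32 %r1, %r2;"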
#include "PTXGenAsmWriter.inc"
// Force static initialization.
extern "C" void LLVMInitializePTXAsmPrinter() {
- RegisterAsmPrinter<PTXAsmPrinter> X(ThePTXTarget);
+ RegisterAsmPrinter<PTXAsmPrinter> X(ThePTX32Target);
+ RegisterAsmPrinter<PTXAsmPrinter> Y(ThePTX64Target);
}
diff --git a/lib/Target/PTX/PTXFrameLowering.h b/lib/Target/PTX/PTXFrameLowering.h
index 574ae7a..9320676 100644
--- a/lib/Target/PTX/PTXFrameLowering.h
+++ b/lib/Target/PTX/PTXFrameLowering.h
@@ -27,7 +27,8 @@ protected:
public:
explicit PTXFrameLowering(const PTXSubtarget &sti)
- : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 2, -2), STI(sti) {
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 2, -2),
+ STI(sti) {
}
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
diff --git a/lib/Target/PTX/PTXISelDAGToDAG.cpp b/lib/Target/PTX/PTXISelDAGToDAG.cpp
index efb0e8b..b3c85da 100644
--- a/lib/Target/PTX/PTXISelDAGToDAG.cpp
+++ b/lib/Target/PTX/PTXISelDAGToDAG.cpp
@@ -15,6 +15,7 @@
#include "PTXTargetMachine.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/DerivedTypes.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -42,8 +43,14 @@ class PTXDAGToDAGISel : public SelectionDAGISel {
private:
SDNode *SelectREAD_PARAM(SDNode *Node);
+ // We need this only because we can't match the BRAdp instruction
+ // pattern (PTXbrcond bb:$d, ...) in PTXInstrInfo.td
+ SDNode *SelectBRCOND(SDNode *Node);
+
bool isImm(const SDValue &operand);
bool SelectImm(const SDValue &operand, SDValue &imm);
+
+ const PTXSubtarget& getSubtarget() const;
}; // class PTXDAGToDAGISel
} // namespace
@@ -59,21 +66,62 @@ PTXDAGToDAGISel::PTXDAGToDAGISel(PTXTargetMachine &TM,
: SelectionDAGISel(TM, OptLevel) {}
SDNode *PTXDAGToDAGISel::Select(SDNode *Node) {
- if (Node->getOpcode() == PTXISD::READ_PARAM)
- return SelectREAD_PARAM(Node);
- else
- return SelectCode(Node);
+ switch (Node->getOpcode()) {
+ case PTXISD::READ_PARAM:
+ return SelectREAD_PARAM(Node);
+ case ISD::BRCOND:
+ return SelectBRCOND(Node);
+ default:
+ return SelectCode(Node);
+ }
}
SDNode *PTXDAGToDAGISel::SelectREAD_PARAM(SDNode *Node) {
- SDValue index = Node->getOperand(1);
- DebugLoc dl = Node->getDebugLoc();
+ SDValue index = Node->getOperand(1);
+ DebugLoc dl = Node->getDebugLoc();
+ unsigned opcode;
if (index.getOpcode() != ISD::TargetConstant)
llvm_unreachable("READ_PARAM: index is not ISD::TargetConstant");
+ if (Node->getValueType(0) == MVT::i16) {
+ opcode = PTX::LDpiU16;
+ }
+ else if (Node->getValueType(0) == MVT::i32) {
+ opcode = PTX::LDpiU32;
+ }
+ else if (Node->getValueType(0) == MVT::i64) {
+ opcode = PTX::LDpiU64;
+ }
+ else if (Node->getValueType(0) == MVT::f32) {
+ opcode = PTX::LDpiF32;
+ }
+ else if (Node->getValueType(0) == MVT::f64) {
+ opcode = PTX::LDpiF64;
+ }
+ else {
+ llvm_unreachable("Unknown parameter type for ld.param");
+ }
+
return PTXInstrInfo::
- GetPTXMachineNode(CurDAG, PTX::LDpi, dl, MVT::i32, index);
+ GetPTXMachineNode(CurDAG, opcode, dl, Node->getValueType(0), index);
+}
+
+SDNode *PTXDAGToDAGISel::SelectBRCOND(SDNode *Node) {
+ assert(Node->getNumOperands() >= 3);
+
+ SDValue Chain = Node->getOperand(0);
+ SDValue Pred = Node->getOperand(1);
+ SDValue Target = Node->getOperand(2); // branch target
+ SDValue PredOp = CurDAG->getTargetConstant(PTX::PRED_NORMAL, MVT::i32);
+ DebugLoc dl = Node->getDebugLoc();
+
+ assert(Target.getOpcode() == ISD::BasicBlock);
+ assert(Pred.getValueType() == MVT::i1);
+
+ // Emit BRAdp
+ SDValue Ops[] = { Target, Pred, PredOp, Chain };
+ return CurDAG->getMachineNode(PTX::BRAdp, dl, MVT::Other, Ops, 4);
}
// Match memory operand of the form [reg+reg]
@@ -82,8 +130,11 @@ bool PTXDAGToDAGISel::SelectADDRrr(SDValue &Addr, SDValue &R1, SDValue &R2) {
isImm(Addr.getOperand(0)) || isImm(Addr.getOperand(1)))
return false;
+ assert(Addr.getValueType().isSimple() && "Type must be simple");
+
R1 = Addr;
- R2 = CurDAG->getTargetConstant(0, MVT::i32);
+ R2 = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
+
return true;
}
@@ -95,8 +146,12 @@ bool PTXDAGToDAGISel::SelectADDRri(SDValue &Addr, SDValue &Base,
if (isImm(Addr))
return false;
// it is [reg]
+
+ assert(Addr.getValueType().isSimple() && "Type must be simple");
+
Base = Addr;
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
+
return true;
}
@@ -129,7 +184,10 @@ bool PTXDAGToDAGISel::SelectADDRii(SDValue &Addr, SDValue &Base,
// is [imm]?
if (SelectImm(Addr, Base)) {
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ assert(Addr.getValueType().isSimple() && "Type must be simple");
+
+ Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
+
return true;
}
@@ -146,6 +204,13 @@ bool PTXDAGToDAGISel::SelectImm(const SDValue &operand, SDValue &imm) {
return false;
ConstantSDNode *CN = cast<ConstantSDNode>(node);
- imm = CurDAG->getTargetConstant(*CN->getConstantIntValue(), MVT::i32);
+ imm = CurDAG->getTargetConstant(*CN->getConstantIntValue(),
+ operand.getValueType());
return true;
}
+
+const PTXSubtarget& PTXDAGToDAGISel::getSubtarget() const
+{
+ return TM.getSubtarget<PTXSubtarget>();
+}
+
diff --git a/lib/Target/PTX/PTXISelLowering.cpp b/lib/Target/PTX/PTXISelLowering.cpp
index e6d4490..23b93da 100644
--- a/lib/Target/PTX/PTXISelLowering.cpp
+++ b/lib/Target/PTX/PTXISelLowering.cpp
@@ -20,6 +20,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -27,21 +28,60 @@ PTXTargetLowering::PTXTargetLowering(TargetMachine &TM)
: TargetLowering(TM, new TargetLoweringObjectFileELF()) {
// Set up the register classes.
addRegisterClass(MVT::i1, PTX::PredsRegisterClass);
- addRegisterClass(MVT::i32, PTX::RRegs32RegisterClass);
-
+ addRegisterClass(MVT::i16, PTX::RRegu16RegisterClass);
+ addRegisterClass(MVT::i32, PTX::RRegu32RegisterClass);
+ addRegisterClass(MVT::i64, PTX::RRegu64RegisterClass);
+ addRegisterClass(MVT::f32, PTX::RRegf32RegisterClass);
+ addRegisterClass(MVT::f64, PTX::RRegf64RegisterClass);
+
+ setBooleanContents(ZeroOrOneBooleanContent);
+
setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
+ setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+ setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
+
+ // Turn i16 (z)extload into load + (z)extend
+ setLoadExtAction(ISD::EXTLOAD, MVT::i16, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Expand);
+
+ // Turn f32 extload into load + fextend
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+
+ // Turn f64 truncstore into trunc + store.
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
// Customize translation of memory addresses
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
-
+ setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
+
+ // Expand BR_CC into BRCOND
+ setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+
+ // Expand SELECT_CC into SETCC
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
+
+ // need to lower SETCC of Preds into bitwise logic
+ setOperationAction(ISD::SETCC, MVT::i1, Custom);
+
// Compute derived properties from the register classes
computeRegisterProperties();
}
+MVT::SimpleValueType PTXTargetLowering::getSetCCResultType(EVT VT) const {
+ return MVT::i1;
+}
+
SDValue PTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
- default: llvm_unreachable("Unimplemented operand");
- case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ default:
+ llvm_unreachable("Unimplemented operand");
+ case ISD::SETCC:
+ return LowerSETCC(Op, DAG);
+ case ISD::GlobalAddress:
+ return LowerGlobalAddress(Op, DAG);
}
}
@@ -49,6 +89,8 @@ const char *PTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
default:
llvm_unreachable("Unknown opcode");
+ case PTXISD::COPY_ADDRESS:
+ return "PTXISD::COPY_ADDRESS";
case PTXISD::READ_PARAM:
return "PTXISD::READ_PARAM";
case PTXISD::EXIT:
@@ -62,12 +104,43 @@ const char *PTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
// Custom Lower Operation
//===----------------------------------------------------------------------===//
+SDValue PTXTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
+ assert(Op.getValueType() == MVT::i1 && "SetCC type must be 1-bit integer");
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDValue Op2 = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+
+ // Look for X == 0, X == 1, X != 0, or X != 1
+ // We can simplify these to bitwise logic
+
+ if (Op1.getOpcode() == ISD::Constant &&
+ (cast<ConstantSDNode>(Op1)->getZExtValue() == 1 ||
+ cast<ConstantSDNode>(Op1)->isNullValue()) &&
+ (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+
+ return DAG.getNode(ISD::AND, dl, MVT::i1, Op0, Op1);
+ }
+
+ return DAG.getNode(ISD::SETCC, dl, MVT::i1, Op0, Op1, Op2);
+}
+
SDValue PTXTargetLowering::
LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
EVT PtrVT = getPointerTy();
DebugLoc dl = Op.getDebugLoc();
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
- return DAG.getTargetGlobalAddress(GV, dl, PtrVT);
+
+ assert(PtrVT.isSimple() && "Pointer must be to primitive type.");
+
+ SDValue targetGlobal = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
+ SDValue movInstr = DAG.getNode(PTXISD::COPY_ADDRESS,
+ dl,
+ PtrVT.getSimpleVT(),
+ targetGlobal);
+
+ return movInstr;
}
//===----------------------------------------------------------------------===//
@@ -87,9 +160,13 @@ struct argmap_entry {
bool operator==(MVT::SimpleValueType _VT) const { return VT == _VT; }
} argmap[] = {
argmap_entry(MVT::i1, PTX::PredsRegisterClass),
- argmap_entry(MVT::i32, PTX::RRegs32RegisterClass)
+ argmap_entry(MVT::i16, PTX::RRegu16RegisterClass),
+ argmap_entry(MVT::i32, PTX::RRegu32RegisterClass),
+ argmap_entry(MVT::i64, PTX::RRegu64RegisterClass),
+ argmap_entry(MVT::f32, PTX::RRegf32RegisterClass),
+ argmap_entry(MVT::f64, PTX::RRegf64RegisterClass)
};
-} // end anonymous namespace
+} // end anonymous namespace
SDValue PTXTargetLowering::
LowerFormalArguments(SDValue Chain,
@@ -185,10 +262,25 @@ SDValue PTXTargetLowering::
if (Outs.size() == 0)
return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain);
- assert(Outs[0].VT == MVT::i32 && "Can return only basic types");
-
SDValue Flag;
- unsigned reg = PTX::R0;
+ unsigned reg;
+
+ if (Outs[0].VT == MVT::i16) {
+ reg = PTX::RH0;
+ }
+ else if (Outs[0].VT == MVT::i32) {
+ reg = PTX::R0;
+ }
+ else if (Outs[0].VT == MVT::i64) {
+ reg = PTX::RD0;
+ }
+ else if (Outs[0].VT == MVT::f32) {
+ reg = PTX::F0;
+ }
+ else {
+ assert(Outs[0].VT == MVT::f64 && "Can return only basic types");
+ reg = PTX::FD0;
+ }
MachineFunction &MF = DAG.getMachineFunction();
PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
diff --git a/lib/Target/PTX/PTXISelLowering.h b/lib/Target/PTX/PTXISelLowering.h
index b03a9f6..6a7e3e6 100644
--- a/lib/Target/PTX/PTXISelLowering.h
+++ b/lib/Target/PTX/PTXISelLowering.h
@@ -26,7 +26,8 @@ namespace PTXISD {
FIRST_NUMBER = ISD::BUILTIN_OP_END,
READ_PARAM,
EXIT,
- RET
+ RET,
+ COPY_ADDRESS
};
} // namespace PTXISD
@@ -41,6 +42,8 @@ class PTXTargetLowering : public TargetLowering {
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+ virtual SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+
virtual SDValue
LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv,
@@ -58,7 +61,9 @@ class PTXTargetLowering : public TargetLowering {
const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl,
SelectionDAG &DAG) const;
-
+
+ virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const;
+
private:
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
}; // class PTXTargetLowering
diff --git a/lib/Target/PTX/PTXInstrInfo.cpp b/lib/Target/PTX/PTXInstrInfo.cpp
index 805759b..a12a6d0 100644
--- a/lib/Target/PTX/PTXInstrInfo.cpp
+++ b/lib/Target/PTX/PTXInstrInfo.cpp
@@ -11,9 +11,15 @@
//
//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "ptx-instrinfo"
+
#include "PTX.h"
#include "PTXInstrInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -27,20 +33,27 @@ static const struct map_entry {
const TargetRegisterClass *cls;
const int opcode;
} map[] = {
- { &PTX::RRegs32RegClass, PTX::MOVrr },
- { &PTX::PredsRegClass, PTX::MOVpp }
+ { &PTX::RRegu16RegClass, PTX::MOVU16rr },
+ { &PTX::RRegu32RegClass, PTX::MOVU32rr },
+ { &PTX::RRegu64RegClass, PTX::MOVU64rr },
+ { &PTX::RRegf32RegClass, PTX::MOVF32rr },
+ { &PTX::RRegf64RegClass, PTX::MOVF64rr },
+ { &PTX::PredsRegClass, PTX::MOVPREDrr }
};
void PTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DstReg, unsigned SrcReg,
bool KillSrc) const {
- for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++ i)
- if (PTX::RRegs32RegClass.contains(DstReg, SrcReg)) {
- BuildMI(MBB, I, DL,
- get(PTX::MOVrr), DstReg).addReg(SrcReg, getKillRegState(KillSrc));
+ for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++ i) {
+ if (map[i].cls->contains(DstReg, SrcReg)) {
+ const TargetInstrDesc &TID = get(map[i].opcode);
+ MachineInstr *MI = BuildMI(MBB, I, DL, TID, DstReg).
+ addReg(SrcReg, getKillRegState(KillSrc));
+ AddDefaultPredicate(MI);
return;
}
+ }
llvm_unreachable("Impossible reg-to-reg copy");
}
@@ -56,12 +69,9 @@ bool PTXInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++ i)
if (DstRC == map[i].cls) {
- MachineInstr *MI = BuildMI(MBB, I, DL, get(map[i].opcode),
- DstReg).addReg(SrcReg);
- if (MI->findFirstPredOperandIdx() == -1) {
- MI->addOperand(MachineOperand::CreateReg(0, false));
- MI->addOperand(MachineOperand::CreateImm(/*IsInv=*/0));
- }
+ const TargetInstrDesc &TID = get(map[i].opcode);
+ MachineInstr *MI = BuildMI(MBB, I, DL, TID, DstReg).addReg(SrcReg);
+ AddDefaultPredicate(MI);
return true;
}
@@ -74,8 +84,12 @@ bool PTXInstrInfo::isMoveInstr(const MachineInstr& MI,
switch (MI.getOpcode()) {
default:
return false;
- case PTX::MOVpp:
- case PTX::MOVrr:
+ case PTX::MOVU16rr:
+ case PTX::MOVU32rr:
+ case PTX::MOVU64rr:
+ case PTX::MOVF32rr:
+ case PTX::MOVF64rr:
+ case PTX::MOVPREDrr:
assert(MI.getNumOperands() >= 2 &&
MI.getOperand(0).isReg() && MI.getOperand(1).isReg() &&
"Invalid register-register move instruction");
@@ -85,3 +99,239 @@ bool PTXInstrInfo::isMoveInstr(const MachineInstr& MI,
return true;
}
}
+
+// predicate support
+
+bool PTXInstrInfo::isPredicated(const MachineInstr *MI) const {
+ int i = MI->findFirstPredOperandIdx();
+ return i != -1 && MI->getOperand(i).getReg() != PTX::NoRegister;
+}
+
+bool PTXInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
+ return !isPredicated(MI) && get(MI->getOpcode()).isTerminator();
+}
+
+bool PTXInstrInfo::
+PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Pred) const {
+ if (Pred.size() < 2)
+ llvm_unreachable("lesser than 2 predicate operands are provided");
+
+ int i = MI->findFirstPredOperandIdx();
+ if (i == -1)
+ llvm_unreachable("missing predicate operand");
+
+ MI->getOperand(i).setReg(Pred[0].getReg());
+ MI->getOperand(i+1).setImm(Pred[1].getImm());
+
+ return true;
+}
+
+bool PTXInstrInfo::
+SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const {
+ const MachineOperand &PredReg1 = Pred1[0];
+ const MachineOperand &PredReg2 = Pred2[0];
+ if (PredReg1.getReg() != PredReg2.getReg())
+ return false;
+
+ const MachineOperand &PredOp1 = Pred1[1];
+ const MachineOperand &PredOp2 = Pred2[1];
+ if (PredOp1.getImm() != PredOp2.getImm())
+ return false;
+
+ return true;
+}
+
+bool PTXInstrInfo::
+DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const {
+ // If an instruction sets a predicate register, it defines a predicate.
+
+ // TODO: support the 5-operand format of the setp instruction
+
+ if (MI->getNumOperands() < 1)
+ return false;
+
+ const MachineOperand &MO = MI->getOperand(0);
+
+ if (!MO.isReg() || RI.getRegClass(MO.getReg()) != &PTX::PredsRegClass)
+ return false;
+
+ Pred.push_back(MO);
+ Pred.push_back(MachineOperand::CreateImm(PTX::PRED_NORMAL));
+ return true;
+}
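All of these hooks share the two-operand predicate convention: a Preds register followed by a PRED_NORMAL/PRED_NEGATE immediate. A minimal sketch of a caller predicating an instruction on the negation of a predicate register under that convention:

  #include "PTX.h"
  #include "llvm/ADT/SmallVector.h"
  #include "llvm/CodeGen/MachineInstr.h"
  #include "llvm/Target/TargetInstrInfo.h"

  // Sketch: guard MI with "@!PredReg" via the hooks defined above.
  static bool predicateOnNot(llvm::MachineInstr *MI,
                             const llvm::TargetInstrInfo *TII,
                             unsigned PredReg) {
    llvm::SmallVector<llvm::MachineOperand, 2> Pred;
    Pred.push_back(llvm::MachineOperand::CreateReg(PredReg, /*isDef=*/false));
    Pred.push_back(llvm::MachineOperand::CreateImm(llvm::PTX::PRED_NEGATE));
    return TII->PredicateInstruction(MI, Pred);
  }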
+
+// branch support
+
+bool PTXInstrInfo::
+AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // TODO: implement the cases where AllowModify is true
+
+ if (MBB.empty())
+ return true;
+
+ MachineBasicBlock::const_iterator iter = MBB.end();
+ const MachineInstr& instLast1 = *--iter;
+ const TargetInstrDesc &desc1 = instLast1.getDesc();
+ // for the special case where MBB has only 1 instruction
+ const bool IsSizeOne = MBB.size() == 1;
+ // if IsSizeOne is true, *--iter and instLast2 would be invalid, so we
+ // put dummy values in instLast2 and desc2 since they are still referenced
+ const MachineInstr& instLast2 = IsSizeOne ? instLast1 : *--iter;
+ const TargetInstrDesc &desc2 = IsSizeOne ? desc1 : instLast2.getDesc();
+
+ DEBUG(dbgs() << "\n");
+ DEBUG(dbgs() << "AnalyzeBranch: opcode: " << instLast1.getOpcode() << "\n");
+ DEBUG(dbgs() << "AnalyzeBranch: MBB: " << MBB.getName().str() << "\n");
+ DEBUG(dbgs() << "AnalyzeBranch: TBB: " << TBB << "\n");
+ DEBUG(dbgs() << "AnalyzeBranch: FBB: " << FBB << "\n");
+
+ // this block ends with no branches
+ if (!IsAnyKindOfBranch(instLast1)) {
+ DEBUG(dbgs() << "AnalyzeBranch: ends with no branch\n");
+ return false;
+ }
+
+ // this block ends with only an unconditional branch
+ if (desc1.isUnconditionalBranch() &&
+ // when IsSizeOne is true, it "absorbs" the evaluation of instLast2
+ (IsSizeOne || !IsAnyKindOfBranch(instLast2))) {
+ DEBUG(dbgs() << "AnalyzeBranch: ends with only uncond branch\n");
+ TBB = GetBranchTarget(instLast1);
+ return false;
+ }
+
+ // this block ends with a conditional branch and
+ // it falls through to a successor block
+ if (desc1.isConditionalBranch() &&
+ IsAnySuccessorAlsoLayoutSuccessor(MBB)) {
+ DEBUG(dbgs() << "AnalyzeBranch: ends with cond branch and fall through\n");
+ TBB = GetBranchTarget(instLast1);
+ int i = instLast1.findFirstPredOperandIdx();
+ Cond.push_back(instLast1.getOperand(i));
+ Cond.push_back(instLast1.getOperand(i+1));
+ return false;
+ }
+
+ // when IsSizeOne is true, we are done
+ if (IsSizeOne)
+ return true;
+
+ // this block ends with a conditional branch
+ // followed by an unconditional branch
+ if (desc2.isConditionalBranch() &&
+ desc1.isUnconditionalBranch()) {
+ DEBUG(dbgs() << "AnalyzeBranch: ends with cond and uncond branch\n");
+ TBB = GetBranchTarget(instLast2);
+ FBB = GetBranchTarget(instLast1);
+ int i = instLast2.findFirstPredOperandIdx();
+ Cond.push_back(instLast2.getOperand(i));
+ Cond.push_back(instLast2.getOperand(i+1));
+ return false;
+ }
+
+ // branch cannot be understood
+ DEBUG(dbgs() << "AnalyzeBranch: cannot be understood\n");
+ return true;
+}
+
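Caller-side contract of the implementation above: a false return means the terminators were understood and TBB/FBB/Cond are filled in; Cond then carries the same (register, PRED_*) pair that InsertBranch consumes. A sketch of a caller retargeting a block's branch under those rules:

  #include "llvm/ADT/SmallVector.h"
  #include "llvm/CodeGen/MachineBasicBlock.h"
  #include "llvm/Support/DebugLoc.h"
  #include "llvm/Target/TargetInstrInfo.h"

  // Sketch: re-emit MBB's terminators with a new unconditional target.
  static void retarget(llvm::MachineBasicBlock &MBB,
                       const llvm::TargetInstrInfo *TII,
                       llvm::MachineBasicBlock *NewTarget) {
    llvm::MachineBasicBlock *TBB = 0, *FBB = 0;
    llvm::SmallVector<llvm::MachineOperand, 2> Cond;
    if (!TII->AnalyzeBranch(MBB, TBB, FBB, Cond, /*AllowModify=*/false)) {
      TII->RemoveBranch(MBB);
      TII->InsertBranch(MBB, NewTarget, FBB, Cond, llvm::DebugLoc());
    }
  }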
+unsigned PTXInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ unsigned count = 0;
+ while (!MBB.empty())
+ if (IsAnyKindOfBranch(MBB.back())) {
+ MBB.pop_back();
+ ++count;
+ } else
+ break;
+ DEBUG(dbgs() << "RemoveBranch: MBB: " << MBB.getName().str() << "\n");
+ DEBUG(dbgs() << "RemoveBranch: remove " << count << " branch inst\n");
+ return count;
+}
+
+unsigned PTXInstrInfo::
+InsertBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ DEBUG(dbgs() << "InsertBranch: MBB: " << MBB.getName().str() << "\n");
+ DEBUG(if (TBB) dbgs() << "InsertBranch: TBB: " << TBB->getName().str()
+ << "\n";
+ else dbgs() << "InsertBranch: TBB: (NULL)\n");
+ DEBUG(if (FBB) dbgs() << "InsertBranch: FBB: " << FBB->getName().str()
+ << "\n";
+ else dbgs() << "InsertBranch: FBB: (NULL)\n");
+ DEBUG(dbgs() << "InsertBranch: Cond size: " << Cond.size() << "\n");
+
+ assert(TBB && "TBB is NULL");
+
+ if (FBB) {
+ BuildMI(&MBB, DL, get(PTX::BRAdp))
+ .addMBB(TBB).addReg(Cond[0].getReg()).addImm(Cond[1].getImm());
+ BuildMI(&MBB, DL, get(PTX::BRAd))
+ .addMBB(FBB).addReg(PTX::NoRegister).addImm(PTX::PRED_NORMAL);
+ return 2;
+ } else if (Cond.size()) {
+ BuildMI(&MBB, DL, get(PTX::BRAdp))
+ .addMBB(TBB).addReg(Cond[0].getReg()).addImm(Cond[1].getImm());
+ return 1;
+ } else {
+ BuildMI(&MBB, DL, get(PTX::BRAd))
+ .addMBB(TBB).addReg(PTX::NoRegister).addImm(PTX::PRED_NORMAL);
+ return 1;
+ }
+}
+
+// static helper routines
+
+MachineSDNode *PTXInstrInfo::
+GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
+ DebugLoc dl, EVT VT, SDValue Op1) {
+ SDValue predReg = DAG->getRegister(PTX::NoRegister, MVT::i1);
+ SDValue predOp = DAG->getTargetConstant(PTX::PRED_NORMAL, MVT::i32);
+ SDValue ops[] = { Op1, predReg, predOp };
+ return DAG->getMachineNode(Opcode, dl, VT, ops, array_lengthof(ops));
+}
+
+MachineSDNode *PTXInstrInfo::
+GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
+ DebugLoc dl, EVT VT, SDValue Op1, SDValue Op2) {
+ SDValue predReg = DAG->getRegister(PTX::NoRegister, MVT::i1);
+ SDValue predOp = DAG->getTargetConstant(PTX::PRED_NORMAL, MVT::i32);
+ SDValue ops[] = { Op1, Op2, predReg, predOp };
+ return DAG->getMachineNode(Opcode, dl, VT, ops, array_lengthof(ops));
+}
+
+void PTXInstrInfo::AddDefaultPredicate(MachineInstr *MI) {
+ if (MI->findFirstPredOperandIdx() == -1) {
+ MI->addOperand(MachineOperand::CreateReg(PTX::NoRegister, /*IsDef=*/false));
+ MI->addOperand(MachineOperand::CreateImm(PTX::PRED_NORMAL));
+ }
+}
+
+bool PTXInstrInfo::IsAnyKindOfBranch(const MachineInstr& inst) {
+ const TargetInstrDesc &desc = inst.getDesc();
+ return desc.isTerminator() || desc.isBranch() || desc.isIndirectBranch();
+}
+
+bool PTXInstrInfo::
+IsAnySuccessorAlsoLayoutSuccessor(const MachineBasicBlock& MBB) {
+ for (MachineBasicBlock::const_succ_iterator
+ i = MBB.succ_begin(), e = MBB.succ_end(); i != e; ++i)
+ if (MBB.isLayoutSuccessor((const MachineBasicBlock*) &*i))
+ return true;
+ return false;
+}
+
+MachineBasicBlock *PTXInstrInfo::GetBranchTarget(const MachineInstr& inst) {
+ // FIXME: so far all branch instructions put the destination in the 1st operand
+ const MachineOperand& target = inst.getOperand(0);
+ assert(target.isMBB() && "FIXME: detect branch target operand");
+ return target.getMBB();
+}
diff --git a/lib/Target/PTX/PTXInstrInfo.h b/lib/Target/PTX/PTXInstrInfo.h
index e7f00f0..a04be77 100644
--- a/lib/Target/PTX/PTXInstrInfo.h
+++ b/lib/Target/PTX/PTXInstrInfo.h
@@ -15,61 +15,93 @@
#define PTX_INSTR_INFO_H
#include "PTXRegisterInfo.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/Target/TargetInstrInfo.h"
namespace llvm {
class PTXTargetMachine;
+class MachineSDNode;
+class SDValue;
+class SelectionDAG;
+
class PTXInstrInfo : public TargetInstrInfoImpl {
- private:
- const PTXRegisterInfo RI;
- PTXTargetMachine &TM;
-
- public:
- explicit PTXInstrInfo(PTXTargetMachine &_TM);
-
- virtual const PTXRegisterInfo &getRegisterInfo() const { return RI; }
-
- virtual void copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I, DebugLoc DL,
- unsigned DstReg, unsigned SrcReg,
- bool KillSrc) const;
-
- virtual bool copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DstReg, unsigned SrcReg,
- const TargetRegisterClass *DstRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const;
-
- virtual bool isMoveInstr(const MachineInstr& MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
-
- // static helper routines
-
- static MachineSDNode *GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
- DebugLoc dl, EVT VT,
- SDValue Op1) {
- SDValue pred_reg = DAG->getRegister(0, MVT::i1);
- SDValue pred_imm = DAG->getTargetConstant(0, MVT::i32);
- SDValue ops[] = { Op1, pred_reg, pred_imm };
- return DAG->getMachineNode(Opcode, dl, VT, ops, array_lengthof(ops));
- }
-
- static MachineSDNode *GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
- DebugLoc dl, EVT VT,
- SDValue Op1,
- SDValue Op2) {
- SDValue pred_reg = DAG->getRegister(0, MVT::i1);
- SDValue pred_imm = DAG->getTargetConstant(0, MVT::i32);
- SDValue ops[] = { Op1, Op2, pred_reg, pred_imm };
- return DAG->getMachineNode(Opcode, dl, VT, ops, array_lengthof(ops));
- }
-
- }; // class PTXInstrInfo
+private:
+ const PTXRegisterInfo RI;
+ PTXTargetMachine &TM;
+
+public:
+ explicit PTXInstrInfo(PTXTargetMachine &_TM);
+
+ virtual const PTXRegisterInfo &getRegisterInfo() const { return RI; }
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DstReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ virtual bool copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DstReg, unsigned SrcReg,
+ const TargetRegisterClass *DstRC,
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const;
+
+ virtual bool isMoveInstr(const MachineInstr& MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
+
+ // predicate support
+
+ virtual bool isPredicated(const MachineInstr *MI) const;
+
+ virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const;
+
+ virtual
+ bool PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Pred) const;
+
+ virtual
+ bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const;
+
+ virtual bool DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const;
+
+ // PTX is fully predicable
+ virtual bool isPredicable(MachineInstr *MI) const { return true; }
+
+ // branch support
+
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify = false) const;
+
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+
+ // static helper routines
+
+ static MachineSDNode *GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
+ DebugLoc dl, EVT VT,
+ SDValue Op1);
+
+ static MachineSDNode *GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
+ DebugLoc dl, EVT VT,
+ SDValue Op1, SDValue Op2);
+
+ static void AddDefaultPredicate(MachineInstr *MI);
+
+ static bool IsAnyKindOfBranch(const MachineInstr& inst);
+
+ static bool IsAnySuccessorAlsoLayoutSuccessor(const MachineBasicBlock& MBB);
+
+ static MachineBasicBlock *GetBranchTarget(const MachineInstr& inst);
+}; // class PTXInstrInfo
} // namespace llvm
#endif // PTX_INSTR_INFO_H
diff --git a/lib/Target/PTX/PTXInstrInfo.td b/lib/Target/PTX/PTXInstrInfo.td
index 9a74778..1ac9d3f 100644
--- a/lib/Target/PTX/PTXInstrInfo.td
+++ b/lib/Target/PTX/PTXInstrInfo.td
@@ -18,6 +18,26 @@
include "PTXInstrFormats.td"
//===----------------------------------------------------------------------===//
+// Code Generation Predicates
+//===----------------------------------------------------------------------===//
+
+// Addressing
+def Use32BitAddresses : Predicate<"!getSubtarget().is64Bit()">;
+def Use64BitAddresses : Predicate<"getSubtarget().is64Bit()">;
+
+// Shader Model Support
+def SupportsSM13 : Predicate<"getSubtarget().supportsSM13()">;
+def DoesNotSupportSM13 : Predicate<"!getSubtarget().supportsSM13()">;
+def SupportsSM20 : Predicate<"getSubtarget().supportsSM20()">;
+def DoesNotSupportSM20 : Predicate<"!getSubtarget().supportsSM20()">;
+
+// PTX Version Support
+def SupportsPTX21 : Predicate<"getSubtarget().supportsPTX21()">;
+def DoesNotSupportPTX21 : Predicate<"!getSubtarget().supportsPTX21()">;
+def SupportsPTX22 : Predicate<"getSubtarget().supportsPTX22()">;
+def DoesNotSupportPTX22 : Predicate<"!getSubtarget().supportsPTX22()">;
+
+//===----------------------------------------------------------------------===//
// Instruction Pattern Stuff
//===----------------------------------------------------------------------===//
@@ -107,24 +127,41 @@ def store_shared
}]>;
// Addressing modes.
-def ADDRrr : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
-def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [], []>;
-def ADDRii : ComplexPattern<i32, 2, "SelectADDRii", [], []>;
+def ADDRrr32 : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
+def ADDRrr64 : ComplexPattern<i64, 2, "SelectADDRrr", [], []>;
+def ADDRri32 : ComplexPattern<i32, 2, "SelectADDRri", [], []>;
+def ADDRri64 : ComplexPattern<i64, 2, "SelectADDRri", [], []>;
+def ADDRii32 : ComplexPattern<i32, 2, "SelectADDRii", [], []>;
+def ADDRii64 : ComplexPattern<i64, 2, "SelectADDRii", [], []>;
// Address operands
-def MEMri : Operand<i32> {
+def MEMri32 : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops RRegu32, i32imm);
+}
+def MEMri64 : Operand<i64> {
let PrintMethod = "printMemOperand";
- let MIOperandInfo = (ops RRegs32, i32imm);
+ let MIOperandInfo = (ops RRegu64, i64imm);
}
-def MEMii : Operand<i32> {
+def MEMii32 : Operand<i32> {
let PrintMethod = "printMemOperand";
let MIOperandInfo = (ops i32imm, i32imm);
}
+def MEMii64 : Operand<i64> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops i64imm, i64imm);
+}
+// The operand here does not correspond to an actual address, so we
+// can use i32 in 64-bit address modes.
def MEMpi : Operand<i32> {
let PrintMethod = "printParamOperand";
let MIOperandInfo = (ops i32imm);
}
+// Branch & call targets have OtherVT type.
+def brtarget : Operand<OtherVT>;
+def calltarget : Operand<i32>;
+
//===----------------------------------------------------------------------===//
// PTX Specific Node Definitions
//===----------------------------------------------------------------------===//
@@ -138,66 +175,389 @@ def PTXexit
: SDNode<"PTXISD::EXIT", SDTNone, [SDNPHasChain]>;
def PTXret
: SDNode<"PTXISD::RET", SDTNone, [SDNPHasChain]>;
+def PTXcopyaddress
+ : SDNode<"PTXISD::COPY_ADDRESS", SDTypeProfile<1, 1, []>, []>;
//===----------------------------------------------------------------------===//
// Instruction Class Templates
//===----------------------------------------------------------------------===//
+//===- Floating-Point Instructions - 2 Operand Form -----------------------===//
+multiclass PTX_FLOAT_2OP<string opcstr, SDNode opnode> {
+ def rr32 : InstPTX<(outs RRegf32:$d),
+ (ins RRegf32:$a),
+ !strconcat(opcstr, ".f32\t$d, $a"),
+ [(set RRegf32:$d, (opnode RRegf32:$a))]>;
+ def ri32 : InstPTX<(outs RRegf32:$d),
+ (ins f32imm:$a),
+ !strconcat(opcstr, ".f32\t$d, $a"),
+ [(set RRegf32:$d, (opnode fpimm:$a))]>;
+ def rr64 : InstPTX<(outs RRegf64:$d),
+ (ins RRegf64:$a),
+ !strconcat(opcstr, ".f64\t$d, $a"),
+ [(set RRegf64:$d, (opnode RRegf64:$a))]>;
+ def ri64 : InstPTX<(outs RRegf64:$d),
+ (ins f64imm:$a),
+ !strconcat(opcstr, ".f64\t$d, $a"),
+ [(set RRegf64:$d, (opnode fpimm:$a))]>;
+}
+
+//===- Floating-Point Instructions - 3 Operand Form -----------------------===//
+multiclass PTX_FLOAT_3OP<string opcstr, SDNode opnode> {
+ def rr32 : InstPTX<(outs RRegf32:$d),
+ (ins RRegf32:$a, RRegf32:$b),
+ !strconcat(opcstr, ".f32\t$d, $a, $b"),
+ [(set RRegf32:$d, (opnode RRegf32:$a, RRegf32:$b))]>;
+ def ri32 : InstPTX<(outs RRegf32:$d),
+ (ins RRegf32:$a, f32imm:$b),
+ !strconcat(opcstr, ".f32\t$d, $a, $b"),
+ [(set RRegf32:$d, (opnode RRegf32:$a, fpimm:$b))]>;
+ def rr64 : InstPTX<(outs RRegf64:$d),
+ (ins RRegf64:$a, RRegf64:$b),
+ !strconcat(opcstr, ".f64\t$d, $a, $b"),
+ [(set RRegf64:$d, (opnode RRegf64:$a, RRegf64:$b))]>;
+ def ri64 : InstPTX<(outs RRegf64:$d),
+ (ins RRegf64:$a, f64imm:$b),
+ !strconcat(opcstr, ".f64\t$d, $a, $b"),
+ [(set RRegf64:$d, (opnode RRegf64:$a, fpimm:$b))]>;
+}
+
+//===- Floating-Point Instructions - 4 Operand Form -----------------------===//
+multiclass PTX_FLOAT_4OP<string opcstr, SDNode opnode1, SDNode opnode2> {
+ def rrr32 : InstPTX<(outs RRegf32:$d),
+ (ins RRegf32:$a, RRegf32:$b, RRegf32:$c),
+ !strconcat(opcstr, ".f32\t$d, $a, $b, $c"),
+ [(set RRegf32:$d, (opnode2 (opnode1 RRegf32:$a,
+ RRegf32:$b),
+ RRegf32:$c))]>;
+ def rri32 : InstPTX<(outs RRegf32:$d),
+ (ins RRegf32:$a, RRegf32:$b, f32imm:$c),
+ !strconcat(opcstr, ".f32\t$d, $a, $b, $c"),
+ [(set RRegf32:$d, (opnode2 (opnode1 RRegf32:$a,
+ RRegf32:$b),
+ fpimm:$c))]>;
+ def rrr64 : InstPTX<(outs RRegf64:$d),
+ (ins RRegf64:$a, RRegf64:$b, RRegf64:$c),
+ !strconcat(opcstr, ".f64\t$d, $a, $b, $c"),
+ [(set RRegf64:$d, (opnode2 (opnode1 RRegf64:$a,
+ RRegf64:$b),
+ RRegf64:$c))]>;
+ def rri64 : InstPTX<(outs RRegf64:$d),
+ (ins RRegf64:$a, RRegf64:$b, f64imm:$c),
+ !strconcat(opcstr, ".f64\t$d, $a, $b, $c"),
+ [(set RRegf64:$d, (opnode2 (opnode1 RRegf64:$a,
+ RRegf64:$b),
+ fpimm:$c))]>;
+}
+
multiclass INT3<string opcstr, SDNode opnode> {
- def rr : InstPTX<(outs RRegs32:$d),
- (ins RRegs32:$a, RRegs32:$b),
- !strconcat(opcstr, ".%type\t$d, $a, $b"),
- [(set RRegs32:$d, (opnode RRegs32:$a, RRegs32:$b))]>;
- def ri : InstPTX<(outs RRegs32:$d),
- (ins RRegs32:$a, i32imm:$b),
- !strconcat(opcstr, ".%type\t$d, $a, $b"),
- [(set RRegs32:$d, (opnode RRegs32:$a, imm:$b))]>;
+ def rr16 : InstPTX<(outs RRegu16:$d),
+ (ins RRegu16:$a, RRegu16:$b),
+ !strconcat(opcstr, ".u16\t$d, $a, $b"),
+ [(set RRegu16:$d, (opnode RRegu16:$a, RRegu16:$b))]>;
+ def ri16 : InstPTX<(outs RRegu16:$d),
+ (ins RRegu16:$a, i16imm:$b),
+ !strconcat(opcstr, ".u16\t$d, $a, $b"),
+ [(set RRegu16:$d, (opnode RRegu16:$a, imm:$b))]>;
+ def rr32 : InstPTX<(outs RRegu32:$d),
+ (ins RRegu32:$a, RRegu32:$b),
+ !strconcat(opcstr, ".u32\t$d, $a, $b"),
+ [(set RRegu32:$d, (opnode RRegu32:$a, RRegu32:$b))]>;
+ def ri32 : InstPTX<(outs RRegu32:$d),
+ (ins RRegu32:$a, i32imm:$b),
+ !strconcat(opcstr, ".u32\t$d, $a, $b"),
+ [(set RRegu32:$d, (opnode RRegu32:$a, imm:$b))]>;
+ def rr64 : InstPTX<(outs RRegu64:$d),
+ (ins RRegu64:$a, RRegu64:$b),
+ !strconcat(opcstr, ".u64\t$d, $a, $b"),
+ [(set RRegu64:$d, (opnode RRegu64:$a, RRegu64:$b))]>;
+ def ri64 : InstPTX<(outs RRegu64:$d),
+ (ins RRegu64:$a, i64imm:$b),
+ !strconcat(opcstr, ".u64\t$d, $a, $b"),
+ [(set RRegu64:$d, (opnode RRegu64:$a, imm:$b))]>;
+}
+
+multiclass PTX_LOGIC<string opcstr, SDNode opnode> {
+ def ripreds : InstPTX<(outs Preds:$d),
+ (ins Preds:$a, i1imm:$b),
+ !strconcat(opcstr, ".pred\t$d, $a, $b"),
+ [(set Preds:$d, (opnode Preds:$a, imm:$b))]>;
+ def rrpreds : InstPTX<(outs Preds:$d),
+ (ins Preds:$a, Preds:$b),
+ !strconcat(opcstr, ".pred\t$d, $a, $b"),
+ [(set Preds:$d, (opnode Preds:$a, Preds:$b))]>;
+ def rr16 : InstPTX<(outs RRegu16:$d),
+ (ins RRegu16:$a, RRegu16:$b),
+ !strconcat(opcstr, ".b16\t$d, $a, $b"),
+ [(set RRegu16:$d, (opnode RRegu16:$a, RRegu16:$b))]>;
+ def ri16 : InstPTX<(outs RRegu16:$d),
+ (ins RRegu16:$a, i16imm:$b),
+ !strconcat(opcstr, ".b16\t$d, $a, $b"),
+ [(set RRegu16:$d, (opnode RRegu16:$a, imm:$b))]>;
+ def rr32 : InstPTX<(outs RRegu32:$d),
+ (ins RRegu32:$a, RRegu32:$b),
+ !strconcat(opcstr, ".b32\t$d, $a, $b"),
+ [(set RRegu32:$d, (opnode RRegu32:$a, RRegu32:$b))]>;
+ def ri32 : InstPTX<(outs RRegu32:$d),
+ (ins RRegu32:$a, i32imm:$b),
+ !strconcat(opcstr, ".b32\t$d, $a, $b"),
+ [(set RRegu32:$d, (opnode RRegu32:$a, imm:$b))]>;
+ def rr64 : InstPTX<(outs RRegu64:$d),
+ (ins RRegu64:$a, RRegu64:$b),
+ !strconcat(opcstr, ".b64\t$d, $a, $b"),
+ [(set RRegu64:$d, (opnode RRegu64:$a, RRegu64:$b))]>;
+ def ri64 : InstPTX<(outs RRegu64:$d),
+ (ins RRegu64:$a, i64imm:$b),
+ !strconcat(opcstr, ".b64\t$d, $a, $b"),
+ [(set RRegu64:$d, (opnode RRegu64:$a, imm:$b))]>;
}
-// no %type directive, non-communtable
multiclass INT3ntnc<string opcstr, SDNode opnode> {
- def rr : InstPTX<(outs RRegs32:$d),
- (ins RRegs32:$a, RRegs32:$b),
- !strconcat(opcstr, "\t$d, $a, $b"),
- [(set RRegs32:$d, (opnode RRegs32:$a, RRegs32:$b))]>;
- def ri : InstPTX<(outs RRegs32:$d),
- (ins RRegs32:$a, i32imm:$b),
- !strconcat(opcstr, "\t$d, $a, $b"),
- [(set RRegs32:$d, (opnode RRegs32:$a, imm:$b))]>;
- def ir : InstPTX<(outs RRegs32:$d),
- (ins i32imm:$a, RRegs32:$b),
- !strconcat(opcstr, "\t$d, $a, $b"),
- [(set RRegs32:$d, (opnode imm:$a, RRegs32:$b))]>;
+ def rr16 : InstPTX<(outs RRegu16:$d),
+ (ins RRegu16:$a, RRegu16:$b),
+ !strconcat(opcstr, "16\t$d, $a, $b"),
+ [(set RRegu16:$d, (opnode RRegu16:$a, RRegu16:$b))]>;
+ def rr32 : InstPTX<(outs RRegu32:$d),
+ (ins RRegu32:$a, RRegu32:$b),
+ !strconcat(opcstr, "32\t$d, $a, $b"),
+ [(set RRegu32:$d, (opnode RRegu32:$a, RRegu32:$b))]>;
+ def rr64 : InstPTX<(outs RRegu64:$d),
+ (ins RRegu64:$a, RRegu64:$b),
+ !strconcat(opcstr, "64\t$d, $a, $b"),
+ [(set RRegu64:$d, (opnode RRegu64:$a, RRegu64:$b))]>;
+ def ri16 : InstPTX<(outs RRegu16:$d),
+ (ins RRegu16:$a, i16imm:$b),
+ !strconcat(opcstr, "16\t$d, $a, $b"),
+ [(set RRegu16:$d, (opnode RRegu16:$a, imm:$b))]>;
+ def ri32 : InstPTX<(outs RRegu32:$d),
+ (ins RRegu32:$a, i32imm:$b),
+ !strconcat(opcstr, "32\t$d, $a, $b"),
+ [(set RRegu32:$d, (opnode RRegu32:$a, imm:$b))]>;
+ def ri64 : InstPTX<(outs RRegu64:$d),
+ (ins RRegu64:$a, i64imm:$b),
+ !strconcat(opcstr, "64\t$d, $a, $b"),
+ [(set RRegu64:$d, (opnode RRegu64:$a, imm:$b))]>;
+ def ir16 : InstPTX<(outs RRegu16:$d),
+ (ins i16imm:$a, RRegu16:$b),
+ !strconcat(opcstr, "16\t$d, $a, $b"),
+ [(set RRegu16:$d, (opnode imm:$a, RRegu16:$b))]>;
+ def ir32 : InstPTX<(outs RRegu32:$d),
+ (ins i32imm:$a, RRegu32:$b),
+ !strconcat(opcstr, "32\t$d, $a, $b"),
+ [(set RRegu32:$d, (opnode imm:$a, RRegu32:$b))]>;
+ def ir64 : InstPTX<(outs RRegu64:$d),
+ (ins i64imm:$a, RRegu64:$b),
+ !strconcat(opcstr, "64\t$d, $a, $b"),
+ [(set RRegu64:$d, (opnode imm:$a, RRegu64:$b))]>;
}
-multiclass PTX_LD<string opstr, RegisterClass RC, PatFrag pat_load> {
- def rr : InstPTX<(outs RC:$d),
- (ins MEMri:$a),
- !strconcat(opstr, ".%type\t$d, [$a]"),
- [(set RC:$d, (pat_load ADDRrr:$a))]>;
- def ri : InstPTX<(outs RC:$d),
- (ins MEMri:$a),
- !strconcat(opstr, ".%type\t$d, [$a]"),
- [(set RC:$d, (pat_load ADDRri:$a))]>;
- def ii : InstPTX<(outs RC:$d),
- (ins MEMii:$a),
- !strconcat(opstr, ".%type\t$d, [$a]"),
- [(set RC:$d, (pat_load ADDRii:$a))]>;
+multiclass PTX_SETP_I<RegisterClass RC, string regclsname, Operand immcls,
+ CondCode cmp, string cmpstr> {
+ // TODO support 5-operand format: p|q, a, b, c
+
+ def rr
+ : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b),
+ !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"),
+ [(set Preds:$p, (setcc RC:$a, RC:$b, cmp))]>;
+ def ri
+ : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b),
+ !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"),
+ [(set Preds:$p, (setcc RC:$a, imm:$b, cmp))]>;
+
+ def rr_and_r
+ : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, $c"),
+ [(set Preds:$p, (and (setcc RC:$a, RC:$b, cmp), Preds:$c))]>;
+ def ri_and_r
+ : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b, Preds:$c),
+ !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, $c"),
+ [(set Preds:$p, (and (setcc RC:$a, imm:$b, cmp), Preds:$c))]>;
+ def rr_or_r
+ : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, $c"),
+ [(set Preds:$p, (or (setcc RC:$a, RC:$b, cmp), Preds:$c))]>;
+ def ri_or_r
+ : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b, Preds:$c),
+ !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, $c"),
+ [(set Preds:$p, (or (setcc RC:$a, imm:$b, cmp), Preds:$c))]>;
+ def rr_xor_r
+ : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, $c"),
+ [(set Preds:$p, (xor (setcc RC:$a, RC:$b, cmp), Preds:$c))]>;
+ def ri_xor_r
+ : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b, Preds:$c),
+ !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, $c"),
+ [(set Preds:$p, (xor (setcc RC:$a, imm:$b, cmp), Preds:$c))]>;
+
+ def rr_and_not_r
+ : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, !$c"),
+ [(set Preds:$p, (and (setcc RC:$a, RC:$b, cmp), (not Preds:$c)))]>;
+ def ri_and_not_r
+ : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b, Preds:$c),
+ !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, !$c"),
+ [(set Preds:$p, (and (setcc RC:$a, imm:$b, cmp), (not Preds:$c)))]>;
+ def rr_or_not_r
+ : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, !$c"),
+ [(set Preds:$p, (or (setcc RC:$a, RC:$b, cmp), (not Preds:$c)))]>;
+ def ri_or_not_r
+ : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b, Preds:$c),
+ !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, !$c"),
+ [(set Preds:$p, (or (setcc RC:$a, imm:$b, cmp), (not Preds:$c)))]>;
+ def rr_xor_not_r
+ : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, !$c"),
+ [(set Preds:$p, (xor (setcc RC:$a, RC:$b, cmp), (not Preds:$c)))]>;
+ def ri_xor_not_r
+ : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b, Preds:$c),
+ !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, !$c"),
+ [(set Preds:$p, (xor (setcc RC:$a, imm:$b, cmp), (not Preds:$c)))]>;
}
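To make the suffix scheme concrete, a sketch of three of the generated
compares (register names follow the classes defined in PTXRegisterInfo.td):

    setp.lt.u32      p0, r1, r2;       // SETPLTu32rr:       p0 = (r1 < r2)
    setp.lt.and.u32  p0, r1, r2, p3;   // SETPLTu32rr_and_r: p0 = (r1 < r2) & p3
    setp.lt.and.u32  p0, r1, r2, !p3;  // ..._and_not_r:     p0 = (r1 < r2) & !p3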
-multiclass PTX_ST<string opstr, RegisterClass RC, PatFrag pat_store> {
- def rr : InstPTX<(outs),
- (ins RC:$d, MEMri:$a),
- !strconcat(opstr, ".%type\t[$a], $d"),
- [(pat_store RC:$d, ADDRrr:$a)]>;
- def ri : InstPTX<(outs),
- (ins RC:$d, MEMri:$a),
- !strconcat(opstr, ".%type\t[$a], $d"),
- [(pat_store RC:$d, ADDRri:$a)]>;
- def ii : InstPTX<(outs),
- (ins RC:$d, MEMii:$a),
- !strconcat(opstr, ".%type\t[$a], $d"),
- [(pat_store RC:$d, ADDRii:$a)]>;
+multiclass PTX_SETP_FP<RegisterClass RC, string regclsname,
+ CondCode ucmp, CondCode ocmp, string cmpstr> {
+ // TODO support 5-operand format: p|q, a, b, c
+
+ def rr_u
+ : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b),
+ !strconcat("setp.", cmpstr, "u.", regclsname, "\t$p, $a, $b"),
+ [(set Preds:$p, (setcc RC:$a, RC:$b, ucmp))]>;
+ def rr_o
+ : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b),
+ !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"),
+ [(set Preds:$p, (setcc RC:$a, RC:$b, ocmp))]>;
+
+ def rr_and_r_u
+ : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ !strconcat("setp.", cmpstr, "u.and.", regclsname, "\t$p, $a, $b, $c"),
+ [(set Preds:$p, (and (setcc RC:$a, RC:$b, ucmp), Preds:$c))]>;
+ def rr_and_r_o
+ : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, $c"),
+ [(set Preds:$p, (and (setcc RC:$a, RC:$b, ocmp), Preds:$c))]>;
+
+ def rr_or_r_u
+ : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ !strconcat("setp.", cmpstr, "u.or.", regclsname, "\t$p, $a, $b, $c"),
+ [(set Preds:$p, (or (setcc RC:$a, RC:$b, ucmp), Preds:$c))]>;
+ def rr_or_r_o
+ : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, $c"),
+ [(set Preds:$p, (or (setcc RC:$a, RC:$b, ocmp), Preds:$c))]>;
+
+ def rr_xor_r_u
+ : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ !strconcat("setp.", cmpstr, "u.xor.", regclsname, "\t$p, $a, $b, $c"),
+ [(set Preds:$p, (xor (setcc RC:$a, RC:$b, ucmp), Preds:$c))]>;
+ def rr_xor_r_o
+ : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, $c"),
+ [(set Preds:$p, (xor (setcc RC:$a, RC:$b, ocmp), Preds:$c))]>;
+
+ def rr_and_not_r_u
+ : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ !strconcat("setp.", cmpstr, "u.and.", regclsname, "\t$p, $a, $b, !$c"),
+ [(set Preds:$p, (and (setcc RC:$a, RC:$b, ucmp), (not Preds:$c)))]>;
+ def rr_and_not_r_o
+ : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, !$c"),
+ [(set Preds:$p, (and (setcc RC:$a, RC:$b, ocmp), (not Preds:$c)))]>;
+
+ def rr_or_not_r_u
+ : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ !strconcat("setp.", cmpstr, "u.or.", regclsname, "\t$p, $a, $b, !$c"),
+ [(set Preds:$p, (or (setcc RC:$a, RC:$b, ucmp), (not Preds:$c)))]>;
+ def rr_or_not_r_o
+ : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, !$c"),
+ [(set Preds:$p, (or (setcc RC:$a, RC:$b, ocmp), (not Preds:$c)))]>;
+
+ def rr_xor_not_r_u
+ : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ !strconcat("setp.", cmpstr, "u.xor.", regclsname, "\t$p, $a, $b, !$c"),
+ [(set Preds:$p, (xor (setcc RC:$a, RC:$b, ucmp), (not Preds:$c)))]>;
+ def rr_xor_not_r_o
+ : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, !$c"),
+ [(set Preds:$p, (xor (setcc RC:$a, RC:$b, ocmp), (not Preds:$c)))]>;
+}
+
+multiclass PTX_SELP<RegisterClass RC, string regclsname> {
+ def rr
+ : InstPTX<(outs RC:$r), (ins Preds:$a, RC:$b, RC:$c),
+ !strconcat("selp.", regclsname, "\t$r, $b, $c, $a"),
+ [(set RC:$r, (select Preds:$a, RC:$b, RC:$c))]>;
+}
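A matching sketch for selp: an LLVM select on a predicate register folds
into a single instruction, e.g.:

    selp.u32  r0, r1, r2, p0;  // PTX_SELPu32rr: r0 = p0 ? r1 : r2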
+
+multiclass PTX_LD<string opstr, string typestr, RegisterClass RC, PatFrag pat_load> {
+ def rr32 : InstPTX<(outs RC:$d),
+ (ins MEMri32:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (pat_load ADDRrr32:$a))]>, Requires<[Use32BitAddresses]>;
+ def rr64 : InstPTX<(outs RC:$d),
+ (ins MEMri64:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (pat_load ADDRrr64:$a))]>, Requires<[Use64BitAddresses]>;
+ def ri32 : InstPTX<(outs RC:$d),
+ (ins MEMri32:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (pat_load ADDRri32:$a))]>, Requires<[Use32BitAddresses]>;
+ def ri64 : InstPTX<(outs RC:$d),
+ (ins MEMri64:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (pat_load ADDRri64:$a))]>, Requires<[Use64BitAddresses]>;
+ def ii32 : InstPTX<(outs RC:$d),
+ (ins MEMii32:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (pat_load ADDRii32:$a))]>, Requires<[Use32BitAddresses]>;
+ def ii64 : InstPTX<(outs RC:$d),
+ (ins MEMii64:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (pat_load ADDRii64:$a))]>, Requires<[Use64BitAddresses]>;
+}
+
+multiclass PTX_LD_ALL<string opstr, PatFrag pat_load> {
+ defm u16 : PTX_LD<opstr, ".u16", RRegu16, pat_load>;
+ defm u32 : PTX_LD<opstr, ".u32", RRegu32, pat_load>;
+ defm u64 : PTX_LD<opstr, ".u64", RRegu64, pat_load>;
+ defm f32 : PTX_LD<opstr, ".f32", RRegf32, pat_load>;
+ defm f64 : PTX_LD<opstr, ".f64", RRegf64, pat_load>;
+}
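Sketch of the addressing split: the same LLVM load selects either a 32-bit
or a 64-bit address operand, gated by the Use32BitAddresses and
Use64BitAddresses predicates (exact operand printing is left to the asm
printer):

    ld.global.u32  r0, [r1];   // 32-bit address register (Use32BitAddresses)
    ld.global.u32  r0, [rd1];  // 64-bit address register (Use64BitAddresses)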
+
+multiclass PTX_ST<string opstr, string typestr, RegisterClass RC, PatFrag pat_store> {
+ def rr32 : InstPTX<(outs),
+ (ins RC:$d, MEMri32:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
+ [(pat_store RC:$d, ADDRrr32:$a)]>, Requires<[Use32BitAddresses]>;
+ def rr64 : InstPTX<(outs),
+ (ins RC:$d, MEMri64:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
+ [(pat_store RC:$d, ADDRrr64:$a)]>, Requires<[Use64BitAddresses]>;
+ def ri32 : InstPTX<(outs),
+ (ins RC:$d, MEMri32:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
+ [(pat_store RC:$d, ADDRri32:$a)]>, Requires<[Use32BitAddresses]>;
+ def ri64 : InstPTX<(outs),
+ (ins RC:$d, MEMri64:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
+ [(pat_store RC:$d, ADDRri64:$a)]>, Requires<[Use64BitAddresses]>;
+ def ii32 : InstPTX<(outs),
+ (ins RC:$d, MEMii32:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
+ [(pat_store RC:$d, ADDRii32:$a)]>, Requires<[Use32BitAddresses]>;
+ def ii64 : InstPTX<(outs),
+ (ins RC:$d, MEMii64:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
+ [(pat_store RC:$d, ADDRii64:$a)]>, Requires<[Use64BitAddresses]>;
+}
+
+multiclass PTX_ST_ALL<string opstr, PatFrag pat_store> {
+ defm u16 : PTX_ST<opstr, ".u16", RRegu16, pat_store>;
+ defm u32 : PTX_ST<opstr, ".u32", RRegu32, pat_store>;
+ defm u64 : PTX_ST<opstr, ".u64", RRegu64, pat_store>;
+ defm f32 : PTX_ST<opstr, ".f32", RRegf32, pat_store>;
+ defm f64 : PTX_ST<opstr, ".f64", RRegf64, pat_store>;
}
//===----------------------------------------------------------------------===//
@@ -208,50 +568,392 @@ multiclass PTX_ST<string opstr, RegisterClass RC, PatFrag pat_store> {
defm ADD : INT3<"add", add>;
defm SUB : INT3<"sub", sub>;
+defm MUL : INT3<"mul.lo", mul>; // FIXME: Allow 32x32 -> 64 multiplies
+defm DIV : INT3<"div", udiv>;
+defm REM : INT3<"rem", urem>;
+
+///===- Floating-Point Arithmetic Instructions ----------------------------===//
+
+// Standard Unary Operations
+defm FNEG : PTX_FLOAT_2OP<"neg", fneg>;
+
+// Standard Binary Operations
+defm FADD : PTX_FLOAT_3OP<"add", fadd>;
+defm FSUB : PTX_FLOAT_3OP<"sub", fsub>;
+defm FMUL : PTX_FLOAT_3OP<"mul", fmul>;
+
+// TODO: Allow user selection of rounding modes for fdiv.
+// Division must be handled differently for f32 and f64.
+// For f32, we always use div.approx, since it is supported on all hardware
+// for PTX 1.4+, which is our minimum target.
+def FDIVrr32 : InstPTX<(outs RRegf32:$d),
+ (ins RRegf32:$a, RRegf32:$b),
+ "div.approx.f32\t$d, $a, $b",
+ [(set RRegf32:$d, (fdiv RRegf32:$a, RRegf32:$b))]>;
+def FDIVri32 : InstPTX<(outs RRegf32:$d),
+ (ins RRegf32:$a, f32imm:$b),
+ "div.approx.f32\t$d, $a, $b",
+ [(set RRegf32:$d, (fdiv RRegf32:$a, fpimm:$b))]>;
+
+// For f64, we must specify a rounding for sm 1.3+ but *not* for sm 1.0.
+def FDIVrr64SM13 : InstPTX<(outs RRegf64:$d),
+ (ins RRegf64:$a, RRegf64:$b),
+ "div.rn.f64\t$d, $a, $b",
+ [(set RRegf64:$d, (fdiv RRegf64:$a, RRegf64:$b))]>,
+ Requires<[SupportsSM13]>;
+def FDIVri64SM13 : InstPTX<(outs RRegf64:$d),
+ (ins RRegf64:$a, f64imm:$b),
+ "div.rn.f64\t$d, $a, $b",
+ [(set RRegf64:$d, (fdiv RRegf64:$a, fpimm:$b))]>,
+ Requires<[SupportsSM13]>;
+def FDIVrr64SM10 : InstPTX<(outs RRegf64:$d),
+ (ins RRegf64:$a, RRegf64:$b),
+ "div.f64\t$d, $a, $b",
+ [(set RRegf64:$d, (fdiv RRegf64:$a, RRegf64:$b))]>,
+ Requires<[DoesNotSupportSM13]>;
+def FDIVri64SM10 : InstPTX<(outs RRegf64:$d),
+ (ins RRegf64:$a, f64imm:$b),
+ "div.f64\t$d, $a, $b",
+ [(set RRegf64:$d, (fdiv RRegf64:$a, fpimm:$b))]>,
+ Requires<[DoesNotSupportSM13]>;
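Taken together, a single fdiv in the input IR lowers differently by type and
shader model (sketch):

    div.approx.f32  f0, f1, f2;    // f32: always the approximate form
    div.rn.f64      fd0, fd1, fd2; // f64 on sm_13+: explicit rounding required
    div.f64         fd0, fd1, fd2; // f64 on sm_10: no rounding mode allowed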
+
+
+
+// Multi-operation hybrid instructions
+
+// The selection of mad/fma is tricky. In some cases, they are the *same*
+// instruction, but in other cases we may prefer one or the other. Also,
+// different PTX versions differ on whether rounding mode flags are required.
+// In the short term, mad is supported on all PTX versions and we use a
+// default rounding mode no matter what shader model or PTX version.
+// TODO: Allow the rounding mode to be selectable through llc.
+defm FMADSM13 : PTX_FLOAT_4OP<"mad.rn", fmul, fadd>, Requires<[SupportsSM13]>;
+defm FMAD : PTX_FLOAT_4OP<"mad", fmul, fadd>, Requires<[DoesNotSupportSM13]>;
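For example (sketch): the pattern (fadd (fmul a, b), c) is matched whole, so
one mad replaces a mul/add pair:

    mad.rn.f32  f0, f1, f2, f3;  // FMADSM13rrr32: f0 = f1 * f2 + f3 on sm_13+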
+
+///===- Floating-Point Intrinsic Instructions -----------------------------===//
+
+def FSQRT32 : InstPTX<(outs RRegf32:$d),
+ (ins RRegf32:$a),
+ "sqrt.rn.f32\t$d, $a",
+ [(set RRegf32:$d, (fsqrt RRegf32:$a))]>;
+
+def FSQRT64 : InstPTX<(outs RRegf64:$d),
+ (ins RRegf64:$a),
+ "sqrt.rn.f64\t$d, $a",
+ [(set RRegf64:$d, (fsqrt RRegf64:$a))]>;
+
+def FSIN32 : InstPTX<(outs RRegf32:$d),
+ (ins RRegf32:$a),
+ "sin.approx.f32\t$d, $a",
+ [(set RRegf32:$d, (fsin RRegf32:$a))]>;
+
+def FSIN64 : InstPTX<(outs RRegf64:$d),
+ (ins RRegf64:$a),
+ "sin.approx.f64\t$d, $a",
+ [(set RRegf64:$d, (fsin RRegf64:$a))]>;
+
+def FCOS32 : InstPTX<(outs RRegf32:$d),
+ (ins RRegf32:$a),
+ "cos.approx.f32\t$d, $a",
+ [(set RRegf32:$d, (fcos RRegf32:$a))]>;
+
+def FCOS64 : InstPTX<(outs RRegf64:$d),
+ (ins RRegf64:$a),
+ "cos.approx.f64\t$d, $a",
+ [(set RRegf64:$d, (fcos RRegf64:$a))]>;
+
+
+///===- Comparison and Selection Instructions -----------------------------===//
+
+// Compare u16
+
+defm SETPEQu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETEQ, "eq">;
+defm SETPNEu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETNE, "ne">;
+defm SETPLTu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETULT, "lt">;
+defm SETPLEu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETULE, "le">;
+defm SETPGTu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETUGT, "gt">;
+defm SETPGEu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETUGE, "ge">;
+
+// Compare u32
+
+defm SETPEQu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETEQ, "eq">;
+defm SETPNEu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETNE, "ne">;
+defm SETPLTu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETULT, "lt">;
+defm SETPLEu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETULE, "le">;
+defm SETPGTu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETUGT, "gt">;
+defm SETPGEu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETUGE, "ge">;
+
+// Compare u64
+
+defm SETPEQu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETEQ, "eq">;
+defm SETPNEu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETNE, "ne">;
+defm SETPLTu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETULT, "lt">;
+defm SETPLEu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETULE, "le">;
+defm SETPGTu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETUGT, "gt">;
+defm SETPGEu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETUGE, "ge">;
+
+// Compare f32
+
+defm SETPEQf32 : PTX_SETP_FP<RRegf32, "f32", SETUEQ, SETOEQ, "eq">;
+defm SETPNEf32 : PTX_SETP_FP<RRegf32, "f32", SETUNE, SETONE, "ne">;
+defm SETPLTf32 : PTX_SETP_FP<RRegf32, "f32", SETULT, SETOLT, "lt">;
+defm SETPLEf32 : PTX_SETP_FP<RRegf32, "f32", SETULE, SETOLE, "le">;
+defm SETPGTf32 : PTX_SETP_FP<RRegf32, "f32", SETUGT, SETOGT, "gt">;
+defm SETPGEf32 : PTX_SETP_FP<RRegf32, "f32", SETUGE, SETOGE, "ge">;
+
+// Compare f64
+
+defm SETPEQf64 : PTX_SETP_FP<RRegf64, "f64", SETUEQ, SETOEQ, "eq">;
+defm SETPNEf64 : PTX_SETP_FP<RRegf64, "f64", SETUNE, SETONE, "ne">;
+defm SETPLTf64 : PTX_SETP_FP<RRegf64, "f64", SETULT, SETOLT, "lt">;
+defm SETPLEf64 : PTX_SETP_FP<RRegf64, "f64", SETULE, SETOLE, "le">;
+defm SETPGTf64 : PTX_SETP_FP<RRegf64, "f64", SETUGT, SETOGT, "gt">;
+defm SETPGEf64 : PTX_SETP_FP<RRegf64, "f64", SETUGE, SETOGE, "ge">;
+
+// .selp
+
+defm PTX_SELPu16 : PTX_SELP<RRegu16, "u16">;
+defm PTX_SELPu32 : PTX_SELP<RRegu32, "u32">;
+defm PTX_SELPu64 : PTX_SELP<RRegu64, "u64">;
+defm PTX_SELPf32 : PTX_SELP<RRegf32, "f32">;
+defm PTX_SELPf64 : PTX_SELP<RRegf64, "f64">;
///===- Logic and Shift Instructions --------------------------------------===//
-defm SHL : INT3ntnc<"shl.b32", PTXshl>;
-defm SRL : INT3ntnc<"shr.u32", PTXsrl>;
-defm SRA : INT3ntnc<"shr.s32", PTXsra>;
+defm SHL : INT3ntnc<"shl.b", PTXshl>;
+defm SRL : INT3ntnc<"shr.u", PTXsrl>;
+defm SRA : INT3ntnc<"shr.s", PTXsra>;
+
+defm AND : PTX_LOGIC<"and", and>;
+defm OR : PTX_LOGIC<"or", or>;
+defm XOR : PTX_LOGIC<"xor", xor>;
///===- Data Movement and Conversion Instructions -------------------------===//
let neverHasSideEffects = 1 in {
- // rely on isMoveInstr to separate MOVpp, MOVrr, etc.
- def MOVpp
+ def MOVPREDrr
: InstPTX<(outs Preds:$d), (ins Preds:$a), "mov.pred\t$d, $a", []>;
- def MOVrr
- : InstPTX<(outs RRegs32:$d), (ins RRegs32:$a), "mov.%type\t$d, $a", []>;
+ def MOVU16rr
+ : InstPTX<(outs RRegu16:$d), (ins RRegu16:$a), "mov.u16\t$d, $a", []>;
+ def MOVU32rr
+ : InstPTX<(outs RRegu32:$d), (ins RRegu32:$a), "mov.u32\t$d, $a", []>;
+ def MOVU64rr
+ : InstPTX<(outs RRegu64:$d), (ins RRegu64:$a), "mov.u64\t$d, $a", []>;
+ def MOVF32rr
+ : InstPTX<(outs RRegf32:$d), (ins RRegf32:$a), "mov.f32\t$d, $a", []>;
+ def MOVF64rr
+ : InstPTX<(outs RRegf64:$d), (ins RRegf64:$a), "mov.f64\t$d, $a", []>;
}
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
- def MOVpi
+ def MOVPREDri
: InstPTX<(outs Preds:$d), (ins i1imm:$a), "mov.pred\t$d, $a",
[(set Preds:$d, imm:$a)]>;
- def MOVri
- : InstPTX<(outs RRegs32:$d), (ins i32imm:$a), "mov.s32\t$d, $a",
- [(set RRegs32:$d, imm:$a)]>;
+ def MOVU16ri
+ : InstPTX<(outs RRegu16:$d), (ins i16imm:$a), "mov.u16\t$d, $a",
+ [(set RRegu16:$d, imm:$a)]>;
+ def MOVU32ri
+ : InstPTX<(outs RRegu32:$d), (ins i32imm:$a), "mov.u32\t$d, $a",
+ [(set RRegu32:$d, imm:$a)]>;
+ def MOVU64ri
+ : InstPTX<(outs RRegu64:$d), (ins i64imm:$a), "mov.u64\t$d, $a",
+ [(set RRegu64:$d, imm:$a)]>;
+ def MOVF32ri
+ : InstPTX<(outs RRegf32:$d), (ins f32imm:$a), "mov.f32\t$d, $a",
+ [(set RRegf32:$d, fpimm:$a)]>;
+ def MOVF64ri
+ : InstPTX<(outs RRegf64:$d), (ins f64imm:$a), "mov.f64\t$d, $a",
+ [(set RRegf64:$d, fpimm:$a)]>;
}
-defm LDg : PTX_LD<"ld.global", RRegs32, load_global>;
-defm LDc : PTX_LD<"ld.const", RRegs32, load_constant>;
-defm LDl : PTX_LD<"ld.local", RRegs32, load_local>;
-defm LDp : PTX_LD<"ld.param", RRegs32, load_parameter>;
-defm LDs : PTX_LD<"ld.shared", RRegs32, load_shared>;
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+ def MOVaddr32
+ : InstPTX<(outs RRegu32:$d), (ins i32imm:$a), "mov.u32\t$d, $a",
+ [(set RRegu32:$d, (PTXcopyaddress tglobaladdr:$a))]>;
+ def MOVaddr64
+ : InstPTX<(outs RRegu64:$d), (ins i64imm:$a), "mov.u64\t$d, $a",
+ [(set RRegu64:$d, (PTXcopyaddress tglobaladdr:$a))]>;
+}
+
+// Loads
+defm LDg : PTX_LD_ALL<"ld.global", load_global>;
+defm LDc : PTX_LD_ALL<"ld.const", load_constant>;
+defm LDl : PTX_LD_ALL<"ld.local", load_local>;
+defm LDs : PTX_LD_ALL<"ld.shared", load_shared>;
+
+// These are special instructions that are manually inserted for kernel parameters.
+def LDpiU16 : InstPTX<(outs RRegu16:$d), (ins MEMpi:$a),
+ "ld.param.u16\t$d, [$a]", []>;
+def LDpiU32 : InstPTX<(outs RRegu32:$d), (ins MEMpi:$a),
+ "ld.param.u32\t$d, [$a]", []>;
+def LDpiU64 : InstPTX<(outs RRegu64:$d), (ins MEMpi:$a),
+ "ld.param.u64\t$d, [$a]", []>;
+def LDpiF32 : InstPTX<(outs RRegf32:$d), (ins MEMpi:$a),
+ "ld.param.f32\t$d, [$a]", []>;
+def LDpiF64 : InstPTX<(outs RRegf64:$d), (ins MEMpi:$a),
+ "ld.param.f64\t$d, [$a]", []>;
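A sketch of their intended use (the parameter symbol name below is
hypothetical, chosen only for illustration): for each kernel argument the
backend inserts one of these loads by hand rather than through instruction
selection:

    ld.param.u32  r0, [kernel_param_0];  // LDpiU32; operand name illustrative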
+
+// Stores
+defm STg : PTX_ST_ALL<"st.global", store_global>;
+defm STl : PTX_ST_ALL<"st.local", store_local>;
+defm STs : PTX_ST_ALL<"st.shared", store_shared>;
+
+// defm STp : PTX_ST_ALL<"st.param", store_parameter>;
+// defm LDp : PTX_LD_ALL<"ld.param", load_parameter>;
+// TODO: Do something with st.param if/when it is needed.
+
+// Conversion to pred
+
+def CVT_pred_u16
+ : InstPTX<(outs Preds:$d), (ins RRegu16:$a), "cvt.pred.u16\t$d, $a",
+ [(set Preds:$d, (trunc RRegu16:$a))]>;
-def LDpi : InstPTX<(outs RRegs32:$d), (ins MEMpi:$a),
- "ld.param.%type\t$d, [$a]", []>;
+def CVT_pred_u32
+ : InstPTX<(outs Preds:$d), (ins RRegu32:$a), "cvt.pred.u32\t$d, $a",
+ [(set Preds:$d, (trunc RRegu32:$a))]>;
-defm STg : PTX_ST<"st.global", RRegs32, store_global>;
-defm STl : PTX_ST<"st.local", RRegs32, store_local>;
-// Store to parameter state space requires PTX 2.0 or higher?
-// defm STp : PTX_ST<"st.param", RRegs32, store_parameter>;
-defm STs : PTX_ST<"st.shared", RRegs32, store_shared>;
+def CVT_pred_u64
+ : InstPTX<(outs Preds:$d), (ins RRegu64:$a), "cvt.pred.u64\t$d, $a",
+ [(set Preds:$d, (trunc RRegu64:$a))]>;
+
+def CVT_pred_f32
+ : InstPTX<(outs Preds:$d), (ins RRegf32:$a), "cvt.rni.pred.f32\t$d, $a",
+ [(set Preds:$d, (fp_to_uint RRegf32:$a))]>;
+
+def CVT_pred_f64
+ : InstPTX<(outs Preds:$d), (ins RRegf64:$a), "cvt.rni.pred.f64\t$d, $a",
+ [(set Preds:$d, (fp_to_uint RRegf64:$a))]>;
+
+// Conversion to u16
+
+def CVT_u16_pred
+ : InstPTX<(outs RRegu16:$d), (ins Preds:$a), "cvt.u16.pred\t$d, $a",
+ [(set RRegu16:$d, (zext Preds:$a))]>;
+
+def CVT_u16_u32
+ : InstPTX<(outs RRegu16:$d), (ins RRegu32:$a), "cvt.u16.u32\t$d, $a",
+ [(set RRegu16:$d, (trunc RRegu32:$a))]>;
+
+def CVT_u16_u64
+ : InstPTX<(outs RRegu16:$d), (ins RRegu64:$a), "cvt.u16.u64\t$d, $a",
+ [(set RRegu16:$d, (trunc RRegu64:$a))]>;
+
+def CVT_u16_f32
+ : InstPTX<(outs RRegu16:$d), (ins RRegf32:$a), "cvt.rni.u16.f32\t$d, $a",
+ [(set RRegu16:$d, (fp_to_uint RRegf32:$a))]>;
+
+def CVT_u16_f64
+ : InstPTX<(outs RRegu16:$d), (ins RRegf64:$a), "cvt.rni.u16.f64\t$d, $a",
+ [(set RRegu16:$d, (fp_to_uint RRegf64:$a))]>;
+
+// Conversion to u32
+
+def CVT_u32_pred
+ : InstPTX<(outs RRegu32:$d), (ins Preds:$a), "cvt.u32.pred\t$d, $a",
+ [(set RRegu32:$d, (zext Preds:$a))]>;
+
+def CVT_u32_u16
+ : InstPTX<(outs RRegu32:$d), (ins RRegu16:$a), "cvt.u32.u16\t$d, $a",
+ [(set RRegu32:$d, (zext RRegu16:$a))]>;
+
+def CVT_u32_u64
+ : InstPTX<(outs RRegu32:$d), (ins RRegu64:$a), "cvt.u32.u64\t$d, $a",
+ [(set RRegu32:$d, (trunc RRegu64:$a))]>;
+
+def CVT_u32_f32
+ : InstPTX<(outs RRegu32:$d), (ins RRegf32:$a), "cvt.rni.u32.f32\t$d, $a",
+ [(set RRegu32:$d, (fp_to_uint RRegf32:$a))]>;
+
+def CVT_u32_f64
+ : InstPTX<(outs RRegu32:$d), (ins RRegf64:$a), "cvt.rni.u32.f64\t$d, $a",
+ [(set RRegu32:$d, (fp_to_uint RRegf64:$a))]>;
+
+// Conversion to u64
+
+def CVT_u64_pred
+ : InstPTX<(outs RRegu64:$d), (ins Preds:$a), "cvt.u64.pred\t$d, $a",
+ [(set RRegu64:$d, (zext Preds:$a))]>;
+
+def CVT_u64_u16
+ : InstPTX<(outs RRegu64:$d), (ins RRegu16:$a), "cvt.u64.u16\t$d, $a",
+ [(set RRegu64:$d, (zext RRegu16:$a))]>;
+
+def CVT_u64_u32
+ : InstPTX<(outs RRegu64:$d), (ins RRegu32:$a), "cvt.u64.u32\t$d, $a",
+ [(set RRegu64:$d, (zext RRegu32:$a))]>;
+
+def CVT_u64_f32
+ : InstPTX<(outs RRegu64:$d), (ins RRegf32:$a), "cvt.rni.u64.f32\t$d, $a",
+ [(set RRegu64:$d, (fp_to_uint RRegf32:$a))]>;
+
+def CVT_u64_f64
+ : InstPTX<(outs RRegu64:$d), (ins RRegf64:$a), "cvt.rni.u64.f64\t$d, $a",
+ [(set RRegu64:$d, (fp_to_uint RRegf64:$a))]>;
+
+// Conversion to f32
+
+def CVT_f32_pred
+ : InstPTX<(outs RRegf32:$d), (ins Preds:$a), "cvt.rn.f32.pred\t$d, $a",
+ [(set RRegf32:$d, (uint_to_fp Preds:$a))]>;
+
+def CVT_f32_u16
+ : InstPTX<(outs RRegf32:$d), (ins RRegu16:$a), "cvt.rn.f32.u16\t$d, $a",
+ [(set RRegf32:$d, (uint_to_fp RRegu16:$a))]>;
+
+def CVT_f32_u32
+ : InstPTX<(outs RRegf32:$d), (ins RRegu32:$a), "cvt.rn.f32.u32\t$d, $a",
+ [(set RRegf32:$d, (uint_to_fp RRegu32:$a))]>;
+
+def CVT_f32_u64
+ : InstPTX<(outs RRegf32:$d), (ins RRegu64:$a), "cvt.rn.f32.u64\t$d, $a",
+ [(set RRegf32:$d, (uint_to_fp RRegu64:$a))]>;
+
+def CVT_f32_f64
+ : InstPTX<(outs RRegf32:$d), (ins RRegf64:$a), "cvt.rn.f32.f64\t$d, $a",
+ [(set RRegf32:$d, (fround RRegf64:$a))]>;
+
+// Conversion to f64
+
+def CVT_f64_pred
+ : InstPTX<(outs RRegf64:$d), (ins Preds:$a), "cvt.rn.f64.pred\t$d, $a",
+ [(set RRegf64:$d, (uint_to_fp Preds:$a))]>;
+
+def CVT_f64_u16
+ : InstPTX<(outs RRegf64:$d), (ins RRegu16:$a), "cvt.rn.f64.u16\t$d, $a",
+ [(set RRegf64:$d, (uint_to_fp RRegu16:$a))]>;
+
+def CVT_f64_u32
+ : InstPTX<(outs RRegf64:$d), (ins RRegu32:$a), "cvt.rn.f64.u32\t$d, $a",
+ [(set RRegf64:$d, (uint_to_fp RRegu32:$a))]>;
+
+def CVT_f64_u64
+ : InstPTX<(outs RRegf64:$d), (ins RRegu64:$a), "cvt.rn.f64.u64\t$d, $a",
+ [(set RRegf64:$d, (uint_to_fp RRegu64:$a))]>;
+
+def CVT_f64_f32
+ : InstPTX<(outs RRegf64:$d), (ins RRegf32:$a), "cvt.f64.f32\t$d, $a",
+ [(set RRegf64:$d, (fextend RRegf32:$a))]>;
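In summary, every widening, narrowing, and int/FP conversion above maps onto
one cvt form; a sketch:

    cvt.u32.u16      r0, rh1;  // zext i16 -> i32
    cvt.rni.u32.f32  r0, f1;   // fp_to_uint, round to nearest integer
    cvt.f64.f32      fd0, f1;  // fpext; widening needs no rounding mode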
///===- Control Flow Instructions -----------------------------------------===//
+let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
+ def BRAd
+ : InstPTX<(outs), (ins brtarget:$d), "bra\t$d", [(br bb:$d)]>;
+}
+
+let isBranch = 1, isTerminator = 1 in {
+ // FIXME: The pattern part is blank because I cannot (or do not yet know
+ // how to) use the first operand of PredicateOperand (a Preds register) here
+ def BRAdp
+ : InstPTX<(outs), (ins brtarget:$d), "bra\t$d",
+ [/*(brcond pred:$_p, bb:$d)*/]>;
+}
+
let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
def EXIT : InstPTX<(outs), (ins), "exit", [(PTXexit)]>;
def RET : InstPTX<(outs), (ins), "ret", [(PTXret)]>;
}
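A minimal control-flow sketch using these definitions (the label spelling is
hypothetical; actual labels are chosen by the asm printer):

    bra  L_exit;   // BRAd: unconditional branch, pattern (br bb:$d)
L_exit:
    exit;          // EXIT: terminates the thread, pattern (PTXexit)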
+
+///===- Intrinsic Instructions --------------------------------------------===//
+
+include "PTXIntrinsicInstrInfo.td"
diff --git a/lib/Target/PTX/PTXIntrinsicInstrInfo.td b/lib/Target/PTX/PTXIntrinsicInstrInfo.td
new file mode 100644
index 0000000..320934a
--- /dev/null
+++ b/lib/Target/PTX/PTXIntrinsicInstrInfo.td
@@ -0,0 +1,84 @@
+//===- PTXIntrinsicInstrInfo.td - Defines PTX intrinsics ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines all of the PTX-specific intrinsic instructions.
+//
+//===----------------------------------------------------------------------===//
+
+// PTX Special Purpose Register Accessor Intrinsics
+
+class PTX_READ_SPECIAL_REGISTER_R64<string regname, Intrinsic intop>
+ : InstPTX<(outs RRegu64:$d), (ins),
+ !strconcat("mov.u64\t$d, %", regname),
+ [(set RRegu64:$d, (intop))]>;
+
+class PTX_READ_SPECIAL_REGISTER_R32<string regname, Intrinsic intop>
+ : InstPTX<(outs RRegu32:$d), (ins),
+ !strconcat("mov.u32\t$d, %", regname),
+ [(set RRegu32:$d, (intop))]>;
+
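Sketch of the flow, assuming the IR-level intrinsic spelling
llvm.ptx.read.tid.x matches int_ptx_read_tid_x: a call to the intrinsic
selects a single special-register move:

    mov.u32  r0, %tid.x;  // PTX_READ_TID_X: thread index, x component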
+// TODO: Add readers for the vector versions of the special registers
+
+//def PTX_READ_TID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"tid", int_ptx_read_tid_r64>;
+def PTX_READ_TID_X : PTX_READ_SPECIAL_REGISTER_R32<"tid.x", int_ptx_read_tid_x>;
+def PTX_READ_TID_Y : PTX_READ_SPECIAL_REGISTER_R32<"tid.y", int_ptx_read_tid_y>;
+def PTX_READ_TID_Z : PTX_READ_SPECIAL_REGISTER_R32<"tid.z", int_ptx_read_tid_z>;
+def PTX_READ_TID_W : PTX_READ_SPECIAL_REGISTER_R32<"tid.w", int_ptx_read_tid_w>;
+
+//def PTX_READ_NTID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"ntid", int_ptx_read_ntid_r64>;
+def PTX_READ_NTID_X : PTX_READ_SPECIAL_REGISTER_R32<"ntid.x", int_ptx_read_ntid_x>;
+def PTX_READ_NTID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ntid.y", int_ptx_read_ntid_y>;
+def PTX_READ_NTID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ntid.z", int_ptx_read_ntid_z>;
+def PTX_READ_NTID_W : PTX_READ_SPECIAL_REGISTER_R32<"ntid.w", int_ptx_read_ntid_w>;
+
+def PTX_READ_LANEID : PTX_READ_SPECIAL_REGISTER_R32<"laneid", int_ptx_read_laneid>;
+def PTX_READ_WARPID : PTX_READ_SPECIAL_REGISTER_R32<"warpid", int_ptx_read_warpid>;
+def PTX_READ_NWARPID : PTX_READ_SPECIAL_REGISTER_R32<"nwarpid", int_ptx_read_nwarpid>;
+
+//def PTX_READ_CTAID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"ctaid", int_ptx_read_ctaid_r64>;
+def PTX_READ_CTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.x", int_ptx_read_ctaid_x>;
+def PTX_READ_CTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.y", int_ptx_read_ctaid_y>;
+def PTX_READ_CTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.z", int_ptx_read_ctaid_z>;
+def PTX_READ_CTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.w", int_ptx_read_ctaid_w>;
+
+//def PTX_READ_NCTAID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"nctaid", int_ptx_read_nctaid_r64>;
+def PTX_READ_NCTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.x", int_ptx_read_nctaid_x>;
+def PTX_READ_NCTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.y", int_ptx_read_nctaid_y>;
+def PTX_READ_NCTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.z", int_ptx_read_nctaid_z>;
+def PTX_READ_NCTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.w", int_ptx_read_nctaid_w>;
+
+def PTX_READ_SMID : PTX_READ_SPECIAL_REGISTER_R32<"smid", int_ptx_read_smid>;
+def PTX_READ_NSMID : PTX_READ_SPECIAL_REGISTER_R32<"nsmid", int_ptx_read_nsmid>;
+def PTX_READ_GRIDID : PTX_READ_SPECIAL_REGISTER_R32<"gridid", int_ptx_read_gridid>;
+
+def PTX_READ_LANEMASK_EQ
+ : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_eq", int_ptx_read_lanemask_eq>;
+def PTX_READ_LANEMASK_LE
+ : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_le", int_ptx_read_lanemask_le>;
+def PTX_READ_LANEMASK_LT
+ : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_lt", int_ptx_read_lanemask_lt>;
+def PTX_READ_LANEMASK_GE
+ : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_ge", int_ptx_read_lanemask_ge>;
+def PTX_READ_LANEMASK_GT
+ : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_gt", int_ptx_read_lanemask_gt>;
+
+def PTX_READ_CLOCK
+ : PTX_READ_SPECIAL_REGISTER_R32<"clock", int_ptx_read_clock>;
+def PTX_READ_CLOCK64
+ : PTX_READ_SPECIAL_REGISTER_R64<"clock64", int_ptx_read_clock64>;
+
+def PTX_READ_PM0 : PTX_READ_SPECIAL_REGISTER_R32<"pm0", int_ptx_read_pm0>;
+def PTX_READ_PM1 : PTX_READ_SPECIAL_REGISTER_R32<"pm1", int_ptx_read_pm1>;
+def PTX_READ_PM2 : PTX_READ_SPECIAL_REGISTER_R32<"pm2", int_ptx_read_pm2>;
+def PTX_READ_PM3 : PTX_READ_SPECIAL_REGISTER_R32<"pm3", int_ptx_read_pm3>;
+
+// PTX Parallel Synchronization and Communication Intrinsics
+
+def PTX_BAR_SYNC : InstPTX<(outs), (ins i32imm:$i), "bar.sync\t$i",
+ [(int_ptx_bar_sync imm:$i)]>;
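Usage sketch, under the same naming assumption: a call such as
@llvm.ptx.bar.sync(i32 0) emits:

    bar.sync  0;  // synchronize all threads of the CTA at barrier 0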
diff --git a/lib/Target/PTX/PTXMCAsmStreamer.cpp b/lib/Target/PTX/PTXMCAsmStreamer.cpp
index 0886ba8..1574670 100644
--- a/lib/Target/PTX/PTXMCAsmStreamer.cpp
+++ b/lib/Target/PTX/PTXMCAsmStreamer.cpp
@@ -143,9 +143,9 @@ public:
virtual void EmitBytes(StringRef Data, unsigned AddrSpace);
virtual void EmitValueImpl(const MCExpr *Value, unsigned Size,
- bool isPCRel, unsigned AddrSpace);
- virtual void EmitULEB128Value(const MCExpr *Value, unsigned AddrSpace = 0);
- virtual void EmitSLEB128Value(const MCExpr *Value, unsigned AddrSpace = 0);
+ unsigned AddrSpace);
+ virtual void EmitULEB128Value(const MCExpr *Value);
+ virtual void EmitSLEB128Value(const MCExpr *Value);
virtual void EmitGPRel32Value(const MCExpr *Value);
@@ -233,7 +233,7 @@ void PTXMCAsmStreamer::ChangeSection(const MCSection *Section) {
void PTXMCAsmStreamer::EmitLabel(MCSymbol *Symbol) {
assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
- assert(getCurrentSection() && "Cannot emit before setting section!");
+ //assert(getCurrentSection() && "Cannot emit before setting section!");
OS << *Symbol << MAI.getLabelSuffix();
EmitEOL();
@@ -352,9 +352,8 @@ void PTXMCAsmStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
}
void PTXMCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
- bool isPCRel, unsigned AddrSpace) {
+ unsigned AddrSpace) {
assert(getCurrentSection() && "Cannot emit contents before setting section!");
- assert(!isPCRel && "Cannot emit pc relative relocations!");
const char *Directive = 0;
switch (Size) {
default: break;
@@ -383,15 +382,13 @@ void PTXMCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
EmitEOL();
}
-void PTXMCAsmStreamer::EmitULEB128Value(const MCExpr *Value,
- unsigned AddrSpace) {
+void PTXMCAsmStreamer::EmitULEB128Value(const MCExpr *Value) {
assert(MAI.hasLEB128() && "Cannot print a .uleb");
OS << ".uleb128 " << *Value;
EmitEOL();
}
-void PTXMCAsmStreamer::EmitSLEB128Value(const MCExpr *Value,
- unsigned AddrSpace) {
+void PTXMCAsmStreamer::EmitSLEB128Value(const MCExpr *Value) {
assert(MAI.hasLEB128() && "Cannot print a .sleb");
OS << ".sleb128 " << *Value;
EmitEOL();
@@ -423,7 +420,8 @@ void PTXMCAsmStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue,
MCStreamer::EmitFill(NumBytes, FillValue, AddrSpace);
}
-void PTXMCAsmStreamer::EmitValueToAlignment(unsigned ByteAlignment, int64_t Value,
+void PTXMCAsmStreamer::EmitValueToAlignment(unsigned ByteAlignment,
+ int64_t Value,
unsigned ValueSize,
unsigned MaxBytesToEmit) {
// Some assemblers don't support non-power of two alignments, so we always
@@ -532,7 +530,7 @@ void PTXMCAsmStreamer::Finish() {}
namespace llvm {
MCStreamer *createPTXAsmStreamer(MCContext &Context,
formatted_raw_ostream &OS,
- bool isVerboseAsm, bool useLoc,
+ bool isVerboseAsm, bool useLoc, bool useCFI,
MCInstPrinter *IP,
MCCodeEmitter *CE, TargetAsmBackend *TAB,
bool ShowInst) {
diff --git a/lib/Target/PTX/PTXMFInfoExtract.cpp b/lib/Target/PTX/PTXMFInfoExtract.cpp
index b37c740..c5e1910 100644
--- a/lib/Target/PTX/PTXMFInfoExtract.cpp
+++ b/lib/Target/PTX/PTXMFInfoExtract.cpp
@@ -79,12 +79,12 @@ bool PTXMFInfoExtract::runOnMachineFunction(MachineFunction &MF) {
DEBUG(for (PTXMachineFunctionInfo::reg_iterator
i = MFI->argRegBegin(), e = MFI->argRegEnd();
- i != e; ++i)
+ i != e; ++i)
dbgs() << "Arg Reg: " << *i << "\n";);
DEBUG(for (PTXMachineFunctionInfo::reg_iterator
i = MFI->localVarRegBegin(), e = MFI->localVarRegEnd();
- i != e; ++i)
+ i != e; ++i)
dbgs() << "Local Var Reg: " << *i << "\n";);
return false;
diff --git a/lib/Target/PTX/PTXMachineFunctionInfo.h b/lib/Target/PTX/PTXMachineFunctionInfo.h
index 56d044b..81df1c2 100644
--- a/lib/Target/PTX/PTXMachineFunctionInfo.h
+++ b/lib/Target/PTX/PTXMachineFunctionInfo.h
@@ -42,36 +42,37 @@ public:
void setRetReg(unsigned reg) { reg_ret = reg; }
void doneAddArg(void) {
- std::sort(reg_arg.begin(), reg_arg.end());
_isDoneAddArg = true;
}
- void doneAddLocalVar(void) {
- std::sort(reg_local_var.begin(), reg_local_var.end());
- }
+ void doneAddLocalVar(void) {}
bool isDoneAddArg(void) { return _isDoneAddArg; }
bool isKernel() const { return is_kernel; }
- typedef std::vector<unsigned>::const_iterator reg_iterator;
+ typedef std::vector<unsigned>::const_iterator reg_iterator;
+ typedef std::vector<unsigned>::const_reverse_iterator reg_reverse_iterator;
- bool argRegEmpty() const { return reg_arg.empty(); }
- int getNumArg() const { return reg_arg.size(); }
+ bool argRegEmpty() const { return reg_arg.empty(); }
+ int getNumArg() const { return reg_arg.size(); }
reg_iterator argRegBegin() const { return reg_arg.begin(); }
reg_iterator argRegEnd() const { return reg_arg.end(); }
+ reg_reverse_iterator argRegReverseBegin() const { return reg_arg.rbegin(); }
+ reg_reverse_iterator argRegReverseEnd() const { return reg_arg.rend(); }
- bool localVarRegEmpty() const { return reg_local_var.empty(); }
+ bool localVarRegEmpty() const { return reg_local_var.empty(); }
reg_iterator localVarRegBegin() const { return reg_local_var.begin(); }
reg_iterator localVarRegEnd() const { return reg_local_var.end(); }
unsigned retReg() const { return reg_ret; }
bool isArgReg(unsigned reg) const {
- return std::binary_search(reg_arg.begin(), reg_arg.end(), reg);
+ return std::find(reg_arg.begin(), reg_arg.end(), reg) != reg_arg.end();
}
bool isLocalVarReg(unsigned reg) const {
- return std::binary_search(reg_local_var.begin(), reg_local_var.end(), reg);
+ return std::find(reg_local_var.begin(), reg_local_var.end(), reg)
+ != reg_local_var.end();
}
}; // class PTXMachineFunctionInfo
} // namespace llvm
diff --git a/lib/Target/PTX/PTXRegisterInfo.td b/lib/Target/PTX/PTXRegisterInfo.td
index 22e2b34..f616141 100644
--- a/lib/Target/PTX/PTXRegisterInfo.td
+++ b/lib/Target/PTX/PTXRegisterInfo.td
@@ -19,6 +19,8 @@ class PTXReg<string n> : Register<n> {
// Registers
//===----------------------------------------------------------------------===//
+///===- Predicate Registers -----------------------------------------------===//
+
def P0 : PTXReg<"p0">;
def P1 : PTXReg<"p1">;
def P2 : PTXReg<"p2">;
@@ -51,6 +53,108 @@ def P28 : PTXReg<"p28">;
def P29 : PTXReg<"p29">;
def P30 : PTXReg<"p30">;
def P31 : PTXReg<"p31">;
+def P32 : PTXReg<"p32">;
+def P33 : PTXReg<"p33">;
+def P34 : PTXReg<"p34">;
+def P35 : PTXReg<"p35">;
+def P36 : PTXReg<"p36">;
+def P37 : PTXReg<"p37">;
+def P38 : PTXReg<"p38">;
+def P39 : PTXReg<"p39">;
+def P40 : PTXReg<"p40">;
+def P41 : PTXReg<"p41">;
+def P42 : PTXReg<"p42">;
+def P43 : PTXReg<"p43">;
+def P44 : PTXReg<"p44">;
+def P45 : PTXReg<"p45">;
+def P46 : PTXReg<"p46">;
+def P47 : PTXReg<"p47">;
+def P48 : PTXReg<"p48">;
+def P49 : PTXReg<"p49">;
+def P50 : PTXReg<"p50">;
+def P51 : PTXReg<"p51">;
+def P52 : PTXReg<"p52">;
+def P53 : PTXReg<"p53">;
+def P54 : PTXReg<"p54">;
+def P55 : PTXReg<"p55">;
+def P56 : PTXReg<"p56">;
+def P57 : PTXReg<"p57">;
+def P58 : PTXReg<"p58">;
+def P59 : PTXReg<"p59">;
+def P60 : PTXReg<"p60">;
+def P61 : PTXReg<"p61">;
+def P62 : PTXReg<"p62">;
+def P63 : PTXReg<"p63">;
+
+///===- 16-bit Integer Registers ------------------------------------------===//
+
+def RH0 : PTXReg<"rh0">;
+def RH1 : PTXReg<"rh1">;
+def RH2 : PTXReg<"rh2">;
+def RH3 : PTXReg<"rh3">;
+def RH4 : PTXReg<"rh4">;
+def RH5 : PTXReg<"rh5">;
+def RH6 : PTXReg<"rh6">;
+def RH7 : PTXReg<"rh7">;
+def RH8 : PTXReg<"rh8">;
+def RH9 : PTXReg<"rh9">;
+def RH10 : PTXReg<"rh10">;
+def RH11 : PTXReg<"rh11">;
+def RH12 : PTXReg<"rh12">;
+def RH13 : PTXReg<"rh13">;
+def RH14 : PTXReg<"rh14">;
+def RH15 : PTXReg<"rh15">;
+def RH16 : PTXReg<"rh16">;
+def RH17 : PTXReg<"rh17">;
+def RH18 : PTXReg<"rh18">;
+def RH19 : PTXReg<"rh19">;
+def RH20 : PTXReg<"rh20">;
+def RH21 : PTXReg<"rh21">;
+def RH22 : PTXReg<"rh22">;
+def RH23 : PTXReg<"rh23">;
+def RH24 : PTXReg<"rh24">;
+def RH25 : PTXReg<"rh25">;
+def RH26 : PTXReg<"rh26">;
+def RH27 : PTXReg<"rh27">;
+def RH28 : PTXReg<"rh28">;
+def RH29 : PTXReg<"rh29">;
+def RH30 : PTXReg<"rh30">;
+def RH31 : PTXReg<"rh31">;
+def RH32 : PTXReg<"rh32">;
+def RH33 : PTXReg<"rh33">;
+def RH34 : PTXReg<"rh34">;
+def RH35 : PTXReg<"rh35">;
+def RH36 : PTXReg<"rh36">;
+def RH37 : PTXReg<"rh37">;
+def RH38 : PTXReg<"rh38">;
+def RH39 : PTXReg<"rh39">;
+def RH40 : PTXReg<"rh40">;
+def RH41 : PTXReg<"rh41">;
+def RH42 : PTXReg<"rh42">;
+def RH43 : PTXReg<"rh43">;
+def RH44 : PTXReg<"rh44">;
+def RH45 : PTXReg<"rh45">;
+def RH46 : PTXReg<"rh46">;
+def RH47 : PTXReg<"rh47">;
+def RH48 : PTXReg<"rh48">;
+def RH49 : PTXReg<"rh49">;
+def RH50 : PTXReg<"rh50">;
+def RH51 : PTXReg<"rh51">;
+def RH52 : PTXReg<"rh52">;
+def RH53 : PTXReg<"rh53">;
+def RH54 : PTXReg<"rh54">;
+def RH55 : PTXReg<"rh55">;
+def RH56 : PTXReg<"rh56">;
+def RH57 : PTXReg<"rh57">;
+def RH58 : PTXReg<"rh58">;
+def RH59 : PTXReg<"rh59">;
+def RH60 : PTXReg<"rh60">;
+def RH61 : PTXReg<"rh61">;
+def RH62 : PTXReg<"rh62">;
+def RH63 : PTXReg<"rh63">;
+
+
+///===- 32-bit Integer Registers ------------------------------------------===//
def R0 : PTXReg<"r0">;
def R1 : PTXReg<"r1">;
@@ -84,6 +188,243 @@ def R28 : PTXReg<"r28">;
def R29 : PTXReg<"r29">;
def R30 : PTXReg<"r30">;
def R31 : PTXReg<"r31">;
+def R32 : PTXReg<"r32">;
+def R33 : PTXReg<"r33">;
+def R34 : PTXReg<"r34">;
+def R35 : PTXReg<"r35">;
+def R36 : PTXReg<"r36">;
+def R37 : PTXReg<"r37">;
+def R38 : PTXReg<"r38">;
+def R39 : PTXReg<"r39">;
+def R40 : PTXReg<"r40">;
+def R41 : PTXReg<"r41">;
+def R42 : PTXReg<"r42">;
+def R43 : PTXReg<"r43">;
+def R44 : PTXReg<"r44">;
+def R45 : PTXReg<"r45">;
+def R46 : PTXReg<"r46">;
+def R47 : PTXReg<"r47">;
+def R48 : PTXReg<"r48">;
+def R49 : PTXReg<"r49">;
+def R50 : PTXReg<"r50">;
+def R51 : PTXReg<"r51">;
+def R52 : PTXReg<"r52">;
+def R53 : PTXReg<"r53">;
+def R54 : PTXReg<"r54">;
+def R55 : PTXReg<"r55">;
+def R56 : PTXReg<"r56">;
+def R57 : PTXReg<"r57">;
+def R58 : PTXReg<"r58">;
+def R59 : PTXReg<"r59">;
+def R60 : PTXReg<"r60">;
+def R61 : PTXReg<"r61">;
+def R62 : PTXReg<"r62">;
+def R63 : PTXReg<"r63">;
+
+
+///===- 64-bit Integer Registers ------------------------------------------===//
+
+def RD0 : PTXReg<"rd0">;
+def RD1 : PTXReg<"rd1">;
+def RD2 : PTXReg<"rd2">;
+def RD3 : PTXReg<"rd3">;
+def RD4 : PTXReg<"rd4">;
+def RD5 : PTXReg<"rd5">;
+def RD6 : PTXReg<"rd6">;
+def RD7 : PTXReg<"rd7">;
+def RD8 : PTXReg<"rd8">;
+def RD9 : PTXReg<"rd9">;
+def RD10 : PTXReg<"rd10">;
+def RD11 : PTXReg<"rd11">;
+def RD12 : PTXReg<"rd12">;
+def RD13 : PTXReg<"rd13">;
+def RD14 : PTXReg<"rd14">;
+def RD15 : PTXReg<"rd15">;
+def RD16 : PTXReg<"rd16">;
+def RD17 : PTXReg<"rd17">;
+def RD18 : PTXReg<"rd18">;
+def RD19 : PTXReg<"rd19">;
+def RD20 : PTXReg<"rd20">;
+def RD21 : PTXReg<"rd21">;
+def RD22 : PTXReg<"rd22">;
+def RD23 : PTXReg<"rd23">;
+def RD24 : PTXReg<"rd24">;
+def RD25 : PTXReg<"rd25">;
+def RD26 : PTXReg<"rd26">;
+def RD27 : PTXReg<"rd27">;
+def RD28 : PTXReg<"rd28">;
+def RD29 : PTXReg<"rd29">;
+def RD30 : PTXReg<"rd30">;
+def RD31 : PTXReg<"rd31">;
+def RD32 : PTXReg<"rd32">;
+def RD33 : PTXReg<"rd33">;
+def RD34 : PTXReg<"rd34">;
+def RD35 : PTXReg<"rd35">;
+def RD36 : PTXReg<"rd36">;
+def RD37 : PTXReg<"rd37">;
+def RD38 : PTXReg<"rd38">;
+def RD39 : PTXReg<"rd39">;
+def RD40 : PTXReg<"rd40">;
+def RD41 : PTXReg<"rd41">;
+def RD42 : PTXReg<"rd42">;
+def RD43 : PTXReg<"rd43">;
+def RD44 : PTXReg<"rd44">;
+def RD45 : PTXReg<"rd45">;
+def RD46 : PTXReg<"rd46">;
+def RD47 : PTXReg<"rd47">;
+def RD48 : PTXReg<"rd48">;
+def RD49 : PTXReg<"rd49">;
+def RD50 : PTXReg<"rd50">;
+def RD51 : PTXReg<"rd51">;
+def RD52 : PTXReg<"rd52">;
+def RD53 : PTXReg<"rd53">;
+def RD54 : PTXReg<"rd54">;
+def RD55 : PTXReg<"rd55">;
+def RD56 : PTXReg<"rd56">;
+def RD57 : PTXReg<"rd57">;
+def RD58 : PTXReg<"rd58">;
+def RD59 : PTXReg<"rd59">;
+def RD60 : PTXReg<"rd60">;
+def RD61 : PTXReg<"rd61">;
+def RD62 : PTXReg<"rd62">;
+def RD63 : PTXReg<"rd63">;
+
+
+///===- 32-bit Floating-Point Registers -----------------------------------===//
+
+def F0 : PTXReg<"f0">;
+def F1 : PTXReg<"f1">;
+def F2 : PTXReg<"f2">;
+def F3 : PTXReg<"f3">;
+def F4 : PTXReg<"f4">;
+def F5 : PTXReg<"f5">;
+def F6 : PTXReg<"f6">;
+def F7 : PTXReg<"f7">;
+def F8 : PTXReg<"f8">;
+def F9 : PTXReg<"f9">;
+def F10 : PTXReg<"f10">;
+def F11 : PTXReg<"f11">;
+def F12 : PTXReg<"f12">;
+def F13 : PTXReg<"f13">;
+def F14 : PTXReg<"f14">;
+def F15 : PTXReg<"f15">;
+def F16 : PTXReg<"f16">;
+def F17 : PTXReg<"f17">;
+def F18 : PTXReg<"f18">;
+def F19 : PTXReg<"f19">;
+def F20 : PTXReg<"f20">;
+def F21 : PTXReg<"f21">;
+def F22 : PTXReg<"f22">;
+def F23 : PTXReg<"f23">;
+def F24 : PTXReg<"f24">;
+def F25 : PTXReg<"f25">;
+def F26 : PTXReg<"f26">;
+def F27 : PTXReg<"f27">;
+def F28 : PTXReg<"f28">;
+def F29 : PTXReg<"f29">;
+def F30 : PTXReg<"f30">;
+def F31 : PTXReg<"f31">;
+def F32 : PTXReg<"f32">;
+def F33 : PTXReg<"f33">;
+def F34 : PTXReg<"f34">;
+def F35 : PTXReg<"f35">;
+def F36 : PTXReg<"f36">;
+def F37 : PTXReg<"f37">;
+def F38 : PTXReg<"f38">;
+def F39 : PTXReg<"f39">;
+def F40 : PTXReg<"f40">;
+def F41 : PTXReg<"f41">;
+def F42 : PTXReg<"f42">;
+def F43 : PTXReg<"f43">;
+def F44 : PTXReg<"f44">;
+def F45 : PTXReg<"f45">;
+def F46 : PTXReg<"f46">;
+def F47 : PTXReg<"f47">;
+def F48 : PTXReg<"f48">;
+def F49 : PTXReg<"f49">;
+def F50 : PTXReg<"f50">;
+def F51 : PTXReg<"f51">;
+def F52 : PTXReg<"f52">;
+def F53 : PTXReg<"f53">;
+def F54 : PTXReg<"f54">;
+def F55 : PTXReg<"f55">;
+def F56 : PTXReg<"f56">;
+def F57 : PTXReg<"f57">;
+def F58 : PTXReg<"f58">;
+def F59 : PTXReg<"f59">;
+def F60 : PTXReg<"f60">;
+def F61 : PTXReg<"f61">;
+def F62 : PTXReg<"f62">;
+def F63 : PTXReg<"f63">;
+
+
+///===- 64-bit Floating-Point Registers -----------------------------------===//
+
+def FD0 : PTXReg<"fd0">;
+def FD1 : PTXReg<"fd1">;
+def FD2 : PTXReg<"fd2">;
+def FD3 : PTXReg<"fd3">;
+def FD4 : PTXReg<"fd4">;
+def FD5 : PTXReg<"fd5">;
+def FD6 : PTXReg<"fd6">;
+def FD7 : PTXReg<"fd7">;
+def FD8 : PTXReg<"fd8">;
+def FD9 : PTXReg<"fd9">;
+def FD10 : PTXReg<"fd10">;
+def FD11 : PTXReg<"fd11">;
+def FD12 : PTXReg<"fd12">;
+def FD13 : PTXReg<"fd13">;
+def FD14 : PTXReg<"fd14">;
+def FD15 : PTXReg<"fd15">;
+def FD16 : PTXReg<"fd16">;
+def FD17 : PTXReg<"fd17">;
+def FD18 : PTXReg<"fd18">;
+def FD19 : PTXReg<"fd19">;
+def FD20 : PTXReg<"fd20">;
+def FD21 : PTXReg<"fd21">;
+def FD22 : PTXReg<"fd22">;
+def FD23 : PTXReg<"fd23">;
+def FD24 : PTXReg<"fd24">;
+def FD25 : PTXReg<"fd25">;
+def FD26 : PTXReg<"fd26">;
+def FD27 : PTXReg<"fd27">;
+def FD28 : PTXReg<"fd28">;
+def FD29 : PTXReg<"fd29">;
+def FD30 : PTXReg<"fd30">;
+def FD31 : PTXReg<"fd31">;
+def FD32 : PTXReg<"fd32">;
+def FD33 : PTXReg<"fd33">;
+def FD34 : PTXReg<"fd34">;
+def FD35 : PTXReg<"fd35">;
+def FD36 : PTXReg<"fd36">;
+def FD37 : PTXReg<"fd37">;
+def FD38 : PTXReg<"fd38">;
+def FD39 : PTXReg<"fd39">;
+def FD40 : PTXReg<"fd40">;
+def FD41 : PTXReg<"fd41">;
+def FD42 : PTXReg<"fd42">;
+def FD43 : PTXReg<"fd43">;
+def FD44 : PTXReg<"fd44">;
+def FD45 : PTXReg<"fd45">;
+def FD46 : PTXReg<"fd46">;
+def FD47 : PTXReg<"fd47">;
+def FD48 : PTXReg<"fd48">;
+def FD49 : PTXReg<"fd49">;
+def FD50 : PTXReg<"fd50">;
+def FD51 : PTXReg<"fd51">;
+def FD52 : PTXReg<"fd52">;
+def FD53 : PTXReg<"fd53">;
+def FD54 : PTXReg<"fd54">;
+def FD55 : PTXReg<"fd55">;
+def FD56 : PTXReg<"fd56">;
+def FD57 : PTXReg<"fd57">;
+def FD58 : PTXReg<"fd58">;
+def FD59 : PTXReg<"fd59">;
+def FD60 : PTXReg<"fd60">;
+def FD61 : PTXReg<"fd61">;
+def FD62 : PTXReg<"fd62">;
+def FD63 : PTXReg<"fd63">;
+
//===----------------------------------------------------------------------===//
// Register classes
@@ -93,10 +434,58 @@ def Preds : RegisterClass<"PTX", [i1], 8,
[P0, P1, P2, P3, P4, P5, P6, P7,
P8, P9, P10, P11, P12, P13, P14, P15,
P16, P17, P18, P19, P20, P21, P22, P23,
- P24, P25, P26, P27, P28, P29, P30, P31]>;
+ P24, P25, P26, P27, P28, P29, P30, P31,
+ P32, P33, P34, P35, P36, P37, P38, P39,
+ P40, P41, P42, P43, P44, P45, P46, P47,
+ P48, P49, P50, P51, P52, P53, P54, P55,
+ P56, P57, P58, P59, P60, P61, P62, P63]>;
-def RRegs32 : RegisterClass<"PTX", [i32], 32,
+def RRegu16 : RegisterClass<"PTX", [i16], 16,
+ [RH0, RH1, RH2, RH3, RH4, RH5, RH6, RH7,
+ RH8, RH9, RH10, RH11, RH12, RH13, RH14, RH15,
+ RH16, RH17, RH18, RH19, RH20, RH21, RH22, RH23,
+ RH24, RH25, RH26, RH27, RH28, RH29, RH30, RH31,
+ RH32, RH33, RH34, RH35, RH36, RH37, RH38, RH39,
+ RH40, RH41, RH42, RH43, RH44, RH45, RH46, RH47,
+ RH48, RH49, RH50, RH51, RH52, RH53, RH54, RH55,
+ RH56, RH57, RH58, RH59, RH60, RH61, RH62, RH63]>;
+
+def RRegu32 : RegisterClass<"PTX", [i32], 32,
[R0, R1, R2, R3, R4, R5, R6, R7,
R8, R9, R10, R11, R12, R13, R14, R15,
R16, R17, R18, R19, R20, R21, R22, R23,
- R24, R25, R26, R27, R28, R29, R30, R31]>;
+ R24, R25, R26, R27, R28, R29, R30, R31,
+ R32, R33, R34, R35, R36, R37, R38, R39,
+ R40, R41, R42, R43, R44, R45, R46, R47,
+ R48, R49, R50, R51, R52, R53, R54, R55,
+ R56, R57, R58, R59, R60, R61, R62, R63]>;
+
+def RRegu64 : RegisterClass<"PTX", [i64], 64,
+ [RD0, RD1, RD2, RD3, RD4, RD5, RD6, RD7,
+ RD8, RD9, RD10, RD11, RD12, RD13, RD14, RD15,
+ RD16, RD17, RD18, RD19, RD20, RD21, RD22, RD23,
+ RD24, RD25, RD26, RD27, RD28, RD29, RD30, RD31,
+ RD32, RD33, RD34, RD35, RD36, RD37, RD38, RD39,
+ RD40, RD41, RD42, RD43, RD44, RD45, RD46, RD47,
+ RD48, RD49, RD50, RD51, RD52, RD53, RD54, RD55,
+ RD56, RD57, RD58, RD59, RD60, RD61, RD62, RD63]>;
+
+def RRegf32 : RegisterClass<"PTX", [f32], 32,
+ [F0, F1, F2, F3, F4, F5, F6, F7,
+ F8, F9, F10, F11, F12, F13, F14, F15,
+ F16, F17, F18, F19, F20, F21, F22, F23,
+ F24, F25, F26, F27, F28, F29, F30, F31,
+ F32, F33, F34, F35, F36, F37, F38, F39,
+ F40, F41, F42, F43, F44, F45, F46, F47,
+ F48, F49, F50, F51, F52, F53, F54, F55,
+ F56, F57, F58, F59, F60, F61, F62, F63]>;
+
+def RRegf64 : RegisterClass<"PTX", [f64], 64,
+ [FD0, FD1, FD2, FD3, FD4, FD5, FD6, FD7,
+ FD8, FD9, FD10, FD11, FD12, FD13, FD14, FD15,
+ FD16, FD17, FD18, FD19, FD20, FD21, FD22, FD23,
+ FD24, FD25, FD26, FD27, FD28, FD29, FD30, FD31,
+ FD32, FD33, FD34, FD35, FD36, FD37, FD38, FD39,
+ FD40, FD41, FD42, FD43, FD44, FD45, FD46, FD47,
+ FD48, FD49, FD50, FD51, FD52, FD53, FD54, FD55,
+ FD56, FD57, FD58, FD59, FD60, FD61, FD62, FD63]>;
diff --git a/lib/Target/PTX/PTXSubtarget.cpp b/lib/Target/PTX/PTXSubtarget.cpp
index 00e2c88..a224f2b 100644
--- a/lib/Target/PTX/PTXSubtarget.cpp
+++ b/lib/Target/PTX/PTXSubtarget.cpp
@@ -12,12 +12,36 @@
//===----------------------------------------------------------------------===//
#include "PTXSubtarget.h"
+#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
-PTXSubtarget::PTXSubtarget(const std::string &TT, const std::string &FS) {
- std::string TARGET = "sm_20";
- // TODO: call ParseSubtargetFeatures(FS, TARGET);
+PTXSubtarget::PTXSubtarget(const std::string &TT, const std::string &FS,
+ bool is64Bit)
+ : PTXShaderModel(PTX_SM_1_0),
+ PTXVersion(PTX_VERSION_2_0),
+ SupportsDouble(false),
+ Is64Bit(is64Bit) {
+ std::string TARGET = "generic";
+ ParseSubtargetFeatures(FS, TARGET);
+}
+
+std::string PTXSubtarget::getTargetString() const {
+ switch(PTXShaderModel) {
+ default: llvm_unreachable("Unknown shader model");
+ case PTX_SM_1_0: return "sm_10";
+ case PTX_SM_1_3: return "sm_13";
+ case PTX_SM_2_0: return "sm_20";
+ }
+}
+
+std::string PTXSubtarget::getPTXVersionString() const {
+ switch(PTXVersion) {
+ default: llvm_unreachable("Unknown PTX version");
+ case PTX_VERSION_2_0: return "2.0";
+ case PTX_VERSION_2_1: return "2.1";
+ case PTX_VERSION_2_2: return "2.2";
+ }
}
#include "PTXGenSubtarget.inc"
diff --git a/lib/Target/PTX/PTXSubtarget.h b/lib/Target/PTX/PTXSubtarget.h
index 7fd85f8..47d9842 100644
--- a/lib/Target/PTX/PTXSubtarget.h
+++ b/lib/Target/PTX/PTXSubtarget.h
@@ -19,10 +19,57 @@
namespace llvm {
class PTXSubtarget : public TargetSubtarget {
private:
- bool is_sm20;
+
+ /**
+ * Enumeration of Shader Models supported by the back-end.
+ */
+ enum PTXShaderModelEnum {
+ PTX_SM_1_0, /**< Shader Model 1.0 */
+ PTX_SM_1_3, /**< Shader Model 1.3 */
+ PTX_SM_2_0 /**< Shader Model 2.0 */
+ };
+
+ /**
+ * Enumeration of PTX versions supported by the back-end.
+ *
+ * Currently, PTX 2.0 is the minimum supported version.
+ */
+ enum PTXVersionEnum {
+ PTX_VERSION_2_0, /**< PTX Version 2.0 */
+ PTX_VERSION_2_1, /**< PTX Version 2.1 */
+ PTX_VERSION_2_2 /**< PTX Version 2.2 */
+ };
+
+ /// Shader Model supported on the target GPU.
+ PTXShaderModelEnum PTXShaderModel;
+
+ /// PTX Language Version.
+ PTXVersionEnum PTXVersion;
+
+ // The native .f64 type is supported on the hardware.
+ bool SupportsDouble;
+
+ // Use .u64 instead of .u32 for addresses.
+ bool Is64Bit;
public:
- PTXSubtarget(const std::string &TT, const std::string &FS);
+ PTXSubtarget(const std::string &TT, const std::string &FS, bool is64Bit);
+
+ std::string getTargetString() const;
+
+ std::string getPTXVersionString() const;
+
+ bool supportsDouble() const { return SupportsDouble; }
+
+ bool is64Bit() const { return Is64Bit; }
+
+ bool supportsSM13() const { return PTXShaderModel >= PTX_SM_1_3; }
+
+ bool supportsSM20() const { return PTXShaderModel >= PTX_SM_2_0; }
+
+ bool supportsPTX21() const { return PTXVersion >= PTX_VERSION_2_1; }
+
+ bool supportsPTX22() const { return PTXVersion >= PTX_VERSION_2_2; }
std::string ParseSubtargetFeatures(const std::string &FS,
const std::string &CPU);
diff --git a/lib/Target/PTX/PTXTargetMachine.cpp b/lib/Target/PTX/PTXTargetMachine.cpp
index b263813..1b737c9 100644
--- a/lib/Target/PTX/PTXTargetMachine.cpp
+++ b/lib/Target/PTX/PTXTargetMachine.cpp
@@ -16,12 +16,14 @@
#include "PTXTargetMachine.h"
#include "llvm/PassManager.h"
#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
namespace llvm {
MCStreamer *createPTXAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
bool isVerboseAsm, bool useLoc,
+ bool useCFI,
MCInstPrinter *InstPrint,
MCCodeEmitter *CE,
TargetAsmBackend *TAB,
@@ -29,21 +31,47 @@ namespace llvm {
}
extern "C" void LLVMInitializePTXTarget() {
- RegisterTargetMachine<PTXTargetMachine> X(ThePTXTarget);
- RegisterAsmInfo<PTXMCAsmInfo> Y(ThePTXTarget);
- TargetRegistry::RegisterAsmStreamer(ThePTXTarget, createPTXAsmStreamer);
+
+ RegisterTargetMachine<PTX32TargetMachine> X(ThePTX32Target);
+ RegisterTargetMachine<PTX64TargetMachine> Y(ThePTX64Target);
+
+ RegisterAsmInfo<PTXMCAsmInfo> Z(ThePTX32Target);
+ RegisterAsmInfo<PTXMCAsmInfo> W(ThePTX64Target);
+
+ TargetRegistry::RegisterAsmStreamer(ThePTX32Target, createPTXAsmStreamer);
+ TargetRegistry::RegisterAsmStreamer(ThePTX64Target, createPTXAsmStreamer);
+}
+
+namespace {
+ const char* DataLayout32 =
+ "e-p:32:32-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64";
+ const char* DataLayout64 =
+ "e-p:64:64-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64";
}
// DataLayout and FrameLowering are filled with dummy data
PTXTargetMachine::PTXTargetMachine(const Target &T,
const std::string &TT,
- const std::string &FS)
+ const std::string &FS,
+ bool is64Bit)
: LLVMTargetMachine(T, TT),
- DataLayout("e-p:32:32-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64"),
+ DataLayout(is64Bit ? DataLayout64 : DataLayout32),
+ Subtarget(TT, FS, is64Bit),
FrameLowering(Subtarget),
InstrInfo(*this),
- TLInfo(*this),
- Subtarget(TT, FS) {
+ TLInfo(*this) {
+}
+
+PTX32TargetMachine::PTX32TargetMachine(const Target &T,
+ const std::string& TT,
+ const std::string& FS)
+ : PTXTargetMachine(T, TT, FS, false) {
+}
+
+PTX64TargetMachine::PTX64TargetMachine(const Target &T,
+ const std::string& TT,
+ const std::string& FS)
+ : PTXTargetMachine(T, TT, FS, true) {
}
bool PTXTargetMachine::addInstSelector(PassManagerBase &PM,
diff --git a/lib/Target/PTX/PTXTargetMachine.h b/lib/Target/PTX/PTXTargetMachine.h
index 728e36f..149be8e 100644
--- a/lib/Target/PTX/PTXTargetMachine.h
+++ b/lib/Target/PTX/PTXTargetMachine.h
@@ -25,15 +25,15 @@
namespace llvm {
class PTXTargetMachine : public LLVMTargetMachine {
private:
- const TargetData DataLayout;
- PTXFrameLowering FrameLowering;
- PTXInstrInfo InstrInfo;
+ const TargetData DataLayout;
+ PTXSubtarget Subtarget; // has to be initialized before FrameLowering
+ PTXFrameLowering FrameLowering;
+ PTXInstrInfo InstrInfo;
PTXTargetLowering TLInfo;
- PTXSubtarget Subtarget;
public:
PTXTargetMachine(const Target &T, const std::string &TT,
- const std::string &FS);
+ const std::string &FS, bool is64Bit);
virtual const TargetData *getTargetData() const { return &DataLayout; }
@@ -55,6 +55,22 @@ class PTXTargetMachine : public LLVMTargetMachine {
virtual bool addPostRegAlloc(PassManagerBase &PM,
CodeGenOpt::Level OptLevel);
}; // class PTXTargetMachine
+
+
+class PTX32TargetMachine : public PTXTargetMachine {
+public:
+
+ PTX32TargetMachine(const Target &T, const std::string &TT,
+ const std::string& FS);
+}; // class PTX32TargetMachine
+
+class PTX64TargetMachine : public PTXTargetMachine {
+public:
+
+ PTX64TargetMachine(const Target &T, const std::string &TT,
+ const std::string& FS);
+}; // class PTX64TargetMachine
+
} // namespace llvm
#endif // PTX_TARGET_MACHINE_H
diff --git a/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp b/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp
index a577d77..9df6c75 100644
--- a/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp
+++ b/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp
@@ -13,9 +13,13 @@
using namespace llvm;
-Target llvm::ThePTXTarget;
+Target llvm::ThePTX32Target;
+Target llvm::ThePTX64Target;
extern "C" void LLVMInitializePTXTargetInfo() {
// see llvm/ADT/Triple.h
- RegisterTarget<Triple::ptx> X(ThePTXTarget, "ptx", "PTX");
+ RegisterTarget<Triple::ptx32> X32(ThePTX32Target, "ptx32",
+ "PTX (32-bit) [Experimental]");
+ RegisterTarget<Triple::ptx64> X64(ThePTX64Target, "ptx64",
+ "PTX (64-bit) [Experimental]");
}
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
index ebc10da..9cf9db9 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
@@ -17,13 +17,16 @@
#include "llvm/MC/MCInstPrinter.h"
namespace llvm {
- class MCOperand;
+
+class MCOperand;
+class TargetMachine;
class PPCInstPrinter : public MCInstPrinter {
// 0 -> AIX, 1 -> Darwin.
unsigned SyntaxVariant;
public:
- PPCInstPrinter(const MCAsmInfo &MAI, unsigned syntaxVariant)
+ PPCInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI,
+ unsigned syntaxVariant)
: MCInstPrinter(MAI), SyntaxVariant(syntaxVariant) {}
bool isDarwinSyntax() const {
diff --git a/lib/Target/PowerPC/PPCAsmBackend.cpp b/lib/Target/PowerPC/PPCAsmBackend.cpp
index c4d4ac9..f562a3f 100644
--- a/lib/Target/PowerPC/PPCAsmBackend.cpp
+++ b/lib/Target/PowerPC/PPCAsmBackend.cpp
@@ -110,10 +110,8 @@ namespace {
TargetAsmBackend *llvm::createPPCAsmBackend(const Target &T,
const std::string &TT) {
- switch (Triple(TT).getOS()) {
- case Triple::Darwin:
+ if (Triple(TT).isOSDarwin())
return new DarwinPPCAsmBackend(T);
- default:
- return 0;
- }
+
+ return 0;
}
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 8ed5d7f..09a9be9 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -680,9 +680,10 @@ static AsmPrinter *createPPCAsmPrinterPass(TargetMachine &tm,
}
static MCInstPrinter *createPPCMCInstPrinter(const Target &T,
+ TargetMachine &TM,
unsigned SyntaxVariant,
const MCAsmInfo &MAI) {
- return new PPCInstPrinter(MAI, SyntaxVariant);
+ return new PPCInstPrinter(TM, MAI, SyntaxVariant);
}
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 70d00e4..128522c 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -899,7 +899,8 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
short Imm;
if (isIntS16Immediate(CN, Imm)) {
Disp = DAG.getTargetConstant(Imm, CN->getValueType(0));
- Base = DAG.getRegister(PPC::R0, CN->getValueType(0));
+ Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0,
+ CN->getValueType(0));
return true;
}
@@ -947,7 +948,8 @@ bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
}
// Otherwise, do it the hard way, using R0 as the base register.
- Base = DAG.getRegister(PPC::R0, N.getValueType());
+ Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0,
+ N.getValueType());
Index = N;
return true;
}
@@ -2153,7 +2155,7 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
}
/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
-/// adjusted to accomodate the arguments for the tailcall.
+/// adjusted to accommodate the arguments for the tailcall.
static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
unsigned ParamSize) {
@@ -2394,7 +2396,7 @@ void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
// Emit a sequence of copyto/copyfrom virtual registers for arguments that
// might overwrite each other in case of tail call optimization.
SmallVector<SDValue, 8> MemOpChains2;
- // Do not flag preceeding copytoreg stuff together with the following stuff.
+ // Do not flag preceding copytoreg stuff together with the following stuff.
InFlag = SDValue();
StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
MemOpChains2, dl);
@@ -2442,7 +2444,8 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
if (!DAG.getTarget().getSubtarget<PPCSubtarget>().isJITCodeModel()) {
unsigned OpFlags = 0;
if (DAG.getTarget().getRelocationModel() != Reloc::Static &&
- PPCSubTarget.getDarwinVers() < 9 &&
+ (!PPCSubTarget.getTargetTriple().isMacOSX() ||
+ PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5)) &&
(G->getGlobal()->isDeclaration() ||
G->getGlobal()->isWeakForLinker())) {
// PC-relative references to external symbols should go through $stub,
@@ -2465,7 +2468,8 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
unsigned char OpFlags = 0;
if (DAG.getTarget().getRelocationModel() != Reloc::Static &&
- PPCSubTarget.getDarwinVers() < 9) {
+ (!PPCSubTarget.getTargetTriple().isMacOSX() ||
+ PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5))) {
// PC-relative references to external symbols should go through $stub,
// unless we're building with the leopard linker or later, which
// automatically synthesizes these stubs.
@@ -4571,6 +4575,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
// registers without caring whether they're 32 or 64, but here we're
// doing actual arithmetic on the addresses.
bool is64bit = PPCSubTarget.isPPC64();
+ unsigned ZeroReg = is64bit ? PPC::X0 : PPC::R0;
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction *F = BB->getParent();
@@ -4634,8 +4639,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
// bne- loopMBB
// fallthrough --> exitMBB
// srw dest, tmpDest, shift
-
- if (ptrA!=PPC::R0) {
+ if (ptrA != ZeroReg) {
Ptr1Reg = RegInfo.createVirtualRegister(RC);
BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
.addReg(ptrA).addReg(ptrB);
@@ -4665,7 +4669,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
BB = loopMBB;
BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
- .addReg(PPC::R0).addReg(PtrReg);
+ .addReg(ZeroReg).addReg(PtrReg);
if (BinOpcode)
BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
.addReg(Incr2Reg).addReg(TmpDestReg);
@@ -4676,7 +4680,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg)
.addReg(Tmp3Reg).addReg(Tmp2Reg);
BuildMI(BB, dl, TII->get(PPC::STWCX))
- .addReg(Tmp4Reg).addReg(PPC::R0).addReg(PtrReg);
+ .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg);
BuildMI(BB, dl, TII->get(PPC::BCC))
.addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
BB->addSuccessor(loopMBB);
@@ -4685,7 +4689,8 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
// exitMBB:
// ...
BB = exitMBB;
- BuildMI(BB, dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg).addReg(ShiftReg);
+ BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg)
+ .addReg(ShiftReg);
return BB;
}
@@ -4933,6 +4938,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
unsigned Ptr1Reg;
unsigned TmpReg = RegInfo.createVirtualRegister(RC);
+ unsigned ZeroReg = is64bit ? PPC::X0 : PPC::R0;
// thisMBB:
// ...
// fallthrough --> loopMBB
@@ -4965,7 +4971,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// stwcx. tmpDest, ptr
// exitBB:
// srw dest, tmpDest, shift
- if (ptrA!=PPC::R0) {
+ if (ptrA != ZeroReg) {
Ptr1Reg = RegInfo.createVirtualRegister(RC);
BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
.addReg(ptrA).addReg(ptrB);
@@ -5002,7 +5008,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
BB = loop1MBB;
BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
- .addReg(PPC::R0).addReg(PtrReg);
+ .addReg(ZeroReg).addReg(PtrReg);
BuildMI(BB, dl, TII->get(PPC::AND),TmpReg)
.addReg(TmpDestReg).addReg(MaskReg);
BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
@@ -5018,7 +5024,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
BuildMI(BB, dl, TII->get(PPC::OR),Tmp4Reg)
.addReg(Tmp2Reg).addReg(NewVal3Reg);
BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg)
- .addReg(PPC::R0).addReg(PtrReg);
+ .addReg(ZeroReg).addReg(PtrReg);
BuildMI(BB, dl, TII->get(PPC::BCC))
.addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
@@ -5027,13 +5033,14 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
BB = midMBB;
BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg)
- .addReg(PPC::R0).addReg(PtrReg);
+ .addReg(ZeroReg).addReg(PtrReg);
BB->addSuccessor(exitMBB);
// exitMBB:
// ...
BB = exitMBB;
- BuildMI(BB, dl, TII->get(PPC::SRW),dest).addReg(TmpReg).addReg(ShiftReg);
+ BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW),dest).addReg(TmpReg)
+ .addReg(ShiftReg);
} else {
llvm_unreachable("Unexpected instr type to insert");
}
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index 6636b69..9f0fae5 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -130,7 +130,7 @@ def : Pat<(PPCnop),
// Atomic operations
let usesCustomInserter = 1 in {
- let Uses = [CR0] in {
+ let Defs = [CR0] in {
def ATOMIC_LOAD_ADD_I64 : Pseudo<
(outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "",
[(set G8RC:$dst, (atomic_load_add_64 xoaddr:$ptr, G8RC:$incr))]>;
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 82aadeb..24071b7 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -550,7 +550,7 @@ def DCBZL : DCB_Form<1014, 1, (outs), (ins memrr:$dst),
// Atomic operations
let usesCustomInserter = 1 in {
- let Uses = [CR0] in {
+ let Defs = [CR0] in {
def ATOMIC_LOAD_ADD_I8 : Pseudo<
(outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "",
[(set GPRC:$dst, (atomic_load_add_8 xoaddr:$ptr, GPRC:$incr))]>;
diff --git a/lib/Target/PowerPC/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/PPCMCAsmInfo.cpp
index d1178dd..9e508cc 100644
--- a/lib/Target/PowerPC/PPCMCAsmInfo.cpp
+++ b/lib/Target/PowerPC/PPCMCAsmInfo.cpp
@@ -17,7 +17,7 @@ using namespace llvm;
PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) {
PCSymbol = ".";
CommentString = ";";
- ExceptionsType = ExceptionHandling::DwarfTable;
+ ExceptionsType = ExceptionHandling::DwarfCFI;
if (!is64Bit)
Data64bitsDirective = 0; // We can't emit a 64-bit unit in PPC32 mode.
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index 72a1dee..5f3aa23 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -70,7 +70,7 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &FS,
, HasSTFIWX(false)
, HasLazyResolverStubs(false)
, IsJITCodeModel(false)
- , DarwinVers(0) {
+ , TargetTriple(TT) {
// Determine default and user specified characteristics
std::string CPU = "generic";
@@ -92,19 +92,6 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &FS,
// support it, ignore.
if (use64BitRegs() && !has64BitSupport())
Use64BitRegs = false;
-
- // Set the boolean corresponding to the current target triple, or the default
- // if one cannot be determined, to true.
- if (TT.length() > 7) {
- // Determine which version of darwin this is.
- size_t DarwinPos = TT.find("-darwin");
- if (DarwinPos != std::string::npos) {
- if (isdigit(TT[DarwinPos+7]))
- DarwinVers = atoi(&TT[DarwinPos+7]);
- else
- DarwinVers = 8; // Minimum supported darwin is Tiger.
- }
- }
// Set up darwin-specific properties.
if (isDarwin())
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index 00ec747..8fd1a44 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -14,6 +14,7 @@
#ifndef POWERPCSUBTARGET_H
#define POWERPCSUBTARGET_H
+#include "llvm/ADT/Triple.h"
#include "llvm/Target/TargetInstrItineraries.h"
#include "llvm/Target/TargetSubtarget.h"
@@ -65,9 +66,9 @@ protected:
bool HasLazyResolverStubs;
bool IsJITCodeModel;
- /// DarwinVers - Nonzero if this is a darwin platform. Otherwise, the numeric
- /// version of the platform, e.g. 8 = 10.4 (Tiger), 9 = 10.5 (Leopard), etc.
- unsigned char DarwinVers; // Is any darwin-ppc platform.
+ /// TargetTriple - What processor and OS we're targeting.
+ Triple TargetTriple;
+
public:
/// This constructor initializes the data members to match that
/// of the specified triple.
@@ -134,13 +135,10 @@ public:
bool hasAltivec() const { return HasAltivec; }
bool isGigaProcessor() const { return IsGigaProcessor; }
- /// isDarwin - True if this is any darwin platform.
- bool isDarwin() const { return DarwinVers != 0; }
- /// isDarwin - True if this is darwin9 (leopard, 10.5) or above.
- bool isDarwin9() const { return DarwinVers >= 9; }
+ const Triple &getTargetTriple() const { return TargetTriple; }
- /// getDarwinVers - Return the darwin version number, 8 = tiger, 9 = leopard.
- unsigned getDarwinVers() const { return DarwinVers; }
+ /// isDarwin - True if this is any darwin platform.
+ bool isDarwin() const { return TargetTriple.isMacOSX(); }
bool isDarwinABI() const { return isDarwin(); }
bool isSVR4ABI() const { return !isDarwin(); }
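
The Triple-based predicates replace the old parsed DarwinVers number; Darwin 9 corresponds to Mac OS X 10.5, which is why the call-lowering sites above now test !isMacOSX() || isMacOSXVersionLT(10, 5) where they previously tested getDarwinVers() < 9. A sketch of that equivalence, ignoring the relocation-model and linkage conditions that accompany it in PrepareCall:

#include "llvm/ADT/Triple.h"
using namespace llvm;

// Stubs are required unless we know the target is Mac OS X 10.5 or later
// (the Leopard linker and later synthesize them automatically).
static bool needsStubs(const Triple &T) {
  return !T.isMacOSX() || T.isMacOSXVersionLT(10, 5);
}
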
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index 212b450..d27e54e 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -24,7 +24,7 @@ using namespace llvm;
static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
Triple TheTriple(TT);
bool isPPC64 = TheTriple.getArch() == Triple::ppc64;
- if (TheTriple.getOS() == Triple::Darwin)
+ if (TheTriple.isOSDarwin())
return new PPCMCAsmInfoDarwin(isPPC64);
return new PPCLinuxMCAsmInfo(isPPC64);
@@ -37,12 +37,10 @@ static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
MCCodeEmitter *Emitter,
bool RelaxAll,
bool NoExecStack) {
- switch (Triple(TT).getOS()) {
- case Triple::Darwin:
+ if (Triple(TT).isOSDarwin())
return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll);
- default:
- return NULL;
- }
+
+ return NULL;
}
extern "C" void LLVMInitializePowerPCTarget() {
diff --git a/lib/Target/README.txt b/lib/Target/README.txt
index f85914b..ffe3fa4 100644
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -392,34 +392,6 @@ PHI Slicing could be extended to do this.
//===---------------------------------------------------------------------===//
-LSR should know what GPR types a target has from TargetData. This code:
-
-volatile short X, Y; // globals
-
-void foo(int N) {
- int i;
- for (i = 0; i < N; i++) { X = i; Y = i*4; }
-}
-
-produces two near identical IV's (after promotion) on PPC/ARM:
-
-LBB1_2:
- ldr r3, LCPI1_0
- ldr r3, [r3]
- strh r2, [r3]
- ldr r3, LCPI1_1
- ldr r3, [r3]
- strh r1, [r3]
- add r1, r1, #4
- add r2, r2, #1 <- [0,+,1]
- sub r0, r0, #1 <- [0,-,1]
- cmp r0, #0
- bne LBB1_2
-
-LSR should reuse the "+" IV for the exit test.
-
-//===---------------------------------------------------------------------===//
-
Tail call elim should be more aggressive, checking to see if the call is
followed by an uncond branch to an exit block.
@@ -1325,6 +1297,21 @@ codegen.
//===---------------------------------------------------------------------===//
+simplifylibcalls should turn these snprintf idioms into memcpy (GCC PR47917)
+
+char buf1[6], buf2[6], buf3[4], buf4[4];
+int i;
+
+int foo (void) {
+ int ret = snprintf (buf1, sizeof buf1, "abcde");
+ ret += snprintf (buf2, sizeof buf2, "abcdef") * 16;
+ ret += snprintf (buf3, sizeof buf3, "%s", i++ < 6 ? "abc" : "def") * 256;
+ ret += snprintf (buf4, sizeof buf4, "%s", i++ > 10 ? "abcde" : "defgh")*4096;
+ return ret;
+}
+
+//===---------------------------------------------------------------------===//
+
"gas" uses this idiom:
else if (strchr ("+-/*%|&^:[]()~", *intel_parser.op_string))
..
@@ -1780,43 +1767,6 @@ case it chooses instead to keep the max operation obvious.
//===---------------------------------------------------------------------===//
-Take the following testcase on x86-64 (similar testcases exist for all targets
-with addc/adde):
-
-define void @a(i64* nocapture %s, i64* nocapture %t, i64 %a, i64 %b,
-i64 %c) nounwind {
-entry:
- %0 = zext i64 %a to i128 ; <i128> [#uses=1]
- %1 = zext i64 %b to i128 ; <i128> [#uses=1]
- %2 = add i128 %1, %0 ; <i128> [#uses=2]
- %3 = zext i64 %c to i128 ; <i128> [#uses=1]
- %4 = shl i128 %3, 64 ; <i128> [#uses=1]
- %5 = add i128 %4, %2 ; <i128> [#uses=1]
- %6 = lshr i128 %5, 64 ; <i128> [#uses=1]
- %7 = trunc i128 %6 to i64 ; <i64> [#uses=1]
- store i64 %7, i64* %s, align 8
- %8 = trunc i128 %2 to i64 ; <i64> [#uses=1]
- store i64 %8, i64* %t, align 8
- ret void
-}
-
-Generated code:
- addq %rcx, %rdx
- sbbq %rax, %rax
- subq %rax, %r8
- movq %r8, (%rdi)
- movq %rdx, (%rsi)
- ret
-
-Expected code:
- addq %rcx, %rdx
- adcq $0, %r8
- movq %r8, (%rdi)
- movq %rdx, (%rsi)
- ret
-
-//===---------------------------------------------------------------------===//
-
Switch lowering generates less than ideal code for the following switch:
define void @a(i32 %x) nounwind {
entry:
@@ -2124,11 +2074,12 @@ for.end: ; preds = %entry
}
This shouldn't need the ((zext (%n - 1)) + 1) game, and it should ideally fold
-the two memset's together. The issue with %n seems to stem from poor handling
-of the original loop.
+the two memsets together.
-To simplify this, we need SCEV to know that "n != 0" because of the dominating
-conditional. That would turn the second memset into a simple memset of 'n'.
+The issue with the addition only occurs in 64-bit mode, and appears to be at
+least partially caused by Scalar Evolution not keeping its cache updated: it
+returns the "wrong" result immediately after indvars runs, but figures out the
+expected result if it is run from scratch on IR resulting from running indvars.
//===---------------------------------------------------------------------===//
@@ -2287,4 +2238,71 @@ missed cases:
//===---------------------------------------------------------------------===//
+define i1 @test1(i32 %x) nounwind {
+ %and = and i32 %x, 3
+ %cmp = icmp ult i32 %and, 2
+ ret i1 %cmp
+}
+
+Can be folded to (x & 2) == 0.
+
+define i1 @test2(i32 %x) nounwind {
+ %and = and i32 %x, 3
+ %cmp = icmp ugt i32 %and, 1
+ ret i1 %cmp
+}
+
+Can be folded to (x & 2) != 0.
+
+SimplifyDemandedBits shrinks the "and" constant to 2 but instcombine misses the
+icmp transform.
+
+//===---------------------------------------------------------------------===//
+
+This code:
+
+typedef struct {
+int f1:1;
+int f2:1;
+int f3:1;
+int f4:29;
+} t1;
+
+typedef struct {
+int f1:1;
+int f2:1;
+int f3:30;
+} t2;
+
+t1 s1;
+t2 s2;
+
+void func1(void)
+{
+s1.f1 = s2.f1;
+s1.f2 = s2.f2;
+}
+
+Compiles into this IR (on x86-64 at least):
+
+%struct.t1 = type { i8, [3 x i8] }
+@s2 = global %struct.t1 zeroinitializer, align 4
+@s1 = global %struct.t1 zeroinitializer, align 4
+define void @func1() nounwind ssp noredzone {
+entry:
+ %0 = load i32* bitcast (%struct.t1* @s2 to i32*), align 4
+ %bf.val.sext5 = and i32 %0, 1
+ %1 = load i32* bitcast (%struct.t1* @s1 to i32*), align 4
+ %2 = and i32 %1, -4
+ %3 = or i32 %2, %bf.val.sext5
+ %bf.val.sext26 = and i32 %0, 2
+ %4 = or i32 %3, %bf.val.sext26
+ store i32 %4, i32* bitcast (%struct.t1* @s1 to i32*), align 4
+ ret void
+}
+
+The two or/and's should be merged into one each.
+
+//===---------------------------------------------------------------------===//
+
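
The test1/test2 entry above claims that (x & 3) ult 2 folds to (x & 2) == 0 and that (x & 3) ugt 1 folds to (x & 2) != 0; since only the low two bits matter, the claim can be checked exhaustively. A standalone verification sketch (not part of the tree):

#include <cassert>

int main() {
  for (unsigned x = 0; x < 4; ++x) {
    assert(((x & 3) < 2u) == ((x & 2) == 0));  // test1 fold
    assert(((x & 3) > 1u) == ((x & 2) != 0));  // test2 fold
  }
  return 0;
}
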
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
index 70574c3..edb62fa 100644
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -544,7 +544,7 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Build a sequence of copy-to-reg nodes chained together with token
// chain and flag operands which copy the outgoing args into registers.
+ // The InFlag is necessary since all emitted instructions must be
+ // The InFlag in necessary since all emitted instructions must be
// stuck together.
SDValue InFlag;
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
diff --git a/lib/Target/SubtargetFeature.cpp b/lib/Target/SubtargetFeature.cpp
index 3cf95b5..e0a9de8 100644
--- a/lib/Target/SubtargetFeature.cpp
+++ b/lib/Target/SubtargetFeature.cpp
@@ -211,7 +211,7 @@ const std::string & SubtargetFeatures::getCPU() const {
/// feature, set it.
///
static
-void SetImpliedBits(uint32_t &Bits, const SubtargetFeatureKV *FeatureEntry,
+void SetImpliedBits(uint64_t &Bits, const SubtargetFeatureKV *FeatureEntry,
const SubtargetFeatureKV *FeatureTable,
size_t FeatureTableSize) {
for (size_t i = 0; i < FeatureTableSize; ++i) {
@@ -230,7 +230,7 @@ void SetImpliedBits(uint32_t &Bits, const SubtargetFeatureKV *FeatureEntry,
/// feature, clear it.
///
static
-void ClearImpliedBits(uint32_t &Bits, const SubtargetFeatureKV *FeatureEntry,
+void ClearImpliedBits(uint64_t &Bits, const SubtargetFeatureKV *FeatureEntry,
const SubtargetFeatureKV *FeatureTable,
size_t FeatureTableSize) {
for (size_t i = 0; i < FeatureTableSize; ++i) {
@@ -247,7 +247,7 @@ void ClearImpliedBits(uint32_t &Bits, const SubtargetFeatureKV *FeatureEntry,
/// getBits - Get feature bits.
///
-uint32_t SubtargetFeatures::getBits(const SubtargetFeatureKV *CPUTable,
+uint64_t SubtargetFeatures::getBits(const SubtargetFeatureKV *CPUTable,
size_t CPUTableSize,
const SubtargetFeatureKV *FeatureTable,
size_t FeatureTableSize) {
@@ -263,7 +263,7 @@ uint32_t SubtargetFeatures::getBits(const SubtargetFeatureKV *CPUTable,
"CPU features table is not sorted");
}
#endif
- uint32_t Bits = 0; // Resulting bits
+ uint64_t Bits = 0; // Resulting bits
// Check if help is needed
if (Features[0] == "help")
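
Widening the feature mask from uint32_t to uint64_t raises the number of representable subtarget features from 32 to 64; each feature occupies one bit, and SetImpliedBits/ClearImpliedBits OR bits in or mask them out. A small sketch of the representation (the bit position is invented for illustration):

#include <cstdint>

int main() {
  uint64_t Bits = 0;
  const uint64_t HypotheticalFeature = 1ULL << 33; // needs the 64-bit mask
  Bits |= HypotheticalFeature;   // SetImpliedBits-style set
  Bits &= ~HypotheticalFeature;  // ClearImpliedBits-style clear
  return Bits == 0 ? 0 : 1;
}
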
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index 90939c3..d331614 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -451,7 +451,7 @@ SystemZTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
// Build a sequence of copy-to-reg nodes chained together with token chain and
// flag operands which copy the outgoing args into registers. The InFlag is
- // necessary since all emited instructions must be stuck together.
+ // necessary since all emitted instructions must be stuck together.
SDValue InFlag;
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
diff --git a/lib/Target/TargetData.cpp b/lib/Target/TargetData.cpp
index c628df0..1990bc7 100644
--- a/lib/Target/TargetData.cpp
+++ b/lib/Target/TargetData.cpp
@@ -617,10 +617,14 @@ uint64_t TargetData::getIndexedOffset(const Type *ptrTy, Value* const* Indices,
unsigned TargetData::getPreferredAlignment(const GlobalVariable *GV) const {
const Type *ElemType = GV->getType()->getElementType();
unsigned Alignment = getPrefTypeAlignment(ElemType);
- if (GV->getAlignment() > Alignment)
- Alignment = GV->getAlignment();
+ unsigned GVAlignment = GV->getAlignment();
+ if (GVAlignment >= Alignment) {
+ Alignment = GVAlignment;
+ } else if (GVAlignment != 0) {
+ Alignment = std::max(GVAlignment, getABITypeAlignment(ElemType));
+ }
- if (GV->hasInitializer()) {
+ if (GV->hasInitializer() && GVAlignment == 0) {
if (Alignment < 16) {
// If the global is not external, see if it is large. If so, give it a
// larger alignment.
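
The rewritten getPreferredAlignment keeps an explicit alignment when it already meets the preferred alignment, clamps an explicit-but-small alignment up to the ABI minimum, and leaves only globals with no explicit alignment eligible for the large-initializer bump. A standalone sketch of the selection rule (parameter names stand in for the TargetData queries; the initializer bump is omitted):

#include <algorithm>

static unsigned preferredAlignment(unsigned PrefAlign, unsigned ABIAlign,
                                   unsigned GVAlign /* 0 if unspecified */) {
  if (GVAlign >= PrefAlign)
    return GVAlign;                      // explicit and large enough: honor it
  if (GVAlign != 0)
    return std::max(GVAlign, ABIAlign);  // explicit but small: respect the ABI
  return PrefAlign;                      // unspecified: use the preference
}
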
diff --git a/lib/Target/TargetInstrInfo.cpp b/lib/Target/TargetInstrInfo.cpp
index 97f3bf6..d4b7697 100644
--- a/lib/Target/TargetInstrInfo.cpp
+++ b/lib/Target/TargetInstrInfo.cpp
@@ -149,10 +149,10 @@ bool TargetInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
/// Measure the specified inline asm to determine an approximation of its
/// length.
-/// Comments (which run till the next SeparatorChar or newline) do not
+/// Comments (which run till the next SeparatorString or newline) do not
/// count as an instruction.
/// Any other non-whitespace text is considered an instruction, with
-/// multiple instructions separated by SeparatorChar or newlines.
+/// multiple instructions separated by SeparatorString or newlines.
/// Variable-length instructions are not handled here; this function
/// may be overloaded in the target code to do that.
unsigned TargetInstrInfo::getInlineAsmLength(const char *Str,
@@ -163,7 +163,8 @@ unsigned TargetInstrInfo::getInlineAsmLength(const char *Str,
bool atInsnStart = true;
unsigned Length = 0;
for (; *Str; ++Str) {
- if (*Str == '\n' || *Str == MAI.getSeparatorChar())
+ if (*Str == '\n' || strncmp(Str, MAI.getSeparatorString(),
+ strlen(MAI.getSeparatorString())) == 0)
atInsnStart = true;
if (atInsnStart && !std::isspace(*Str)) {
Length += MAI.getMaxInstLength();
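
With SeparatorString, an instruction separator may be more than one character, so the scan matches it with strncmp instead of a single-character compare. A self-contained sketch of the counting loop, with MaxInstLength fixed at 4 for illustration:

#include <cctype>
#include <cstring>

static unsigned inlineAsmLength(const char *Str, const char *Sep,
                                unsigned MaxInstLength = 4) {
  bool atInsnStart = true;
  unsigned Length = 0;
  for (; *Str; ++Str) {
    // A newline or the (possibly multi-character) separator starts a new
    // instruction; comment handling is not modeled in this sketch.
    if (*Str == '\n' || strncmp(Str, Sep, strlen(Sep)) == 0)
      atInsnStart = true;
    if (atInsnStart && !std::isspace(static_cast<unsigned char>(*Str))) {
      Length += MaxInstLength;  // worst-case size per instruction
      atInsnStart = false;
    }
  }
  return Length;
}
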
diff --git a/lib/Target/TargetLibraryInfo.cpp b/lib/Target/TargetLibraryInfo.cpp
index c8bed18..e336b09 100644
--- a/lib/Target/TargetLibraryInfo.cpp
+++ b/lib/Target/TargetLibraryInfo.cpp
@@ -28,9 +28,22 @@ static void initialize(TargetLibraryInfo &TLI, const Triple &T) {
// memset_pattern16 is only available on iOS 3.0 and Mac OS X 10.5 and later.
- if (T.getOS() != Triple::Darwin || T.getDarwinMajorNumber() < 9)
+ if (T.isMacOSX()) {
+ if (T.isMacOSXVersionLT(10, 5))
+ TLI.setUnavailable(LibFunc::memset_pattern16);
+ } else if (T.getOS() == Triple::IOS) {
+ if (T.isOSVersionLT(3, 0))
+ TLI.setUnavailable(LibFunc::memset_pattern16);
+ } else {
TLI.setUnavailable(LibFunc::memset_pattern16);
-
+ }
+
+ // iprintf and friends are only available on XCore.
+ if (T.getArch() != Triple::xcore) {
+ TLI.setUnavailable(LibFunc::iprintf);
+ TLI.setUnavailable(LibFunc::siprintf);
+ TLI.setUnavailable(LibFunc::fiprintf);
+ }
}
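
The initialize() logic above encodes a small availability matrix: memset_pattern16 exists only on Mac OS X 10.5+ and iOS 3.0+, and the iprintf family only on XCore. A sketch of the first rule as a predicate:

#include "llvm/ADT/Triple.h"
using namespace llvm;

// True iff the C library on this triple provides memset_pattern16.
static bool hasMemsetPattern16(const Triple &T) {
  if (T.isMacOSX())
    return !T.isMacOSXVersionLT(10, 5);
  if (T.getOS() == Triple::IOS)
    return !T.isOSVersionLT(3, 0);
  return false;
}
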
diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp
index 5d34c7d..717ad41 100644
--- a/lib/Target/TargetLoweringObjectFile.cpp
+++ b/lib/Target/TargetLoweringObjectFile.cpp
@@ -120,6 +120,18 @@ static bool IsNullTerminatedString(const Constant *C) {
return false;
}
+MCSymbol *TargetLoweringObjectFile::
+getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI) const {
+ return Mang->getSymbol(GV);
+}
+
+void TargetLoweringObjectFile::emitPersonalityValue(MCStreamer &Streamer,
+ const TargetMachine &TM,
+ const MCSymbol *Sym) const {
+}
+
+
/// getKindForGlobal - This is a top-level target-independent classifier for
/// a global variable. Given an global variable and information from TM, it
/// classifies the global in a variety of ways that make various target
@@ -305,16 +317,15 @@ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
MachineModuleInfo *MMI, unsigned Encoding,
MCStreamer &Streamer) const {
const MCSymbol *Sym = Mang->getSymbol(GV);
- return getExprForDwarfReference(Sym, Mang, MMI, Encoding, Streamer);
+ return getExprForDwarfReference(Sym, Encoding, Streamer);
}
const MCExpr *TargetLoweringObjectFile::
-getExprForDwarfReference(const MCSymbol *Sym, Mangler *Mang,
- MachineModuleInfo *MMI, unsigned Encoding,
+getExprForDwarfReference(const MCSymbol *Sym, unsigned Encoding,
MCStreamer &Streamer) const {
const MCExpr *Res = MCSymbolRefExpr::Create(Sym, getContext());
- switch (Encoding & 0xF0) {
+ switch (Encoding & 0x70) {
default:
report_fatal_error("We do not support this DWARF encoding yet!");
case dwarf::DW_EH_PE_absptr:
@@ -339,7 +350,7 @@ unsigned TargetLoweringObjectFile::getLSDAEncoding() const {
return dwarf::DW_EH_PE_absptr;
}
-unsigned TargetLoweringObjectFile::getFDEEncoding() const {
+unsigned TargetLoweringObjectFile::getFDEEncoding(bool CFI) const {
return dwarf::DW_EH_PE_absptr;
}
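
The switch to Encoding & 0x70 matters because a DWARF EH encoding byte packs a value format in the low nibble and an application modifier (pcrel, datarel, ...) in bits 4-6, while bit 7 is the separate DW_EH_PE_indirect flag; masking with 0xF0 would mis-classify indirect encodings. A minimal sketch with the relevant constants:

#include <cstdint>

enum : uint8_t {
  DW_EH_PE_absptr   = 0x00,
  DW_EH_PE_pcrel    = 0x10,
  DW_EH_PE_indirect = 0x80  // a flag, not an application modifier
};

static uint8_t applicationOf(uint8_t Encoding) { return Encoding & 0x70; }

int main() {
  // An indirect pc-relative encoding still classifies as pcrel.
  return applicationOf(DW_EH_PE_indirect | DW_EH_PE_pcrel) == DW_EH_PE_pcrel
             ? 0 : 1;
}
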
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
index d579d95..76ccc09 100644
--- a/lib/Target/TargetMachine.cpp
+++ b/lib/Target/TargetMachine.cpp
@@ -48,6 +48,7 @@ namespace llvm {
bool RealignStack;
bool DisableJumpTables;
bool StrongPHIElim;
+ bool HasDivModLibcall;
bool AsmVerbosityDefault(false);
}
@@ -205,6 +206,10 @@ EnableStrongPHIElim(cl::Hidden, "strong-phi-elim",
cl::desc("Use strong PHI elimination."),
cl::location(StrongPHIElim),
cl::init(false));
+static cl::opt<std::string>
+TrapFuncName("trap-func", cl::Hidden,
+ cl::desc("Emit a call to trap function rather than a trap instruction"),
+ cl::init(""));
static cl::opt<bool>
DataSections("fdata-sections",
cl::desc("Emit data into separate sections"),
@@ -221,7 +226,9 @@ TargetMachine::TargetMachine(const Target &T)
: TheTarget(T), AsmInfo(0),
MCRelaxAll(false),
MCNoExecStack(false),
- MCUseLoc(true) {
+ MCSaveTempLabels(false),
+ MCUseLoc(true),
+ MCUseCFI(true) {
// Typically it will be subtargets that will adjust FloatABIType from Default
// to Soft or Hard.
if (UseSoftFloat)
@@ -303,4 +310,11 @@ namespace llvm {
bool HonorSignDependentRoundingFPMath() {
return !UnsafeFPMath && HonorSignDependentRoundingFPMathOption;
}
+
+ /// getTrapFunctionName - If this returns a non-empty string, this means isel
+ /// should lower Intrinsic::trap to a call to the specified function name
+ /// instead of an ISD::TRAP node.
+ StringRef getTrapFunctionName() {
+ return TrapFuncName;
+ }
}
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 8fe549b..c352bfc 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -53,6 +53,14 @@ private:
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
MCStreamer &Out);
+ /// isSrcOp - Returns true if operand is either (%rsi) or %ds:(%rsi)
+ /// in 64bit mode or (%esi) or %ds:(%esi) in 32bit mode.
+ bool isSrcOp(X86Operand &Op);
+
+ /// isDstOp - Returns true if operand is either %es:(%rdi) in 64bit mode
+ /// or %es:(%edi) in 32bit mode.
+ bool isDstOp(X86Operand &Op);
+
/// @name Auto-generated Matcher Functions
/// {
@@ -356,6 +364,24 @@ struct X86Operand : public MCParsedAsmOperand {
} // end anonymous namespace.
+bool X86ATTAsmParser::isSrcOp(X86Operand &Op) {
+ unsigned basereg = Is64Bit ? X86::RSI : X86::ESI;
+
+ return (Op.isMem() &&
+ (Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::DS) &&
+ isa<MCConstantExpr>(Op.Mem.Disp) &&
+ cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
+ Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0);
+}
+
+bool X86ATTAsmParser::isDstOp(X86Operand &Op) {
+ unsigned basereg = Is64Bit ? X86::RDI : X86::EDI;
+
+ return Op.isMem() && Op.Mem.SegReg == X86::ES &&
+ isa<MCConstantExpr>(Op.Mem.Disp) &&
+ cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
+ Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0;
+}
bool X86ATTAsmParser::ParseRegister(unsigned &RegNo,
SMLoc &StartLoc, SMLoc &EndLoc) {
@@ -788,7 +814,106 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
delete &Op;
}
}
-
+ // Transform "ins[bwl] %dx, %es:(%edi)" into "ins[bwl]"
+ if (Name.startswith("ins") && Operands.size() == 3 &&
+ (Name == "insb" || Name == "insw" || Name == "insl")) {
+ X86Operand &Op = *(X86Operand*)Operands.begin()[1];
+ X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
+ if (Op.isReg() && Op.getReg() == X86::DX && isDstOp(Op2)) {
+ Operands.pop_back();
+ Operands.pop_back();
+ delete &Op;
+ delete &Op2;
+ }
+ }
+
+ // Transform "outs[bwl] %ds:(%esi), %dx" into "out[bwl]"
+ if (Name.startswith("outs") && Operands.size() == 3 &&
+ (Name == "outsb" || Name == "outsw" || Name == "outsl")) {
+ X86Operand &Op = *(X86Operand*)Operands.begin()[1];
+ X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
+ if (isSrcOp(Op) && Op2.isReg() && Op2.getReg() == X86::DX) {
+ Operands.pop_back();
+ Operands.pop_back();
+ delete &Op;
+ delete &Op2;
+ }
+ }
+
+ // Transform "movs[bwl] %ds:(%esi), %es:(%edi)" into "movs[bwl]"
+ if (Name.startswith("movs") && Operands.size() == 3 &&
+ (Name == "movsb" || Name == "movsw" || Name == "movsl" ||
+ (Is64Bit && Name == "movsq"))) {
+ X86Operand &Op = *(X86Operand*)Operands.begin()[1];
+ X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
+ if (isSrcOp(Op) && isDstOp(Op2)) {
+ Operands.pop_back();
+ Operands.pop_back();
+ delete &Op;
+ delete &Op2;
+ }
+ }
+ // Transform "lods[bwl] %ds:(%esi),{%al,%ax,%eax,%rax}" into "lods[bwl]"
+ if (Name.startswith("lods") && Operands.size() == 3 &&
+ (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
+ Name == "lodsl" || (Is64Bit && Name == "lodsq"))) {
+ X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
+ X86Operand *Op2 = static_cast<X86Operand*>(Operands[2]);
+ if (isSrcOp(*Op1) && Op2->isReg()) {
+ const char *ins;
+ unsigned reg = Op2->getReg();
+ bool isLods = Name == "lods";
+ if (reg == X86::AL && (isLods || Name == "lodsb"))
+ ins = "lodsb";
+ else if (reg == X86::AX && (isLods || Name == "lodsw"))
+ ins = "lodsw";
+ else if (reg == X86::EAX && (isLods || Name == "lodsl"))
+ ins = "lodsl";
+ else if (reg == X86::RAX && (isLods || Name == "lodsq"))
+ ins = "lodsq";
+ else
+ ins = NULL;
+ if (ins != NULL) {
+ Operands.pop_back();
+ Operands.pop_back();
+ delete Op1;
+ delete Op2;
+ if (Name != ins)
+ static_cast<X86Operand*>(Operands[0])->setTokenValue(ins);
+ }
+ }
+ }
+ // Transform "stos[bwl] {%al,%ax,%eax,%rax},%es:(%edi)" into "stos[bwl]"
+ if (Name.startswith("stos") && Operands.size() == 3 &&
+ (Name == "stos" || Name == "stosb" || Name == "stosw" ||
+ Name == "stosl" || (Is64Bit && Name == "stosq"))) {
+ X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
+ X86Operand *Op2 = static_cast<X86Operand*>(Operands[2]);
+ if (isDstOp(*Op2) && Op1->isReg()) {
+ const char *ins;
+ unsigned reg = Op1->getReg();
+ bool isStos = Name == "stos";
+ if (reg == X86::AL && (isStos || Name == "stosb"))
+ ins = "stosb";
+ else if (reg == X86::AX && (isStos || Name == "stosw"))
+ ins = "stosw";
+ else if (reg == X86::EAX && (isStos || Name == "stosl"))
+ ins = "stosl";
+ else if (reg == X86::RAX && (isStos || Name == "stosq"))
+ ins = "stosq";
+ else
+ ins = NULL;
+ if (ins != NULL) {
+ Operands.pop_back();
+ Operands.pop_back();
+ delete Op1;
+ delete Op2;
+ if (Name != ins)
+ static_cast<X86Operand*>(Operands[0])->setTokenValue(ins);
+ }
+ }
+ }
+
// FIXME: Hack to recognize s{hr,ar,hl} $1, <op>. Canonicalize to
// "shift <op>".
if ((Name.startswith("shr") || Name.startswith("sar") ||
@@ -803,6 +928,18 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
Operands.erase(Operands.begin() + 1);
}
}
+
+ // Transforms "int $3" into "int3" as a size optimization. We can't write an
+ // instalias with an immediate operand yet.
+ if (Name == "int" && Operands.size() == 2) {
+ X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
+ if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
+ cast<MCConstantExpr>(Op1->getImm())->getValue() == 3) {
+ delete Operands[1];
+ Operands.erase(Operands.begin() + 1);
+ static_cast<X86Operand*>(Operands[0])->setTokenValue("int3");
+ }
+ }
return false;
}
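
The "int $3" to "int3" rewrite is a size optimization because int3 has a dedicated one-byte opcode, while the generic "int imm8" form takes two bytes. A standalone sketch of the two machine encodings:

#include <cstdint>

int main() {
  const uint8_t Int3[]    = {0xCC};        // int3: one byte
  const uint8_t IntImm3[] = {0xCD, 0x03};  // int $3: opcode + immediate
  return sizeof(Int3) < sizeof(IntImm3) ? 0 : 1;
}
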
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp
index f777756..d8a105e 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -409,6 +409,7 @@ static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
case TYPE_XMM32:
case TYPE_XMM64:
case TYPE_XMM128:
+ case TYPE_XMM256:
case TYPE_DEBUGREG:
case TYPE_CONTROLREG:
return translateRMRegister(mcInst, insn);
@@ -418,6 +419,7 @@ static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
case TYPE_M32:
case TYPE_M64:
case TYPE_M128:
+ case TYPE_M256:
case TYPE_M512:
case TYPE_Mv:
case TYPE_M32FP:
@@ -500,6 +502,9 @@ static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
case ENCODING_Rv:
translateRegister(mcInst, insn.opcodeRegister);
return false;
+ case ENCODING_VVVV:
+ translateRegister(mcInst, insn.vvvv);
+ return false;
case ENCODING_DUP:
return translateOperand(mcInst,
insn.spec->operands[operand.type - TYPE_DUP0],
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
index b6546fc..de1610b 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
@@ -75,6 +75,12 @@ static int modRMRequired(OpcodeType type,
case THREEBYTE_3A:
decision = &THREEBYTE3A_SYM;
break;
+ case THREEBYTE_A6:
+ decision = &THREEBYTEA6_SYM;
+ break;
+ case THREEBYTE_A7:
+ decision = &THREEBYTEA7_SYM;
+ break;
}
return decision->opcodeDecisions[insnContext].modRMDecisions[opcode].
@@ -115,6 +121,12 @@ static InstrUID decode(OpcodeType type,
case THREEBYTE_3A:
dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
break;
+ case THREEBYTE_A6:
+ dec = &THREEBYTEA6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+ break;
+ case THREEBYTE_A7:
+ dec = &THREEBYTEA7_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+ break;
}
switch (dec->modrm_type) {
@@ -368,29 +380,109 @@ static int readPrefixes(struct InternalInstruction* insn) {
if (isPrefix)
dbgprintf(insn, "Found prefix 0x%hhx", byte);
}
+
+ insn->vexSize = 0;
- if (insn->mode == MODE_64BIT) {
- if ((byte & 0xf0) == 0x40) {
- uint8_t opcodeByte;
+ if (byte == 0xc4) {
+ uint8_t byte1;
- if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) {
- dbgprintf(insn, "Redundant REX prefix");
- return -1;
+ if (lookAtByte(insn, &byte1)) {
+ dbgprintf(insn, "Couldn't read second byte of VEX");
+ return -1;
+ }
+
+ if (insn->mode == MODE_64BIT || byte1 & 0x8) {
+ insn->vexSize = 3;
+ insn->necessaryPrefixLocation = insn->readerCursor - 1;
+ }
+ else {
+ unconsumeByte(insn);
+ insn->necessaryPrefixLocation = insn->readerCursor - 1;
+ }
+
+ if (insn->vexSize == 3) {
+ insn->vexPrefix[0] = byte;
+ consumeByte(insn, &insn->vexPrefix[1]);
+ consumeByte(insn, &insn->vexPrefix[2]);
+
+ /* We simulate the REX prefix for simplicity's sake */
+
+ insn->rexPrefix = 0x40
+ | (wFromVEX3of3(insn->vexPrefix[2]) << 3)
+ | (rFromVEX2of3(insn->vexPrefix[1]) << 2)
+ | (xFromVEX2of3(insn->vexPrefix[1]) << 1)
+ | (bFromVEX2of3(insn->vexPrefix[1]) << 0);
+
+ switch (ppFromVEX3of3(insn->vexPrefix[2]))
+ {
+ default:
+ break;
+ case VEX_PREFIX_66:
+ hasOpSize = TRUE;
+ break;
}
+
+ dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1], insn->vexPrefix[2]);
+ }
+ }
+ else if (byte == 0xc5) {
+ uint8_t byte1;
+
+ if (lookAtByte(insn, &byte1)) {
+ dbgprintf(insn, "Couldn't read second byte of VEX");
+ return -1;
+ }
- insn->rexPrefix = byte;
- insn->necessaryPrefixLocation = insn->readerCursor - 2;
-
- dbgprintf(insn, "Found REX prefix 0x%hhx", byte);
- } else {
+ if (insn->mode == MODE_64BIT || byte1 & 0x8) {
+ insn->vexSize = 2;
+ }
+ else {
+ unconsumeByte(insn);
+ }
+
+ if (insn->vexSize == 2) {
+ insn->vexPrefix[0] = byte;
+ consumeByte(insn, &insn->vexPrefix[1]);
+
+ insn->rexPrefix = 0x40
+ | (rFromVEX2of2(insn->vexPrefix[1]) << 2);
+
+ switch (ppFromVEX2of2(insn->vexPrefix[1]))
+ {
+ default:
+ break;
+ case VEX_PREFIX_66:
+ hasOpSize = TRUE;
+ break;
+ }
+
+ dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1]);
+ }
+ }
+ else {
+ if (insn->mode == MODE_64BIT) {
+ if ((byte & 0xf0) == 0x40) {
+ uint8_t opcodeByte;
+
+ if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) {
+ dbgprintf(insn, "Redundant REX prefix");
+ return -1;
+ }
+
+ insn->rexPrefix = byte;
+ insn->necessaryPrefixLocation = insn->readerCursor - 2;
+
+ dbgprintf(insn, "Found REX prefix 0x%hhx", byte);
+ } else {
+ unconsumeByte(insn);
+ insn->necessaryPrefixLocation = insn->readerCursor - 1;
+ }
+ } else {
unconsumeByte(insn);
insn->necessaryPrefixLocation = insn->readerCursor - 1;
}
- } else {
- unconsumeByte(insn);
- insn->necessaryPrefixLocation = insn->readerCursor - 1;
}
-
+
if (insn->mode == MODE_16BIT) {
insn->registerSize = (hasOpSize ? 4 : 2);
insn->addressSize = (hasAdSize ? 4 : 2);
@@ -438,6 +530,39 @@ static int readOpcode(struct InternalInstruction* insn) {
dbgprintf(insn, "readOpcode()");
insn->opcodeType = ONEBYTE;
+
+ if (insn->vexSize == 3)
+ {
+ switch (mmmmmFromVEX2of3(insn->vexPrefix[1]))
+ {
+ default:
+ dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", mmmmmFromVEX2of3(insn->vexPrefix[1]));
+ return -1;
+ case 0:
+ break;
+ case VEX_LOB_0F:
+ insn->twoByteEscape = 0x0f;
+ insn->opcodeType = TWOBYTE;
+ return consumeByte(insn, &insn->opcode);
+ case VEX_LOB_0F38:
+ insn->twoByteEscape = 0x0f;
+ insn->threeByteEscape = 0x38;
+ insn->opcodeType = THREEBYTE_38;
+ return consumeByte(insn, &insn->opcode);
+ case VEX_LOB_0F3A:
+ insn->twoByteEscape = 0x0f;
+ insn->threeByteEscape = 0x3a;
+ insn->opcodeType = THREEBYTE_3A;
+ return consumeByte(insn, &insn->opcode);
+ }
+ }
+ else if (insn->vexSize == 2)
+ {
+ insn->twoByteEscape = 0x0f;
+ insn->opcodeType = TWOBYTE;
+ return consumeByte(insn, &insn->opcode);
+ }
+
if (consumeByte(insn, &current))
return -1;
@@ -467,6 +592,24 @@ static int readOpcode(struct InternalInstruction* insn) {
return -1;
insn->opcodeType = THREEBYTE_3A;
+ } else if (current == 0xa6) {
+ dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
+
+ insn->threeByteEscape = current;
+
+ if (consumeByte(insn, &current))
+ return -1;
+
+ insn->opcodeType = THREEBYTE_A6;
+ } else if (current == 0xa7) {
+ dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
+
+ insn->threeByteEscape = current;
+
+ if (consumeByte(insn, &current))
+ return -1;
+
+ insn->opcodeType = THREEBYTE_A7;
} else {
dbgprintf(insn, "Didn't find a three-byte escape prefix");
@@ -600,20 +743,64 @@ static int getID(struct InternalInstruction* insn) {
dbgprintf(insn, "getID()");
attrMask = ATTR_NONE;
-
+
if (insn->mode == MODE_64BIT)
attrMask |= ATTR_64BIT;
-
- if (insn->rexPrefix & 0x08)
- attrMask |= ATTR_REXW;
-
- if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation))
- attrMask |= ATTR_OPSIZE;
- else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation))
- attrMask |= ATTR_XS;
- else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation))
- attrMask |= ATTR_XD;
-
+
+ if (insn->vexSize) {
+ attrMask |= ATTR_VEX;
+
+ if (insn->vexSize == 3) {
+ switch (ppFromVEX3of3(insn->vexPrefix[2])) {
+ case VEX_PREFIX_66:
+ attrMask |= ATTR_OPSIZE;
+ break;
+ case VEX_PREFIX_F3:
+ attrMask |= ATTR_XS;
+ break;
+ case VEX_PREFIX_F2:
+ attrMask |= ATTR_XD;
+ break;
+ }
+
+ if (wFromVEX3of3(insn->vexPrefix[2]))
+ attrMask |= ATTR_REXW;
+ if (lFromVEX3of3(insn->vexPrefix[2]))
+ attrMask |= ATTR_VEXL;
+ }
+ else if (insn->vexSize == 2) {
+ switch (ppFromVEX2of2(insn->vexPrefix[1])) {
+ case VEX_PREFIX_66:
+ attrMask |= ATTR_OPSIZE;
+ break;
+ case VEX_PREFIX_F3:
+ attrMask |= ATTR_XS;
+ break;
+ case VEX_PREFIX_F2:
+ attrMask |= ATTR_XD;
+ break;
+ }
+
+ if (lFromVEX2of2(insn->vexPrefix[1]))
+ attrMask |= ATTR_VEXL;
+ }
+ else {
+ return -1;
+ }
+ }
+ else {
+ if (insn->rexPrefix & 0x08)
+ attrMask |= ATTR_REXW;
+
+ if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation))
+ attrMask |= ATTR_OPSIZE;
+ else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation))
+ attrMask |= ATTR_XS;
+ else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation))
+ attrMask |= ATTR_XD;
+
+ }
+
if (getIDWithAttrMask(&instructionID, insn, attrMask))
return -1;
@@ -749,7 +936,7 @@ static int readSIB(struct InternalInstruction* insn) {
insn->sibIndex = SIB_INDEX_NONE;
break;
default:
- insn->sibIndex = (EABase)(sibIndexBase + index);
+ insn->sibIndex = (SIBIndex)(sibIndexBase + index);
if (insn->sibIndex == SIB_INDEX_sib ||
insn->sibIndex == SIB_INDEX_sib64)
insn->sibIndex = SIB_INDEX_NONE;
@@ -796,7 +983,7 @@ static int readSIB(struct InternalInstruction* insn) {
}
break;
default:
- insn->sibBase = (EABase)(sibBaseBase + base);
+ insn->sibBase = (SIBBase)(sibBaseBase + base);
break;
}
@@ -1012,6 +1199,8 @@ static int readModRM(struct InternalInstruction* insn) {
return prefix##_EAX + index; \
case TYPE_R64: \
return prefix##_RAX + index; \
+ case TYPE_XMM256: \
+ return prefix##_YMM0 + index; \
case TYPE_XMM128: \
case TYPE_XMM64: \
case TYPE_XMM32: \
@@ -1073,6 +1262,14 @@ static int fixupReg(struct InternalInstruction *insn,
default:
debug("Expected a REG or R/M encoding in fixupReg");
return -1;
+ case ENCODING_VVVV:
+ insn->vvvv = (Reg)fixupRegValue(insn,
+ (OperandType)op->type,
+ insn->vvvv,
+ &valid);
+ if (!valid)
+ return -1;
+ break;
case ENCODING_REG:
insn->reg = (Reg)fixupRegValue(insn,
(OperandType)op->type,
@@ -1237,6 +1434,27 @@ static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
}
/*
+ * readVVVV - Reads the VEX.vvvv field, which encodes an additional register
+ *   operand for some AVX instructions.
+ *
+ * @param insn - The instruction whose VEX prefix is to be read.
+ * @return - 0 if the field was successfully read; nonzero if no VEX
+ * prefix is present.
+ */
+static int readVVVV(struct InternalInstruction* insn) {
+ dbgprintf(insn, "readVVVV()");
+
+ if (insn->vexSize == 3)
+ insn->vvvv = vvvvFromVEX3of3(insn->vexPrefix[2]);
+ else if (insn->vexSize == 2)
+ insn->vvvv = vvvvFromVEX2of2(insn->vexPrefix[1]);
+ else
+ return -1;
+
+ return 0;
+}
+
+/*
* readOperands - Consults the specifier for an instruction and consumes all
* operands for that instruction, interpreting them as it goes.
*
@@ -1317,6 +1535,13 @@ static int readOperands(struct InternalInstruction* insn) {
case ENCODING_I:
if (readOpcodeModifier(insn))
return -1;
+ break;
+ case ENCODING_VVVV:
+ if (readVVVV(insn))
+ return -1;
+ if (fixupReg(insn, &insn->spec->operands[index]))
+ return -1;
+ break;
case ENCODING_DUP:
break;
default:
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
index d0dc8b5..a9c90f8 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
@@ -34,16 +34,30 @@ extern "C" {
/*
* Accessor functions for various fields of an Intel instruction
*/
-#define modFromModRM(modRM) ((modRM & 0xc0) >> 6)
-#define regFromModRM(modRM) ((modRM & 0x38) >> 3)
-#define rmFromModRM(modRM) (modRM & 0x7)
-#define scaleFromSIB(sib) ((sib & 0xc0) >> 6)
-#define indexFromSIB(sib) ((sib & 0x38) >> 3)
-#define baseFromSIB(sib) (sib & 0x7)
-#define wFromREX(rex) ((rex & 0x8) >> 3)
-#define rFromREX(rex) ((rex & 0x4) >> 2)
-#define xFromREX(rex) ((rex & 0x2) >> 1)
-#define bFromREX(rex) (rex & 0x1)
+#define modFromModRM(modRM) (((modRM) & 0xc0) >> 6)
+#define regFromModRM(modRM) (((modRM) & 0x38) >> 3)
+#define rmFromModRM(modRM) ((modRM) & 0x7)
+#define scaleFromSIB(sib) (((sib) & 0xc0) >> 6)
+#define indexFromSIB(sib) (((sib) & 0x38) >> 3)
+#define baseFromSIB(sib) ((sib) & 0x7)
+#define wFromREX(rex) (((rex) & 0x8) >> 3)
+#define rFromREX(rex) (((rex) & 0x4) >> 2)
+#define xFromREX(rex) (((rex) & 0x2) >> 1)
+#define bFromREX(rex) ((rex) & 0x1)
+
+#define rFromVEX2of3(vex) (((~(vex)) & 0x80) >> 7)
+#define xFromVEX2of3(vex) (((~(vex)) & 0x40) >> 6)
+#define bFromVEX2of3(vex) (((~(vex)) & 0x20) >> 5)
+#define mmmmmFromVEX2of3(vex) ((vex) & 0x1f)
+#define wFromVEX3of3(vex) (((vex) & 0x80) >> 7)
+#define vvvvFromVEX3of3(vex) (((~(vex)) & 0x78) >> 3)
+#define lFromVEX3of3(vex) (((vex) & 0x4) >> 2)
+#define ppFromVEX3of3(vex) ((vex) & 0x3)
+
+#define rFromVEX2of2(vex) (((~(vex)) & 0x80) >> 7)
+#define vvvvFromVEX2of2(vex) (((~(vex)) & 0x78) >> 3)
+#define lFromVEX2of2(vex) (((vex) & 0x4) >> 2)
+#define ppFromVEX2of2(vex) ((vex) & 0x3)
/*
* These enums represent Intel registers for use by the decoder.
@@ -206,7 +220,25 @@ extern "C" {
ENTRY(XMM13) \
ENTRY(XMM14) \
ENTRY(XMM15)
-
+
+#define REGS_YMM \
+ ENTRY(YMM0) \
+ ENTRY(YMM1) \
+ ENTRY(YMM2) \
+ ENTRY(YMM3) \
+ ENTRY(YMM4) \
+ ENTRY(YMM5) \
+ ENTRY(YMM6) \
+ ENTRY(YMM7) \
+ ENTRY(YMM8) \
+ ENTRY(YMM9) \
+ ENTRY(YMM10) \
+ ENTRY(YMM11) \
+ ENTRY(YMM12) \
+ ENTRY(YMM13) \
+ ENTRY(YMM14) \
+ ENTRY(YMM15)
+
#define REGS_SEGMENT \
ENTRY(ES) \
ENTRY(CS) \
@@ -252,6 +284,7 @@ extern "C" {
REGS_64BIT \
REGS_MMX \
REGS_XMM \
+ REGS_YMM \
REGS_SEGMENT \
REGS_DEBUG \
REGS_CONTROL \
@@ -332,6 +365,27 @@ typedef enum {
SEG_OVERRIDE_GS,
SEG_OVERRIDE_max
} SegmentOverride;
+
+/*
+ * VEXLeadingOpcodeByte - Possible values for the VEX.m-mmmm field
+ */
+
+typedef enum {
+ VEX_LOB_0F = 0x1,
+ VEX_LOB_0F38 = 0x2,
+ VEX_LOB_0F3A = 0x3
+} VEXLeadingOpcodeByte;
+
+/*
+ * VEXPrefixCode - Possible values for the VEX.pp field
+ */
+
+typedef enum {
+ VEX_PREFIX_NONE = 0x0,
+ VEX_PREFIX_66 = 0x1,
+ VEX_PREFIX_F3 = 0x2,
+ VEX_PREFIX_F2 = 0x3
+} VEXPrefixCode;
typedef uint8_t BOOL;
@@ -389,10 +443,12 @@ struct InternalInstruction {
uint8_t prefixPresent[0x100];
/* contains the location (for use with the reader) of the prefix byte */
uint64_t prefixLocations[0x100];
+ /* The value of the VEX prefix, if present */
+ uint8_t vexPrefix[3];
+ /* The length of the VEX prefix (0 if not present) */
+ uint8_t vexSize;
/* The value of the REX prefix, if present */
uint8_t rexPrefix;
- /* The location of the REX prefix */
- uint64_t rexLocation;
/* The location where a mandatory prefix would have to be (i.e., right before
the opcode, or right before the REX prefix if one is present) */
uint64_t necessaryPrefixLocation;
@@ -428,6 +484,10 @@ struct InternalInstruction {
/* state for additional bytes, consumed during operand decode. Pattern:
consumed___ indicates that the byte was already consumed and does not
need to be consumed again */
+
+ /* The VEX.vvvv field, which contains a third register operand for some AVX
+ instructions */
+ Reg vvvv;
/* The ModR/M byte, which contains most register operands and some portion of
all memory operands */
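
readPrefixes() above simulates a REX prefix from the VEX bytes so the rest of the decoder is unchanged: R, X and B are stored inverted in the second byte of a 3-byte VEX and W in the third, and they are repacked into the usual 0100WRXB layout. A self-contained sketch using the same field extractors defined in this header:

#include <cstdint>

#define rFromVEX2of3(vex) (((~(vex)) & 0x80) >> 7)
#define xFromVEX2of3(vex) (((~(vex)) & 0x40) >> 6)
#define bFromVEX2of3(vex) (((~(vex)) & 0x20) >> 5)
#define wFromVEX3of3(vex) (((vex) & 0x80) >> 7)

static uint8_t simulatedREX(uint8_t vexByte2, uint8_t vexByte3) {
  return 0x40 | (wFromVEX3of3(vexByte3) << 3) | (rFromVEX2of3(vexByte2) << 2)
              | (xFromVEX2of3(vexByte2) << 1) |  bFromVEX2of3(vexByte2);
}

int main() {
  // R, X, B stored inverted (bits clear) and W set yields REX.WRXB == 0x4F.
  return simulatedREX(0x01, 0x80) == 0x4F ? 0 : 1;
}
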
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
index 1425b86..70315ed 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
@@ -30,6 +30,8 @@
#define TWOBYTE_SYM x86DisassemblerTwoByteOpcodes
#define THREEBYTE38_SYM x86DisassemblerThreeByte38Opcodes
#define THREEBYTE3A_SYM x86DisassemblerThreeByte3AOpcodes
+#define THREEBYTEA6_SYM x86DisassemblerThreeByteA6Opcodes
+#define THREEBYTEA7_SYM x86DisassemblerThreeByteA7Opcodes
#define INSTRUCTIONS_STR "x86DisassemblerInstrSpecifiers"
#define CONTEXTS_STR "x86DisassemblerContexts"
@@ -37,6 +39,8 @@
#define TWOBYTE_STR "x86DisassemblerTwoByteOpcodes"
#define THREEBYTE38_STR "x86DisassemblerThreeByte38Opcodes"
#define THREEBYTE3A_STR "x86DisassemblerThreeByte3AOpcodes"
+#define THREEBYTEA6_STR "x86DisassemblerThreeByteA6Opcodes"
+#define THREEBYTEA7_STR "x86DisassemblerThreeByteA7Opcodes"
/*
* Attributes of an instruction that must be known before the opcode can be
@@ -49,7 +53,9 @@
ENUM_ENTRY(ATTR_XS, 0x02) \
ENUM_ENTRY(ATTR_XD, 0x04) \
ENUM_ENTRY(ATTR_REXW, 0x08) \
- ENUM_ENTRY(ATTR_OPSIZE, 0x10)
+ ENUM_ENTRY(ATTR_OPSIZE, 0x10) \
+ ENUM_ENTRY(ATTR_VEX, 0x20) \
+ ENUM_ENTRY(ATTR_VEXL, 0x40)
#define ENUM_ENTRY(n, v) n = v,
enum attributeBits {
@@ -87,7 +93,20 @@ enum attributeBits {
"IC_64BIT_REXW_XS") \
ENUM_ENTRY(IC_64BIT_REXW_OPSIZE, 7, "The Dynamic Duo! Prefer over all " \
"else because this changes most " \
- "operands' meaning")
+ "operands' meaning") \
+ ENUM_ENTRY(IC_VEX, 1, "requires a VEX prefix") \
+ ENUM_ENTRY(IC_VEX_XS, 2, "requires VEX and the XS prefix") \
+ ENUM_ENTRY(IC_VEX_XD, 2, "requires VEX and the XD prefix") \
+ ENUM_ENTRY(IC_VEX_OPSIZE, 2, "requires VEX and the OpSize prefix") \
+ ENUM_ENTRY(IC_VEX_W, 3, "requires VEX and the W prefix") \
+ ENUM_ENTRY(IC_VEX_W_XS, 4, "requires VEX, W, and XS prefix") \
+ ENUM_ENTRY(IC_VEX_W_XD, 4, "requires VEX, W, and XD prefix") \
+ ENUM_ENTRY(IC_VEX_W_OPSIZE, 4, "requires VEX, W, and OpSize") \
+ ENUM_ENTRY(IC_VEX_L, 3, "requires VEX and the L prefix") \
+ ENUM_ENTRY(IC_VEX_L_XS, 4, "requires VEX and the L and XS prefix")\
+ ENUM_ENTRY(IC_VEX_L_XD, 4, "requires VEX and the L and XD prefix")\
+ ENUM_ENTRY(IC_VEX_L_OPSIZE, 4, "requires VEX, L, and OpSize")
+
#define ENUM_ENTRY(n, r, d) n,
typedef enum {
@@ -104,7 +123,9 @@ typedef enum {
ONEBYTE = 0,
TWOBYTE = 1,
THREEBYTE_38 = 2,
- THREEBYTE_3A = 3
+ THREEBYTE_3A = 3,
+ THREEBYTE_A6 = 4,
+ THREEBYTE_A7 = 5
} OpcodeType;
/*
@@ -183,6 +204,7 @@ struct ContextDecision {
ENUM_ENTRY(ENCODING_NONE, "") \
ENUM_ENTRY(ENCODING_REG, "Register operand in ModR/M byte.") \
ENUM_ENTRY(ENCODING_RM, "R/M operand in ModR/M byte.") \
+ ENUM_ENTRY(ENCODING_VVVV, "Register operand in VEX.vvvv byte.") \
ENUM_ENTRY(ENCODING_CB, "1-byte code offset (possible new CS value)") \
ENUM_ENTRY(ENCODING_CW, "2-byte") \
ENUM_ENTRY(ENCODING_CD, "4-byte") \
@@ -278,6 +300,7 @@ struct ContextDecision {
ENUM_ENTRY(TYPE_XMM32, "4-byte XMM register or memory operand") \
ENUM_ENTRY(TYPE_XMM64, "8-byte") \
ENUM_ENTRY(TYPE_XMM128, "16-byte") \
+ ENUM_ENTRY(TYPE_XMM256, "32-byte") \
ENUM_ENTRY(TYPE_XMM0, "Implicit use of XMM0") \
ENUM_ENTRY(TYPE_SEGMENTREG, "Segment register operand") \
ENUM_ENTRY(TYPE_DEBUGREG, "Debug register operand") \
diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
index d6950f4..dd6e353 100644
--- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
@@ -15,6 +15,7 @@
#define DEBUG_TYPE "asm-printer"
#include "X86ATTInstPrinter.h"
#include "X86InstComments.h"
+#include "X86Subtarget.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
@@ -22,24 +23,38 @@
#include "llvm/Support/Format.h"
#include "llvm/Support/FormattedStream.h"
#include "X86GenInstrNames.inc"
+#include <map>
using namespace llvm;
// Include the auto-generated portion of the assembly writer.
#define GET_INSTRUCTION_NAME
+#define PRINT_ALIAS_INSTR
+#include "X86GenRegisterNames.inc"
#include "X86GenAsmWriter.inc"
+#undef PRINT_ALIAS_INSTR
+#undef GET_INSTRUCTION_NAME
+
+X86ATTInstPrinter::X86ATTInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI)
+ : MCInstPrinter(MAI) {
+ // Initialize the set of available features.
+ setAvailableFeatures(ComputeAvailableFeatures(
+ &TM.getSubtarget<X86Subtarget>()));
+}
void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS) {
- printInstruction(MI, OS);
+ // Try to print any aliases first.
+ if (!printAliasInstr(MI, OS))
+ printInstruction(MI, OS);
// If verbose assembly is enabled, we can print some informative comments.
if (CommentStream)
EmitAnyX86InstComments(MI, *CommentStream, getRegisterName);
}
+
StringRef X86ATTInstPrinter::getOpcodeName(unsigned Opcode) const {
return getInstructionName(Opcode);
}
-
void X86ATTInstPrinter::printSSECC(const MCInst *MI, unsigned Op,
raw_ostream &O) {
switch (MI->getOperand(Op).getImm()) {
diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
index eb98664..8d69391 100644
--- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
+++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
@@ -17,16 +17,24 @@
#include "llvm/MC/MCInstPrinter.h"
namespace llvm {
- class MCOperand;
+
+class MCOperand;
+class X86Subtarget;
+class TargetMachine;
class X86ATTInstPrinter : public MCInstPrinter {
public:
- X86ATTInstPrinter(const MCAsmInfo &MAI) : MCInstPrinter(MAI) {}
-
+ X86ATTInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI);
virtual void printInst(const MCInst *MI, raw_ostream &OS);
virtual StringRef getOpcodeName(unsigned Opcode) const;
+ // Methods used to print the alias of an instruction.
+ unsigned ComputeAvailableFeatures(const X86Subtarget *Subtarget) const;
+ // Autogenerated by tblgen, returns true if we successfully printed an
+ // alias.
+ bool printAliasInstr(const MCInst *MI, raw_ostream &OS);
+
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, raw_ostream &OS);
static const char *getRegisterName(unsigned RegNo);
diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp
index 12144e3..c642acc 100644
--- a/lib/Target/X86/InstPrinter/X86InstComments.cpp
+++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -111,28 +111,28 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
// FALL THROUGH.
case X86::PUNPCKLBWrm:
Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodePUNPCKLMask(16, ShuffleMask);
+ DecodePUNPCKLBWMask(16, ShuffleMask);
break;
case X86::PUNPCKLWDrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::PUNPCKLWDrm:
Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodePUNPCKLMask(8, ShuffleMask);
+ DecodePUNPCKLWDMask(8, ShuffleMask);
break;
case X86::PUNPCKLDQrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::PUNPCKLDQrm:
Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodePUNPCKLMask(4, ShuffleMask);
+ DecodePUNPCKLDQMask(4, ShuffleMask);
break;
case X86::PUNPCKLQDQrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::PUNPCKLQDQrm:
Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodePUNPCKLMask(2, ShuffleMask);
+ DecodePUNPCKLQDQMask(2, ShuffleMask);
break;
case X86::SHUFPDrri:
@@ -153,16 +153,44 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::UNPCKLPDrm:
- DecodeUNPCKLPMask(2, ShuffleMask);
+ DecodeUNPCKLPDMask(2, ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg());
break;
+ case X86::VUNPCKLPDrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VUNPCKLPDrm:
+ DecodeUNPCKLPDMask(2, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ break;
+ case X86::VUNPCKLPDYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VUNPCKLPDYrm:
+ DecodeUNPCKLPDMask(4, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ break;
case X86::UNPCKLPSrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::UNPCKLPSrm:
- DecodeUNPCKLPMask(4, ShuffleMask);
+ DecodeUNPCKLPSMask(4, ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg());
break;
+ case X86::VUNPCKLPSrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VUNPCKLPSrm:
+ DecodeUNPCKLPSMask(4, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ break;
+ case X86::VUNPCKLPSYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VUNPCKLPSYrm:
+ DecodeUNPCKLPSMask(8, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ break;
case X86::UNPCKHPDrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
index 0484529..47253eb 100644
--- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
@@ -15,6 +15,7 @@
#define DEBUG_TYPE "asm-printer"
#include "X86IntelInstPrinter.h"
#include "X86InstComments.h"
+#include "X86Subtarget.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
index 6f12032..ca99dc0 100644
--- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
+++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
@@ -18,13 +18,15 @@
#include "llvm/Support/raw_ostream.h"
namespace llvm {
- class MCOperand;
+
+class MCOperand;
+class TargetMachine;
class X86IntelInstPrinter : public MCInstPrinter {
public:
- X86IntelInstPrinter(const MCAsmInfo &MAI)
+ X86IntelInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI)
: MCInstPrinter(MAI) {}
-
+
virtual void printInst(const MCInst *MI, raw_ostream &OS);
virtual StringRef getOpcodeName(unsigned Opcode) const;
@@ -33,7 +35,6 @@ public:
static const char *getRegisterName(unsigned RegNo);
static const char *getInstructionName(unsigned Opcode);
-
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printMemReference(const MCInst *MI, unsigned Op, raw_ostream &O);
void printSSECC(const MCInst *MI, unsigned Op, raw_ostream &O);
diff --git a/lib/Target/X86/README-X86-64.txt b/lib/Target/X86/README-X86-64.txt
index e21d69a..e7429a3 100644
--- a/lib/Target/X86/README-X86-64.txt
+++ b/lib/Target/X86/README-X86-64.txt
@@ -36,7 +36,7 @@ _conv:
cmovb %rcx, %rax
ret
-Seems like the jb branch has high likelyhood of being taken. It would have
+Seems like the jb branch has high likelihood of being taken. It would have
saved a few instructions.
//===---------------------------------------------------------------------===//
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index abd1515..ea3014e 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -7,14 +7,6 @@ copy (3-addr bswap + memory support?) This is available on Atom processors.
//===---------------------------------------------------------------------===//
-CodeGen/X86/lea-3.ll:test3 should be a single LEA, not a shift/move. The X86
-backend knows how to three-addressify this shift, but it appears the register
-allocator isn't even asking it to do so in this case. We should investigate
-why this isn't happening, it could have significant impact on other important
-cases for X86 as well.
-
-//===---------------------------------------------------------------------===//
-
This should be one DIV/IDIV instruction, not a libcall:
unsigned test(unsigned long long X, unsigned Y) {
@@ -1572,7 +1564,7 @@ Implement processor-specific optimizations for parity with GCC on these
processors. GCC does two optimizations:
1. ix86_pad_returns inserts a noop before ret instructions if immediately
- preceeded by a conditional branch or is the target of a jump.
+ preceded by a conditional branch or is the target of a jump.
2. ix86_avoid_jump_misspredicts inserts noops in cases where a 16-byte block of
code contains more than 3 branches.
@@ -1656,28 +1648,61 @@ information to add the "lock" prefix.
//===---------------------------------------------------------------------===//
-_Bool bar(int *x) { return *x & 1; }
+struct B {
+ unsigned char y0 : 1;
+};
-define zeroext i1 @bar(i32* nocapture %x) nounwind readonly {
-entry:
- %tmp1 = load i32* %x ; <i32> [#uses=1]
- %and = and i32 %tmp1, 1 ; <i32> [#uses=1]
- %tobool = icmp ne i32 %and, 0 ; <i1> [#uses=1]
- ret i1 %tobool
+int bar(struct B* a) { return a->y0; }
+
+define i32 @bar(%struct.B* nocapture %a) nounwind readonly optsize {
+ %1 = getelementptr inbounds %struct.B* %a, i64 0, i32 0
+ %2 = load i8* %1, align 1
+ %3 = and i8 %2, 1
+ %4 = zext i8 %3 to i32
+ ret i32 %4
}
-bar: # @bar
-# BB#0: # %entry
- movl 4(%esp), %eax
- movb (%eax), %al
- andb $1, %al
- movzbl %al, %eax
- ret
+bar: # @bar
+# BB#0:
+ movb (%rdi), %al
+ andb $1, %al
+ movzbl %al, %eax
+ ret
Missed optimization: should be movl+andl.
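+
+Presumably something like this (untested sketch):
+ movl (%rdi), %eax
+ andl $1, %eax
+ ret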
//===---------------------------------------------------------------------===//
+The x86-64 ABI says:
+
+Booleans, when stored in a memory object, are stored as single byte objects the
+value of which is always 0 (false) or 1 (true).
+
+We are not using this fact:
+
+int bar(_Bool *a) { return *a; }
+
+define i32 @bar(i8* nocapture %a) nounwind readonly optsize {
+ %1 = load i8* %a, align 1, !tbaa !0
+ %tmp = and i8 %1, 1
+ %2 = zext i8 %tmp to i32
+ ret i32 %2
+}
+
+bar:
+ movb (%rdi), %al
+ andb $1, %al
+ movzbl %al, %eax
+ ret
+
+GCC produces
+
+bar:
+ movzbl (%rdi), %eax
+ ret
+
+//===---------------------------------------------------------------------===//
+
Consider the following two functions compiled with clang:
_Bool foo(int *x) { return !(*x & 4); }
unsigned bar(int *x) { return !(*x & 4); }
@@ -1703,26 +1728,6 @@ are functionally identical.
//===---------------------------------------------------------------------===//
Take the following C code:
-int x(int y) { return (y & 63) << 14; }
-
-Code produced by gcc:
- andl $63, %edi
- sall $14, %edi
- movl %edi, %eax
- ret
-
-Code produced by clang:
- shll $14, %edi
- movl %edi, %eax
- andl $1032192, %eax
- ret
-
-The code produced by gcc is 3 bytes shorter. This sort of construct often
-shows up with bitfields.
-
-//===---------------------------------------------------------------------===//
-
-Take the following C code:
int f(int a, int b) { return (unsigned char)a == (unsigned char)b; }
We generate the following IR with clang:
@@ -1947,3 +1952,91 @@ which is "perfect".
//===---------------------------------------------------------------------===//
+For the branch in the following code:
+int a();
+int b(int x, int y) {
+ if (x & (1<<(y&7)))
+ return a();
+ return y;
+}
+
+We currently generate:
+ movb %sil, %al
+ andb $7, %al
+ movzbl %al, %eax
+ btl %eax, %edi
+ jae .LBB0_2
+
+movl+andl would be shorter than the movb+andb+movzbl sequence.
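+
+Roughly (an untested sketch):
+ movl %esi, %eax
+ andl $7, %eax
+ btl %eax, %edi
+ jae .LBB0_2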
+
+//===---------------------------------------------------------------------===//
+
+For the following:
+struct u1 {
+ float x, y;
+};
+float foo(struct u1 u) {
+ return u.x + u.y;
+}
+
+We currently generate:
+ movdqa %xmm0, %xmm1
+ pshufd $1, %xmm0, %xmm0 # xmm0 = xmm0[1,0,0,0]
+ addss %xmm1, %xmm0
+ ret
+
+We could save an instruction here by commuting the addss.
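+
+That is, roughly (untested):
+ pshufd $1, %xmm0, %xmm1 # xmm1 = xmm0[1,0,0,0]
+ addss %xmm1, %xmm0
+ ret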
+
+//===---------------------------------------------------------------------===//
+
+This (from PR9661):
+
+float clamp_float(float a) {
+ if (a > 1.0f)
+ return 1.0f;
+ else if (a < 0.0f)
+ return 0.0f;
+ else
+ return a;
+}
+
+Could compile to:
+
+clamp_float: # @clamp_float
+ movss .LCPI0_0(%rip), %xmm1
+ minss %xmm1, %xmm0
+ pxor %xmm1, %xmm1
+ maxss %xmm1, %xmm0
+ ret
+
+with -ffast-math.
+
+//===---------------------------------------------------------------------===//
+
+This function (from PR9803):
+
+int clamp2(int a) {
+ if (a > 5)
+ a = 5;
+ if (a < 0)
+ return 0;
+ return a;
+}
+
+Compiles to:
+
+_clamp2: ## @clamp2
+ pushq %rbp
+ movq %rsp, %rbp
+ cmpl $5, %edi
+ movl $5, %ecx
+ cmovlel %edi, %ecx
+ testl %ecx, %ecx
+ movl $0, %eax
+ cmovnsl %ecx, %eax
+ popq %rbp
+ ret
+
+The move of 0 could be scheduled above the test so that it becomes xor reg,reg.
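+
+For example (untested sketch, frame setup omitted):
+ xorl %eax, %eax
+ cmpl $5, %edi
+ movl $5, %ecx
+ cmovlel %edi, %ecx
+ testl %ecx, %ecx
+ cmovnsl %ecx, %eax
+ ret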
+
+//===---------------------------------------------------------------------===//
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index 1287977..cd06060 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -1,4 +1,4 @@
-//===-- X86ShuffleDecode.h - X86 shuffle decode logic ---------------------===//
+//===-- X86ShuffleDecode.cpp - X86 shuffle decode logic -------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -95,12 +95,29 @@ void DecodePSHUFLWMask(unsigned Imm,
ShuffleMask.push_back(7);
}
-void DecodePUNPCKLMask(unsigned NElts,
+void DecodePUNPCKLBWMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i8, NElts), ShuffleMask);
+}
+
+void DecodePUNPCKLWDMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i16, NElts), ShuffleMask);
+}
+
+void DecodePUNPCKLDQMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i32, NElts), ShuffleMask);
+}
+
+void DecodePUNPCKLQDQMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i64, NElts), ShuffleMask);
+}
+
+void DecodePUNPCKLMask(EVT VT,
SmallVectorImpl<unsigned> &ShuffleMask) {
- for (unsigned i = 0; i != NElts/2; ++i) {
- ShuffleMask.push_back(i);
- ShuffleMask.push_back(i+NElts);
- }
+ DecodeUNPCKLPMask(VT, ShuffleMask);
}
void DecodePUNPCKHMask(unsigned NElts,
@@ -133,15 +150,40 @@ void DecodeUNPCKHPMask(unsigned NElts,
}
}
+void DecodeUNPCKLPSMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i32, NElts), ShuffleMask);
+}
+
+void DecodeUNPCKLPDMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i64, NElts), ShuffleMask);
+}
/// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd
-/// etc. NElts indicates the number of elements in the vector allowing it to
-/// handle different datatypes and vector widths.
-void DecodeUNPCKLPMask(unsigned NElts,
+/// etc. VT indicates the type of the vector allowing it to handle different
+/// datatypes and vector widths.
+void DecodeUNPCKLPMask(EVT VT,
SmallVectorImpl<unsigned> &ShuffleMask) {
- for (unsigned i = 0; i != NElts/2; ++i) {
- ShuffleMask.push_back(i); // Reads from dest
- ShuffleMask.push_back(i+NElts); // Reads from src
+ unsigned NumElts = VT.getVectorNumElements();
+
+ // Handle vector lengths > 128 bits. Define a "section" as a set of
+ // 128 bits. AVX defines UNPCK* to operate independently on 128-bit
+ // sections.
+ unsigned NumSections = VT.getSizeInBits() / 128;
+ if (NumSections == 0) NumSections = 1; // Handle MMX
+ unsigned NumSectionElts = NumElts / NumSections;
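+ // For example, a v8i32 type gives NumSections == 2 and NumSectionElts == 4,
+ // so the loop below produces the mask <0,4,1,5, 4,8,5,9>.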
+
+ unsigned Start = 0;
+ unsigned End = NumSectionElts / 2;
+ for (unsigned s = 0; s < NumSections; ++s) {
+ for (unsigned i = Start; i != End; ++i) {
+ ShuffleMask.push_back(i); // Reads from dest/src1
+ ShuffleMask.push_back(i+NumSectionElts); // Reads from src/src2
+ }
+ // Process the next 128 bits.
+ Start += NumSectionElts;
+ End += NumSectionElts;
}
}
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h
index 50d9ccb..b18f670 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.h
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -16,6 +16,7 @@
#define X86_SHUFFLE_DECODE_H
#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/ValueTypes.h"
//===----------------------------------------------------------------------===//
// Vector Mask Decoding
@@ -45,7 +46,19 @@ void DecodePSHUFHWMask(unsigned Imm,
void DecodePSHUFLWMask(unsigned Imm,
SmallVectorImpl<unsigned> &ShuffleMask);
-void DecodePUNPCKLMask(unsigned NElts,
+void DecodePUNPCKLBWMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePUNPCKLWDMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePUNPCKLDQMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePUNPCKLQDQMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePUNPCKLMask(EVT VT,
SmallVectorImpl<unsigned> &ShuffleMask);
void DecodePUNPCKHMask(unsigned NElts,
@@ -57,11 +70,16 @@ void DecodeSHUFPSMask(unsigned NElts, unsigned Imm,
void DecodeUNPCKHPMask(unsigned NElts,
SmallVectorImpl<unsigned> &ShuffleMask);
+void DecodeUNPCKLPSMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodeUNPCKLPDMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
/// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd
-/// etc. NElts indicates the number of elements in the vector allowing it to
-/// handle different datatypes and vector widths.
-void DecodeUNPCKLPMask(unsigned NElts,
+/// etc. VT indicates the type of the vector allowing it to handle different
+/// datatypes and vector widths.
+void DecodeUNPCKLPMask(EVT VT,
SmallVectorImpl<unsigned> &ShuffleMask);
} // llvm namespace
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index efb6c8c..25b8d3e 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -1,13 +1,13 @@
//===- X86.td - Target definition file for the Intel X86 ---*- tablegen -*-===//
-//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
-// This is a target description file for the Intel i386 architecture, refered to
+// This is a target description file for the Intel i386 architecture, referred to
// here as the "X86" architecture.
//
//===----------------------------------------------------------------------===//
@@ -32,7 +32,7 @@ def FeatureMMX : SubtargetFeature<"mmx","X86SSELevel", "MMX",
def FeatureSSE1 : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
"Enable SSE instructions",
// SSE codegen depends on cmovs, and all
- // SSE1+ processors support them.
+ // SSE1+ processors support them.
[FeatureMMX, FeatureCMOV]>;
def FeatureSSE2 : SubtargetFeature<"sse2", "X86SSELevel", "SSE2",
"Enable SSE2 instructions",
@@ -50,7 +50,8 @@ def FeatureSSE42 : SubtargetFeature<"sse42", "X86SSELevel", "SSE42",
"Enable SSE 4.2 instructions",
[FeatureSSE41, FeaturePOPCNT]>;
def Feature3DNow : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow",
- "Enable 3DNow! instructions">;
+ "Enable 3DNow! instructions",
+ [FeatureMMX]>;
def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
"Enable 3DNow! Athlon instructions",
[Feature3DNow]>;
@@ -125,10 +126,10 @@ def : Proc<"sandybridge", [FeatureSSE42, Feature64Bit,
FeatureAES, FeatureCLMUL]>;
def : Proc<"k6", [FeatureMMX]>;
-def : Proc<"k6-2", [FeatureMMX, Feature3DNow]>;
-def : Proc<"k6-3", [FeatureMMX, Feature3DNow]>;
-def : Proc<"athlon", [FeatureMMX, Feature3DNowA, FeatureSlowBTMem]>;
-def : Proc<"athlon-tbird", [FeatureMMX, Feature3DNowA, FeatureSlowBTMem]>;
+def : Proc<"k6-2", [Feature3DNow]>;
+def : Proc<"k6-3", [Feature3DNow]>;
+def : Proc<"athlon", [Feature3DNowA, FeatureSlowBTMem]>;
+def : Proc<"athlon-tbird", [Feature3DNowA, FeatureSlowBTMem]>;
def : Proc<"athlon-4", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>;
def : Proc<"athlon-xp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>;
def : Proc<"athlon-mp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>;
@@ -156,8 +157,8 @@ def : Proc<"shanghai", [Feature3DNowA, Feature64Bit, FeatureSSE4A,
Feature3DNowA]>;
def : Proc<"winchip-c6", [FeatureMMX]>;
-def : Proc<"winchip2", [FeatureMMX, Feature3DNow]>;
-def : Proc<"c3", [FeatureMMX, Feature3DNow]>;
+def : Proc<"winchip2", [Feature3DNow]>;
+def : Proc<"c3", [Feature3DNow]>;
def : Proc<"c3-2", [FeatureSSE1]>;
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86AsmBackend.cpp b/lib/Target/X86/X86AsmBackend.cpp
index da5f5b1..4d7d96d 100644
--- a/lib/Target/X86/X86AsmBackend.cpp
+++ b/lib/Target/X86/X86AsmBackend.cpp
@@ -21,6 +21,7 @@
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/Object/MachOFormat.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -28,6 +29,13 @@
#include "llvm/Target/TargetAsmBackend.h"
using namespace llvm;
+// Option to allow disabling arithmetic relaxation to work around PR9807, which
+// is useful when running bitwise comparison experiments on Darwin. We should be
+// able to remove this once PR9807 is resolved.
+static cl::opt<bool>
+MCDisableArithRelaxation("mc-x86-disable-arith-relaxation",
+ cl::desc("Disable relaxation of arithmetic instruction for X86"));
+
static unsigned getFixupKindLog2Size(unsigned Kind) {
switch (Kind) {
default: assert(0 && "invalid fixup kind!");
@@ -201,6 +209,9 @@ bool X86AsmBackend::MayNeedRelaxation(const MCInst &Inst) const {
if (getRelaxedOpcodeBranch(Inst.getOpcode()) != Inst.getOpcode())
return true;
+ if (MCDisableArithRelaxation)
+ return false;
+
// Check if this instruction is ever relaxable.
if (getRelaxedOpcodeArith(Inst.getOpcode()) == Inst.getOpcode())
return false;
@@ -307,10 +318,13 @@ public:
: ELFX86AsmBackend(T, OSType) {}
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
- return createELFObjectWriter(new X86ELFObjectWriter(false, OSType,
- ELF::EM_386, false),
+ return createELFObjectWriter(createELFObjectTargetWriter(),
OS, /*IsLittleEndian*/ true);
}
+
+ MCELFObjectTargetWriter *createELFObjectTargetWriter() const {
+ return new X86ELFObjectWriter(false, OSType, ELF::EM_386, false);
+ }
};
class ELFX86_64AsmBackend : public ELFX86AsmBackend {
@@ -319,10 +333,13 @@ public:
: ELFX86AsmBackend(T, OSType) {}
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
- return createELFObjectWriter(new X86ELFObjectWriter(true, OSType,
- ELF::EM_X86_64, true),
+ return createELFObjectWriter(createELFObjectTargetWriter(),
OS, /*IsLittleEndian*/ true);
}
+
+ MCELFObjectTargetWriter *createELFObjectTargetWriter() const {
+ return new X86ELFObjectWriter(true, OSType, ELF::EM_X86_64, true);
+ }
};
class WindowsX86AsmBackend : public X86AsmBackend {
@@ -408,34 +425,26 @@ public:
TargetAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
const std::string &TT) {
- switch (Triple(TT).getOS()) {
- case Triple::Darwin:
+ Triple TheTriple(TT);
+
+ if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO)
return new DarwinX86_32AsmBackend(T);
- case Triple::MinGW32:
- case Triple::Cygwin:
- case Triple::Win32:
- if (Triple(TT).getEnvironment() == Triple::MachO)
- return new DarwinX86_32AsmBackend(T);
- else
- return new WindowsX86AsmBackend(T, false);
- default:
- return new ELFX86_32AsmBackend(T, Triple(TT).getOS());
- }
+
+ if (TheTriple.isOSWindows())
+ return new WindowsX86AsmBackend(T, false);
+
+ return new ELFX86_32AsmBackend(T, TheTriple.getOS());
}
TargetAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
const std::string &TT) {
- switch (Triple(TT).getOS()) {
- case Triple::Darwin:
+ Triple TheTriple(TT);
+
+ if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO)
return new DarwinX86_64AsmBackend(T);
- case Triple::MinGW32:
- case Triple::Cygwin:
- case Triple::Win32:
- if (Triple(TT).getEnvironment() == Triple::MachO)
- return new DarwinX86_64AsmBackend(T);
- else
- return new WindowsX86AsmBackend(T, true);
- default:
- return new ELFX86_64AsmBackend(T, Triple(TT).getOS());
- }
+
+ if (TheTriple.isOSWindows())
+ return new WindowsX86AsmBackend(T, true);
+
+ return new ELFX86_64AsmBackend(T, TheTriple.getOS());
}
diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp
index 99b4479..c2d53c4 100644
--- a/lib/Target/X86/X86AsmPrinter.cpp
+++ b/lib/Target/X86/X86AsmPrinter.cpp
@@ -709,12 +709,13 @@ void X86AsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
//===----------------------------------------------------------------------===//
static MCInstPrinter *createX86MCInstPrinter(const Target &T,
+ TargetMachine &TM,
unsigned SyntaxVariant,
const MCAsmInfo &MAI) {
if (SyntaxVariant == 0)
- return new X86ATTInstPrinter(MAI);
+ return new X86ATTInstPrinter(TM, MAI);
if (SyntaxVariant == 1)
- return new X86IntelInstPrinter(MAI);
+ return new X86IntelInstPrinter(TM, MAI);
return 0;
}
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index a44fb69..5635175 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -215,6 +215,13 @@ def CC_X86_Win64_C : CallingConv<[
// The first 4 integer arguments are passed in integer registers.
CCIfType<[i32], CCAssignToRegWithShadow<[ECX , EDX , R8D , R9D ],
[XMM0, XMM1, XMM2, XMM3]>>,
+
+ // Do not pass the sret argument in RCX, the Win64 thiscall calling
+ // convention requires "this" to be passed in RCX.
+ CCIfCC<"CallingConv::X86_ThisCall",
+ CCIfSRet<CCIfType<[i64], CCAssignToRegWithShadow<[RDX , R8 , R9 ],
+ [XMM1, XMM2, XMM3]>>>>,
+
CCIfType<[i64], CCAssignToRegWithShadow<[RCX , RDX , R8 , R9 ],
[XMM0, XMM1, XMM2, XMM3]>>,
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index 60d9d4a..421e221 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -652,6 +652,8 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
case X86II::TB: // Two-byte opcode prefix
case X86II::T8: // 0F 38
case X86II::TA: // 0F 3A
+ case X86II::A6: // 0F A6
+ case X86II::A7: // 0F A7
Need0FPrefix = true;
break;
case X86II::TF: // F2 0F 38
@@ -695,6 +697,12 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
case X86II::TA: // 0F 3A
MCE.emitByte(0x3A);
break;
+ case X86II::A6: // 0F A6
+ MCE.emitByte(0xA6);
+ break;
+ case X86II::A7: // 0F A7
+ MCE.emitByte(0xA7);
+ break;
}
// If this is a two-address instruction, skip one of the register operands.
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 6fa9284..1382f18 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -23,6 +23,7 @@
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/Operator.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
@@ -77,10 +78,8 @@ private:
bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, unsigned &RR);
- bool X86FastEmitStore(EVT VT, const Value *Val,
- const X86AddressMode &AM);
- bool X86FastEmitStore(EVT VT, unsigned Val,
- const X86AddressMode &AM);
+ bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM);
+ bool X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM);
bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
unsigned &ResultReg);
@@ -125,6 +124,8 @@ private:
unsigned TargetMaterializeAlloca(const AllocaInst *C);
+ unsigned TargetMaterializeFloatZero(const ConstantFP *CF);
+
/// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
/// computed in an SSE register, not on the X87 floating point stack.
bool isScalarFPTypeInSSEReg(EVT VT) const {
@@ -133,6 +134,9 @@ private:
}
bool isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1 = false);
+
+ bool TryEmitSmallMemcpy(X86AddressMode DestAM,
+ X86AddressMode SrcAM, uint64_t Len);
};
} // end anonymous namespace.
@@ -224,8 +228,7 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
/// and a displacement offset, or a GlobalAddress,
/// i.e. V. Return true if it is possible.
bool
-X86FastISel::X86FastEmitStore(EVT VT, unsigned Val,
- const X86AddressMode &AM) {
+X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM) {
// Get opcode and regclass of the output for the given store instruction.
unsigned Opc = 0;
switch (VT.getSimpleVT().SimpleTy) {
@@ -395,37 +398,45 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
const Value *Op = *i;
if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
const StructLayout *SL = TD.getStructLayout(STy);
- unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
- Disp += SL->getElementOffset(Idx);
- } else {
- uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
- SmallVector<const Value *, 4> Worklist;
- Worklist.push_back(Op);
- do {
- Op = Worklist.pop_back_val();
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
- // Constant-offset addressing.
- Disp += CI->getSExtValue() * S;
- } else if (isa<AddOperator>(Op) &&
- isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) {
- // An add with a constant operand. Fold the constant.
- ConstantInt *CI =
- cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
- Disp += CI->getSExtValue() * S;
- // Add the other operand back to the work list.
- Worklist.push_back(cast<AddOperator>(Op)->getOperand(0));
- } else if (IndexReg == 0 &&
- (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
- (S == 1 || S == 2 || S == 4 || S == 8)) {
- // Scaled-index addressing.
- Scale = S;
- IndexReg = getRegForGEPIndex(Op).first;
- if (IndexReg == 0)
- return false;
- } else
- // Unsupported.
- goto unsupported_gep;
- } while (!Worklist.empty());
+ Disp += SL->getElementOffset(cast<ConstantInt>(Op)->getZExtValue());
+ continue;
+ }
+
+ // An array/variable index is always of the form i*S where S is the
+ // constant scale size. See if we can push the scale into immediates.
+ uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
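+ // For example, an index of (add %i, 3) with S == 4 folds 12 into Disp
+ // below and then retries with %i for scaled-index addressing.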
+ for (;;) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
+ // Constant-offset addressing.
+ Disp += CI->getSExtValue() * S;
+ break;
+ }
+ if (isa<AddOperator>(Op) &&
+ (!isa<Instruction>(Op) ||
+ FuncInfo.MBBMap[cast<Instruction>(Op)->getParent()]
+ == FuncInfo.MBB) &&
+ isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) {
+ // An add (in the same block) with a constant operand. Fold the
+ // constant.
+ ConstantInt *CI =
+ cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
+ Disp += CI->getSExtValue() * S;
+ // Iterate on the other operand.
+ Op = cast<AddOperator>(Op)->getOperand(0);
+ continue;
+ }
+ if (IndexReg == 0 &&
+ (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
+ (S == 1 || S == 2 || S == 4 || S == 8)) {
+ // Scaled-index addressing.
+ Scale = S;
+ IndexReg = getRegForGEPIndex(Op).first;
+ if (IndexReg == 0)
+ return false;
+ break;
+ }
+ // Unsupported.
+ goto unsupported_gep;
}
}
// Check for displacement overflow.
@@ -439,7 +450,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
if (X86SelectAddress(U->getOperand(0), AM))
return true;
- // If we couldn't merge the sub value into this addr mode, revert back to
+ // If we couldn't merge the gep value into this addr mode, revert to
// our address and just match the value instead of completely failing.
AM = SavedAM;
break;
@@ -451,91 +462,91 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
// Handle constant address.
if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
- // Can't handle alternate code models yet.
+ // Can't handle alternate code models or TLS yet.
if (TM.getCodeModel() != CodeModel::Small)
return false;
- // RIP-relative addresses can't have additional register operands.
- if (Subtarget->isPICStyleRIPRel() &&
- (AM.Base.Reg != 0 || AM.IndexReg != 0))
- return false;
-
- // Can't handle TLS yet.
if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
if (GVar->isThreadLocal())
return false;
+
+ // RIP-relative addresses can't have additional register operands, so if
+ // we've already folded stuff into the addressing mode, just force the
+ // global value into its own register, which we can use as the basereg.
+ if (!Subtarget->isPICStyleRIPRel() ||
+ (AM.Base.Reg == 0 && AM.IndexReg == 0)) {
+ // Okay, we've committed to selecting this global. Set up the address.
+ AM.GV = GV;
+
+ // Allow the subtarget to classify the global.
+ unsigned char GVFlags = Subtarget->ClassifyGlobalReference(GV, TM);
+
+ // If this reference is relative to the pic base, set it now.
+ if (isGlobalRelativeToPICBase(GVFlags)) {
+ // FIXME: How do we know Base.Reg is free??
+ AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
+ }
- // Okay, we've committed to selecting this global. Set up the basic address.
- AM.GV = GV;
-
- // Allow the subtarget to classify the global.
- unsigned char GVFlags = Subtarget->ClassifyGlobalReference(GV, TM);
-
- // If this reference is relative to the pic base, set it now.
- if (isGlobalRelativeToPICBase(GVFlags)) {
- // FIXME: How do we know Base.Reg is free??
- AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
- }
-
- // Unless the ABI requires an extra load, return a direct reference to
- // the global.
- if (!isGlobalStubReference(GVFlags)) {
- if (Subtarget->isPICStyleRIPRel()) {
- // Use rip-relative addressing if we can. Above we verified that the
- // base and index registers are unused.
- assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
- AM.Base.Reg = X86::RIP;
+ // Unless the ABI requires an extra load, return a direct reference to
+ // the global.
+ if (!isGlobalStubReference(GVFlags)) {
+ if (Subtarget->isPICStyleRIPRel()) {
+ // Use rip-relative addressing if we can. Above we verified that the
+ // base and index registers are unused.
+ assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
+ AM.Base.Reg = X86::RIP;
+ }
+ AM.GVOpFlags = GVFlags;
+ return true;
}
- AM.GVOpFlags = GVFlags;
- return true;
- }
- // Ok, we need to do a load from a stub. If we've already loaded from this
- // stub, reuse the loaded pointer, otherwise emit the load now.
- DenseMap<const Value*, unsigned>::iterator I = LocalValueMap.find(V);
- unsigned LoadReg;
- if (I != LocalValueMap.end() && I->second != 0) {
- LoadReg = I->second;
- } else {
- // Issue load from stub.
- unsigned Opc = 0;
- const TargetRegisterClass *RC = NULL;
- X86AddressMode StubAM;
- StubAM.Base.Reg = AM.Base.Reg;
- StubAM.GV = GV;
- StubAM.GVOpFlags = GVFlags;
-
- // Prepare for inserting code in the local-value area.
- SavePoint SaveInsertPt = enterLocalValueArea();
-
- if (TLI.getPointerTy() == MVT::i64) {
- Opc = X86::MOV64rm;
- RC = X86::GR64RegisterClass;
-
- if (Subtarget->isPICStyleRIPRel())
- StubAM.Base.Reg = X86::RIP;
+ // Ok, we need to do a load from a stub. If we've already loaded from
+ // this stub, reuse the loaded pointer, otherwise emit the load now.
+ DenseMap<const Value*, unsigned>::iterator I = LocalValueMap.find(V);
+ unsigned LoadReg;
+ if (I != LocalValueMap.end() && I->second != 0) {
+ LoadReg = I->second;
} else {
- Opc = X86::MOV32rm;
- RC = X86::GR32RegisterClass;
- }
+ // Issue load from stub.
+ unsigned Opc = 0;
+ const TargetRegisterClass *RC = NULL;
+ X86AddressMode StubAM;
+ StubAM.Base.Reg = AM.Base.Reg;
+ StubAM.GV = GV;
+ StubAM.GVOpFlags = GVFlags;
+
+ // Prepare for inserting code in the local-value area.
+ SavePoint SaveInsertPt = enterLocalValueArea();
+
+ if (TLI.getPointerTy() == MVT::i64) {
+ Opc = X86::MOV64rm;
+ RC = X86::GR64RegisterClass;
+
+ if (Subtarget->isPICStyleRIPRel())
+ StubAM.Base.Reg = X86::RIP;
+ } else {
+ Opc = X86::MOV32rm;
+ RC = X86::GR32RegisterClass;
+ }
- LoadReg = createResultReg(RC);
- MachineInstrBuilder LoadMI =
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), LoadReg);
- addFullAddress(LoadMI, StubAM);
+ LoadReg = createResultReg(RC);
+ MachineInstrBuilder LoadMI =
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), LoadReg);
+ addFullAddress(LoadMI, StubAM);
- // Ok, back to normal mode.
- leaveLocalValueArea(SaveInsertPt);
+ // Ok, back to normal mode.
+ leaveLocalValueArea(SaveInsertPt);
- // Prevent loading GV stub multiple times in same MBB.
- LocalValueMap[V] = LoadReg;
- }
+ // Prevent loading GV stub multiple times in same MBB.
+ LocalValueMap[V] = LoadReg;
+ }
- // Now construct the final address. Note that the Disp, Scale,
- // and Index values may already be set here.
- AM.Base.Reg = LoadReg;
- AM.GV = 0;
- return true;
+ // Now construct the final address. Note that the Disp, Scale,
+ // and Index values may already be set here.
+ AM.Base.Reg = LoadReg;
+ AM.GV = 0;
+ return true;
+ }
}
// If all else fails, try to materialize the value in a register.
@@ -856,12 +867,9 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {
unsigned NEReg = createResultReg(&X86::GR8RegClass);
unsigned PReg = createResultReg(&X86::GR8RegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(X86::SETNEr), NEReg);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(X86::SETPr), PReg);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(X86::OR8rr), ResultReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::SETNEr), NEReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::SETPr), PReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::OR8rr), ResultReg)
.addReg(PReg).addReg(NEReg);
UpdateValueMap(I, ResultReg);
return true;
@@ -1059,14 +1067,49 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
}
}
}
+ } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
+ // Handle things like "%cond = trunc i32 %X to i1 / br i1 %cond", which
+ // typically happen for _Bool and C++ bools.
+ MVT SourceVT;
+ if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
+ isTypeLegal(TI->getOperand(0)->getType(), SourceVT)) {
+ unsigned TestOpc = 0;
+ switch (SourceVT.SimpleTy) {
+ default: break;
+ case MVT::i8: TestOpc = X86::TEST8ri; break;
+ case MVT::i16: TestOpc = X86::TEST16ri; break;
+ case MVT::i32: TestOpc = X86::TEST32ri; break;
+ case MVT::i64: TestOpc = X86::TEST64ri32; break;
+ }
+ if (TestOpc) {
+ unsigned OpReg = getRegForValue(TI->getOperand(0));
+ if (OpReg == 0) return false;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TestOpc))
+ .addReg(OpReg).addImm(1);
+
+ unsigned JmpOpc = X86::JNE_4;
+ if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
+ std::swap(TrueMBB, FalseMBB);
+ JmpOpc = X86::JE_4;
+ }
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(JmpOpc))
+ .addMBB(TrueMBB);
+ FastEmitBranch(FalseMBB, DL);
+ FuncInfo.MBB->addSuccessor(TrueMBB);
+ return true;
+ }
+ }
}
// Otherwise do a clumsy setcc and re-test it.
+ // Note that i1 essentially gets ANY_EXTEND'ed to i8 where it isn't used
+ // in an explicit cast, so make sure to handle that correctly.
unsigned OpReg = getRegForValue(BI->getCondition());
if (OpReg == 0) return false;
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TEST8rr))
- .addReg(OpReg).addReg(OpReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TEST8ri))
+ .addReg(OpReg).addImm(1);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::JNE_4))
.addMBB(TrueMBB);
FastEmitBranch(FalseMBB, DL);
@@ -1075,42 +1118,42 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
}
bool X86FastISel::X86SelectShift(const Instruction *I) {
- unsigned CReg = 0, OpReg = 0, OpImm = 0;
+ unsigned CReg = 0, OpReg = 0;
const TargetRegisterClass *RC = NULL;
if (I->getType()->isIntegerTy(8)) {
CReg = X86::CL;
RC = &X86::GR8RegClass;
switch (I->getOpcode()) {
- case Instruction::LShr: OpReg = X86::SHR8rCL; OpImm = X86::SHR8ri; break;
- case Instruction::AShr: OpReg = X86::SAR8rCL; OpImm = X86::SAR8ri; break;
- case Instruction::Shl: OpReg = X86::SHL8rCL; OpImm = X86::SHL8ri; break;
+ case Instruction::LShr: OpReg = X86::SHR8rCL; break;
+ case Instruction::AShr: OpReg = X86::SAR8rCL; break;
+ case Instruction::Shl: OpReg = X86::SHL8rCL; break;
default: return false;
}
} else if (I->getType()->isIntegerTy(16)) {
CReg = X86::CX;
RC = &X86::GR16RegClass;
switch (I->getOpcode()) {
- case Instruction::LShr: OpReg = X86::SHR16rCL; OpImm = X86::SHR16ri; break;
- case Instruction::AShr: OpReg = X86::SAR16rCL; OpImm = X86::SAR16ri; break;
- case Instruction::Shl: OpReg = X86::SHL16rCL; OpImm = X86::SHL16ri; break;
+ case Instruction::LShr: OpReg = X86::SHR16rCL; break;
+ case Instruction::AShr: OpReg = X86::SAR16rCL; break;
+ case Instruction::Shl: OpReg = X86::SHL16rCL; break;
default: return false;
}
} else if (I->getType()->isIntegerTy(32)) {
CReg = X86::ECX;
RC = &X86::GR32RegClass;
switch (I->getOpcode()) {
- case Instruction::LShr: OpReg = X86::SHR32rCL; OpImm = X86::SHR32ri; break;
- case Instruction::AShr: OpReg = X86::SAR32rCL; OpImm = X86::SAR32ri; break;
- case Instruction::Shl: OpReg = X86::SHL32rCL; OpImm = X86::SHL32ri; break;
+ case Instruction::LShr: OpReg = X86::SHR32rCL; break;
+ case Instruction::AShr: OpReg = X86::SAR32rCL; break;
+ case Instruction::Shl: OpReg = X86::SHL32rCL; break;
default: return false;
}
} else if (I->getType()->isIntegerTy(64)) {
CReg = X86::RCX;
RC = &X86::GR64RegClass;
switch (I->getOpcode()) {
- case Instruction::LShr: OpReg = X86::SHR64rCL; OpImm = X86::SHR64ri; break;
- case Instruction::AShr: OpReg = X86::SAR64rCL; OpImm = X86::SAR64ri; break;
- case Instruction::Shl: OpReg = X86::SHL64rCL; OpImm = X86::SHL64ri; break;
+ case Instruction::LShr: OpReg = X86::SHR64rCL; break;
+ case Instruction::AShr: OpReg = X86::SAR64rCL; break;
+ case Instruction::Shl: OpReg = X86::SHL64rCL; break;
default: return false;
}
} else {
@@ -1124,15 +1167,6 @@ bool X86FastISel::X86SelectShift(const Instruction *I) {
unsigned Op0Reg = getRegForValue(I->getOperand(0));
if (Op0Reg == 0) return false;
- // Fold immediate in shl(x,3).
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
- unsigned ResultReg = createResultReg(RC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpImm),
- ResultReg).addReg(Op0Reg).addImm(CI->getZExtValue() & 0xff);
- UpdateValueMap(I, ResultReg);
- return true;
- }
-
unsigned Op1Reg = getRegForValue(I->getOperand(1));
if (Op1Reg == 0) return false;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
@@ -1294,10 +1328,61 @@ bool X86FastISel::X86SelectExtractValue(const Instruction *I) {
return false;
}
+bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM,
+ X86AddressMode SrcAM, uint64_t Len) {
+ // Make sure we don't bloat code by inlining very large memcpys.
+ bool i64Legal = TLI.isTypeLegal(MVT::i64);
+ if (Len > (i64Legal ? 32 : 16)) return false;
+
+ // We don't care about alignment here since we just emit integer accesses.
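+ // For example, on x86-64 a 13-byte copy is emitted as one i64, one i32,
+ // and one i8 load/store pair.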
+ while (Len) {
+ MVT VT;
+ if (Len >= 8 && i64Legal)
+ VT = MVT::i64;
+ else if (Len >= 4)
+ VT = MVT::i32;
+ else if (Len >= 2)
+ VT = MVT::i16;
+ else {
+ assert(Len == 1);
+ VT = MVT::i8;
+ }
+
+ unsigned Reg;
+ bool RV = X86FastEmitLoad(VT, SrcAM, Reg);
+ RV &= X86FastEmitStore(VT, Reg, DestAM);
+ assert(RV && "Failed to emit load or store??");
+
+ unsigned Size = VT.getSizeInBits()/8;
+ Len -= Size;
+ DestAM.Disp += Size;
+ SrcAM.Disp += Size;
+ }
+
+ return true;
+}
+
bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
// FIXME: Handle more intrinsics.
switch (I.getIntrinsicID()) {
default: return false;
+ case Intrinsic::memcpy: {
+ const MemCpyInst &MCI = cast<MemCpyInst>(I);
+ // Don't handle volatile or variable length memcpys.
+ if (MCI.isVolatile() || !isa<ConstantInt>(MCI.getLength()))
+ return false;
+
+ uint64_t Len = cast<ConstantInt>(MCI.getLength())->getZExtValue();
+
+ // Compute address modes for the destination and source.
+ X86AddressMode DestAM, SrcAM;
+ if (!X86SelectAddress(MCI.getRawDest(), DestAM) ||
+ !X86SelectAddress(MCI.getRawSource(), SrcAM))
+ return false;
+
+ return TryEmitSmallMemcpy(DestAM, SrcAM, Len);
+ }
+
case Intrinsic::stackprotector: {
// Emit code inline code to store the stack guard onto the stack.
EVT PtrTy = TLI.getPointerTy();
@@ -1308,17 +1393,14 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
// Grab the frame index.
X86AddressMode AM;
if (!X86SelectAddress(Slot, AM)) return false;
-
if (!X86FastEmitStore(PtrTy, Op1, AM)) return false;
-
return true;
}
case Intrinsic::objectsize: {
- ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(1));
+ // FIXME: This should be moved to generic code!
+ ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
const Type *Ty = I.getCalledFunction()->getReturnType();
- assert(CI && "Non-constant type in Intrinsic::objectsize?");
-
MVT VT;
if (!isTypeLegal(Ty, VT))
return false;
@@ -1356,6 +1438,8 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
}
case Intrinsic::sadd_with_overflow:
case Intrinsic::uadd_with_overflow: {
+ // FIXME: Should fold immediates.
+
// Replace "add with overflow" intrinsics with an "add" instruction followed
// by a seto/setc instruction. Later on, when the "extractvalue"
// instructions are encountered, we use the fact that two registers were
@@ -1427,8 +1511,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
// Handle only C and fastcc calling conventions for now.
ImmutableCallSite CS(CI);
CallingConv::ID CC = CS.getCallingConv();
- if (CC != CallingConv::C &&
- CC != CallingConv::Fast &&
+ if (CC != CallingConv::C && CC != CallingConv::Fast &&
CC != CallingConv::X86_FastCall)
return false;
@@ -1437,14 +1520,17 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
if (CC == CallingConv::Fast && GuaranteedTailCallOpt)
return false;
- // Let SDISel handle vararg functions.
const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
- if (FTy->isVarArg())
+ bool isVarArg = FTy->isVarArg();
+
+ // Don't know how to handle Win64 varargs yet. Nothing special is needed
+ // for x86-32, and the special handling for x86-64 is implemented below.
+ if (isVarArg && Subtarget->isTargetWin64())
return false;
// Fast-isel doesn't know about callee-pop yet.
- if (Subtarget->IsCalleePop(FTy->isVarArg(), CC))
+ if (Subtarget->IsCalleePop(isVarArg, CC))
return false;
// Handle *simple* calls for now.
@@ -1487,9 +1573,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
ArgFlags.reserve(CS.arg_size());
for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
i != e; ++i) {
- unsigned Arg = getRegForValue(*i);
- if (Arg == 0)
- return false;
+ Value *ArgVal = *i;
ISD::ArgFlagsTy Flags;
unsigned AttrInd = i - CS.arg_begin() + 1;
if (CS.paramHasAttr(AttrInd, Attribute::SExt))
@@ -1497,34 +1581,67 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
Flags.setZExt();
+ // If this is an i1/i8/i16 argument, promote to i32 to avoid an extra
+ // instruction. This is safe because it is common to all fastisel supported
+ // calling conventions on x86.
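+ // For example, a zeroext i8 constant 42 becomes the i32 constant 42 here.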
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(ArgVal)) {
+ if (CI->getBitWidth() == 1 || CI->getBitWidth() == 8 ||
+ CI->getBitWidth() == 16) {
+ if (Flags.isSExt())
+ ArgVal = ConstantExpr::getSExt(CI,Type::getInt32Ty(CI->getContext()));
+ else
+ ArgVal = ConstantExpr::getZExt(CI,Type::getInt32Ty(CI->getContext()));
+ }
+ }
+
+ unsigned ArgReg;
+
+ // Passing bools around ends up doing a trunc to i1 and passing it.
+ // Codegen this as an argument + "and 1".
+ if (ArgVal->getType()->isIntegerTy(1) && isa<TruncInst>(ArgVal) &&
+ cast<TruncInst>(ArgVal)->getParent() == I->getParent() &&
+ ArgVal->hasOneUse()) {
+ ArgVal = cast<TruncInst>(ArgVal)->getOperand(0);
+ ArgReg = getRegForValue(ArgVal);
+ if (ArgReg == 0) return false;
+
+ MVT ArgVT;
+ if (!isTypeLegal(ArgVal->getType(), ArgVT)) return false;
+
+ ArgReg = FastEmit_ri(ArgVT, ArgVT, ISD::AND, ArgReg,
+ ArgVal->hasOneUse(), 1);
+ } else {
+ ArgReg = getRegForValue(ArgVal);
+ }
+
+ if (ArgReg == 0) return false;
+
// FIXME: Only handle *easy* calls for now.
if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
- CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
CS.paramHasAttr(AttrInd, Attribute::Nest) ||
CS.paramHasAttr(AttrInd, Attribute::ByVal))
return false;
- const Type *ArgTy = (*i)->getType();
+ const Type *ArgTy = ArgVal->getType();
MVT ArgVT;
if (!isTypeLegal(ArgTy, ArgVT))
return false;
unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
Flags.setOrigAlign(OriginalAlignment);
- Args.push_back(Arg);
- ArgVals.push_back(*i);
+ Args.push_back(ArgReg);
+ ArgVals.push_back(ArgVal);
ArgVTs.push_back(ArgVT);
ArgFlags.push_back(Flags);
}
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CC, false, TM, ArgLocs, I->getParent()->getContext());
+ CCState CCInfo(CC, isVarArg, TM, ArgLocs, I->getParent()->getContext());
// Allocate shadow area for Win64
- if (Subtarget->isTargetWin64()) {
+ if (Subtarget->isTargetWin64())
CCInfo.AllocateStack(32, 8);
- }
CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_X86);
@@ -1618,6 +1735,17 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
X86::EBX).addReg(Base);
}
+ if (Subtarget->is64Bit() && isVarArg && !Subtarget->isTargetWin64()) {
+ // Count the number of XMM registers allocated.
+ static const unsigned XMMArgRegs[] = {
+ X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
+ X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
+ };
+ unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
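+ // The System V x86-64 ABI requires %al to hold an upper bound on the
+ // number of vector registers used for a varargs call.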
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::MOV8ri),
+ X86::AL).addImm(NumXMMRegs);
+ }
+
// Issue the call.
MachineInstrBuilder MIB;
if (CalleeOp) {
@@ -1656,7 +1784,8 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
OpFlags = X86II::MO_PLT;
} else if (Subtarget->isPICStyleStubAny() &&
(GV->isDeclaration() || GV->isWeakForLinker()) &&
- Subtarget->getDarwinVers() < 9) {
+ (!Subtarget->getTargetTriple().isMacOSX() ||
+ Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
// PC-relative references to external symbols should go through $stub,
// unless we're building with the leopard linker or later, which
// automatically synthesizes these stubs.
@@ -1672,14 +1801,20 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
if (Subtarget->isPICStyleGOT())
MIB.addReg(X86::EBX);
+ if (Subtarget->is64Bit() && isVarArg && !Subtarget->isTargetWin64())
+ MIB.addReg(X86::AL);
+
// Add implicit physical register uses to the call.
for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
MIB.addReg(RegArgs[i]);
// Issue CALLSEQ_END
unsigned AdjStackUp = TM.getRegisterInfo()->getCallFrameDestroyOpcode();
+ unsigned NumBytesCallee = 0;
+ if (!Subtarget->is64Bit() && CS.paramHasAttr(1, Attribute::StructRet))
+ NumBytesCallee = 4;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackUp))
- .addImm(NumBytes).addImm(0);
+ .addImm(NumBytes).addImm(NumBytesCallee);
// Now handle call return value (if any).
SmallVector<unsigned, 4> UsedRegs;
@@ -1850,10 +1985,13 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
if (isa<GlobalValue>(C)) {
X86AddressMode AM;
if (X86SelectAddress(C, AM)) {
- if (TLI.getPointerTy() == MVT::i32)
- Opc = X86::LEA32r;
- else
- Opc = X86::LEA64r;
+ // If the expression is just a basereg, then we're done, otherwise we need
+ // to emit an LEA.
+ if (AM.BaseType == X86AddressMode::RegBase &&
+ AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == 0)
+ return AM.Base.Reg;
+
+ Opc = TLI.getPointerTy() == MVT::i32 ? X86::LEA32r : X86::LEA64r;
unsigned ResultReg = createResultReg(RC);
addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(Opc), ResultReg), AM);
@@ -1915,6 +2053,45 @@ unsigned X86FastISel::TargetMaterializeAlloca(const AllocaInst *C) {
return ResultReg;
}
+unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) {
+ MVT VT;
+ if (!isTypeLegal(CF->getType(), VT))
+ return false;
+
+ // Get opcode and regclass for the given zero.
+ unsigned Opc = 0;
+ const TargetRegisterClass *RC = NULL;
+ switch (VT.SimpleTy) {
+ default: return false;
+ case MVT::f32:
+ if (Subtarget->hasSSE1()) {
+ Opc = X86::FsFLD0SS;
+ RC = X86::FR32RegisterClass;
+ } else {
+ Opc = X86::LD_Fp032;
+ RC = X86::RFP32RegisterClass;
+ }
+ break;
+ case MVT::f64:
+ if (Subtarget->hasSSE2()) {
+ Opc = X86::FsFLD0SD;
+ RC = X86::FR64RegisterClass;
+ } else {
+ Opc = X86::LD_Fp064;
+ RC = X86::RFP64RegisterClass;
+ }
+ break;
+ case MVT::f80:
+ // No f80 support yet.
+ return false;
+ }
+
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg);
+ return ResultReg;
+}
+
+
/// TryToFoldLoad - The specified machine instr operand is a vreg, and that
/// vreg is being provided by the specified load instruction. If possible,
/// try to fold the load as an operand to the instruction, returning true if
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index 3aaa693..325d061 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -1307,7 +1307,7 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
// set up by FpSET_ST0, and our StackTop is off by one because of it.
unsigned Op0 = getFPReg(MI->getOperand(0));
// Restore the actual StackTop from before Fp_SET_ST0.
- // Note we can't handle Fp_SET_ST1 without a preceeding Fp_SET_ST0, and we
+ // Note we can't handle Fp_SET_ST1 without a preceding Fp_SET_ST0, and we
// are not enforcing the constraint.
++StackTop;
unsigned RegOnTop = getStackEntry(0); // This reg must remain in st(0).
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index 0a3f931..06d12fc 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -22,6 +22,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/CommandLine.h"
@@ -296,7 +297,7 @@ void X86FrameLowering::emitCalleeSavedFrameMoves(MachineFunction &MF,
// FIXME: This is a dirty hack. The code itself is a mess right now.
// It should be rewritten from scratch and generalized sometime.
- // Determine maximum offset (minumum due to stack growth).
+ // Determine maximum offset (minimum due to stack growth).
int64_t MaxOffset = 0;
for (std::vector<CalleeSavedInfo>::const_iterator
I = CSI.begin(), E = CSI.end(); I != E; ++I)
@@ -551,65 +552,71 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// responsible for adjusting the stack pointer. Touching the stack at 4K
// increments is necessary to ensure that the guard pages used by the OS
// virtual memory manager are allocated in correct sequence.
- if (NumBytes >= 4096 &&
- (STI.isTargetCygMing() || STI.isTargetWin32()) &&
- !STI.isTargetEnvMacho()) {
+ if (NumBytes >= 4096 && STI.isTargetCOFF() && !STI.isTargetEnvMacho()) {
+ const char *StackProbeSymbol;
+ bool isSPUpdateNeeded = false;
+
+ if (Is64Bit) {
+ if (STI.isTargetCygMing())
+ StackProbeSymbol = "___chkstk";
+ else {
+ StackProbeSymbol = "__chkstk";
+ isSPUpdateNeeded = true;
+ }
+ } else if (STI.isTargetCygMing())
+ StackProbeSymbol = "_alloca";
+ else
+ StackProbeSymbol = "_chkstk";
+
// Check whether EAX is livein for this function.
bool isEAXAlive = isEAXLiveIn(MF);
- const char *StackProbeSymbol =
- STI.isTargetWindows() ? "_chkstk" : "_alloca";
- if (Is64Bit && STI.isTargetCygMing())
- StackProbeSymbol = "__chkstk";
- unsigned CallOp = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32;
- if (!isEAXAlive) {
- BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
- .addImm(NumBytes);
- BuildMI(MBB, MBBI, DL, TII.get(CallOp))
- .addExternalSymbol(StackProbeSymbol)
- .addReg(StackPtr, RegState::Define | RegState::Implicit)
- .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
- } else {
+ if (isEAXAlive) {
+ // Sanity check: a live EAX is only handled in the 32-bit case; it
+ // must never be live here on x64, so assert that.
+ assert(!Is64Bit && "EAX is livein in x64 case!");
+
// Save EAX
BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
.addReg(X86::EAX, RegState::Kill);
+ }
- // Allocate NumBytes-4 bytes on stack. We'll also use 4 already
- // allocated bytes for EAX.
+ if (Is64Bit) {
+ // Handle the 64-bit Windows ABI case where we need to call __chkstk.
+ // Function prologue is responsible for adjusting the stack pointer.
+ BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::RAX)
+ .addImm(NumBytes);
+ } else {
+ // If EAX is alive, allocate NumBytes-4 bytes on the stack; the 4 bytes
+ // already pushed to save EAX cover the rest.
BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
- .addImm(NumBytes - 4);
- BuildMI(MBB, MBBI, DL, TII.get(CallOp))
- .addExternalSymbol(StackProbeSymbol)
- .addReg(StackPtr, RegState::Define | RegState::Implicit)
- .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
-
- // Restore EAX
- MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
- X86::EAX),
- StackPtr, false, NumBytes - 4);
- MBB.insert(MBBI, MI);
+ .addImm(isEAXAlive ? NumBytes - 4 : NumBytes);
+ }
+
+ BuildMI(MBB, MBBI, DL,
+ TII.get(Is64Bit ? X86::W64ALLOCA : X86::CALLpcrel32))
+ .addExternalSymbol(StackProbeSymbol)
+ .addReg(StackPtr, RegState::Define | RegState::Implicit)
+ .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
+
+ // MSVC x64's __chkstk does not adjust %rsp itself, so emit the update here.
+ // FIXME: %rax preserves the offset and should be available.
+ if (isSPUpdateNeeded)
+ emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit,
+ TII, *RegInfo);
+
+ if (isEAXAlive) {
+ // Restore EAX
+ MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
+ X86::EAX),
+ StackPtr, false, NumBytes - 4);
+ MBB.insert(MBBI, MI);
}
- } else if (NumBytes >= 4096 &&
- STI.isTargetWin64() &&
- !STI.isTargetEnvMacho()) {
- // Sanity check that EAX is not livein for this function. It should
- // not be, so throw an assert.
- assert(!isEAXLiveIn(MF) && "EAX is livein in the Win64 case!");
-
- // Handle the 64-bit Windows ABI case where we need to call __chkstk.
- // Function prologue is responsible for adjusting the stack pointer.
- BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
- .addImm(NumBytes);
- BuildMI(MBB, MBBI, DL, TII.get(X86::WINCALL64pcrel32))
- .addExternalSymbol("__chkstk")
- .addReg(StackPtr, RegState::Define | RegState::Implicit);
- emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit,
- TII, *RegInfo);
} else if (NumBytes)
emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit,
TII, *RegInfo);
- if ((NumBytes || PushedRegs) && needsFrameMoves) {
+ if (( (!HasFP && NumBytes) || PushedRegs) && needsFrameMoves) {
// Mark end of stack pointer adjustment.
MCSymbol *Label = MMI.getContext().CreateTempSymbol();
BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label);
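The rewritten probe path above folds the old Win32/Win64/MinGW branches into
one. As a cross-check, a hypothetical standalone table (plain C++, not the
LLVM API) of which probe symbol each target now gets and whether the prologue
must still adjust the stack pointer itself:

    #include <cstdio>
    // Probe symbol and follow-up per target, as selected by the new prologue
    // code: only MSVC x64's __chkstk leaves the SP adjustment to the caller.
    int main() {
      struct { bool Is64Bit, IsCygMing; const char *Sym; bool NeedsSPUpdate; }
      Cases[] = {
        {true,  true,  "___chkstk", false}, // MinGW-w64: probe updates RSP
        {true,  false, "__chkstk",  true }, // MSVC x64: emitSPUpdate afterwards
        {false, true,  "_alloca",   false},
        {false, false, "_chkstk",   false},
      };
      for (const auto &C : Cases)
        std::printf("%d-bit %s -> %s%s\n", C.Is64Bit ? 64 : 32,
                    C.IsCygMing ? "cygming" : "msvc", C.Sym,
                    C.NeedsSPUpdate ? " (+ SP update)" : "");
      return 0;
    }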
@@ -779,7 +786,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
assert(Offset >= 0 && "Offset should never be negative");
if (Offset) {
- // Check for possible merge with preceeding ADD instruction.
+ // Check for possible merge with preceding ADD instruction.
Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true);
emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, TII, *RegInfo);
}
@@ -823,7 +830,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
int delta = -1*X86FI->getTCReturnAddrDelta();
MBBI = MBB.getLastNonDebugInstr();
- // Check for possible merge with preceeding ADD instruction.
+ // Check for possible merge with preceding ADD instruction.
delta += mergeSPUpdates(MBB, MBBI, StackPtr, true);
emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, TII, *RegInfo);
}
@@ -892,7 +899,6 @@ bool X86FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineFunction &MF = *MBB.getParent();
- bool isWin64 = STI.isTargetWin64();
unsigned SlotSize = STI.is64Bit() ? 8 : 4;
unsigned FPReg = TRI->getFrameRegister(MF);
unsigned CalleeFrameSize = 0;
@@ -900,25 +906,39 @@ bool X86FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ // Push GPRs. This increases the frame size.
unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
for (unsigned i = CSI.size(); i != 0; --i) {
unsigned Reg = CSI[i-1].getReg();
+ if (!X86::GR64RegClass.contains(Reg) &&
+ !X86::GR32RegClass.contains(Reg))
+ continue;
// Add the callee-saved register as live-in. It's killed at the spill.
MBB.addLiveIn(Reg);
if (Reg == FPReg)
// X86RegisterInfo::emitPrologue will handle spilling of frame register.
continue;
- if (!X86::VR128RegClass.contains(Reg) && !isWin64) {
- CalleeFrameSize += SlotSize;
- BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill);
- } else {
- const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(),
- RC, TRI);
- }
+ CalleeFrameSize += SlotSize;
+ BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill);
}
X86FI->setCalleeSavedFrameSize(CalleeFrameSize);
+
+ // Spill XMM registers. X86 has no push/pop instructions for XMMs, so
+ // they are stored to slots in the stack frame instead.
+ // Note that only the Win64 ABI may spill XMMs.
+ for (unsigned i = CSI.size(); i != 0; --i) {
+ unsigned Reg = CSI[i-1].getReg();
+ if (X86::GR64RegClass.contains(Reg) ||
+ X86::GR32RegClass.contains(Reg))
+ continue;
+ // Add the callee-saved register as live-in. It's killed at the spill.
+ MBB.addLiveIn(Reg);
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(),
+ RC, TRI);
+ }
+
return true;
}
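With the Win64 special case removed, the save path above is now two passes.
A hypothetical sketch of the resulting ordering (plain C++, not the
MachineInstr API; the frame-pointer special case is omitted):

    #include <string>
    #include <vector>
    // GPRs are pushed first, then non-GPRs (XMMs under Win64) are stored to
    // frame slots; the restore path mirrors it: reload XMMs, then pop GPRs.
    struct Saved { std::string Reg; bool IsGPR; };
    static std::vector<std::string> saveOps(const std::vector<Saved> &CSI) {
      std::vector<std::string> Ops;
      for (auto I = CSI.rbegin(); I != CSI.rend(); ++I)
        if (I->IsGPR)
          Ops.push_back("push " + I->Reg);
      for (auto I = CSI.rbegin(); I != CSI.rend(); ++I)
        if (!I->IsGPR)
          Ops.push_back("store " + I->Reg + " to its frame slot");
      return Ops;
    }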
@@ -933,21 +953,30 @@ bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineFunction &MF = *MBB.getParent();
const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ // Reload XMMs from stack frame.
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ if (X86::GR64RegClass.contains(Reg) ||
+ X86::GR32RegClass.contains(Reg))
+ continue;
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(),
+ RC, TRI);
+ }
+
+ // POP GPRs.
unsigned FPReg = TRI->getFrameRegister(MF);
- bool isWin64 = STI.isTargetWin64();
unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
unsigned Reg = CSI[i].getReg();
+ if (!X86::GR64RegClass.contains(Reg) &&
+ !X86::GR32RegClass.contains(Reg))
+ continue;
if (Reg == FPReg)
// X86RegisterInfo::emitEpilogue will handle restoring of frame register.
continue;
- if (!X86::VR128RegClass.contains(Reg) && !isWin64) {
- BuildMI(MBB, MI, DL, TII.get(Opc), Reg);
- } else {
- const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(),
- RC, TRI);
- }
+ BuildMI(MBB, MI, DL, TII.get(Opc), Reg);
}
return true;
}
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 9b0ec6e..4534e85 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -1580,6 +1580,81 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
return RetVal;
break;
}
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR: {
+ // For operations of the form (x << C1) op C2, check if we can use a smaller
+ // encoding for C2 by transforming it into (x op (C2>>C1)) << C1.
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+
+ if (N0->getOpcode() != ISD::SHL || !N0->hasOneUse())
+ break;
+
+ // i8 is unshrinkable; i16 should be promoted to i32.
+ if (NVT != MVT::i32 && NVT != MVT::i64)
+ break;
+
+ ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
+ ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
+ if (!Cst || !ShlCst)
+ break;
+
+ int64_t Val = Cst->getSExtValue();
+ uint64_t ShlVal = ShlCst->getZExtValue();
+
+ // Make sure that we don't change the operation by removing bits.
+ // This only matters for OR and XOR; AND is unaffected.
+ if (Opcode != ISD::AND && ((Val >> ShlVal) << ShlVal) != Val)
+ break;
+
+ unsigned ShlOp, Op = 0;
+ EVT CstVT = NVT;
+
+ // Check the minimum bitwidth for the new constant.
+ // TODO: AND32ri is the same as AND64ri32 with zext imm.
+ // TODO: MOV32ri+OR64r is cheaper than MOV64ri64+OR64rr
+ // TODO: Using 16 and 8 bit operations is also possible for or32 & xor32.
+ if (!isInt<8>(Val) && isInt<8>(Val >> ShlVal))
+ CstVT = MVT::i8;
+ else if (!isInt<32>(Val) && isInt<32>(Val >> ShlVal))
+ CstVT = MVT::i32;
+
+ // Bail if there is no smaller encoding.
+ if (NVT == CstVT)
+ break;
+
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unsupported VT!");
+ case MVT::i32:
+ assert(CstVT == MVT::i8);
+ ShlOp = X86::SHL32ri;
+
+ switch (Opcode) {
+ case ISD::AND: Op = X86::AND32ri8; break;
+ case ISD::OR: Op = X86::OR32ri8; break;
+ case ISD::XOR: Op = X86::XOR32ri8; break;
+ }
+ break;
+ case MVT::i64:
+ assert(CstVT == MVT::i8 || CstVT == MVT::i32);
+ ShlOp = X86::SHL64ri;
+
+ switch (Opcode) {
+ case ISD::AND: Op = CstVT==MVT::i8? X86::AND64ri8 : X86::AND64ri32; break;
+ case ISD::OR: Op = CstVT==MVT::i8? X86::OR64ri8 : X86::OR64ri32; break;
+ case ISD::XOR: Op = CstVT==MVT::i8? X86::XOR64ri8 : X86::XOR64ri32; break;
+ }
+ break;
+ }
+
+ // Emit the smaller op and the shift.
+ SDValue NewCst = CurDAG->getTargetConstant(Val >> ShlVal, CstVT);
+ SDNode *New = CurDAG->getMachineNode(Op, dl, NVT, N0->getOperand(0),NewCst);
+ return CurDAG->SelectNodeTo(Node, ShlOp, NVT, SDValue(New, 0),
+ getI8Imm(ShlVal));
+ break;
+ }
case X86ISD::UMUL: {
SDValue N0 = Node->getOperand(0);
SDValue N1 = Node->getOperand(1);
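The new AND/OR/XOR case above describes its transform abstractly; a concrete
instance (a sketch in plain C++ arithmetic, not SelectionDAG code) shows why
the rewritten constant is cheaper to encode:

    #include <cassert>
    #include <cstdint>
    // (x << 8) | 0x4500 needs a 32-bit immediate, but rewriting it as
    // ((x | 0x45) << 8) lets the OR use an 8-bit immediate (OR64ri8).
    // For OR/XOR the rewrite is only legal when no set bits of the constant
    // are shifted away, i.e. ((C2 >> C1) << C1) == C2; AND is always safe.
    int main() {
      uint64_t x = 0x1234, C1 = 8, C2 = 0x4500;
      assert(((C2 >> C1) << C1) == C2);                    // no bits lost
      assert(((x << C1) | C2) == ((x | (C2 >> C1)) << C1)); // same result
      return 0;
    }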
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 2f49dbc..703c01d 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -45,6 +45,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/VectorExtras.h"
+#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ErrorHandling.h"
@@ -221,7 +222,13 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// X86 is weird, it always uses i8 for shift amounts and setcc results.
setBooleanContents(ZeroOrOneBooleanContent);
- setSchedulingPreference(Sched::RegPressure);
+
+ // For 64-bit, since we have so many registers, use the ILP scheduler; for
+ // 32-bit code, use register-pressure-specific scheduling.
+ if (Subtarget->is64Bit())
+ setSchedulingPreference(Sched::ILP);
+ else
+ setSchedulingPreference(Sched::RegPressure);
setStackPointerRegisterToSaveRestore(X86StackPtr);
if (Subtarget->isTargetWindows() && !Subtarget->isTargetCygMing()) {
@@ -543,12 +550,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
- if (Subtarget->is64Bit())
- setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
- if (Subtarget->isTargetCygMing() || Subtarget->isTargetWindows())
- setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
- else
- setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC,
+ (Subtarget->is64Bit() ? MVT::i64 : MVT::i32),
+ (Subtarget->isTargetCOFF()
+ && !Subtarget->isTargetEnvMacho()
+ ? Custom : Expand));
if (!UseSoftFloat && X86ScalarSSEf64) {
// f32 and f64 use SSE.
@@ -921,6 +927,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// Can turn SHL into an integer multiply.
setOperationAction(ISD::SHL, MVT::v4i32, Custom);
setOperationAction(ISD::SHL, MVT::v16i8, Custom);
+ setOperationAction(ISD::SRL, MVT::v4i32, Legal);
// i8 and i16 vectors are custom, because the source register and source
// memory operand types are not the same width. f32 vectors are
@@ -1271,27 +1278,6 @@ X86TargetLowering::findRepresentativeClass(EVT VT) const{
return std::make_pair(RRC, Cost);
}
-// FIXME: Why this routine is here? Move to RegInfo!
-unsigned
-X86TargetLowering::getRegPressureLimit(const TargetRegisterClass *RC,
- MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-
- unsigned FPDiff = TFI->hasFP(MF) ? 1 : 0;
- switch (RC->getID()) {
- default:
- return 0;
- case X86::GR32RegClassID:
- return 4 - FPDiff;
- case X86::GR64RegClassID:
- return 8 - FPDiff;
- case X86::VR128RegClassID:
- return Subtarget->is64Bit() ? 10 : 4;
- case X86::VR64RegClassID:
- return 4;
- }
-}
-
bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace,
unsigned &Offset) const {
if (!Subtarget->isTargetLinux())
@@ -1463,6 +1449,20 @@ bool X86TargetLowering::isUsedByReturnOnly(SDNode *N) const {
return HasRet;
}
+EVT
+X86TargetLowering::getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
+ ISD::NodeType ExtendKind) const {
+ MVT ReturnMVT;
+ // TODO: Is this also valid on 32-bit?
+ if (Subtarget->is64Bit() && VT == MVT::i1 && ExtendKind == ISD::ZERO_EXTEND)
+ ReturnMVT = MVT::i8;
+ else
+ ReturnMVT = MVT::i32;
+
+ EVT MinVT = getRegisterType(Context, ReturnMVT);
+ return VT.bitsLT(MinVT) ? MinVT : VT;
+}
+
/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
///
@@ -1595,6 +1595,18 @@ static bool IsTailCallConvention(CallingConv::ID CC) {
return (CC == CallingConv::Fast || CC == CallingConv::GHC);
}
+bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
+ if (!CI->isTailCall())
+ return false;
+
+ CallSite CS(CI);
+ CallingConv::ID CalleeCC = CS.getCallingConv();
+ if (!IsTailCallConvention(CalleeCC) && CalleeCC != CallingConv::C)
+ return false;
+
+ return true;
+}
+
/// FuncIsMadeTailCallSafe - Return true if the function is being made into
/// a tailcall target by changing its ABI.
static bool FuncIsMadeTailCallSafe(CallingConv::ID CC) {
@@ -1627,8 +1639,9 @@ X86TargetLowering::LowerMemArgument(SDValue Chain,
// In case of tail call optimization mark all arguments mutable. Since they
// could be overwritten by lowering of arguments in case of a tail call.
if (Flags.isByVal()) {
- int FI = MFI->CreateFixedObject(Flags.getByValSize(),
- VA.getLocMemOffset(), isImmutable);
+ unsigned Bytes = Flags.getByValSize();
+ if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
+ int FI = MFI->CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable);
return DAG.getFrameIndex(FI, getPointerTy());
} else {
int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8,
@@ -1765,8 +1778,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
// If the function takes variable number of arguments, make a frame index for
// the start of the first vararg value... for expansion of llvm.va_start.
if (isVarArg) {
- if (!IsWin64 && (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
- CallConv != CallingConv::X86_ThisCall))) {
+ if (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
+ CallConv != CallingConv::X86_ThisCall)) {
FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize,true));
}
if (Is64Bit) {
@@ -1818,7 +1831,9 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
FuncInfo->setRegSaveFrameIndex(
MFI->CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
- FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
+ // Fix up the vararg frame index to point into the shadow area (4 x i64).
+ if (NumIntRegs < 4)
+ FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
} else {
// For X86-64, if there are vararg parameters that are passed via
// registers, then we must store them to their spots on the stack so they
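The fixup above applies only while home slots remain. A minimal sketch of the
offset rule it encodes (hypothetical helper, assuming the Win64 4 x i64
home-slot convention):

    // Win64 reserves four 8-byte "home" slots above the return address.
    // While NumIntRegs < 4, the first stack vararg begins inside that shadow
    // area, so the register-save frame index doubles as the vararg index.
    static int varArgStartOffset(int HomeOffset, unsigned NumIntRegs) {
      // Only meaningful while NumIntRegs < 4.
      return HomeOffset + static_cast<int>(NumIntRegs) * 8;
    }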
@@ -1937,7 +1952,7 @@ X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG,
return SDValue(OutRetAddr.getNode(), 1);
}
-/// EmitTailCallStoreRetAddr - Emit a store of the return adress if tail call
+/// EmitTailCallStoreRetAddr - Emit a store of the return address if tail call
/// optimization is performed and it is required (FPDiff!=0).
static SDValue
EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF,
@@ -2028,7 +2043,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
SDValue RetAddrFrIdx;
- // Load return adress for tail calls.
+ // Load return address for tail calls.
if (isTailCall && FPDiff)
Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
Is64Bit, FPDiff, dl);
@@ -2185,7 +2200,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
SmallVector<SDValue, 8> MemOpChains2;
SDValue FIN;
int FI = 0;
- // Do not flag preceeding copytoreg stuff together with the following stuff.
+ // Do not flag preceding copytoreg stuff together with the following stuff.
InFlag = SDValue();
if (GuaranteedTailCallOpt) {
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
@@ -2266,7 +2281,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
OpFlags = X86II::MO_PLT;
} else if (Subtarget->isPICStyleStubAny() &&
(GV->isDeclaration() || GV->isWeakForLinker()) &&
- Subtarget->getDarwinVers() < 9) {
+ (!Subtarget->getTargetTriple().isMacOSX() ||
+ Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
// PC-relative references to external symbols should go through $stub,
// unless we're building with the leopard linker or later, which
// automatically synthesizes these stubs.
@@ -2285,7 +2301,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
getTargetMachine().getRelocationModel() == Reloc::PIC_) {
OpFlags = X86II::MO_PLT;
} else if (Subtarget->isPICStyleStubAny() &&
- Subtarget->getDarwinVers() < 9) {
+ (!Subtarget->getTargetTriple().isMacOSX() ||
+ Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
// PC-relative references to external symbols should go through $stub,
// unless we're building with the leopard linker or later, which
// automatically synthesizes these stubs.
@@ -3173,7 +3190,8 @@ bool X86::isMOVLPMask(ShuffleVectorSDNode *N) {
bool X86::isMOVLHPSMask(ShuffleVectorSDNode *N) {
unsigned NumElems = N->getValueType(0).getVectorNumElements();
- if (NumElems != 2 && NumElems != 4)
+ if ((NumElems != 2 && NumElems != 4)
+ || N->getValueType(0).getSizeInBits() > 128)
return false;
for (unsigned i = 0; i < NumElems/2; ++i)
@@ -3195,19 +3213,36 @@ static bool isUNPCKLMask(const SmallVectorImpl<int> &Mask, EVT VT,
if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
return false;
- for (int i = 0, j = 0; i != NumElts; i += 2, ++j) {
- int BitI = Mask[i];
- int BitI1 = Mask[i+1];
- if (!isUndefOrEqual(BitI, j))
- return false;
- if (V2IsSplat) {
- if (!isUndefOrEqual(BitI1, NumElts))
- return false;
- } else {
- if (!isUndefOrEqual(BitI1, j + NumElts))
+ // Handle vector lengths > 128 bits. Define a "section" as a set of
+ // 128 bits. AVX defines UNPCK* to operate independently on 128-bit
+ // sections.
+ unsigned NumSections = VT.getSizeInBits() / 128;
+ if (NumSections == 0) NumSections = 1; // Handle MMX
+ unsigned NumSectionElts = NumElts / NumSections;
+
+ unsigned Start = 0;
+ unsigned End = NumSectionElts;
+ for (unsigned s = 0; s < NumSections; ++s) {
+ for (unsigned i = Start, j = s * NumSectionElts;
+ i != End;
+ i += 2, ++j) {
+ int BitI = Mask[i];
+ int BitI1 = Mask[i+1];
+ if (!isUndefOrEqual(BitI, j))
return false;
+ if (V2IsSplat) {
+ if (!isUndefOrEqual(BitI1, NumElts))
+ return false;
+ } else {
+ if (!isUndefOrEqual(BitI1, j + NumElts))
+ return false;
+ }
}
+ // Process the next 128 bits.
+ Start += NumSectionElts;
+ End += NumSectionElts;
}
+
return true;
}
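To make the "section" wording above concrete, here is a hypothetical helper
(plain C++, mirroring the loop structure just added) that generates the
lane-local mask the predicate now accepts; for v8f32 it yields
<0,8,1,9,4,12,5,13> rather than a single global interleave:

    #include <vector>
    // Expected UNPCKL mask when NumElts elements are split into 128-bit
    // sections of NumSectionElts elements each.
    static std::vector<int> expectedUnpcklMask(unsigned NumElts,
                                               unsigned NumSectionElts) {
      std::vector<int> Mask;
      for (unsigned s = 0; s < NumElts / NumSectionElts; ++s)
        for (unsigned i = 0, j = s * NumSectionElts; i < NumSectionElts;
             i += 2, ++j) {
          Mask.push_back(j);           // element j of V1
          Mask.push_back(j + NumElts); // element j of V2
        }
      return Mask; // NumElts=8, NumSectionElts=4: 0,8,1,9,4,12,5,13
    }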
@@ -3255,14 +3290,27 @@ static bool isUNPCKL_v_undef_Mask(const SmallVectorImpl<int> &Mask, EVT VT) {
if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
return false;
- for (int i = 0, j = 0; i != NumElems; i += 2, ++j) {
- int BitI = Mask[i];
- int BitI1 = Mask[i+1];
- if (!isUndefOrEqual(BitI, j))
- return false;
- if (!isUndefOrEqual(BitI1, j))
- return false;
+ // Handle vector lengths > 128 bits. Define a "section" as a set of
+ // 128 bits. AVX defines UNPCK* to operate independently on 128-bit
+ // sections.
+ unsigned NumSections = VT.getSizeInBits() / 128;
+ if (NumSections == 0) NumSections = 1; // Handle MMX
+ unsigned NumSectionElts = NumElems / NumSections;
+
+ for (unsigned s = 0; s < NumSections; ++s) {
+ for (unsigned i = s * NumSectionElts, j = s * NumSectionElts;
+ i != NumSectionElts * (s + 1);
+ i += 2, ++j) {
+ int BitI = Mask[i];
+ int BitI1 = Mask[i+1];
+
+ if (!isUndefOrEqual(BitI, j))
+ return false;
+ if (!isUndefOrEqual(BitI1, j))
+ return false;
+ }
}
+
return true;
}
@@ -3846,8 +3894,8 @@ static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx,
/// getShuffleScalarElt - Returns the scalar element that will make up the ith
/// element of the result of the vector shuffle.
-SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
- unsigned Depth) {
+static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
+ unsigned Depth) {
if (Depth == 6)
return SDValue(); // Limit search depth.
@@ -3895,11 +3943,15 @@ SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
case X86ISD::PUNPCKLWD:
case X86ISD::PUNPCKLDQ:
case X86ISD::PUNPCKLQDQ:
- DecodePUNPCKLMask(NumElems, ShuffleMask);
+ DecodePUNPCKLMask(VT, ShuffleMask);
break;
case X86ISD::UNPCKLPS:
case X86ISD::UNPCKLPD:
- DecodeUNPCKLPMask(NumElems, ShuffleMask);
+ case X86ISD::VUNPCKLPS:
+ case X86ISD::VUNPCKLPD:
+ case X86ISD::VUNPCKLPSY:
+ case X86ISD::VUNPCKLPDY:
+ DecodeUNPCKLPMask(VT, ShuffleMask);
break;
case X86ISD::MOVHLPS:
DecodeMOVHLPSMask(NumElems, ShuffleMask);
@@ -3968,7 +4020,7 @@ SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
/// getNumOfConsecutiveZeros - Return the number of elements of a vector
/// shuffle operation which come consecutively from a zero. The
-/// search can start in two diferent directions, from left or right.
+/// search can start in two different directions, from left or right.
static
unsigned getNumOfConsecutiveZeros(SDNode *N, int NumElems,
bool ZerosFromLeft, SelectionDAG &DAG) {
@@ -5263,6 +5315,7 @@ LowerVECTOR_SHUFFLE_4wide(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
// Break it into (shuffle shuffle_hi, shuffle_lo).
Locs.clear();
+ Locs.resize(4);
SmallVector<int,8> LoMask(4U, -1);
SmallVector<int,8> HiMask(4U, -1);
@@ -5508,12 +5561,16 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) {
X86::getShuffleSHUFImmediate(SVOp), DAG);
}
-static inline unsigned getUNPCKLOpcode(EVT VT) {
+static inline unsigned getUNPCKLOpcode(EVT VT, const X86Subtarget *Subtarget) {
switch(VT.getSimpleVT().SimpleTy) {
case MVT::v4i32: return X86ISD::PUNPCKLDQ;
case MVT::v2i64: return X86ISD::PUNPCKLQDQ;
- case MVT::v4f32: return X86ISD::UNPCKLPS;
- case MVT::v2f64: return X86ISD::UNPCKLPD;
+ case MVT::v4f32:
+ return Subtarget->hasAVX() ? X86ISD::VUNPCKLPS : X86ISD::UNPCKLPS;
+ case MVT::v2f64:
+ return Subtarget->hasAVX() ? X86ISD::VUNPCKLPD : X86ISD::UNPCKLPD;
+ case MVT::v8f32: return X86ISD::VUNPCKLPSY;
+ case MVT::v4f64: return X86ISD::VUNPCKLPDY;
case MVT::v16i8: return X86ISD::PUNPCKLBW;
case MVT::v8i16: return X86ISD::PUNPCKLWD;
default:
@@ -5641,7 +5698,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
// unpckh_undef). Only use pshufd if speed is more important than size.
if (OptForSize && X86::isUNPCKL_v_undef_Mask(SVOp))
if (VT != MVT::v2i64 && VT != MVT::v2f64)
- return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG);
+ return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()), dl, VT, V1, V1, DAG);
if (OptForSize && X86::isUNPCKH_v_undef_Mask(SVOp))
if (VT != MVT::v2i64 && VT != MVT::v2f64)
return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
@@ -5762,7 +5819,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
}
if (X86::isUNPCKLMask(SVOp))
- return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V2, DAG);
+ return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()),
+ dl, VT, V1, V2, DAG);
if (X86::isUNPCKHMask(SVOp))
return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V2, DAG);
@@ -5789,7 +5847,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
ShuffleVectorSDNode *NewSVOp = cast<ShuffleVectorSDNode>(NewOp);
if (X86::isUNPCKLMask(NewSVOp))
- return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V2, V1, DAG);
+ return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()),
+ dl, VT, V2, V1, DAG);
if (X86::isUNPCKHMask(NewSVOp))
return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V2, V1, DAG);
@@ -5812,8 +5871,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
if (ShuffleVectorSDNode::isSplatMask(&M[0], VT) &&
SVOp->getSplatIndex() == 0 && V2IsUndef) {
- if (VT == MVT::v2f64)
- return getTargetShuffleNode(X86ISD::UNPCKLPD, dl, VT, V1, V1, DAG);
+ if (VT == MVT::v2f64) {
+ X86ISD::NodeType Opcode =
+ getSubtarget()->hasAVX() ? X86ISD::VUNPCKLPD : X86ISD::UNPCKLPD;
+ return getTargetShuffleNode(Opcode, dl, VT, V1, V1, DAG);
+ }
if (VT == MVT::v2i64)
return getTargetShuffleNode(X86ISD::PUNPCKLQDQ, dl, VT, V1, V1, DAG);
}
@@ -5840,7 +5902,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
if (X86::isUNPCKL_v_undef_Mask(SVOp))
if (VT != MVT::v2i64 && VT != MVT::v2f64)
- return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG);
+ return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()),
+ dl, VT, V1, V1, DAG);
if (X86::isUNPCKH_v_undef_Mask(SVOp))
if (VT != MVT::v2i64 && VT != MVT::v2f64)
return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
@@ -7868,6 +7931,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SelectionDAG &DAG) const {
assert((Subtarget->isTargetCygMing() || Subtarget->isTargetWindows()) &&
"This should be used only on Windows targets");
+ assert(!Subtarget->isTargetEnvMacho());
DebugLoc dl = Op.getDebugLoc();
// Get the inputs.
@@ -7878,8 +7942,9 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SDValue Flag;
EVT SPTy = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;
+ unsigned Reg = (Subtarget->is64Bit() ? X86::RAX : X86::EAX);
- Chain = DAG.getCopyToReg(Chain, dl, X86::EAX, Size, Flag);
+ Chain = DAG.getCopyToReg(Chain, dl, Reg, Size, Flag);
Flag = Chain.getValue(1);
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
@@ -8809,8 +8874,8 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
case ISD::SADDO:
// An add of one will be selected as an INC. Note that INC doesn't
// set CF, so we can't do this for UADDO.
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
- if (C->getAPIntValue() == 1) {
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS))
+ if (C->isOne()) {
BaseOp = X86ISD::INC;
Cond = X86::COND_O;
break;
@@ -8825,8 +8890,8 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
case ISD::SSUBO:
// A subtract of one will be selected as a DEC. Note that DEC doesn't
// set CF, so we can't do this for USUBO.
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
- if (C->getAPIntValue() == 1) {
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS))
+ if (C->isOne()) {
BaseOp = X86ISD::DEC;
Cond = X86::COND_O;
break;
@@ -10351,21 +10416,48 @@ X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI,
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
+ assert(!Subtarget->isTargetEnvMacho());
+
// The lowering is pretty easy: we're just emitting the call to _alloca. The
// non-trivial part is impdef of ESP.
- // FIXME: The code should be tweaked as soon as we'll try to do codegen for
- // mingw-w64.
- const char *StackProbeSymbol =
+ if (Subtarget->isTargetWin64()) {
+ if (Subtarget->isTargetCygMing()) {
+ // ___chkstk(Mingw64):
+ // Clobbers R10, R11, RAX and EFLAGS.
+ // Updates RSP.
+ BuildMI(*BB, MI, DL, TII->get(X86::W64ALLOCA))
+ .addExternalSymbol("___chkstk")
+ .addReg(X86::RAX, RegState::Implicit)
+ .addReg(X86::RSP, RegState::Implicit)
+ .addReg(X86::RAX, RegState::Define | RegState::Implicit)
+ .addReg(X86::RSP, RegState::Define | RegState::Implicit)
+ .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
+ } else {
+ // __chkstk(MSVCRT): does not update stack pointer.
+ // Clobbers R10, R11 and EFLAGS.
+ // FIXME: RAX (the allocated size) might be reused and not killed.
+ BuildMI(*BB, MI, DL, TII->get(X86::W64ALLOCA))
+ .addExternalSymbol("__chkstk")
+ .addReg(X86::RAX, RegState::Implicit)
+ .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
+ // RAX holds the offset to be subtracted from RSP.
+ BuildMI(*BB, MI, DL, TII->get(X86::SUB64rr), X86::RSP)
+ .addReg(X86::RSP)
+ .addReg(X86::RAX);
+ }
+ } else {
+ const char *StackProbeSymbol =
Subtarget->isTargetWindows() ? "_chkstk" : "_alloca";
- BuildMI(*BB, MI, DL, TII->get(X86::CALLpcrel32))
- .addExternalSymbol(StackProbeSymbol)
- .addReg(X86::EAX, RegState::Implicit)
- .addReg(X86::ESP, RegState::Implicit)
- .addReg(X86::EAX, RegState::Define | RegState::Implicit)
- .addReg(X86::ESP, RegState::Define | RegState::Implicit)
- .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
+ BuildMI(*BB, MI, DL, TII->get(X86::CALLpcrel32))
+ .addExternalSymbol(StackProbeSymbol)
+ .addReg(X86::EAX, RegState::Implicit)
+ .addReg(X86::ESP, RegState::Implicit)
+ .addReg(X86::EAX, RegState::Define | RegState::Implicit)
+ .addReg(X86::ESP, RegState::Define | RegState::Implicit)
+ .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
+ }
MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
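The two Win64 branches above encode different probe contracts. A hypothetical
decision-table sketch (plain C++, not the LLVM API) of what
EmitLoweredWinAlloca now emits:

    // Which symbol a dynamic alloca calls, and whether the compiler must
    // subtract RAX from RSP itself afterwards.
    struct AllocaProbe {
      const char *Symbol;
      bool CallerAdjustsSP; // true: probe leaves RSP untouched
    };
    static AllocaProbe allocaProbeFor(bool IsWin64, bool IsCygMing,
                                      bool IsWindows) {
      if (IsWin64)
        return IsCygMing ? AllocaProbe{"___chkstk", false} // updates RSP
                         : AllocaProbe{"__chkstk", true};  // SUB64rr follows
      return AllocaProbe{IsWindows ? "_chkstk" : "_alloca", false};
    }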
@@ -12126,7 +12218,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
AsmPieces.clear();
SplitString(AsmStr, AsmPieces, " \t"); // Split with whitespace.
- // FIXME: this should verify that we are targetting a 486 or better. If not,
+ // FIXME: this should verify that we are targeting a 486 or better. If not,
// we will turn this bswap into something that will be lowered to logical ops
// instead of emitting the bswap asm. For now, we don't support 486 or lower
// so don't worry about this.
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 6ec4a7d..6301057 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -677,9 +677,6 @@ namespace llvm {
/// getFunctionAlignment - Return the Log2 alignment of this function.
virtual unsigned getFunctionAlignment(const Function *F) const;
- unsigned getRegPressureLimit(const TargetRegisterClass *RC,
- MachineFunction &MF) const;
-
/// getStackCookieLocation - Return true if the target stores stack
/// protector cookies at a fixed offset in some non-standard address
/// space, and populates the address space and offset as
@@ -846,6 +843,12 @@ namespace llvm {
virtual bool isUsedByReturnOnly(SDNode *N) const;
+ virtual bool mayBeEmittedAsTailCall(CallInst *CI) const;
+
+ virtual EVT
+ getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
+ ISD::NodeType ExtendKind) const;
+
virtual bool
CanLowerReturn(CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
diff --git a/lib/Target/X86/X86Instr3DNow.td b/lib/Target/X86/X86Instr3DNow.td
index 45d1c6b..dd4f6a5 100644
--- a/lib/Target/X86/X86Instr3DNow.td
+++ b/lib/Target/X86/X86Instr3DNow.td
@@ -12,66 +12,91 @@
//
//===----------------------------------------------------------------------===//
-// FIXME: We don't support any intrinsics for these instructions yet.
+class I3DNow<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pat>
+ : I<o, F, outs, ins, asm, pat>, TB, Requires<[Has3DNow]> {
+}
-class I3DNow<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, TB, Requires<[Has3DNow]> {
+class I3DNow_binop<bits<8> o, Format F, dag ins, string Mnemonic, list<dag> pat>
+ : I3DNow<o, F, (outs VR64:$dst), ins,
+ !strconcat(Mnemonic, "\t{$src2, $dst|$dst, $src2}"), pat>,
+ Has3DNow0F0FOpcode {
+ // FIXME: The disassembler doesn't support Has3DNow0F0FOpcode yet.
+ let isAsmParserOnly = 1;
+ let Constraints = "$src1 = $dst";
}
-class I3DNow_binop<bits<8> o, Format F, dag ins, string Mnemonic>
- : I<o, F, (outs VR64:$dst), ins,
- !strconcat(Mnemonic, "\t{$src2, $dst|$dst, $src2}"), []>,
- TB, Requires<[Has3DNow]>, Has3DNow0F0FOpcode {
+class I3DNow_conv<bits<8> o, Format F, dag ins, string Mnemonic, list<dag> pat>
+ : I3DNow<o, F, (outs VR64:$dst), ins,
+ !strconcat(Mnemonic, "\t{$src, $dst|$dst, $src}"), pat>,
+ Has3DNow0F0FOpcode {
// FIXME: The disassembler doesn't support Has3DNow0F0FOpcode yet.
let isAsmParserOnly = 1;
}
+multiclass I3DNow_binop_rm<bits<8> opc, string Mn> {
+ def rr : I3DNow_binop<opc, MRMSrcReg, (ins VR64:$src1, VR64:$src2), Mn, []>;
+ def rm : I3DNow_binop<opc, MRMSrcMem, (ins VR64:$src1, i64mem:$src2), Mn, []>;
+}
+
+multiclass I3DNow_binop_rm_int<bits<8> opc, string Mn, string Ver = ""> {
+ def rr : I3DNow_binop<opc, MRMSrcReg, (ins VR64:$src1, VR64:$src2), Mn,
+ [(set VR64:$dst, (!cast<Intrinsic>(
+ !strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src1, VR64:$src2))]>;
+ def rm : I3DNow_binop<opc, MRMSrcMem, (ins VR64:$src1, i64mem:$src2), Mn,
+ [(set VR64:$dst, (!cast<Intrinsic>(
+ !strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src1,
+ (bitconvert (load_mmx addr:$src2))))]>;
+}
+
+multiclass I3DNow_conv_rm<bits<8> opc, string Mn> {
+ def rr : I3DNow_conv<opc, MRMSrcReg, (ins VR64:$src1), Mn, []>;
+ def rm : I3DNow_conv<opc, MRMSrcMem, (ins i64mem:$src1), Mn, []>;
+}
-let Constraints = "$src1 = $dst" in {
- // MMXI_binop_rm_int - Simple MMX binary operator based on intrinsic.
- // When this is cleaned up, remove the FIXME from X86RecognizableInstr.cpp.
- multiclass I3DNow_binop_rm<bits<8> opc, string Mn> {
- def rr : I3DNow_binop<opc, MRMSrcReg, (ins VR64:$src1, VR64:$src2), Mn>;
- def rm : I3DNow_binop<opc, MRMSrcMem, (ins VR64:$src1, i64mem:$src2), Mn>;
- }
+multiclass I3DNow_conv_rm_int<bits<8> opc, string Mn, string Ver = ""> {
+ def rr : I3DNow_conv<opc, MRMSrcReg, (ins VR64:$src), Mn,
+ [(set VR64:$dst, (!cast<Intrinsic>(
+ !strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src))]>;
+ def rm : I3DNow_conv<opc, MRMSrcMem, (ins i64mem:$src), Mn,
+ [(set VR64:$dst, (!cast<Intrinsic>(
+ !strconcat("int_x86_3dnow", Ver, "_", Mn))
+ (bitconvert (load_mmx addr:$src))))]>;
}
-defm PAVGUSB : I3DNow_binop_rm<0xBF, "pavgusb">;
-defm PF2ID : I3DNow_binop_rm<0x1D, "pf2id">;
-defm PFACC : I3DNow_binop_rm<0xAE, "pfacc">;
-defm PFADD : I3DNow_binop_rm<0x9E, "pfadd">;
-defm PFCMPEQ : I3DNow_binop_rm<0xB0, "pfcmpeq">;
-defm PFCMPGE : I3DNow_binop_rm<0x90, "pfcmpge">;
-defm PFCMPGT : I3DNow_binop_rm<0xA0, "pfcmpgt">;
-defm PFMAX : I3DNow_binop_rm<0xA4, "pfmax">;
-defm PFMIN : I3DNow_binop_rm<0x94, "pfmin">;
-defm PFMUL : I3DNow_binop_rm<0xB4, "pfmul">;
-defm PFRCP : I3DNow_binop_rm<0x96, "pfrcp">;
-defm PFRCPIT1 : I3DNow_binop_rm<0xA6, "pfrcpit1">;
-defm PFRCPIT2 : I3DNow_binop_rm<0xB6, "pfrcpit2">;
-defm PFRSQIT1 : I3DNow_binop_rm<0xA7, "pfrsqit1">;
-defm PFRSQRT : I3DNow_binop_rm<0x97, "pfrsqrt">;
-defm PFSUB : I3DNow_binop_rm<0x9A, "pfsub">;
-defm PFSUBR : I3DNow_binop_rm<0xAA, "pfsubr">;
-defm PI2FD : I3DNow_binop_rm<0x0D, "pi2fd">;
-defm PMULHRW : I3DNow_binop_rm<0xB7, "pmulhrw">;
+defm PAVGUSB : I3DNow_binop_rm_int<0xBF, "pavgusb">;
+defm PF2ID : I3DNow_conv_rm_int<0x1D, "pf2id">;
+defm PFACC : I3DNow_binop_rm_int<0xAE, "pfacc">;
+defm PFADD : I3DNow_binop_rm_int<0x9E, "pfadd">;
+defm PFCMPEQ : I3DNow_binop_rm_int<0xB0, "pfcmpeq">;
+defm PFCMPGE : I3DNow_binop_rm_int<0x90, "pfcmpge">;
+defm PFCMPGT : I3DNow_binop_rm_int<0xA0, "pfcmpgt">;
+defm PFMAX : I3DNow_binop_rm_int<0xA4, "pfmax">;
+defm PFMIN : I3DNow_binop_rm_int<0x94, "pfmin">;
+defm PFMUL : I3DNow_binop_rm_int<0xB4, "pfmul">;
+defm PFRCP : I3DNow_conv_rm_int<0x96, "pfrcp">;
+defm PFRCPIT1 : I3DNow_binop_rm_int<0xA6, "pfrcpit1">;
+defm PFRCPIT2 : I3DNow_binop_rm_int<0xB6, "pfrcpit2">;
+defm PFRSQIT1 : I3DNow_binop_rm_int<0xA7, "pfrsqit1">;
+defm PFRSQRT : I3DNow_conv_rm_int<0x97, "pfrsqrt">;
+defm PFSUB : I3DNow_binop_rm_int<0x9A, "pfsub">;
+defm PFSUBR : I3DNow_binop_rm_int<0xAA, "pfsubr">;
+defm PI2FD : I3DNow_conv_rm_int<0x0D, "pi2fd">;
+defm PMULHRW : I3DNow_binop_rm_int<0xB7, "pmulhrw">;
def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms", [(int_x86_mmx_femms)]>;
def PREFETCH : I3DNow<0x0D, MRM0m, (outs), (ins i32mem:$addr),
"prefetch $addr", []>;
-
+
// FIXME: Disassembler gets a bogus decode conflict.
-let isAsmParserOnly = 1 in {
+let isAsmParserOnly = 1 in
def PREFETCHW : I3DNow<0x0D, MRM1m, (outs), (ins i16mem:$addr),
"prefetchw $addr", []>;
-}
// "3DNowA" instructions
-defm PF2IW : I3DNow_binop_rm<0x1C, "pf2iw">;
-defm PI2FW : I3DNow_binop_rm<0x0C, "pi2fw">;
-defm PFNACC : I3DNow_binop_rm<0x8A, "pfnacc">;
-defm PFPNACC : I3DNow_binop_rm<0x8E, "pfpnacc">;
-defm PSWAPD : I3DNow_binop_rm<0xBB, "pswapd">;
+defm PF2IW : I3DNow_conv_rm_int<0x1C, "pf2iw", "a">;
+defm PI2FW : I3DNow_conv_rm_int<0x0C, "pi2fw", "a">;
+defm PFNACC : I3DNow_binop_rm_int<0x8A, "pfnacc", "a">;
+defm PFPNACC : I3DNow_binop_rm_int<0x8E, "pfpnacc", "a">;
+defm PSWAPD : I3DNow_conv_rm_int<0xBB, "pswapd", "a">;
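The Ver parameter in the multiclasses above selects between the base and
"3DNowA" intrinsic namespaces. A small hypothetical C++ mirror of the naming
scheme the !strconcat builds:

    #include <string>
    // "int_x86_3dnow" + optional "a" + "_" + mnemonic, e.g.
    //   intrinsicFor("pfadd", false)  == "int_x86_3dnow_pfadd"
    //   intrinsicFor("pswapd", true)  == "int_x86_3dnowa_pswapd"
    static std::string intrinsicFor(const std::string &Mn, bool Is3DNowA) {
      return std::string("int_x86_3dnow") + (Is3DNowA ? "a" : "") + "_" + Mn;
    }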
diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td
index f0ea068..9f7a4b0 100644
--- a/lib/Target/X86/X86InstrArithmetic.td
+++ b/lib/Target/X86/X86InstrArithmetic.td
@@ -163,7 +163,7 @@ def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst),
} // Defs = [EFLAGS]
-// Suprisingly enough, these are not two address instructions!
+// Surprisingly enough, these are not two address instructions!
let Defs = [EFLAGS] in {
// Register-Integer Signed Integer Multiply
def IMUL16rri : Ii16<0x69, MRMSrcReg, // GR16 = GR16*I16
diff --git a/lib/Target/X86/X86InstrControl.td b/lib/Target/X86/X86InstrControl.td
index 77f4725..c228a0ae 100644
--- a/lib/Target/X86/X86InstrControl.td
+++ b/lib/Target/X86/X86InstrControl.td
@@ -263,6 +263,16 @@ let isCall = 1, isCodeGenOnly = 1 in
Requires<[IsWin64]>;
}
+let isCall = 1, isCodeGenOnly = 1 in
+ // __chkstk(MSVC): clobbers R10, R11 and EFLAGS.
+ // ___chkstk(Mingw64): clobbers R10, R11, RAX and EFLAGS, and updates RSP.
+ let Defs = [RAX, R10, R11, RSP, EFLAGS],
+ Uses = [RSP] in {
+ def W64ALLOCA : Ii32PCRel<0xE8, RawFrm,
+ (outs), (ins i64i32imm_pcrel:$dst, variable_ops),
+ "call{q}\t$dst", []>,
+ Requires<[IsWin64]>;
+ }
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
isCodeGenOnly = 1 in
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index 0660072..7daa264 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -91,21 +91,23 @@ class REX_W { bit hasREX_WPrefix = 1; }
class LOCK { bit hasLockPrefix = 1; }
class SegFS { bits<2> SegOvrBits = 1; }
class SegGS { bits<2> SegOvrBits = 2; }
-class TB { bits<4> Prefix = 1; }
-class REP { bits<4> Prefix = 2; }
-class D8 { bits<4> Prefix = 3; }
-class D9 { bits<4> Prefix = 4; }
-class DA { bits<4> Prefix = 5; }
-class DB { bits<4> Prefix = 6; }
-class DC { bits<4> Prefix = 7; }
-class DD { bits<4> Prefix = 8; }
-class DE { bits<4> Prefix = 9; }
-class DF { bits<4> Prefix = 10; }
-class XD { bits<4> Prefix = 11; }
-class XS { bits<4> Prefix = 12; }
-class T8 { bits<4> Prefix = 13; }
-class TA { bits<4> Prefix = 14; }
-class TF { bits<4> Prefix = 15; }
+class TB { bits<5> Prefix = 1; }
+class REP { bits<5> Prefix = 2; }
+class D8 { bits<5> Prefix = 3; }
+class D9 { bits<5> Prefix = 4; }
+class DA { bits<5> Prefix = 5; }
+class DB { bits<5> Prefix = 6; }
+class DC { bits<5> Prefix = 7; }
+class DD { bits<5> Prefix = 8; }
+class DE { bits<5> Prefix = 9; }
+class DF { bits<5> Prefix = 10; }
+class XD { bits<5> Prefix = 11; }
+class XS { bits<5> Prefix = 12; }
+class T8 { bits<5> Prefix = 13; }
+class TA { bits<5> Prefix = 14; }
+class A6 { bits<5> Prefix = 15; }
+class A7 { bits<5> Prefix = 16; }
+class TF { bits<5> Prefix = 17; }
class VEX { bit hasVEXPrefix = 1; }
class VEX_W { bit hasVEX_WPrefix = 1; }
class VEX_4V : VEX { bit hasVEX_4VPrefix = 1; }
@@ -136,7 +138,7 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
bit hasOpSizePrefix = 0; // Does this inst have a 0x66 prefix?
bit hasAdSizePrefix = 0; // Does this inst have a 0x67 prefix?
- bits<4> Prefix = 0; // Which prefix byte does this inst have?
+ bits<5> Prefix = 0; // Which prefix byte does this inst have?
bit hasREX_WPrefix = 0; // Does this inst require the REX.W prefix?
FPFormat FPForm = NotFP; // What flavor of FP instruction is this?
bit hasLockPrefix = 0; // Does this inst have a 0xF0 prefix?
@@ -154,20 +156,20 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
let TSFlags{5-0} = FormBits;
let TSFlags{6} = hasOpSizePrefix;
let TSFlags{7} = hasAdSizePrefix;
- let TSFlags{11-8} = Prefix;
- let TSFlags{12} = hasREX_WPrefix;
- let TSFlags{15-13} = ImmT.Value;
- let TSFlags{18-16} = FPForm.Value;
- let TSFlags{19} = hasLockPrefix;
- let TSFlags{21-20} = SegOvrBits;
- let TSFlags{23-22} = ExeDomain.Value;
- let TSFlags{31-24} = Opcode;
- let TSFlags{32} = hasVEXPrefix;
- let TSFlags{33} = hasVEX_WPrefix;
- let TSFlags{34} = hasVEX_4VPrefix;
- let TSFlags{35} = hasVEX_i8ImmReg;
- let TSFlags{36} = hasVEX_L;
- let TSFlags{37} = has3DNow0F0FOpcode;
+ let TSFlags{12-8} = Prefix;
+ let TSFlags{13} = hasREX_WPrefix;
+ let TSFlags{16-14} = ImmT.Value;
+ let TSFlags{19-17} = FPForm.Value;
+ let TSFlags{20} = hasLockPrefix;
+ let TSFlags{22-21} = SegOvrBits;
+ let TSFlags{24-23} = ExeDomain.Value;
+ let TSFlags{32-25} = Opcode;
+ let TSFlags{33} = hasVEXPrefix;
+ let TSFlags{34} = hasVEX_WPrefix;
+ let TSFlags{35} = hasVEX_4VPrefix;
+ let TSFlags{36} = hasVEX_i8ImmReg;
+ let TSFlags{37} = hasVEX_L;
+ let TSFlags{38} = has3DNow0F0FOpcode;
}
class PseudoI<dag oops, dag iops, list<dag> pattern>
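Widening Prefix to five bits shifts every later TSFlags field up by one. A
decoder sketch for the two fields that moved (plain C++ mirroring the new bit
layout above, not the real X86II accessors):

    #include <cstdint>
    static inline unsigned prefixOf(uint64_t TSFlags) {
      return unsigned(TSFlags >> 8) & 0x1F;  // Prefix now bits 12-8 (5 bits)
    }
    static inline unsigned opcodeOf(uint64_t TSFlags) {
      return unsigned(TSFlags >> 25) & 0xFF; // Opcode moved to bits 32-25
    }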
@@ -319,7 +321,7 @@ class VSSI<bits<8> o, Format F, dag outs, dag ins, string asm,
Requires<[HasAVX]>;
class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedSingle>,
+ : I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedSingle>, TB,
Requires<[HasAVX]>;
// SSE2 Instruction Templates:
@@ -353,7 +355,7 @@ class VSDI<bits<8> o, Format F, dag outs, dag ins, string asm,
Requires<[HasAVX]>;
class VPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedDouble>,
+ : I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedDouble>, TB,
OpSize, Requires<[HasAVX]>;
// SSE3 Instruction Templates:
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 5016c0f..3cbfac1 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -132,6 +132,8 @@ def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>;
def X86Unpcklps : SDNode<"X86ISD::UNPCKLPS", SDTShuff2Op>;
def X86Unpcklpd : SDNode<"X86ISD::UNPCKLPD", SDTShuff2Op>;
+def X86Unpcklpsy : SDNode<"X86ISD::VUNPCKLPSY", SDTShuff2Op>;
+def X86Unpcklpdy : SDNode<"X86ISD::VUNPCKLPDY", SDTShuff2Op>;
def X86Unpckhps : SDNode<"X86ISD::UNPCKHPS", SDTShuff2Op>;
def X86Unpckhpd : SDNode<"X86ISD::UNPCKHPD", SDTShuff2Op>;
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 76a9b12..83f0260 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -232,7 +232,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
assert(!RegOp2MemOpTable2Addr.count(RegOp) && "Duplicated entries?");
RegOp2MemOpTable2Addr[RegOp] = std::make_pair(MemOp, 0U);
- // If this is not a reversable operation (because there is a many->one)
+ // If this is not a reversible operation (because there is a many->one)
// mapping, don't insert the reverse of the operation into MemOp2RegOpTable.
if (OpTbl2Addr[i][1] & TB_NOT_REVERSABLE)
continue;
@@ -335,7 +335,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
assert(!RegOp2MemOpTable0.count(RegOp) && "Duplicated entries?");
RegOp2MemOpTable0[RegOp] = std::make_pair(MemOp, Align);
- // If this is not a reversable operation (because there is a many->one)
+ // If this is not a reversible operation (because there is a many->one)
// mapping, don't insert the reverse of the operation into MemOp2RegOpTable.
if (OpTbl0[i][1] & TB_NOT_REVERSABLE)
continue;
@@ -460,7 +460,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
assert(!RegOp2MemOpTable1.count(RegOp) && "Duplicate entries");
RegOp2MemOpTable1[RegOp] = std::make_pair(MemOp, Align);
- // If this is not a reversable operation (because there is a many->one)
+ // If this is not a reversible operation (because there is a many->one)
// mapping, don't insert the reverse of the operation into MemOp2RegOpTable.
if (OpTbl1[i][1] & TB_NOT_REVERSABLE)
continue;
@@ -682,7 +682,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
assert(!RegOp2MemOpTable2.count(RegOp) && "Duplicate entry!");
RegOp2MemOpTable2[RegOp] = std::make_pair(MemOp, Align);
- // If this is not a reversable operation (because there is a many->one)
+ // If this is not a reversible operation (because there is a many->one)
// mapping, don't insert the reverse of the operation into MemOp2RegOpTable.
if (OpTbl2[i][1] & TB_NOT_REVERSABLE)
continue;
@@ -916,7 +916,6 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
case X86::MOVSDrm:
case X86::MOVAPSrm:
case X86::MOVUPSrm:
- case X86::MOVUPSrm_Int:
case X86::MOVAPDrm:
case X86::MOVDQArm:
case X86::MMX_MOVD64rm:
@@ -2241,6 +2240,12 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
bool isTwoAddr = NumOps > 1 &&
MI->getDesc().getOperandConstraint(1, TOI::TIED_TO) != -1;
+ // FIXME: AsmPrinter doesn't know how to handle
+ // X86II::MO_GOT_ABSOLUTE_ADDRESS after folding.
+ if (MI->getOpcode() == X86::ADD32ri &&
+ MI->getOperand(2).getTargetFlags() == X86II::MO_GOT_ABSOLUTE_ADDRESS)
+ return NULL;
+
MachineInstr *NewMI = NULL;
// Folding a memory location into the two-address part of a two-address
// instruction is different than folding it other places. It requires
@@ -2535,6 +2540,12 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
case X86::TEST32rr:
case X86::TEST64rr:
return true;
+ case X86::ADD32ri:
+ // FIXME: AsmPrinter doesn't know how to handle
+ // X86II::MO_GOT_ABSOLUTE_ADDRESS after folding.
+ if (MI->getOperand(2).getTargetFlags() == X86II::MO_GOT_ABSOLUTE_ADDRESS)
+ return false;
+ break;
}
}
@@ -2845,11 +2856,9 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
case X86::FsMOVAPDrm:
case X86::MOVAPSrm:
case X86::MOVUPSrm:
- case X86::MOVUPSrm_Int:
case X86::MOVAPDrm:
case X86::MOVDQArm:
case X86::MOVDQUrm:
- case X86::MOVDQUrm_Int:
break;
}
switch (Opc2) {
@@ -2869,11 +2878,9 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
case X86::FsMOVAPDrm:
case X86::MOVAPSrm:
case X86::MOVUPSrm:
- case X86::MOVUPSrm_Int:
case X86::MOVAPDrm:
case X86::MOVDQArm:
case X86::MOVDQUrm:
- case X86::MOVDQUrm_Int:
break;
}
@@ -3085,12 +3092,8 @@ void X86InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
NopInst.setOpcode(X86::NOOP);
}
-bool X86InstrInfo::
-hasHighOperandLatency(const InstrItineraryData *ItinData,
- const MachineRegisterInfo *MRI,
- const MachineInstr *DefMI, unsigned DefIdx,
- const MachineInstr *UseMI, unsigned UseIdx) const {
- switch (DefMI->getOpcode()) {
+bool X86InstrInfo::isHighLatencyDef(int opc) const {
+ switch (opc) {
default: return false;
case X86::DIVSDrm:
case X86::DIVSDrm_Int:
@@ -3120,6 +3123,14 @@ hasHighOperandLatency(const InstrItineraryData *ItinData,
}
}
+bool X86InstrInfo::
+hasHighOperandLatency(const InstrItineraryData *ItinData,
+ const MachineRegisterInfo *MRI,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const {
+ return isHighLatencyDef(DefMI->getOpcode());
+}
+
namespace {
/// CGBR - Create Global Base Reg pass. This initializes the PIC
/// global base register for x86-32.
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index fcb5a25..8da68b5 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -33,15 +33,15 @@ namespace X86 {
AddrScaleAmt = 1,
AddrIndexReg = 2,
AddrDisp = 3,
-
+
/// AddrSegmentReg - The operand # of the segment in the memory operand.
AddrSegmentReg = 4,
/// AddrNumOperands - Total number of operands in a memory reference.
AddrNumOperands = 5
};
-
-
+
+
// X86 specific condition code. These correspond to X86_*_COND in
// X86InstrInfo.td. They must be kept in synch.
enum CondCode {
@@ -72,16 +72,16 @@ namespace X86 {
COND_INVALID
};
-
+
// Turn condition code into conditional branch opcode.
unsigned GetCondBranchFromCond(CondCode CC);
-
+
/// GetOppositeBranchCondition - Return the inverse of the specified cond,
/// e.g. turning COND_E to COND_NE.
CondCode GetOppositeBranchCondition(X86::CondCode CC);
}
-
+
/// X86II - This namespace holds all of the target specific flags that
/// instruction info tracks.
///
@@ -90,14 +90,14 @@ namespace X86II {
enum TOF {
//===------------------------------------------------------------------===//
// X86 Specific MachineOperand flags.
-
+
MO_NO_FLAG,
-
+
/// MO_GOT_ABSOLUTE_ADDRESS - On a symbol operand, this represents a
/// relocation of:
/// SYMBOL_LABEL + [. - PICBASELABEL]
MO_GOT_ABSOLUTE_ADDRESS,
-
+
/// MO_PIC_BASE_OFFSET - On a symbol operand this indicates that the
/// immediate should get the value of the symbol minus the PIC base label:
/// SYMBOL_LABEL - PICBASELABEL
@@ -106,77 +106,77 @@ namespace X86II {
/// MO_GOT - On a symbol operand this indicates that the immediate is the
/// offset to the GOT entry for the symbol name from the base of the GOT.
///
- /// See the X86-64 ELF ABI supplement for more details.
+ /// See the X86-64 ELF ABI supplement for more details.
/// SYMBOL_LABEL @GOT
MO_GOT,
-
+
/// MO_GOTOFF - On a symbol operand this indicates that the immediate is
- /// the offset to the location of the symbol name from the base of the GOT.
+ /// the offset to the location of the symbol name from the base of the GOT.
///
- /// See the X86-64 ELF ABI supplement for more details.
+ /// See the X86-64 ELF ABI supplement for more details.
/// SYMBOL_LABEL @GOTOFF
MO_GOTOFF,
-
+
/// MO_GOTPCREL - On a symbol operand this indicates that the immediate is
/// offset to the GOT entry for the symbol name from the current code
- /// location.
+ /// location.
///
- /// See the X86-64 ELF ABI supplement for more details.
+ /// See the X86-64 ELF ABI supplement for more details.
/// SYMBOL_LABEL @GOTPCREL
MO_GOTPCREL,
-
+
/// MO_PLT - On a symbol operand this indicates that the immediate is
- /// offset to the PLT entry of symbol name from the current code location.
+ /// offset to the PLT entry of symbol name from the current code location.
///
- /// See the X86-64 ELF ABI supplement for more details.
+ /// See the X86-64 ELF ABI supplement for more details.
/// SYMBOL_LABEL @PLT
MO_PLT,
-
+
/// MO_TLSGD - On a symbol operand this indicates that the immediate is
/// some TLS offset.
///
- /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
/// SYMBOL_LABEL @TLSGD
MO_TLSGD,
-
+
/// MO_GOTTPOFF - On a symbol operand this indicates that the immediate is
/// some TLS offset.
///
- /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
/// SYMBOL_LABEL @GOTTPOFF
MO_GOTTPOFF,
-
+
/// MO_INDNTPOFF - On a symbol operand this indicates that the immediate is
/// some TLS offset.
///
- /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
/// SYMBOL_LABEL @INDNTPOFF
MO_INDNTPOFF,
-
+
/// MO_TPOFF - On a symbol operand this indicates that the immediate is
/// some TLS offset.
///
- /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
/// SYMBOL_LABEL @TPOFF
MO_TPOFF,
-
+
/// MO_NTPOFF - On a symbol operand this indicates that the immediate is
/// some TLS offset.
///
- /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
/// SYMBOL_LABEL @NTPOFF
MO_NTPOFF,
-
+
/// MO_DLLIMPORT - On a symbol operand "FOO", this indicates that the
/// reference is actually to the "__imp_FOO" symbol. This is used for
/// dllimport linkage on windows.
MO_DLLIMPORT,
-
+
/// MO_DARWIN_STUB - On a symbol operand "FOO", this indicates that the
/// reference is actually to the "FOO$stub" symbol. This is used for calls
/// and jumps to external functions on Tiger and earlier.
MO_DARWIN_STUB,
-
+
/// MO_DARWIN_NONLAZY - On a symbol operand "FOO", this indicates that the
/// reference is actually to the "FOO$non_lazy_ptr" symbol, which is a
/// non-PIC-base-relative reference to a non-hidden dyld lazy pointer stub.
@@ -186,19 +186,19 @@ namespace X86II {
/// that the reference is actually to "FOO$non_lazy_ptr - PICBASE", which is
/// a PIC-base-relative reference to a non-hidden dyld lazy pointer stub.
MO_DARWIN_NONLAZY_PIC_BASE,
-
+
/// MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE - On a symbol operand "FOO", this
/// indicates that the reference is actually to "FOO$non_lazy_ptr -PICBASE",
/// which is a PIC-base-relative reference to a hidden dyld lazy pointer
/// stub.
MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE,
-
+
/// MO_TLVP - On a symbol operand this indicates that the immediate is
/// some TLS offset.
///
/// This is the TLS offset for the Darwin TLS mechanism.
MO_TLVP,
-
+
/// MO_TLVP_PIC_BASE - On a symbol operand this indicates that the immediate
/// is some TLS offset from the picbase.
///
@@ -239,7 +239,7 @@ inline static bool isGlobalRelativeToPICBase(unsigned char TargetFlag) {
return false;
}
}
-
+
/// X86II - This namespace holds all of the target specific flags that
/// instruction info tracks.
///
@@ -299,7 +299,7 @@ namespace X86II {
// MRMInitReg - This form is used for instructions whose source and
// destinations are the same register.
MRMInitReg = 32,
-
+
//// MRM_C1 - A mod/rm byte of exactly 0xC1.
MRM_C1 = 33,
MRM_C2 = 34,
@@ -318,7 +318,7 @@ namespace X86II {
/// immediates, the first of which is a 16-bit immediate (specified by
/// the imm encoding) and the second is a 8-bit fixed value.
RawFrmImm8 = 43,
-
+
/// RawFrmImm16 - This is used for CALL FAR instructions, which have two
/// immediates, the first of which is a 16 or 32-bit immediate (specified by
/// the imm encoding) and the second is a 16-bit fixed value. In the AMD
@@ -347,7 +347,7 @@ namespace X86II {
// set, there is no prefix byte for obtaining a multibyte opcode.
//
Op0Shift = 8,
- Op0Mask = 0xF << Op0Shift,
+ Op0Mask = 0x1F << Op0Shift,
// TB - TwoByte - Set if this instruction has a two byte opcode, which
// starts with a 0x0F byte before the real opcode.
@@ -368,11 +368,12 @@ namespace X86II {
// floating point operations performed in the SSE registers.
XD = 11 << Op0Shift, XS = 12 << Op0Shift,
- // T8, TA - Prefix after the 0x0F prefix.
+ // T8, TA, A6, A7 - Prefix after the 0x0F prefix.
T8 = 13 << Op0Shift, TA = 14 << Op0Shift,
-
+ A6 = 15 << Op0Shift, A7 = 16 << Op0Shift,
+
// TF - Prefix before and after 0x0F
- TF = 15 << Op0Shift,
+ TF = 17 << Op0Shift,
//===------------------------------------------------------------------===//
// REX_W - REX prefixes are instruction prefixes used in 64-bit mode.
@@ -380,13 +381,13 @@ namespace X86II {
// etc. We only care about REX.W and REX.R bits and only the former is
// statically determined.
//
- REXShift = 12,
+ REXShift = Op0Shift + 5,
REX_W = 1 << REXShift,
//===------------------------------------------------------------------===//
// This three-bit field describes the size of an immediate operand. Zero is
// unused so that we can tell if we forgot to set a value.
- ImmShift = 13,
+ ImmShift = REXShift + 1,
ImmMask = 7 << ImmShift,
Imm8 = 1 << ImmShift,
Imm8PCRel = 2 << ImmShift,
@@ -400,7 +401,7 @@ namespace X86II {
// FP Instruction Classification... Zero is non-fp instruction.
// FPTypeMask - Mask for all of the FP types...
- FPTypeShift = 16,
+ FPTypeShift = ImmShift + 3,
FPTypeMask = 7 << FPTypeShift,
// NotFP - The default, set for instructions that do not use FP registers.
@@ -433,25 +434,26 @@ namespace X86II {
SpecialFP = 7 << FPTypeShift,
// Lock prefix
- LOCKShift = 19,
+ LOCKShift = FPTypeShift + 3,
LOCK = 1 << LOCKShift,
// Segment override prefixes. Currently we just need ability to address
// stuff in gs and fs segments.
- SegOvrShift = 20,
+ SegOvrShift = LOCKShift + 1,
SegOvrMask = 3 << SegOvrShift,
FS = 1 << SegOvrShift,
GS = 2 << SegOvrShift,
- // Execution domain for SSE instructions in bits 22, 23.
- // 0 in bits 22-23 means normal, non-SSE instruction.
- SSEDomainShift = 22,
+ // Execution domain for SSE instructions in bits 23, 24.
+ // 0 in bits 23-24 means normal, non-SSE instruction.
+ SSEDomainShift = SegOvrShift + 2,
- OpcodeShift = 24,
- OpcodeMask = 0xFF << OpcodeShift,
+ OpcodeShift = SSEDomainShift + 2,
+ OpcodeMask = 0xFFULL << OpcodeShift,
//===------------------------------------------------------------------===//
/// VEX - The opcode prefix used by AVX instructions
+ VEXShift = OpcodeShift + 8,
VEX = 1U << 0,
/// VEX_W - Has a opcode specific functionality, but is used in the same
@@ -473,7 +475,7 @@ namespace X86II {
/// if a VR256 register is used, but some AVX instructions also have this
/// field marked when using an f256 memory reference.
VEX_L = 1U << 4,
-
+
/// Has3DNow0F0FOpcode - This flag indicates that the instruction uses the
/// wacky 0x0F 0x0F prefix for 3DNow! instructions. The manual documents
/// this as having a 0x0F prefix with a 0x0F opcode, and each instruction
@@ -482,18 +484,18 @@ namespace X86II {
/// this flag to indicate that the encoder should do the wacky 3DNow! thing.
Has3DNow0F0FOpcode = 1U << 5
};
-
+
// getBaseOpcodeFor - This function returns the "base" X86 opcode for the
// specified machine instruction.
//
static inline unsigned char getBaseOpcodeFor(uint64_t TSFlags) {
return TSFlags >> X86II::OpcodeShift;
}
-
+
static inline bool hasImm(uint64_t TSFlags) {
return (TSFlags & X86II::ImmMask) != 0;
}
-
+
/// getSizeOfImm - Decode the "size of immediate" field from the TSFlags field
/// of the specified instruction.
static inline unsigned getSizeOfImm(uint64_t TSFlags) {
@@ -508,7 +510,7 @@ namespace X86II {
case X86II::Imm64: return 8;
}
}
-
+
/// isImmPCRel - Return true if the immediate of the specified instruction's
/// TSFlags indicates that it is pc relative.
static inline unsigned isImmPCRel(uint64_t TSFlags) {
@@ -525,7 +527,7 @@ namespace X86II {
return false;
}
}
-
+
/// getMemoryOperandNo - The function returns the MCInst operand # for the
/// first field of the memory operand. If the instruction doesn't have a
/// memory operand, this returns -1.
@@ -549,11 +551,11 @@ namespace X86II {
case X86II::MRMDestMem:
return 0;
case X86II::MRMSrcMem: {
- bool HasVEX_4V = (TSFlags >> 32) & X86II::VEX_4V;
+ bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
unsigned FirstMemOp = 1;
if (HasVEX_4V)
++FirstMemOp;// Skip the register source (which is encoded in VEX_VVVV).
-
+
// FIXME: Maybe lea should have its own form? This is a horrible hack.
//if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r ||
// Opcode == X86::LEA16r || Opcode == X86::LEA32r)
@@ -613,7 +615,7 @@ inline static bool isMem(const MachineInstr *MI, unsigned Op) {
class X86InstrInfo : public TargetInstrInfoImpl {
X86TargetMachine &TM;
const X86RegisterInfo RI;
-
+
/// RegOp2MemOpTable2Addr, RegOp2MemOpTable0, RegOp2MemOpTable1,
/// RegOp2MemOpTable2 - Load / store folding opcode maps.
///
@@ -621,7 +623,7 @@ class X86InstrInfo : public TargetInstrInfoImpl {
DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable0;
DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable1;
DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable2;
-
+
/// MemOp2RegOpTable - Load / store unfolding opcode map.
///
DenseMap<unsigned, std::pair<unsigned, unsigned> > MemOp2RegOpTable;
@@ -795,7 +797,7 @@ public:
virtual unsigned getOpcodeAfterMemoryUnfold(unsigned Opc,
bool UnfoldLoad, bool UnfoldStore,
unsigned *LoadRegIndex = 0) const;
-
+
/// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler
/// to determine if two loads are loading from the same base address. It
/// should only return true if the base pointers are the same and the
@@ -805,7 +807,7 @@ public:
int64_t &Offset1, int64_t &Offset2) const;
/// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to
- /// determine (in conjuction with areLoadsFromSameBasePtr) if two loads should
+ /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
/// be scheduled together. On some targets, if two loads are loading from
/// addresses in the same cache line, it's better if they are scheduled
/// together. This function takes two integers that represent the load offsets
@@ -829,7 +831,7 @@ public:
return (reg == X86::SPL || reg == X86::BPL ||
reg == X86::SIL || reg == X86::DIL);
}
-
+
static bool isX86_64ExtendedReg(const MachineOperand &MO) {
if (!MO.isReg()) return false;
return isX86_64ExtendedReg(MO.getReg());
@@ -858,11 +860,13 @@ public:
const SmallVectorImpl<MachineOperand> &MOs,
unsigned Size, unsigned Alignment) const;
+ bool isHighLatencyDef(int opc) const;
+
bool hasHighOperandLatency(const InstrItineraryData *ItinData,
const MachineRegisterInfo *MRI,
const MachineInstr *DefMI, unsigned DefIdx,
const MachineInstr *UseMI, unsigned UseIdx) const;
-
+
private:
MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc,
MachineFunction::iterator &MFI,
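The hunks above stop hard-coding the TSFlags field offsets: each shift is now derived from the previous field's shift plus that field's width, so widening Op0Mask from 4 bits to 5 (making room for A6, A7, and the renumbered TF) ripples through REXShift, ImmShift, and every later field automatically. A minimal standalone sketch of the resulting layout in plain C++ (not LLVM source; the VEX_4V bit position is assumed for illustration):

#include <cassert>
#include <cstdint>

// Field layout after the patch: each shift is the previous field's shift
// plus that field's width, so nothing overlaps.
enum : uint64_t {
  Op0Shift       = 8,
  Op0Mask        = 0x1FULL << Op0Shift,  // widened from 4 bits to 5
  REXShift       = Op0Shift + 5,         // 13
  ImmShift       = REXShift + 1,         // 14
  FPTypeShift    = ImmShift + 3,         // 17
  LOCKShift      = FPTypeShift + 3,      // 20
  SegOvrShift    = LOCKShift + 1,        // 21
  SSEDomainShift = SegOvrShift + 2,      // 23
  OpcodeShift    = SSEDomainShift + 2,   // 25
  OpcodeMask     = 0xFFULL << OpcodeShift,
  VEXShift       = OpcodeShift + 8,      // 33: the VEX group sits above bit 32
  VEX_4V         = 1ULL << 2,            // bit position assumed for illustration
};

int main() {
  uint64_t TSFlags = (0xC3ULL << OpcodeShift) | (VEX_4V << VEXShift);
  // The opcode field now spans bits 25..32, hence the 0xFFULL widening.
  assert(((TSFlags & OpcodeMask) >> OpcodeShift) == 0xC3);
  // The shift-safe VEX test used by getMemoryOperandNo in the hunk above.
  assert((TSFlags >> VEXShift) & VEX_4V);
  return 0;
}

With OpcodeShift pushed to 25, the VEX flags land above bit 32, which is why the hard-coded (TSFlags >> 32) test in getMemoryOperandNo had to become (TSFlags >> X86II::VEXShift).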
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index f832a7c..03a0b0c 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -459,7 +459,7 @@ def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">;
include "X86InstrFormats.td"
//===----------------------------------------------------------------------===//
-// Pattern fragments...
+// Pattern fragments.
//
// X86 specific condition code. These correspond to CondCode in
@@ -481,21 +481,21 @@ def X86_COND_O : PatLeaf<(i8 13)>;
def X86_COND_P : PatLeaf<(i8 14)>; // alt. COND_PE
def X86_COND_S : PatLeaf<(i8 15)>;
-def immSext8 : PatLeaf<(imm), [{ return immSext8(N); }]>;
+let FastIselShouldIgnore = 1 in { // FastIsel should ignore all simm8 instrs.
+ def i16immSExt8 : ImmLeaf<i16, [{ return Imm == (int8_t)Imm; }]>;
+ def i32immSExt8 : ImmLeaf<i32, [{ return Imm == (int8_t)Imm; }]>;
+ def i64immSExt8 : ImmLeaf<i64, [{ return Imm == (int8_t)Imm; }]>;
+}
-def i16immSExt8 : PatLeaf<(i16 immSext8)>;
-def i32immSExt8 : PatLeaf<(i32 immSext8)>;
-def i64immSExt8 : PatLeaf<(i64 immSext8)>;
-def i64immSExt32 : PatLeaf<(i64 imm), [{ return i64immSExt32(N); }]>;
-def i64immZExt32 : PatLeaf<(i64 imm), [{
- // i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit
- // unsignedsign extended field.
- return (uint64_t)N->getZExtValue() == (uint32_t)N->getZExtValue();
-}]>;
+def i64immSExt32 : ImmLeaf<i64, [{ return Imm == (int32_t)Imm; }]>;
+
+
+// i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit
+// unsigned field.
+def i64immZExt32 : ImmLeaf<i64, [{ return (uint64_t)Imm == (uint32_t)Imm; }]>;
-def i64immZExt32SExt8 : PatLeaf<(i64 imm), [{
- uint64_t v = N->getZExtValue();
- return v == (uint32_t)v && (int32_t)v == (int8_t)v;
+def i64immZExt32SExt8 : ImmLeaf<i64, [{
+ return (uint64_t)Imm == (uint32_t)Imm && (int32_t)Imm == (int8_t)Imm;
}]>;
// Helper fragments for loads.
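One practical consequence of the PatLeaf-to-ImmLeaf switch above: an ImmLeaf body receives the already-extracted immediate as a plain Imm value rather than an SDNode, so each predicate reduces to an integer round trip. A plain-C++ restatement of the three 64-bit predicates (illustrative sketch, not LLVM source):

#include <cassert>
#include <cstdint>

// i64immSExt8: the value survives a round trip through int8_t.
bool isSExt8(int64_t Imm)  { return Imm == (int8_t)Imm; }
// i64immSExt32: the value survives a round trip through int32_t.
bool isSExt32(int64_t Imm) { return Imm == (int32_t)Imm; }
// i64immZExt32: the high 32 bits are zero.
bool isZExt32(int64_t Imm) { return (uint64_t)Imm == (uint32_t)Imm; }

int main() {
  assert(isSExt8(-128) && !isSExt8(128));            // int8_t range is [-128, 127]
  assert(isSExt32(-2147483648LL) && !isSExt32(2147483648LL));
  assert(isZExt32(4294967295LL) && !isZExt32(1LL << 32));
  assert(!isZExt32(-1));                             // high bits are all ones
  return 0;
}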
@@ -1437,7 +1437,7 @@ def : InstAlias<"idivq $src, %rax", (IDIV64m i64mem:$src)>;
// Various unary fpstack operations default to operating on ST1.
// For example, "fxch" -> "fxch %st(1)"
-def : InstAlias<"faddp", (ADD_FPrST0 ST1)>;
+def : InstAlias<"faddp", (ADD_FPrST0 ST1), 0>;
def : InstAlias<"fsubp", (SUBR_FPrST0 ST1)>;
def : InstAlias<"fsubrp", (SUB_FPrST0 ST1)>;
def : InstAlias<"fmulp", (MUL_FPrST0 ST1)>;
@@ -1455,13 +1455,15 @@ def : InstAlias<"fucompi", (UCOM_FIPr ST1)>;
// For example, "fadd %st(4), %st(0)" -> "fadd %st(4)". We also disambiguate
// instructions like "fadd %st(0), %st(0)" as "fadd %st(0)" for consistency with
// gas.
-multiclass FpUnaryAlias<string Mnemonic, Instruction Inst> {
- def : InstAlias<!strconcat(Mnemonic, " $op, %st(0)"), (Inst RST:$op)>;
- def : InstAlias<!strconcat(Mnemonic, " %st(0), %st(0)"), (Inst ST0)>;
+multiclass FpUnaryAlias<string Mnemonic, Instruction Inst, bit EmitAlias = 1> {
+ def : InstAlias<!strconcat(Mnemonic, " $op, %st(0)"),
+ (Inst RST:$op), EmitAlias>;
+ def : InstAlias<!strconcat(Mnemonic, " %st(0), %st(0)"),
+ (Inst ST0), EmitAlias>;
}
defm : FpUnaryAlias<"fadd", ADD_FST0r>;
-defm : FpUnaryAlias<"faddp", ADD_FPrST0>;
+defm : FpUnaryAlias<"faddp", ADD_FPrST0, 0>;
defm : FpUnaryAlias<"fsub", SUB_FST0r>;
defm : FpUnaryAlias<"fsubp", SUBR_FPrST0>;
defm : FpUnaryAlias<"fsubr", SUBR_FST0r>;
@@ -1472,8 +1474,8 @@ defm : FpUnaryAlias<"fdiv", DIV_FST0r>;
defm : FpUnaryAlias<"fdivp", DIVR_FPrST0>;
defm : FpUnaryAlias<"fdivr", DIVR_FST0r>;
defm : FpUnaryAlias<"fdivrp", DIV_FPrST0>;
-defm : FpUnaryAlias<"fcomi", COM_FIr>;
-defm : FpUnaryAlias<"fucomi", UCOM_FIr>;
+defm : FpUnaryAlias<"fcomi", COM_FIr, 0>;
+defm : FpUnaryAlias<"fucomi", UCOM_FIr, 0>;
defm : FpUnaryAlias<"fcompi", COM_FIPr>;
defm : FpUnaryAlias<"fucompi", UCOM_FIPr>;
@@ -1481,7 +1483,7 @@ defm : FpUnaryAlias<"fucompi", UCOM_FIPr>;
// Handle "f{mulp,addp} st(0), $op" the same as "f{mulp,addp} $op", since they
// commute. We also allow fdiv[r]p/fsubrp even though they don't commute,
// solely because gas supports it.
-def : InstAlias<"faddp %st(0), $op", (ADD_FPrST0 RST:$op)>;
+def : InstAlias<"faddp %st(0), $op", (ADD_FPrST0 RST:$op), 0>;
def : InstAlias<"fmulp %st(0), $op", (MUL_FPrST0 RST:$op)>;
def : InstAlias<"fsubrp %st(0), $op", (SUB_FPrST0 RST:$op)>;
def : InstAlias<"fdivp %st(0), $op", (DIVR_FPrST0 RST:$op)>;
@@ -1534,29 +1536,31 @@ def : InstAlias<"mov $seg, $mem", (MOV32ms i32mem:$mem, SEGMENT_REG:$seg)>;
def : InstAlias<"movq $imm, $reg", (MOV64ri GR64:$reg, i64imm:$imm)>;
// Match 'movq GR64, MMX' as an alias for movd.
-def : InstAlias<"movq $src, $dst", (MMX_MOVD64to64rr VR64:$dst, GR64:$src)>;
-def : InstAlias<"movq $src, $dst", (MMX_MOVD64from64rr GR64:$dst, VR64:$src)>;
+def : InstAlias<"movq $src, $dst",
+ (MMX_MOVD64to64rr VR64:$dst, GR64:$src), 0>;
+def : InstAlias<"movq $src, $dst",
+ (MMX_MOVD64from64rr GR64:$dst, VR64:$src), 0>;
// movsd with no operands (as opposed to the SSE scalar move of a double) is an
// alias for movsl. (as in rep; movsd)
def : InstAlias<"movsd", (MOVSD)>;
// movsx aliases
-def : InstAlias<"movsx $src, $dst", (MOVSX16rr8W GR16:$dst, GR8:$src)>;
-def : InstAlias<"movsx $src, $dst", (MOVSX16rm8W GR16:$dst, i8mem:$src)>;
-def : InstAlias<"movsx $src, $dst", (MOVSX32rr8 GR32:$dst, GR8:$src)>;
-def : InstAlias<"movsx $src, $dst", (MOVSX32rr16 GR32:$dst, GR16:$src)>;
-def : InstAlias<"movsx $src, $dst", (MOVSX64rr8 GR64:$dst, GR8:$src)>;
-def : InstAlias<"movsx $src, $dst", (MOVSX64rr16 GR64:$dst, GR16:$src)>;
-def : InstAlias<"movsx $src, $dst", (MOVSX64rr32 GR64:$dst, GR32:$src)>;
+def : InstAlias<"movsx $src, $dst", (MOVSX16rr8W GR16:$dst, GR8:$src), 0>;
+def : InstAlias<"movsx $src, $dst", (MOVSX16rm8W GR16:$dst, i8mem:$src), 0>;
+def : InstAlias<"movsx $src, $dst", (MOVSX32rr8 GR32:$dst, GR8:$src), 0>;
+def : InstAlias<"movsx $src, $dst", (MOVSX32rr16 GR32:$dst, GR16:$src), 0>;
+def : InstAlias<"movsx $src, $dst", (MOVSX64rr8 GR64:$dst, GR8:$src), 0>;
+def : InstAlias<"movsx $src, $dst", (MOVSX64rr16 GR64:$dst, GR16:$src), 0>;
+def : InstAlias<"movsx $src, $dst", (MOVSX64rr32 GR64:$dst, GR32:$src), 0>;
// movzx aliases
-def : InstAlias<"movzx $src, $dst", (MOVZX16rr8W GR16:$dst, GR8:$src)>;
-def : InstAlias<"movzx $src, $dst", (MOVZX16rm8W GR16:$dst, i8mem:$src)>;
-def : InstAlias<"movzx $src, $dst", (MOVZX32rr8 GR32:$dst, GR8:$src)>;
-def : InstAlias<"movzx $src, $dst", (MOVZX32rr16 GR32:$dst, GR16:$src)>;
-def : InstAlias<"movzx $src, $dst", (MOVZX64rr8_Q GR64:$dst, GR8:$src)>;
-def : InstAlias<"movzx $src, $dst", (MOVZX64rr16_Q GR64:$dst, GR16:$src)>;
+def : InstAlias<"movzx $src, $dst", (MOVZX16rr8W GR16:$dst, GR8:$src), 0>;
+def : InstAlias<"movzx $src, $dst", (MOVZX16rm8W GR16:$dst, i8mem:$src), 0>;
+def : InstAlias<"movzx $src, $dst", (MOVZX32rr8 GR32:$dst, GR8:$src), 0>;
+def : InstAlias<"movzx $src, $dst", (MOVZX32rr16 GR32:$dst, GR16:$src), 0>;
+def : InstAlias<"movzx $src, $dst", (MOVZX64rr8_Q GR64:$dst, GR8:$src), 0>;
+def : InstAlias<"movzx $src, $dst", (MOVZX64rr16_Q GR64:$dst, GR16:$src), 0>;
// Note: No GR32->GR64 movzx form.
// outb %dx -> outb %al, %dx
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index b912949..cde3f6b 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -135,18 +135,16 @@ class sse12_move_rm<RegisterClass RC, X86MemOperand x86memop,
// is used instead. Register-to-register movss/movsd is not modeled as an
// INSERT_SUBREG because INSERT_SUBREG requires that the insert be implementable
// in terms of a copy, and, as just mentioned, we don't use movss/movsd for copies.
-let isAsmParserOnly = 1 in {
- def VMOVSSrr : sse12_move_rr<FR32, v4f32,
- "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XS, VEX_4V;
- def VMOVSDrr : sse12_move_rr<FR64, v2f64,
- "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XD, VEX_4V;
+def VMOVSSrr : sse12_move_rr<FR32, v4f32,
+ "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XS, VEX_4V;
+def VMOVSDrr : sse12_move_rr<FR64, v2f64,
+ "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XD, VEX_4V;
- let canFoldAsLoad = 1, isReMaterializable = 1 in {
- def VMOVSSrm : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS, VEX;
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+ def VMOVSSrm : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS, VEX;
- let AddedComplexity = 20 in
- def VMOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD, VEX;
- }
+ let AddedComplexity = 20 in
+ def VMOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD, VEX;
}
let Constraints = "$src1 = $dst" in {
@@ -218,14 +216,12 @@ def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
"movsd\t{$src, $dst|$dst, $src}",
[(store FR64:$src, addr:$dst)]>;
-let isAsmParserOnly = 1 in {
def VMOVSSmr : SI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
"movss\t{$src, $dst|$dst, $src}",
[(store FR32:$src, addr:$dst)]>, XS, VEX;
def VMOVSDmr : SI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
"movsd\t{$src, $dst|$dst, $src}",
[(store FR64:$src, addr:$dst)]>, XD, VEX;
-}
// Extract and store.
def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
@@ -251,7 +247,6 @@ let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable in
[(set RC:$dst, (ld_frag addr:$src))], d>;
}
-let isAsmParserOnly = 1 in {
defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
"movaps", SSEPackedSingle>, VEX;
defm VMOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64,
@@ -269,7 +264,6 @@ defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32,
"movups", SSEPackedSingle>, VEX;
defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64,
"movupd", SSEPackedDouble, 0>, OpSize, VEX;
-}
defm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
"movaps", SSEPackedSingle>, TB;
defm MOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64,
@@ -279,7 +273,6 @@ defm MOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32,
defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
"movupd", SSEPackedDouble, 0>, TB, OpSize;
-let isAsmParserOnly = 1 in {
def VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movaps\t{$src, $dst|$dst, $src}",
[(alignedstore (v4f32 VR128:$src), addr:$dst)]>, VEX;
@@ -304,7 +297,6 @@ def VMOVUPSYmr : VPSI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
"movupd\t{$src, $dst|$dst, $src}",
[(store (v4f64 VR256:$src), addr:$dst)]>, VEX;
-}
def : Pat<(int_x86_avx_loadu_ps_256 addr:$src), (VMOVUPSYrm addr:$src)>;
def : Pat<(int_x86_avx_storeu_ps_256 addr:$dst, VR256:$src),
@@ -328,32 +320,14 @@ def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
[(store (v2f64 VR128:$src), addr:$dst)]>;
// Intrinsic forms of MOVUPS/D load and store
-let isAsmParserOnly = 1 in {
- let canFoldAsLoad = 1, isReMaterializable = 1 in
- def VMOVUPSrm_Int : VPSI<0x10, MRMSrcMem, (outs VR128:$dst),
- (ins f128mem:$src),
- "movups\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse_loadu_ps addr:$src))]>, VEX;
- def VMOVUPDrm_Int : VPDI<0x10, MRMSrcMem, (outs VR128:$dst),
- (ins f128mem:$src),
- "movupd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_loadu_pd addr:$src))]>, VEX;
- def VMOVUPSmr_Int : VPSI<0x11, MRMDestMem, (outs),
- (ins f128mem:$dst, VR128:$src),
- "movups\t{$src, $dst|$dst, $src}",
- [(int_x86_sse_storeu_ps addr:$dst, VR128:$src)]>, VEX;
- def VMOVUPDmr_Int : VPDI<0x11, MRMDestMem, (outs),
- (ins f128mem:$dst, VR128:$src),
- "movupd\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_storeu_pd addr:$dst, VR128:$src)]>, VEX;
-}
-let canFoldAsLoad = 1, isReMaterializable = 1 in
-def MOVUPSrm_Int : PSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "movups\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse_loadu_ps addr:$src))]>;
-def MOVUPDrm_Int : PDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "movupd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_loadu_pd addr:$src))]>;
+def VMOVUPSmr_Int : VPSI<0x11, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movups\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse_storeu_ps addr:$dst, VR128:$src)]>, VEX;
+def VMOVUPDmr_Int : VPDI<0x11, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movupd\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_storeu_pd addr:$dst, VR128:$src)]>, VEX;
def MOVUPSmr_Int : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movups\t{$src, $dst|$dst, $src}",
@@ -382,7 +356,7 @@ multiclass sse12_mov_hilo_packed<bits<8>opc, RegisterClass RC,
SSEPackedDouble>, TB, OpSize;
}
-let isAsmParserOnly = 1, AddedComplexity = 20 in {
+let AddedComplexity = 20 in {
defm VMOVL : sse12_mov_hilo_packed<0x12, VR128, movlp, "movlp",
"\t{$src2, $src1, $dst|$dst, $src1, $src2}">, VEX_4V;
defm VMOVH : sse12_mov_hilo_packed<0x16, VR128, movlhps, "movhp",
@@ -395,7 +369,6 @@ let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
"\t{$src2, $dst|$dst, $src2}">;
}
-let isAsmParserOnly = 1 in {
def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movlps\t{$src, $dst|$dst, $src}",
[(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
@@ -404,7 +377,6 @@ def VMOVLPDmr : VPDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movlpd\t{$src, $dst|$dst, $src}",
[(store (f64 (vector_extract (v2f64 VR128:$src),
(iPTR 0))), addr:$dst)]>, VEX;
-}
def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movlps\t{$src, $dst|$dst, $src}",
[(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
@@ -416,7 +388,6 @@ def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
// v2f64 extract element 1 is always custom lowered to unpack high to low
// and extract element 0 so the non-store version isn't too horrible.
-let isAsmParserOnly = 1 in {
def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movhps\t{$src, $dst|$dst, $src}",
[(store (f64 (vector_extract
@@ -429,7 +400,6 @@ def VMOVHPDmr : VPDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
(v2f64 (unpckh VR128:$src, (undef))),
(iPTR 0))), addr:$dst)]>,
VEX;
-}
def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movhps\t{$src, $dst|$dst, $src}",
[(store (f64 (vector_extract
@@ -441,7 +411,7 @@ def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
(v2f64 (unpckh VR128:$src, (undef))),
(iPTR 0))), addr:$dst)]>;
-let isAsmParserOnly = 1, AddedComplexity = 20 in {
+let AddedComplexity = 20 in {
def VMOVLHPSrr : VPSI<0x16, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
"movlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -516,7 +486,6 @@ multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
!strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>;
}
-let isAsmParserOnly = 1 in {
defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
"cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX;
defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
@@ -542,7 +511,6 @@ defm VCVTSI2SDL : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">, XD,
VEX_4V;
defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">, XD,
VEX_4V, VEX_W;
-}
defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
"cvttss2si\t{$src, $dst|$dst, $src}">, XS;
@@ -591,27 +559,25 @@ multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
[(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))]>;
}
-let isAsmParserOnly = 1 in {
- defm Int_VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
- f32mem, load, "cvtss2si">, XS, VEX;
- defm Int_VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
- int_x86_sse_cvtss2si64, f32mem, load, "cvtss2si">,
- XS, VEX, VEX_W;
- defm Int_VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
- f128mem, load, "cvtsd2si">, XD, VEX;
- defm Int_VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
- int_x86_sse2_cvtsd2si64, f128mem, load, "cvtsd2si">,
- XD, VEX, VEX_W;
-
- // FIXME: The asm matcher has a hack to ignore instructions with _Int and Int_
- // Get rid of this hack or rename the intrinsics, there are several
- // intructions that only match with the intrinsic form, why create duplicates
- // to let them be recognized by the assembler?
- defm VCVTSD2SI_alt : sse12_cvt_s_np<0x2D, FR64, GR32, f64mem,
- "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX;
- defm VCVTSD2SI64 : sse12_cvt_s_np<0x2D, FR64, GR64, f64mem,
- "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX, VEX_W;
-}
+defm Int_VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
+ f32mem, load, "cvtss2si">, XS, VEX;
+defm Int_VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
+ int_x86_sse_cvtss2si64, f32mem, load, "cvtss2si">,
+ XS, VEX, VEX_W;
+defm Int_VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
+ f128mem, load, "cvtsd2si">, XD, VEX;
+defm Int_VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
+ int_x86_sse2_cvtsd2si64, f128mem, load, "cvtsd2si">,
+ XD, VEX, VEX_W;
+
+// FIXME: The asm matcher has a hack to ignore instructions with _Int and Int_
+// Get rid of this hack or rename the intrinsics; there are several
+// instructions that only match with the intrinsic form, so why create
+// duplicates to let them be recognized by the assembler?
+defm VCVTSD2SI_alt : sse12_cvt_s_np<0x2D, FR64, GR32, f64mem,
+ "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX;
+defm VCVTSD2SI64 : sse12_cvt_s_np<0x2D, FR64, GR64, f64mem,
+ "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX, VEX_W;
defm Int_CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
f32mem, load, "cvtss2si">, XS;
defm Int_CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64,
@@ -622,18 +588,16 @@ defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64,
f128mem, load, "cvtsd2si{q}">, XD, REX_W;
-let isAsmParserOnly = 1 in {
- defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
- int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss", 0>, XS, VEX_4V;
- defm Int_VCVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
- int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss", 0>, XS, VEX_4V,
- VEX_W;
- defm Int_VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
- int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd", 0>, XD, VEX_4V;
- defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
- int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd", 0>, XD,
- VEX_4V, VEX_W;
-}
+defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
+ int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss", 0>, XS, VEX_4V;
+defm Int_VCVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
+ int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss", 0>, XS, VEX_4V,
+ VEX_W;
+defm Int_VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
+ int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd", 0>, XD, VEX_4V;
+defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
+ int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd", 0>, XD,
+ VEX_4V, VEX_W;
let Constraints = "$src1 = $dst" in {
defm Int_CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
@@ -653,7 +617,6 @@ let Constraints = "$src1 = $dst" in {
/// SSE 1 Only
// Aliases for intrinsics
-let isAsmParserOnly = 1 in {
defm Int_VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
f32mem, load, "cvttss2si">, XS, VEX;
defm Int_VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
@@ -664,7 +627,6 @@ defm Int_VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
defm Int_VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
int_x86_sse2_cvttsd2si64, f128mem, load,
"cvttsd2si">, XD, VEX, VEX_W;
-}
defm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
f32mem, load, "cvttss2si">, XS;
defm Int_CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
@@ -676,7 +638,7 @@ defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
int_x86_sse2_cvttsd2si64, f128mem, load,
"cvttsd2si{q}">, XD, REX_W;
-let isAsmParserOnly = 1, Pattern = []<dag> in {
+let Pattern = []<dag> in {
defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load,
"cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS, VEX;
defm VCVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load,
@@ -702,7 +664,6 @@ defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load /*dummy*/,
/// SSE 2 Only
// Convert scalar double to scalar single
-let isAsmParserOnly = 1 in {
def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
(ins FR64:$src1, FR64:$src2),
"cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
@@ -711,7 +672,6 @@ def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst),
(ins FR64:$src1, f64mem:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[]>, XD, Requires<[HasAVX, OptForSize]>, VEX_4V;
-}
def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>,
Requires<[HasAVX]>;
@@ -723,7 +683,6 @@ def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
[(set FR32:$dst, (fround (loadf64 addr:$src)))]>, XD,
Requires<[HasSSE2, OptForSize]>;
-let isAsmParserOnly = 1 in
defm Int_VCVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128,
int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss", 0>,
XS, VEX_4V;
@@ -732,7 +691,7 @@ defm Int_CVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128,
int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss">, XS;
// Convert scalar single to scalar double
-let isAsmParserOnly = 1 in { // SSE2 instructions with XS prefix
+// SSE2 instructions with XS prefix
def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
(ins FR32:$src1, FR32:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -741,7 +700,6 @@ def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
(ins FR32:$src1, f32mem:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[]>, XS, VEX_4V, Requires<[HasAVX, OptForSize]>;
-}
def : Pat<(f64 (fextend FR32:$src)), (VCVTSS2SDrr FR32:$src, FR32:$src)>,
Requires<[HasAVX]>;
@@ -754,7 +712,6 @@ def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
[(set FR64:$dst, (extloadf32 addr:$src))]>, XS,
Requires<[HasSSE2, OptForSize]>;
-let isAsmParserOnly = 1 in {
def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -767,7 +724,6 @@ def Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem,
[(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
(load addr:$src2)))]>, XS, VEX_4V,
Requires<[HasAVX]>;
-}
let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix
def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
@@ -788,7 +744,7 @@ def : Pat<(extloadf32 addr:$src),
Requires<[HasSSE2, OptForSpeed]>;
// Convert doubleword to packed single/double fp
-let isAsmParserOnly = 1 in { // SSE2 instructions without OpSize prefix
+// SSE2 instructions without OpSize prefix
def Int_VCVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtdq2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))]>,
@@ -798,7 +754,6 @@ def Int_VCVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
[(set VR128:$dst, (int_x86_sse2_cvtdq2ps
(bitconvert (memopv2i64 addr:$src))))]>,
TB, VEX, Requires<[HasAVX]>;
-}
def Int_CVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtdq2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))]>,
@@ -810,7 +765,7 @@ def Int_CVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
TB, Requires<[HasSSE2]>;
// FIXME: why is the non-intrinsic version described as SSE3?
-let isAsmParserOnly = 1 in { // SSE2 instructions with XS prefix
+// SSE2 instructions with XS prefix
def Int_VCVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))]>,
@@ -820,7 +775,6 @@ def Int_VCVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
[(set VR128:$dst, (int_x86_sse2_cvtdq2pd
(bitconvert (memopv2i64 addr:$src))))]>,
XS, VEX, Requires<[HasAVX]>;
-}
def Int_CVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))]>,
@@ -833,7 +787,6 @@ def Int_CVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
// Convert packed single/double fp to doubleword
-let isAsmParserOnly = 1 in {
def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
@@ -842,13 +795,11 @@ def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
-}
def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}", []>;
def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}", []>;
-let isAsmParserOnly = 1 in {
def Int_VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))]>,
@@ -858,7 +809,6 @@ def Int_VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2dq
(memop addr:$src)))]>, VEX;
-}
def Int_CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))]>;
@@ -867,7 +817,7 @@ def Int_CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
[(set VR128:$dst, (int_x86_sse2_cvtps2dq
(memop addr:$src)))]>;
-let isAsmParserOnly = 1 in { // SSE2 packed instructions with XD prefix
+// SSE2 packed instructions with XD prefix
def Int_VCVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>,
@@ -877,7 +827,6 @@ def Int_VCVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
[(set VR128:$dst, (int_x86_sse2_cvtpd2dq
(memop addr:$src)))]>,
XD, VEX, Requires<[HasAVX]>;
-}
def Int_CVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>,
@@ -890,7 +839,7 @@ def Int_CVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
// Convert with truncation packed single/double fp to doubleword
-let isAsmParserOnly = 1 in { // SSE2 packed instructions with XS prefix
+// SSE2 packed instructions with XS prefix
def VCVTTPS2DQrr : VSSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
def VCVTTPS2DQrm : VSSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
@@ -899,7 +848,6 @@ def VCVTTPS2DQYrr : VSSI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
def VCVTTPS2DQYrm : VSSI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
-}
def CVTTPS2DQrr : SSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -910,7 +858,6 @@ def CVTTPS2DQrm : SSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
(int_x86_sse2_cvttps2dq (memop addr:$src)))]>;
-let isAsmParserOnly = 1 in {
def Int_VCVTTPS2DQrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -921,9 +868,7 @@ def Int_VCVTTPS2DQrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
[(set VR128:$dst, (int_x86_sse2_cvttps2dq
(memop addr:$src)))]>,
XS, VEX, Requires<[HasAVX]>;
-}
-let isAsmParserOnly = 1 in {
def Int_VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
@@ -934,7 +879,6 @@ def Int_VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvttpd2dq
(memop addr:$src)))]>, VEX;
-}
def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>;
@@ -943,7 +887,6 @@ def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
[(set VR128:$dst, (int_x86_sse2_cvttpd2dq
(memop addr:$src)))]>;
-let isAsmParserOnly = 1 in {
// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
// Provide other assembly rr and rm forms to address this explicitly.
@@ -963,10 +906,9 @@ def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
"cvttpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX;
def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
"cvttpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L;
-}
// Convert packed single to packed double
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
// SSE2 instructions without OpSize prefix
def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX;
@@ -982,7 +924,6 @@ def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}", []>, TB;
-let isAsmParserOnly = 1 in {
def Int_VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>,
@@ -992,7 +933,6 @@ def Int_VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
[(set VR128:$dst, (int_x86_sse2_cvtps2pd
(load addr:$src)))]>,
VEX, Requires<[HasAVX]>;
-}
def Int_CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>,
@@ -1004,7 +944,6 @@ def Int_CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
TB, Requires<[HasSSE2]>;
// Convert packed double to packed single
-let isAsmParserOnly = 1 in {
// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
// Provide other assembly rr and rm forms to address this explicitly.
@@ -1024,14 +963,12 @@ def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
"cvtpd2psy\t{$src, $dst|$dst, $src}", []>, VEX;
def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
"cvtpd2psy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L;
-}
def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}", []>;
def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}", []>;
-let isAsmParserOnly = 1 in {
def Int_VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))]>;
@@ -1040,7 +977,6 @@ def Int_VCVTPD2PSrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtpd2ps
(memop addr:$src)))]>;
-}
def Int_CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))]>;
@@ -1089,26 +1025,27 @@ def : Pat<(int_x86_avx_cvtt_ps2dq_256 (memopv8f32 addr:$src)),
// sse12_cmp_scalar - sse 1 & 2 compare scalar instructions
multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
string asm, string asm_alt> {
- def rr : SIi8<0xC2, MRMSrcReg,
- (outs RC:$dst), (ins RC:$src1, RC:$src, SSECC:$cc),
- asm, []>;
- let mayLoad = 1 in
- def rm : SIi8<0xC2, MRMSrcMem,
- (outs RC:$dst), (ins RC:$src1, x86memop:$src, SSECC:$cc),
- asm, []>;
- // Accept explicit immediate argument form instead of comparison code.
let isAsmParserOnly = 1 in {
- def rr_alt : SIi8<0xC2, MRMSrcReg,
- (outs RC:$dst), (ins RC:$src1, RC:$src, i8imm:$src2),
- asm_alt, []>;
+ def rr : SIi8<0xC2, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, RC:$src, SSECC:$cc),
+ asm, []>;
let mayLoad = 1 in
- def rm_alt : SIi8<0xC2, MRMSrcMem,
- (outs RC:$dst), (ins RC:$src1, x86memop:$src, i8imm:$src2),
- asm_alt, []>;
+ def rm : SIi8<0xC2, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, x86memop:$src, SSECC:$cc),
+ asm, []>;
}
+
+ // Accept explicit immediate argument form instead of comparison code.
+ def rr_alt : SIi8<0xC2, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, RC:$src, i8imm:$src2),
+ asm_alt, []>;
+ let mayLoad = 1 in
+ def rm_alt : SIi8<0xC2, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, x86memop:$src, i8imm:$src2),
+ asm_alt, []>;
}
-let neverHasSideEffects = 1, isAsmParserOnly = 1 in {
+let neverHasSideEffects = 1 in {
defm VCMPSS : sse12_cmp_scalar<FR32, f32mem,
"cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}",
"cmpss\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}">,
@@ -1141,14 +1078,12 @@ multiclass sse12_cmp_scalar_int<RegisterClass RC, X86MemOperand x86memop,
}
// Aliases to match intrinsics which expect XMM operand(s).
-let isAsmParserOnly = 1 in {
- defm Int_VCMPSS : sse12_cmp_scalar_int<VR128, f32mem, int_x86_sse_cmp_ss,
- "cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}">,
- XS, VEX_4V;
- defm Int_VCMPSD : sse12_cmp_scalar_int<VR128, f64mem, int_x86_sse2_cmp_sd,
- "cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}">,
- XD, VEX_4V;
-}
+defm Int_VCMPSS : sse12_cmp_scalar_int<VR128, f32mem, int_x86_sse_cmp_ss,
+ "cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}">,
+ XS, VEX_4V;
+defm Int_VCMPSD : sse12_cmp_scalar_int<VR128, f64mem, int_x86_sse2_cmp_sd,
+ "cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}">,
+ XD, VEX_4V;
let Constraints = "$src1 = $dst" in {
defm Int_CMPSS : sse12_cmp_scalar_int<VR128, f32mem, int_x86_sse_cmp_ss,
"cmp${cc}ss\t{$src, $dst|$dst, $src}">, XS;
@@ -1171,28 +1106,26 @@ multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode,
}
let Defs = [EFLAGS] in {
- let isAsmParserOnly = 1 in {
- defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
- "ucomiss", SSEPackedSingle>, VEX;
- defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
- "ucomisd", SSEPackedDouble>, OpSize, VEX;
- let Pattern = []<dag> in {
- defm VCOMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load,
- "comiss", SSEPackedSingle>, VEX;
- defm VCOMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load,
- "comisd", SSEPackedDouble>, OpSize, VEX;
- }
-
- defm Int_VUCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem,
- load, "ucomiss", SSEPackedSingle>, VEX;
- defm Int_VUCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem,
- load, "ucomisd", SSEPackedDouble>, OpSize, VEX;
-
- defm Int_VCOMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem,
- load, "comiss", SSEPackedSingle>, VEX;
- defm Int_VCOMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem,
- load, "comisd", SSEPackedDouble>, OpSize, VEX;
+ defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
+ "ucomiss", SSEPackedSingle>, VEX;
+ defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
+ "ucomisd", SSEPackedDouble>, OpSize, VEX;
+ let Pattern = []<dag> in {
+ defm VCOMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load,
+ "comiss", SSEPackedSingle>, VEX;
+ defm VCOMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load,
+ "comisd", SSEPackedDouble>, OpSize, VEX;
}
+
+ defm Int_VUCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem,
+ load, "ucomiss", SSEPackedSingle>, VEX;
+ defm Int_VUCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem,
+ load, "ucomisd", SSEPackedDouble>, OpSize, VEX;
+
+ defm Int_VCOMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem,
+ load, "comiss", SSEPackedSingle>, VEX;
+ defm Int_VCOMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem,
+ load, "comisd", SSEPackedDouble>, OpSize, VEX;
defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
"ucomiss", SSEPackedSingle>, TB;
defm UCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
@@ -1220,41 +1153,40 @@ let Defs = [EFLAGS] in {
multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
Intrinsic Int, string asm, string asm_alt,
Domain d> {
- def rri : PIi8<0xC2, MRMSrcReg,
- (outs RC:$dst), (ins RC:$src1, RC:$src, SSECC:$cc), asm,
- [(set RC:$dst, (Int RC:$src1, RC:$src, imm:$cc))], d>;
- def rmi : PIi8<0xC2, MRMSrcMem,
- (outs RC:$dst), (ins RC:$src1, f128mem:$src, SSECC:$cc), asm,
- [(set RC:$dst, (Int RC:$src1, (memop addr:$src), imm:$cc))], d>;
- // Accept explicit immediate argument form instead of comparison code.
let isAsmParserOnly = 1 in {
- def rri_alt : PIi8<0xC2, MRMSrcReg,
- (outs RC:$dst), (ins RC:$src1, RC:$src, i8imm:$src2),
- asm_alt, [], d>;
- def rmi_alt : PIi8<0xC2, MRMSrcMem,
- (outs RC:$dst), (ins RC:$src1, f128mem:$src, i8imm:$src2),
- asm_alt, [], d>;
+ def rri : PIi8<0xC2, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, RC:$src, SSECC:$cc), asm,
+ [(set RC:$dst, (Int RC:$src1, RC:$src, imm:$cc))], d>;
+ def rmi : PIi8<0xC2, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, f128mem:$src, SSECC:$cc), asm,
+ [(set RC:$dst, (Int RC:$src1, (memop addr:$src), imm:$cc))], d>;
}
-}
-let isAsmParserOnly = 1 in {
- defm VCMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps,
- "cmp${cc}ps\t{$src, $src1, $dst|$dst, $src1, $src}",
- "cmpps\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
- SSEPackedSingle>, VEX_4V;
- defm VCMPPD : sse12_cmp_packed<VR128, f128mem, int_x86_sse2_cmp_pd,
- "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}",
- "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
- SSEPackedDouble>, OpSize, VEX_4V;
- defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_ps_256,
- "cmp${cc}ps\t{$src, $src1, $dst|$dst, $src1, $src}",
- "cmpps\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
- SSEPackedSingle>, VEX_4V;
- defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_pd_256,
- "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}",
- "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
- SSEPackedDouble>, OpSize, VEX_4V;
-}
+ // Accept explicit immediate argument form instead of comparison code.
+ def rri_alt : PIi8<0xC2, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, RC:$src, i8imm:$src2),
+ asm_alt, [], d>;
+ def rmi_alt : PIi8<0xC2, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, f128mem:$src, i8imm:$src2),
+ asm_alt, [], d>;
+}
+
+defm VCMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps,
+ "cmp${cc}ps\t{$src, $src1, $dst|$dst, $src1, $src}",
+ "cmpps\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
+ SSEPackedSingle>, VEX_4V;
+defm VCMPPD : sse12_cmp_packed<VR128, f128mem, int_x86_sse2_cmp_pd,
+ "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}",
+ "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
+ SSEPackedDouble>, OpSize, VEX_4V;
+defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_ps_256,
+ "cmp${cc}ps\t{$src, $src1, $dst|$dst, $src1, $src}",
+ "cmpps\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
+ SSEPackedSingle>, VEX_4V;
+defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_pd_256,
+ "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}",
+ "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
+ SSEPackedDouble>, OpSize, VEX_4V;
let Constraints = "$src1 = $dst" in {
defm CMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps,
"cmp${cc}ps\t{$src, $dst|$dst, $src}",
@@ -1294,20 +1226,18 @@ multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
(vt (shufp:$src3 RC:$src1, RC:$src2)))], d>;
}
-let isAsmParserOnly = 1 in {
- defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
- "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- memopv4f32, SSEPackedSingle>, VEX_4V;
- defm VSHUFPSY : sse12_shuffle<VR256, f256mem, v8f32,
- "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- memopv8f32, SSEPackedSingle>, VEX_4V;
- defm VSHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
- "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src2, $src2, $src3}",
- memopv2f64, SSEPackedDouble>, OpSize, VEX_4V;
- defm VSHUFPDY : sse12_shuffle<VR256, f256mem, v4f64,
- "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src2, $src2, $src3}",
- memopv4f64, SSEPackedDouble>, OpSize, VEX_4V;
-}
+defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
+ "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ memopv4f32, SSEPackedSingle>, TB, VEX_4V;
+defm VSHUFPSY : sse12_shuffle<VR256, f256mem, v8f32,
+ "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ memopv8f32, SSEPackedSingle>, TB, VEX_4V;
+defm VSHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
+ "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src2, $src2, $src3}",
+ memopv2f64, SSEPackedDouble>, TB, OpSize, VEX_4V;
+defm VSHUFPDY : sse12_shuffle<VR256, f256mem, v4f64,
+ "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src2, $src2, $src3}",
+ memopv4f64, SSEPackedDouble>, TB, OpSize, VEX_4V;
let Constraints = "$src1 = $dst" in {
defm SHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
@@ -1340,33 +1270,31 @@ multiclass sse12_unpack_interleave<bits<8> opc, PatFrag OpNode, ValueType vt,
}
let AddedComplexity = 10 in {
- let isAsmParserOnly = 1 in {
- defm VUNPCKHPS: sse12_unpack_interleave<0x15, unpckh, v4f32, memopv4f32,
- VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedSingle>, VEX_4V;
- defm VUNPCKHPD: sse12_unpack_interleave<0x15, unpckh, v2f64, memopv2f64,
- VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedDouble>, OpSize, VEX_4V;
- defm VUNPCKLPS: sse12_unpack_interleave<0x14, unpckl, v4f32, memopv4f32,
- VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedSingle>, VEX_4V;
- defm VUNPCKLPD: sse12_unpack_interleave<0x14, unpckl, v2f64, memopv2f64,
- VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedDouble>, OpSize, VEX_4V;
-
- defm VUNPCKHPSY: sse12_unpack_interleave<0x15, unpckh, v8f32, memopv8f32,
- VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedSingle>, VEX_4V;
- defm VUNPCKHPDY: sse12_unpack_interleave<0x15, unpckh, v4f64, memopv4f64,
- VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedDouble>, OpSize, VEX_4V;
- defm VUNPCKLPSY: sse12_unpack_interleave<0x14, unpckl, v8f32, memopv8f32,
- VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedSingle>, VEX_4V;
- defm VUNPCKLPDY: sse12_unpack_interleave<0x14, unpckl, v4f64, memopv4f64,
- VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedDouble>, OpSize, VEX_4V;
- }
+ defm VUNPCKHPS: sse12_unpack_interleave<0x15, unpckh, v4f32, memopv4f32,
+ VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedSingle>, VEX_4V;
+ defm VUNPCKHPD: sse12_unpack_interleave<0x15, unpckh, v2f64, memopv2f64,
+ VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedDouble>, OpSize, VEX_4V;
+ defm VUNPCKLPS: sse12_unpack_interleave<0x14, unpckl, v4f32, memopv4f32,
+ VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedSingle>, VEX_4V;
+ defm VUNPCKLPD: sse12_unpack_interleave<0x14, unpckl, v2f64, memopv2f64,
+ VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedDouble>, OpSize, VEX_4V;
+
+ defm VUNPCKHPSY: sse12_unpack_interleave<0x15, unpckh, v8f32, memopv8f32,
+ VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedSingle>, VEX_4V;
+ defm VUNPCKHPDY: sse12_unpack_interleave<0x15, unpckh, v4f64, memopv4f64,
+ VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedDouble>, OpSize, VEX_4V;
+ defm VUNPCKLPSY: sse12_unpack_interleave<0x14, unpckl, v8f32, memopv8f32,
+ VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedSingle>, VEX_4V;
+ defm VUNPCKLPDY: sse12_unpack_interleave<0x14, unpckl, v4f64, memopv4f64,
+ VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedDouble>, OpSize, VEX_4V;
let Constraints = "$src1 = $dst" in {
defm UNPCKHPS: sse12_unpack_interleave<0x15, unpckh, v4f32, memopv4f32,
@@ -1404,30 +1332,28 @@ defm MOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, "movmskps",
defm MOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, "movmskpd",
SSEPackedDouble>, TB, OpSize;
-let isAsmParserOnly = 1 in {
- defm VMOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps,
- "movmskps", SSEPackedSingle>, VEX;
- defm VMOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd,
- "movmskpd", SSEPackedDouble>, OpSize,
- VEX;
- defm VMOVMSKPSY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_ps_256,
- "movmskps", SSEPackedSingle>, VEX;
- defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_pd_256,
- "movmskpd", SSEPackedDouble>, OpSize,
- VEX;
+defm VMOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps,
+ "movmskps", SSEPackedSingle>, VEX;
+defm VMOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd,
+ "movmskpd", SSEPackedDouble>, OpSize,
+ VEX;
+defm VMOVMSKPSY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_ps_256,
+ "movmskps", SSEPackedSingle>, VEX;
+defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_pd_256,
+ "movmskpd", SSEPackedDouble>, OpSize,
+ VEX;
- // Assembler Only
- def VMOVMSKPSr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
- "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, VEX;
- def VMOVMSKPDr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
- "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, OpSize,
- VEX;
- def VMOVMSKPSYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
- "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, VEX;
- def VMOVMSKPDYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
- "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, OpSize,
- VEX;
-}
+// Assembler Only
+def VMOVMSKPSr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
+ "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, VEX;
+def VMOVMSKPDr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
+ "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, OpSize,
+ VEX;
+def VMOVMSKPSYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
+ "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, VEX;
+def VMOVMSKPDYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
+ "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, OpSize,
+ VEX;
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Misc aliasing of packed SSE 1 & 2 instructions
@@ -1482,13 +1408,11 @@ def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
///
multiclass sse12_fp_alias_pack_logical<bits<8> opc, string OpcodeStr,
SDNode OpNode> {
- let isAsmParserOnly = 1 in {
- defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
- FR32, f32, f128mem, memopfsf32, SSEPackedSingle, 0>, VEX_4V;
+ defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
+ FR32, f32, f128mem, memopfsf32, SSEPackedSingle, 0>, VEX_4V;
- defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
- FR64, f64, f128mem, memopfsf64, SSEPackedDouble, 0>, OpSize, VEX_4V;
- }
+ defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
+ FR64, f64, f128mem, memopfsf64, SSEPackedDouble, 0>, OpSize, VEX_4V;
let Constraints = "$src1 = $dst" in {
defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, FR32,
@@ -1514,7 +1438,7 @@ let neverHasSideEffects = 1, Pattern = []<dag>, isCommutable = 0 in
multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
SDNode OpNode, int HasPat = 0,
list<list<dag>> Pattern = []> {
- let isAsmParserOnly = 1, Pattern = []<dag> in {
+ let Pattern = []<dag> in {
defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
!strconcat(OpcodeStr, "ps"), f128mem,
!if(HasPat, Pattern[0], // rr
@@ -1561,7 +1485,6 @@ multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
/// sse12_fp_packed_logical_y - AVX 256-bit SSE 1 & 2 logical ops forms
///
-let isAsmParserOnly = 1 in {
multiclass sse12_fp_packed_logical_y<bits<8> opc, string OpcodeStr> {
defm PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle,
!strconcat(OpcodeStr, "ps"), f256mem, [], [], 0>, VEX_4V;
@@ -1569,7 +1492,6 @@ multiclass sse12_fp_packed_logical_y<bits<8> opc, string OpcodeStr> {
defm PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble,
!strconcat(OpcodeStr, "pd"), f256mem, [], [], 0>, OpSize, VEX_4V;
}
-}
// AVX 256-bit packed logical ops forms
defm VAND : sse12_fp_packed_logical_y<0x54, "and">;
@@ -1667,38 +1589,36 @@ multiclass basic_sse12_fp_binop_p_y_int<bits<8> opc, string OpcodeStr> {
}
// Binary Arithmetic instructions
-let isAsmParserOnly = 1 in {
- defm VADD : basic_sse12_fp_binop_s<0x58, "add", fadd, 0>,
- basic_sse12_fp_binop_s_int<0x58, "add", 0>,
- basic_sse12_fp_binop_p<0x58, "add", fadd, 0>,
- basic_sse12_fp_binop_p_y<0x58, "add", fadd>, VEX_4V;
- defm VMUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, 0>,
- basic_sse12_fp_binop_s_int<0x59, "mul", 0>,
- basic_sse12_fp_binop_p<0x59, "mul", fmul, 0>,
- basic_sse12_fp_binop_p_y<0x59, "mul", fmul>, VEX_4V;
+defm VADD : basic_sse12_fp_binop_s<0x58, "add", fadd, 0>,
+ basic_sse12_fp_binop_s_int<0x58, "add", 0>,
+ basic_sse12_fp_binop_p<0x58, "add", fadd, 0>,
+ basic_sse12_fp_binop_p_y<0x58, "add", fadd>, VEX_4V;
+defm VMUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, 0>,
+ basic_sse12_fp_binop_s_int<0x59, "mul", 0>,
+ basic_sse12_fp_binop_p<0x59, "mul", fmul, 0>,
+ basic_sse12_fp_binop_p_y<0x59, "mul", fmul>, VEX_4V;
- let isCommutable = 0 in {
- defm VSUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, 0>,
- basic_sse12_fp_binop_s_int<0x5C, "sub", 0>,
- basic_sse12_fp_binop_p<0x5C, "sub", fsub, 0>,
- basic_sse12_fp_binop_p_y<0x5C, "sub", fsub>, VEX_4V;
- defm VDIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, 0>,
- basic_sse12_fp_binop_s_int<0x5E, "div", 0>,
- basic_sse12_fp_binop_p<0x5E, "div", fdiv, 0>,
- basic_sse12_fp_binop_p_y<0x5E, "div", fdiv>, VEX_4V;
- defm VMAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, 0>,
- basic_sse12_fp_binop_s_int<0x5F, "max", 0>,
- basic_sse12_fp_binop_p<0x5F, "max", X86fmax, 0>,
- basic_sse12_fp_binop_p_int<0x5F, "max", 0>,
- basic_sse12_fp_binop_p_y<0x5F, "max", X86fmax>,
- basic_sse12_fp_binop_p_y_int<0x5F, "max">, VEX_4V;
- defm VMIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, 0>,
- basic_sse12_fp_binop_s_int<0x5D, "min", 0>,
- basic_sse12_fp_binop_p<0x5D, "min", X86fmin, 0>,
- basic_sse12_fp_binop_p_int<0x5D, "min", 0>,
- basic_sse12_fp_binop_p_y_int<0x5D, "min">,
- basic_sse12_fp_binop_p_y<0x5D, "min", X86fmin>, VEX_4V;
- }
+let isCommutable = 0 in {
+ defm VSUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, 0>,
+ basic_sse12_fp_binop_s_int<0x5C, "sub", 0>,
+ basic_sse12_fp_binop_p<0x5C, "sub", fsub, 0>,
+ basic_sse12_fp_binop_p_y<0x5C, "sub", fsub>, VEX_4V;
+ defm VDIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, 0>,
+ basic_sse12_fp_binop_s_int<0x5E, "div", 0>,
+ basic_sse12_fp_binop_p<0x5E, "div", fdiv, 0>,
+ basic_sse12_fp_binop_p_y<0x5E, "div", fdiv>, VEX_4V;
+ defm VMAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, 0>,
+ basic_sse12_fp_binop_s_int<0x5F, "max", 0>,
+ basic_sse12_fp_binop_p<0x5F, "max", X86fmax, 0>,
+ basic_sse12_fp_binop_p_int<0x5F, "max", 0>,
+ basic_sse12_fp_binop_p_y<0x5F, "max", X86fmax>,
+ basic_sse12_fp_binop_p_y_int<0x5F, "max">, VEX_4V;
+ defm VMIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, 0>,
+ basic_sse12_fp_binop_s_int<0x5D, "min", 0>,
+ basic_sse12_fp_binop_p<0x5D, "min", X86fmin, 0>,
+ basic_sse12_fp_binop_p_int<0x5D, "min", 0>,
+ basic_sse12_fp_binop_p_y_int<0x5D, "min">,
+ basic_sse12_fp_binop_p_y<0x5D, "min", X86fmin>, VEX_4V;
}
let Constraints = "$src1 = $dst" in {
@@ -1899,7 +1819,7 @@ multiclass sse2_fp_unop_p_y_int<bits<8> opc, string OpcodeStr,
[(set VR256:$dst, (V2F64Int (memopv4f64 addr:$src)))]>;
}
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
// Square root.
defm VSQRT : sse1_fp_unop_s_avx<0x51, "vsqrt", fsqrt, int_x86_sse_sqrt_ss>,
sse2_fp_unop_s_avx<0x51, "vsqrt", fsqrt, int_x86_sse2_sqrt_sd>,
@@ -1955,67 +1875,65 @@ defm RCP : sse1_fp_unop_s<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss>,
// SSE 1 & 2 - Non-temporal stores
//===----------------------------------------------------------------------===//
-let isAsmParserOnly = 1 in {
- def VMOVNTPSmr_Int : VPSI<0x2B, MRMDestMem, (outs),
- (ins i128mem:$dst, VR128:$src),
- "movntps\t{$src, $dst|$dst, $src}",
- [(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>, VEX;
- def VMOVNTPDmr_Int : VPDI<0x2B, MRMDestMem, (outs),
- (ins i128mem:$dst, VR128:$src),
- "movntpd\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>, VEX;
+def VMOVNTPSmr_Int : VPSI<0x2B, MRMDestMem, (outs),
+ (ins i128mem:$dst, VR128:$src),
+ "movntps\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>, VEX;
+def VMOVNTPDmr_Int : VPDI<0x2B, MRMDestMem, (outs),
+ (ins i128mem:$dst, VR128:$src),
+ "movntpd\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>, VEX;
- let ExeDomain = SSEPackedInt in
- def VMOVNTDQmr_Int : VPDI<0xE7, MRMDestMem, (outs),
+let ExeDomain = SSEPackedInt in
+ def VMOVNTDQmr_Int : VPDI<0xE7, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>, VEX;
+
+let AddedComplexity = 400 in { // Prefer non-temporal versions
+ def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movntps\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f32 VR128:$src),
+ addr:$dst)]>, VEX;
+ def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs),
(ins f128mem:$dst, VR128:$src),
- "movntdq\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>, VEX;
-
- let AddedComplexity = 400 in { // Prefer non-temporal versions
- def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
- (ins f128mem:$dst, VR128:$src),
- "movntps\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v4f32 VR128:$src),
- addr:$dst)]>, VEX;
- def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs),
- (ins f128mem:$dst, VR128:$src),
- "movntpd\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v2f64 VR128:$src),
- addr:$dst)]>, VEX;
- def VMOVNTDQ_64mr : VPDI<0xE7, MRMDestMem, (outs),
- (ins f128mem:$dst, VR128:$src),
- "movntdq\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v2f64 VR128:$src),
- addr:$dst)]>, VEX;
- let ExeDomain = SSEPackedInt in
- def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs),
+ "movntpd\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v2f64 VR128:$src),
+ addr:$dst)]>, VEX;
+ def VMOVNTDQ_64mr : VPDI<0xE7, MRMDestMem, (outs),
(ins f128mem:$dst, VR128:$src),
"movntdq\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v4f32 VR128:$src),
+ [(alignednontemporalstore (v2f64 VR128:$src),
addr:$dst)]>, VEX;
-
- def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs),
- (ins f256mem:$dst, VR256:$src),
- "movntps\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v8f32 VR256:$src),
- addr:$dst)]>, VEX;
- def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs),
- (ins f256mem:$dst, VR256:$src),
- "movntpd\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v4f64 VR256:$src),
- addr:$dst)]>, VEX;
- def VMOVNTDQY_64mr : VPDI<0xE7, MRMDestMem, (outs),
- (ins f256mem:$dst, VR256:$src),
- "movntdq\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v4f64 VR256:$src),
- addr:$dst)]>, VEX;
- let ExeDomain = SSEPackedInt in
- def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
+ let ExeDomain = SSEPackedInt in
+ def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f32 VR128:$src),
+ addr:$dst)]>, VEX;
+
+ def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs),
+ (ins f256mem:$dst, VR256:$src),
+ "movntps\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v8f32 VR256:$src),
+ addr:$dst)]>, VEX;
+ def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs),
+ (ins f256mem:$dst, VR256:$src),
+ "movntpd\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f64 VR256:$src),
+ addr:$dst)]>, VEX;
+ def VMOVNTDQY_64mr : VPDI<0xE7, MRMDestMem, (outs),
(ins f256mem:$dst, VR256:$src),
"movntdq\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v8f32 VR256:$src),
+ [(alignednontemporalstore (v4f64 VR256:$src),
addr:$dst)]>, VEX;
- }
+ let ExeDomain = SSEPackedInt in
+ def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
+ (ins f256mem:$dst, VR256:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v8f32 VR256:$src),
+ addr:$dst)]>, VEX;
}
def : Pat<(int_x86_avx_movnt_dq_256 addr:$dst, VR256:$src),
@@ -2138,12 +2056,10 @@ def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
// SSE 1 & 2 - Load/Store MXCSR register
//===----------------------------------------------------------------------===//
-let isAsmParserOnly = 1 in {
- def VLDMXCSR : VPSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
- "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>, VEX;
- def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
- "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>, VEX;
-}
+def VLDMXCSR : VPSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
+ "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>, VEX;
+def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
+ "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>, VEX;
def LDMXCSR : PSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
"ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>;
@@ -2156,45 +2072,43 @@ def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
let ExeDomain = SSEPackedInt in { // SSE integer instructions
-let isAsmParserOnly = 1 in {
- let neverHasSideEffects = 1 in {
- def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
- def VMOVDQAYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
- "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
- }
- def VMOVDQUrr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX;
- def VMOVDQUYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
- "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX;
-
- let canFoldAsLoad = 1, mayLoad = 1 in {
- def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
- "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
- def VMOVDQAYrm : VPDI<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
- "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
- let Predicates = [HasAVX] in {
- def VMOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
- "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
- def VMOVDQUYrm : I<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
- "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
- }
- }
+let neverHasSideEffects = 1 in {
+def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+def VMOVDQAYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+}
+def VMOVDQUrr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX;
+def VMOVDQUYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX;
- let mayStore = 1 in {
- def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs),
- (ins i128mem:$dst, VR128:$src),
- "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
- def VMOVDQAYmr : VPDI<0x7F, MRMDestMem, (outs),
- (ins i256mem:$dst, VR256:$src),
- "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
- let Predicates = [HasAVX] in {
- def VMOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+let canFoldAsLoad = 1, mayLoad = 1 in {
+def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+def VMOVDQAYrm : VPDI<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+let Predicates = [HasAVX] in {
+ def VMOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
- def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src),
+ def VMOVDQUYrm : I<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
"vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
- }
- }
+}
+}
+
+let mayStore = 1 in {
+def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs),
+ (ins i128mem:$dst, VR128:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+def VMOVDQAYmr : VPDI<0x7F, MRMDestMem, (outs),
+ (ins i256mem:$dst, VR256:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+let Predicates = [HasAVX] in {
+def VMOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+ "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
+def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src),
+ "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
+}
}
let neverHasSideEffects = 1 in
@@ -2226,23 +2140,11 @@ def MOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
}
// Intrinsic forms of MOVDQU load and store
-let isAsmParserOnly = 1 in {
-let canFoldAsLoad = 1 in
-def VMOVDQUrm_Int : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
- "vmovdqu\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_loadu_dq addr:$src))]>,
- XS, VEX, Requires<[HasAVX]>;
def VMOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
"vmovdqu\t{$src, $dst|$dst, $src}",
[(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)]>,
XS, VEX, Requires<[HasAVX]>;
-}
-let canFoldAsLoad = 1 in
-def MOVDQUrm_Int : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
- "movdqu\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_loadu_dq addr:$src))]>,
- XS, Requires<[HasSSE2]>;
def MOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
"movdqu\t{$src, $dst|$dst, $src}",
[(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)]>,
@@ -2347,7 +2249,7 @@ multiclass PDI_binop_rm_v2i64<bits<8> opc, string OpcodeStr, SDNode OpNode,
// 128-bit Integer Arithmetic
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
defm VPADDB : PDI_binop_rm<0xFC, "vpaddb", add, v16i8, 1, 0 /*3addr*/>, VEX_4V;
defm VPADDW : PDI_binop_rm<0xFD, "vpaddw", add, v8i16, 1, 0>, VEX_4V;
defm VPADDD : PDI_binop_rm<0xFE, "vpaddd", add, v4i32, 1, 0>, VEX_4V;
@@ -2437,7 +2339,7 @@ defm PSADBW : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw, 1>;
// SSE2 - Packed Integer Logical Instructions
//===---------------------------------------------------------------------===//
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
defm VPSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "vpsllw",
int_x86_sse2_psll_w, int_x86_sse2_pslli_w, 0>,
VEX_4V;
@@ -2584,7 +2486,7 @@ let Predicates = [HasSSE2] in {
// SSE2 - Packed Integer Comparison Instructions
//===---------------------------------------------------------------------===//
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
defm VPCMPEQB : PDI_binop_rm_int<0x74, "vpcmpeqb", int_x86_sse2_pcmpeq_b, 1,
0>, VEX_4V;
defm VPCMPEQW : PDI_binop_rm_int<0x75, "vpcmpeqw", int_x86_sse2_pcmpeq_w, 1,
@@ -2638,7 +2540,7 @@ def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, (memop addr:$src2))),
// SSE2 - Packed Integer Pack Instructions
//===---------------------------------------------------------------------===//
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
defm VPACKSSWB : PDI_binop_rm_int<0x63, "vpacksswb", int_x86_sse2_packsswb_128,
0, 0>, VEX_4V;
defm VPACKSSDW : PDI_binop_rm_int<0x6B, "vpackssdw", int_x86_sse2_packssdw_128,
@@ -2676,7 +2578,7 @@ def mi : Ii8<0x70, MRMSrcMem,
}
} // ExeDomain = SSEPackedInt
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
let AddedComplexity = 5 in
defm VPSHUFD : sse2_pshuffle<"vpshufd", v4i32, pshufd, bc_v4i32>, OpSize,
VEX;
@@ -2724,7 +2626,7 @@ multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt,
addr:$src2))))]>;
}
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, unpckl, bc_v16i8,
0>, VEX_4V;
defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, unpckl, bc_v8i16,
@@ -2834,7 +2736,7 @@ multiclass sse2_pinsrw<bit Is2Addr = 1> {
}
// Extract
-let isAsmParserOnly = 1, Predicates = [HasAVX] in
+let Predicates = [HasAVX] in
def VPEXTRWri : Ii8<0xC5, MRMSrcReg,
(outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
"vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -2847,7 +2749,7 @@ def PEXTRWri : PDIi8<0xC5, MRMSrcReg,
imm:$src2))]>;
// Insert
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
defm VPINSRW : sse2_pinsrw<0>, OpSize, VEX_4V;
def VPINSRWrr64i : Ii8<0xC4, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, GR64:$src2, i32i8imm:$src3),
@@ -2866,13 +2768,11 @@ let Constraints = "$src1 = $dst" in
let ExeDomain = SSEPackedInt in {
-let isAsmParserOnly = 1 in {
def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
"pmovmskb\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>, VEX;
def VPMOVMSKBr64r : VPDI<0xD7, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
"pmovmskb\t{$src, $dst|$dst, $src}", []>, VEX;
-}
def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
"pmovmskb\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>;
@@ -2885,7 +2785,6 @@ def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
let ExeDomain = SSEPackedInt in {
-let isAsmParserOnly = 1 in {
let Uses = [EDI] in
def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs),
(ins VR128:$src, VR128:$mask),
@@ -2896,7 +2795,6 @@ def VMASKMOVDQU64 : VPDI<0xF7, MRMSrcReg, (outs),
(ins VR128:$src, VR128:$mask),
"maskmovdqu\t{$mask, $src|$src, $mask}",
[(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>, VEX;
-}
let Uses = [EDI] in
def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
@@ -2914,7 +2812,6 @@ def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
//===---------------------------------------------------------------------===//
// Move Int Doubleword to Packed Double Int
-let isAsmParserOnly = 1 in {
def VMOVDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -2924,7 +2821,6 @@ def VMOVDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
[(set VR128:$dst,
(v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
VEX;
-}
def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -2943,7 +2839,6 @@ def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
// Move Int Doubleword to Single Scalar
-let isAsmParserOnly = 1 in {
def VMOVDI2SSrr : VPDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (bitconvert GR32:$src))]>, VEX;
@@ -2952,7 +2847,6 @@ def VMOVDI2SSrm : VPDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>,
VEX;
-}
def MOVDI2SSrr : PDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (bitconvert GR32:$src))]>;
@@ -2962,7 +2856,6 @@ def MOVDI2SSrm : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
[(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>;
// Move Packed Doubleword Int to Packed Double Int
-let isAsmParserOnly = 1 in {
def VMOVPDI2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
@@ -2972,7 +2865,6 @@ def VMOVPDI2DImr : VPDI<0x7E, MRMDestMem, (outs),
"movd\t{$src, $dst|$dst, $src}",
[(store (i32 (vector_extract (v4i32 VR128:$src),
(iPTR 0))), addr:$dst)]>, VEX;
-}
def MOVPDI2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
@@ -2998,14 +2890,12 @@ def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
[(store (i64 (bitconvert FR64:$src)), addr:$dst)]>;
// Move Scalar Single to Double Int
-let isAsmParserOnly = 1 in {
def VMOVSS2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (bitconvert FR32:$src))]>, VEX;
def VMOVSS2DImr : VPDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(store (i32 (bitconvert FR32:$src)), addr:$dst)]>, VEX;
-}
def MOVSS2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (bitconvert FR32:$src))]>;
@@ -3014,7 +2904,7 @@ def MOVSS2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
[(store (i32 (bitconvert FR32:$src)), addr:$dst)]>;
// movd / movq to XMM register zero-extends
-let AddedComplexity = 15, isAsmParserOnly = 1 in {
+let AddedComplexity = 15 in {
def VMOVZDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v4i32 (X86vzmovl
@@ -3038,7 +2928,6 @@ def MOVZQI2PQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
}
let AddedComplexity = 20 in {
-let isAsmParserOnly = 1 in
def VMOVZDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -3064,7 +2953,6 @@ def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
//===---------------------------------------------------------------------===//
// Move Quadword Int to Packed Quadword Int
-let isAsmParserOnly = 1 in
def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -3077,7 +2965,6 @@ def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
Requires<[HasSSE2]>; // SSE2 instruction with XS Prefix
// Move Packed Quadword Int to Quadword Int
-let isAsmParserOnly = 1 in
def VMOVPQI2QImr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
[(store (i64 (vector_extract (v2i64 VR128:$src),
@@ -3091,7 +2978,6 @@ def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
(f64 (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
// Store / copy lower 64-bits of a XMM register.
-let isAsmParserOnly = 1 in
def VMOVLQ128mr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
[(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>, VEX;
@@ -3099,7 +2985,7 @@ def MOVLQ128mr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
[(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>;
-let AddedComplexity = 20, isAsmParserOnly = 1 in
+let AddedComplexity = 20 in
def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -3124,7 +3010,7 @@ def : Pat<(v2i64 (X86vzload addr:$src)), (MOVZQI2PQIrm addr:$src)>;
// Moving from XMM to XMM and clearing the upper 64 bits. Note: there is a
// bug in the IA32 documentation; movq xmm1, xmm2 does clear the high bits.
-let isAsmParserOnly = 1, AddedComplexity = 15 in
+let AddedComplexity = 15 in
def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
@@ -3135,7 +3021,7 @@ def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
[(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
XS, Requires<[HasSSE2]>;
-let AddedComplexity = 20, isAsmParserOnly = 1 in
+let AddedComplexity = 20 in
def VMOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2i64 (X86vzmovl
@@ -3153,7 +3039,6 @@ def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4i32 addr:$src)))),
}
// Instructions to match in the assembler
-let isAsmParserOnly = 1 in {
def VMOVQs64rr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
"movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W;
def VMOVQd64rr : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
@@ -3161,13 +3046,12 @@ def VMOVQd64rr : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
// Recognize "movd" with GR64 destination, but encode as a "movq"
def VMOVQd64rr_alt : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
"movd\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W;
-}
// Instructions for the disassembler
// xr = XMM register
// xm = mem64
-let isAsmParserOnly = 1, Predicates = [HasAVX] in
+let Predicates = [HasAVX] in
def VMOVQxrxr: I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vmovq\t{$src, $dst|$dst, $src}", []>, VEX, XS;
def MOVQxrxr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
@@ -3209,7 +3093,7 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
//===---------------------------------------------------------------------===//
// Convert Packed Double FP to Packed DW Integers
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
// Provide other assembly rr and rm forms to address this explicitly.
@@ -3237,7 +3121,7 @@ def CVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2dq\t{$src, $dst|$dst, $src}", []>;
// Convert Packed DW Integers to Packed Double FP
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
def VCVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
def VCVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
@@ -3288,7 +3172,7 @@ def rm : S3SI<op, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>;
}
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
// FIXME: Merge above classes when we have patterns for the ymm version
defm VMOVSHDUP : sse3_replicate_sfp<0x16, movshdup, "vmovshdup">, VEX;
defm VMOVSLDUP : sse3_replicate_sfp<0x12, movsldup, "vmovsldup">, VEX;
@@ -3319,7 +3203,7 @@ def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
[]>;
}
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
// FIXME: Merge above classes when we have patterns for the ymm version
defm VMOVDDUP : sse3_replicate_dfp<"vmovddup">, VEX;
defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup">, VEX;
@@ -3327,7 +3211,7 @@ let isAsmParserOnly = 1, Predicates = [HasAVX] in {
defm MOVDDUP : sse3_replicate_dfp<"movddup">;
// Move Unaligned Integer
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"vlddqu\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>, VEX;
@@ -3391,21 +3275,21 @@ multiclass sse3_addsub<Intrinsic Int, string OpcodeStr, RegisterClass RC,
[(set RC:$dst, (Int RC:$src1, (memop addr:$src2)))]>;
}
-let isAsmParserOnly = 1, Predicates = [HasAVX],
+let Predicates = [HasAVX],
ExeDomain = SSEPackedDouble in {
defm VADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", VR128,
- f128mem, 0>, XD, VEX_4V;
+ f128mem, 0>, TB, XD, VEX_4V;
defm VADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", VR128,
- f128mem, 0>, OpSize, VEX_4V;
+ f128mem, 0>, TB, OpSize, VEX_4V;
defm VADDSUBPSY : sse3_addsub<int_x86_avx_addsub_ps_256, "vaddsubps", VR256,
- f256mem, 0>, XD, VEX_4V;
+ f256mem, 0>, TB, XD, VEX_4V;
defm VADDSUBPDY : sse3_addsub<int_x86_avx_addsub_pd_256, "vaddsubpd", VR256,
- f256mem, 0>, OpSize, VEX_4V;
+ f256mem, 0>, TB, OpSize, VEX_4V;
}
let Constraints = "$src1 = $dst", Predicates = [HasSSE3],
ExeDomain = SSEPackedDouble in {
defm ADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "addsubps", VR128,
- f128mem>, XD;
+ f128mem>, TB, XD;
defm ADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "addsubpd", VR128,
f128mem>, TB, OpSize;
}
@@ -3444,7 +3328,7 @@ multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
[(set RC:$dst, (vt (IntId RC:$src1, (memop addr:$src2))))]>;
}
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
defm VHADDPS : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem,
int_x86_sse3_hadd_ps, 0>, VEX_4V;
defm VHADDPD : S3_Int <0x7C, "vhaddpd", v2f64, VR128, f128mem,
@@ -3496,7 +3380,7 @@ multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr,
(bitconvert (mem_frag128 addr:$src))))]>, OpSize;
}
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb", memopv16i8,
int_x86_ssse3_pabs_b_128>, VEX;
defm VPABSW : SS3I_unop_rm_int<0x1D, "vpabsw", memopv8i16,
@@ -3538,7 +3422,7 @@ multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
(bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
}
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
let isCommutable = 0 in {
defm VPHADDW : SS3I_binop_rm_int<0x01, "vphaddw", memopv8i16,
int_x86_ssse3_phadd_w_128, 0>, VEX_4V;
@@ -3630,7 +3514,7 @@ multiclass ssse3_palign<string asm, bit Is2Addr = 1> {
[]>, OpSize;
}
-let isAsmParserOnly = 1, Predicates = [HasAVX] in
+let Predicates = [HasAVX] in
defm VPALIGN : ssse3_palign<"vpalignr", 0>, VEX_4V;
let Constraints = "$src1 = $dst" in
defm PALIGN : ssse3_palign<"palignr">;
@@ -3985,7 +3869,7 @@ multiclass SS41I_binop_rm_int8<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
OpSize;
}
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
defm VPMOVSXBW : SS41I_binop_rm_int8<0x20, "vpmovsxbw", int_x86_sse41_pmovsxbw>,
VEX;
defm VPMOVSXWD : SS41I_binop_rm_int8<0x23, "vpmovsxwd", int_x86_sse41_pmovsxwd>,
@@ -4051,7 +3935,7 @@ multiclass SS41I_binop_rm_int4<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
OpSize;
}
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
defm VPMOVSXBD : SS41I_binop_rm_int4<0x21, "vpmovsxbd", int_x86_sse41_pmovsxbd>,
VEX;
defm VPMOVSXWQ : SS41I_binop_rm_int4<0x24, "vpmovsxwq", int_x86_sse41_pmovsxwq>,
@@ -4092,7 +3976,7 @@ multiclass SS41I_binop_rm_int2<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
OpSize;
}
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
defm VPMOVSXBQ : SS41I_binop_rm_int2<0x22, "vpmovsxbq", int_x86_sse41_pmovsxbq>,
VEX;
defm VPMOVZXBQ : SS41I_binop_rm_int2<0x32, "vpmovzxbq", int_x86_sse41_pmovzxbq>,
@@ -4134,7 +4018,7 @@ multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
// (store (i8 (trunc (X86pextrb (v16i8 VR128:$src1), imm:$src2))), addr:$dst)
}
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
defm VPEXTRB : SS41I_extract8<0x14, "vpextrb">, VEX;
def VPEXTRBrr64 : SS4AIi8<0x14, MRMDestReg, (outs GR64:$dst),
(ins VR128:$src1, i32i8imm:$src2),
@@ -4156,7 +4040,7 @@ multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> {
// (store (i16 (trunc (X86pextrw (v16i8 VR128:$src1), imm:$src2))), addr:$dst)
}
-let isAsmParserOnly = 1, Predicates = [HasAVX] in
+let Predicates = [HasAVX] in
defm VPEXTRW : SS41I_extract16<0x15, "vpextrw">, VEX;
defm PEXTRW : SS41I_extract16<0x15, "pextrw">;
@@ -4178,7 +4062,7 @@ multiclass SS41I_extract32<bits<8> opc, string OpcodeStr> {
addr:$dst)]>, OpSize;
}
-let isAsmParserOnly = 1, Predicates = [HasAVX] in
+let Predicates = [HasAVX] in
defm VPEXTRD : SS41I_extract32<0x16, "vpextrd">, VEX;
defm PEXTRD : SS41I_extract32<0x16, "pextrd">;
@@ -4199,7 +4083,7 @@ multiclass SS41I_extract64<bits<8> opc, string OpcodeStr> {
addr:$dst)]>, OpSize, REX_W;
}
-let isAsmParserOnly = 1, Predicates = [HasAVX] in
+let Predicates = [HasAVX] in
defm VPEXTRQ : SS41I_extract64<0x16, "vpextrq">, VEX, VEX_W;
defm PEXTRQ : SS41I_extract64<0x16, "pextrq">;
@@ -4222,7 +4106,7 @@ multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> {
addr:$dst)]>, OpSize;
}
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX;
def VEXTRACTPSrr64 : SS4AIi8<0x17, MRMDestReg, (outs GR64:$dst),
(ins VR128:$src1, i32i8imm:$src2),
@@ -4262,7 +4146,7 @@ multiclass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> {
imm:$src3))]>, OpSize;
}
-let isAsmParserOnly = 1, Predicates = [HasAVX] in
+let Predicates = [HasAVX] in
defm VPINSRB : SS41I_insert8<0x20, "vpinsrb", 0>, VEX_4V;
let Constraints = "$src1 = $dst" in
defm PINSRB : SS41I_insert8<0x20, "pinsrb">;
@@ -4288,7 +4172,7 @@ multiclass SS41I_insert32<bits<8> opc, string asm, bit Is2Addr = 1> {
imm:$src3)))]>, OpSize;
}
-let isAsmParserOnly = 1, Predicates = [HasAVX] in
+let Predicates = [HasAVX] in
defm VPINSRD : SS41I_insert32<0x22, "vpinsrd", 0>, VEX_4V;
let Constraints = "$src1 = $dst" in
defm PINSRD : SS41I_insert32<0x22, "pinsrd">;
@@ -4314,7 +4198,7 @@ multiclass SS41I_insert64<bits<8> opc, string asm, bit Is2Addr = 1> {
imm:$src3)))]>, OpSize;
}
-let isAsmParserOnly = 1, Predicates = [HasAVX] in
+let Predicates = [HasAVX] in
defm VPINSRQ : SS41I_insert64<0x22, "vpinsrq", 0>, VEX_4V, VEX_W;
let Constraints = "$src1 = $dst" in
defm PINSRQ : SS41I_insert64<0x22, "pinsrq">, REX_W;
@@ -4347,7 +4231,7 @@ multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> {
let Constraints = "$src1 = $dst" in
defm INSERTPS : SS41I_insertf32<0x21, "insertps">;
-let isAsmParserOnly = 1, Predicates = [HasAVX] in
+let Predicates = [HasAVX] in
defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V;
def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3),
@@ -4517,7 +4401,7 @@ multiclass sse41_fp_binop_rm_avx_s<bits<8> opcss, bits<8> opcsd,
}
// FP round - roundss, roundps, roundsd, roundpd
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
// Intrinsic form
defm VROUND : sse41_fp_unop_rm<0x08, 0x09, "vround", f128mem, VR128,
memopv4f32, memopv2f64,
@@ -4552,7 +4436,7 @@ defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round",
// ptest instruction we'll lower to this in X86ISelLowering primarily from
// the Intel intrinsic that corresponds to this.
-let Defs = [EFLAGS], isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Defs = [EFLAGS], Predicates = [HasAVX] in {
def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
"vptest\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86ptest VR128:$src1, (v4f32 VR128:$src2)))]>,
@@ -4595,7 +4479,7 @@ multiclass avx_bittest<bits<8> opc, string OpcodeStr, RegisterClass RC,
OpSize, VEX;
}
-let Defs = [EFLAGS], isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Defs = [EFLAGS], Predicates = [HasAVX] in {
defm VTESTPS : avx_bittest<0x0E, "vtestps", VR128, f128mem, memopv4f32, v4f32>;
defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, memopv8f32, v8f32>;
defm VTESTPD : avx_bittest<0x0F, "vtestpd", VR128, f128mem, memopv2f64, v2f64>;
@@ -4644,7 +4528,7 @@ multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,
(bitconvert (memopv8i16 addr:$src))))]>, OpSize;
}
-let isAsmParserOnly = 1, Predicates = [HasAVX] in
+let Predicates = [HasAVX] in
defm VPHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "vphminposuw",
int_x86_sse41_phminposuw>, VEX;
defm PHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "phminposuw",
@@ -4670,7 +4554,7 @@ multiclass SS41I_binop_rm_int<bits<8> opc, string OpcodeStr,
(bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
}
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
let isCommutable = 0 in
defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw,
0>, VEX_4V;
@@ -4737,7 +4621,7 @@ multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpSize;
}
-let isAsmParserOnly = 1, Predicates = [HasAVX] in
+let Predicates = [HasAVX] in
defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, 0>, VEX_4V;
let Constraints = "$src1 = $dst" in
defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32>;
@@ -4769,7 +4653,7 @@ multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
OpSize;
}
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
let isCommutable = 0 in {
defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps,
VR128, memopv16i8, i128mem, 0>, VEX_4V;
@@ -4810,7 +4694,7 @@ let Constraints = "$src1 = $dst" in {
}
/// SS41I_quaternary_int_avx - AVX SSE 4.1 with 4 operands
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr,
RegisterClass RC, X86MemOperand x86memop,
PatFrag mem_frag, Intrinsic IntId> {
@@ -4870,7 +4754,7 @@ defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>;
def : Pat<(X86pblendv VR128:$src1, VR128:$src2, XMM0),
(PBLENDVBrr0 VR128:$src1, VR128:$src2)>;
-let isAsmParserOnly = 1, Predicates = [HasAVX] in
+let Predicates = [HasAVX] in
def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"vmovntdqa\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>,
@@ -4904,7 +4788,7 @@ multiclass SS42I_binop_rm_int<bits<8> opc, string OpcodeStr,
(bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
}
-let isAsmParserOnly = 1, Predicates = [HasAVX] in
+let Predicates = [HasAVX] in
defm VPCMPGTQ : SS42I_binop_rm_int<0x37, "vpcmpgtq", int_x86_sse42_pcmpgtq,
0>, VEX_4V;
let Constraints = "$src1 = $dst" in
@@ -4936,8 +4820,7 @@ let Defs = [EFLAGS], usesCustomInserter = 1 in {
defm VPCMPISTRM128 : pseudo_pcmpistrm<"#VPCMPISTRM128">, Requires<[HasAVX]>;
}
-let Defs = [XMM0, EFLAGS], isAsmParserOnly = 1,
- Predicates = [HasAVX] in {
+let Defs = [XMM0, EFLAGS], Predicates = [HasAVX] in {
def VPCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs),
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
"vpcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize, VEX;
@@ -4972,7 +4855,7 @@ let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in {
defm VPCMPESTRM128 : pseudo_pcmpestrm<"#VPCMPESTRM128">, Requires<[HasAVX]>;
}
-let isAsmParserOnly = 1, Predicates = [HasAVX],
+let Predicates = [HasAVX],
Defs = [XMM0, EFLAGS], Uses = [EAX, EDX] in {
def VPCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs),
(ins VR128:$src1, VR128:$src3, i8imm:$src5),
@@ -5007,7 +4890,7 @@ let Defs = [ECX, EFLAGS] in {
}
}
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
defm VPCMPISTRI : SS42AI_pcmpistri<int_x86_sse42_pcmpistri128, "vpcmpistri">,
VEX;
defm VPCMPISTRIA : SS42AI_pcmpistri<int_x86_sse42_pcmpistria128, "vpcmpistri">,
@@ -5046,7 +4929,7 @@ let Defs = [ECX, EFLAGS], Uses = [EAX, EDX] in {
}
}
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
defm VPCMPESTRI : SS42AI_pcmpestri<int_x86_sse42_pcmpestri128, "vpcmpestri">,
VEX;
defm VPCMPESTRIA : SS42AI_pcmpestri<int_x86_sse42_pcmpestria128, "vpcmpestri">,
@@ -5165,7 +5048,7 @@ multiclass AESI_binop_rm_int<bits<8> opc, string OpcodeStr,
}
// Perform One Round of an AES Encryption/Decryption Flow
-let isAsmParserOnly = 1, Predicates = [HasAVX, HasAES] in {
+let Predicates = [HasAVX, HasAES] in {
defm VAESENC : AESI_binop_rm_int<0xDC, "vaesenc",
int_x86_aesni_aesenc, 0>, VEX_4V;
defm VAESENCLAST : AESI_binop_rm_int<0xDD, "vaesenclast",
@@ -5205,7 +5088,7 @@ def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, (memop addr:$src2))),
(AESDECLASTrm VR128:$src1, addr:$src2)>;
// Perform the AES InvMixColumn Transformation
-let isAsmParserOnly = 1, Predicates = [HasAVX, HasAES] in {
+let Predicates = [HasAVX, HasAES] in {
def VAESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1),
"vaesimc\t{$src1, $dst|$dst, $src1}",
@@ -5233,7 +5116,7 @@ def AESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
OpSize;
// AES Round Key Generation Assist
-let isAsmParserOnly = 1, Predicates = [HasAVX, HasAES] in {
+let Predicates = [HasAVX, HasAES] in {
def VAESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, i8imm:$src2),
"vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -5269,7 +5152,6 @@ def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
// Only the AVX versions of the CLMUL instructions are described here.
// Carry-less Multiplication instructions
-let isAsmParserOnly = 1 in {
def VPCLMULQDQrr : CLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
"vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
@@ -5295,13 +5177,10 @@ defm VPCLMULHQLQDQ : avx_vpclmul<"vpclmulhqlqdq">;
defm VPCLMULLQHQDQ : avx_vpclmul<"vpclmullqhqdq">;
defm VPCLMULLQLQDQ : avx_vpclmul<"vpclmullqlqdq">;
-} // isAsmParserOnly
-
//===----------------------------------------------------------------------===//
// AVX Instructions
//===----------------------------------------------------------------------===//
-let isAsmParserOnly = 1 in {
// Load from memory and broadcast to all elements of the destination operand
class avx_broadcast<bits<8> opc, string OpcodeStr, RegisterClass RC,
@@ -5435,8 +5314,6 @@ def VZEROALL : I<0x77, RawFrm, (outs), (ins), "vzeroall",
def VZEROUPPER : I<0x77, RawFrm, (outs), (ins), "vzeroupper",
[(int_x86_avx_vzeroupper)]>, VEX, Requires<[HasAVX]>;
-} // isAsmParserOnly
-
def : Pat<(int_x86_avx_vinsertf128_pd_256 VR256:$src1, VR128:$src2, imm:$src3),
(VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;
def : Pat<(int_x86_avx_vinsertf128_ps_256 VR256:$src1, VR128:$src2, imm:$src3),
@@ -5622,11 +5499,15 @@ def : Pat<(X86Movddup (bc_v2f64
// Shuffle with UNPCKLPS
def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))),
(VUNPCKLPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, (memopv8f32 addr:$src2))),
+ (VUNPCKLPSYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>;
def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))),
(UNPCKLPSrm VR128:$src1, addr:$src2)>;
def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)),
(VUNPCKLPSrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, VR256:$src2)),
+ (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>;
def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)),
(UNPCKLPSrr VR128:$src1, VR128:$src2)>;
@@ -5644,11 +5525,15 @@ def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)),
// Shuffle with UNPCKLPD
def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
(VUNPCKLPDrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, (memopv4f64 addr:$src2))),
+ (VUNPCKLPDYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>;
def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
(UNPCKLPDrm VR128:$src1, addr:$src2)>;
def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
(VUNPCKLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, VR256:$src2)),
+ (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>;
def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
(UNPCKLPDrr VR128:$src1, VR128:$src2)>;
diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td
index 6a24d14..f73cff3 100644
--- a/lib/Target/X86/X86InstrSystem.td
+++ b/lib/Target/X86/X86InstrSystem.td
@@ -34,9 +34,16 @@ let Uses = [EFLAGS] in
def INTO : I<0xce, RawFrm, (outs), (ins), "into", []>;
def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3",
[(int_x86_int (i8 3))]>;
+
+// The long form of "int $3" turns into int3 as a size optimization.
+// FIXME: This doesn't work because InstAlias can't match immediate constants.
+//def : InstAlias<"int\t$3", (INT3)>;
+
+
def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap",
[(int_x86_int imm:$trap)]>;
+
def SYSCALL : I<0x05, RawFrm, (outs), (ins), "syscall", []>, TB;
def SYSRETL : I<0x07, RawFrm, (outs), (ins), "sysretl", []>, TB;
def SYSRETQ :RI<0x07, RawFrm, (outs), (ins), "sysretq", []>, TB,
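The size optimization mentioned in the FIXME above is plain byte counting: int3
encodes as the single byte CC, while the generic "int imm8" form is CD ib, two
bytes. A minimal sketch (not patch code) of the choice an encoder would make:

    #include <cstdint>
    #include <vector>

    // Toy encoder: prefer the one-byte int3 (CC) over the two-byte
    // "int imm8" form (CD ib) when the trap number is 3.
    static std::vector<uint8_t> encodeInt(uint8_t Trap) {
      if (Trap == 3)
        return {0xCC};        // int3
      return {0xCD, Trap};    // int imm8
    }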
@@ -207,10 +214,15 @@ def LSL64rr : RI<0x03, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
def INVLPG : I<0x01, MRM7m, (outs), (ins i8mem:$addr), "invlpg\t$addr", []>, TB;
-def STRr : I<0x00, MRM1r, (outs GR16:$dst), (ins),
- "str{w}\t{$dst}", []>, TB;
-def STRm : I<0x00, MRM1m, (outs i16mem:$dst), (ins),
- "str{w}\t{$dst}", []>, TB;
+def STR16r : I<0x00, MRM1r, (outs GR16:$dst), (ins),
+ "str{w}\t{$dst}", []>, TB, OpSize;
+def STR32r : I<0x00, MRM1r, (outs GR32:$dst), (ins),
+ "str{l}\t{$dst}", []>, TB;
+def STR64r : RI<0x00, MRM1r, (outs GR64:$dst), (ins),
+ "str{q}\t{$dst}", []>, TB;
+def STRm : I<0x00, MRM1m, (outs i16mem:$dst), (ins),
+ "str{w}\t{$dst}", []>, TB;
+
def LTRr : I<0x00, MRM3r, (outs), (ins GR16:$src),
"ltr{w}\t{$src}", []>, TB;
def LTRm : I<0x00, MRM3m, (outs), (ins i16mem:$src),
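The STR split above gives each operand size its own def while the base encoding
(0F 00 /1) stays fixed; only the prefixes differ. An illustrative byte-level
view of those forms (the table is a sketch, not patch code):

    // Same opcode and /1 ModRM extension, distinguished purely by prefixes.
    struct STRForm { const char *Asm; bool OpSize66; bool RexW; };
    static const STRForm STRForms[] = {
      {"str{w}", true,  false},  // 66 0F 00 /1
      {"str{l}", false, false},  //    0F 00 /1
      {"str{q}", false, true },  // REX.W 0F 00 /1
    };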
@@ -393,3 +405,23 @@ let Defs = [RDX, RAX], Uses = [RCX] in
let Uses = [RDX, RAX, RCX] in
def XSETBV : I<0x01, MRM_D1, (outs), (ins), "xsetbv", []>, TB;
+
+//===----------------------------------------------------------------------===//
+// VIA PadLock crypto instructions
+let Defs = [RAX, RDI], Uses = [RDX, RDI] in
+ def XSTORE : I<0xc0, RawFrm, (outs), (ins), "xstore", []>, A7;
+
+let Defs = [RSI, RDI], Uses = [RBX, RDX, RSI, RDI] in {
+ def XCRYPTECB : I<0xc8, RawFrm, (outs), (ins), "xcryptecb", []>, A7;
+ def XCRYPTCBC : I<0xd0, RawFrm, (outs), (ins), "xcryptcbc", []>, A7;
+ def XCRYPTCTR : I<0xd8, RawFrm, (outs), (ins), "xcryptctr", []>, A7;
+ def XCRYPTCFB : I<0xe0, RawFrm, (outs), (ins), "xcryptcfb", []>, A7;
+ def XCRYPTOFB : I<0xe8, RawFrm, (outs), (ins), "xcryptofb", []>, A7;
+}
+
+let Defs = [RAX, RSI, RDI], Uses = [RAX, RSI, RDI] in {
+ def XSHA1 : I<0xc8, RawFrm, (outs), (ins), "xsha1", []>, A6;
+ def XSHA256 : I<0xd0, RawFrm, (outs), (ins), "xsha256", []>, A6;
+}
+let Defs = [RAX, RDX, RSI], Uses = [RAX, RSI] in
+ def MONTMUL : I<0xc0, RawFrm, (outs), (ins), "montmul", []>, A6;
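These PadLock defs pair the new A6/A7 opcode-map classes with per-instruction
base opcodes, which decodes to the usual 0F A6 / 0F A7 escape sequences. A
sketch of the resulting byte pairs, derived only from the defs above:

    #include <cstdint>

    // Escape byte (0F A6 or 0F A7) plus base opcode, as declared above.
    struct PadLockEnc { const char *Mnemonic; uint8_t Escape, Opcode; };
    static const PadLockEnc PadLockEncs[] = {
      {"xstore",    0xA7, 0xC0}, {"xcryptecb", 0xA7, 0xC8},
      {"xcryptcbc", 0xA7, 0xD0}, {"xcryptctr", 0xA7, 0xD8},
      {"xcryptcfb", 0xA7, 0xE0}, {"xcryptofb", 0xA7, 0xE8},
      {"xsha1",     0xA6, 0xC8}, {"xsha256",   0xA6, 0xD0},
      {"montmul",   0xA6, 0xC0},
    };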
diff --git a/lib/Target/X86/X86MCAsmInfo.cpp b/lib/Target/X86/X86MCAsmInfo.cpp
index 6686214..83bba52 100644
--- a/lib/Target/X86/X86MCAsmInfo.cpp
+++ b/lib/Target/X86/X86MCAsmInfo.cpp
@@ -15,7 +15,9 @@
#include "X86TargetMachine.h"
#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ELF.h"
using namespace llvm;
@@ -69,7 +71,22 @@ X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &Triple) {
DwarfUsesInlineInfoSection = true;
// Exceptions handling
- ExceptionsType = ExceptionHandling::DwarfTable;
+ ExceptionsType = ExceptionHandling::DwarfCFI;
+}
+
+const MCExpr *
+X86_64MCAsmInfoDarwin::getExprForPersonalitySymbol(const MCSymbol *Sym,
+ unsigned Encoding,
+ MCStreamer &Streamer) const {
+ MCContext &Context = Streamer.getContext();
+ const MCExpr *Res =
+ MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, Context);
+ const MCExpr *Four = MCConstantExpr::Create(4, Context);
+ return MCBinaryExpr::CreateAdd(Res, Four, Context);
+}
+
+X86_64MCAsmInfoDarwin::X86_64MCAsmInfoDarwin(const Triple &Triple)
+ : X86MCAsmInfoDarwin(Triple) {
}
X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) {
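The override above builds a GOT-pc-relative reference to the personality symbol
plus a constant 4; spelled as assembler text, that reference reads as
"sym@GOTPCREL+4". A toy rendering with illustrative names (the real code
returns an MCBinaryExpr, not a string):

    #include <string>

    // What the constructed MCExpr denotes, spelled as assembler text.
    static std::string personalityExpr(const std::string &Sym) {
      return Sym + "@GOTPCREL+4";
    }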
@@ -89,7 +106,9 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) {
SupportsDebugInformation = true;
// Exceptions handling
- ExceptionsType = ExceptionHandling::DwarfTable;
+ ExceptionsType = ExceptionHandling::DwarfCFI;
+
+ DwarfRequiresFrameSection = false;
// OpenBSD has buggy support for .quad in 32-bit mode, just split into two
// .words.
diff --git a/lib/Target/X86/X86MCAsmInfo.h b/lib/Target/X86/X86MCAsmInfo.h
index 5815225..2cd4c8e 100644
--- a/lib/Target/X86/X86MCAsmInfo.h
+++ b/lib/Target/X86/X86MCAsmInfo.h
@@ -25,6 +25,14 @@ namespace llvm {
explicit X86MCAsmInfoDarwin(const Triple &Triple);
};
+ struct X86_64MCAsmInfoDarwin : public X86MCAsmInfoDarwin {
+ explicit X86_64MCAsmInfoDarwin(const Triple &Triple);
+ virtual const MCExpr *
+ getExprForPersonalitySymbol(const MCSymbol *Sym,
+ unsigned Encoding,
+ MCStreamer &Streamer) const;
+ };
+
struct X86ELFMCAsmInfo : public MCAsmInfo {
explicit X86ELFMCAsmInfo(const Triple &Triple);
virtual const MCSection *getNonexecutableStackSection(MCContext &Ctx) const;
diff --git a/lib/Target/X86/X86MCCodeEmitter.cpp b/lib/Target/X86/X86MCCodeEmitter.cpp
index 0e3b571..f195a67 100644
--- a/lib/Target/X86/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/X86MCCodeEmitter.cpp
@@ -382,7 +382,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
const TargetInstrDesc &Desc,
raw_ostream &OS) const {
bool HasVEX_4V = false;
- if ((TSFlags >> 32) & X86II::VEX_4V)
+ if ((TSFlags >> X86II::VEXShift) & X86II::VEX_4V)
HasVEX_4V = true;
// VEX_R: opcode extension equivalent to REX.R in
@@ -446,10 +446,10 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
if (TSFlags & X86II::OpSize)
VEX_PP = 0x01;
- if ((TSFlags >> 32) & X86II::VEX_W)
+ if ((TSFlags >> X86II::VEXShift) & X86II::VEX_W)
VEX_W = 1;
- if ((TSFlags >> 32) & X86II::VEX_L)
+ if ((TSFlags >> X86II::VEXShift) & X86II::VEX_L)
VEX_L = 1;
switch (TSFlags & X86II::Op0Mask) {
@@ -470,6 +470,8 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
case X86II::XD: // F2 0F
VEX_PP = 0x3;
break;
+ case X86II::A6: // Bypass: Not used by VEX
+ case X86II::A7: // Bypass: Not used by VEX
case X86II::TB: // Bypass: Not used by VEX
case 0:
break; // No prefix!
@@ -512,13 +514,13 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
}
// To only check operands before the memory address ones, start
- // the search from the begining
+ // the search from the beginning
if (IsDestMem)
CurOp = 0;
// If the last register should be encoded in the immediate field,
// do not use any bits of the VEX prefix for this register; ignore it
- if ((TSFlags >> 32) & X86II::VEX_I8IMM)
+ if ((TSFlags >> X86II::VEXShift) & X86II::VEX_I8IMM)
NumOps--;
for (; CurOp != NumOps; ++CurOp) {
@@ -742,6 +744,8 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
case X86II::TB: // Two-byte opcode prefix
case X86II::T8: // 0F 38
case X86II::TA: // 0F 3A
+ case X86II::A6: // 0F A6
+ case X86II::A7: // 0F A7
Need0FPrefix = true;
break;
case X86II::TF: // F2 0F 38
@@ -786,6 +790,12 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
case X86II::TA: // 0F 3A
EmitByte(0x3A, CurByte, OS);
break;
+ case X86II::A6: // 0F A6
+ EmitByte(0xA6, CurByte, OS);
+ break;
+ case X86II::A7: // 0F A7
+ EmitByte(0xA7, CurByte, OS);
+ break;
}
}
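The new A6/A7 cases slot into the existing two-byte escape scheme: every map
begins with 0F, and only the second byte varies. A condensed sketch of that
dispatch, using hypothetical enumerator values for the map kinds:

    #include <cstdint>

    enum OpMap { TB_MAP, T8_MAP, TA_MAP, A6_MAP, A7_MAP };  // hypothetical

    // Second escape byte after the leading 0F, or 0 if there is none.
    static uint8_t secondEscapeByte(OpMap Map) {
      switch (Map) {
      case T8_MAP: return 0x38;  // 0F 38
      case TA_MAP: return 0x3A;  // 0F 3A
      case A6_MAP: return 0xA6;  // 0F A6
      case A7_MAP: return 0xA7;  // 0F A7
      default:     return 0;     // plain two-byte 0F map
      }
    }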
@@ -819,9 +829,9 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
// It uses the VEX.VVVV field?
bool HasVEX_4V = false;
- if ((TSFlags >> 32) & X86II::VEX)
+ if ((TSFlags >> X86II::VEXShift) & X86II::VEX)
HasVEXPrefix = true;
- if ((TSFlags >> 32) & X86II::VEX_4V)
+ if ((TSFlags >> X86II::VEXShift) & X86II::VEX_4V)
HasVEX_4V = true;
@@ -837,7 +847,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
unsigned char BaseOpcode = X86II::getBaseOpcodeFor(TSFlags);
- if ((TSFlags >> 32) & X86II::Has3DNow0F0FOpcode)
+ if ((TSFlags >> X86II::VEXShift) & X86II::Has3DNow0F0FOpcode)
BaseOpcode = 0x0F; // Weird 3DNow! encoding.
unsigned SrcRegNum = 0;
@@ -994,7 +1004,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
if (CurOp != NumOps) {
    // The last source register of a 4-operand instruction in AVX is encoded
    // in bits[7:4] of an immediate byte, and bits[3:0] are ignored.
- if ((TSFlags >> 32) & X86II::VEX_I8IMM) {
+ if ((TSFlags >> X86II::VEXShift) & X86II::VEX_I8IMM) {
const MCOperand &MO = MI.getOperand(CurOp++);
bool IsExtReg =
X86InstrInfo::isX86_64ExtendedReg(MO.getReg());
@@ -1017,7 +1027,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
}
}
- if ((TSFlags >> 32) & X86II::Has3DNow0F0FOpcode)
+ if ((TSFlags >> X86II::VEXShift) & X86II::Has3DNow0F0FOpcode)
EmitByte(X86II::getBaseOpcodeFor(TSFlags), CurByte, OS);
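Every "TSFlags >> 32" in this file becomes "TSFlags >> X86II::VEXShift": the
VEX flag group lives in the upper half of the 64-bit TSFlags word, and naming
the shift keeps the bit layout defined in one place. A minimal sketch of the
pattern; the flag values below are placeholders, not the real encodings:

    #include <cstdint>

    namespace X86II {
      enum { VEXShift = 32 };  // bit offset of the VEX flag group
      enum { VEX = 1, VEX_W = 2, VEX_4V = 4, VEX_I8IMM = 8, VEX_L = 16 };
    }

    static bool hasVEXFlag(uint64_t TSFlags, uint64_t Flag) {
      return (TSFlags >> X86II::VEXShift) & Flag;
    }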
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 2f6bd88..37fb0fe 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -308,6 +308,33 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
return 0;
}
+const TargetRegisterClass*
+X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC) const{
+ const TargetRegisterClass *Super = RC;
+ TargetRegisterClass::sc_iterator I = RC->superclasses_begin();
+ do {
+ switch (Super->getID()) {
+ case X86::GR8RegClassID:
+ case X86::GR16RegClassID:
+ case X86::GR32RegClassID:
+ case X86::GR64RegClassID:
+ case X86::FR32RegClassID:
+ case X86::FR64RegClassID:
+ case X86::RFP32RegClassID:
+ case X86::RFP64RegClassID:
+ case X86::RFP80RegClassID:
+ case X86::VR128RegClassID:
+ case X86::VR256RegClassID:
+ // Don't return a super-class that would shrink the spill size.
+ // That can happen with the vector and float classes.
+ if (Super->getSize() == RC->getSize())
+ return Super;
+ }
+ Super = *I++;
+ } while (Super);
+ return RC;
+}
+
const TargetRegisterClass *
X86RegisterInfo::getPointerRegClass(unsigned Kind) const {
switch (Kind) {
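The loop above walks the superclass chain and stops at the first known class
whose spill size matches the original, so a register is never spilled and
reloaded at a smaller width. A simplified restatement over a toy hierarchy:

    // Toy superclass walk: accept the first size-preserving legal class.
    struct ToyRC {
      unsigned Size;
      const ToyRC *Super;  // next superclass in the chain, or nullptr
      bool Legal;          // stands in for the switch on known class IDs
    };

    static const ToyRC *largestLegalSuperClass(const ToyRC *C) {
      for (const ToyRC *S = C; S; S = S->Super)
        if (S->Legal && S->Size == C->Size)
          return S;        // first class that keeps the spill size
      return C;            // no candidate: keep the original class
    }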
@@ -337,7 +364,27 @@ X86RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
else
return &X86::GR32RegClass;
}
- return NULL;
+ return RC;
+}
+
+unsigned
+X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ unsigned FPDiff = TFI->hasFP(MF) ? 1 : 0;
+ switch (RC->getID()) {
+ default:
+ return 0;
+ case X86::GR32RegClassID:
+ return 4 - FPDiff;
+ case X86::GR64RegClassID:
+ return 12 - FPDiff;
+ case X86::VR128RegClassID:
+ return TM.getSubtarget<X86Subtarget>().is64Bit() ? 10 : 4;
+ case X86::VR64RegClassID:
+ return 4;
+ }
}
const unsigned *
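The limits returned above are heuristics: roughly the number of allocatable
registers in the class, minus one when a frame pointer is reserved. A
compressed restatement of the GR32/GR64 cases, with the numbers copied from
the switch rather than derived:

    // Heuristic register-pressure budget for the general-purpose classes.
    static unsigned grPressureLimit(bool Is64Bit, bool HasFP) {
      unsigned FPDiff = HasFP ? 1 : 0;     // frame pointer eats one GPR
      return (Is64Bit ? 12 : 4) - FPDiff;  // GR64 vs. GR32 budgets above
    }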
@@ -450,7 +497,7 @@ bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
// FIXME: It's more complicated than this...
if (0 && requiresRealignment && MFI->hasVarSizedObjects())
report_fatal_error(
- "Stack realignment in presense of dynamic allocas is not supported");
+ "Stack realignment in presence of dynamic allocas is not supported");
// If we've requested that we force align the stack do so now.
if (ForceStackAlign)
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index 064be64..9970c52 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -91,6 +91,9 @@ public:
getMatchingSuperRegClass(const TargetRegisterClass *A,
const TargetRegisterClass *B, unsigned Idx) const;
+ const TargetRegisterClass*
+ getLargestLegalSuperClass(const TargetRegisterClass *RC) const;
+
/// getPointerRegClass - Returns a TargetRegisterClass used for pointer
/// values.
const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const;
@@ -101,6 +104,9 @@ public:
const TargetRegisterClass *
getCrossCopyRegClass(const TargetRegisterClass *RC) const;
+ unsigned getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const;
+
/// getCalleeSavedRegs - Return a null-terminated list of all of the
/// callee-save registers on this target.
const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td
index 612fac2..fd7a247 100644
--- a/lib/Target/X86/X86RegisterInfo.td
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -46,7 +46,8 @@ let Namespace = "X86" in {
def CL : Register<"cl">, DwarfRegNum<[2, 1, 1]>;
def BL : Register<"bl">, DwarfRegNum<[3, 3, 3]>;
- // X86-64 only
+ // X86-64 only, requires REX.
+ let CostPerUse = 1 in {
def SIL : Register<"sil">, DwarfRegNum<[4, 6, 6]>;
def DIL : Register<"dil">, DwarfRegNum<[5, 7, 7]>;
def BPL : Register<"bpl">, DwarfRegNum<[6, 4, 5]>;
@@ -59,6 +60,7 @@ let Namespace = "X86" in {
def R13B : Register<"r13b">, DwarfRegNum<[13, -2, -2]>;
def R14B : Register<"r14b">, DwarfRegNum<[14, -2, -2]>;
def R15B : Register<"r15b">, DwarfRegNum<[15, -2, -2]>;
+ }
// High registers. On x86-64, these cannot be used in any instruction
// with a REX prefix.
@@ -82,8 +84,8 @@ let Namespace = "X86" in {
}
def IP : Register<"ip">, DwarfRegNum<[16]>;
- // X86-64 only
- let SubRegIndices = [sub_8bit] in {
+ // X86-64 only, requires REX.
+ let SubRegIndices = [sub_8bit], CostPerUse = 1 in {
def R8W : RegisterWithSubRegs<"r8w", [R8B]>, DwarfRegNum<[8, -2, -2]>;
def R9W : RegisterWithSubRegs<"r9w", [R9B]>, DwarfRegNum<[9, -2, -2]>;
def R10W : RegisterWithSubRegs<"r10w", [R10B]>, DwarfRegNum<[10, -2, -2]>;
@@ -105,7 +107,8 @@ let Namespace = "X86" in {
def ESP : RegisterWithSubRegs<"esp", [SP]>, DwarfRegNum<[7, 5, 4]>;
def EIP : RegisterWithSubRegs<"eip", [IP]>, DwarfRegNum<[16, 8, 8]>;
- // X86-64 only
+ // X86-64 only, requires REX
+ let CostPerUse = 1 in {
def R8D : RegisterWithSubRegs<"r8d", [R8W]>, DwarfRegNum<[8, -2, -2]>;
def R9D : RegisterWithSubRegs<"r9d", [R9W]>, DwarfRegNum<[9, -2, -2]>;
def R10D : RegisterWithSubRegs<"r10d", [R10W]>, DwarfRegNum<[10, -2, -2]>;
@@ -114,7 +117,7 @@ let Namespace = "X86" in {
def R13D : RegisterWithSubRegs<"r13d", [R13W]>, DwarfRegNum<[13, -2, -2]>;
def R14D : RegisterWithSubRegs<"r14d", [R14W]>, DwarfRegNum<[14, -2, -2]>;
def R15D : RegisterWithSubRegs<"r15d", [R15W]>, DwarfRegNum<[15, -2, -2]>;
- }
+ }}
// 64-bit registers, X86-64 only
let SubRegIndices = [sub_32bit] in {
@@ -127,6 +130,8 @@ let Namespace = "X86" in {
def RBP : RegisterWithSubRegs<"rbp", [EBP]>, DwarfRegNum<[6, -2, -2]>;
def RSP : RegisterWithSubRegs<"rsp", [ESP]>, DwarfRegNum<[7, -2, -2]>;
+ // These also require REX.
+ let CostPerUse = 1 in {
def R8 : RegisterWithSubRegs<"r8", [R8D]>, DwarfRegNum<[8, -2, -2]>;
def R9 : RegisterWithSubRegs<"r9", [R9D]>, DwarfRegNum<[9, -2, -2]>;
def R10 : RegisterWithSubRegs<"r10", [R10D]>, DwarfRegNum<[10, -2, -2]>;
@@ -136,7 +141,7 @@ let Namespace = "X86" in {
def R14 : RegisterWithSubRegs<"r14", [R14D]>, DwarfRegNum<[14, -2, -2]>;
def R15 : RegisterWithSubRegs<"r15", [R15D]>, DwarfRegNum<[15, -2, -2]>;
def RIP : RegisterWithSubRegs<"rip", [EIP]>, DwarfRegNum<[16, -2, -2]>;
- }
+ }}
// MMX Registers. These are actually aliased to ST0 .. ST7
def MM0 : Register<"mm0">, DwarfRegNum<[41, 29, 29]>;
@@ -170,6 +175,7 @@ let Namespace = "X86" in {
def XMM7: Register<"xmm7">, DwarfRegNum<[24, 28, 28]>;
// X86-64 only
+ let CostPerUse = 1 in {
def XMM8: Register<"xmm8">, DwarfRegNum<[25, -2, -2]>;
def XMM9: Register<"xmm9">, DwarfRegNum<[26, -2, -2]>;
def XMM10: Register<"xmm10">, DwarfRegNum<[27, -2, -2]>;
@@ -178,7 +184,7 @@ let Namespace = "X86" in {
def XMM13: Register<"xmm13">, DwarfRegNum<[30, -2, -2]>;
def XMM14: Register<"xmm14">, DwarfRegNum<[31, -2, -2]>;
def XMM15: Register<"xmm15">, DwarfRegNum<[32, -2, -2]>;
- }
+ }}
// YMM Registers, used by AVX instructions
let SubRegIndices = [sub_xmm] in {
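CostPerUse = 1 tags every register whose use forces a REX prefix, so the
allocator can prefer the cheaper REX-free registers when it otherwise has no
preference, shaving a byte per instruction. A hypothetical tie-breaker built
on that field (toy types; not how the allocator is actually wired up):

    #include <algorithm>
    #include <vector>

    struct ToyReg { const char *Name; unsigned CostPerUse; };

    int main() {
      // REX-requiring registers carry CostPerUse = 1, as in the .td above.
      std::vector<ToyReg> Order = {
          {"r8d", 1}, {"eax", 0}, {"r9d", 1}, {"ecx", 0}};
      std::stable_sort(Order.begin(), Order.end(),
                       [](const ToyReg &A, const ToyReg &B) {
                         return A.CostPerUse < B.CostPerUse;
                       });
      // Order is now eax, ecx, r8d, r9d: REX-prefixed registers last.
    }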
diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp
index 42e8193..02754f9 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -178,7 +178,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
bool isVolatile, bool AlwaysInline,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) const {
- // This requires the copy size to be a constant, preferrably
+ // This requires the copy size to be a constant, preferably
// within a subtarget-specific limit.
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
if (!ConstantSize)
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 1ee7312..ba5864e 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -144,7 +144,8 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const {
/// passed as the second argument. Otherwise it returns null.
const char *X86Subtarget::getBZeroEntry() const {
// Darwin 10 has a __bzero entry point for this purpose.
- if (getDarwinVers() >= 10)
+ if (getTargetTriple().isMacOSX() &&
+ !getTargetTriple().isMacOSXVersionLT(10, 6))
return "__bzero";
return 0;
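The rewrite replaces a raw Darwin major-version compare with Triple's OS X
queries; __bzero is only returned on Mac OS X 10.6 ("Darwin 10") or newer. A
toy version of the gate, assuming a Triple-like helper:

    struct ToyTriple {
      bool IsMacOSX;
      unsigned Major, Minor;
      bool isMacOSXVersionLT(unsigned Maj, unsigned Min) const {
        return Major < Maj || (Major == Maj && Minor < Min);
      }
    };

    static const char *bzeroEntry(const ToyTriple &T) {
      if (T.IsMacOSX && !T.isMacOSXVersionLT(10, 6))
        return "__bzero";
      return nullptr;
    }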
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 0a62a02..286a798 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -165,9 +165,15 @@ public:
bool isUnalignedMemAccessFast() const { return IsUAMemFast; }
bool hasVectorUAMem() const { return HasVectorUAMem; }
- bool isTargetDarwin() const { return TargetTriple.getOS() == Triple::Darwin; }
- bool isTargetFreeBSD() const { return TargetTriple.getOS() == Triple::FreeBSD; }
- bool isTargetSolaris() const { return TargetTriple.getOS() == Triple::Solaris; }
+ const Triple &getTargetTriple() const { return TargetTriple; }
+
+ bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
+ bool isTargetFreeBSD() const {
+ return TargetTriple.getOS() == Triple::FreeBSD;
+ }
+ bool isTargetSolaris() const {
+ return TargetTriple.getOS() == Triple::Solaris;
+ }
// ELF is a reasonably sane default and the only other X86 targets we
// support are Darwin and Windows. Just use "not those".
@@ -215,13 +221,6 @@ public:
return PICStyle == PICStyles::StubDynamicNoPIC ||
PICStyle == PICStyles::StubPIC; }
- /// getDarwinVers - Return the darwin version number, 8 = Tiger, 9 = Leopard,
- /// 10 = Snow Leopard, etc.
- unsigned getDarwinVers() const {
- if (isTargetDarwin()) return TargetTriple.getDarwinMajorNumber();
- return 0;
- }
-
/// ClassifyGlobalReference - Classify a global variable reference for the
/// current subtarget according to how we should reference it in a non-pcrel
/// context.
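With getDarwinVers() gone, the subtarget exposes the triple itself, and former version comparisons migrate to the Mac OS X helpers via the usual Darwin-to-OS X mapping (Darwin 8 = 10.4 Tiger, 9 = 10.5 Leopard, 10 = 10.6 Snow Leopard). A hedged before/after sketch of such a migration (the Leopard check is an invented example, not a caller in this patch):

    #include "llvm/ADT/Triple.h"

    // Old style: getDarwinVers() >= 9  (Leopard or later).
    // New style, via the subtarget's getTargetTriple():
    static bool isLeopardOrLater(const llvm::Triple &TT) {
      return TT.isMacOSX() && !TT.isMacOSXVersionLT(10, 5);
    }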
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 889c824..7483329 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -26,19 +26,18 @@ using namespace llvm;
static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
Triple TheTriple(TT);
- switch (TheTriple.getOS()) {
- case Triple::Darwin:
- return new X86MCAsmInfoDarwin(TheTriple);
- case Triple::MinGW32:
- case Triple::Cygwin:
- case Triple::Win32:
- if (TheTriple.getEnvironment() == Triple::MachO)
- return new X86MCAsmInfoDarwin(TheTriple);
+
+ if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO) {
+ if (TheTriple.getArch() == Triple::x86_64)
+ return new X86_64MCAsmInfoDarwin(TheTriple);
else
- return new X86MCAsmInfoCOFF(TheTriple);
- default:
- return new X86ELFMCAsmInfo(TheTriple);
+ return new X86MCAsmInfoDarwin(TheTriple);
}
+
+ if (TheTriple.isOSWindows())
+ return new X86MCAsmInfoCOFF(TheTriple);
+
+ return new X86ELFMCAsmInfo(TheTriple);
}
static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
@@ -48,19 +47,14 @@ static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
bool RelaxAll,
bool NoExecStack) {
Triple TheTriple(TT);
- switch (TheTriple.getOS()) {
- case Triple::Darwin:
+
+ if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO)
return createMachOStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll);
- case Triple::MinGW32:
- case Triple::Cygwin:
- case Triple::Win32:
- if (TheTriple.getEnvironment() == Triple::MachO)
- return createMachOStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll);
- else
- return createWinCOFFStreamer(Ctx, TAB, *_Emitter, _OS, RelaxAll);
- default:
- return createELFStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll, NoExecStack);
- }
+
+ if (TheTriple.isOSWindows())
+ return createWinCOFFStreamer(Ctx, TAB, *_Emitter, _OS, RelaxAll);
+
+ return createELFStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll, NoExecStack);
}
extern "C" void LLVMInitializeX86Target() {
@@ -96,11 +90,11 @@ X86_32TargetMachine::X86_32TargetMachine(const Target &T, const std::string &TT,
const std::string &FS)
: X86TargetMachine(T, TT, FS, false),
DataLayout(getSubtargetImpl()->isTargetDarwin() ?
- "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-n8:16:32" :
+ "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-f128:128:128-n8:16:32" :
(getSubtargetImpl()->isTargetCygMing() ||
getSubtargetImpl()->isTargetWindows()) ?
- "e-p:32:32-f64:64:64-i64:64:64-f80:32:32-n8:16:32" :
- "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-n8:16:32"),
+ "e-p:32:32-f64:64:64-i64:64:64-f80:32:32-f128:128:128-n8:16:32" :
+ "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-f128:128:128-n8:16:32"),
InstrInfo(*this),
TSInfo(*this),
TLInfo(*this),
@@ -111,7 +105,7 @@ X86_32TargetMachine::X86_32TargetMachine(const Target &T, const std::string &TT,
X86_64TargetMachine::X86_64TargetMachine(const Target &T, const std::string &TT,
const std::string &FS)
: X86TargetMachine(T, TT, FS, true),
- DataLayout("e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-n8:16:32:64"),
+ DataLayout("e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-n8:16:32:64"),
InstrInfo(*this),
TSInfo(*this),
TLInfo(*this),
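Each added f128:128:128 component pins the ABI and preferred alignment of the fp128 type to 128 bits in the target data string, where it previously fell back to a default. A small probe, assuming the TargetData string constructor and const-qualified Type of this release (alignment is reported in bytes, so 128 bits reads back as 16):

    #include "llvm/LLVMContext.h"
    #include "llvm/Type.h"
    #include "llvm/Target/TargetData.h"

    int main() {
      llvm::LLVMContext Ctx;
      // The x86-64 layout string from the constructor above.
      llvm::TargetData TD("e-p:64:64-s:64-f64:64:64-i64:64:64-"
                          "f80:128:128-f128:128:128-n8:16:32:64");
      const llvm::Type *FP128 = llvm::Type::getFP128Ty(Ctx);
      return TD.getABITypeAlignment(FP128) == 16 ? 0 : 1;
    }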
diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp
index c15dfbb..1231798 100644
--- a/lib/Target/X86/X86TargetObjectFile.cpp
+++ b/lib/Target/X86/X86TargetObjectFile.cpp
@@ -38,6 +38,12 @@ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, Streamer);
}
+MCSymbol *X8664_MachoTargetObjectFile::
+getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI) const {
+ return Mang->getSymbol(GV);
+}
+
unsigned X8632_ELFTargetObjectFile::getPersonalityEncoding() const {
if (TM.getRelocationModel() == Reloc::PIC_)
return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4;
@@ -52,7 +58,7 @@ unsigned X8632_ELFTargetObjectFile::getLSDAEncoding() const {
return DW_EH_PE_absptr;
}
-unsigned X8632_ELFTargetObjectFile::getFDEEncoding() const {
+unsigned X8632_ELFTargetObjectFile::getFDEEncoding(bool CFI) const {
if (TM.getRelocationModel() == Reloc::PIC_)
return DW_EH_PE_pcrel | DW_EH_PE_sdata4;
else
@@ -91,17 +97,14 @@ unsigned X8664_ELFTargetObjectFile::getLSDAEncoding() const {
return DW_EH_PE_absptr;
}
-unsigned X8664_ELFTargetObjectFile::getFDEEncoding() const {
- CodeModel::Model Model = TM.getCodeModel();
- if (TM.getRelocationModel() == Reloc::PIC_)
- return DW_EH_PE_pcrel | (Model == CodeModel::Small ||
- Model == CodeModel::Medium ?
- DW_EH_PE_sdata4 : DW_EH_PE_sdata8);
+unsigned X8664_ELFTargetObjectFile::getFDEEncoding(bool CFI) const {
+ if (CFI)
+ return DW_EH_PE_pcrel | DW_EH_PE_sdata4;
- if (Model == CodeModel::Small || Model == CodeModel::Medium)
- return DW_EH_PE_udata4;
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ return DW_EH_PE_pcrel | DW_EH_PE_sdata4;
- return DW_EH_PE_absptr;
+ return DW_EH_PE_udata4;
}
unsigned X8664_ELFTargetObjectFile::getTTypeEncoding() const {
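Under CFI-based emission the FDE pointer encoding no longer depends on the code model: .cfi directives hand final layout to the assembler, so pcrel|sdata4 is always safe. The encodings OR a base format into a modifier; a standalone sketch with the conventional DW_EH_PE_* values (constants restated here for illustration, normally taken from llvm/Support/Dwarf.h):

    #include <cstdio>

    enum {
      DW_EH_PE_absptr = 0x00, // raw machine-word pointer
      DW_EH_PE_udata4 = 0x03, // unsigned 4-byte datum
      DW_EH_PE_sdata4 = 0x0B, // signed 4-byte datum
      DW_EH_PE_pcrel  = 0x10  // relative to the encoding's own address
    };

    int main() {
      // What the new getFDEEncoding(true) path returns:
      unsigned Enc = DW_EH_PE_pcrel | DW_EH_PE_sdata4;
      std::printf("FDE encoding under CFI: 0x%02X\n", Enc); // 0x1B
      return 0;
    }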
diff --git a/lib/Target/X86/X86TargetObjectFile.h b/lib/Target/X86/X86TargetObjectFile.h
index f2fd49c..e21b5bf 100644
--- a/lib/Target/X86/X86TargetObjectFile.h
+++ b/lib/Target/X86/X86TargetObjectFile.h
@@ -25,6 +25,12 @@ namespace llvm {
getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
MachineModuleInfo *MMI, unsigned Encoding,
MCStreamer &Streamer) const;
+
+ // getCFIPersonalitySymbol - The symbol that gets passed to
+ // .cfi_personality.
+ virtual MCSymbol *
+ getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI) const;
};
class X8632_ELFTargetObjectFile : public TargetLoweringObjectFileELF {
@@ -34,7 +40,7 @@ namespace llvm {
:TM(tm) { }
virtual unsigned getPersonalityEncoding() const;
virtual unsigned getLSDAEncoding() const;
- virtual unsigned getFDEEncoding() const;
+ virtual unsigned getFDEEncoding(bool CFI) const;
virtual unsigned getTTypeEncoding() const;
};
@@ -45,7 +51,7 @@ namespace llvm {
:TM(tm) { }
virtual unsigned getPersonalityEncoding() const;
virtual unsigned getLSDAEncoding() const;
- virtual unsigned getFDEEncoding() const;
+ virtual unsigned getFDEEncoding(bool CFI) const;
virtual unsigned getTTypeEncoding() const;
};
diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
index fc8a07a..6bec9f9 100644
--- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp
+++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
@@ -30,8 +30,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include <queue>
-#include <set>
using namespace llvm;
/// XCoreDAGToDAGISel - XCore specific code to select XCore machine
@@ -49,7 +47,8 @@ namespace {
Subtarget(*TM.getSubtargetImpl()) { }
SDNode *Select(SDNode *N);
-
+ SDNode *SelectBRIND(SDNode *N);
+
/// getI32Imm - Return a target constant with the specified value, of type
/// i32.
inline SDValue getI32Imm(unsigned Imm) {
@@ -154,62 +153,133 @@ bool XCoreDAGToDAGISel::SelectADDRcpii(SDValue Addr, SDValue &Base,
SDNode *XCoreDAGToDAGISel::Select(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
- EVT NVT = N->getValueType(0);
- if (NVT == MVT::i32) {
- switch (N->getOpcode()) {
- default: break;
- case ISD::Constant: {
- uint64_t Val = cast<ConstantSDNode>(N)->getZExtValue();
- if (immMskBitp(N)) {
- // Transformation function: get the size of a mask
- // Look for the first non-zero bit
- SDValue MskSize = getI32Imm(32 - CountLeadingZeros_32(Val));
- return CurDAG->getMachineNode(XCore::MKMSK_rus, dl,
- MVT::i32, MskSize);
- }
- else if (!isUInt<16>(Val)) {
- SDValue CPIdx =
- CurDAG->getTargetConstantPool(ConstantInt::get(
- Type::getInt32Ty(*CurDAG->getContext()), Val),
- TLI.getPointerTy());
- return CurDAG->getMachineNode(XCore::LDWCP_lru6, dl, MVT::i32,
- MVT::Other, CPIdx,
- CurDAG->getEntryNode());
- }
- break;
- }
- case XCoreISD::LADD: {
- SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
- N->getOperand(2) };
- return CurDAG->getMachineNode(XCore::LADD_l5r, dl, MVT::i32, MVT::i32,
- Ops, 3);
- }
- case XCoreISD::LSUB: {
- SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
- N->getOperand(2) };
- return CurDAG->getMachineNode(XCore::LSUB_l5r, dl, MVT::i32, MVT::i32,
- Ops, 3);
- }
- case XCoreISD::MACCU: {
- SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
- N->getOperand(2), N->getOperand(3) };
- return CurDAG->getMachineNode(XCore::MACCU_l4r, dl, MVT::i32, MVT::i32,
- Ops, 4);
- }
- case XCoreISD::MACCS: {
- SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
- N->getOperand(2), N->getOperand(3) };
- return CurDAG->getMachineNode(XCore::MACCS_l4r, dl, MVT::i32, MVT::i32,
- Ops, 4);
- }
- case XCoreISD::LMUL: {
- SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
- N->getOperand(2), N->getOperand(3) };
- return CurDAG->getMachineNode(XCore::LMUL_l6r, dl, MVT::i32, MVT::i32,
- Ops, 4);
- }
- // Other cases are autogenerated.
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::Constant: {
+ uint64_t Val = cast<ConstantSDNode>(N)->getZExtValue();
+ if (immMskBitp(N)) {
+ // Transformation function: get the size of a mask
+ // Look for the first non-zero bit
+ SDValue MskSize = getI32Imm(32 - CountLeadingZeros_32(Val));
+ return CurDAG->getMachineNode(XCore::MKMSK_rus, dl,
+ MVT::i32, MskSize);
+ }
+ else if (!isUInt<16>(Val)) {
+ SDValue CPIdx =
+ CurDAG->getTargetConstantPool(ConstantInt::get(
+ Type::getInt32Ty(*CurDAG->getContext()), Val),
+ TLI.getPointerTy());
+ return CurDAG->getMachineNode(XCore::LDWCP_lru6, dl, MVT::i32,
+ MVT::Other, CPIdx,
+ CurDAG->getEntryNode());
}
+ break;
+ }
+ case XCoreISD::LADD: {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+ N->getOperand(2) };
+ return CurDAG->getMachineNode(XCore::LADD_l5r, dl, MVT::i32, MVT::i32,
+ Ops, 3);
+ }
+ case XCoreISD::LSUB: {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+ N->getOperand(2) };
+ return CurDAG->getMachineNode(XCore::LSUB_l5r, dl, MVT::i32, MVT::i32,
+ Ops, 3);
+ }
+ case XCoreISD::MACCU: {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+ N->getOperand(2), N->getOperand(3) };
+ return CurDAG->getMachineNode(XCore::MACCU_l4r, dl, MVT::i32, MVT::i32,
+ Ops, 4);
+ }
+ case XCoreISD::MACCS: {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+ N->getOperand(2), N->getOperand(3) };
+ return CurDAG->getMachineNode(XCore::MACCS_l4r, dl, MVT::i32, MVT::i32,
+ Ops, 4);
+ }
+ case XCoreISD::LMUL: {
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
+ N->getOperand(2), N->getOperand(3) };
+ return CurDAG->getMachineNode(XCore::LMUL_l6r, dl, MVT::i32, MVT::i32,
+ Ops, 4);
+ }
+ case ISD::BRIND:
+ if (SDNode *ResNode = SelectBRIND(N))
+ return ResNode;
+ break;
+ // Other cases are autogenerated.
}
return SelectCode(N);
}
+
+/// Given a chain, return a new chain where any appearance of Old is replaced
+/// by New. There must be at most one node between Old and Chain, and that
+/// node must be a TokenFactor. Returns an empty SDValue if these conditions
+/// don't hold.
+static SDValue
+replaceInChain(SelectionDAG *CurDAG, SDValue Chain, SDValue Old, SDValue New)
+{
+ if (Chain == Old)
+ return New;
+ if (Chain->getOpcode() != ISD::TokenFactor)
+ return SDValue();
+ SmallVector<SDValue, 8> Ops;
+ bool found = false;
+ for (unsigned i = 0, e = Chain->getNumOperands(); i != e; ++i) {
+ if (Chain->getOperand(i) == Old) {
+ Ops.push_back(New);
+ found = true;
+ } else {
+ Ops.push_back(Chain->getOperand(i));
+ }
+ }
+ if (!found)
+ return SDValue();
+ return CurDAG->getNode(ISD::TokenFactor, Chain->getDebugLoc(), MVT::Other,
+ &Ops[0], Ops.size());
+}
+
+SDNode *XCoreDAGToDAGISel::SelectBRIND(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ // (brind (int_xcore_checkevent (addr)))
+ SDValue Chain = N->getOperand(0);
+ SDValue Addr = N->getOperand(1);
+ if (Addr->getOpcode() != ISD::INTRINSIC_W_CHAIN)
+ return 0;
+ unsigned IntNo = cast<ConstantSDNode>(Addr->getOperand(1))->getZExtValue();
+ if (IntNo != Intrinsic::xcore_checkevent)
+ return 0;
+ SDValue nextAddr = Addr->getOperand(2);
+ SDValue CheckEventChainOut(Addr.getNode(), 1);
+ if (!CheckEventChainOut.use_empty()) {
+ // If the chain out of the checkevent intrinsic is an operand of the
+ // indirect branch, or is used in a TokenFactor that is the operand of
+ // the indirect branch, then build a new chain that uses the chain coming
+ // into the checkevent intrinsic instead.
+ SDValue CheckEventChainIn = Addr->getOperand(0);
+ SDValue NewChain = replaceInChain(CurDAG, Chain, CheckEventChainOut,
+ CheckEventChainIn);
+ if (!NewChain.getNode())
+ return 0;
+ Chain = NewChain;
+ }
+ // Enable events on the thread using setsr 1 and then disable them
+ // immediately afterwards with clrsr 1. If any resources owned by the thread
+ // are ready, an event will be taken. If no resource is ready, we branch to
+ // the address that was the operand to the checkevent intrinsic.
+ SDValue constOne = getI32Imm(1);
+ SDValue Glue =
+ SDValue(CurDAG->getMachineNode(XCore::SETSR_branch_u6, dl, MVT::Glue,
+ constOne, Chain), 0);
+ Glue =
+ SDValue(CurDAG->getMachineNode(XCore::CLRSR_branch_u6, dl, MVT::Glue,
+ constOne, Glue), 0);
+ if (nextAddr->getOpcode() == XCoreISD::PCRelativeWrapper &&
+ nextAddr->getOperand(0)->getOpcode() == ISD::TargetBlockAddress) {
+ return CurDAG->SelectNodeTo(N, XCore::BRFU_lu6, MVT::Other,
+ nextAddr->getOperand(0), Glue);
+ }
+ return CurDAG->SelectNodeTo(N, XCore::BAU_1r, MVT::Other, nextAddr, Glue);
+}
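Two details of this hunk are easy to miss. First, hoisting the switch out of the old NVT == MVT::i32 wrapper is what lets the new ISD::BRIND case fire at all: an indirect branch produces only a chain, so its first value type is MVT::Other and a case placed inside the guard would have been dead code. Second, SelectBRIND rewires the chain so the checkevent intrinsic's chain output is gone before the setsr/clrsr/bau sequence is emitted. A minimal sketch of the type-guard point (assumed shape, not the selector itself):

    #include "llvm/CodeGen/SelectionDAGNodes.h"

    // True only for nodes yielding an i32 first result; ISD::BRIND yields
    // just a chain (MVT::Other), so the old guard would have skipped it.
    static bool reachableUnderOldGuard(const llvm::SDNode *N) {
      return N->getValueType(0) == llvm::MVT::i32;
    }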
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index 4817787..5987e8b 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -37,8 +37,6 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/VectorExtras.h"
-#include <queue>
-#include <set>
using namespace llvm;
const char *XCoreTargetLowering::
@@ -967,7 +965,7 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
// Build a sequence of copy-to-reg nodes chained together with token
// chain and flag operands which copy the outgoing args into registers.
- // The InFlag in necessary since all emited instructions must be
+ // The InFlag is necessary since all emitted instructions must be
// stuck together.
SDValue InFlag;
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td
index ecdd4cb..789546e 100644
--- a/lib/Target/XCore/XCoreInstrInfo.td
+++ b/lib/Target/XCore/XCoreInstrInfo.td
@@ -308,6 +308,16 @@ multiclass FU6_LU6<string OpcStr, SDNode OpNode> {
!strconcat(OpcStr, " $b"),
[(OpNode immU16:$b)]>;
}
+multiclass FU6_LU6_int<string OpcStr, Intrinsic Int> {
+ def _u6: _FU6<
+ (outs), (ins i32imm:$b),
+ !strconcat(OpcStr, " $b"),
+ [(Int immU6:$b)]>;
+ def _lu6: _FLU6<
+ (outs), (ins i32imm:$b),
+ !strconcat(OpcStr, " $b"),
+ [(Int immU16:$b)]>;
+}
multiclass FU6_LU6_np<string OpcStr> {
def _u6: _FU6<
@@ -638,8 +648,8 @@ defm RETSP : FU6_LU6<"retsp", XCoreRetsp>;
}
}
-// TODO extdp, kentsp, krestsp, blat, setsr
-// clrsr, getsr, kalli
+// TODO extdp, kentsp, krestsp, blat
+// getsr, kalli
let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
def BRBU_u6 : _FU6<
(outs),
@@ -678,6 +688,17 @@ def LDAWCP_lu6: _FLRU6<
"ldaw r11, cp[$a]",
[(set R11, ADDRcpii:$a)]>;
+defm SETSR : FU6_LU6_int<"setsr", int_xcore_setsr>;
+
+defm CLRSR : FU6_LU6_int<"clrsr", int_xcore_clrsr>;
+
+// setsr may cause a branch if it is used to enable events. clrsr may
+// branch if it is executed while events are enabled.
+let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in {
+defm SETSR_branch : FU6_LU6_np<"setsr">;
+defm CLRSR_branch : FU6_LU6_np<"clrsr">;
+}
+
// U10
// TODO ldwcpl, blacp
@@ -718,7 +739,7 @@ def BL_lu10 : _FLU10<
}
// Two operand short
-// TODO getr, getst
+// TODO eet, eef, testwct, tsetmr, sext (reg), zext (reg)
def NOT : _F2R<(outs GRRegs:$dst), (ins GRRegs:$b),
"not $dst, $b",
[(set GRRegs:$dst, (not GRRegs:$b))]>;
@@ -727,8 +748,6 @@ def NEG : _F2R<(outs GRRegs:$dst), (ins GRRegs:$b),
"neg $dst, $b",
[(set GRRegs:$dst, (ineg GRRegs:$b))]>;
-// TODO setd, eet, eef, testwct, tinitpc, tinitdp,
-// tinitsp, tinitcp, tsetmr, sext (reg), zext (reg)
let Constraints = "$src1 = $dst" in {
let neverHasSideEffects = 1 in
def SEXT_rus : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$src1, i32imm:$src2),
@@ -816,9 +835,29 @@ def SETD_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
"setd res[$r], $val",
[(int_xcore_setd GRRegs:$r, GRRegs:$val)]>;
+def GETST_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r),
+ "getst $dst, res[$r]",
+ [(set GRRegs:$dst, (int_xcore_getst GRRegs:$r))]>;
+
+def INITSP_2r : _F2R<(outs), (ins GRRegs:$t, GRRegs:$src),
+ "init t[$t]:sp, $src",
+ [(int_xcore_initsp GRRegs:$t, GRRegs:$src)]>;
+
+def INITPC_2r : _F2R<(outs), (ins GRRegs:$t, GRRegs:$src),
+ "init t[$t]:pc, $src",
+ [(int_xcore_initpc GRRegs:$t, GRRegs:$src)]>;
+
+def INITCP_2r : _F2R<(outs), (ins GRRegs:$t, GRRegs:$src),
+ "init t[$t]:cp, $src",
+ [(int_xcore_initcp GRRegs:$t, GRRegs:$src)]>;
+
+def INITDP_2r : _F2R<(outs), (ins GRRegs:$t, GRRegs:$src),
+ "init t[$t]:dp, $src",
+ [(int_xcore_initdp GRRegs:$t, GRRegs:$src)]>;
+
// Two operand long
-// TODO setclk, setrdy, setpsc, endin, peek,
-// getd, testlcl, tinitlr, getps, setps
+// TODO endin, peek,
+// getd, testlcl
def BITREV_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src),
"bitrev $dst, $src",
[(set GRRegs:$dst, (int_xcore_bitrev GRRegs:$src))]>;
@@ -839,10 +878,41 @@ def SETTW_l2r : _FL2R<(outs), (ins GRRegs:$r, GRRegs:$val),
"settw res[$r], $val",
[(int_xcore_settw GRRegs:$r, GRRegs:$val)]>;
+def GETPS_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src),
+ "get $dst, ps[$src]",
+ [(set GRRegs:$dst, (int_xcore_getps GRRegs:$src))]>;
+
+def SETPS_l2r : _FL2R<(outs), (ins GRRegs:$src1, GRRegs:$src2),
+ "set ps[$src1], $src2",
+ [(int_xcore_setps GRRegs:$src1, GRRegs:$src2)]>;
+
+def INITLR_l2r : _FL2R<(outs), (ins GRRegs:$t, GRRegs:$src),
+ "init t[$t]:lr, $src",
+ [(int_xcore_initlr GRRegs:$t, GRRegs:$src)]>;
+
+def SETCLK_l2r : _FL2R<(outs), (ins GRRegs:$src1, GRRegs:$src2),
+ "setclk res[$src1], $src2",
+ [(int_xcore_setclk GRRegs:$src1, GRRegs:$src2)]>;
+
+def SETRDY_l2r : _FL2R<(outs), (ins GRRegs:$src1, GRRegs:$src2),
+ "setrdy res[$src1], $src2",
+ [(int_xcore_setrdy GRRegs:$src1, GRRegs:$src2)]>;
+
+def SETPSC_l2r : _FL2R<(outs), (ins GRRegs:$src1, GRRegs:$src2),
+ "setpsc res[$src1], $src2",
+ [(int_xcore_setpsc GRRegs:$src1, GRRegs:$src2)]>;
+
// One operand short
-// TODO edu, eeu, waitet, waitef, tstart, msync, mjoin, clrtp
+// TODO edu, eeu, waitet, waitef, tstart, clrtp
// setdp, setcp, setev, kcall
// dgetreg
+def MSYNC_1r : _F1R<(outs), (ins GRRegs:$i),
+ "msync res[$i]",
+ [(int_xcore_msync GRRegs:$i)]>;
+def MJOIN_1r : _F1R<(outs), (ins GRRegs:$i),
+ "mjoin res[$i]",
+ [(int_xcore_mjoin GRRegs:$i)]>;
+
let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in
def BAU_1r : _F1R<(outs), (ins GRRegs:$addr),
"bau $addr",
@@ -899,7 +969,7 @@ def EEU_1r : _F1R<(outs), (ins GRRegs:$r),
[(int_xcore_eeu GRRegs:$r)]>;
// Zero operand short
-// TODO ssync, freet, ldspc, stspc, ldssr, stssr, ldsed, stsed,
+// TODO freet, ldspc, stspc, ldssr, stssr, ldsed, stsed,
// stet, geted, getet, getkep, getksp, setkep, getid, kret, dcall, dret,
// dentsp, drestsp
@@ -910,6 +980,10 @@ def GETID_0R : _F0R<(outs), (ins),
"get r11, id",
[(set R11, (int_xcore_getid))]>;
+def SSYNC_0r : _F0R<(outs), (ins),
+ "ssync",
+ [(int_xcore_ssync)]>;
+
let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1,
hasSideEffects = 1 in
def WAITEU_0R : _F0R<(outs), (ins),
diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp
index 56c0879..0287a51 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.cpp
+++ b/lib/Target/XCore/XCoreRegisterInfo.cpp
@@ -104,6 +104,11 @@ XCoreRegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const {
return TFI->hasFP(MF);
}
+bool
+XCoreRegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const {
+ return false;
+}
+
// This function eliminates ADJCALLSTACKDOWN,
// ADJCALLSTACKUP pseudo instructions
void XCoreRegisterInfo::
diff --git a/lib/Target/XCore/XCoreRegisterInfo.h b/lib/Target/XCore/XCoreRegisterInfo.h
index 2185755..770483b 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.h
+++ b/lib/Target/XCore/XCoreRegisterInfo.h
@@ -48,6 +48,8 @@ public:
bool requiresRegisterScavenging(const MachineFunction &MF) const;
+ bool useFPForScavengingIndex(const MachineFunction &MF) const;
+
void eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;